diff options
Diffstat (limited to 'coverage')
| -rw-r--r-- | coverage/data.py | 71 |
| -rw-r--r-- | coverage/sqldata.py | 3 |
2 files changed, 45 insertions, 29 deletions
diff --git a/coverage/data.py b/coverage/data.py index 4bdfe301..798d167f 100644 --- a/coverage/data.py +++ b/coverage/data.py @@ -11,6 +11,7 @@ imports working. """ import glob +import hashlib import os.path from coverage.exceptions import CoverageException, NoDataError @@ -110,7 +111,9 @@ def combine_parallel_data( if strict and not files_to_combine: raise NoDataError("No data to combine") - files_combined = 0 + file_hashes = set() + combined_any = False + for f in files_to_combine: if f == data.data_filename(): # Sometimes we are combining into a file which is one of the @@ -118,34 +121,50 @@ def combine_parallel_data( if data._debug.should('dataio'): data._debug.write(f"Skipping combining ourself: {f!r}") continue - if data._debug.should('dataio'): - data._debug.write(f"Combining data file {f!r}") + try: - new_data = CoverageData(f, debug=data._debug) - new_data.read() - except CoverageException as exc: - if data._warn: - # The CoverageException has the file name in it, so just - # use the message as the warning. - data._warn(str(exc)) + rel_file_name = os.path.relpath(f) + except ValueError: + # ValueError can be raised under Windows when os.getcwd() returns a + # folder from a different drive than the drive of f, in which case + # we print the original value of f instead of its relative path + rel_file_name = f + + with open(f, "rb") as fobj: + hasher = hashlib.new("sha3_256") + hasher.update(fobj.read()) + sha = hasher.digest() + combine_this_one = sha not in file_hashes + + delete_this_one = not keep + if combine_this_one: + if data._debug.should('dataio'): + data._debug.write(f"Combining data file {f!r}") + file_hashes.add(sha) + try: + new_data = CoverageData(f, debug=data._debug) + new_data.read() + except CoverageException as exc: + if data._warn: + # The CoverageException has the file name in it, so just + # use the message as the warning. 
+ data._warn(str(exc)) + delete_this_one = False + else: + data.update(new_data, aliases=aliases) + combined_any = True + if message: + message(f"Combined data file {rel_file_name}") else: - data.update(new_data, aliases=aliases) - files_combined += 1 if message: - try: - file_name = os.path.relpath(f) - except ValueError: - # ValueError can be raised under Windows when os.getcwd() returns a - # folder from a different drive than the drive of f, in which case - # we print the original value of f instead of its relative path - file_name = f - message(f"Combined data file {file_name}") - if not keep: - if data._debug.should('dataio'): - data._debug.write(f"Deleting combined data file {f!r}") - file_be_gone(f) - - if strict and not files_combined: + message(f"Skipping duplicate data {rel_file_name}") + + if delete_this_one: + if data._debug.should('dataio'): + data._debug.write(f"Deleting data file {f!r}") + file_be_gone(f) + + if strict and not combined_any: raise NoDataError("No usable data files") diff --git a/coverage/sqldata.py b/coverage/sqldata.py index 2b773053..2fbc53f5 100644 --- a/coverage/sqldata.py +++ b/coverage/sqldata.py @@ -4,7 +4,6 @@ """SQLite coverage data.""" import collections -import datetime import functools import glob import itertools @@ -56,7 +55,6 @@ CREATE TABLE meta ( -- 'has_arcs' boolean -- Is this data recording branches? -- 'sys_argv' text -- The coverage command line that recorded the data. -- 'version' text -- The version of coverage.py that made the file. - -- 'when' text -- Datetime when the file was created. ); CREATE TABLE file ( @@ -305,7 +303,6 @@ class CoverageData(SimpleReprMixin): [ ("sys_argv", str(getattr(sys, "argv", None))), ("version", __version__), - ("when", datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")), ] ) |
