#!/usr/bin/env python3
#
# Usage: gphoto-m4-sync [--diff] <directory>...
#        gphoto-m4-sync --help
#
# The gphoto-m4-sync script helps with keeping track of which files in
# which gphoto-m4 tree copy differ from the original gphoto-m4 tree.
#
# In normal operation, gphoto-m4-sync will search for gphoto-m4
# directories anywhere in the directory trees given on the command
# line and compare the gphoto-m4 tree from which gphoto-m4-sync was
# started to those other trees.
#
# When not given the --diff option, gphoto-m4-sync will print a human
# readable report on which files are different in which gphoto-m4
# tree.
#
# Options:
#
#   --diff   Print a list of 'diff' command lines to compare
#            the different files instead. Pipe into something like
#            "| sh | less" to execute.
#
#   --help   Print this help message.
#
# Exit code:
#
#   0   when no differences have been found among the gphoto-m4 trees
#   1   when any differences have been found among the gphoto-m4 trees
#   2   any other error

########################################################################

import hashlib
import os
import shlex
import sys


########################################################################

class File(object):
    """One file inside a tree, together with its stat info and SHA-1 digest."""

    def __init__(self, tree, fname):
        """Stat and hash the file `fname` (relative to `tree.top`)."""
        self.tree = tree
        self.fname = fname
        self.fpath = os.path.join(tree.top, fname)
        self.statinfo = os.stat(self.fpath)
        # Use a context manager so the file handle is closed right away
        # instead of whenever the garbage collector gets around to it.
        with open(self.fpath, 'rb') as fobj:
            self.digest = hashlib.sha1(fobj.read()).hexdigest()

    def __repr__(self):
        return 'File(%s,%s)' % (repr(self.fname), repr(self.digest))

    def __str__(self):
        return '%s %s' % (self.digest, self.fname)


########################################################################

class BaseTree(object):
    """Mapping from relative file name to `File` for one directory tree."""

    def __init__(self, top):
        self.top = os.path.abspath(top)
        self._files = self.__scan_files()

    def __repr__(self):
        return '%s(%s)[%s]' % (self.__class__.__name__, self.top, self._files)

    def __iter__(self):
        """Iterate over the relative file names in sorted order."""
        return iter(sorted(self._files))

    def __contains__(self, key):
        """Dict lookup; without this, `in` would re-sort all names each time."""
        return key in self._files

    def __getitem__(self, key):
        return self._files[key]

    def __scan_files(self):
        """Walk `self.top` and return a dict of relative name -> `File`."""
        files = {}
        for dirpath, dirnames, filenames in os.walk(self.top, topdown=True):
            # Pruning dirnames in place (topdown walk) keeps os.walk
            # from descending into the '.git' directory.
            if '.git' in dirnames:
                dirnames.remove('.git')
            for fname in filenames:
                # Ignore editor backups, git metadata and Makefiles
                if fname.endswith('~'):
                    continue
                if fname.startswith('.git'):
                    continue
                if fname in ('Makefile.in', 'Makefile'):
                    continue
                abs_fname = os.path.join(dirpath, fname)
                rel_fname = os.path.relpath(abs_fname, start=self.top)
                files[rel_fname] = File(self, rel_fname)
        return files


########################################################################

class GitTree(BaseTree):
    """A tree which must contain a '.git' entry at its top."""

    def __init__(self, top):
        path = os.path.join(top, '.git')
        if not os.path.exists(path):
            raise AssertionError("File or directory does not exist: %s"
                                 % repr(path))
        super().__init__(top)


########################################################################

class NotGitTree(BaseTree):
    """A tree which must not contain a '.git' entry at its top."""

    def __init__(self, top):
        path = os.path.join(top, '.git')
        if os.path.exists(path):
            raise AssertionError("File or directory does exist: %s"
                                 % repr(path))
        super().__init__(top)


########################################################################

def scan_tree(top):
    """Yield `(dirpath, NotGitTree)` for each gphoto-m4 directory below `top`.

    A directory counts as a gphoto-m4 directory when it is named
    'gphoto-m4' and directly contains a file called 'gp-camlibs.m4'.
    """
    for dirpath, dirnames, filenames in os.walk(top):
        if os.path.basename(dirpath) != 'gphoto-m4':
            continue
        if 'gp-camlibs.m4' not in filenames:
            continue
        yield (dirpath, NotGitTree(dirpath))


########################################################################

def print_help():
    """Print the comment block at the top of this file as the help text.

    The '#!' line and any leading empty comment lines are skipped;
    every following line is printed with its first two characters
    removed, up to the first completely empty line.
    """
    skip_line = True
    skip_lines = ['#', '# ']
    with open(__file__, 'r') as script:
        for line in script:
            if line[-1] == '\n':
                line = line[:-1]
            if line.startswith('#!'):
                continue
            elif skip_line and (line in skip_lines):
                continue
            elif skip_line and (line not in skip_lines):
                skip_line = False
            elif line == '':
                break
            if not skip_line:
                print(line[2:])


########################################################################

class ResultTable(object):
    """Collection of `ResultLine` objects keyed by file name, with totals."""

    def __init__(self):
        self.lines = {}
        self.files_with_differences = 0
        self.differences = 0

    def __setitem__(self, key, value):
        """Store a closed `ResultLine`; each key may only be set once."""
        assert key not in self.lines
        self.lines[key] = value
        if value.file_versions > 0:
            self.files_with_differences += 1
            self.differences += value.file_versions

    def __getitem__(self, key):
        assert self.files_with_differences is not None
        return self.lines[key]

    def items(self):
        """Yield `(file name, ResultLine)` pairs sorted by file name."""
        for k in sorted(self.lines):
            yield k, self.lines[k]

    def close(self):
        pass


########################################################################

class ResultLine(object):
    """Per-file comparison result: one flag, path and digest per tree index.

    Tree index 0 is the original tree. `close()` must be called before
    `get_digest()` may be used.
    """

    def __init__(self, fname):
        self.fname = fname
        self.__digest_map = {}      # tree index -> digest
        self.__digests = {}         # digest -> True (set of all digests)
        self.__digest_list = None   # sorted digests, filled in by close()
        self.__flags = {}           # tree index -> flag character
        self.__fpaths = {}          # tree index -> file path or None

    def set_digest(self, index, digest):
        self.__digest_map[index] = digest
        self.__digests[digest] = True

    def close(self, file_versions):
        """Record the number of differing versions and freeze the digests."""
        self.file_versions = file_versions
        self.__digest_list = sorted(self.__digests.keys())
        assert len(self.__digest_list) > 0
        if file_versions == 0:
            # All files are equal, so we do not need different characters
            # to distinguish different digest values - a space will do as
            # well.
            self.__digest_map = {}

    def get_digest(self, index):
        """Return the small letter identifying the content at `index`.

        Returns a space when no digest is recorded for that tree index.
        Raises RuntimeError when called before `close()`.
        """
        if self.__digest_list is None:
            raise RuntimeError("You need to call ResultLine.close() before Result_Line.get_digest()")
        if index in self.__digest_map:
            dig = self.__digest_map[index]
            idx = self.__digest_list.index(dig)
            return 'abcdefghijklmnopqrstuvwxyz'[idx]
        else:
            return ' '

    def set_flag(self, index, flag, fpath):
        self.__flags[index] = flag
        self.__fpaths[index] = fpath

    def get_flag(self, index):
        return self.__flags[index]

    def get_fpath(self, index):
        return self.__fpaths[index]


########################################################################

def cmd_print_report(result_table, all_files, treelist, trees):
    """Print the human readable report and exit the process.

    `treelist` holds the top directories in column order (index 0 is the
    original tree); `trees` maps the non-original top directories to
    their tree objects. Exits with code 1 when `result_table` recorded
    any differences and with code 0 otherwise.
    """
    # Enumerate list of trees
    print("Trees (0 is the original tree):")
    for i, tree in enumerate(treelist):
        print(" %d. %s" % (i, tree))
    print()

    # Determine maximum length of file name
    fn_maxlen = max((len(fn) for fn in all_files), default=0)
    fmt = " %%-%ds " % fn_maxlen

    print("File table:")

    # print table head
    print(fmt % '', end='')
    print((' {0:-^%d}' % (3*len(treelist)-1)).format('Tree'))
    print(fmt % 'file name', end='')
    for i in range(len(treelist)):
        print(' %2d' % i, end='')
    print(' file diffs')
    sep_line = (' ' + '-' * (fn_maxlen + 1 + 3*len(treelist)
                             + 2 + len('file diffs')))
    print(sep_line)

    # print table body: column 0 is the original tree, columns
    # 1..len(trees) are the other trees in sorted order
    for fname in sorted(all_files):
        result_line = result_table[fname]
        print(fmt % fname, end='')
        for tree_idx in range(len(trees) + 1):
            print(" %s%s" % (result_line.get_flag(tree_idx),
                             result_line.get_digest(tree_idx)), end='')
        if result_line.file_versions > 0:
            print(' %3d' % result_line.file_versions)
        else:
            print(' ok')
    print(sep_line)
    print()

    print("Legend:")
    # The '<' and '>' descriptions must agree with the flags computed in
    # gphoto_m4_sync(): '>' is set when the file's mtime is greater than
    # the original's (the file is younger), '<' when it is smaller.
    legend = [
        ('N', 'new file'),
        ('O', 'original file'),
        ('/', 'no such file'),
        ('=', 'same content as the original file'),
        ('<', 'file with different content is older than original file'),
        ('>', 'file with different content is younger than original file'),
    ]
    for ch, descr in legend:
        print(" %s %s" % (ch, descr))
    print(" ")
    print(" Small letters identify file contents: Same letter means same content.")
    print()

    # Determine exit code
    exit_code = 1 if result_table.differences > 0 else 0

    # Print summary
    print("Summary:")
    if result_table.differences > 0:
        print(" About %d difference(s) found in %d file(s)."
              % (result_table.differences,
                 result_table.files_with_differences))
        print(" ")
        print(" Diff commands for comparing differing files can be obtained with the")
        print(" '--diff' option.")
    else:
        print(" All gphoto-m4 trees are equal.")

    # Finally exit.
    sys.exit(exit_code)


########################################################################

def _sh_double_quote(text):
    """Return `text` in double quotes, escaped for use in a sh command."""
    # The backslash must be handled first so that the backslashes added
    # for the other characters are not escaped a second time.
    for ch in '\\"$`':
        text = text.replace(ch, '\\' + ch)
    return '"%s"' % text


def print_diff_commands(diff_commands):
    """Print a shell script which runs one 'diff -u' per tuple.

    Each item of `diff_commands` is a tuple
    `(fname, orig_dig, other_dig, orig_fpath, other_fpath)`. A false
    `orig_fpath` makes the command compare against '/dev/null'. File
    names and labels are quoted so that the output stays a valid shell
    script for paths containing spaces or shell metacharacters.
    """
    print("#!/bin/sh")
    print("#")
    print("# This file has been autogenerated by %s" % __file__)
    print("#")
    print("# List of diff commands. You can pipe these into")
    print("# | sh | colordiff | less -r '+/comparing '")
    print("# or")
    print("# | sh | less '+/^comparing '")
    print("# or")
    print("# | less")
    for fname, orig_dig, other_dig, orig_fpath, other_fpath in diff_commands:
        if orig_fpath:
            orig_label = "%s (digest '%s')" % (orig_fpath, orig_dig)
        else:
            orig_fpath = '/dev/null'
            orig_label = '(no such file)'
        if other_dig:
            other_label = "%s (digest '%s')" % (other_fpath, other_dig)
        else:
            other_label = other_fpath
        print()
        print("echo %s" % shlex.quote('comparing fname %s' % fname))
        print("diff -u --label %s %s --label %s %s"
              % (_sh_double_quote(orig_label), shlex.quote(orig_fpath),
                 _sh_double_quote(other_label), shlex.quote(other_fpath)))


########################################################################

def gphoto_m4_sync(dir_list, print_diffs):
    """Compare this script's gphoto-m4 tree with the trees in `dir_list`.

    The original tree is the directory containing this script. With a
    false `print_diffs` the report is printed by cmd_print_report(),
    which exits with code 0 or 1. With a true `print_diffs` a list of
    diff commands is printed and the process exits with code 0. If no
    gphoto-m4 tree is found below `dir_list`, the process exits with
    code 2.
    """
    # List all files in this clone of the `gphoto-m4` repository
    orig_top = os.path.dirname(os.path.abspath(__file__))
    orig_tree = GitTree(orig_top)

    # For each `gphoto-m4` directory given on the command line, find
    # all files.
    trees = {}
    for top in dir_list:
        for dirpath, tree in scan_tree(os.path.abspath(top)):
            trees[dirpath] = tree
    if not trees:
        print("No gphoto-m4 trees found in directories given on command line.")
        sys.exit(2)
    tree_tops = sorted(trees)

    # Make a list of all files within all `gphoto-m4` trees
    all_names = set(orig_tree)
    for tree in trees.values():
        all_names.update(tree)
    all_files = sorted(all_names)

    # calculate table values
    result_table = ResultTable()
    for fname in all_files:
        result_line = ResultLine(fname)
        file_diffs = 0
        if fname in orig_tree:
            orig_file = orig_tree[fname]
            orig_dig = orig_file.digest
            result_line.set_flag(0, 'O', orig_file.fpath)
            result_line.set_digest(0, orig_dig)
        else:
            orig_file = None
            orig_dig = None
            result_line.set_flag(0, '/', None)

        for tree_idx, tree_top in enumerate(tree_tops, start=1):
            tree = trees[tree_top]
            if fname in tree:
                other_file = tree[fname]
                dig = other_file.digest
                fpath = other_file.fpath
                # 'N' remains for files missing from the original tree
                # and for differing files with an identical mtime.
                flag = 'N'
                if orig_dig == dig:
                    flag = '='
                elif orig_dig:
                    other_mtime = other_file.statinfo.st_mtime
                    orig_mtime = orig_file.statinfo.st_mtime
                    if other_mtime > orig_mtime:
                        flag = '>'
                    elif other_mtime < orig_mtime:
                        flag = '<'
                result_line.set_digest(tree_idx, dig)
            else:
                flag = '/'
                fpath = None
            result_line.set_flag(tree_idx, flag, fpath)

            # With an original file, everything but an identical copy
            # counts as a difference; without one, every existing file.
            if orig_dig:
                if flag != '=':
                    file_diffs += 1
            else:
                if flag != '/':
                    file_diffs += 1

        result_line.close(file_diffs)
        result_table[fname] = result_line
    result_table.close()

    # Print report
    if not print_diffs:
        cmd_print_report(result_table, all_files,
                         [orig_top] + tree_tops, trees)

    # Print diffs
    if print_diffs:
        # Calculate a reduced set of diff commands: for each file, a
        # pair of content letters is compared at most once, in either
        # direction.
        all_trees = [orig_tree] + [trees[k] for k in tree_tops]
        diff_commands = []
        for fname, result_line in result_table.items():
            if result_line.file_versions == 0:
                continue
            seen_pairs = set()
            for idx_a, tree_a in enumerate(all_trees):
                dig_a = result_line.get_digest(idx_a)
                if dig_a == ' ':
                    continue
                for idx_b, tree_b in enumerate(all_trees):
                    dig_b = result_line.get_digest(idx_b)
                    if dig_b == ' ' or dig_b == dig_a:
                        continue
                    if ((dig_a, dig_b) in seen_pairs
                            or (dig_b, dig_a) in seen_pairs):
                        continue
                    seen_pairs.add((dig_a, dig_b))
                    diff_commands.append((fname, dig_a, dig_b,
                                          tree_a[fname].fpath,
                                          tree_b[fname].fpath))
                    # NOTE(review): the nesting level of this `break`
                    # could not be recovered from the whitespace-mangled
                    # source; it is placed so that the search for tree_b
                    # stops once a command has been emitted for tree_a.
                    # Confirm against the upstream script.
                    break
        print_diff_commands(diff_commands)
        sys.exit(0)


#######################################################################

def parse_args(args):
    """Split `args` into `(want_help, want_diff, dir_list)`.

    Options are only recognized before the first non-option argument or
    a literal '--'; everything after that point is a directory.
    Parsing stops at '--help'. Raises ValueError for any other
    argument starting with '--'.
    """
    want_diff = False
    for idx, arg in enumerate(args):
        if arg == '--':
            return False, want_diff, args[idx + 1:]
        if arg == '--help':
            return True, want_diff, []
        if arg == '--diff':
            want_diff = True
        elif arg.startswith('--'):
            raise ValueError("Unhandled command line option '%s'" % arg)
        else:
            return False, want_diff, args[idx:]
    # Only options were given: there are no directories. (Slicing with
    # the last loop index here would turn the final option into one.)
    return False, want_diff, []


def main(args):
    """Run gphoto-m4-sync with the command line arguments `args`."""
    if not args:
        print_help()
        sys.exit(0)

    want_help, arg_diff, dir_list = parse_args(args)
    if want_help:
        print_help()
        sys.exit(0)

    gphoto_m4_sync(dir_list, arg_diff)


########################################################################

if __name__ == '__main__':
    main(sys.argv[1:])

########################################################################