diff options
author | Tristan Maat <tristan.maat@codethink.co.uk> | 2018-03-09 10:55:25 +0100 |
---|---|---|
committer | Tristan Maat <tristan.maat@codethink.co.uk> | 2018-03-27 10:58:18 +0100 |
commit | b606fd14e1f9ec12c9e5ef20b4bc5f499792a496 (patch) | |
tree | 3bb43d8bae326d81b454d479ec1736f7575be7e7 | |
parent | b0e6a45e7d34ef969734bc5c607ce97a229a4f77 (diff) | |
download | buildstream-b606fd14e1f9ec12c9e5ef20b4bc5f499792a496.tar.gz |
_ostree.py: Add diff_dirs function
-rw-r--r-- | buildstream/_ostree.py | 167 |
1 files changed, 167 insertions, 0 deletions
from collections import namedtuple

# NOTE(review): Gio, GLib and OSTree are used below but their import is not
# visible in this excerpt - presumably they come from the module's
# gi.repository imports; confirm against the full file.


# Attribute list handed to every query_info() / enumerate_children() call
# made by diff_dirs().
OSTREE_GIO_FAST_QUERYINFO = ("standard::name,standard::type,standard::size,"
                             "standard::is-symlink,standard::symlink-target,"
                             "unix::device,unix::inode,unix::mode,unix::uid,"
                             "unix::gid,unix::rdev")


# One entry of the 'modified' list returned by diff_dirs().
#
# 'src'/'target' are the two file objects and 'src_info'/'target_info'
# their queried infos. Both checksum fields are None when the entry was
# reported because the two file types differ (no checksum is computed in
# that case).
DiffItem = namedtuple('DiffItem', ['src', 'src_info',
                                   'target', 'target_info',
                                   'src_checksum', 'target_checksum'])


# diff_dirs():
#
# Compute the difference between directory a and b as three separate
# lists.
#
# This is more-or-less a direct port of OSTree.diff_dirs (which cannot
# be used via PyGobject), but does not support options.
#
# Args:
#    a (Gio.File): The first directory for the comparison.
#    b (Gio.File): The second directory for the comparison.
#
# Returns:
#    (modified, removed, added), where:
#       modified (list of DiffItem): Entries that exist under both
#                                    directories but differ in file type
#                                    or checksum.
#       removed (list): Children found under 'a' with no counterpart in
#                       'b'. Removed directories are not expanded.
#       added (list): Children found under 'b' with no counterpart in
#                     'a'. An added directory is followed by everything
#                     found beneath it.
#
# Raises:
#    GLib.Error: Any error other than NOT_FOUND hit while looking up the
#                counterpart of a child is propagated unchanged.
#
def diff_dirs(a, b):
    # get_file_checksum():
    #
    # Helper to compute the checksum of an arbitrary file (different
    # objects have different methods to compute these).
    #
    def get_file_checksum(f, f_info):
        if isinstance(f, OSTree.RepoFile):
            return f.get_checksum()

        # Only regular files are opened; every other file type is
        # checksummed without an input stream.
        contents = None
        if f_info.get_file_type() == Gio.FileType.REGULAR:
            contents = f.read()

        try:
            # NOTE(review): the result is passed straight on to
            # checksum_from_bytes(); confirm the binding returns the raw
            # checksum here rather than a (success, checksum) pair.
            csum = OSTree.checksum_file_from_input(f_info, None, contents,
                                                   OSTree.ObjectType.FILE)
        finally:
            # Do not leave the stream open while the rest of the tree is
            # being walked.
            if contents is not None:
                contents.close()

        return OSTree.checksum_from_bytes(csum)

    # diff_files():
    #
    # Helper to compute a diff between two files. Returns a DiffItem if
    # the checksums differ, None otherwise.
    #
    def diff_files(a, a_info, b, b_info):
        checksum_a = get_file_checksum(a, a_info)
        checksum_b = get_file_checksum(b, b_info)

        if checksum_a != checksum_b:
            return DiffItem(a, a_info, b, b_info, checksum_a, checksum_b)

        return None

    # diff_add_dir_recurse():
    #
    # Helper to collect all files in a directory recursively. Each
    # directory is listed before its own contents.
    #
    def diff_add_dir_recurse(d):
        added = []

        dir_enum = d.enumerate_children(OSTREE_GIO_FAST_QUERYINFO,
                                        Gio.FileQueryInfoFlags.NOFOLLOW_SYMLINKS)

        for child_info in dir_enum:
            name = child_info.get_name()
            child = d.get_child(name)
            added.append(child)

            if child_info.get_file_type() == Gio.FileType.DIRECTORY:
                added.extend(diff_add_dir_recurse(child))

        return added

    modified = []
    removed = []
    added = []

    a_info = a.query_info(OSTREE_GIO_FAST_QUERYINFO,
                          Gio.FileQueryInfoFlags.NOFOLLOW_SYMLINKS)
    b_info = b.query_info(OSTREE_GIO_FAST_QUERYINFO,
                          Gio.FileQueryInfoFlags.NOFOLLOW_SYMLINKS)

    # If both are directories and have the same checksum, we know that
    # none of the underlying files changed, so we can save time.
    #
    # The instance type OSTree.RepoFile is tested here (as in
    # get_file_checksum() above); OSTree.RepoFileClass is the class
    # structure and never matches an actual file object.
    if (a_info.get_file_type() == Gio.FileType.DIRECTORY and
            b_info.get_file_type() == Gio.FileType.DIRECTORY and
            isinstance(a, OSTree.RepoFile) and
            isinstance(b, OSTree.RepoFile)):
        if a.tree_get_contents_checksum() == b.tree_get_contents_checksum():
            return modified, removed, added

    # We walk through 'a' first
    dir_enum = a.enumerate_children(OSTREE_GIO_FAST_QUERYINFO,
                                    Gio.FileQueryInfoFlags.NOFOLLOW_SYMLINKS)
    for child_a_info in dir_enum:
        name = child_a_info.get_name()

        child_a = a.get_child(name)
        child_a_type = child_a_info.get_file_type()

        try:
            child_b = b.get_child(name)
            child_b_info = child_b.query_info(OSTREE_GIO_FAST_QUERYINFO,
                                              Gio.FileQueryInfoFlags.NOFOLLOW_SYMLINKS)
        except GLib.Error as e:
            # If the file does not exist in b, it has been removed
            if e.matches(Gio.io_error_quark(), Gio.IOErrorEnum.NOT_FOUND):
                removed.append(child_a)
                continue
            else:
                raise

        # If the two entries are of different types, we report a
        # modification right away, saving a bit of time because we
        # won't need a checksum
        child_b_type = child_b_info.get_file_type()
        if child_a_type != child_b_type:
            diff_item = DiffItem(child_a, child_a_info,
                                 child_b, child_b_info,
                                 None, None)
            modified.append(diff_item)
        # Otherwise, we compute checksums and compare the file contents
        else:
            diff_item = diff_files(child_a, child_a_info, child_b, child_b_info)

            if diff_item is not None:
                modified.append(diff_item)

            # If the files are both directories, we recursively use
            # this function to find differences - saving time if they
            # are equal.
            if child_a_type == Gio.FileType.DIRECTORY:
                sub_modified, sub_removed, sub_added = diff_dirs(child_a, child_b)
                modified.extend(sub_modified)
                removed.extend(sub_removed)
                added.extend(sub_added)

    # Now we walk through 'b' to find any files that were added
    dir_enum = b.enumerate_children(OSTREE_GIO_FAST_QUERYINFO,
                                    Gio.FileQueryInfoFlags.NOFOLLOW_SYMLINKS)
    for child_b_info in dir_enum:
        name = child_b_info.get_name()

        child_b = b.get_child(name)

        try:
            # The query result itself is not needed; the call only
            # serves to find out whether the counterpart exists in 'a'.
            child_a = a.get_child(name)
            child_a.query_info(OSTREE_GIO_FAST_QUERYINFO,
                               Gio.FileQueryInfoFlags.NOFOLLOW_SYMLINKS)
        except GLib.Error as e:
            # If the file does not exist in 'a', it was added.
            if e.matches(Gio.io_error_quark(), Gio.IOErrorEnum.NOT_FOUND):
                added.append(child_b)
                if child_b_info.get_file_type() == Gio.FileType.DIRECTORY:
                    added.extend(diff_add_dir_recurse(child_b))
                continue
            else:
                raise

    return modified, removed, added