diff options
author | Sam Thursfield <sam.thursfield@codethink.co.uk> | 2018-01-19 13:56:52 +0000 |
---|---|---|
committer | Sam Thursfield <sam.thursfield@codethink.co.uk> | 2018-01-19 15:18:38 +0000 |
commit | 33c0c7ab2f6eaed0bb5cd6d7993e80199605837a (patch) | |
tree | e5c9b24eb591d1febf726f7e7bd51a1272a0da0a | |
parent | 633ef73519c695ccb254e3d5ed17c419f8737c79 (diff) | |
download | buildstream-sam/symlink-speedup.tar.gz |
utils.py: Wrap calls to os.path.realpath() in an LRU cache [sam/symlink-speedup]
The os.path.realpath() function is expensive and we call it many times,
to the point that os.path.realpath() calls make up around 40% of the
total time spent in Element.stage_artifact().
The cleanest way to fix this is with a `functools.lru_cache()` wrapper
that caches recently used values. None of the code in question can be
removed (as the tests added in the previous commit will demonstrate).
I tested this by running `bst shell base/base-system.bst true` in
the GNOME modulesets project.
o Without this patch there are 240,019 calls to os.path.realpath()
o With this patch there are 10,379 calls to os.path.realpath()
o If we increase the cache size to 128 items, there are 10,359 calls
to os.path.realpath().
o If we reduce the cache size to 32 items, there are 10,426 calls.
o In all cases the number of *unique* calls is 10,327.
This fixes issue #174.
-rw-r--r-- | buildstream/utils.py | 24 |
1 files changed, 15 insertions, 9 deletions
diff --git a/buildstream/utils.py b/buildstream/utils.py index eebef29ff..be7272cd8 100644 --- a/buildstream/utils.py +++ b/buildstream/utils.py @@ -35,6 +35,7 @@ import string import subprocess import tempfile import itertools +import functools from contextlib import contextmanager import pkg_resources @@ -562,6 +563,11 @@ def _copy_directories(srcdir, destdir, target): 'directory expected: {}'.format(old_dir)) +@functools.lru_cache(maxsize=64) +def _resolve_symlinks(path): + return os.path.realpath(path) + + def _ensure_real_directory(root, destpath): # The realpath in the sandbox may refer to a file outside of the # sandbox when any of the direcory branches are a symlink to an @@ -570,12 +576,12 @@ def _ensure_real_directory(root, destpath): # This should not happen as we rely on relative_symlink_target() below # when staging the actual symlinks which may lead up to this path. # - realpath = os.path.realpath(destpath) - if not realpath.startswith(os.path.realpath(root)): + destpath_resolved = _resolve_symlinks(destpath) + if not destpath_resolved.startswith(_resolve_symlinks(root)): raise UtilError('Destination path resolves to a path outside ' + 'of the staging area\n\n' + ' Destination path: {}\n'.format(destpath) + - ' Real path: {}'.format(realpath)) + ' Real path: {}'.format(destpath_resolved)) # Ensure the real destination path exists before trying to get the mode # of the real destination path. @@ -584,10 +590,10 @@ def _ensure_real_directory(root, destpath): # refer to non-existing directories, they will be created on demand here # at staging time. 
# - if not os.path.exists(realpath): - os.makedirs(realpath) + if not os.path.exists(destpath_resolved): + os.makedirs(destpath_resolved) - return realpath + return destpath_resolved # _process_list() @@ -658,7 +664,7 @@ def _process_list(srcdir, destdir, filelist, actionfunc, result, if not os.path.exists(destpath): _ensure_real_directory(destdir, destpath) - dest_stat = os.lstat(os.path.realpath(destpath)) + dest_stat = os.lstat(_resolve_symlinks(destpath)) if not stat.S_ISDIR(dest_stat.st_mode): raise UtilError('Destination not a directory. source has {}' ' destination has {}'.format(srcpath, destpath)) @@ -730,11 +736,11 @@ def _relative_symlink_target(root, symlink, target): # We want a relative path from the directory in which symlink # is located, not from the symlink itself. - symlinkdir, _ = os.path.split(os.path.realpath(symlink)) + symlinkdir, _ = os.path.split(_resolve_symlinks(symlink)) # Create a full path to the target, including the leading staging # directory - fulltarget = os.path.join(os.path.realpath(root), target) + fulltarget = os.path.join(_resolve_symlinks(root), target) # now get the relative path from the directory where the symlink # is located within the staging root, to the target within the same |