summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Sam Thursfield <sam.thursfield@codethink.co.uk> 2018-01-19 13:56:52 +0000
committer: Sam Thursfield <sam.thursfield@codethink.co.uk> 2018-01-19 15:18:38 +0000
commit: 33c0c7ab2f6eaed0bb5cd6d7993e80199605837a (patch)
tree: e5c9b24eb591d1febf726f7e7bd51a1272a0da0a
parent: 633ef73519c695ccb254e3d5ed17c419f8737c79 (diff)
download: buildstream-sam/symlink-speedup.tar.gz
utils.py: Wrap calls to os.path.realpath() in an LRU cache (branch: sam/symlink-speedup)
The os.path.realpath() function is expensive and we call it many times, to the point that os.path.realpath() calls make up around 40% of the total time spent in Element.stage_artifact().

The cleanest way to fix this is with a `functools.lru_cache()` wrapper that caches recently used values. None of the code in question can be removed (as the tests added in the previous commit will demonstrate).

I tested this by running `bst shell base/base-system.bst true` in the GNOME modulesets project.

  o Without this patch there are 240,019 calls to os.path.realpath().
  o With this patch there are 10,379 calls to os.path.realpath().
  o If we increase the cache size to 128 items, there are 10,359 calls to os.path.realpath().
  o If we reduce the cache size to 32 items, there are 10,426 calls.
  o In all cases the number of *unique* calls is 10,327.

This fixes issue #174.
-rw-r--r--buildstream/utils.py24
1 files changed, 15 insertions, 9 deletions
diff --git a/buildstream/utils.py b/buildstream/utils.py
index eebef29ff..be7272cd8 100644
--- a/buildstream/utils.py
+++ b/buildstream/utils.py
@@ -35,6 +35,7 @@ import string
import subprocess
import tempfile
import itertools
+import functools
from contextlib import contextmanager
import pkg_resources
@@ -562,6 +563,11 @@ def _copy_directories(srcdir, destdir, target):
'directory expected: {}'.format(old_dir))
+@functools.lru_cache(maxsize=64)
+def _resolve_symlinks(path):
+ return os.path.realpath(path)
+
+
def _ensure_real_directory(root, destpath):
# The realpath in the sandbox may refer to a file outside of the
# sandbox when any of the direcory branches are a symlink to an
@@ -570,12 +576,12 @@ def _ensure_real_directory(root, destpath):
# This should not happen as we rely on relative_symlink_target() below
# when staging the actual symlinks which may lead up to this path.
#
- realpath = os.path.realpath(destpath)
- if not realpath.startswith(os.path.realpath(root)):
+ destpath_resolved = _resolve_symlinks(destpath)
+ if not destpath_resolved.startswith(_resolve_symlinks(root)):
raise UtilError('Destination path resolves to a path outside ' +
'of the staging area\n\n' +
' Destination path: {}\n'.format(destpath) +
- ' Real path: {}'.format(realpath))
+ ' Real path: {}'.format(destpath_resolved))
# Ensure the real destination path exists before trying to get the mode
# of the real destination path.
@@ -584,10 +590,10 @@ def _ensure_real_directory(root, destpath):
# refer to non-existing directories, they will be created on demand here
# at staging time.
#
- if not os.path.exists(realpath):
- os.makedirs(realpath)
+ if not os.path.exists(destpath_resolved):
+ os.makedirs(destpath_resolved)
- return realpath
+ return destpath_resolved
# _process_list()
@@ -658,7 +664,7 @@ def _process_list(srcdir, destdir, filelist, actionfunc, result,
if not os.path.exists(destpath):
_ensure_real_directory(destdir, destpath)
- dest_stat = os.lstat(os.path.realpath(destpath))
+ dest_stat = os.lstat(_resolve_symlinks(destpath))
if not stat.S_ISDIR(dest_stat.st_mode):
raise UtilError('Destination not a directory. source has {}'
' destination has {}'.format(srcpath, destpath))
@@ -730,11 +736,11 @@ def _relative_symlink_target(root, symlink, target):
# We want a relative path from the directory in which symlink
# is located, not from the symlink itself.
- symlinkdir, _ = os.path.split(os.path.realpath(symlink))
+ symlinkdir, _ = os.path.split(_resolve_symlinks(symlink))
# Create a full path to the target, including the leading staging
# directory
- fulltarget = os.path.join(os.path.realpath(root), target)
+ fulltarget = os.path.join(_resolve_symlinks(root), target)
# now get the relative path from the directory where the symlink
# is located within the staging root, to the target within the same