summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Sam Thursfield <sam.thursfield@codethink.co.uk> 2017-12-22 12:04:14 +0000
committer: Sam Thursfield <sam.thursfield@codethink.co.uk> 2017-12-22 12:14:24 +0000
commit: 7eea41ad6d5310d4d45d620814da843b6dc8f1c8 (patch)
tree: 9cc556eab594132ed9216f9c09e9a976d8682e0d
parent: fc665e4410ed36c52f02610de634adc0acff38f5 (diff)
download: buildstream-7eea41ad6d5310d4d45d620814da843b6dc8f1c8.tar.gz
utils.py: _process_list(): Only call _ensure_real_directory() when needed
Staging performance has been quite slow for me, and the biggest offender when profiling has been os.path.realpath() being called from _ensure_real_directory().

When tracing calls to this function, I found that we called os.path.realpath() on the same directory many times. This is unnecessary as we process the filelist in directory-first order.

The code is now changed to only ensure directories exist when we hit a directory; the rest of the time this isn't needed.

In my profile of `bst shell` on base.bst from the gnome-modulesets repo, time spent in _ensure_real_directory() goes from 11 seconds to 0.14 seconds.
-rw-r--r-- buildstream/utils.py 24
1 file changed, 16 insertions, 8 deletions
diff --git a/buildstream/utils.py b/buildstream/utils.py
index 7d898bd4c..3fb55efe3 100644
--- a/buildstream/utils.py
+++ b/buildstream/utils.py
@@ -572,15 +572,23 @@ def _process_list(srcdir, destdir, filelist, actionfunc, result, ignore_missing=
# The destination directory may not have been created separately
permissions.extend(_copy_directories(srcdir, destdir, path))
- # Ensure that broken symlinks to directories have their targets
- # created before attempting to stage files across broken
- # symlink boundaries
- _ensure_real_directory(os.path.dirname(destpath))
-
if stat.S_ISDIR(mode):
- # Ensure directory exists in destination
- if not os.path.exists(destpath):
- _ensure_real_directory(destpath)
+ # Ensure directory exists in destination. Since we process the
+ # filelist in directory-first order, we only need to do this when
+ # we encounter a directory.
+ #
+ # Any symlinks /within/ 'destpath' will be resolved. This is an
+ # unusual situation: bear in mind the filelist could be a combined
+ # list of the contents of multiple artifacts, and also consider that
+ # two artifacts could stage something in the same place. If one
+ # puts a symlink at /bin and the other puts a directory at /bin,
+ # the symlink could come first and the directory will not overwrite
+ # it, so we may have pathnames from the second artifact such as
+ # /bin/sh which end up having a symlink as part of their path. The
+ # symlink in the path will be resolved when we try and actually
+ # create the file, and we need to make sure that its target exists
+ # first.
+ _ensure_real_directory(destpath)
dest_stat = os.lstat(os.path.realpath(destpath))
if not stat.S_ISDIR(dest_stat.st_mode):