From 3ebe37b1db1637f351d5d4652b0b80be0403f373 Mon Sep 17 00:00:00 2001 From: Adam Coldrick Date: Wed, 1 Apr 2015 15:34:26 +0000 Subject: Improve downloading artifacts for OSTree artifact cache This fixes two issues: first, downloads were being put into the default TMPDIR, which is an in-memory file system. When large artifacts are put into /tmp such as gcc-devel, which is over 400MB, I found that my system became unusable due to out of memory / swap death. Ideally we'd fix Linux's memory management, but a more practical solution for now is to download the files to the artifact cache directory. I've not noticed any loss in speed from this change. Second, temporary files should always cleaned up now, even when there are errors during transfer or extraction. --- morphlib/ostreeartifactcache.py | 48 +++++++++++++++++++++-------------------- 1 file changed, 25 insertions(+), 23 deletions(-) diff --git a/morphlib/ostreeartifactcache.py b/morphlib/ostreeartifactcache.py index 64c6e8f8..0c00c659 100644 --- a/morphlib/ostreeartifactcache.py +++ b/morphlib/ostreeartifactcache.py @@ -15,6 +15,7 @@ import collections +import contextlib import logging import os import shutil @@ -40,6 +41,7 @@ class OSTreeArtifactCache(object): if self.status_cb is not None: self.status_cb(*args, **kwargs) + @contextlib.contextmanager def _get_file_from_remote(self, artifact, remote, metadata_name=None): if metadata_name: handle = remote.get_artifact_metadata(artifact, metadata_name) @@ -52,10 +54,12 @@ class OSTreeArtifactCache(object): msg='Downloading %(name)s as a tarball.', chatty=True, name=artifact.name) - fd, path = tempfile.mkstemp() - with open(path, 'w+') as temp: - shutil.copyfileobj(handle, temp) - return path + try: + temporary_download = tempfile.NamedTemporaryFile(dir=self.cachedir) + shutil.copyfileobj(handle, temporary_download) + yield temporary_download.name + finally: + temporary_download.close() def _get_artifact_cache_name(self, artifact): logging.debug('LAC: %s' % artifact.basename()) @@ -90,25 +94,23 @@ class OSTreeArtifactCache(object): else: filename = self.artifact_filename(artifact) shutil.copy(location, filename) - os.remove(location) def copy_from_remote(self, artifact, remote): """Get 'artifact' from remote artifact cache and store it locally.""" if remote.method == 'tarball': - location = self._get_file_from_remote(artifact, remote) - try: - tempdir = tempfile.mkdtemp() - with tarfile.open(name=location) as tf: - tf.extractall(path=tempdir) + with self._get_file_from_remote(artifact, remote) as location: try: - self.put(tempdir, artifact) - finally: - os.remove(location) - shutil.rmtree(tempdir) - except tarfile.ReadError: - # Reading the artifact as a tarball failed, so it must be a - # single file (for example a stratum artifact). - self.put_non_ostree_artifact(artifact, location) + tempdir = tempfile.mkdtemp(dir=self.cachedir) + try: + with tarfile.open(name=location) as tf: + tf.extractall(path=tempdir) + self.put(tempdir, artifact) + finally: + shutil.rmtree(tempdir) + except tarfile.ReadError: + # Reading the artifact as a tarball failed, so it must be a + # single file (for example a stratum artifact). + self.put_non_ostree_artifact(artifact, location) elif remote.method == 'ostree': self.status(msg='Pulling artifact for %(name)s from remote.', @@ -118,14 +120,14 @@ class OSTreeArtifactCache(object): except Exception: # if we can't split the name properly, we must want metadata a, name = artifact.basename().split('.', 1) - location = self._get_file_from_remote( - ArtifactCacheReference(a), remote, name) - self.put_non_ostree_artifact(artifact, location, name) + with self._get_file_from_remote(ArtifactCacheReference(a), + remote, name) as location: + self.put_non_ostree_artifact(artifact, location, name) return if artifact.basename().split('.', 2)[1] == 'stratum': - location = self._get_file_from_remote(artifact, remote) - self.put_non_ostree_artifact(artifact, location) + with self._get_file_from_remote(artifact, remote) as location: + self.put_non_ostree_artifact(artifact, location) return try: -- cgit v1.2.1