From b53860b0aa27e5c004adc45552e7fead71de09e7 Mon Sep 17 00:00:00 2001 From: Daniel Silverstone Date: Mon, 3 Sep 2012 15:30:10 +0100 Subject: Add a .gitignore to ignore *.pyc To reduce the noise when I run 'git status' this gitignore will mean that git won't notify me of repocache.pyc and __init__.pyc --- .gitignore | 1 + 1 file changed, 1 insertion(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..0d20b648 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +*.pyc -- cgit v1.2.1 From cd00de30a0f4d2d422053692948ea9986960c43f Mon Sep 17 00:00:00 2001 From: Daniel Silverstone Date: Tue, 4 Sep 2012 10:49:35 +0100 Subject: A direct-mode for git cache access Direct-mode, when enabled, causes morph-cache-server to assume a more Trove-like structure for the repositories, rather than the morph-cache structure which it was originally written for. This means that for the workers, we can use the original code and for Trove, the direct mode. --- morph-cache-server | 5 ++++- morphcacheserver/repocache.py | 34 ++++++++++++++++++++++++++-------- 2 files changed, 30 insertions(+), 9 deletions(-) diff --git a/morph-cache-server b/morph-cache-server index 3f72c186..bb84915a 100755 --- a/morph-cache-server +++ b/morph-cache-server @@ -48,13 +48,16 @@ class MorphCacheServer(cliapp.Application): 'path to the artifact cache directory', metavar='PATH', default=defaults['artifact-dir']) + self.settings.boolean(['direct-mode'], + 'cache directories are directly managed') def process_args(self, args): app = Bottle() repo_cache = RepoCache(self, self.settings['repo-dir'], - self.settings['bundle-dir']) + self.settings['bundle-dir'], + self.settings['direct-mode']) @app.get('/sha1s') def sha1(): diff --git a/morphcacheserver/repocache.py b/morphcacheserver/repocache.py index 7061508d..c226ef40 100644 --- a/morphcacheserver/repocache.py +++ b/morphcacheserver/repocache.py @@ -17,6 +17,7 @@ import cliapp import os import string +import urlparse 
class RepositoryNotFoundError(cliapp.AppException): @@ -44,19 +45,25 @@ class UnresolvedNamedReferenceError(cliapp.AppException): class RepoCache(object): - def __init__(self, app, repo_cache_dir, bundle_cache_dir): + def __init__(self, app, repo_cache_dir, bundle_cache_dir, direct_mode): self.app = app self.repo_cache_dir = repo_cache_dir self.bundle_cache_dir = bundle_cache_dir + self.direct_mode = direct_mode def resolve_ref(self, repo_url, ref): quoted_url = self._quote_url(repo_url) repo_dir = os.path.join(self.repo_cache_dir, quoted_url) if not os.path.exists(repo_dir): - raise RepositoryNotFoundError(repo_url) + repo_dir = "%s.git" % repo_dir + if not os.path.exists(repo_dir): + raise RepositoryNotFoundError(repo_url) try: refs = self._show_ref(repo_dir, ref).split('\n') - refs = [x.split() for x in refs if 'origin' in x] + if self.direct_mode: + refs = [x.split() for x in refs] + else: + refs = [x.split() for x in refs if 'origin' in x] return refs[0][0] except cliapp.AppException: pass @@ -70,6 +77,10 @@ class RepoCache(object): def cat_file(self, repo_url, ref, filename): quoted_url = self._quote_url(repo_url) repo_dir = os.path.join(self.repo_cache_dir, quoted_url) + if not os.path.exists(repo_dir): + repo_dir = "%s.git" % repo_dir + if not os.path.exists(repo_dir): + raise RepositoryNotFoundError(repo_url) if not self._is_valid_sha1(ref): raise UnresolvedNamedReferenceError(repo_url, ref) if not os.path.exists(repo_dir): @@ -84,6 +95,10 @@ class RepoCache(object): def ls_tree(self, repo_url, ref, path): quoted_url = self._quote_url(repo_url) repo_dir = os.path.join(self.repo_cache_dir, quoted_url) + if not os.path.exists(repo_dir): + repo_dir = "%s.git" % repo_dir + if not os.path.exists(repo_dir): + raise RepositoryNotFoundError(repo_url) if not self._is_valid_sha1(ref): raise UnresolvedNamedReferenceError(repo_url, ref) if not os.path.exists(repo_dir): @@ -108,13 +123,16 @@ class RepoCache(object): return data def get_bundle_filename(self, repo_url): 
- quoted_url = self._quote_url(repo_url) + quoted_url = self._quote_url(repo_url, True) return os.path.join(self.bundle_cache_dir, '%s.bndl' % quoted_url) - def _quote_url(self, url): - valid_chars = string.digits + string.letters + '%_' - transl = lambda x: x if x in valid_chars else '_' - return ''.join([transl(x) for x in url]) + def _quote_url(self, url, always_indirect=False): + if self.direct_mode and not always_indirect: + return urlparse.urlparse(url)[2] + else: + valid_chars = string.digits + string.letters + '%_' + transl = lambda x: x if x in valid_chars else '_' + return ''.join([transl(x) for x in url]) def _show_ref(self, repo_dir, ref): return self.app.runcmd(['git', 'show-ref', ref], cwd=repo_dir) -- cgit v1.2.1 From 2c04007fc74d5971b12f351a4c2076e403386997 Mon Sep 17 00:00:00 2001 From: Daniel Silverstone Date: Wed, 5 Sep 2012 17:49:21 +0100 Subject: Return tree SHA1 when looking for ref resolution. Morph now expects the tree SHA1 in addition when resolving references using the cache server. This is to better facilitate correct cache key computation since commits can be made which have no tree changes and thus nothing to usefully affect the build. 
(For example the morph branch and build features) --- morph-cache-server | 5 +++-- morphcacheserver/repocache.py | 13 +++++++++++-- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/morph-cache-server b/morph-cache-server index bb84915a..3a121d49 100755 --- a/morph-cache-server +++ b/morph-cache-server @@ -65,11 +65,12 @@ class MorphCacheServer(cliapp.Application): ref = self._unescape_parameter(request.query.ref) try: response.set_header('Cache-Control', 'no-cache') - sha1 = repo_cache.resolve_ref(repo, ref) + sha1, tree = repo_cache.resolve_ref(repo, ref) return { 'repo': '%s' % repo, 'ref': '%s' % ref, - 'sha1': '%s' % sha1 + 'sha1': '%s' % sha1, + 'tree': '%s' % tree } except Exception, e: response.status = 404 diff --git a/morphcacheserver/repocache.py b/morphcacheserver/repocache.py index c226ef40..b55692f2 100644 --- a/morphcacheserver/repocache.py +++ b/morphcacheserver/repocache.py @@ -64,16 +64,25 @@ class RepoCache(object): refs = [x.split() for x in refs] else: refs = [x.split() for x in refs if 'origin' in x] - return refs[0][0] + return refs[0][0], self._tree_from_commit(repo_dir, refs[0][0]) + except cliapp.AppException: pass + if not self._is_valid_sha1(ref): raise InvalidReferenceError(repo_url, ref) try: - return self._rev_list(ref).strip() + sha = self._rev_list(ref).strip() + return sha, self._tree_from_commit(repo_dir, sha) except: raise InvalidReferenceError(repo_url, ref) + def _tree_from_commit(self, repo_dir, commitsha): + commit_info = self.app.runcmd(['git', 'log', '-1', + '--format=format:%T', commitsha], + cwd=repo_dir) + return commit_info.strip() + def cat_file(self, repo_url, ref, filename): quoted_url = self._quote_url(repo_url) repo_dir = os.path.join(self.repo_cache_dir, quoted_url) -- cgit v1.2.1 From c2998750dbb3d79b7455a079aa3f3d243715a15f Mon Sep 17 00:00:00 2001 From: Daniel Silverstone Date: Fri, 7 Sep 2012 13:26:38 +0100 Subject: Support running on a different port from 8080 In order to allow multiple 
morph-cache-server instances to run on a single system, we need to support running on different ports. --- morph-cache-server | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/morph-cache-server b/morph-cache-server index 3a121d49..827da10a 100755 --- a/morph-cache-server +++ b/morph-cache-server @@ -30,12 +30,17 @@ defaults = { 'repo-dir': '/var/cache/morph-cache-server/gits', 'bundle-dir': '/var/cache/morph-cache-server/bundles', 'artifact-dir': '/var/cache/morph-cache-server/artifacts', + 'port': 8080, } class MorphCacheServer(cliapp.Application): def add_settings(self): + self.settings.integer(['port'], + 'port to listen on', + metavar='PORTNUM', + default=defaults['port']) self.settings.string(['repo-dir'], 'path to the repository cache directory', metavar='PATH', @@ -128,7 +133,7 @@ class MorphCacheServer(cliapp.Application): root = Bottle() root.mount(app, '/1.0') - run(root, host='0.0.0.0', port=8080, reloader=True) + run(root, host='0.0.0.0', port=self.settings['port'], reloader=True) def _unescape_parameter(self, param): return urllib.unquote(param) -- cgit v1.2.1 From 9c3279221262057c7eb8ebdcb29f366dc4de66d5 Mon Sep 17 00:00:00 2001 From: Daniel Silverstone Date: Fri, 7 Sep 2012 10:14:27 +0100 Subject: Add ability to have 'writable' cache servers. Since we need to be able to update the cache from builders, this patch introduces a --enable-writes argument to morph-cache-server and also adds a @writable decorator to the class ready for marking particular paths which are only available when --enable-writes is set. 
--- morph-cache-server | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/morph-cache-server b/morph-cache-server index 827da10a..ba5f0b2a 100755 --- a/morph-cache-server +++ b/morph-cache-server @@ -55,6 +55,8 @@ class MorphCacheServer(cliapp.Application): default=defaults['artifact-dir']) self.settings.boolean(['direct-mode'], 'cache directories are directly managed') + self.settings.boolean(['enable-writes'], + 'enable the write methods (fetch and delete)') def process_args(self, args): app = Bottle() @@ -64,6 +66,23 @@ class MorphCacheServer(cliapp.Application): self.settings['bundle-dir'], self.settings['direct-mode']) + def writable(prefix): + """Selectively enable bottle prefixes. + + prefix -- The path prefix we are enabling + + If the runtime configuration setting --enable-writes is provided + then we return the app.get() decorator for the given path prefix + otherwise we return a lambda which passes the function through + undecorated. + + This has the effect of being a runtime-enablable @app.get(...) + + """ + if self.settings['enable-writes']: + return app.get(prefix) + return lambda fn: fn + @app.get('/sha1s') def sha1(): repo = self._unescape_parameter(request.query.repo) -- cgit v1.2.1 From 465e830d1d6d2c51425e2418b8e802a95145b6ee Mon Sep 17 00:00:00 2001 From: Daniel Silverstone Date: Fri, 7 Sep 2012 10:14:27 +0100 Subject: Add a /list method When --enable-writes is set, we provide a /list target which produces a JSON dictionary of information about the state of the artifact cache. The dictionary is of the form: { "freespace": NBYTES_OF_SPACE, "files": { "artifact-filename": { "atime": ATIME_AS_NUMBER, "size": NBYTES_SIZE_OF_FILE, "used": NBYTES_USED_ON_DISK }, ... } } This allows a controller to decide which artifacts have not been requested in some time and also how big artifacts are, not only in terms of their 'byte' size, but also the space they consume on disk. 
System images in particular may differ in this respect since they should be sparsely stored. --- morph-cache-server | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/morph-cache-server b/morph-cache-server index ba5f0b2a..b726c1e5 100755 --- a/morph-cache-server +++ b/morph-cache-server @@ -83,6 +83,28 @@ class MorphCacheServer(cliapp.Application): return app.get(prefix) return lambda fn: fn + @writable('/list') + def list(): + response.set_header('Cache-Control', 'no-cache') + results = {} + files = {} + results["files"] = files + for artifactdir, __, filenames in \ + os.walk(self.settings['artifact-dir']): + fsstinfo = os.statvfs(artifactdir) + results["freespace"] = fsstinfo.f_bsize * fsstinfo.f_bavail + for fname in filenames: + try: + stinfo = os.stat("%s/%s" % (artifactdir, fname)) + files[fname] = { + "atime": stinfo.st_atime, + "size": stinfo.st_size, + "used": stinfo.st_blocks * 512, + } + except Exception, e: + print(e) + return results + @app.get('/sha1s') def sha1(): repo = self._unescape_parameter(request.query.repo) -- cgit v1.2.1 From 1a0e40d854d37f81e9cdaf8bb23e480790614d2a Mon Sep 17 00:00:00 2001 From: Daniel Silverstone Date: Fri, 7 Sep 2012 13:17:13 +0100 Subject: Support for fetching artifacts to the cache Rather than pushing artifacts to the cache, this method allows the caller to specify a host and artifact which the cache server will then fetch into its local cache. It takes the following arguments: host=hostname:port artifact=artifactname This is transformed into a fetch to: http://hostname:port/artifacts?basename=artifactname Which is then fetched into the cache under the given name. 
The return from this is a JSON object, keyed by the artifact name, of the form: { artifactname: { "size": NBYTES_SIZE_OF_FILE, "used": NBYTES_DISK_SPACE_USED } } --- morph-cache-server | 50 +++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 41 insertions(+), 9 deletions(-) diff --git a/morph-cache-server b/morph-cache-server index b726c1e5..286e56db 100755 --- a/morph-cache-server +++ b/morph-cache-server @@ -20,6 +20,7 @@ import cliapp import logging import os import urllib +import shutil from bottle import Bottle, request, response, run, static_file @@ -94,17 +95,48 @@ class MorphCacheServer(cliapp.Application): fsstinfo = os.statvfs(artifactdir) results["freespace"] = fsstinfo.f_bsize * fsstinfo.f_bavail for fname in filenames: - try: - stinfo = os.stat("%s/%s" % (artifactdir, fname)) - files[fname] = { - "atime": stinfo.st_atime, - "size": stinfo.st_size, - "used": stinfo.st_blocks * 512, - } - except Exception, e: - print(e) + if not fname.startswith(".dl."): + try: + stinfo = os.stat("%s/%s" % (artifactdir, fname)) + files[fname] = { + "atime": stinfo.st_atime, + "size": stinfo.st_size, + "used": stinfo.st_blocks * 512, + } + except Exception, e: + print(e) return results + @writable('/fetch') + def fetch(): + host = self._unescape_parameter(request.query.host) + artifact = self._unescape_parameter(request.query.artifact) + try: + response.set_header('Cache-Control', 'no-cache') + in_fh = urllib.urlopen("http://%s/artifacts?basename=%s" % + (host, urllib.quote(artifact))) + tmpname = "%s/.dl.%s" % ( + self.settings['artifact-dir'], + artifact) + localtmp = open(tmpname, "w") + shutil.copyfileobj(in_fh, localtmp) + localtmp.close() + in_fh.close() + artifilename = "%s/%s" % (self.settings['artifact-dir'], + artifact) + os.rename(tmpname, artifilename) + stinfo = os.stat(artifilename) + ret = {} + ret[artifact] = { + "size": stinfo.st_size, + "used": stinfo.st_blocks * 512 + } + return ret + + except Exception, e: + response.status = 500 + logging.debug('%s' % e) + 
@app.get('/sha1s') def sha1(): repo = self._unescape_parameter(request.query.repo) -- cgit v1.2.1 From 5870c3581ac10c14d68337fc875000ead522e99d Mon Sep 17 00:00:00 2001 From: Daniel Silverstone Date: Fri, 7 Sep 2012 13:23:51 +0100 Subject: Add facility to delete artifacts In order to allow the artifact cache to be cleaned up, this patch allows for a /delete method which can remove artifacts from the cache. It takes the following arguments: artifact=artifactname The artifact will be deleted and a JSON object returned in the form: { "status": errno, "reason": strerror } Where errno is zero on success, 1 on EPERM, 2 on ENOENT etc. and reason is the strerror of the errno, in case the architectures differ between caller and cache. --- morph-cache-server | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/morph-cache-server b/morph-cache-server index 286e56db..b4f8fa1a 100755 --- a/morph-cache-server +++ b/morph-cache-server @@ -137,6 +137,19 @@ class MorphCacheServer(cliapp.Application): response.status = 500 logging.debug('%s' % e) + @writable('/delete') + def delete(): + artifact = self._unescape_parameter(request.query.artifact) + try: + os.unlink('%s/%s' % (self.settings['artifact-dir'], + artifact)) + return { "status": 0, "reason": "success" } + except OSError, ose: + return { "status": ose.errno, "reason": ose.strerror } + except Exception, e: + response.status = 500 + logging.debug('%s' % e) + @app.get('/sha1s') def sha1(): repo = self._unescape_parameter(request.query.repo) -- cgit v1.2.1