diff options
author | Richard Maw <richard.maw@codethink.co.uk> | 2014-07-11 15:09:14 +0000 |
---|---|---|
committer | Richard Maw <richard.maw@codethink.co.uk> | 2014-07-11 15:09:14 +0000 |
commit | 1a5e3e748a5ea4f48e3e88fa3859db4c186d6ba5 (patch) | |
tree | a3748d413eaee6a24dffd6246aa4ec106fc090bf /morphlib/builder2.py | |
parent | ce1fedb4e5ab82105853c4f3a8e05fb83f62c18e (diff) | |
download | morph-1a5e3e748a5ea4f48e3e88fa3859db4c186d6ba5.tar.gz |
Make our use of json binary path safe (branch: baserock/richardmaw/bugfix/unicode-safe-json)
json only accepts unicode. Various APIs such as file paths and environment
variables allow binary data, so we need to support this properly.
This patch changes every[1] use of json.load or json.dump to escape
non-unicode data strings. The output appears exactly as it used to if the
input was valid unicode; if it isn't, \xabcd escapes are inserted in
the place of the non-unicode data.
When loading back in, if json.load is told to unescape it with
`encoding='unicode-escape'` then it will convert it back correctly.
This change was primarily to support file paths that weren't valid
unicode, where this would choke and die. Now it works, but any tools
that parsed the metadata need to unescape the paths.
[1]: The interface to the remote repo cache uses json data, but I haven't
changed its json.load calls to unescape the data, since the repo
caches haven't been made to escape the data.
Diffstat (limited to 'morphlib/builder2.py')
-rw-r--r-- | morphlib/builder2.py | 21 |
1 files changed, 13 insertions, 8 deletions
diff --git a/morphlib/builder2.py b/morphlib/builder2.py index 3c0d9e02..4bb435d9 100644 --- a/morphlib/builder2.py +++ b/morphlib/builder2.py @@ -154,7 +154,8 @@ def get_chunk_files(f): # pragma: no cover def get_stratum_files(f, lac): # pragma: no cover - for ca in (ArtifactCacheReference(a) for a in json.load(f)): + for ca in (ArtifactCacheReference(a) + for a in json.load(f, encoding='unicode-escape')): cf = lac.get(ca) for filename in get_chunk_files(cf): yield filename @@ -197,7 +198,7 @@ def write_overlap_metadata(artifact, overlaps, lac): # pragma: no cover [ [a.name for a in afs], list(files) ] for afs, files in overlaps.iteritems() - ], f, indent=4) + ], f, indent=4, encoding='unicode-escape') f.close() @@ -234,7 +235,8 @@ class BuilderBase(object): with self.local_artifact_cache.put_source_metadata( self.artifact.source, self.artifact.cache_key, 'meta') as f: - json.dump(meta, f, indent=4, sort_keys=True) + json.dump(meta, f, indent=4, sort_keys=True, + encoding='unicode-escape') f.write('\n') def create_metadata(self, artifact_name, contents=[]): @@ -294,7 +296,7 @@ class BuilderBase(object): # Unit tests use StringIO, which in Python 2.6 isn't usable with # the "with" statement. So we don't do it with "with". 
f = self._open(filename, 'w') - f.write(json.dumps(meta, indent=4, sort_keys=True)) + json.dump(meta, f, indent=4, sort_keys=True, encoding='unicode-escape') f.close() def new_artifact(self, artifact_name): @@ -580,9 +582,11 @@ class StratumBuilder(BuilderBase): meta = self.create_metadata(self.artifact.name, [x.name for x in constituents]) with lac.put_artifact_metadata(self.artifact, 'meta') as f: - json.dump(meta, f, indent=4, sort_keys=True) + json.dump(meta, f, indent=4, sort_keys=True, + encoding='unicode-escape') with self.local_artifact_cache.put(self.artifact) as f: - json.dump([c.basename() for c in constituents], f) + json.dump([c.basename() for c in constituents], f, + encoding='unicode-escape') self.save_build_times() return [self.artifact] @@ -643,7 +647,7 @@ class SystemBuilder(BuilderBase): # pragma: no cover cache = self.local_artifact_cache with cache.get(stratum_artifact) as stratum_file: - artifact_list = json.load(stratum_file) + artifact_list = json.load(stratum_file, encoding='unicode-escape') for chunk in (ArtifactCacheReference(a) for a in artifact_list): self.app.status(msg='Unpacking chunk %(basename)s', basename=chunk.basename(), chatty=True) @@ -671,7 +675,8 @@ class SystemBuilder(BuilderBase): # pragma: no cover # download the chunk artifacts if necessary for stratum_artifact in self.artifact.dependencies: f = self.local_artifact_cache.get(stratum_artifact) - chunks = [ArtifactCacheReference(a) for a in json.load(f)] + chunks = [ArtifactCacheReference(a) + for a in json.load(f, encoding='unicode-escape')] download_depends(chunks, self.local_artifact_cache, self.remote_artifact_cache) |