diff options
author | Patrick Steinhardt <ps@pks.im> | 2017-10-13 13:15:13 +0200 |
---|---|---|
committer | Patrick Steinhardt <ps@pks.im> | 2018-06-22 09:50:07 +0200 |
commit | 9ac79ecce24a2c65e91ee0c2b414fd606b26309f (patch) | |
tree | 100434f61710ea0cd00d357ad9bdbe0a859828ea | |
parent | bbbe8441750a9072a6f4e96c8d364ede79dd2300 (diff) | |
download | libgit2-9ac79ecce24a2c65e91ee0c2b414fd606b26309f.tar.gz |
blob: implement function to parse raw data
Currently, parsing objects is strictly tied to having an ODB object
available. This makes it hard to parse an object when all that is
available is its raw object and size. Furthermore, hacking around that
limitation by directly creating an ODB structure either on stack or on
heap does not really work that well due to ODB objects being reference
counted and then automatically free'd when reaching a reference count of
zero.
On some occasions, parsing raw objects without touching the ODB
is actually required, though. One use case is for example object
verification, where we want to assure that an object is valid before
inserting it into the ODB or writing it into the git repository.
As a first step towards that, introduce a distinction between raw and ODB
objects for blobs. Creation of ODB objects stays the same by simply
using `git_blob__parse`, but a new function `git_blob__parse_raw` has
been added that creates a blob from a pair of data and size. By setting
a new flag inside of the blob, we can now distinguish whether it is a
raw or ODB object and treat it accordingly in several places.
Note that the blob data passed in is not being copied. Because of that,
callers need to make sure to keep it alive during the blob's life time.
This is being used to avoid unnecessarily increasing the memory
footprint when parsing largish blobs.
-rw-r--r-- | src/blob.c | 32 | ||||
-rw-r--r-- | src/blob.h | 11 |
2 files changed, 36 insertions, 7 deletions
diff --git a/src/blob.c b/src/blob.c index b1c028081..bcd3f41e1 100644 --- a/src/blob.c +++ b/src/blob.c @@ -19,13 +19,19 @@ const void *git_blob_rawcontent(const git_blob *blob) { assert(blob); - return git_odb_object_data(blob->odb_object); + if (blob->raw) + return blob->data.raw.data; + else + return git_odb_object_data(blob->data.odb); } git_off_t git_blob_rawsize(const git_blob *blob) { assert(blob); - return (git_off_t)git_odb_object_size(blob->odb_object); + if (blob->raw) + return blob->data.raw.size; + else + return (git_off_t)git_odb_object_size(blob->data.odb); } int git_blob__getbuf(git_buf *buffer, git_blob *blob) @@ -36,17 +42,31 @@ int git_blob__getbuf(git_buf *buffer, git_blob *blob) git_blob_rawsize(blob)); } -void git_blob__free(void *blob) +void git_blob__free(void *_blob) { - git_odb_object_free(((git_blob *)blob)->odb_object); + git_blob *blob = (git_blob *) _blob; + if (!blob->raw) + git_odb_object_free(blob->data.odb); git__free(blob); } -int git_blob__parse(void *blob, git_odb_object *odb_obj) +int git_blob__parse_raw(void *_blob, const char *data, size_t size) { + git_blob *blob = (git_blob *) _blob; + assert(blob); + blob->raw = 1; + blob->data.raw.data = data; + blob->data.raw.size = size; + return 0; +} + +int git_blob__parse(void *_blob, git_odb_object *odb_obj) +{ + git_blob *blob = (git_blob *) _blob; assert(blob); git_cached_obj_incref((git_cached_obj *)odb_obj); - ((git_blob *)blob)->odb_object = odb_obj; + blob->raw = 0; + blob->data.odb = odb_obj; return 0; } diff --git a/src/blob.h b/src/blob.h index 3f1f97719..f644ec583 100644 --- a/src/blob.h +++ b/src/blob.h @@ -16,11 +16,20 @@ struct git_blob { git_object object; - git_odb_object *odb_object; + + union { + git_odb_object *odb; + struct { + const char *data; + git_off_t size; + } raw; + } data; + unsigned int raw:1; }; void git_blob__free(void *blob); int git_blob__parse(void *blob, git_odb_object *obj); +int git_blob__parse_raw(void *blob, const char *data, size_t size); int 
git_blob__getbuf(git_buf *buffer, git_blob *blob); extern int git_blob__create_from_paths( |