summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPatrick Steinhardt <ps@pks.im>2017-10-13 13:15:13 +0200
committerPatrick Steinhardt <ps@pks.im>2018-06-22 09:50:07 +0200
commit9ac79ecce24a2c65e91ee0c2b414fd606b26309f (patch)
tree100434f61710ea0cd00d357ad9bdbe0a859828ea
parentbbbe8441750a9072a6f4e96c8d364ede79dd2300 (diff)
downloadlibgit2-9ac79ecce24a2c65e91ee0c2b414fd606b26309f.tar.gz
blob: implement function to parse raw data
Currently, parsing objects is strictly tied to having an ODB object available. This makes it hard to parse an object when all that is available is its raw data and size. Furthermore, hacking around that limitation by directly creating an ODB structure either on stack or on heap does not really work that well due to ODB objects being reference counted and then automatically freed when reaching a reference count of zero. On some occasions parsing raw objects without touching the ODB is actually required, though. One use case is for example object verification, where we want to ensure that an object is valid before inserting it into the ODB or writing it into the git repository. As a first step towards that, introduce a distinction between raw and ODB objects for blobs. Creation of ODB objects stays the same by simply using `git_blob__parse`, but a new function `git_blob__parse_raw` has been added that creates a blob from a pair of data and size. By setting a new flag inside of the blob, we can now distinguish whether it is a raw or ODB object and treat it accordingly in several places. Note that the blob data passed in is not being copied. Because of that, callers need to make sure to keep it alive during the blob's lifetime. This is being used to avoid unnecessarily increasing the memory footprint when parsing largish blobs.
-rw-r--r--src/blob.c32
-rw-r--r--src/blob.h11
2 files changed, 36 insertions, 7 deletions
diff --git a/src/blob.c b/src/blob.c
index b1c028081..bcd3f41e1 100644
--- a/src/blob.c
+++ b/src/blob.c
@@ -19,13 +19,19 @@
const void *git_blob_rawcontent(const git_blob *blob)
{
assert(blob);
- return git_odb_object_data(blob->odb_object);
+ if (blob->raw)
+ return blob->data.raw.data;
+ else
+ return git_odb_object_data(blob->data.odb);
}
git_off_t git_blob_rawsize(const git_blob *blob)
{
assert(blob);
- return (git_off_t)git_odb_object_size(blob->odb_object);
+ if (blob->raw)
+ return blob->data.raw.size;
+ else
+ return (git_off_t)git_odb_object_size(blob->data.odb);
}
int git_blob__getbuf(git_buf *buffer, git_blob *blob)
@@ -36,17 +42,31 @@ int git_blob__getbuf(git_buf *buffer, git_blob *blob)
git_blob_rawsize(blob));
}
-void git_blob__free(void *blob)
+void git_blob__free(void *_blob)
{
- git_odb_object_free(((git_blob *)blob)->odb_object);
+ git_blob *blob = (git_blob *) _blob;
+ if (!blob->raw)
+ git_odb_object_free(blob->data.odb);
git__free(blob);
}
-int git_blob__parse(void *blob, git_odb_object *odb_obj)
+int git_blob__parse_raw(void *_blob, const char *data, size_t size)
{
+ git_blob *blob = (git_blob *) _blob;
+ assert(blob);
+ blob->raw = 1;
+ blob->data.raw.data = data;
+ blob->data.raw.size = size;
+ return 0;
+}
+
+int git_blob__parse(void *_blob, git_odb_object *odb_obj)
+{
+ git_blob *blob = (git_blob *) _blob;
assert(blob);
git_cached_obj_incref((git_cached_obj *)odb_obj);
- ((git_blob *)blob)->odb_object = odb_obj;
+ blob->raw = 0;
+ blob->data.odb = odb_obj;
return 0;
}
diff --git a/src/blob.h b/src/blob.h
index 3f1f97719..f644ec583 100644
--- a/src/blob.h
+++ b/src/blob.h
@@ -16,11 +16,20 @@
struct git_blob {
git_object object;
- git_odb_object *odb_object;
+
+ union {
+ git_odb_object *odb;
+ struct {
+ const char *data;
+ git_off_t size;
+ } raw;
+ } data;
+ unsigned int raw:1;
};
void git_blob__free(void *blob);
int git_blob__parse(void *blob, git_odb_object *obj);
+int git_blob__parse_raw(void *blob, const char *data, size_t size);
int git_blob__getbuf(git_buf *buffer, git_blob *blob);
extern int git_blob__create_from_paths(