summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorVicent Marti <tanoku@gmail.com>2011-07-09 02:10:46 +0200
committerVicent Marti <tanoku@gmail.com>2011-07-09 02:40:16 +0200
commitafeecf4f262b74270368ef8a70c582ea9d5a18e8 (patch)
treec078ee522e3d9f7bf01fe7e85f5baa7f41dacde4 /src
parent2fc78e700cc4684c1e5899d7a4a619da1e3e3679 (diff)
downloadlibgit2-afeecf4f262b74270368ef8a70c582ea9d5a18e8.tar.gz
odb: Direct writes are back
DIRECT WRITES ARE BACK AND FASTER THAN EVER. The streaming writer to the ODB was an overkill for the smaller objects like Commit and Tags; most of the streaming logic was taking too long. This commit makes Commits, Tags and Trees to be built-up in memory, and then written to disk in 2 pushes (header + data), instead of streaming everything. This is *always* faster, even for big files (since the git_filebuf class still does streaming writes when the memory cache overflows). This is also a gazillion lines of code smaller, because we don't have to precompute the final size of the object before starting the stream (this was kind of defeating the point of streaming, anyway). Blobs are still written with full streaming instead of loading them in memory, since this is still the fastest way. A new `git_buf` class has been added. It's missing some features, but it'll get there.
Diffstat (limited to 'src')
-rw-r--r--src/buffer.c95
-rw-r--r--src/buffer.h24
-rw-r--r--src/commit.c55
-rw-r--r--src/filebuf.c4
-rw-r--r--src/filebuf.h2
-rw-r--r--src/odb_loose.c44
-rw-r--r--src/oid.c10
-rw-r--r--src/repository.h2
-rw-r--r--src/signature.c18
-rw-r--r--src/signature.h1
-rw-r--r--src/tag.c52
-rw-r--r--src/tree.c35
12 files changed, 244 insertions, 98 deletions
diff --git a/src/buffer.c b/src/buffer.c
new file mode 100644
index 000000000..6af4c9195
--- /dev/null
+++ b/src/buffer.c
@@ -0,0 +1,95 @@
+#include "buffer.h"
+#include "posix.h"
+#include <stdarg.h>
+
+#define ENSURE_SIZE(b, d) \
+ if ((ssize_t)(d) >= buf->asize && git_buf_grow(b, (d)) < GIT_SUCCESS)\
+ return;
+
+int git_buf_grow(git_buf *buf, size_t target_size)
+{
+ char *new_ptr;
+
+ if (buf->asize < 0)
+ return GIT_ENOMEM;
+
+ if (buf->asize == 0)
+ buf->asize = target_size;
+
+ /* grow the buffer size by 1.5, until it's big enough
+ * to fit our target size */
+ while (buf->asize < (int)target_size)
+ buf->asize = (buf->asize << 1) - (buf->asize >> 1);
+
+ new_ptr = git__realloc(buf->ptr, buf->asize);
+ if (!new_ptr) {
+ buf->asize = -1;
+ return GIT_ENOMEM;
+ }
+
+ buf->ptr = new_ptr;
+ return GIT_SUCCESS;
+}
+
+int git_buf_oom(const git_buf *buf)
+{
+ return (buf->asize < 0);
+}
+
+void git_buf_putc(git_buf *buf, char c)
+{
+ ENSURE_SIZE(buf, buf->size + 1);
+ buf->ptr[buf->size++] = c;
+}
+
+void git_buf_put(git_buf *buf, const char *data, size_t len)
+{
+ ENSURE_SIZE(buf, buf->size + len);
+ memcpy(buf->ptr + buf->size, data, len);
+ buf->size += len;
+}
+
+void git_buf_puts(git_buf *buf, const char *string)
+{
+ git_buf_put(buf, string, strlen(string));
+}
+
+void git_buf_printf(git_buf *buf, const char *format, ...)
+{
+ int len;
+ va_list arglist;
+
+ ENSURE_SIZE(buf, buf->size + 1);
+
+ while (1) {
+ va_start(arglist, format);
+ len = p_vsnprintf(buf->ptr + buf->size, buf->asize - buf->size, format, arglist);
+ va_end(arglist);
+
+ if (len < 0) {
+ buf->asize = -1;
+ return;
+ }
+
+ if (len + 1 <= buf->asize - buf->size) {
+ buf->size += len;
+ return;
+ }
+
+ ENSURE_SIZE(buf, buf->size + len + 1);
+ }
+}
+
+const char *git_buf_cstr(git_buf *buf)
+{
+ if (buf->size + 1 >= buf->asize && git_buf_grow(buf, buf->size + 1) < GIT_SUCCESS)
+ return NULL;
+
+ buf->ptr[buf->size] = '\0';
+ return buf->ptr;
+}
+
+void git_buf_free(git_buf *buf)
+{
+ free(buf->ptr);
+}
diff --git a/src/buffer.h b/src/buffer.h
new file mode 100644
index 000000000..1209340a1
--- /dev/null
+++ b/src/buffer.h
@@ -0,0 +1,24 @@
+#ifndef INCLUDE_buffer_h__
+#define INCLUDE_buffer_h__
+
+#include "common.h"
+
+typedef struct {
+ char *ptr;
+ ssize_t asize, size;
+} git_buf;
+
+#define GIT_BUF_INIT {NULL, 0, 0}
+
+int git_buf_grow(git_buf *buf, size_t target_size);
+int git_buf_oom(const git_buf *buf);
+void git_buf_putc(git_buf *buf, char c);
+void git_buf_put(git_buf *buf, const char *data, size_t len);
+void git_buf_puts(git_buf *buf, const char *string);
+void git_buf_printf(git_buf *buf, const char *format, ...) GIT_FORMAT_PRINTF(2, 3);
+const char *git_buf_cstr(git_buf *buf);
+void git_buf_free(git_buf *buf);
+
+#define git_buf_PUTS(buf, str) git_buf_put(buf, str, sizeof(str) - 1)
+
+#endif
diff --git a/src/commit.c b/src/commit.c
index f7c26a682..05f75731a 100644
--- a/src/commit.c
+++ b/src/commit.c
@@ -116,53 +116,36 @@ int git_commit_create(
int parent_count,
const git_commit *parents[])
{
- size_t final_size = 0;
- int message_length, author_length, committer_length;
-
- char *author_str, *committer_str;
-
+ git_buf commit = GIT_BUF_INIT;
int error, i;
- git_odb_stream *stream;
-
- message_length = strlen(message);
- author_length = git_signature__write(&author_str, "author ", author);
- committer_length = git_signature__write(&committer_str, "committer ", committer);
-
- if (author_length < 0 || committer_length < 0)
- return git__throw(GIT_EINVALIDARGS, "Cannot create commit. Failed to parse signature");
-
- final_size += GIT_OID_LINE_LENGTH("tree");
- final_size += GIT_OID_LINE_LENGTH("parent") * parent_count;
- final_size += author_length;
- final_size += committer_length;
- final_size += 1 + message_length;
-
- if ((error = git_odb_open_wstream(&stream, repo->db, final_size, GIT_OBJ_COMMIT)) < GIT_SUCCESS)
- return git__rethrow(error, "Failed to create commit");
if (git_object_owner((const git_object *)tree) != repo)
return git__throw(GIT_EINVALIDARGS, "The given tree does not belong to this repository");
- git__write_oid(stream, "tree", git_object_id((const git_object *)tree));
+ git_oid__writebuf(&commit, "tree ", git_object_id((const git_object *)tree));
for (i = 0; i < parent_count; ++i) {
- if (git_object_owner((const git_object *)parents[i]) != repo)
- return git__throw(GIT_EINVALIDARGS, "The given parent does not belong to this repository");
+ if (git_object_owner((const git_object *)parents[i]) != repo) {
+ error = git__throw(GIT_EINVALIDARGS, "The given parent does not belong to this repository");
+ goto cleanup;
+ }
- git__write_oid(stream, "parent", git_object_id((const git_object *)parents[i]));
+ git_oid__writebuf(&commit, "parent ", git_object_id((const git_object *)parents[i]));
}
- stream->write(stream, author_str, author_length);
- free(author_str);
+ git_signature__writebuf(&commit, "author ", author);
+ git_signature__writebuf(&commit, "committer ", committer);
- stream->write(stream, committer_str, committer_length);
- free(committer_str);
+ git_buf_putc(&commit, '\n');
+ git_buf_puts(&commit, message);
- stream->write(stream, "\n", 1);
- stream->write(stream, message, message_length);
+ if (git_buf_oom(&commit)) {
+ error = git__throw(GIT_ENOMEM, "Not enough memory to build the commit data");
+ goto cleanup;
+ }
- error = stream->finalize_write(oid, stream);
- stream->free(stream);
+ error = git_odb_write(oid, git_repository_database(repo), commit.ptr, commit.size, GIT_OBJ_COMMIT);
+ git_buf_free(&commit);
if (error == GIT_SUCCESS && update_ref != NULL) {
git_reference *head;
@@ -192,6 +175,10 @@ int git_commit_create(
return git__rethrow(error, "Failed to create commit");
return GIT_SUCCESS;
+
+cleanup:
+ git_buf_free(&commit);
+ return error;
}
int commit_parse_buffer(git_commit *commit, const void *data, size_t len)
diff --git a/src/filebuf.c b/src/filebuf.c
index 86a643d38..1fbbaa3d4 100644
--- a/src/filebuf.c
+++ b/src/filebuf.c
@@ -374,7 +374,7 @@ int git_filebuf_printf(git_filebuf *file, const char *format, ...)
if (len < 0)
return git__throw(GIT_EOSERR, "Failed to format string");
- if ((size_t)len <= space_left) {
+ if ((size_t)len + 1 <= space_left) {
file->buf_pos += len;
return GIT_SUCCESS;
}
@@ -384,7 +384,7 @@ int git_filebuf_printf(git_filebuf *file, const char *format, ...)
space_left = file->buf_size - file->buf_pos;
- } while ((size_t)len <= space_left);
+ } while ((size_t)len + 1 <= space_left);
tmp_buffer = git__malloc(len + 1);
if (!tmp_buffer)
diff --git a/src/filebuf.h b/src/filebuf.h
index 37cb36784..1567b115c 100644
--- a/src/filebuf.h
+++ b/src/filebuf.h
@@ -41,7 +41,7 @@ typedef struct git_filebuf git_filebuf;
int git_filebuf_write(git_filebuf *lock, const void *buff, size_t len);
int git_filebuf_reserve(git_filebuf *file, void **buff, size_t len);
-int git_filebuf_printf(git_filebuf *file, const char *format, ...);
+int git_filebuf_printf(git_filebuf *file, const char *format, ...) GIT_FORMAT_PRINTF(2, 3);
int git_filebuf_open(git_filebuf *lock, const char *path, int flags);
int git_filebuf_commit(git_filebuf *lock);
diff --git a/src/odb_loose.c b/src/odb_loose.c
index 2a2c464c7..a3a4e5b56 100644
--- a/src/odb_loose.c
+++ b/src/odb_loose.c
@@ -772,6 +772,49 @@ int loose_backend__stream(git_odb_stream **stream_out, git_odb_backend *_backend
return GIT_SUCCESS;
}
+int loose_backend__write(git_oid *oid, git_odb_backend *_backend, const void *data, size_t len, git_otype type)
+{
+ int error, header_len;
+ char final_path[GIT_PATH_MAX], header[64];
+ git_filebuf fbuf;
+ loose_backend *backend;
+
+ backend = (loose_backend *)_backend;
+
+ /* prepare the header for the file */
+ {
+ header_len = format_object_header(header, sizeof(header), len, type);
+ if (header_len < GIT_SUCCESS)
+ return GIT_EOBJCORRUPTED;
+ }
+
+ git_path_join(final_path, backend->objects_dir, "tmp_object");
+
+ error = git_filebuf_open(&fbuf, final_path,
+ GIT_FILEBUF_HASH_CONTENTS |
+ GIT_FILEBUF_DEFLATE_CONTENTS |
+ GIT_FILEBUF_TEMPORARY);
+
+ if (error < GIT_SUCCESS)
+ return error;
+
+ git_filebuf_write(&fbuf, header, header_len);
+ git_filebuf_write(&fbuf, data, len);
+ git_filebuf_hash(oid, &fbuf);
+
+ if ((error = object_file_name(final_path, sizeof(final_path), backend->objects_dir, oid)) < GIT_SUCCESS)
+ goto cleanup;
+
+ if ((error = git_futils_mkpath2file(final_path)) < GIT_SUCCESS)
+ goto cleanup;
+
+ return git_filebuf_commit_at(&fbuf, final_path);
+
+cleanup:
+ git_filebuf_cleanup(&fbuf);
+ return error;
+}
+
void loose_backend__free(git_odb_backend *_backend)
{
loose_backend *backend;
@@ -800,6 +843,7 @@ int git_odb_backend_loose(git_odb_backend **backend_out, const char *objects_dir
backend->fsync_object_files = 0;
backend->parent.read = &loose_backend__read;
+ backend->parent.write = &loose_backend__write;
backend->parent.read_prefix = &loose_backend__read_prefix;
backend->parent.read_header = &loose_backend__read_header;
backend->parent.writestream = &loose_backend__stream;
diff --git a/src/oid.c b/src/oid.c
index 2348d1434..8274cb55c 100644
--- a/src/oid.c
+++ b/src/oid.c
@@ -175,6 +175,16 @@ int git__write_oid(git_odb_stream *stream, const char *header, const git_oid *oi
return GIT_SUCCESS;
}
+void git_oid__writebuf(git_buf *buf, const char *header, const git_oid *oid)
+{
+ char hex_oid[GIT_OID_HEXSZ];
+
+ git_oid_fmt(hex_oid, oid);
+ git_buf_puts(buf, header);
+ git_buf_put(buf, hex_oid, GIT_OID_HEXSZ);
+ git_buf_putc(buf, '\n');
+}
+
void git_oid_fromraw(git_oid *out, const unsigned char *raw)
{
memcpy(out->id, raw, sizeof(out->id));
diff --git a/src/repository.h b/src/repository.h
index bcf9b2bc8..1cf426128 100644
--- a/src/repository.h
+++ b/src/repository.h
@@ -11,6 +11,7 @@
#include "index.h"
#include "cache.h"
#include "refs.h"
+#include "buffer.h"
#define DOT_GIT ".git"
#define GIT_DIR DOT_GIT "/"
@@ -44,5 +45,6 @@ void git_object__free(void *object);
int git__parse_oid(git_oid *oid, const char **buffer_out, const char *buffer_end, const char *header);
int git__write_oid(git_odb_stream *src, const char *header, const git_oid *oid);
+void git_oid__writebuf(git_buf *buf, const char *header, const git_oid *oid);
#endif
diff --git a/src/signature.c b/src/signature.c
index 6d9569d15..be9ed1ccb 100644
--- a/src/signature.c
+++ b/src/signature.c
@@ -342,4 +342,22 @@ int git_signature__write(char **signature, const char *header, const git_signatu
return sig_buffer_len;
}
+void git_signature__writebuf(git_buf *buf, const char *header, const git_signature *sig)
+{
+ int offset, hours, mins;
+ char sign;
+
+ offset = sig->when.offset;
+ sign = (sig->when.offset < 0) ? '-' : '+';
+
+ if (offset < 0)
+ offset = -offset;
+
+ hours = offset / 60;
+ mins = offset % 60;
+
+ git_buf_printf(buf, "%s%s <%s> %u %c%02d%02d\n",
+ header ? header : "", sig->name, sig->email,
+ (unsigned)sig->when.time, sign, hours, mins);
+}
diff --git a/src/signature.h b/src/signature.h
index feba6578d..41bc25871 100644
--- a/src/signature.h
+++ b/src/signature.h
@@ -8,5 +8,6 @@
int git_signature__parse(git_signature *sig, const char **buffer_out, const char *buffer_end, const char *header);
int git_signature__write(char **signature, const char *header, const git_signature *sig);
+void git_signature__writebuf(git_buf *buf, const char *header, const git_signature *sig);
#endif
diff --git a/src/tag.c b/src/tag.c
index 39254a961..12929015e 100644
--- a/src/tag.c
+++ b/src/tag.c
@@ -189,16 +189,10 @@ int git_tag_create(
const char *message,
int allow_ref_overwrite)
{
- size_t final_size = 0;
- git_odb_stream *stream;
-
- const char *type_str;
- char *tagger_str;
git_reference *new_ref = NULL;
-
char ref_name[GIT_REFNAME_MAX];
+ git_buf tag = GIT_BUF_INIT;
- int type_str_len, tag_name_len, tagger_str_len, message_len;
int error, should_update_ref = 0;
if (git_object_owner(target) != repo)
@@ -226,40 +220,20 @@ int git_tag_create(
}
}
- type_str = git_object_type2string(git_object_type(target));
- tagger_str_len = git_signature__write(&tagger_str, "tagger ", tagger);
-
- type_str_len = strlen(type_str);
- tag_name_len = strlen(tag_name);
- message_len = strlen(message);
-
- final_size += GIT_OID_LINE_LENGTH("object");
- final_size += STRLEN("type ") + type_str_len + 1;
- final_size += STRLEN("tag ") + tag_name_len + 1;
- final_size += tagger_str_len;
- final_size += 1 + message_len;
-
- if ((error = git_odb_open_wstream(&stream, repo->db, final_size, GIT_OBJ_TAG)) < GIT_SUCCESS)
- return git__rethrow(error, "Failed to create tag");
-
- git__write_oid(stream, "object", git_object_id(target));
-
- stream->write(stream, "type ", STRLEN("type "));
- stream->write(stream, type_str, type_str_len);
-
- stream->write(stream, "\ntag ", STRLEN("\ntag "));
- stream->write(stream, tag_name, tag_name_len);
- stream->write(stream, "\n", 1);
-
- stream->write(stream, tagger_str, tagger_str_len);
- free(tagger_str);
-
- stream->write(stream, "\n", 1);
- stream->write(stream, message, message_len);
+ git_oid__writebuf(&tag, "object ", git_object_id(target));
+ git_buf_printf(&tag, "type %s\n", git_object_type2string(git_object_type(target)));
+ git_buf_printf(&tag, "tag %s\n", tag_name);
+ git_signature__writebuf(&tag, "tagger ", tagger);
+ git_buf_putc(&tag, '\n');
+ git_buf_puts(&tag, message);
+ if (git_buf_oom(&tag)) {
+ git_buf_free(&tag);
+ return git__throw(GIT_ENOMEM, "Not enough memory to build the tag data");
+ }
- error = stream->finalize_write(oid, stream);
- stream->free(stream);
+ error = git_odb_write(oid, git_repository_database(repo), tag.ptr, tag.size, GIT_OBJ_TAG);
+ git_buf_free(&tag);
if (error < GIT_SUCCESS)
return git__rethrow(error, "Failed to create tag");
diff --git a/src/tree.c b/src/tree.c
index fff5068f8..ad9643f69 100644
--- a/src/tree.c
+++ b/src/tree.c
@@ -421,44 +421,35 @@ int git_treebuilder_remove(git_treebuilder *bld, const char *filename)
int git_treebuilder_write(git_oid *oid, git_repository *repo, git_treebuilder *bld)
{
- unsigned int i, size = 0;
- char filemode[MAX_FILEMODE_BYTES + 1 + 1];
- git_odb_stream *stream;
+ unsigned int i;
int error;
+ git_buf tree = GIT_BUF_INIT;
assert(bld);
sort_entries(bld);
+ /* Grow the buffer beforehand to an estimated size */
+ git_buf_grow(&tree, bld->entries.length * 72);
+
for (i = 0; i < bld->entries.length; ++i) {
git_tree_entry *entry = bld->entries.contents[i];
if (entry->removed)
continue;
- snprintf(filemode, sizeof(filemode), "%o ", entry->attr);
- size += strlen(filemode);
- size += entry->filename_len + 1;
- size += GIT_OID_RAWSZ;
+ git_buf_printf(&tree, "%o ", entry->attr);
+ git_buf_put(&tree, entry->filename, entry->filename_len + 1);
+ git_buf_put(&tree, (char *)entry->oid.id, GIT_OID_RAWSZ);
}
- if ((error = git_odb_open_wstream(&stream, git_repository_database(repo), size, GIT_OBJ_TREE)) < GIT_SUCCESS)
- return git__rethrow(error, "Failed to write tree. Can't open write stream");
-
- for (i = 0; i < bld->entries.length; ++i) {
- git_tree_entry *entry = bld->entries.contents[i];
-
- if (entry->removed)
- continue;
-
- snprintf(filemode, sizeof(filemode), "%o ", entry->attr);
- stream->write(stream, filemode, strlen(filemode));
- stream->write(stream, entry->filename, entry->filename_len + 1);
- stream->write(stream, (char *)entry->oid.id, GIT_OID_RAWSZ);
+ if (git_buf_oom(&tree)) {
+ git_buf_free(&tree);
+ return git__throw(GIT_ENOMEM, "Not enough memory to build the tree data");
}
- error = stream->finalize_write(oid, stream);
- stream->free(stream);
+ error = git_odb_write(oid, git_repository_database(repo), tree.ptr, tree.size, GIT_OBJ_TREE);
+ git_buf_free(&tree);
return error == GIT_SUCCESS ? GIT_SUCCESS : git__rethrow(error, "Failed to write tree");
}