diff options
author | Vicent Marti <tanoku@gmail.com> | 2011-02-17 21:32:00 +0200 |
---|---|---|
committer | Vicent Marti <tanoku@gmail.com> | 2011-02-17 23:20:47 +0200 |
commit | 348c7335dd804ea12eabac7106c5f9675a421ba0 (patch) | |
tree | 89d918bbc50ee95c44609947d284d49597c157fd /src/index.c | |
parent | 81d0ff1ca5db0ab1a3920c96605899c6161467c8 (diff) | |
download | libgit2-348c7335dd804ea12eabac7106c5f9675a421ba0.tar.gz |
Improve the performance when writing Index files
In response to issue #60 (git_index_write really slow), the write_index
function has been rewritten to improve its performance -- it should now
be in par with the performance of git.git.
On top of that, if Posix Threads are available when compiling libgit2, a
new threaded writing system will be used (3 separate threads take care
of solving byte-endianness, hashing the contents of the index and
writing to disk, respectively). For very long Index files, this method
is up to 3x times faster than git.git.
Signed-off-by: Vicent Marti <tanoku@gmail.com>
Diffstat (limited to 'src/index.c')
-rw-r--r-- | src/index.c | 425 |
1 files changed, 314 insertions, 111 deletions
diff --git a/src/index.c b/src/index.c index 0e77105b8..2d56cb956 100644 --- a/src/index.c +++ b/src/index.c @@ -32,23 +32,20 @@ #include "git2/odb.h" #include "git2/blob.h" -#define entry_padding(type, len) (8 - ((offsetof(type, path) + (len)) & 0x7)) -#define short_entry_padding(len) entry_padding(struct entry_short, len) -#define long_entry_padding(len) entry_padding(struct entry_long, len) - #define entry_size(type,len) ((offsetof(type, path) + (len) + 8) & ~7) #define short_entry_size(len) entry_size(struct entry_short, len) #define long_entry_size(len) entry_size(struct entry_long, len) #define minimal_entry_size (offsetof(struct entry_short, path)) -static const char INDEX_HEADER_SIG[] = {'D', 'I', 'R', 'C'}; -static const char INDEX_EXT_TREECACHE_SIG[] = {'T', 'R', 'E', 'E'}; - static const size_t INDEX_FOOTER_SIZE = GIT_OID_RAWSZ; static const size_t INDEX_HEADER_SIZE = 12; static const unsigned int INDEX_VERSION_NUMBER = 2; +static const unsigned int INDEX_VERSION_NUMBER_EXT = 3; + +static const unsigned int INDEX_HEADER_SIG = 0x44495243; +static const char INDEX_EXT_TREECACHE_SIG[] = {'T', 'R', 'E', 'E'}; struct index_header { uint32_t signature; @@ -103,6 +100,9 @@ static int read_header(struct index_header *dest, const void *buffer); static int read_tree(git_index *index, const char *buffer, size_t buffer_size); static git_index_tree *read_tree_internal(const char **, const char *, git_index_tree *); +static int parse_index(git_index *index, const char *buffer, size_t buffer_size); +static void sort_index(git_index *index); +static int write_index(git_index *index, git_filelock *file); int index_srch(const void *key, const void *array_member) { @@ -143,6 +143,9 @@ static int index_initialize(git_index **index_out, git_repository *owner, const git_vector_init(&index->entries, 32, index_cmp, index_srch); + /* the index is empty; the index is sorted */ + index->sorted = 1; + /* Check if index file is stored on disk already */ if (gitfo_exists(index->index_file_path) == 0) index->on_disk = 1; @@ -164,6 +167,33 @@ int git_index_open_inrepo(git_index **index_out, git_repository *repo) return index_initialize(index_out, repo, repo->path_index); } +void git_index_free(git_index *index) +{ + if (index == NULL) + return; + + git_index_clear(index); + git_vector_free(&index->entries); + + free(index->index_file_path); + free(index); +} + +static void free_tree(git_index_tree *tree) +{ + unsigned int i; + + if (tree == NULL) + return; + + for (i = 0; i < tree->children_count; ++i) + free_tree(tree->children[i]); + + free(tree->name); + free(tree->children); + free(tree); +} + void git_index_clear(git_index *index) { unsigned int i; @@ -181,23 +211,10 @@ void git_index_clear(git_index *index) index->last_modified = 0; index->sorted = 1; - git_index_tree__free(index->tree); + free_tree(index->tree); index->tree = NULL; } -void git_index_free(git_index *index) -{ - if (index == NULL) - return; - - git_index_clear(index); - git_vector_free(&index->entries); - - free(index->index_file_path); - free(index); -} - - int git_index_read(git_index *index) { struct stat indexst; @@ -225,7 +242,7 @@ int git_index_read(git_index *index) return GIT_EOSERR; git_index_clear(index); - error = git_index__parse(index, buffer.data, buffer.len); + error = parse_index(index, buffer.data, buffer.len); if (error == GIT_SUCCESS) index->last_modified = indexst.st_mtime; @@ -240,23 +257,24 @@ int git_index_write(git_index *index) { git_filelock file; struct stat indexst; + int error; if (!index->sorted) - git_index__sort(index); + sort_index(index); - if (git_filelock_init(&file, index->index_file_path) < GIT_SUCCESS) - return GIT_EFLOCKFAIL; + if ((error = git_filelock_init(&file, index->index_file_path)) < GIT_SUCCESS) + return error; - if (git_filelock_lock(&file, 0) < GIT_SUCCESS) - return GIT_EFLOCKFAIL; + if ((error = git_filelock_lock(&file, 0)) < GIT_SUCCESS) + return error; - if (git_index__write(index, &file) < GIT_SUCCESS) { + if ((error = write_index(index, &file)) < GIT_SUCCESS) { git_filelock_unlock(&file); - return GIT_EOSERR; + return error; } - if (git_filelock_commit(&file) < GIT_SUCCESS) - return GIT_EFLOCKFAIL; + if ((error = git_filelock_commit(&file)) < GIT_SUCCESS) + return error; if (gitfo_stat(index->index_file_path, &indexst) == 0) { index->last_modified = indexst.st_mtime; @@ -275,7 +293,7 @@ unsigned int git_index_entrycount(git_index *index) git_index_entry *git_index_get(git_index *index, int n) { assert(index); - git_index__sort(index); + sort_index(index); return git_vector_get(&index->entries, (unsigned int)n); } @@ -323,7 +341,7 @@ int git_index_add(git_index *index, const char *rel_path, int stage) return git_index_insert(index, &entry); } -void git_index__sort(git_index *index) +void sort_index(git_index *index) { if (index->sorted == 0) { git_vector_sort(&index->entries); @@ -391,31 +409,16 @@ int git_index_insert(git_index *index, const git_index_entry *source_entry) int git_index_remove(git_index *index, int position) { assert(index); - git_index__sort(index); + sort_index(index); return git_vector_remove(&index->entries, (unsigned int)position); } int git_index_find(git_index *index, const char *path) { - git_index__sort(index); + sort_index(index); return git_vector_search(&index->entries, path); } -void git_index_tree__free(git_index_tree *tree) -{ - unsigned int i; - - if (tree == NULL) - return; - - for (i = 0; i < tree->children_count; ++i) - git_index_tree__free(tree->children[i]); - - free(tree->name); - free(tree->children); - free(tree); -} - static git_index_tree *read_tree_internal( const char **buffer_in, const char *buffer_end, git_index_tree *parent) { @@ -475,7 +478,7 @@ static git_index_tree *read_tree_internal( return tree; error_cleanup: - git_index_tree__free(tree); + free_tree(tree); return NULL; } @@ -556,12 +559,13 @@ static int read_header(struct index_header *dest, const void *buffer) const struct index_header *source; source = (const struct index_header *)(buffer); - dest->signature = source->signature; - if (memcmp(&dest->signature, INDEX_HEADER_SIG, 4) != 0) + dest->signature = ntohl(source->signature); + if (dest->signature != INDEX_HEADER_SIG) return GIT_EOBJCORRUPTED; dest->version = ntohl(source->version); - if (dest->version != INDEX_VERSION_NUMBER) + if (dest->version != INDEX_VERSION_NUMBER_EXT && + dest->version != INDEX_VERSION_NUMBER) return GIT_EOBJCORRUPTED; dest->entry_count = ntohl(source->entry_count); @@ -601,7 +605,7 @@ static size_t read_extension(git_index *index, const char *buffer, size_t buffer return total_size; } -int git_index__parse(git_index *index, const char *buffer, size_t buffer_size) +static int parse_index(git_index *index, const char *buffer, size_t buffer_size) { unsigned int i; struct index_header header; @@ -680,87 +684,286 @@ int git_index__parse(git_index *index, const char *buffer, size_t buffer_size) return GIT_SUCCESS; } -int git_index__write(git_index *index, git_filelock *file) +static void *create_disk_entry(size_t *disk_size, git_index_entry *entry) { - static const char NULL_BYTES[] = {0, 0, 0, 0, 0, 0, 0, 0}; + struct entry_short *ondisk; + size_t path_len; + char *path; - int error = GIT_SUCCESS; - unsigned int i; + path_len = strlen(entry->path); - git_hash_ctx *digest; - git_oid hash_final; + if (entry->flags & GIT_IDXENTRY_EXTENDED) + *disk_size = long_entry_size(path_len); + else + *disk_size = short_entry_size(path_len); - assert(index && file && file->is_locked); + ondisk = git__calloc(1, *disk_size); + if (ondisk == NULL) + return NULL; - if ((digest = git_hash_new_ctx()) == NULL) - return GIT_ENOMEM; + ondisk->ctime.seconds = htonl(entry->ctime.seconds); + ondisk->mtime.seconds = htonl(entry->mtime.seconds); + ondisk->ctime.nanoseconds = htonl(entry->ctime.nanoseconds); + ondisk->mtime.nanoseconds = htonl(entry->mtime.nanoseconds); + ondisk->dev = htonl(entry->dev); + ondisk->ino = htonl(entry->ino); + ondisk->mode = htonl(entry->mode); + ondisk->uid = htonl(entry->uid); + ondisk->gid = htonl(entry->gid); + ondisk->file_size = htonl(entry->file_size); + + git_oid_cpy(&ondisk->oid, &entry->oid); + + ondisk->flags = htons(entry->flags); + + if (entry->flags & GIT_IDXENTRY_EXTENDED) { + struct entry_long *ondisk_ext; + ondisk_ext = (struct entry_long *)ondisk; + ondisk_ext->flags_extended = htons(entry->flags_extended); + path = ondisk_ext->path; + } + else + path = ondisk->path; + + memcpy(path, entry->path, path_len); -#define WRITE_WORD(_word) {\ - uint32_t network_word = htonl(((uint32_t)(_word)));\ - git_filelock_write(file, &network_word, 4);\ - git_hash_update(digest, &network_word, 4);\ + return ondisk; } -#define WRITE_SHORT(_shrt) {\ - uint16_t network_shrt = htons((_shrt));\ - git_filelock_write(file, &network_shrt, 2);\ - git_hash_update(digest, &network_shrt, 2);\ +#ifdef GIT_THREADS + +#define THREAD_QUEUE_SIZE 8 + +typedef struct { + void *data; + size_t size; + git_refcnt refcount; +} index_thread_entry; + +typedef struct { + void *extra_data; + void (*process_entry)(void *extra_data, index_thread_entry *entry); + + index_thread_entry *buffer[THREAD_QUEUE_SIZE]; + int count, pos; + + git_lck mutex; + git_cnd entry_available, space_available; +} index_thread_queue; + +void index_thread_enqueue(index_thread_queue *queue, index_thread_entry *entry) +{ + gitlck_lock(&queue->mutex); + if (queue->count == THREAD_QUEUE_SIZE) + gitcnd_wait(&queue->space_available, &queue->mutex); + + queue->buffer[queue->pos++ % THREAD_QUEUE_SIZE] = entry; + queue->count++; + + gitcnd_signal(&queue->entry_available); + gitlck_unlock(&queue->mutex); +} + +void thread_hash_entry(void *digest, index_thread_entry *entry) +{ + git_hash_update((git_hash_ctx *)digest, entry->data, entry->size); +} + +void thread_write_entry(void *file, index_thread_entry *entry) +{ + git_filelock_write((git_filelock *)file, entry->data, entry->size); +} + +void *index_thread(void *attr) +{ + index_thread_queue *queue = (index_thread_queue *)attr; + + for (;;) { + index_thread_entry *entry; + + gitlck_lock(&queue->mutex); + if (queue->count == 0) + gitcnd_wait(&queue->entry_available, &queue->mutex); + + entry = queue->buffer[(queue->pos - 1) % THREAD_QUEUE_SIZE]; + queue->count--; + + gitcnd_signal(&queue->space_available); + gitlck_unlock(&queue->mutex); + + queue->process_entry(queue->extra_data, entry); + + if (gitrc_dec(&entry->refcount)) { + gitrc_free(&entry->refcount); + free(entry->data); + free(entry); + } + } + + git_thread_exit(NULL); } -#define WRITE_BYTES(_bytes, _n) {\ - git_filelock_write(file, _bytes, _n);\ - git_hash_update(digest, _bytes, _n);\ +static int write_entries(git_index *index, git_filelock *file, git_hash_ctx *digest) +{ + git_thread write_thread, hash_thread; + index_thread_queue *write_queue, *hash_queue; + int error = GIT_SUCCESS; + unsigned int i; + + write_queue = git__malloc(sizeof(index_thread_queue)); + hash_queue = git__malloc(sizeof(index_thread_queue)); + + if (write_queue == NULL || hash_queue == NULL) + return GIT_ENOMEM; + + /* + * Init the writer thread. + * This thread takes care of all the blocking I/O: reads + * the produced index entries and writes them back to disk + * via the filelock API. + */ + { + write_queue->extra_data = (void *)file; + write_queue->process_entry = thread_write_entry; + + write_queue->count = 0; + write_queue->pos = 0; + gitlck_init(&write_queue->mutex); + gitcnd_init(&write_queue->space_available, NULL); + gitcnd_init(&write_queue->entry_available, NULL); + + if (git_thread_create(&write_thread, NULL, index_thread, (void *)write_queue) < 0) { + error = GIT_EOSERR; + goto cleanup; + } + } + + /* + * Init the hasher thread. + * This thread takes care of doing an incremental + * SHA1 hash on all the written data; the final value + * of this hash must be appended at the end of the + * written index file. + */ + { + hash_queue->extra_data = (void *)digest; + hash_queue->process_entry = thread_hash_entry; + + hash_queue->count = 0; + hash_queue->pos = 0; + gitlck_init(&hash_queue->mutex); + gitcnd_init(&hash_queue->space_available, NULL); + gitcnd_init(&hash_queue->entry_available, NULL); + + if (git_thread_create(&hash_thread, NULL, index_thread, (void *)hash_queue) < 0) { + error = GIT_EOSERR; + goto cleanup; + } + } + + /* + * Do the processing. + * This is the main thread. Takes care of preparing all + * the entries that will be written to disk + */ + for (i = 0; i < index->entries.length; ++i) { + git_index_entry *entry; + index_thread_entry *thread_entry; + + entry = git_vector_get(&index->entries, i); + + thread_entry = git__malloc(sizeof(index_thread_entry)); + if (thread_entry == NULL) { + error = GIT_ENOMEM; + goto cleanup; + } + + thread_entry->data = create_disk_entry(&thread_entry->size, entry); + if (thread_entry->data == NULL) { + error = GIT_ENOMEM; + goto cleanup; + } + + /* queue in both queues */ + gitrc_init(&thread_entry->refcount, 2); + + index_thread_enqueue(write_queue, thread_entry); + index_thread_enqueue(hash_queue, thread_entry); + } + +cleanup: + git_thread_kill(write_thread); + git_thread_kill(hash_thread); + + free(write_queue); + free(hash_queue); + + return error; } - WRITE_BYTES(INDEX_HEADER_SIG, 4); +#else - WRITE_WORD(INDEX_VERSION_NUMBER); - WRITE_WORD(index->entries.length); +static int write_entries(git_index *index, git_filelock *file, git_hash_ctx *digest) +{ + unsigned int i; for (i = 0; i < index->entries.length; ++i) { git_index_entry *entry; - size_t path_length, padding; + void *disk_entry; + size_t disk_size; entry = git_vector_get(&index->entries, i); - path_length = strlen(entry->path); - - WRITE_WORD(entry->ctime.seconds); - WRITE_WORD(entry->ctime.nanoseconds); - WRITE_WORD(entry->mtime.seconds); - WRITE_WORD(entry->mtime.nanoseconds); - WRITE_WORD(entry->dev); - WRITE_WORD(entry->ino); - WRITE_WORD(entry->mode); - WRITE_WORD(entry->uid); - WRITE_WORD(entry->gid); - WRITE_WORD(entry->file_size); - WRITE_BYTES(entry->oid.id, GIT_OID_RAWSZ); - - if (entry->flags_extended != 0) - entry->flags |= GIT_IDXENTRY_EXTENDED; - - WRITE_SHORT(entry->flags); - - if (entry->flags & GIT_IDXENTRY_EXTENDED) { - WRITE_SHORT(entry->flags_extended); - padding = long_entry_padding(path_length); - } else - padding = short_entry_padding(path_length); - - WRITE_BYTES(entry->path, path_length); - WRITE_BYTES(NULL_BYTES, padding); + disk_entry = create_disk_entry(&disk_size, entry); + + if (disk_entry == NULL) + return GIT_ENOMEM; + + if (git_filelock_write(file, disk_entry, disk_size) < GIT_SUCCESS) + return GIT_EOSERR; + + git_hash_update(digest, disk_entry, disk_size); + + free(disk_entry); } -#undef WRITE_WORD -#undef WRITE_BYTES -#undef WRITE_SHORT -#undef WRITE_FLAGS + return GIT_SUCCESS; +} + +#endif + +static int write_index(git_index *index, git_filelock *file) +{ + int error = GIT_SUCCESS; + + git_hash_ctx *digest; + git_oid hash_final; + + struct index_header header; + + int is_extended = 1; + + assert(index && file && file->is_locked); + + if ((digest = git_hash_new_ctx()) == NULL) + return GIT_ENOMEM; + + header.signature = htonl(INDEX_HEADER_SIG); + header.version = htonl(is_extended ? INDEX_VERSION_NUMBER : INDEX_VERSION_NUMBER_EXT); + header.entry_count = htonl(index->entries.length); + + git_filelock_write(file, &header, sizeof(struct index_header)); + git_hash_update(digest, &header, sizeof(struct index_header)); + + error = write_entries(index, file, digest); + if (error < GIT_SUCCESS) + goto cleanup; /* TODO: write extensions (tree cache) */ git_hash_final(&hash_final, digest); - git_hash_free_ctx(digest); git_filelock_write(file, hash_final.id, GIT_OID_RAWSZ); +cleanup: + git_hash_free_ctx(digest); return error; } |