diff options
Diffstat (limited to 'src/indexer.c')
-rw-r--r-- | src/indexer.c | 461 |
1 files changed, 377 insertions, 84 deletions
diff --git a/src/indexer.c b/src/indexer.c index 1b5339f23..df1ce7cfb 100644 --- a/src/indexer.c +++ b/src/indexer.c @@ -18,6 +18,7 @@ #include "filebuf.h" #include "oid.h" #include "oidmap.h" +#include "compress.h" #define UINT31_MAX (0x7FFFFFFF) @@ -28,13 +29,15 @@ struct entry { uint64_t offset_long; }; -struct git_indexer_stream { +struct git_indexer { unsigned int parsed_header :1, opened_pack :1, have_stream :1, have_delta :1; + struct git_pack_header hdr; struct git_pack_file *pack; git_filebuf pack_file; + unsigned int mode; git_off_t off; git_off_t entry_start; git_packfile_stream stream; @@ -47,13 +50,21 @@ struct git_indexer_stream { git_transfer_progress_callback progress_cb; void *progress_payload; char objbuf[8*1024]; + + /* Needed to look up objects which we want to inject to fix a thin pack */ + git_odb *odb; + + /* Fields for calculating the packfile trailer (hash of everything before it) */ + char inbuf[GIT_OID_RAWSZ]; + size_t inbuf_len; + git_hash_ctx trailer; }; struct delta_info { git_off_t delta_off; }; -const git_oid *git_indexer_stream_hash(const git_indexer_stream *idx) +const git_oid *git_indexer_hash(const git_indexer *idx) { return &idx->hash; } @@ -106,28 +117,34 @@ static int objects_cmp(const void *a, const void *b) return git_oid__cmp(&entrya->oid, &entryb->oid); } -int git_indexer_stream_new( - git_indexer_stream **out, +int git_indexer_new( + git_indexer **out, const char *prefix, + unsigned int mode, + git_odb *odb, git_transfer_progress_callback progress_cb, void *progress_payload) { - git_indexer_stream *idx; + git_indexer *idx; git_buf path = GIT_BUF_INIT; static const char suff[] = "/pack"; int error; - idx = git__calloc(1, sizeof(git_indexer_stream)); + idx = git__calloc(1, sizeof(git_indexer)); GITERR_CHECK_ALLOC(idx); + idx->odb = odb; idx->progress_cb = progress_cb; idx->progress_payload = progress_payload; + idx->mode = mode ? mode : GIT_PACK_FILE_MODE; + git_hash_ctx_init(&idx->trailer); error = git_buf_joinpath(&path, prefix, suff); if (error < 0) goto cleanup; error = git_filebuf_open(&idx->pack_file, path.ptr, - GIT_FILEBUF_TEMPORARY | GIT_FILEBUF_DO_NOT_BUFFER); + GIT_FILEBUF_TEMPORARY | GIT_FILEBUF_DO_NOT_BUFFER, + idx->mode); git_buf_free(&path); if (error < 0) goto cleanup; @@ -143,7 +160,7 @@ cleanup: } /* Try to store the delta so we can try to resolve it later */ -static int store_delta(git_indexer_stream *idx) +static int store_delta(git_indexer *idx) { struct delta_info *delta; @@ -166,7 +183,7 @@ static void hash_header(git_hash_ctx *ctx, git_off_t len, git_otype type) git_hash_update(ctx, buffer, hdrlen); } -static int hash_object_stream(git_indexer_stream *idx, git_packfile_stream *stream) +static int hash_object_stream(git_indexer*idx, git_packfile_stream *stream) { ssize_t read; @@ -186,7 +203,7 @@ static int hash_object_stream(git_indexer_stream *idx, git_packfile_stream *stre } /* In order to create the packfile stream, we need to skip over the delta base description */ -static int advance_delta_offset(git_indexer_stream *idx, git_otype type) +static int advance_delta_offset(git_indexer *idx, git_otype type) { git_mwindow *w = NULL; @@ -205,7 +222,7 @@ static int advance_delta_offset(git_indexer_stream *idx, git_otype type) } /* Read from the stream and discard any output */ -static int read_object_stream(git_indexer_stream *idx, git_packfile_stream *stream) +static int read_object_stream(git_indexer *idx, git_packfile_stream *stream) { ssize_t read; @@ -245,7 +262,7 @@ static int crc_object(uint32_t *crc_out, git_mwindow_file *mwf, git_off_t start, return 0; } -static int store_object(git_indexer_stream *idx) +static int store_object(git_indexer *idx) { int i, error; khiter_t k; @@ -303,17 +320,10 @@ on_error: return -1; } -static int hash_and_save(git_indexer_stream *idx, git_rawobj *obj, git_off_t entry_start) +static int save_entry(git_indexer *idx, struct entry *entry, struct git_pack_entry *pentry, git_off_t entry_start) { int i, error; khiter_t k; - git_oid oid; - size_t entry_size; - struct entry *entry; - struct git_pack_entry *pentry; - - entry = git__calloc(1, sizeof(*entry)); - GITERR_CHECK_ALLOC(entry); if (entry_start > UINT31_MAX) { entry->offset = UINT32_MAX; @@ -322,25 +332,43 @@ static int hash_and_save(git_indexer_stream *idx, git_rawobj *obj, git_off_t ent entry->offset = (uint32_t)entry_start; } - /* FIXME: Parse the object instead of hashing it */ - if (git_odb__hashobj(&oid, obj) < 0) { - giterr_set(GITERR_INDEXER, "Failed to hash object"); + pentry->offset = entry_start; + k = kh_put(oid, idx->pack->idx_cache, &pentry->sha1, &error); + if (!error) + return -1; + + kh_value(idx->pack->idx_cache, k) = pentry; + + /* Add the object to the list */ + if (git_vector_insert(&idx->objects, entry) < 0) return -1; + + for (i = entry->oid.id[0]; i < 256; ++i) { + idx->fanout[i]++; } - pentry = git__calloc(1, sizeof(struct git_pack_entry)); - GITERR_CHECK_ALLOC(pentry); + return 0; +} - git_oid_cpy(&pentry->sha1, &oid); - pentry->offset = entry_start; - k = kh_put(oid, idx->pack->idx_cache, &pentry->sha1, &error); - if (!error) { - git__free(pentry); +static int hash_and_save(git_indexer *idx, git_rawobj *obj, git_off_t entry_start) +{ + git_oid oid; + size_t entry_size; + struct entry *entry; + struct git_pack_entry *pentry; + + entry = git__calloc(1, sizeof(*entry)); + GITERR_CHECK_ALLOC(entry); + + if (git_odb__hashobj(&oid, obj) < 0) { + giterr_set(GITERR_INDEXER, "Failed to hash object"); goto on_error; } - kh_value(idx->pack->idx_cache, k) = pentry; + pentry = git__calloc(1, sizeof(struct git_pack_entry)); + GITERR_CHECK_ALLOC(pentry); + git_oid_cpy(&pentry->sha1, &oid); git_oid_cpy(&entry->oid, &oid); entry->crc = crc32(0L, Z_NULL, 0); @@ -348,15 +376,7 @@ static int hash_and_save(git_indexer_stream *idx, git_rawobj *obj, git_off_t ent if (crc_object(&entry->crc, &idx->pack->mwf, entry_start, entry_size) < 0) goto on_error; - /* Add the object to the list */ - if (git_vector_insert(&idx->objects, entry) < 0) - goto on_error; - - for (i = oid.id[0]; i < 256; ++i) { - idx->fanout[i]++; - } - - return 0; + return save_entry(idx, entry, pentry, entry_start); on_error: git__free(entry); @@ -364,17 +384,54 @@ on_error: return -1; } -static int do_progress_callback(git_indexer_stream *idx, git_transfer_progress *stats) +static int do_progress_callback(git_indexer *idx, git_transfer_progress *stats) { if (!idx->progress_cb) return 0; return idx->progress_cb(stats, idx->progress_payload); } -int git_indexer_stream_add(git_indexer_stream *idx, const void *data, size_t size, git_transfer_progress *stats) +/* Hash everything but the last 20B of input */ +static void hash_partially(git_indexer *idx, const uint8_t *data, size_t size) +{ + size_t to_expell, to_keep; + + if (size == 0) + return; + + /* Easy case, dump the buffer and the data minus the last 20 bytes */ + if (size >= GIT_OID_RAWSZ) { + git_hash_update(&idx->trailer, idx->inbuf, idx->inbuf_len); + git_hash_update(&idx->trailer, data, size - GIT_OID_RAWSZ); + + data += size - GIT_OID_RAWSZ; + memcpy(idx->inbuf, data, GIT_OID_RAWSZ); + idx->inbuf_len = GIT_OID_RAWSZ; + return; + } + + /* We can just append */ + if (idx->inbuf_len + size <= GIT_OID_RAWSZ) { + memcpy(idx->inbuf + idx->inbuf_len, data, size); + idx->inbuf_len += size; + return; + } + + /* We need to partially drain the buffer and then append */ + to_keep = GIT_OID_RAWSZ - size; + to_expell = idx->inbuf_len - to_keep; + + git_hash_update(&idx->trailer, idx->inbuf, to_expell); + + memmove(idx->inbuf, idx->inbuf + to_expell, to_keep); + memcpy(idx->inbuf + to_keep, data, size); + idx->inbuf_len += size - to_expell; +} + +int git_indexer_append(git_indexer *idx, const void *data, size_t size, git_transfer_progress *stats) { int error = -1; - struct git_pack_header hdr; size_t processed; + struct git_pack_header *hdr = &idx->hdr; git_mwindow_file *mwf = &idx->pack->mwf; assert(idx && data && stats); @@ -384,6 +441,8 @@ int git_indexer_stream_add(git_indexer_stream *idx, const void *data, size_t siz if (git_filebuf_write(&idx->pack_file, data, size) < 0) return -1; + hash_partially(idx, data, (int)size); + /* Make sure we set the new size of the pack */ if (idx->opened_pack) { idx->pack->mwf.size += size; @@ -399,14 +458,14 @@ int git_indexer_stream_add(git_indexer_stream *idx, const void *data, size_t siz if (!idx->parsed_header) { unsigned int total_objects; - if ((unsigned)idx->pack->mwf.size < sizeof(hdr)) + if ((unsigned)idx->pack->mwf.size < sizeof(struct git_pack_header)) return 0; - if (parse_header(&hdr, idx->pack) < 0) + if (parse_header(&idx->hdr, idx->pack) < 0) return -1; idx->parsed_header = 1; - idx->nr_objects = ntohl(hdr.hdr_entries); + idx->nr_objects = ntohl(hdr->hdr_entries); idx->off = sizeof(struct git_pack_header); /* for now, limit to 2^32 objects */ @@ -427,6 +486,9 @@ int git_indexer_stream_add(git_indexer_stream *idx, const void *data, size_t siz return -1; stats->received_objects = 0; + stats->local_objects = 0; + stats->total_deltas = 0; + stats->indexed_deltas = 0; processed = stats->indexed_objects = 0; stats->total_objects = total_objects; do_progress_callback(idx, stats); @@ -512,6 +574,7 @@ int git_indexer_stream_add(git_indexer_stream *idx, const void *data, size_t siz stats->received_objects++; if (do_progress_callback(idx, stats) != 0) { + giterr_clear(); error = GIT_EUSER; goto on_error; } @@ -524,7 +587,7 @@ on_error: return error; } -static int index_path_stream(git_buf *path, git_indexer_stream *idx, const char *suffix) +static int index_path(git_buf *path, git_indexer *idx, const char *suffix) { const char prefix[] = "pack-"; size_t slash = (size_t)path->size; @@ -546,68 +609,306 @@ static int index_path_stream(git_buf *path, git_indexer_stream *idx, const char return git_buf_oom(path) ? -1 : 0; } -static int resolve_deltas(git_indexer_stream *idx, git_transfer_progress *stats) +/** + * Rewind the packfile by the trailer, as we might need to fix the + * packfile by injecting objects at the tail and must overwrite it. + */ +static git_off_t seek_back_trailer(git_indexer *idx) +{ + git_off_t off; + + if ((off = p_lseek(idx->pack_file.fd, -GIT_OID_RAWSZ, SEEK_CUR)) < 0) + return -1; + + idx->pack->mwf.size -= GIT_OID_RAWSZ; + git_mwindow_free_all(&idx->pack->mwf); + + return off; +} + +static int inject_object(git_indexer *idx, git_oid *id) { + git_odb_object *obj; + struct entry *entry; + struct git_pack_entry *pentry; + git_oid foo = {{0}}; + unsigned char hdr[64]; + git_buf buf = GIT_BUF_INIT; + git_off_t entry_start; + const void *data; + size_t len, hdr_len; + int error; + + entry = git__calloc(1, sizeof(*entry)); + GITERR_CHECK_ALLOC(entry); + + entry_start = seek_back_trailer(idx); + + if (git_odb_read(&obj, idx->odb, id) < 0) + return -1; + + data = git_odb_object_data(obj); + len = git_odb_object_size(obj); + + entry->crc = crc32(0L, Z_NULL, 0); + + /* Write out the object header */ + hdr_len = git_packfile__object_header(hdr, len, git_odb_object_type(obj)); + git_filebuf_write(&idx->pack_file, hdr, hdr_len); + idx->pack->mwf.size += hdr_len; + entry->crc = crc32(entry->crc, hdr, hdr_len); + + if ((error = git__compress(&buf, data, len)) < 0) + goto cleanup; + + /* And then the compressed object */ + git_filebuf_write(&idx->pack_file, buf.ptr, buf.size); + idx->pack->mwf.size += buf.size; + entry->crc = htonl(crc32(entry->crc, (unsigned char *)buf.ptr, (uInt)buf.size)); + git_buf_free(&buf); + + /* Write a fake trailer so the pack functions play ball */ + if ((error = git_filebuf_write(&idx->pack_file, &foo, GIT_OID_RAWSZ)) < 0) + goto cleanup; + + idx->pack->mwf.size += GIT_OID_RAWSZ; + + pentry = git__calloc(1, sizeof(struct git_pack_entry)); + GITERR_CHECK_ALLOC(pentry); + + git_oid_cpy(&pentry->sha1, id); + git_oid_cpy(&entry->oid, id); + idx->off = entry_start + hdr_len + len; + + if ((error = save_entry(idx, entry, pentry, entry_start)) < 0) + git__free(pentry); + +cleanup: + git_odb_object_free(obj); + return error; +} + +static int fix_thin_pack(git_indexer *idx, git_transfer_progress *stats) +{ + int error, found_ref_delta = 0; unsigned int i; struct delta_info *delta; + size_t size; + git_otype type; + git_mwindow *w = NULL; + git_off_t curpos; + unsigned char *base_info; + unsigned int left = 0; + git_oid base; + + assert(git_vector_length(&idx->deltas) > 0); + + if (idx->odb == NULL) { + giterr_set(GITERR_INDEXER, "cannot fix a thin pack without an ODB"); + return -1; + } + /* Loop until we find the first REF delta */ git_vector_foreach(&idx->deltas, i, delta) { - git_rawobj obj; + curpos = delta->delta_off; + error = git_packfile_unpack_header(&size, &type, &idx->pack->mwf, &w, &curpos); + git_mwindow_close(&w); + if (error < 0) + return error; + + if (type == GIT_OBJ_REF_DELTA) { + found_ref_delta = 1; + break; + } + } + + if (!found_ref_delta) { + giterr_set(GITERR_INDEXER, "no REF_DELTA found, cannot inject object"); + return -1; + } + + /* curpos now points to the base information, which is an OID */ + base_info = git_mwindow_open(&idx->pack->mwf, &w, curpos, GIT_OID_RAWSZ, &left); + if (base_info == NULL) { + giterr_set(GITERR_INDEXER, "failed to map delta information"); + return -1; + } + + git_oid_fromraw(&base, base_info); + git_mwindow_close(&w); + + if (inject_object(idx, &base) < 0) + return -1; + + stats->local_objects++; + + return 0; +} + +static int resolve_deltas(git_indexer *idx, git_transfer_progress *stats) +{ + unsigned int i; + struct delta_info *delta; + int progressed = 0; + + while (idx->deltas.length > 0) { + progressed = 0; + git_vector_foreach(&idx->deltas, i, delta) { + git_rawobj obj; + + idx->off = delta->delta_off; + if (git_packfile_unpack(&obj, idx->pack, &idx->off) < 0) + continue; + + if (hash_and_save(idx, &obj, delta->delta_off) < 0) + continue; + + git__free(obj.data); + stats->indexed_objects++; + stats->indexed_deltas++; + progressed = 1; + do_progress_callback(idx, stats); + + /* + * Remove this delta from the list and + * decrease i so we don't skip over the next + * delta. + */ + git_vector_remove(&idx->deltas, i); + git__free(delta); + i--; + } - idx->off = delta->delta_off; - if (git_packfile_unpack(&obj, idx->pack, &idx->off) < 0) + if (!progressed && (fix_thin_pack(idx, stats) < 0)) { + giterr_set(GITERR_INDEXER, "missing delta bases"); return -1; + } + } + + return 0; +} + +static int update_header_and_rehash(git_indexer *idx, git_transfer_progress *stats) +{ + void *ptr; + size_t chunk = 1024*1024; + git_off_t hashed = 0; + git_mwindow *w = NULL; + git_mwindow_file *mwf; + unsigned int left; + git_hash_ctx *ctx; + + mwf = &idx->pack->mwf; + ctx = &idx->trailer; + + git_hash_ctx_init(ctx); + git_mwindow_free_all(mwf); - if (hash_and_save(idx, &obj, delta->delta_off) < 0) + /* Update the header to include the numer of local objects we injected */ + idx->hdr.hdr_entries = htonl(stats->total_objects + stats->local_objects); + if (p_lseek(idx->pack_file.fd, 0, SEEK_SET) < 0) { + giterr_set(GITERR_OS, "failed to seek to the beginning of the pack"); + return -1; + } + + if (p_write(idx->pack_file.fd, &idx->hdr, sizeof(struct git_pack_header)) < 0) { + giterr_set(GITERR_OS, "failed to update the pack header"); + return -1; + } + + /* + * We now use the same technique as before to determine the + * hash. We keep reading up to the end and let + * hash_partially() keep the existing trailer out of the + * calculation. + */ + idx->inbuf_len = 0; + while (hashed < mwf->size) { + ptr = git_mwindow_open(mwf, &w, hashed, chunk, &left); + if (ptr == NULL) return -1; - git__free(obj.data); - stats->indexed_objects++; - do_progress_callback(idx, stats); + hash_partially(idx, ptr, left); + hashed += left; + + git_mwindow_close(&w); } return 0; } -int git_indexer_stream_finalize(git_indexer_stream *idx, git_transfer_progress *stats) +int git_indexer_commit(git_indexer *idx, git_transfer_progress *stats) { git_mwindow *w = NULL; unsigned int i, long_offsets = 0, left; struct git_pack_idx_header hdr; git_buf filename = GIT_BUF_INIT; struct entry *entry; - void *packfile_hash; - git_oid file_hash; + git_oid trailer_hash, file_hash; git_hash_ctx ctx; git_filebuf index_file = {0}; + void *packfile_trailer; if (git_hash_ctx_init(&ctx) < 0) return -1; /* Test for this before resolve_deltas(), as it plays with idx->off */ - if (idx->off < idx->pack->mwf.size - GIT_OID_RAWSZ) { - giterr_set(GITERR_INDEXER, "Indexing error: unexpected data at the end of the pack"); + if (idx->off < idx->pack->mwf.size - 20) { + giterr_set(GITERR_INDEXER, "unexpected data at the end of the pack"); return -1; } - if (idx->deltas.length > 0) - if (resolve_deltas(idx, stats) < 0) - return -1; + packfile_trailer = git_mwindow_open(&idx->pack->mwf, &w, idx->pack->mwf.size - GIT_OID_RAWSZ, GIT_OID_RAWSZ, &left); + if (packfile_trailer == NULL) { + git_mwindow_close(&w); + goto on_error; + } + + /* Compare the packfile trailer as it was sent to us and what we calculated */ + git_oid_fromraw(&file_hash, packfile_trailer); + git_mwindow_close(&w); + + git_hash_final(&trailer_hash, &idx->trailer); + if (git_oid_cmp(&file_hash, &trailer_hash)) { + giterr_set(GITERR_INDEXER, "packfile trailer mismatch"); + return -1; + } + + /* Freeze the number of deltas */ + stats->total_deltas = stats->total_objects - stats->indexed_objects; + + if (resolve_deltas(idx, stats) < 0) + return -1; if (stats->indexed_objects != stats->total_objects) { - giterr_set(GITERR_INDEXER, "Indexing error: early EOF"); + giterr_set(GITERR_INDEXER, "early EOF"); return -1; } + if (stats->local_objects > 0) { + if (update_header_and_rehash(idx, stats) < 0) + return -1; + + git_hash_final(&trailer_hash, &idx->trailer); + if (p_lseek(idx->pack_file.fd, -GIT_OID_RAWSZ, SEEK_END) < 0) + return -1; + + if (p_write(idx->pack_file.fd, &trailer_hash, GIT_OID_RAWSZ) < 0) { + giterr_set(GITERR_OS, "failed to update pack trailer"); + return -1; + } + } + git_vector_sort(&idx->objects); git_buf_sets(&filename, idx->pack->pack_name); - git_buf_truncate(&filename, filename.size - strlen("pack")); + git_buf_shorten(&filename, strlen("pack")); git_buf_puts(&filename, "idx"); if (git_buf_oom(&filename)) return -1; - if (git_filebuf_open(&index_file, filename.ptr, GIT_FILEBUF_HASH_CONTENTS) < 0) + if (git_filebuf_open(&index_file, filename.ptr, + GIT_FILEBUF_HASH_CONTENTS, idx->mode) < 0) goto on_error; /* Write out the header */ @@ -658,30 +959,22 @@ int git_indexer_stream_finalize(git_indexer_stream *idx, git_transfer_progress * git_filebuf_write(&index_file, &split, sizeof(uint32_t) * 2); } - /* Write out the packfile trailer */ - packfile_hash = git_mwindow_open(&idx->pack->mwf, &w, idx->pack->mwf.size - GIT_OID_RAWSZ, GIT_OID_RAWSZ, &left); - if (packfile_hash == NULL) { - git_mwindow_close(&w); + /* Write out the packfile trailer to the index */ + if (git_filebuf_write(&index_file, &trailer_hash, GIT_OID_RAWSZ) < 0) goto on_error; - } - - memcpy(&file_hash, packfile_hash, GIT_OID_RAWSZ); - git_mwindow_close(&w); - - git_filebuf_write(&index_file, &file_hash, sizeof(git_oid)); - /* Write out the packfile trailer to the idx file as well */ - if (git_filebuf_hash(&file_hash, &index_file) < 0) + /* Write out the hash of the idx */ + if (git_filebuf_hash(&trailer_hash, &index_file) < 0) goto on_error; - git_filebuf_write(&index_file, &file_hash, sizeof(git_oid)); + git_filebuf_write(&index_file, &trailer_hash, sizeof(git_oid)); /* Figure out what the final name should be */ - if (index_path_stream(&filename, idx, ".idx") < 0) + if (index_path(&filename, idx, ".idx") < 0) goto on_error; /* Commit file */ - if (git_filebuf_commit_at(&index_file, filename.ptr, GIT_PACK_FILE_MODE) < 0) + if (git_filebuf_commit_at(&index_file, filename.ptr) < 0) goto on_error; git_mwindow_free_all(&idx->pack->mwf); @@ -689,10 +982,10 @@ int git_indexer_stream_finalize(git_indexer_stream *idx, git_transfer_progress * p_close(idx->pack->mwf.fd); idx->pack->mwf.fd = -1; - if (index_path_stream(&filename, idx, ".pack") < 0) + if (index_path(&filename, idx, ".pack") < 0) goto on_error; /* And don't forget to rename the packfile to its new place. */ - if (git_filebuf_commit_at(&idx->pack_file, filename.ptr, GIT_PACK_FILE_MODE) < 0) + if (git_filebuf_commit_at(&idx->pack_file, filename.ptr) < 0) return -1; git_buf_free(&filename); @@ -706,7 +999,7 @@ on_error: return -1; } -void git_indexer_stream_free(git_indexer_stream *idx) +void git_indexer_free(git_indexer *idx) { khiter_t k; unsigned int i; |