diff options
| -rw-r--r-- | src/indexer.c | 251 | ||||
| -rw-r--r-- | src/odb.c | 6 | ||||
| -rw-r--r-- | src/odb.h | 4 | ||||
| -rw-r--r-- | src/pack.c | 66 | ||||
| -rw-r--r-- | src/pack.h | 14 | 
5 files changed, 282 insertions, 59 deletions
| diff --git a/src/indexer.c b/src/indexer.c index a51d903ed..f78ca5774 100644 --- a/src/indexer.c +++ b/src/indexer.c @@ -38,15 +38,20 @@ struct git_indexer {  struct git_indexer_stream {  	unsigned int parsed_header :1, -		opened_pack; +		opened_pack :1, +		have_stream :1, +		have_delta :1;  	struct git_pack_file *pack;  	git_filebuf pack_file;  	git_filebuf index_file;  	git_off_t off; +	git_off_t entry_start; +	git_packfile_stream stream;  	size_t nr_objects;  	git_vector objects;  	git_vector deltas;  	unsigned int fanout[256]; +	git_hash_ctx hash_ctx;  	git_oid hash;  	git_transfer_progress_callback progress_cb;  	void *progress_payload; @@ -176,56 +181,169 @@ cleanup:  }  /* Try to store the delta so we can try to resolve it later */ -static int store_delta(git_indexer_stream *idx, git_off_t entry_start, size_t entry_size, git_otype type) +static int store_delta(git_indexer_stream *idx)  { -	git_mwindow *w = NULL;  	struct delta_info *delta; -	git_rawobj obj; -	int error; + +	delta = git__calloc(1, sizeof(struct delta_info)); +	GITERR_CHECK_ALLOC(delta); +	delta->delta_off = idx->entry_start; + +	if (git_vector_insert(&idx->deltas, delta) < 0) +		return -1; + +	return 0; +} + +static void hash_header(git_hash_ctx *ctx, git_off_t len, git_otype type) +{ +	char buffer[64]; +	size_t hdrlen; + +	hdrlen = git_odb__format_object_header(buffer, sizeof(buffer), len, type); +	git_hash_update(ctx, buffer, hdrlen); +} + +static int hash_object_stream(git_hash_ctx *ctx, git_packfile_stream *stream) +{ +	char buffer[8*1024]; +	ssize_t read; + +	assert(ctx && stream); + +	do { +		if ((read = git_packfile_stream_read(stream, buffer, sizeof(buffer))) < 0) +			break; + +		git_hash_update(ctx, buffer, read); +	} while (read > 0); + +	if (read < 0) +		return (int)read; + +	return 0; +} + +/* In order to create the packfile stream, we need to skip over the delta base description */ +static int advance_delta_offset(git_indexer_stream *idx, git_otype type) +{ +	git_mwindow *w = NULL;  	assert(type == GIT_OBJ_REF_DELTA || type == GIT_OBJ_OFS_DELTA);  	if (type == GIT_OBJ_REF_DELTA) {  		idx->off += GIT_OID_RAWSZ;  	} else { -		git_off_t base_off; - -		base_off = get_delta_base(idx->pack, &w, &idx->off, type, entry_start); +		git_off_t base_off = get_delta_base(idx->pack, &w, &idx->off, type, idx->entry_start);  		git_mwindow_close(&w);  		if (base_off < 0)  			return (int)base_off;  	} -	error = packfile_unpack_compressed(&obj, idx->pack, &w, &idx->off, entry_size, type); -	if (error == GIT_EBUFS) { -		idx->off = entry_start; -		return GIT_EBUFS; -	} else if (error < 0){ -		return -1; +	return 0; +} + +/* Read from the stream and discard any output */ +static int read_object_stream(git_packfile_stream *stream) +{ +	char buffer[4*1024]; +	ssize_t read; + +	assert(stream); + +	do { +		read = git_packfile_stream_read(stream, buffer, sizeof(buffer)); +	} while (read > 0); + +	if (read < 0) +		return (int)read; + +	return 0; +} + +static int crc_object(uint32_t *crc_out, git_mwindow_file *mwf, git_off_t start, git_off_t size) +{ +	void *ptr; +	uint32_t crc; +	unsigned int left, len; +	git_mwindow *w = NULL; + +	crc = crc32(0L, Z_NULL, 0); +	while (size) { +		ptr = git_mwindow_open(mwf, &w, start, size, &left); +		if (ptr == NULL) +			return -1; + +		len = min(left, size); +		crc = crc32(crc, ptr, len); +		size -= len; +		start += len; +		git_mwindow_close(&w);  	} -	delta = git__calloc(1, sizeof(struct delta_info)); -	GITERR_CHECK_ALLOC(delta); -	delta->delta_off = entry_start; +	*crc_out = htonl(crc); +	return 0; +} -	git__free(obj.data); +static int store_object(git_indexer_stream *idx) +{ +	int i; +	git_oid oid; +	struct entry *entry; +	git_off_t entry_size; +	struct git_pack_entry *pentry; +	git_hash_ctx *ctx = &idx->hash_ctx; +	git_off_t entry_start = idx->entry_start; -	if (git_vector_insert(&idx->deltas, delta) < 0) -		return -1; +	entry = git__calloc(1, sizeof(*entry)); +	GITERR_CHECK_ALLOC(entry); + +	pentry = git__malloc(sizeof(struct git_pack_entry)); +	GITERR_CHECK_ALLOC(pentry); + +	git_hash_final(&oid, ctx); +	entry_size = idx->off - entry_start; +	if (entry_start > UINT31_MAX) { +		entry->offset = UINT32_MAX; +		entry->offset_long = entry_start; +	} else { +		entry->offset = (uint32_t)entry_start; +	} + +	git_oid_cpy(&pentry->sha1, &oid); +	pentry->offset = entry_start; +	if (git_vector_insert(&idx->pack->cache, pentry) < 0) { +		git__free(pentry); +		goto on_error; +	} + +	git_oid_cpy(&entry->oid, &oid); + +	if (crc_object(&entry->crc, &idx->pack->mwf, entry_start, entry_size) < 0) +		goto on_error; + +	/* Add the object to the list */ +	if (git_vector_insert(&idx->objects, entry) < 0) +		goto on_error; + +	for (i = oid.id[0]; i < 256; ++i) { +		idx->fanout[i]++; +	}  	return 0; + +on_error: +	git__free(entry); + +	return -1;  }  static int hash_and_save(git_indexer_stream *idx, git_rawobj *obj, git_off_t entry_start)  {  	int i;  	git_oid oid; -	void *packed;  	size_t entry_size; -	unsigned int left;  	struct entry *entry; -	git_mwindow *w = NULL; -	git_mwindow_file *mwf = &idx->pack->mwf;  	struct git_pack_entry *pentry;  	entry = git__calloc(1, sizeof(*entry)); @@ -258,13 +376,9 @@ static int hash_and_save(git_indexer_stream *idx, git_rawobj *obj, git_off_t ent  	entry->crc = crc32(0L, Z_NULL, 0);  	entry_size = (size_t)(idx->off - entry_start); -	packed = git_mwindow_open(mwf, &w, entry_start, entry_size, &left); -	if (packed == NULL) +	if (crc_object(&entry->crc, &idx->pack->mwf, entry_start, entry_size) < 0)  		goto on_error; -	entry->crc = htonl(crc32(entry->crc, packed, (uInt)entry_size)); -	git_mwindow_close(&w); -  	/* Add the object to the list */  	if (git_vector_insert(&idx->objects, entry) < 0)  		goto on_error; @@ -349,7 +463,7 @@ int git_indexer_stream_add(git_indexer_stream *idx, const void *data, size_t siz  	/* As the file grows any windows we try to use will be out of date */  	git_mwindow_free_all(mwf);  	while (processed < idx->nr_objects) { -		git_rawobj obj; +		git_packfile_stream *stream = &idx->stream;  		git_off_t entry_start = idx->off;  		size_t entry_size;  		git_otype type; @@ -358,46 +472,71 @@ int git_indexer_stream_add(git_indexer_stream *idx, const void *data, size_t siz  		if (idx->pack->mwf.size <= idx->off + 20)  			return 0; -		error = git_packfile_unpack_header(&entry_size, &type, mwf, &w, &idx->off); -		if (error == GIT_EBUFS) { -			idx->off = entry_start; -			return 0; -		} -		if (error < 0) -			return -1; - -		git_mwindow_close(&w); - -		if (type == GIT_OBJ_REF_DELTA || type == GIT_OBJ_OFS_DELTA) { -			error = store_delta(idx, entry_start, entry_size, type); +		if (!idx->have_stream) { +			error = git_packfile_unpack_header(&entry_size, &type, mwf, &w, &idx->off);  			if (error == GIT_EBUFS) {  				idx->off = entry_start;  				return 0;  			}  			if (error < 0) -				return error; +				return -1; + +			git_mwindow_close(&w); +			idx->entry_start = entry_start; +			git_hash_ctx_init(&idx->hash_ctx); + +			if (type == GIT_OBJ_REF_DELTA || type == GIT_OBJ_OFS_DELTA) { +				error = advance_delta_offset(idx, type); +				if (error == GIT_EBUFS) { +					idx->off = entry_start; +					return 0; +				} +				if (error < 0) +					return -1; + +				idx->have_delta = 1; +			} else { +				idx->have_delta = 0; +				hash_header(&idx->hash_ctx, entry_size, type); +			} + +			idx->have_stream = 1; +			if (git_packfile_stream_open(stream, idx->pack, idx->off) < 0) +				goto on_error; -			stats->received_objects++; -			do_progress_callback(idx, stats); -			continue;  		} -		idx->off = entry_start; -		error = git_packfile_unpack(&obj, idx->pack, &idx->off); -		if (error == GIT_EBUFS) { -			idx->off = entry_start; -			return 0; +		if (idx->have_delta) { +			error = read_object_stream(stream); +		} else { +			error = hash_object_stream(&idx->hash_ctx, stream);  		} -		if (error < 0) -			return -1; -		if (hash_and_save(idx, &obj, entry_start) < 0) +		idx->off = stream->curpos; +		if (error == GIT_EBUFS) +			return 0; + +		/* We want to free the stream reasorces no matter what here */ +		idx->have_stream = 0; +		git_packfile_stream_free(stream); + +		if (error < 0)  			goto on_error; -		git__free(obj.data); +		if (idx->have_delta) { +			error = store_delta(idx); +		} else { +			error = store_object(idx); +		} -		stats->indexed_objects = (unsigned int)++processed; +		if (error < 0) +			goto on_error; + +		if (!idx->have_delta) { +			stats->indexed_objects = (unsigned int)++processed; +		}  		stats->received_objects++; +  		do_progress_callback(idx, stats);  	} @@ -34,7 +34,7 @@ typedef struct  static int load_alternates(git_odb *odb, const char *objects_dir, int alternate_depth); -static int format_object_header(char *hdr, size_t n, size_t obj_len, git_otype obj_type) +int git_odb__format_object_header(char *hdr, size_t n, size_t obj_len, git_otype obj_type)  {  	const char *type_str = git_object_type2string(obj_type);  	int len = p_snprintf(hdr, n, "%s %"PRIuZ, type_str, obj_len); @@ -55,7 +55,7 @@ int git_odb__hashobj(git_oid *id, git_rawobj *obj)  	if (!obj->data && obj->len != 0)  		return -1; -	hdrlen = format_object_header(header, sizeof(header), obj->len, obj->type); +	hdrlen = git_odb__format_object_header(header, sizeof(header), obj->len, obj->type);  	vec[0].data = header;  	vec[0].len = hdrlen; @@ -133,7 +133,7 @@ int git_odb__hashfd(git_oid *out, git_file fd, size_t size, git_otype type)  	if ((error = git_hash_ctx_init(&ctx)) < 0)  		return -1; -	hdr_len = format_object_header(hdr, sizeof(hdr), size, type); +	hdr_len = git_odb__format_object_header(hdr, sizeof(hdr), size, type);  	if ((error = git_hash_update(&ctx, hdr, hdr_len)) < 0)  		goto done; @@ -46,6 +46,10 @@ struct git_odb {  int git_odb__hashobj(git_oid *id, git_rawobj *obj);  /* + * Format the object header such as it would appear in the on-disk object + */ +int git_odb__format_object_header(char *hdr, size_t n, size_t obj_len, git_otype obj_type); +/*   * Hash an open file descriptor.   * This is a performance call when the contents of a fd need to be hashed,   * but the fd is already open and we have the size of the contents. diff --git a/src/pack.c b/src/pack.c index d7d39392f..520e13828 100644 --- a/src/pack.c +++ b/src/pack.c @@ -441,6 +441,72 @@ static void use_git_free(void *opaq, void *ptr)  	git__free(ptr);  } +int git_packfile_stream_open(git_packfile_stream *obj, struct git_pack_file *p, git_off_t curpos) +{ +	int st; + +	memset(obj, 0, sizeof(git_packfile_stream)); +	obj->curpos = curpos; +	obj->p = p; +	obj->zstream.zalloc = use_git_alloc; +	obj->zstream.zfree = use_git_free; +	obj->zstream.next_in = Z_NULL; +	obj->zstream.next_out = Z_NULL; +	st = inflateInit(&obj->zstream); +	if (st != Z_OK) { +		git__free(obj); +		giterr_set(GITERR_ZLIB, "Failed to inflate packfile"); +		return -1; +	} + +	return 0; +} + +ssize_t git_packfile_stream_read(git_packfile_stream *obj, void *buffer, size_t len) +{ +	unsigned char *in; +	size_t written; +	int st; + +	if (obj->done) +		return 0; + +	in = pack_window_open(obj->p, &obj->mw, obj->curpos, &obj->zstream.avail_in); +	if (in == NULL) +		return GIT_EBUFS; + +	obj->zstream.next_out = buffer; +	obj->zstream.avail_out = len; +	obj->zstream.next_in = in; + +	st = inflate(&obj->zstream, Z_SYNC_FLUSH); +	git_mwindow_close(&obj->mw); + +	obj->curpos += obj->zstream.next_in - in; +	written = len - obj->zstream.avail_out; + +	if (st != Z_OK && st != Z_STREAM_END) { +		giterr_set(GITERR_ZLIB, "Failed to inflate packfile"); +		return -1; +	} + +	if (st == Z_STREAM_END) +		obj->done = 1; + + +	/* If we didn't write anything out but we're not done, we need more data */ +	if (!written && st != Z_STREAM_END) +		return GIT_EBUFS; + +	return written; + +} + +void git_packfile_stream_free(git_packfile_stream *obj) +{ +	inflateEnd(&obj->zstream); +} +  int packfile_unpack_compressed(  	git_rawobj *obj,  	struct git_pack_file *p, diff --git a/src/pack.h b/src/pack.h index c1277fdfb..188ea2bbd 100644 --- a/src/pack.h +++ b/src/pack.h @@ -8,6 +8,8 @@  #ifndef INCLUDE_pack_h__  #define INCLUDE_pack_h__ +#include <zlib.h> +  #include "git2/oid.h"  #include "common.h" @@ -76,6 +78,14 @@ struct git_pack_entry {  	struct git_pack_file *p;  }; +typedef struct git_packfile_stream { +	git_off_t curpos; +	int done; +	z_stream zstream; +	struct git_pack_file *p; +	git_mwindow *mw; +} git_packfile_stream; +  int git_packfile_unpack_header(  		size_t *size_p,  		git_otype *type_p, @@ -98,6 +108,10 @@ int packfile_unpack_compressed(  	size_t size,  	git_otype type); +int git_packfile_stream_open(git_packfile_stream *obj, struct git_pack_file *p, git_off_t curpos); +ssize_t git_packfile_stream_read(git_packfile_stream *obj, void *buffer, size_t len); +void git_packfile_stream_free(git_packfile_stream *obj); +  git_off_t get_delta_base(struct git_pack_file *p, git_mwindow **w_curs,  		git_off_t *curpos, git_otype type,  		git_off_t delta_obj_offset); | 
