summaryrefslogtreecommitdiff
path: root/builtin-pack-objects.c
diff options
context:
space:
mode:
authorJunio C Hamano <junkio@cox.net>2007-05-29 01:16:28 -0700
committerJunio C Hamano <junkio@cox.net>2007-05-29 01:16:28 -0700
commita77a33a51df9b7655d80299487ec6fbb10445496 (patch)
treec3abb0c4e3163da11464d846b6b35d77d78c1431 /builtin-pack-objects.c
parent41ffe5cdf5b0192cd70ef71b1cc37c7286d1dab0 (diff)
parentdca3957b8581ffd0faab135191bbee3029953bd2 (diff)
downloadgit-a77a33a51df9b7655d80299487ec6fbb10445496.tar.gz
Merge branch 'dh/repack' (early part)
* 'dh/repack' (early part): Ensure git-repack -a -d --max-pack-size=N deletes correct packs pack-objects: clarification & option checks for --max-pack-size git-repack --max-pack-size: add option parsing to enable feature git-repack --max-pack-size: split packs as asked by write_{object,one}() git-repack --max-pack-size: write_{object,one}() respect pack limit git-repack --max-pack-size: new file statics and code restructuring Alter sha1close() 3rd argument to request flush only
Diffstat (limited to 'builtin-pack-objects.c')
-rw-r--r--builtin-pack-objects.c329
1 files changed, 224 insertions, 105 deletions
diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c
index eca130f055..e52332df99 100644
--- a/builtin-pack-objects.c
+++ b/builtin-pack-objects.c
@@ -16,7 +16,7 @@
#include "progress.h"
static const char pack_usage[] = "\
-git-pack-objects [{ -q | --progress | --all-progress }] \n\
+git-pack-objects [{ -q | --progress | --all-progress }] [--max-pack-size=N] \n\
[--local] [--incremental] [--window=N] [--depth=N] \n\
[--no-reuse-delta] [--no-reuse-object] [--delta-base-offset] \n\
[--non-empty] [--revs [--unpacked | --all]*] [--reflog] \n\
@@ -54,7 +54,8 @@ struct object_entry {
* nice "minimum seek" order.
*/
static struct object_entry *objects;
-static uint32_t nr_objects, nr_alloc, nr_result;
+static struct object_entry **written_list;
+static uint32_t nr_objects, nr_alloc, nr_result, nr_written;
static int non_empty;
static int no_reuse_delta, no_reuse_object;
@@ -63,9 +64,11 @@ static int incremental;
static int allow_ofs_delta;
static const char *pack_tmp_name, *idx_tmp_name;
static char tmpname[PATH_MAX];
+static const char *base_name;
static unsigned char pack_file_sha1[20];
static int progress = 1;
static int window = 10;
+static uint32_t pack_size_limit;
static int depth = 50;
static int pack_to_stdout;
static int num_preferred_base;
@@ -351,16 +354,31 @@ static void copy_pack_data(struct sha1file *f,
}
static unsigned long write_object(struct sha1file *f,
- struct object_entry *entry)
+ struct object_entry *entry,
+ off_t write_offset)
{
unsigned long size;
enum object_type type;
void *buf;
unsigned char header[10];
+ unsigned char dheader[10];
unsigned hdrlen;
off_t datalen;
enum object_type obj_type;
int to_reuse = 0;
+ /* write limit if limited packsize and not first object */
+ unsigned long limit = pack_size_limit && nr_written ?
+ pack_size_limit - write_offset : 0;
+ /* no if no delta */
+ int usable_delta = !entry->delta ? 0 :
+ /* yes if unlimited packfile */
+ !pack_size_limit ? 1 :
+ /* no if base written to previous pack */
+ entry->delta->offset == (off_t)-1 ? 0 :
+ /* otherwise double-check written to this
+ * pack, like we do below
+ */
+ entry->delta->offset ? 1 : 0;
if (!pack_to_stdout)
crc32_begin(f);
@@ -371,7 +389,9 @@ static unsigned long write_object(struct sha1file *f,
else if (!entry->in_pack)
to_reuse = 0; /* can't reuse what we don't have */
else if (obj_type == OBJ_REF_DELTA || obj_type == OBJ_OFS_DELTA)
- to_reuse = 1; /* check_object() decided it for us */
+ /* check_object() decided it for us ... */
+ to_reuse = usable_delta;
+ /* ... but pack split may override that */
else if (obj_type != entry->in_pack_type)
to_reuse = 0; /* pack has delta which is unusable */
else if (entry->delta)
@@ -382,24 +402,48 @@ static unsigned long write_object(struct sha1file *f,
*/
if (!to_reuse) {
+ z_stream stream;
+ unsigned long maxsize;
+ void *out;
buf = read_sha1_file(entry->sha1, &type, &size);
if (!buf)
die("unable to read %s", sha1_to_hex(entry->sha1));
if (size != entry->size)
die("object %s size inconsistency (%lu vs %lu)",
sha1_to_hex(entry->sha1), size, entry->size);
- if (entry->delta) {
+ if (usable_delta) {
buf = delta_against(buf, size, entry);
size = entry->delta_size;
obj_type = (allow_ofs_delta && entry->delta->offset) ?
OBJ_OFS_DELTA : OBJ_REF_DELTA;
+ } else {
+ /*
+ * recover real object type in case
+ * check_object() wanted to re-use a delta,
+ * but we couldn't since base was in previous split pack
+ */
+ obj_type = type;
}
+ /* compress the data to store and put compressed length in datalen */
+ memset(&stream, 0, sizeof(stream));
+ deflateInit(&stream, pack_compression_level);
+ maxsize = deflateBound(&stream, size);
+ out = xmalloc(maxsize);
+ /* Compress it */
+ stream.next_in = buf;
+ stream.avail_in = size;
+ stream.next_out = out;
+ stream.avail_out = maxsize;
+ while (deflate(&stream, Z_FINISH) == Z_OK)
+ /* nothing */;
+ deflateEnd(&stream);
+ datalen = stream.total_out;
+ deflateEnd(&stream);
/*
* The object header is a byte of 'type' followed by zero or
* more bytes of length.
*/
hdrlen = encode_header(obj_type, size, header);
- sha1write(f, header, hdrlen);
if (obj_type == OBJ_OFS_DELTA) {
/*
@@ -408,21 +452,41 @@ static unsigned long write_object(struct sha1file *f,
* base from this object's position in the pack.
*/
off_t ofs = entry->offset - entry->delta->offset;
- unsigned pos = sizeof(header) - 1;
- header[pos] = ofs & 127;
+ unsigned pos = sizeof(dheader) - 1;
+ dheader[pos] = ofs & 127;
while (ofs >>= 7)
- header[--pos] = 128 | (--ofs & 127);
- sha1write(f, header + pos, sizeof(header) - pos);
- hdrlen += sizeof(header) - pos;
+ dheader[--pos] = 128 | (--ofs & 127);
+ if (limit && hdrlen + sizeof(dheader) - pos + datalen + 20 >= limit) {
+ free(out);
+ free(buf);
+ return 0;
+ }
+ sha1write(f, header, hdrlen);
+ sha1write(f, dheader + pos, sizeof(dheader) - pos);
+ hdrlen += sizeof(dheader) - pos;
} else if (obj_type == OBJ_REF_DELTA) {
/*
* Deltas with a base reference contain
* an additional 20 bytes for the base sha1.
*/
+ if (limit && hdrlen + 20 + datalen + 20 >= limit) {
+ free(out);
+ free(buf);
+ return 0;
+ }
+ sha1write(f, header, hdrlen);
sha1write(f, entry->delta->sha1, 20);
hdrlen += 20;
+ } else {
+ if (limit && hdrlen + datalen + 20 >= limit) {
+ free(out);
+ free(buf);
+ return 0;
+ }
+ sha1write(f, header, hdrlen);
}
- datalen = sha1write_compressed(f, buf, size, pack_compression_level);
+ sha1write(f, out, datalen);
+ free(out);
free(buf);
}
else {
@@ -437,20 +501,6 @@ static unsigned long write_object(struct sha1file *f,
reused_delta++;
}
hdrlen = encode_header(obj_type, entry->size, header);
- sha1write(f, header, hdrlen);
- if (obj_type == OBJ_OFS_DELTA) {
- off_t ofs = entry->offset - entry->delta->offset;
- unsigned pos = sizeof(header) - 1;
- header[pos] = ofs & 127;
- while (ofs >>= 7)
- header[--pos] = 128 | (--ofs & 127);
- sha1write(f, header + pos, sizeof(header) - pos);
- hdrlen += sizeof(header) - pos;
- } else if (obj_type == OBJ_REF_DELTA) {
- sha1write(f, entry->delta->sha1, 20);
- hdrlen += 20;
- }
-
offset = entry->in_pack_offset;
revidx = find_packed_object(p, offset);
datalen = revidx[1].offset - offset;
@@ -459,6 +509,29 @@ static unsigned long write_object(struct sha1file *f,
die("bad packed object CRC for %s", sha1_to_hex(entry->sha1));
offset += entry->in_pack_header_size;
datalen -= entry->in_pack_header_size;
+ if (obj_type == OBJ_OFS_DELTA) {
+ off_t ofs = entry->offset - entry->delta->offset;
+ unsigned pos = sizeof(dheader) - 1;
+ dheader[pos] = ofs & 127;
+ while (ofs >>= 7)
+ dheader[--pos] = 128 | (--ofs & 127);
+ if (limit && hdrlen + sizeof(dheader) - pos + datalen + 20 >= limit)
+ return 0;
+ sha1write(f, header, hdrlen);
+ sha1write(f, dheader + pos, sizeof(dheader) - pos);
+ hdrlen += sizeof(dheader) - pos;
+ } else if (obj_type == OBJ_REF_DELTA) {
+ if (limit && hdrlen + 20 + datalen + 20 >= limit)
+ return 0;
+ sha1write(f, header, hdrlen);
+ sha1write(f, entry->delta->sha1, 20);
+ hdrlen += 20;
+ } else {
+ if (limit && hdrlen + datalen + 20 >= limit)
+ return 0;
+ sha1write(f, header, hdrlen);
+ }
+
if (!pack_to_stdout && p->index_version == 1 &&
check_pack_inflate(p, &w_curs, offset, datalen, entry->size))
die("corrupt packed object for %s", sha1_to_hex(entry->sha1));
@@ -466,7 +539,7 @@ static unsigned long write_object(struct sha1file *f,
unuse_pack(&w_curs);
reused++;
}
- if (entry->delta)
+ if (usable_delta)
written_delta++;
written++;
if (!pack_to_stdout)
@@ -485,11 +558,19 @@ static off_t write_one(struct sha1file *f,
return offset;
/* if we are deltified, write out base object first. */
- if (e->delta)
+ if (e->delta) {
offset = write_one(f, e->delta, offset);
+ if (!offset)
+ return 0;
+ }
e->offset = offset;
- size = write_object(f, e);
+ size = write_object(f, e, offset);
+ if (!size) {
+ e->offset = 0;
+ return 0;
+ }
+ written_list[nr_written++] = e;
/* make sure off_t is sufficiently large not to wrap */
if (offset > offset + size)
@@ -503,49 +584,113 @@ static int open_object_dir_tmp(const char *path)
return mkstemp(tmpname);
}
-static off_t write_pack_file(void)
+/* forward declarations for write_pack_file */
+static void write_index_file(off_t last_obj_offset, unsigned char *sha1);
+static int adjust_perm(const char *path, mode_t mode);
+
+static void write_pack_file(void)
{
- uint32_t i;
+ uint32_t i = 0, j;
struct sha1file *f;
- off_t offset, last_obj_offset = 0;
+ off_t offset, offset_one, last_obj_offset = 0;
struct pack_header hdr;
- int do_progress = progress;
-
- if (pack_to_stdout) {
- f = sha1fd(1, "<stdout>");
- do_progress >>= 1;
- } else {
- int fd = open_object_dir_tmp("tmp_pack_XXXXXX");
- if (fd < 0)
- die("unable to create %s: %s\n", tmpname, strerror(errno));
- pack_tmp_name = xstrdup(tmpname);
- f = sha1fd(fd, pack_tmp_name);
- }
+ int do_progress = progress >> pack_to_stdout;
+ uint32_t nr_remaining = nr_result;
if (do_progress)
start_progress(&progress_state, "Writing %u objects...", "", nr_result);
+ written_list = xmalloc(nr_objects * sizeof(struct object_entry *));
- hdr.hdr_signature = htonl(PACK_SIGNATURE);
- hdr.hdr_version = htonl(PACK_VERSION);
- hdr.hdr_entries = htonl(nr_result);
- sha1write(f, &hdr, sizeof(hdr));
- offset = sizeof(hdr);
- if (!nr_result)
- goto done;
- for (i = 0; i < nr_objects; i++) {
- last_obj_offset = offset;
- offset = write_one(f, objects + i, offset);
- if (do_progress)
- display_progress(&progress_state, written);
- }
+ do {
+ if (pack_to_stdout) {
+ f = sha1fd(1, "<stdout>");
+ } else {
+ int fd = open_object_dir_tmp("tmp_pack_XXXXXX");
+ if (fd < 0)
+ die("unable to create %s: %s\n", tmpname, strerror(errno));
+ pack_tmp_name = xstrdup(tmpname);
+ f = sha1fd(fd, pack_tmp_name);
+ }
+
+ hdr.hdr_signature = htonl(PACK_SIGNATURE);
+ hdr.hdr_version = htonl(PACK_VERSION);
+ hdr.hdr_entries = htonl(nr_remaining);
+ sha1write(f, &hdr, sizeof(hdr));
+ offset = sizeof(hdr);
+ nr_written = 0;
+ for (; i < nr_objects; i++) {
+ last_obj_offset = offset;
+ offset_one = write_one(f, objects + i, offset);
+ if (!offset_one)
+ break;
+ offset = offset_one;
+ if (do_progress)
+ display_progress(&progress_state, written);
+ }
+
+ /*
+ * Did we write the wrong # entries in the header?
+ * If so, rewrite it like in fast-import
+ */
+ if (pack_to_stdout || nr_written == nr_remaining) {
+ sha1close(f, pack_file_sha1, 1);
+ } else {
+ sha1close(f, pack_file_sha1, 0);
+ fixup_pack_header_footer(f->fd, pack_file_sha1, pack_tmp_name, nr_written);
+ close(f->fd);
+ }
+
+ if (!pack_to_stdout) {
+ unsigned char object_list_sha1[20];
+ mode_t mode = umask(0);
+
+ umask(mode);
+ mode = 0444 & ~mode;
+
+ write_index_file(last_obj_offset, object_list_sha1);
+ snprintf(tmpname, sizeof(tmpname), "%s-%s.pack",
+ base_name, sha1_to_hex(object_list_sha1));
+ if (adjust_perm(pack_tmp_name, mode))
+ die("unable to make temporary pack file readable: %s",
+ strerror(errno));
+ if (rename(pack_tmp_name, tmpname))
+ die("unable to rename temporary pack file: %s",
+ strerror(errno));
+ snprintf(tmpname, sizeof(tmpname), "%s-%s.idx",
+ base_name, sha1_to_hex(object_list_sha1));
+ if (adjust_perm(idx_tmp_name, mode))
+ die("unable to make temporary index file readable: %s",
+ strerror(errno));
+ if (rename(idx_tmp_name, tmpname))
+ die("unable to rename temporary index file: %s",
+ strerror(errno));
+ puts(sha1_to_hex(object_list_sha1));
+ }
+
+ /* mark written objects as written to previous pack */
+ for (j = 0; j < nr_written; j++) {
+ written_list[j]->offset = (off_t)-1;
+ }
+ nr_remaining -= nr_written;
+ } while (nr_remaining && i < nr_objects);
+
+ free(written_list);
if (do_progress)
stop_progress(&progress_state);
- done:
if (written != nr_result)
die("wrote %u objects while expecting %u", written, nr_result);
- sha1close(f, pack_file_sha1, 1);
-
- return last_obj_offset;
+ /*
+ * We have scanned through [0 ... i). Since we have written
+ * the correct number of objects, the remaining [i ... nr_objects)
+ * items must be either already written (due to out-of-order delta base)
+ * or a preferred base. Count those which are neither and complain if any.
+ */
+ for (j = 0; i < nr_objects; i++) {
+ struct object_entry *e = objects + i;
+ j += !e->offset && !e->preferred_base;
+ }
+ if (j)
+ die("wrote %u objects as expected but %u unwritten", written, j);
}
static int sha1_sort(const void *_a, const void *_b)
@@ -572,18 +717,11 @@ static void write_index_file(off_t last_obj_offset, unsigned char *sha1)
idx_tmp_name = xstrdup(tmpname);
f = sha1fd(fd, idx_tmp_name);
- if (nr_result) {
- uint32_t j = 0;
- sorted_by_sha =
- xcalloc(nr_result, sizeof(struct object_entry *));
- for (i = 0; i < nr_objects; i++)
- if (!objects[i].preferred_base)
- sorted_by_sha[j++] = objects + i;
- if (j != nr_result)
- die("listed %u objects while expecting %u", j, nr_result);
- qsort(sorted_by_sha, nr_result, sizeof(*sorted_by_sha), sha1_sort);
+ if (nr_written) {
+ sorted_by_sha = written_list;
+ qsort(sorted_by_sha, nr_written, sizeof(*sorted_by_sha), sha1_sort);
list = sorted_by_sha;
- last = sorted_by_sha + nr_result;
+ last = sorted_by_sha + nr_written;
} else
sorted_by_sha = list = last = NULL;
@@ -621,7 +759,7 @@ static void write_index_file(off_t last_obj_offset, unsigned char *sha1)
/* Write the actual SHA1 entries. */
list = sorted_by_sha;
- for (i = 0; i < nr_result; i++) {
+ for (i = 0; i < nr_written; i++) {
struct object_entry *entry = *list++;
if (index_version < 2) {
uint32_t offset = htonl(entry->offset);
@@ -636,7 +774,7 @@ static void write_index_file(off_t last_obj_offset, unsigned char *sha1)
/* write the crc32 table */
list = sorted_by_sha;
- for (i = 0; i < nr_objects; i++) {
+ for (i = 0; i < nr_written; i++) {
struct object_entry *entry = *list++;
uint32_t crc32_val = htonl(entry->crc32);
sha1write(f, &crc32_val, 4);
@@ -644,7 +782,7 @@ static void write_index_file(off_t last_obj_offset, unsigned char *sha1)
/* write the 32-bit offset table */
list = sorted_by_sha;
- for (i = 0; i < nr_objects; i++) {
+ for (i = 0; i < nr_written; i++) {
struct object_entry *entry = *list++;
uint32_t offset = (entry->offset <= index_off32_limit) ?
entry->offset : (0x80000000 | nr_large_offset++);
@@ -669,7 +807,6 @@ static void write_index_file(off_t last_obj_offset, unsigned char *sha1)
sha1write(f, pack_file_sha1, 20);
sha1close(f, NULL, 1);
- free(sorted_by_sha);
SHA1_Final(sha1, &ctx);
}
@@ -1569,8 +1706,6 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
int use_internal_rev_list = 0;
int thin = 0;
uint32_t i;
- off_t last_obj_offset;
- const char *base_name = NULL;
const char **rp_av;
int rp_ac_alloc = 64;
int rp_ac;
@@ -1616,6 +1751,13 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
pack_compression_level = level;
continue;
}
+ if (!prefixcmp(arg, "--max-pack-size=")) {
+ char *end;
+ pack_size_limit = strtoul(arg+16, &end, 0) * 1024 * 1024;
+ if (!arg[16] || *end)
+ usage(pack_usage);
+ continue;
+ }
if (!prefixcmp(arg, "--window=")) {
char *end;
window = strtoul(arg+9, &end, 0);
@@ -1714,6 +1856,9 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
if (pack_to_stdout != !base_name)
usage(pack_usage);
+ if (pack_to_stdout && pack_size_limit)
+ die("--max-pack-size cannot be used to build a pack for transfer.");
+
if (!pack_to_stdout && thin)
die("--thin cannot be used to build an indexable pack.");
@@ -1739,33 +1884,7 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
fprintf(stderr, "Result has %u objects.\n", nr_result);
if (nr_result)
prepare_pack(window, depth);
- last_obj_offset = write_pack_file();
- if (!pack_to_stdout) {
- unsigned char object_list_sha1[20];
- mode_t mode = umask(0);
-
- umask(mode);
- mode = 0444 & ~mode;
-
- write_index_file(last_obj_offset, object_list_sha1);
- snprintf(tmpname, sizeof(tmpname), "%s-%s.pack",
- base_name, sha1_to_hex(object_list_sha1));
- if (adjust_perm(pack_tmp_name, mode))
- die("unable to make temporary pack file readable: %s",
- strerror(errno));
- if (rename(pack_tmp_name, tmpname))
- die("unable to rename temporary pack file: %s",
- strerror(errno));
- snprintf(tmpname, sizeof(tmpname), "%s-%s.idx",
- base_name, sha1_to_hex(object_list_sha1));
- if (adjust_perm(idx_tmp_name, mode))
- die("unable to make temporary index file readable: %s",
- strerror(errno));
- if (rename(idx_tmp_name, tmpname))
- die("unable to rename temporary index file: %s",
- strerror(errno));
- puts(sha1_to_hex(object_list_sha1));
- }
+ write_pack_file();
if (progress)
fprintf(stderr, "Total %u (delta %u), reused %u (delta %u)\n",
written, written_delta, reused, reused_delta);