summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--builtin/pack-objects.c28
-rw-r--r--pack-bitmap.c25
-rw-r--r--pack-bitmap.h7
-rw-r--r--pack-objects.c19
-rw-r--r--pack-objects.h20
-rw-r--r--t/perf/README25
-rwxr-xr-xt/perf/aggregate.perl69
-rwxr-xr-xt/perf/p5311-pack-bitmaps-fetch.sh45
-rw-r--r--t/perf/perf-lib.sh74
9 files changed, 260 insertions, 52 deletions
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index caa4cd0211..f434e1fb0c 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -41,6 +41,7 @@
#define DELTA_CHILD(obj) oe_delta_child(&to_pack, obj)
#define DELTA_SIBLING(obj) oe_delta_sibling(&to_pack, obj)
#define SET_DELTA(obj, val) oe_set_delta(&to_pack, obj, val)
+#define SET_DELTA_EXT(obj, oid) oe_set_delta_ext(&to_pack, obj, oid)
#define SET_DELTA_SIZE(obj, val) oe_set_delta_size(&to_pack, obj, val)
#define SET_DELTA_CHILD(obj, val) oe_set_delta_child(&to_pack, obj, val)
#define SET_DELTA_SIBLING(obj, val) oe_set_delta_sibling(&to_pack, obj, val)
@@ -60,6 +61,7 @@ static struct packing_data to_pack;
static struct pack_idx_entry **written_list;
static uint32_t nr_result, nr_written, nr_seen;
+static struct bitmap_index *bitmap_git;
static int non_empty;
static int reuse_delta = 1, reuse_object = 1;
@@ -80,6 +82,7 @@ static unsigned long pack_size_limit;
static int depth = 50;
static int delta_search_threads;
static int pack_to_stdout;
+static int thin;
static int num_preferred_base;
static struct progress *progress_state;
@@ -1538,11 +1541,15 @@ static void check_object(struct object_entry *entry)
break;
}
- if (base_ref && (base_entry = packlist_find(&to_pack, base_ref, NULL))) {
+ if (base_ref && (
+ (base_entry = packlist_find(&to_pack, base_ref, NULL)) ||
+ (thin &&
+ bitmap_has_sha1_in_uninteresting(bitmap_git, base_ref)))) {
/*
* If base_ref was set above that means we wish to
- * reuse delta data, and we even found that base
- * in the list of objects we want to pack. Goodie!
+ * reuse delta data, and either we found that object in
+ * the list of objects we want to pack, or it's one we
+ * know the receiver has.
*
* Depth value does not matter - find_deltas() will
* never consider reused delta as the base object to
@@ -1551,10 +1558,16 @@ static void check_object(struct object_entry *entry)
*/
oe_set_type(entry, entry->in_pack_type);
SET_SIZE(entry, in_pack_size); /* delta size */
- SET_DELTA(entry, base_entry);
SET_DELTA_SIZE(entry, in_pack_size);
- entry->delta_sibling_idx = base_entry->delta_child_idx;
- SET_DELTA_CHILD(base_entry, entry);
+
+ if (base_entry) {
+ SET_DELTA(entry, base_entry);
+ entry->delta_sibling_idx = base_entry->delta_child_idx;
+ SET_DELTA_CHILD(base_entry, entry);
+ } else {
+ SET_DELTA_EXT(entry, base_ref);
+ }
+
unuse_pack(&w_curs);
return;
}
@@ -2979,7 +2992,6 @@ static int pack_options_allow_reuse(void)
static int get_object_list_from_bitmap(struct rev_info *revs)
{
- struct bitmap_index *bitmap_git;
if (!(bitmap_git = prepare_bitmap_walk(revs)))
return -1;
@@ -2995,7 +3007,6 @@ static int get_object_list_from_bitmap(struct rev_info *revs)
}
traverse_bitmap_commit_list(bitmap_git, &add_object_entry_from_bitmap);
- free_bitmap_index(bitmap_git);
return 0;
}
@@ -3143,7 +3154,6 @@ static int option_parse_unpack_unreachable(const struct option *opt,
int cmd_pack_objects(int argc, const char **argv, const char *prefix)
{
int use_internal_rev_list = 0;
- int thin = 0;
int shallow = 0;
int all_progress_implied = 0;
struct argv_array rp = ARGV_ARRAY_INIT;
diff --git a/pack-bitmap.c b/pack-bitmap.c
index 4e50ab391f..c7d593c91c 100644
--- a/pack-bitmap.c
+++ b/pack-bitmap.c
@@ -86,6 +86,9 @@ struct bitmap_index {
/* Bitmap result of the last performed walk */
struct bitmap *result;
+ /* "have" bitmap from the last performed walk */
+ struct bitmap *haves;
+
/* Version of the bitmap index */
unsigned int version;
@@ -759,8 +762,8 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs)
bitmap_and_not(wants_bitmap, haves_bitmap);
bitmap_git->result = wants_bitmap;
+ bitmap_git->haves = haves_bitmap;
- bitmap_free(haves_bitmap);
return bitmap_git;
cleanup:
@@ -1114,5 +1117,25 @@ void free_bitmap_index(struct bitmap_index *b)
free(b->ext_index.objects);
free(b->ext_index.hashes);
bitmap_free(b->result);
+ bitmap_free(b->haves);
free(b);
}
+
+int bitmap_has_sha1_in_uninteresting(struct bitmap_index *bitmap_git,
+ const unsigned char *sha1)
+{
+ int pos;
+
+ if (!bitmap_git)
+ return 0; /* no bitmap loaded */
+ if (!bitmap_git->result)
+ BUG("failed to perform bitmap walk before querying");
+ if (!bitmap_git->haves)
+ return 0; /* walk had no "haves" */
+
+ pos = bitmap_position_packfile(bitmap_git, sha1);
+ if (pos < 0)
+ return 0;
+
+ return bitmap_get(bitmap_git->haves, pos);
+}
diff --git a/pack-bitmap.h b/pack-bitmap.h
index 8a04741e12..c633bf5238 100644
--- a/pack-bitmap.h
+++ b/pack-bitmap.h
@@ -53,6 +53,13 @@ int rebuild_existing_bitmaps(struct bitmap_index *, struct packing_data *mapping
khash_sha1 *reused_bitmaps, int show_progress);
void free_bitmap_index(struct bitmap_index *);
+/*
+ * After a traversal has been performed on the bitmap_index, this can be
+ * queried to see if a particular object was reachable from any of the
+ * objects flagged as UNINTERESTING.
+ */
+int bitmap_has_sha1_in_uninteresting(struct bitmap_index *, const unsigned char *sha1);
+
void bitmap_writer_show_progress(int show);
void bitmap_writer_set_checksum(unsigned char *sha1);
void bitmap_writer_build_type_index(struct packing_data *to_pack,
diff --git a/pack-objects.c b/pack-objects.c
index d04cfa8e9f..3462858772 100644
--- a/pack-objects.c
+++ b/pack-objects.c
@@ -181,3 +181,22 @@ struct object_entry *packlist_alloc(struct packing_data *pdata,
return new_entry;
}
+
+void oe_set_delta_ext(struct packing_data *pdata,
+ struct object_entry *delta,
+ const unsigned char *sha1)
+{
+ struct object_entry *base;
+
+ ALLOC_GROW(pdata->ext_bases, pdata->nr_ext + 1, pdata->alloc_ext);
+ base = &pdata->ext_bases[pdata->nr_ext++];
+ memset(base, 0, sizeof(*base));
+ hashcpy(base->idx.oid.hash, sha1);
+
+ /* These flags mark that we are not part of the actual pack output. */
+ base->preferred_base = 1;
+ base->filled = 1;
+
+ delta->ext_base = 1;
+ delta->delta_idx = base - pdata->ext_bases + 1;
+}
diff --git a/pack-objects.h b/pack-objects.h
index 62806ccc39..b0c1f137c6 100644
--- a/pack-objects.h
+++ b/pack-objects.h
@@ -112,6 +112,7 @@ struct object_entry {
unsigned filled:1; /* assigned write-order */
unsigned dfs_state:OE_DFS_STATE_BITS;
unsigned depth:OE_DEPTH_BITS;
+ unsigned ext_base:1; /* delta_idx points outside packlist */
/*
* pahole results on 64-bit linux (gcc and clang)
@@ -147,6 +148,14 @@ struct packing_data {
pthread_mutex_t lock;
#endif
+ /*
+ * This list contains entries for bases which we know the other side
+ * has (e.g., via reachability bitmaps), but which aren't in our
+ * "objects" list.
+ */
+ struct object_entry *ext_bases;
+ uint32_t nr_ext, alloc_ext;
+
uintmax_t oe_size_limit;
uintmax_t oe_delta_size_limit;
};
@@ -249,9 +258,12 @@ static inline struct object_entry *oe_delta(
const struct packing_data *pack,
const struct object_entry *e)
{
- if (e->delta_idx)
+ if (!e->delta_idx)
+ return NULL;
+ if (e->ext_base)
+ return &pack->ext_bases[e->delta_idx - 1];
+ else
return &pack->objects[e->delta_idx - 1];
- return NULL;
}
static inline void oe_set_delta(struct packing_data *pack,
@@ -264,6 +276,10 @@ static inline void oe_set_delta(struct packing_data *pack,
e->delta_idx = 0;
}
+void oe_set_delta_ext(struct packing_data *pack,
+ struct object_entry *e,
+ const unsigned char *sha1);
+
static inline struct object_entry *oe_delta_child(
const struct packing_data *pack,
const struct object_entry *e)
diff --git a/t/perf/README b/t/perf/README
index 21321a0f36..be12090c38 100644
--- a/t/perf/README
+++ b/t/perf/README
@@ -168,3 +168,28 @@ that
While we have tried to make sure that it can cope with embedded
whitespace and other special characters, it will not work with
multi-line data.
+
+Rather than tracking the performance by run-time as `test_perf` does, you
+may also track output size by using `test_size`. The stdout of the
+function should be a single numeric value, which will be captured and
+shown in the aggregated output. For example:
+
+ test_perf 'time foo' '
+ ./foo >foo.out
+ '
+
+ test_size 'output size'
+ wc -c <foo.out
+ '
+
+might produce output like:
+
+ Test origin HEAD
+ -------------------------------------------------------------
+ 1234.1 time foo 0.37(0.79+0.02) 0.26(0.51+0.02) -29.7%
+ 1234.2 output size 4.3M 3.6M -14.7%
+
+The item being measured (and its units) is up to the test; the context
+and the test title should make it clear to the user whether bigger or
+smaller numbers are better. Unlike test_perf, the test code will only be
+run once, since output sizes tend to be more deterministic than timings.
diff --git a/t/perf/aggregate.perl b/t/perf/aggregate.perl
index bc865160e7..494907a892 100755
--- a/t/perf/aggregate.perl
+++ b/t/perf/aggregate.perl
@@ -13,27 +13,42 @@ sub get_times {
my $line = <$fh>;
return undef if not defined $line;
close $fh or die "cannot close $name: $!";
- $line =~ /^(?:(\d+):)?(\d+):(\d+(?:\.\d+)?) (\d+(?:\.\d+)?) (\d+(?:\.\d+)?)$/
- or die "bad input line: $line";
- my $rt = ((defined $1 ? $1 : 0.0)*60+$2)*60+$3;
- return ($rt, $4, $5);
+ # times
+ if ($line =~ /^(?:(\d+):)?(\d+):(\d+(?:\.\d+)?) (\d+(?:\.\d+)?) (\d+(?:\.\d+)?)$/) {
+ my $rt = ((defined $1 ? $1 : 0.0)*60+$2)*60+$3;
+ return ($rt, $4, $5);
+ # size
+ } elsif ($line =~ /^\d+$/) {
+ return $&;
+ } else {
+ die "bad input line: $line";
+ }
+}
+
+sub relative_change {
+ my ($r, $firstr) = @_;
+ if ($firstr > 0) {
+ return sprintf "%+.1f%%", 100.0*($r-$firstr)/$firstr;
+ } elsif ($r == 0) {
+ return "=";
+ } else {
+ return "+inf";
+ }
}
sub format_times {
my ($r, $u, $s, $firstr) = @_;
+ # no value means we did not finish the test
if (!defined $r) {
return "<missing>";
}
- my $out = sprintf "%.2f(%.2f+%.2f)", $r, $u, $s;
- if (defined $firstr) {
- if ($firstr > 0) {
- $out .= sprintf " %+.1f%%", 100.0*($r-$firstr)/$firstr;
- } elsif ($r == 0) {
- $out .= " =";
- } else {
- $out .= " +inf";
- }
+ # a single value means we have a size, not times
+ if (!defined $u) {
+ return format_size($r, $firstr);
}
+ # otherwise, we have real/user/system times
+ my $out = sprintf "%.2f(%.2f+%.2f)", $r, $u, $s;
+ $out .= ' ' . relative_change($r, $firstr) if defined $firstr;
return $out;
}
@@ -51,6 +66,25 @@ EOT
exit(1);
}
+sub human_size {
+ my $n = shift;
+ my @units = ('', qw(K M G));
+ while ($n > 900 && @units > 1) {
+ $n /= 1000;
+ shift @units;
+ }
+ return $n unless length $units[0];
+ return sprintf '%.1f%s', $n, $units[0];
+}
+
+sub format_size {
+ my ($size, $first) = @_;
+ # match the width of a time: 0.00(0.00+0.00)
+ my $out = sprintf '%15s', human_size($size);
+ $out .= ' ' . relative_change($size, $first) if defined $first;
+ return $out;
+}
+
my (@dirs, %dirnames, %dirabbrevs, %prefixes, @tests,
$codespeed, $sortby, $subsection, $reponame);
@@ -181,7 +215,14 @@ sub print_default_results {
my $firstr;
for my $i (0..$#dirs) {
my $d = $dirs[$i];
- $times{$prefixes{$d}.$t} = [get_times("$resultsdir/$prefixes{$d}$t.times")];
+ my $base = "$resultsdir/$prefixes{$d}$t";
+ $times{$prefixes{$d}.$t} = [];
+ foreach my $type (qw(times size)) {
+ if (-e "$base.$type") {
+ $times{$prefixes{$d}.$t} = [get_times("$base.$type")];
+ last;
+ }
+ }
my ($r,$u,$s) = @{$times{$prefixes{$d}.$t}};
my $w = length format_times($r,$u,$s,$firstr);
$colwidth[$i] = $w if $w > $colwidth[$i];
diff --git a/t/perf/p5311-pack-bitmaps-fetch.sh b/t/perf/p5311-pack-bitmaps-fetch.sh
new file mode 100755
index 0000000000..b04575951f
--- /dev/null
+++ b/t/perf/p5311-pack-bitmaps-fetch.sh
@@ -0,0 +1,45 @@
+#!/bin/sh
+
+test_description='performance of fetches from bitmapped packs'
+. ./perf-lib.sh
+
+test_perf_default_repo
+
+test_expect_success 'create bitmapped server repo' '
+ git config pack.writebitmaps true &&
+ git config pack.writebitmaphashcache true &&
+ git repack -ad
+'
+
+# simulate a fetch from a repository that last fetched N days ago, for
+# various values of N. We do so by following the first-parent chain,
+# and assume the first entry in the chain that is N days older than the current
+# HEAD is where the HEAD would have been then.
+for days in 1 2 4 8 16 32 64 128; do
+ title=$(printf '%10s' "($days days)")
+ test_expect_success "setup revs from $days days ago" '
+ now=$(git log -1 --format=%ct HEAD) &&
+ then=$(($now - ($days * 86400))) &&
+ tip=$(git rev-list -1 --first-parent --until=$then HEAD) &&
+ {
+ echo HEAD &&
+ echo ^$tip
+ } >revs
+ '
+
+ test_perf "server $title" '
+ git pack-objects --stdout --revs \
+ --thin --delta-base-offset \
+ <revs >tmp.pack
+ '
+
+ test_size "size $title" '
+ wc -c <tmp.pack
+ '
+
+ test_perf "client $title" '
+ git index-pack --stdin --fix-thin <tmp.pack
+ '
+done
+
+test_done
diff --git a/t/perf/perf-lib.sh b/t/perf/perf-lib.sh
index e4c343a6b7..11d1922cf5 100644
--- a/t/perf/perf-lib.sh
+++ b/t/perf/perf-lib.sh
@@ -179,8 +179,8 @@ exit $ret' >&3 2>&4
return "$eval_ret"
}
-
-test_perf () {
+test_wrapper_ () {
+ test_wrapper_func_=$1; shift
test_start_
test "$#" = 3 && { test_prereq=$1; shift; } || test_prereq=
test "$#" = 2 ||
@@ -191,35 +191,57 @@ test_perf () {
base=$(basename "$0" .sh)
echo "$test_count" >>"$perf_results_dir"/$base.subtests
echo "$1" >"$perf_results_dir"/$base.$test_count.descr
- if test -z "$verbose"; then
- printf "%s" "perf $test_count - $1:"
- else
- echo "perf $test_count - $1:"
- fi
- for i in $(test_seq 1 $GIT_PERF_REPEAT_COUNT); do
- say >&3 "running: $2"
- if test_run_perf_ "$2"
- then
- if test -z "$verbose"; then
- printf " %s" "$i"
- else
- echo "* timing run $i/$GIT_PERF_REPEAT_COUNT:"
- fi
+ base="$perf_results_dir"/"$perf_results_prefix$(basename "$0" .sh)"."$test_count"
+ "$test_wrapper_func_" "$@"
+ fi
+
+ test_finish_
+}
+
+test_perf_ () {
+ if test -z "$verbose"; then
+ printf "%s" "perf $test_count - $1:"
+ else
+ echo "perf $test_count - $1:"
+ fi
+ for i in $(test_seq 1 $GIT_PERF_REPEAT_COUNT); do
+ say >&3 "running: $2"
+ if test_run_perf_ "$2"
+ then
+ if test -z "$verbose"; then
+ printf " %s" "$i"
else
- test -z "$verbose" && echo
- test_failure_ "$@"
- break
+ echo "* timing run $i/$GIT_PERF_REPEAT_COUNT:"
fi
- done
- if test -z "$verbose"; then
- echo " ok"
else
- test_ok_ "$1"
+ test -z "$verbose" && echo
+ test_failure_ "$@"
+ break
fi
- base="$perf_results_dir"/"$perf_results_prefix$(basename "$0" .sh)"."$test_count"
- "$TEST_DIRECTORY"/perf/min_time.perl test_time.* >"$base".times
+ done
+ if test -z "$verbose"; then
+ echo " ok"
+ else
+ test_ok_ "$1"
fi
- test_finish_
+ "$TEST_DIRECTORY"/perf/min_time.perl test_time.* >"$base".times
+}
+
+test_perf () {
+ test_wrapper_ test_perf_ "$@"
+}
+
+test_size_ () {
+ say >&3 "running: $2"
+ if test_eval_ "$2" 3>"$base".size; then
+ test_ok_ "$1"
+ else
+ test_failure_ "$@"
+ fi
+}
+
+test_size () {
+ test_wrapper_ test_size_ "$@"
}
# We extend test_done to print timings at the end (./run disables this