From 443596850f464c768624e6b6477acadc2aa35e8f Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 21 May 2012 18:17:02 -0400 Subject: fetch-pack: sort incoming heads There's no reason to preserve the incoming order of the heads we're requested to fetch. By having them sorted, we can replace some of the quadratic algorithms with linear ones. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/fetch-pack.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'builtin/fetch-pack.c') diff --git a/builtin/fetch-pack.c b/builtin/fetch-pack.c index 10db15b184..d9e0ee51b0 100644 --- a/builtin/fetch-pack.c +++ b/builtin/fetch-pack.c @@ -1057,6 +1057,11 @@ int cmd_fetch_pack(int argc, const char **argv, const char *prefix) return ret; } +static int compare_heads(const void *a, const void *b) +{ + return strcmp(*(const char **)a, *(const char **)b); +} + struct ref *fetch_pack(struct fetch_pack_args *my_args, int fd[], struct child_process *conn, const struct ref *ref, @@ -1076,8 +1081,11 @@ struct ref *fetch_pack(struct fetch_pack_args *my_args, st.st_mtime = 0; } - if (heads && nr_heads) + if (heads && nr_heads) { nr_heads = remove_duplicates(nr_heads, heads); + qsort(heads, nr_heads, sizeof(*heads), compare_heads); + } + if (!ref) { packet_flush(fd[1]); die("no matching remote head"); -- cgit v1.2.1 From 7db8d5370f070ab983e1bc3569380b8aca899c6d Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 21 May 2012 18:17:20 -0400 Subject: fetch-pack: avoid quadratic behavior in remove_duplicates We remove duplicate entries from the list of refs we are fed in fetch-pack. The original algorithm is quadratic over the number of refs, but since the list is now guaranteed to be sorted, we can do it in linear time. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/fetch-pack.c | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) (limited to 'builtin/fetch-pack.c') diff --git a/builtin/fetch-pack.c b/builtin/fetch-pack.c index d9e0ee51b0..e101842627 100644 --- a/builtin/fetch-pack.c +++ b/builtin/fetch-pack.c @@ -834,21 +834,12 @@ static int remove_duplicates(int nr_heads, char **heads) { int src, dst; - for (src = dst = 0; src < nr_heads; src++) { - /* If heads[src] is different from any of - * heads[0..dst], push it in. - */ - int i; - for (i = 0; i < dst; i++) { - if (!strcmp(heads[i], heads[src])) - break; - } - if (i < dst) - continue; - if (src != dst) - heads[dst] = heads[src]; - dst++; - } + if (!nr_heads) + return 0; + + for (src = dst = 1; src < nr_heads; src++) + if (strcmp(heads[src], heads[dst-1])) + heads[dst++] = heads[src]; return dst; } -- cgit v1.2.1 From 9e8e704f0bc14da325aa63f639c6dc782c81e26f Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 21 May 2012 18:19:51 -0400 Subject: fetch-pack: sort the list of incoming refs Having the list sorted means we can avoid some quadratic algorithms when comparing lists. These should typically be sorted already, but they do come from the remote, so let's be extra careful. Our ref-sorting implementation does a mergesort, so we do not have to care about performance degrading in the common case that the list is already sorted. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/fetch-pack.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'builtin/fetch-pack.c') diff --git a/builtin/fetch-pack.c b/builtin/fetch-pack.c index e101842627..7d708fb8ad 100644 --- a/builtin/fetch-pack.c +++ b/builtin/fetch-pack.c @@ -777,6 +777,8 @@ static struct ref *do_fetch_pack(int fd[2], struct ref *ref = copy_ref_list(orig_ref); unsigned char sha1[20]; + sort_ref_list(&ref, ref_compare_name); + if (is_repository_shallow() && !server_supports("shallow")) die("Server does not support shallow clients"); if (server_supports("multi_ack_detailed")) { -- cgit v1.2.1 From a0de28805dceaca86a6f7bedfd3e8c227b781d9d Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 21 May 2012 18:23:29 -0400 Subject: fetch-pack: avoid quadratic loop in filter_refs We have a list of refs that we want to compare against the "match" array. The current code searches the match list linearly, giving quadratic behavior over the number of refs when you want to fetch all of them. Instead, we can compare the lists as we go, giving us linear behavior. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/fetch-pack.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) (limited to 'builtin/fetch-pack.c') diff --git a/builtin/fetch-pack.c b/builtin/fetch-pack.c index 7d708fb8ad..8a724739ba 100644 --- a/builtin/fetch-pack.c +++ b/builtin/fetch-pack.c @@ -528,6 +528,7 @@ static void filter_refs(struct ref **refs, int nr_match, char **match) struct ref **newtail = &newlist; struct ref *ref, *next; struct ref *fastarray[32]; + int match_pos; if (nr_match && !args.fetch_all) { if (ARRAY_SIZE(fastarray) < nr_match) @@ -540,6 +541,7 @@ static void filter_refs(struct ref **refs, int nr_match, char **match) else return_refs = NULL; + match_pos = 0; for (ref = *refs; ref; ref = next) { next = ref->next; if (!memcmp(ref->name, "refs/", 5) && @@ -553,15 +555,20 @@ static void filter_refs(struct ref **refs, int nr_match, char **match) continue; } else { - int i; - for (i = 0; i < nr_match; i++) { - if (!strcmp(ref->name, match[i])) { - match[i][0] = '\0'; - return_refs[i] = ref; + int cmp = -1; + while (match_pos < nr_match) { + cmp = strcmp(ref->name, match[match_pos]); + if (cmp < 0) /* definitely do not have it */ + break; + else if (cmp == 0) { /* definitely have it */ + match[match_pos][0] = '\0'; + return_refs[match_pos] = ref; break; } + else /* might have it; keep looking */ + match_pos++; } - if (i < nr_match) + if (!cmp) continue; /* we will link it later */ } free(ref); -- cgit v1.2.1 From 3d2a33e57faa84be3ab83a80c8b75dad3e747054 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 24 May 2012 02:04:51 -0400 Subject: fetch-pack: sort incoming heads list earlier Commit 4435968 started sorting heads fed to fetch-pack so that later commits could use more optimized algorithms; commit 7db8d53 switched the remove_duplicates function to such an algorithm. Of course, the sorting is more effective if you do it _before_ the algorithm in question. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/fetch-pack.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'builtin/fetch-pack.c') diff --git a/builtin/fetch-pack.c b/builtin/fetch-pack.c index 8a724739ba..b18ba05bbe 100644 --- a/builtin/fetch-pack.c +++ b/builtin/fetch-pack.c @@ -1082,8 +1082,8 @@ struct ref *fetch_pack(struct fetch_pack_args *my_args, } if (heads && nr_heads) { - nr_heads = remove_duplicates(nr_heads, heads); qsort(heads, nr_heads, sizeof(*heads), compare_heads); + nr_heads = remove_duplicates(nr_heads, heads); } if (!ref) { -- cgit v1.2.1