From 73cf0822b2a4ffa7ad559d1f0772e39718fc7776 Mon Sep 17 00:00:00 2001 From: Julian Phillips Date: Sun, 25 Oct 2009 21:28:11 +0000 Subject: remote: Make ref_remove_duplicates faster for large numbers of refs The ref_remove_duplicates function was very slow at dealing with very large numbers of refs. This is because it was using a linear search through all remaining refs to find any duplicates of the current ref. Rewriting it to use a string list to keep track of which refs have already been seen and removing duplicates when they are found is much more efficient. Signed-off-by: Julian Phillips Signed-off-by: Junio C Hamano --- remote.c | 41 ++++++++++++++++++++++------------------- 1 file changed, 22 insertions(+), 19 deletions(-) diff --git a/remote.c b/remote.c index 73d33f2584..4f9f0ccc7b 100644 --- a/remote.c +++ b/remote.c @@ -6,6 +6,7 @@ #include "revision.h" #include "dir.h" #include "tag.h" +#include "string-list.h" static struct refspec s_tag_refspec = { 0, @@ -734,29 +735,31 @@ int for_each_remote(each_remote_fn fn, void *priv) void ref_remove_duplicates(struct ref *ref_map) { - struct ref **posn; - struct ref *next; - for (; ref_map; ref_map = ref_map->next) { + struct string_list refs = { NULL, 0, 0, 0 }; + struct string_list_item *item = NULL; + struct ref *prev = NULL, *next = NULL; + for (; ref_map; prev = ref_map, ref_map = next) { + next = ref_map->next; if (!ref_map->peer_ref) continue; - posn = &ref_map->next; - while (*posn) { - if ((*posn)->peer_ref && - !strcmp((*posn)->peer_ref->name, - ref_map->peer_ref->name)) { - if (strcmp((*posn)->name, ref_map->name)) - die("%s tracks both %s and %s", - ref_map->peer_ref->name, - (*posn)->name, ref_map->name); - next = (*posn)->next; - free((*posn)->peer_ref); - free(*posn); - *posn = next; - } else { - posn = &(*posn)->next; - } + + item = string_list_lookup(ref_map->peer_ref->name, &refs); + if (item) { + if (strcmp(((struct ref *)item->util)->name, + ref_map->name)) + die("%s tracks both %s and %s", + ref_map->peer_ref->name, + ((struct ref *)item->util)->name, + ref_map->name); + prev->next = ref_map->next; + free(ref_map->peer_ref); + free(ref_map); } + + item = string_list_insert(ref_map->peer_ref->name, &refs); + item->util = ref_map; } + string_list_clear(&refs, 0); } int remote_has_url(struct remote *remote, const char *url) -- cgit v1.2.1 From b1a01e1c0762d117da7dac009b773f310479be12 Mon Sep 17 00:00:00 2001 From: Julian Phillips Date: Sun, 25 Oct 2009 21:28:12 +0000 Subject: fetch: Speed up fetch of large numbers of refs When there are large numbers of refs, calling read_ref for each ref is inefficent (and infact downright slow) - so instead use for_each_ref to build up a string list of all the refs that we currently have, which significantly improves the volume. Signed-off-by: Julian Phillips Signed-off-by: Junio C Hamano --- builtin-fetch.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/builtin-fetch.c b/builtin-fetch.c index a35a6f8cb8..5c7465cfeb 100644 --- a/builtin-fetch.c +++ b/builtin-fetch.c @@ -489,7 +489,8 @@ static int add_existing(const char *refname, const unsigned char *sha1, int flag, void *cbdata) { struct string_list *list = (struct string_list *)cbdata; - string_list_insert(refname, list); + struct string_list_item *item = string_list_insert(refname, list); + item->util = (void *)sha1; return 0; } @@ -615,9 +616,14 @@ static void check_not_current_branch(struct ref *ref_map) static int do_fetch(struct transport *transport, struct refspec *refs, int ref_count) { + struct string_list existing_refs = { NULL, 0, 0, 0 }; + struct string_list_item *peer_item = NULL; struct ref *ref_map; struct ref *rm; int autotags = (transport->remote->fetch_tags == 1); + + for_each_ref(add_existing, &existing_refs); + if (transport->remote->fetch_tags == 2 && tags != TAGS_UNSET) tags = TAGS_SET; if (transport->remote->fetch_tags == -1) @@ -640,8 +646,13 @@ static int do_fetch(struct transport *transport, check_not_current_branch(ref_map); for (rm = ref_map; rm; rm = rm->next) { - if (rm->peer_ref) - read_ref(rm->peer_ref->name, rm->peer_ref->old_sha1); + if (rm->peer_ref) { + peer_item = string_list_lookup(rm->peer_ref->name, + &existing_refs); + if (peer_item) + hashcpy(rm->peer_ref->old_sha1, + peer_item->util); + } } if (tags == TAGS_DEFAULT && autotags) -- cgit v1.2.1 From 95c96d48e65a597cfd2bf7228ddc8c7ca30b55b7 Mon Sep 17 00:00:00 2001 From: Julian Phillips Date: Fri, 13 Nov 2009 21:25:56 +0000 Subject: remote: fix use-after-free error detected by glibc in ref_remove_duplicates In ref_remove_duplicates, when we encounter a duplicate and remove it from the list we need to make sure that the prev pointer stays pointing at the last entry and also skip over adding the just freed entry to the string_list. Previously fetch could crash with: *** glibc detected *** git: corrupted double-linked list: ... Also add a test to try and catch problems with duplicate removal in the future. Acked-by: Nicolas Pitre Signed-off-by: Julian Phillips Signed-off-by: Junio C Hamano --- remote.c | 2 ++ t/t5510-fetch.sh | 11 +++++++++++ 2 files changed, 13 insertions(+) diff --git a/remote.c b/remote.c index 4f9f0ccc7b..e0d17bb830 100644 --- a/remote.c +++ b/remote.c @@ -754,6 +754,8 @@ void ref_remove_duplicates(struct ref *ref_map) prev->next = ref_map->next; free(ref_map->peer_ref); free(ref_map); + ref_map = prev; /* skip this; we freed it */ + continue; } item = string_list_insert(ref_map->peer_ref->name, &refs); diff --git a/t/t5510-fetch.sh b/t/t5510-fetch.sh index d13c806624..169af1edde 100755 --- a/t/t5510-fetch.sh +++ b/t/t5510-fetch.sh @@ -341,4 +341,15 @@ test_expect_success 'fetch into the current branch with --update-head-ok' ' ' +test_expect_success "should be able to fetch with duplicate refspecs" ' + mkdir dups && + cd dups && + git init && + git config branch.master.remote three && + git config remote.three.url ../three/.git && + git config remote.three.fetch +refs/heads/*:refs/remotes/origin/* && + git config --add remote.three.fetch +refs/heads/*:refs/remotes/origin/* && + git fetch three +' + test_done -- cgit v1.2.1