diff options
-rw-r--r-- | src/config.c | 4 | ||||
-rw-r--r-- | src/index.c | 12 | ||||
-rw-r--r-- | src/odb.c | 4 | ||||
-rw-r--r-- | src/odb_pack.c | 4 | ||||
-rw-r--r-- | src/refs.c | 4 | ||||
-rw-r--r-- | src/tree.c | 6 | ||||
-rw-r--r-- | src/tsort.c | 380 | ||||
-rw-r--r-- | src/util.c | 79 | ||||
-rw-r--r-- | src/util.h | 5 | ||||
-rw-r--r-- | src/vector.c | 6 | ||||
-rw-r--r-- | tests/t00-core.c | 5 |
11 files changed, 424 insertions, 85 deletions
diff --git a/src/config.c b/src/config.c index 730917634..815b08c55 100644 --- a/src/config.c +++ b/src/config.c @@ -56,8 +56,8 @@ void git_config_free(git_config *cfg) static int config_backend_cmp(const void *a, const void *b) { - const file_internal *bk_a = *(const file_internal **)(a); - const file_internal *bk_b = *(const file_internal **)(b); + const file_internal *bk_a = (const file_internal *)(a); + const file_internal *bk_b = (const file_internal *)(b); return bk_b->priority - bk_a->priority; } diff --git a/src/index.c b/src/index.c index dd33db92a..353c30025 100644 --- a/src/index.c +++ b/src/index.c @@ -109,15 +109,15 @@ static int write_index(git_index *index, git_filebuf *file); int index_srch(const void *key, const void *array_member) { const char *filename = (const char *)key; - const git_index_entry *entry = *(const git_index_entry **)(array_member); + const git_index_entry *entry = (const git_index_entry *)(array_member); return strcmp(filename, entry->path); } int index_cmp(const void *a, const void *b) { - const git_index_entry *entry_a = *(const git_index_entry **)(a); - const git_index_entry *entry_b = *(const git_index_entry **)(b); + const git_index_entry *entry_a = (const git_index_entry *)(a); + const git_index_entry *entry_b = (const git_index_entry *)(b); return strcmp(entry_a->path, entry_b->path); } @@ -125,15 +125,15 @@ int index_cmp(const void *a, const void *b) int unmerged_srch(const void *key, const void *array_member) { const char *path = (const char *) key; - const git_index_entry_unmerged *entry = *(const git_index_entry_unmerged **) (array_member); + const git_index_entry_unmerged *entry = (const git_index_entry_unmerged *)(array_member); return strcmp(path, entry->path); } int unmerged_cmp(const void *a, const void *b) { - const git_index_entry_unmerged *info_a = *(const git_index_entry_unmerged **)(a); - const git_index_entry_unmerged *info_b = *(const git_index_entry_unmerged **)(b); + const git_index_entry_unmerged *info_a = (const git_index_entry_unmerged *)(a); + const git_index_entry_unmerged *info_b = (const git_index_entry_unmerged *)(b); return strcmp(info_a->path, info_b->path); } @@ -223,8 +223,8 @@ static int init_fake_wstream(git_odb_stream **stream_p, git_odb_backend *backend static int backend_sort_cmp(const void *a, const void *b) { - const backend_internal *backend_a = *(const backend_internal **)(a); - const backend_internal *backend_b = *(const backend_internal **)(b); + const backend_internal *backend_a = (const backend_internal *)(a); + const backend_internal *backend_b = (const backend_internal *)(b); if (backend_a->is_alternate == backend_b->is_alternate) return (backend_b->priority - backend_a->priority); diff --git a/src/odb_pack.c b/src/odb_pack.c index 44604ef91..20b0f1e4d 100644 --- a/src/odb_pack.c +++ b/src/odb_pack.c @@ -699,8 +699,8 @@ static int pack_index_open(struct pack_file *p) static int packfile_sort__cb(const void *a_, const void *b_) { - struct pack_file *a = *((struct pack_file **)a_); - struct pack_file *b = *((struct pack_file **)b_); + struct pack_file *a = (struct pack_file *)a_; + struct pack_file *b = (struct pack_file *)b_; int st; /* diff --git a/src/refs.c b/src/refs.c index 84224e88e..5e0fe09b2 100644 --- a/src/refs.c +++ b/src/refs.c @@ -825,8 +825,8 @@ static int packed_remove_loose(git_repository *repo, git_vector *packing_list) static int packed_sort(const void *a, const void *b) { - const git_reference *ref_a = *(const git_reference **)a; - const git_reference *ref_b = *(const git_reference **)b; + const git_reference *ref_a = (const git_reference *)a; + const git_reference *ref_b = (const git_reference *)b; return strcmp(ref_a->name, ref_b->name); } diff --git a/src/tree.c b/src/tree.c index 51b19f535..fff5068f8 100644 --- a/src/tree.c +++ b/src/tree.c @@ -40,7 +40,7 @@ static int valid_attributes(const int attributes) { int entry_search_cmp(const void *key, const void *array_member) { const char *filename = (const char *)key; - const git_tree_entry *entry = *(const git_tree_entry **)(array_member); + const git_tree_entry *entry = (const git_tree_entry *)(array_member); return strcmp(filename, entry->filename); } @@ -53,8 +53,8 @@ static int valid_attributes(const int attributes) { int entry_sort_cmp(const void *a, const void *b) { - const git_tree_entry *entry_a = *(const git_tree_entry **)(a); - const git_tree_entry *entry_b = *(const git_tree_entry **)(b); + const git_tree_entry *entry_a = (const git_tree_entry *)(a); + const git_tree_entry *entry_b = (const git_tree_entry *)(b); return git_futils_cmp_path(entry_a->filename, strlen(entry_a->filename), entry_a->attr & 040000, diff --git a/src/tsort.c b/src/tsort.c new file mode 100644 index 000000000..eda9a2fd2 --- /dev/null +++ b/src/tsort.c @@ -0,0 +1,380 @@ +#include <stdio.h> +#include <string.h> +#include <stdint.h> +#include <stdlib.h> + +/** + * An array-of-pointers implementation of Python's Timsort + * Based on code by Christopher Swenson under the MIT license + * + * Copyright (c) 2010 Christopher Swenson + * Copyright (c) 2011 Vicent Marti + */ + +#ifndef MAX +# define MAX(x,y) (((x) > (y) ? (x) : (y))) +#endif + +#ifndef MIN +# define MIN(x,y) (((x) < (y) ? (x) : (y))) +#endif + +#if defined(__GNUC__) +# define CLZ(x) __builtin_clz(x) +#elif defined(_MSV_VER) +# define CLZ(x) _CountLeadingZeros(x) +#else +int CLZ(int32_t x) +{ + int e = 31; + + if (!x) return 32; + if (x&0xFFFF0000) { e -=16; x >>=16; } + if (x&0x0000FF00) { e -= 8; x >>= 8; } + if (x&0x000000F0) { e -= 4; x >>= 4; } + if (x&0x0000000C) { e -= 2; x >>= 2; } + if (x&0x00000002) { e -= 1; } + return e; +} +#endif + +typedef int (*cmp_ptr_t)(const void *, const void *); + +static int binsearch(void **dst, const void *x, size_t size, cmp_ptr_t cmp) +{ + int l, c, r; + void *lx, *cx, *rx; + + l = 0; + r = size - 1; + c = r >> 1; + lx = dst[l]; + + /* check for beginning conditions */ + if (cmp(x, lx) < 0) + return 0; + + else if (cmp(x, lx) == 0) { + int i = 1; + while (cmp(x, dst[i]) == 0) + i++; + return i; + } + + rx = dst[r]; + /* guaranteed not to be >= rx */ + cx = dst[c]; + while (1) { + const int val = cmp(x, cx); + if (val < 0) { + if (c - l <= 1) return c; + r = c; + rx = cx; + } else if (val > 0) { + if (r - c <= 1) return c + 1; + l = c; + lx = cx; + } else { + do { + cx = dst[++c]; + } while (cmp(x, cx) == 0); + return c; + } + c = l + ((r - l) >> 1); + cx = dst[c]; + } +} + +/* Binary insertion sort, but knowing that the first "start" entries are sorted. Used in timsort. */ +static void bisort(void **dst, size_t start, size_t size, cmp_ptr_t cmp) +{ + size_t i; + + for (i = start; i < size; i++) { + int j; + /* If this entry is already correct, just move along */ + if (cmp(dst[i - 1], dst[i]) <= 0) + continue; + + /* Else we need to find the right place, shift everything over, and squeeze in */ + void *x = dst[i]; + int location = binsearch(dst, x, i, cmp); + for (j = i - 1; j >= location; j--) { + dst[j + 1] = dst[j]; + } + dst[location] = x; + } +} + + +/* timsort implementation, based on timsort.txt */ +struct tsort_run { + ssize_t start; + ssize_t length; +}; + +struct tsort_store { + size_t alloc; + cmp_ptr_t cmp; + void **storage; +}; + +static void reverse_elements(void **dst, int start, int end) +{ + while (start < end) { + void *tmp = dst[start]; + dst[start] = dst[end]; + dst[end] = tmp; + + start++; + end--; + } +} + +static int count_run(void **dst, ssize_t start, ssize_t size, struct tsort_store *store) +{ + ssize_t curr = start + 2; + + if (size - start == 1) + return 1; + + if (start >= size - 2) { + if (store->cmp(dst[size - 2], dst[size - 1]) > 0) { + void *tmp = dst[size - 1]; + dst[size - 1] = dst[size - 2]; + dst[size - 2] = tmp; + } + + return 2; + } + + if (store->cmp(dst[start], dst[start + 1]) <= 0) { + while (curr < size - 1 && store->cmp(dst[curr - 1], dst[curr]) <= 0) + curr++; + + return curr - start; + } else { + while (curr < size - 1 && store->cmp(dst[curr - 1], dst[curr]) > 0) + curr++; + + /* reverse in-place */ + reverse_elements(dst, start, curr - 1); + return curr - start; + } +} + +static int compute_minrun(size_t n) +{ + int r = 0; + while (n >= 64) { + r |= n & 1; + n >>= 1; + } + return n + r; +} + +static int check_invariant(struct tsort_run *stack, int stack_curr) +{ + if (stack_curr < 2) + return 1; + + else if (stack_curr == 2) { + const int A = stack[stack_curr - 2].length; + const int B = stack[stack_curr - 1].length; + return (A > B); + } else { + const int A = stack[stack_curr - 3].length; + const int B = stack[stack_curr - 2].length; + const int C = stack[stack_curr - 1].length; + return !((A <= B + C) || (B <= C)); + } +} + +static int resize(struct tsort_store *store, size_t new_size) +{ + if (store->alloc < new_size) { + void **tempstore = realloc(store->storage, new_size * sizeof(void *)); + + /** + * Do not propagate on OOM; this will abort the sort and + * leave the array unsorted, but no error code will be + * raised + */ + if (tempstore == NULL) + return -1; + + store->storage = tempstore; + store->alloc = new_size; + } + + return 0; +} + +static void merge(void **dst, const struct tsort_run *stack, int stack_curr, struct tsort_store *store) +{ + const ssize_t A = stack[stack_curr - 2].length; + const ssize_t B = stack[stack_curr - 1].length; + const ssize_t curr = stack[stack_curr - 2].start; + + void **storage; + ssize_t i, j, k; + + if (resize(store, MIN(A, B)) < 0) + return; + + storage = store->storage; + + /* left merge */ + if (A < B) { + memcpy(storage, &dst[curr], A * sizeof(void *)); + i = 0; + j = curr + A; + + for (k = curr; k < curr + A + B; k++) { + if ((i < A) && (j < curr + A + B)) { + if (store->cmp(storage[i], dst[j]) <= 0) + dst[k] = storage[i++]; + else + dst[k] = dst[j++]; + } else if (i < A) { + dst[k] = storage[i++]; + } else + dst[k] = dst[j++]; + } + } else { + memcpy(storage, &dst[curr + A], B * sizeof(void *)); + i = B - 1; + j = curr + A - 1; + + for (k = curr + A + B - 1; k >= curr; k--) { + if ((i >= 0) && (j >= curr)) { + if (store->cmp(dst[j], storage[i]) > 0) + dst[k] = dst[j--]; + else + dst[k] = storage[i--]; + } else if (i >= 0) + dst[k] = storage[i--]; + else + dst[k] = dst[j--]; + } + } +} + +static ssize_t collapse(void **dst, struct tsort_run *stack, ssize_t stack_curr, struct tsort_store *store, ssize_t size) +{ + ssize_t A, B, C; + + while (1) { + /* if the stack only has one thing on it, we are done with the collapse */ + if (stack_curr <= 1) + break; + + /* if this is the last merge, just do it */ + if ((stack_curr == 2) && (stack[0].length + stack[1].length == size)) { + merge(dst, stack, stack_curr, store); + stack[0].length += stack[1].length; + stack_curr--; + break; + } + + /* check if the invariant is off for a stack of 2 elements */ + else if ((stack_curr == 2) && (stack[0].length <= stack[1].length)) { + merge(dst, stack, stack_curr, store); + stack[0].length += stack[1].length; + stack_curr--; + break; + } + else if (stack_curr == 2) + break; + + A = stack[stack_curr - 3].length; + B = stack[stack_curr - 2].length; + C = stack[stack_curr - 1].length; + + /* check first invariant */ + if (A <= B + C) { + if (A < C) { + merge(dst, stack, stack_curr - 1, store); + stack[stack_curr - 3].length += stack[stack_curr - 2].length; + stack[stack_curr - 2] = stack[stack_curr - 1]; + stack_curr--; + } else { + merge(dst, stack, stack_curr, store); + stack[stack_curr - 2].length += stack[stack_curr - 1].length; + stack_curr--; + } + } else if (B <= C) { + merge(dst, stack, stack_curr, store); + stack[stack_curr - 2].length += stack[stack_curr - 1].length; + stack_curr--; + } else + break; + } + + return stack_curr; +} + +#define PUSH_NEXT() do {\ + len = count_run(dst, curr, size, store);\ + run = minrun;\ + if (run < minrun) run = minrun;\ + if (run > (ssize_t)size - curr) run = size - curr;\ + if (run > len) {\ + bisort(&dst[curr], len, run, cmp);\ + len = run;\ + }\ + run_stack[stack_curr++] = (struct tsort_run) {curr, len};\ + curr += len;\ + if (curr == (ssize_t)size) {\ + /* finish up */ \ + while (stack_curr > 1) { \ + merge(dst, run_stack, stack_curr, store); \ + run_stack[stack_curr - 2].length += run_stack[stack_curr - 1].length; \ + stack_curr--; \ + } \ + if (store->storage != NULL) {\ + free(store->storage);\ + store->storage = NULL;\ + }\ + return;\ + }\ +}\ +while (0) + + +void git__tsort(void **dst, size_t size, cmp_ptr_t cmp) +{ + struct tsort_store _store, *store = &_store; + struct tsort_run run_stack[128]; + + ssize_t stack_curr = 0; + ssize_t len, run; + ssize_t curr = 0; + ssize_t minrun; + + if (size < 64) { + bisort(dst, 1, size, cmp); + return; + } + + /* compute the minimum run length */ + minrun = compute_minrun(size); + + /* temporary storage for merges */ + store->alloc = 0; + store->storage = NULL; + store->cmp = cmp; + + PUSH_NEXT(); + PUSH_NEXT(); + PUSH_NEXT(); + + while (1) { + if (!check_invariant(run_stack, stack_curr)) { + stack_curr = collapse(dst, run_stack, stack_curr, store, size); + continue; + } + + PUSH_NEXT(); + } +} diff --git a/src/util.c b/src/util.c index 03e911241..4a44f9988 100644 --- a/src/util.c +++ b/src/util.c @@ -331,66 +331,27 @@ uint32_t git__hash(const void *key, int len, uint32_t seed) } #endif -/* - * A merge sort implementation, simplified from the qsort implementation - * by Mike Haertel, which is a part of the GNU C Library. +/** + * A modified `bsearch` from the BSD glibc. + * + * Copyright (c) 1990 Regents of the University of California. + * All rights reserved. */ -static void msort_with_tmp(void *b, size_t n, size_t s, - int (*cmp)(const void *, const void *), - char *t) +void **git__bsearch(const void *key, void **base, size_t nmemb, int (*compar)(const void *, const void *)) { - char *tmp; - char *b1, *b2; - size_t n1, n2; - - if (n <= 1) - return; - - n1 = n / 2; - n2 = n - n1; - b1 = b; - b2 = (char *)b + (n1 * s); - - msort_with_tmp(b1, n1, s, cmp, t); - msort_with_tmp(b2, n2, s, cmp, t); - - tmp = t; - - while (n1 > 0 && n2 > 0) { - if (cmp(b1, b2) <= 0) { - memcpy(tmp, b1, s); - tmp += s; - b1 += s; - --n1; - } else { - memcpy(tmp, b2, s); - tmp += s; - b2 += s; - --n2; - } - } - if (n1 > 0) - memcpy(tmp, b1, n1 * s); - memcpy(b, t, (n - n2) * s); + int lim, cmp; + void **p; + + for (lim = nmemb; lim != 0; lim >>= 1) { + p = base + (lim >> 1); + cmp = (*compar)(key, *p); + if (cmp > 0) { /* key > p: move right */ + base = p + 1; + lim--; + } else if (cmp == 0) { + return (void **)p; + } /* else move left */ + } + return NULL; } -int git__msort(void *b, size_t n, size_t s, - int (*cmp)(const void *, const void *)) -{ - const size_t size = n * s; - char buf[1024]; - - if (size < sizeof(buf)) { - /* The temporary array fits on the small on-stack buffer. */ - msort_with_tmp(b, n, s, cmp, buf); - } else { - /* It's somewhat large, so malloc it. */ - char *tmp = git__malloc(size); - if (tmp == NULL) - return GIT_ENOMEM; - msort_with_tmp(b, n, s, cmp, tmp); - free(tmp); - } - - return GIT_SUCCESS; -} diff --git a/src/util.h b/src/util.h index c9ca4dec0..410ebfb26 100644 --- a/src/util.h +++ b/src/util.h @@ -118,7 +118,8 @@ extern int git__fnmatch(const char *pattern, const char *name, int flags); } \ } while (0) -extern int git__msort(void *b, size_t n, size_t s, - int (*cmp)(const void *, const void *)); +extern int git__tsort(void **b, size_t n, int (*cmp)(const void *, const void *)); +extern void **git__bsearch(const void *key, void **base, size_t nmemb, + int (*compar)(const void *, const void *)); #endif /* INCLUDE_util_h__ */ diff --git a/src/vector.c b/src/vector.c index 2fc051f0c..0b83b8b0d 100644 --- a/src/vector.c +++ b/src/vector.c @@ -94,7 +94,7 @@ void git_vector_sort(git_vector *v) if (v->sorted || v->_cmp == NULL) return; - git__msort(v->contents, v->length, sizeof(void *), v->_cmp); + git__tsort(v->contents, v->length, v->_cmp); v->sorted = 1; } @@ -110,7 +110,7 @@ int git_vector_bsearch2(git_vector *v, git_vector_cmp key_lookup, const void *ke git_vector_sort(v); - find = bsearch(key, v->contents, v->length, sizeof(void *), key_lookup); + find = git__bsearch(key, v->contents, v->length, key_lookup); if (find != NULL) return (int)(find - v->contents); @@ -174,7 +174,7 @@ void git_vector_uniq(git_vector *v) cmp = v->_cmp ? v->_cmp : strict_comparison; for (i = 0, j = 1 ; j < v->length; ++j) - if (!cmp(v->contents + i, v->contents + j)) + if (!cmp(v->contents[i], v->contents[j])) v->contents[i] = v->contents[j]; else v->contents[++i] = v->contents[j]; diff --git a/tests/t00-core.c b/tests/t00-core.c index 7bcc77aeb..660f10c89 100644 --- a/tests/t00-core.c +++ b/tests/t00-core.c @@ -75,10 +75,7 @@ END_TEST static int test_cmp(const void *a, const void *b) { - int n1 = *(int *)a; - int n2 = *(int *)b; - - return n1 - n2; + return a - b; } BEGIN_TEST(vector2, "remove duplicates") |