summary | refs | log | tree | commit | diff
path: root/oidset.h
diff options
context:
space:
mode:
author René Scharfe <l.s.r@web.de> 2018-10-04 17:13:06 +0200
committer Junio C Hamano <gitster@pobox.com> 2018-10-04 11:12:13 -0700
commit 8b2f8cbcb16b1a9775214fe1d69aeb1580ae179d (patch)
tree 4b53010729a0740a6a09d614d3dafe8ace095063 /oidset.h
parent 9249ca26aca3ae3f21f812593c7a5498736ae29a (diff)
download git-8b2f8cbcb16b1a9775214fe1d69aeb1580ae179d.tar.gz
oidset: use khash
Reimplement oidset using khash.h in order to reduce its memory footprint
and make it faster.

Performance of a command that mainly checks for duplicate objects using
an oidset, with master and Clang 6.0.1:

  $ cmd="./git-cat-file --batch-all-objects --unordered --buffer --batch-check='%(objectname)'"

  $ /usr/bin/time $cmd >/dev/null
  0.22user 0.03system 0:00.25elapsed 99%CPU (0avgtext+0avgdata 48484maxresident)k
  0inputs+0outputs (0major+11204minor)pagefaults 0swaps

  $ hyperfine "$cmd"
  Benchmark #1: ./git-cat-file --batch-all-objects --unordered --buffer --batch-check='%(objectname)'
    Time (mean ± σ): 250.0 ms ± 6.0 ms [User: 225.9 ms, System: 23.6 ms]
    Range (min … max): 242.0 ms … 261.1 ms

And with this patch:

  $ /usr/bin/time $cmd >/dev/null
  0.14user 0.00system 0:00.15elapsed 100%CPU (0avgtext+0avgdata 41396maxresident)k
  0inputs+0outputs (0major+8318minor)pagefaults 0swaps

  $ hyperfine "$cmd"
  Benchmark #1: ./git-cat-file --batch-all-objects --unordered --buffer --batch-check='%(objectname)'
    Time (mean ± σ): 151.9 ms ± 4.9 ms [User: 130.5 ms, System: 21.2 ms]
    Range (min … max): 148.2 ms … 170.4 ms

Initial-patch-by: Jeff King <peff@peff.net>
Signed-off-by: Rene Scharfe <l.s.r@web.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Diffstat (limited to 'oidset.h')
-rw-r--r-- oidset.h 36
1 files changed, 28 insertions, 8 deletions
diff --git a/oidset.h b/oidset.h
index 40ec5f87fe..4b90540cd4 100644
--- a/oidset.h
+++ b/oidset.h
@@ -1,7 +1,8 @@
#ifndef OIDSET_H
#define OIDSET_H
-#include "oidmap.h"
+#include "hashmap.h"
+#include "khash.h"
/**
* This API is similar to sha1-array, in that it maintains a set of object ids
@@ -15,19 +16,33 @@
* table overhead.
*/
+static inline unsigned int oid_hash(struct object_id oid)
+{
+ return sha1hash(oid.hash);
+}
+
+static inline int oid_equal(struct object_id a, struct object_id b)
+{
+ return oideq(&a, &b);
+}
+
+KHASH_INIT(oid, struct object_id, int, 0, oid_hash, oid_equal)
+
/**
* A single oidset; should be zero-initialized (or use OIDSET_INIT).
*/
struct oidset {
- struct oidmap map;
+ kh_oid_t set;
};
-#define OIDSET_INIT { OIDMAP_INIT }
+#define OIDSET_INIT { { 0 } }
static inline void oidset_init(struct oidset *set, size_t initial_size)
{
- oidmap_init(&set->map, initial_size);
+ memset(&set->set, 0, sizeof(set->set));
+ if (initial_size)
+ kh_resize_oid(&set->set, initial_size);
}
/**
@@ -58,19 +73,24 @@ int oidset_remove(struct oidset *set, const struct object_id *oid);
void oidset_clear(struct oidset *set);
struct oidset_iter {
- struct oidmap_iter m_iter;
+ kh_oid_t *set;
+ khiter_t iter;
};
static inline void oidset_iter_init(struct oidset *set,
struct oidset_iter *iter)
{
- oidmap_iter_init(&set->map, &iter->m_iter);
+ iter->set = &set->set;
+ iter->iter = kh_begin(iter->set);
}
static inline struct object_id *oidset_iter_next(struct oidset_iter *iter)
{
- struct oidmap_entry *e = oidmap_iter_next(&iter->m_iter);
- return e ? &e->oid : NULL;
+ for (; iter->iter != kh_end(iter->set); iter->iter++) {
+ if (kh_exist(iter->set, iter->iter))
+ return &kh_key(iter->set, iter->iter++);
+ }
+ return NULL;
}
static inline struct object_id *oidset_iter_first(struct oidset *set,