From 16f9b4cdd31de2b8185c5c717efa6d28c56b3c74 Mon Sep 17 00:00:00 2001
From: Jeff Hostetler
Date: Wed, 22 Mar 2017 17:14:20 +0000
Subject: name-hash: specify initial size for istate.dir_hash table

Specify an initial size for the istate.dir_hash hashmap matching
the size of the istate.name_hash.

Previously, hashmap_init() was given 0, causing a 64-bucket hashmap
to be created.  When working with very large repositories, this
would cause numerous rehash() calls to realloc and rebalance the
hashmap.  This is especially true when the worktree is deep, with
many directories each containing only a few files.

Signed-off-by: Jeff Hostetler
Signed-off-by: Junio C Hamano
---
 name-hash.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/name-hash.c b/name-hash.c
index 6d9f23e932..3f7722a63b 100644
--- a/name-hash.c
+++ b/name-hash.c
@@ -120,7 +120,8 @@ static void lazy_init_name_hash(struct index_state *istate)
 		return;
 	hashmap_init(&istate->name_hash, (hashmap_cmp_fn) cache_entry_cmp,
 			istate->cache_nr);
-	hashmap_init(&istate->dir_hash, (hashmap_cmp_fn) dir_entry_cmp, 0);
+	hashmap_init(&istate->dir_hash, (hashmap_cmp_fn) dir_entry_cmp,
+			istate->cache_nr);
 	for (nr = 0; nr < istate->cache_nr; nr++)
 		hash_index_entry(istate, istate->cache[nr]);
 	istate->name_hash_initialized = 1;
--
cgit v1.2.1


From f75619bd6d21760e1da416d4e27bce6468beffcd Mon Sep 17 00:00:00 2001
From: Jeff Hostetler
Date: Wed, 22 Mar 2017 17:14:21 +0000
Subject: hashmap: allow memihash computation to be continued

Add a variant of memihash() that allows the hash computation to be
continued.  There are times when we compute the hash on a full path
and then the hash on just the path to the parent directory.  This
can be expensive on large repositories.  With this, we can hash the
parent directory first and then continue the computation to include
the "/filename".
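To illustrate the intended usage (a minimal sketch, not code from
this patch):

    /* hashing the whole path in one call ... */
    unsigned int whole = memihash("Dir/File.txt", 12);

    /* ... matches hashing the directory first and then continuing
     * with the trailing "/File.txt", since FNV-1 folds in one byte
     * at a time */
    unsigned int dir = memihash("Dir", 3);
    unsigned int cont = memihash_cont(dir, "/File.txt", 9);
    /* here whole == cont */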
Signed-off-by: Jeff Hostetler
Signed-off-by: Junio C Hamano
---
 hashmap.c | 17 +++++++++++++++++
 hashmap.h |  1 +
 2 files changed, 18 insertions(+)

diff --git a/hashmap.c b/hashmap.c
index b10b642229..93793cb954 100644
--- a/hashmap.c
+++ b/hashmap.c
@@ -50,6 +50,23 @@ unsigned int memihash(const void *buf, size_t len)
 	return hash;
 }
 
+/*
+ * Incorporate another chunk of data into a memihash
+ * computation.
+ */
+unsigned int memihash_cont(unsigned int hash_seed, const void *buf, size_t len)
+{
+	unsigned int hash = hash_seed;
+	unsigned char *ucbuf = (unsigned char *) buf;
+	while (len--) {
+		unsigned int c = *ucbuf++;
+		if (c >= 'a' && c <= 'z')
+			c -= 'a' - 'A';
+		hash = (hash * FNV32_PRIME) ^ c;
+	}
+	return hash;
+}
+
 #define HASHMAP_INITIAL_SIZE 64
 /* grow / shrink by 2^2 */
 #define HASHMAP_RESIZE_BITS 2

diff --git a/hashmap.h b/hashmap.h
index ab7958ae33..45eda693bd 100644
--- a/hashmap.h
+++ b/hashmap.h
@@ -12,6 +12,7 @@ extern unsigned int strhash(const char *buf);
 extern unsigned int strihash(const char *buf);
 extern unsigned int memhash(const void *buf, size_t len);
 extern unsigned int memihash(const void *buf, size_t len);
+extern unsigned int memihash_cont(unsigned int hash_seed, const void *buf, size_t len);
 
 static inline unsigned int sha1hash(const unsigned char *sha1)
 {
--
cgit v1.2.1


From 0607e10009ee4e37cb49b4cec8d28a9dda1656a4 Mon Sep 17 00:00:00 2001
From: Jeff Hostetler
Date: Wed, 22 Mar 2017 17:14:22 +0000
Subject: hashmap: add disallow_rehash setting

Teach hashmap to allow rehashes to be suppressed.  This is useful
when hashmaps are accessed by multiple threads.  It still requires
the caller to properly manage their locking; this just prevents
unexpected rehashing during inserts and deletes.
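To illustrate the intended pattern (a minimal sketch; the caller's
mutex and the expected size are assumptions, not part of this
patch):

    hashmap_init(&map, cmpfn, expected_nr); /* size it up front */
    hashmap_disallow_rehash(&map, 1);

    /* in each worker thread: searches and inserts guarded by a
     * caller-owned lock; the table cannot resize underneath them */
    pthread_mutex_lock(&my_lock);
    hashmap_add(&map, entry);
    pthread_mutex_unlock(&my_lock);

    /* back to single-threaded use */
    hashmap_disallow_rehash(&map, 0);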
Signed-off-by: Jeff Hostetler
Signed-off-by: Junio C Hamano
---
 hashmap.c | 12 +++++++++++-
 hashmap.h | 24 ++++++++++++++++++++++++
 2 files changed, 35 insertions(+), 1 deletion(-)

diff --git a/hashmap.c b/hashmap.c
index 93793cb954..7d1044eb5d 100644
--- a/hashmap.c
+++ b/hashmap.c
@@ -104,11 +104,19 @@ static inline unsigned int bucket(const struct hashmap *map,
 	return key->hash & (map->tablesize - 1);
 }
 
+int hashmap_bucket(const struct hashmap *map, unsigned int hash)
+{
+	return hash & (map->tablesize - 1);
+}
+
 static void rehash(struct hashmap *map, unsigned int newsize)
 {
 	unsigned int i, oldsize = map->tablesize;
 	struct hashmap_entry **oldtable = map->table;
 
+	if (map->disallow_rehash)
+		return;
+
 	alloc_table(map, newsize);
 	for (i = 0; i < oldsize; i++) {
 		struct hashmap_entry *e = oldtable[i];
@@ -141,7 +149,9 @@ void hashmap_init(struct hashmap *map, hashmap_cmp_fn equals_function,
 		size_t initial_size)
 {
 	unsigned int size = HASHMAP_INITIAL_SIZE;
-	map->size = 0;
+
+	memset(map, 0, sizeof(*map));
+
 	map->cmpfn = equals_function ? equals_function : always_equal;
 
 	/* calculate initial table size and allocate the table */

diff --git a/hashmap.h b/hashmap.h
index 45eda693bd..de6022a3a9 100644
--- a/hashmap.h
+++ b/hashmap.h
@@ -39,6 +39,7 @@ struct hashmap {
 	struct hashmap_entry **table;
 	hashmap_cmp_fn cmpfn;
 	unsigned int size, tablesize, grow_at, shrink_at;
+	unsigned disallow_rehash : 1;
 };
 
 struct hashmap_iter {
@@ -77,6 +78,29 @@ static inline void *hashmap_get_from_hash(const struct hashmap *map,
 	return hashmap_get(map, &key, keydata);
 }
 
+int hashmap_bucket(const struct hashmap *map, unsigned int hash);
+
+/*
+ * Disallow/allow rehashing of the hashmap.
+ * This is useful if the caller knows that the hashmap
+ * needs multi-threaded access.  The caller is still
+ * required to guard/lock searches and inserts in a
+ * manner appropriate to their usage.  This simply
+ * prevents the table from being unexpectedly re-mapped.
+ *
+ * It is up to the caller to ensure that the hashmap is
+ * initialized to a reasonable size to prevent poor
+ * performance.
+ *
+ * When value=1, prevent future rehashes on adds and deletes.
+ * When value=0, allow future rehashes.  This DOES NOT force
+ * a rehash now.
+ */
+static inline void hashmap_disallow_rehash(struct hashmap *map, unsigned value)
+{
+	map->disallow_rehash = value;
+}
+
 /* hashmap_iter functions */
 
 extern void hashmap_iter_init(struct hashmap *map, struct hashmap_iter *iter);
--
cgit v1.2.1


From 4f4458aafa809e184358256e35fd90c227dba81d Mon Sep 17 00:00:00 2001
From: Jeff Hostetler
Date: Thu, 23 Mar 2017 13:47:02 +0000
Subject: hashmap: document memihash_cont, hashmap_disallow_rehash api

Document memihash_cont() and hashmap_disallow_rehash() in
Documentation/technical/api-hashmap.txt.

Signed-off-by: Jeff Hostetler
Signed-off-by: Junio C Hamano
---
 Documentation/technical/api-hashmap.txt | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/Documentation/technical/api-hashmap.txt b/Documentation/technical/api-hashmap.txt
index 28f5a8b715..4e4ae6f249 100644
--- a/Documentation/technical/api-hashmap.txt
+++ b/Documentation/technical/api-hashmap.txt
@@ -21,6 +21,9 @@ that the hashmap is initialized. It may also be useful for statistical purposes
 `cmpfn` stores the comparison function specified in `hashmap_init()`. In
 advanced scenarios, it may be useful to change this, e.g. to switch between
 case-sensitive and case-insensitive lookup.
++
+When `disallow_rehash` is set, automatic rehashes are prevented during inserts
+and deletes.
 
 `struct hashmap_entry`::
 
@@ -57,6 +60,7 @@ Functions
 `unsigned int strihash(const char *buf)`::
 `unsigned int memhash(const void *buf, size_t len)`::
 `unsigned int memihash(const void *buf, size_t len)`::
+`unsigned int memihash_cont(unsigned int hash_seed, const void *buf, size_t len)`::
 
 	Ready-to-use hash functions for strings, using the FNV-1 algorithm (see
 	http://www.isthe.com/chongo/tech/comp/fnv).
@@ -65,6 +69,9 @@ Functions
 `memihash` operate on arbitrary-length memory.
 +
 `strihash` and `memihash` are case insensitive versions.
++
+`memihash_cont` is a variant of `memihash` that allows a computation to be
+continued with another chunk of data.
 
 `unsigned int sha1hash(const unsigned char *sha1)`::
 
@@ -184,6 +191,21 @@ passed to `hashmap_cmp_fn` to decide whether the entry matches the key.
 +
 Returns the removed entry, or NULL if not found.
 
+`void hashmap_disallow_rehash(struct hashmap *map, unsigned value)`::
+
+	Disallow/allow automatic rehashing of the hashmap during inserts
+	and deletes.
++
+This is useful if the caller knows that the hashmap will be accessed
+by multiple threads.
++
+The caller is still responsible for any necessary locking; this simply
+prevents unexpected rehashing.  The caller is also responsible for properly
+sizing the initial hashmap to ensure good performance.
++
+A call to allow rehashing does not force a rehash; that might happen
+with the next insert or delete.
+
 `void hashmap_iter_init(struct hashmap *map, struct hashmap_iter *iter)`::
 `void *hashmap_iter_next(struct hashmap_iter *iter)`::
 `void *hashmap_iter_first(struct hashmap *map, struct hashmap_iter *iter)`::
--
cgit v1.2.1


From 846df809bc671d6487fa2be7550e3ed2b62e62f9 Mon Sep 17 00:00:00 2001
From: Jeff Hostetler
Date: Thu, 23 Mar 2017 13:47:03 +0000
Subject: name-hash: perf improvement for lazy_init_name_hash

Improve the performance of lazy_init_name_hash() when ignore_case
is set.

Teach name-hash to build the istate.name_hash and istate.dir_hash
simultaneously, using a forward-diving technique on the pathname of
each index_entry, rather than adding name_hash entries and then
searching backwards in the pathname for parent directories.

This borrows algorithm ideas from clear_ce_flags_{1,dir}.

Multiple threads are used with the new algorithm to speed up hashmap
construction.  This new code path is only used when threads are
available (a compile-time setting) and when the index is large
enough to warrant the pthread complexity.

The code in clear_ce_flags_dir() uses a linear search to find the
adjacent index entries with the same prefix; here, a binary search
is used in handle_range_dir() to further speed things up.

The size of LAZY_THREAD_COST was determined from rough analysis
using:

    t/helper/test-lazy-init-name-hash --analyze
Signed-off-by: Jeff Hostetler
Signed-off-by: Junio C Hamano
---
 name-hash.c | 492 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 485 insertions(+), 7 deletions(-)

diff --git a/name-hash.c b/name-hash.c
index 3f7722a63b..cac313c78d 100644
--- a/name-hash.c
+++ b/name-hash.c
@@ -23,15 +23,21 @@ static int dir_entry_cmp(const struct dir_entry *e1,
 			name ? name : e2->name, e1->namelen);
 }
 
-static struct dir_entry *find_dir_entry(struct index_state *istate,
-		const char *name, unsigned int namelen)
+static struct dir_entry *find_dir_entry__hash(struct index_state *istate,
+		const char *name, unsigned int namelen, unsigned int hash)
 {
 	struct dir_entry key;
-	hashmap_entry_init(&key, memihash(name, namelen));
+	hashmap_entry_init(&key, hash);
 	key.namelen = namelen;
 	return hashmap_get(&istate->dir_hash, &key, name);
 }
 
+static struct dir_entry *find_dir_entry(struct index_state *istate,
+		const char *name, unsigned int namelen)
+{
+	return find_dir_entry__hash(istate, name, namelen, memihash(name, namelen));
+}
+
 static struct dir_entry *hash_dir_entry(struct index_state *istate,
 		struct cache_entry *ce, int namelen)
 {
@@ -112,21 +118,493 @@ static int cache_entry_cmp(const struct cache_entry *ce1,
 	return remove ? !(ce1 == ce2) : 0;
 }
 
-static void lazy_init_name_hash(struct index_state *istate)
+static int lazy_try_threaded = 1;
+static int lazy_nr_dir_threads;
+
+#ifdef NO_PTHREADS
+
+static inline int lookup_lazy_params(struct index_state *istate)
 {
-	int nr;
+	return 0;
+}
+
+static inline void threaded_lazy_init_name_hash(
+	struct index_state *istate)
+{
+}
+
+#else
+
+#include "thread-utils.h"
+
+/*
+ * Set a minimum number of cache_entries that we will handle per
+ * thread and use that to decide how many threads to run (up to
+ * the number of CPUs on the system).
+ *
+ * For guidance setting the lower per-thread bound, see:
+ *     t/helper/test-lazy-init-name-hash --analyze
+ */
+#define LAZY_THREAD_COST (2000)
+
+/*
+ * We use n mutexes to guard n partitions of the "istate->dir_hash"
+ * hashtable.  Since "find" and "insert" operations will hash to a
+ * particular bucket and modify/search a single chain, we can say
+ * that "all chains mod n" are guarded by the same mutex -- rather
+ * than having a single mutex to guard the entire table.  (This does
+ * require that we disable "rehashing" on the hashtable.)
+ *
+ * So, a larger value here decreases the probability of a collision
+ * and the time that each thread must wait for the mutex.
+ */
+#define LAZY_MAX_MUTEX (32)
+
+static pthread_mutex_t *lazy_dir_mutex_array;
+
+/*
+ * An array of lazy_entry items is used by the n threads in
+ * the directory parse (first) phase to (lock-free) store the
+ * intermediate results.  These values are then referenced by
+ * the 2 threads in the second phase.
+ */
+struct lazy_entry {
+	struct dir_entry *dir;
+	unsigned int hash_dir;
+	unsigned int hash_name;
+};
+
+/*
+ * Decide if we want to use threads (if available) to load
+ * the hash tables.  We set "lazy_nr_dir_threads" to zero when
+ * it is not worth it.
+ */
+static int lookup_lazy_params(struct index_state *istate)
+{
+	int nr_cpus;
+
+	lazy_nr_dir_threads = 0;
+
+	if (!lazy_try_threaded)
+		return 0;
+
+	/*
+	 * If we are respecting case, just use the original
+	 * code to build the "istate->name_hash".  We don't
+	 * need the complexity here.
+	 */
+	if (!ignore_case)
+		return 0;
+
+	nr_cpus = online_cpus();
+	if (nr_cpus < 2)
+		return 0;
+
+	if (istate->cache_nr < 2 * LAZY_THREAD_COST)
+		return 0;
+
+	if (istate->cache_nr < nr_cpus * LAZY_THREAD_COST)
+		nr_cpus = istate->cache_nr / LAZY_THREAD_COST;
+	lazy_nr_dir_threads = nr_cpus;
+	return lazy_nr_dir_threads;
+}
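+
+/*
+ * (Illustrative note, not part of the original patch: with the
+ * cost above, an index of 100,000 entries on an 8-core machine
+ * gets 8 threads of ~12,500 entries each; an index of 5,000
+ * entries gets 5000/2000 = 2 threads; anything under 4,000
+ * entries stays on the single-threaded path.)
+ */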
All "dir" threads are trying + * to insert partial pathnames into the hash as they iterate + * over their portions of the index, so lock contention is + * high. + * + * However, the hashmap is going to put items into bucket + * chains based on their hash values. Use that to create n + * mutexes and lock on mutex[bucket(hash) % n]. This will + * decrease the collision rate by (hopefully) by a factor of n. + */ +static void init_dir_mutex(void) +{ + int j; + + lazy_dir_mutex_array = xcalloc(LAZY_MAX_MUTEX, sizeof(pthread_mutex_t)); + + for (j = 0; j < LAZY_MAX_MUTEX; j++) + init_recursive_mutex(&lazy_dir_mutex_array[j]); +} + +static void cleanup_dir_mutex(void) +{ + int j; + + for (j = 0; j < LAZY_MAX_MUTEX; j++) + pthread_mutex_destroy(&lazy_dir_mutex_array[j]); + + free(lazy_dir_mutex_array); +} + +static void lock_dir_mutex(int j) +{ + pthread_mutex_lock(&lazy_dir_mutex_array[j]); +} + +static void unlock_dir_mutex(int j) +{ + pthread_mutex_unlock(&lazy_dir_mutex_array[j]); +} + +static inline int compute_dir_lock_nr( + const struct hashmap *map, + unsigned int hash) +{ + return hashmap_bucket(map, hash) % LAZY_MAX_MUTEX; +} + +static struct dir_entry *hash_dir_entry_with_parent_and_prefix( + struct index_state *istate, + struct dir_entry *parent, + struct strbuf *prefix) +{ + struct dir_entry *dir; + unsigned int hash; + int lock_nr; + + /* + * Either we have a parent directory and path with slash(es) + * or the directory is an immediate child of the root directory. + */ + assert((parent != NULL) ^ (strchr(prefix->buf, '/') == NULL)); + + if (parent) + hash = memihash_cont(parent->ent.hash, + prefix->buf + parent->namelen, + prefix->len - parent->namelen); + else + hash = memihash(prefix->buf, prefix->len); + + lock_nr = compute_dir_lock_nr(&istate->dir_hash, hash); + lock_dir_mutex(lock_nr); + + dir = find_dir_entry__hash(istate, prefix->buf, prefix->len, hash); + if (!dir) { + FLEX_ALLOC_MEM(dir, name, prefix->buf, prefix->len); + hashmap_entry_init(dir, hash); + dir->namelen = prefix->len; + dir->parent = parent; + hashmap_add(&istate->dir_hash, dir); + + if (parent) { + unlock_dir_mutex(lock_nr); + + /* All I really need here is an InterlockedIncrement(&(parent->nr)) */ + lock_nr = compute_dir_lock_nr(&istate->dir_hash, parent->ent.hash); + lock_dir_mutex(lock_nr); + parent->nr++; + } + } + unlock_dir_mutex(lock_nr); + + return dir; +} + +/* + * handle_range_1() and handle_range_dir() are derived from + * clear_ce_flags_1() and clear_ce_flags_dir() in unpack-trees.c + * and handle the iteration over the entire array of index entries. + * They use recursion for adjacent entries in the same parent + * directory. + */ +static int handle_range_1( + struct index_state *istate, + int k_start, + int k_end, + struct dir_entry *parent, + struct strbuf *prefix, + struct lazy_entry *lazy_entries); + +static int handle_range_dir( + struct index_state *istate, + int k_start, + int k_end, + struct dir_entry *parent, + struct strbuf *prefix, + struct lazy_entry *lazy_entries, + struct dir_entry **dir_new_out) +{ + int rc, k; + int input_prefix_len = prefix->len; + struct dir_entry *dir_new; + + dir_new = hash_dir_entry_with_parent_and_prefix(istate, parent, prefix); + + strbuf_addch(prefix, '/'); + + /* + * Scan forward in the index array for index entries having the same + * path prefix (that are also in this directory). 
+ */ + if (strncmp(istate->cache[k_start + 1]->name, prefix->buf, prefix->len) > 0) + k = k_start + 1; + else if (strncmp(istate->cache[k_end - 1]->name, prefix->buf, prefix->len) == 0) + k = k_end; + else { + int begin = k_start; + int end = k_end; + while (begin < end) { + int mid = (begin + end) >> 1; + int cmp = strncmp(istate->cache[mid]->name, prefix->buf, prefix->len); + if (cmp == 0) /* mid has same prefix; look in second part */ + begin = mid + 1; + else if (cmp > 0) /* mid is past group; look in first part */ + end = mid; + else + die("cache entry out of order"); + } + k = begin; + } + + /* + * Recurse and process what we can of this subset [k_start, k). + */ + rc = handle_range_1(istate, k_start, k, dir_new, prefix, lazy_entries); + + strbuf_setlen(prefix, input_prefix_len); + + *dir_new_out = dir_new; + return rc; +} + +static int handle_range_1( + struct index_state *istate, + int k_start, + int k_end, + struct dir_entry *parent, + struct strbuf *prefix, + struct lazy_entry *lazy_entries) +{ + int input_prefix_len = prefix->len; + int k = k_start; + + while (k < k_end) { + struct cache_entry *ce_k = istate->cache[k]; + const char *name, *slash; + + if (prefix->len && strncmp(ce_k->name, prefix->buf, prefix->len)) + break; + + name = ce_k->name + prefix->len; + slash = strchr(name, '/'); + + if (slash) { + int len = slash - name; + int processed; + struct dir_entry *dir_new; + + strbuf_add(prefix, name, len); + processed = handle_range_dir(istate, k, k_end, parent, prefix, lazy_entries, &dir_new); + if (processed) { + k += processed; + strbuf_setlen(prefix, input_prefix_len); + continue; + } + + strbuf_addch(prefix, '/'); + processed = handle_range_1(istate, k, k_end, dir_new, prefix, lazy_entries); + k += processed; + strbuf_setlen(prefix, input_prefix_len); + continue; + } + + /* + * It is too expensive to take a lock to insert "ce_k" + * into "istate->name_hash" and increment the ref-count + * on the "parent" dir. So we defer actually updating + * permanent data structures until phase 2 (where we + * can change the locking requirements) and simply + * accumulate our current results into the lazy_entries + * data array). + * + * We do not need to lock the lazy_entries array because + * we have exclusive access to the cells in the range + * [k_start,k_end) that this thread was given. 
+ */ + lazy_entries[k].dir = parent; + if (parent) { + lazy_entries[k].hash_name = memihash_cont( + parent->ent.hash, + ce_k->name + parent->namelen, + ce_namelen(ce_k) - parent->namelen); + lazy_entries[k].hash_dir = parent->ent.hash; + } else { + lazy_entries[k].hash_name = memihash(ce_k->name, ce_namelen(ce_k)); + } + + k++; + } + + return k - k_start; +} + +struct lazy_dir_thread_data { + pthread_t pthread; + struct index_state *istate; + struct lazy_entry *lazy_entries; + int k_start; + int k_end; +}; + +static void *lazy_dir_thread_proc(void *_data) +{ + struct lazy_dir_thread_data *d = _data; + struct strbuf prefix = STRBUF_INIT; + handle_range_1(d->istate, d->k_start, d->k_end, NULL, &prefix, d->lazy_entries); + strbuf_release(&prefix); + return NULL; +} + +struct lazy_name_thread_data { + pthread_t pthread; + struct index_state *istate; + struct lazy_entry *lazy_entries; +}; + +static void *lazy_name_thread_proc(void *_data) +{ + struct lazy_name_thread_data *d = _data; + int k; + + for (k = 0; k < d->istate->cache_nr; k++) { + struct cache_entry *ce_k = d->istate->cache[k]; + ce_k->ce_flags |= CE_HASHED; + hashmap_entry_init(ce_k, d->lazy_entries[k].hash_name); + hashmap_add(&d->istate->name_hash, ce_k); + } + + return NULL; +} + +static inline void lazy_update_dir_ref_counts( + struct index_state *istate, + struct lazy_entry *lazy_entries) +{ + int k; + + for (k = 0; k < istate->cache_nr; k++) { + if (lazy_entries[k].dir) + lazy_entries[k].dir->nr++; + } +} + +static void threaded_lazy_init_name_hash( + struct index_state *istate) +{ + int nr_each; + int k_start; + int t; + struct lazy_entry *lazy_entries; + struct lazy_dir_thread_data *td_dir; + struct lazy_name_thread_data *td_name; + + k_start = 0; + nr_each = DIV_ROUND_UP(istate->cache_nr, lazy_nr_dir_threads); + + lazy_entries = xcalloc(istate->cache_nr, sizeof(struct lazy_entry)); + td_dir = xcalloc(lazy_nr_dir_threads, sizeof(struct lazy_dir_thread_data)); + td_name = xcalloc(1, sizeof(struct lazy_name_thread_data)); + + init_dir_mutex(); + + /* + * Phase 1: + * Build "istate->dir_hash" using n "dir" threads (and a read-only index). + */ + for (t = 0; t < lazy_nr_dir_threads; t++) { + struct lazy_dir_thread_data *td_dir_t = td_dir + t; + td_dir_t->istate = istate; + td_dir_t->lazy_entries = lazy_entries; + td_dir_t->k_start = k_start; + k_start += nr_each; + if (k_start > istate->cache_nr) + k_start = istate->cache_nr; + td_dir_t->k_end = k_start; + if (pthread_create(&td_dir_t->pthread, NULL, lazy_dir_thread_proc, td_dir_t)) + die("unable to create lazy_dir_thread"); + } + for (t = 0; t < lazy_nr_dir_threads; t++) { + struct lazy_dir_thread_data *td_dir_t = td_dir + t; + if (pthread_join(td_dir_t->pthread, NULL)) + die("unable to join lazy_dir_thread"); + } + + /* + * Phase 2: + * Iterate over all index entries and add them to the "istate->name_hash" + * using a single "name" background thread. + * (Testing showed it wasn't worth running more than 1 thread for this.) + * + * Meanwhile, finish updating the parent directory ref-counts for each + * index entry using the current thread. (This step is very fast and + * doesn't need threading.) 
+ */ + td_name->istate = istate; + td_name->lazy_entries = lazy_entries; + if (pthread_create(&td_name->pthread, NULL, lazy_name_thread_proc, td_name)) + die("unable to create lazy_name_thread"); + + lazy_update_dir_ref_counts(istate, lazy_entries); + + if (pthread_join(td_name->pthread, NULL)) + die("unable to join lazy_name_thread"); + + cleanup_dir_mutex(); + + free(td_name); + free(td_dir); + free(lazy_entries); +} + +#endif + +static void lazy_init_name_hash(struct index_state *istate) +{ if (istate->name_hash_initialized) return; hashmap_init(&istate->name_hash, (hashmap_cmp_fn) cache_entry_cmp, istate->cache_nr); hashmap_init(&istate->dir_hash, (hashmap_cmp_fn) dir_entry_cmp, istate->cache_nr); - for (nr = 0; nr < istate->cache_nr; nr++) - hash_index_entry(istate, istate->cache[nr]); + + if (lookup_lazy_params(istate)) { + hashmap_disallow_rehash(&istate->dir_hash, 1); + threaded_lazy_init_name_hash(istate); + hashmap_disallow_rehash(&istate->dir_hash, 0); + } else { + int nr; + for (nr = 0; nr < istate->cache_nr; nr++) + hash_index_entry(istate, istate->cache[nr]); + } + istate->name_hash_initialized = 1; } +/* + * A test routine for t/helper/ sources. + * + * Returns the number of threads used or 0 when + * the non-threaded code path was used. + * + * Requesting threading WILL NOT override guards + * in lookup_lazy_params(). + */ +int test_lazy_init_name_hash(struct index_state *istate, int try_threaded) +{ + lazy_nr_dir_threads = 0; + lazy_try_threaded = try_threaded; + + lazy_init_name_hash(istate); + + return lazy_nr_dir_threads; +} + void add_name_hash(struct index_state *istate, struct cache_entry *ce) { if (istate->name_hash_initialized) -- cgit v1.2.1 From ea194895326d10f6e255eee05ac25ab5ae096dd9 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Thu, 23 Mar 2017 13:47:04 +0000 Subject: name-hash: add test-lazy-init-name-hash Add t/helper/test-lazy-init-name-hash.c test code to demonstrate performance times for lazy_init_name_hash() using the original single-threaded and the new multi-threaded code paths. Includes a --dump option to dump the created hashmaps to stdout. You can use this to run both code paths and confirm that they generate the same hashmaps. Includes a --analyze option to analyze performance of both code paths over a range of index sizes to help you find a lower bound for the LAZY_THREAD_COST in name-hash.c. For example, passing "-a 4000" will set "istate.cache_nr" to 4000 and then try the multi-threaded code -- probably giving 2 threads with 2000 entries each. It will then run both the single-threaded (1x4000) and the multi-threaded (2x2000) and compare the times. It will then repeat the test with 8000, 12000, and etc. so that you can see the cross over. 
Signed-off-by: Jeff Hostetler
Signed-off-by: Junio C Hamano
---
 Makefile                            |   1 +
 cache.h                             |   1 +
 t/helper/test-lazy-init-name-hash.c | 264 ++++++++++++++++++++++++++++++++++++
 3 files changed, 266 insertions(+)
 create mode 100644 t/helper/test-lazy-init-name-hash.c

diff --git a/Makefile b/Makefile
index 76267262c1..061d9ea884 100644
--- a/Makefile
+++ b/Makefile
@@ -614,6 +614,7 @@ TEST_PROGRAMS_NEED_X += test-fake-ssh
 TEST_PROGRAMS_NEED_X += test-genrandom
 TEST_PROGRAMS_NEED_X += test-hashmap
 TEST_PROGRAMS_NEED_X += test-index-version
+TEST_PROGRAMS_NEED_X += test-lazy-init-name-hash
 TEST_PROGRAMS_NEED_X += test-line-buffer
 TEST_PROGRAMS_NEED_X += test-match-trees
 TEST_PROGRAMS_NEED_X += test-mergesort

diff --git a/cache.h b/cache.h
index 5c035dae57..6556fbb6fb 100644
--- a/cache.h
+++ b/cache.h
@@ -343,6 +343,7 @@ struct index_state {
 extern struct index_state the_index;
 
 /* Name hashing */
+extern int test_lazy_init_name_hash(struct index_state *istate, int try_threaded);
 extern void add_name_hash(struct index_state *istate, struct cache_entry *ce);
 extern void remove_name_hash(struct index_state *istate, struct cache_entry *ce);
 extern void free_name_hash(struct index_state *istate);

diff --git a/t/helper/test-lazy-init-name-hash.c b/t/helper/test-lazy-init-name-hash.c
new file mode 100644
index 0000000000..6368a89345
--- /dev/null
+++ b/t/helper/test-lazy-init-name-hash.c
@@ -0,0 +1,264 @@
+#include "cache.h"
+#include "parse-options.h"
+
+static int single;
+static int multi;
+static int count = 1;
+static int dump;
+static int perf;
+static int analyze;
+static int analyze_step;
+
+/*
+ * Dump the contents of the "dir" and "name" hash tables to stdout.
+ * If you sort the result, you can compare it with the output of the
+ * other mode and verify that both single and multi produce the same
+ * set.
+ */
+static void dump_run(void)
+{
+	struct hashmap_iter iter_dir;
+	struct hashmap_iter iter_cache;
+
+	/* Stolen from name-hash.c */
+	struct dir_entry {
+		struct hashmap_entry ent;
+		struct dir_entry *parent;
+		int nr;
+		unsigned int namelen;
+		char name[FLEX_ARRAY];
+	};
+
+	struct dir_entry *dir;
+	struct cache_entry *ce;
+
+	read_cache();
+	if (single) {
+		test_lazy_init_name_hash(&the_index, 0);
+	} else {
+		int nr_threads_used = test_lazy_init_name_hash(&the_index, 1);
+		if (!nr_threads_used)
+			die("non-threaded code path used");
+	}
+
+	dir = hashmap_iter_first(&the_index.dir_hash, &iter_dir);
+	while (dir) {
+		printf("dir %08x %7d %s\n", dir->ent.hash, dir->nr, dir->name);
+		dir = hashmap_iter_next(&iter_dir);
+	}
+
+	ce = hashmap_iter_first(&the_index.name_hash, &iter_cache);
+	while (ce) {
+		printf("name %08x %s\n", ce->ent.hash, ce->name);
+		ce = hashmap_iter_next(&iter_cache);
+	}
+
+	discard_cache();
+}
+
+/*
+ * Run the single or multi-threaded version "count" times and
+ * report on the time taken.
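+ *
+ * (Illustrative note, not part of the original patch: read_cache()
+ * time (t1 - t0) is measured and printed separately from the hash
+ * table construction time (t2 - t1); only the latter is averaged.)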
+ */ +static uint64_t time_runs(int try_threaded) +{ + uint64_t t0, t1, t2; + uint64_t sum = 0; + uint64_t avg; + int nr_threads_used; + int i; + + for (i = 0; i < count; i++) { + t0 = getnanotime(); + read_cache(); + t1 = getnanotime(); + nr_threads_used = test_lazy_init_name_hash(&the_index, try_threaded); + t2 = getnanotime(); + + sum += (t2 - t1); + + if (try_threaded && !nr_threads_used) + die("non-threaded code path used"); + + if (nr_threads_used) + printf("%f %f %d multi %d\n", + ((double)(t1 - t0))/1000000000, + ((double)(t2 - t1))/1000000000, + the_index.cache_nr, + nr_threads_used); + else + printf("%f %f %d single\n", + ((double)(t1 - t0))/1000000000, + ((double)(t2 - t1))/1000000000, + the_index.cache_nr); + fflush(stdout); + + discard_cache(); + } + + avg = sum / count; + if (count > 1) + printf("avg %f %s\n", + (double)avg/1000000000, + (try_threaded) ? "multi" : "single"); + + return avg; +} + +/* + * Try a series of runs varying the "istate->cache_nr" and + * try to find a good value for the multi-threaded criteria. + */ +static void analyze_run(void) +{ + uint64_t t1s, t1m, t2s, t2m; + int cache_nr_limit; + int nr_threads_used; + int i; + int nr; + + read_cache(); + cache_nr_limit = the_index.cache_nr; + discard_cache(); + + nr = analyze; + while (1) { + uint64_t sum_single = 0; + uint64_t sum_multi = 0; + uint64_t avg_single; + uint64_t avg_multi; + + if (nr > cache_nr_limit) + nr = cache_nr_limit; + + for (i = 0; i < count; i++) { + read_cache(); + the_index.cache_nr = nr; /* cheap truncate of index */ + t1s = getnanotime(); + test_lazy_init_name_hash(&the_index, 0); + t2s = getnanotime(); + sum_single += (t2s - t1s); + the_index.cache_nr = cache_nr_limit; + discard_cache(); + + read_cache(); + the_index.cache_nr = nr; /* cheap truncate of index */ + t1m = getnanotime(); + nr_threads_used = test_lazy_init_name_hash(&the_index, 1); + t2m = getnanotime(); + sum_multi += (t2m - t1m); + the_index.cache_nr = cache_nr_limit; + discard_cache(); + + if (!nr_threads_used) + printf(" [size %8d] [single %f] non-threaded code path used\n", + nr, ((double)(t2s - t1s))/1000000000); + else + printf(" [size %8d] [single %f] %c [multi %f %d]\n", + nr, + ((double)(t2s - t1s))/1000000000, + (((t2s - t1s) < (t2m - t1m)) ? '<' : '>'), + ((double)(t2m - t1m))/1000000000, + nr_threads_used); + fflush(stdout); + } + if (count > 1) { + avg_single = sum_single / count; + avg_multi = sum_multi / count; + if (!nr_threads_used) + printf("avg [size %8d] [single %f]\n", + nr, + (double)avg_single/1000000000); + else + printf("avg [size %8d] [single %f] %c [multi %f %d]\n", + nr, + (double)avg_single/1000000000, + (avg_single < avg_multi ? 
+					(double)avg_multi/1000000000,
+					nr_threads_used);
+			fflush(stdout);
+		}
+
+		if (nr >= cache_nr_limit)
+			return;
+		nr += analyze_step;
+	}
+}
+
+int cmd_main(int argc, const char **argv)
+{
+	const char *usage[] = {
+		"test-lazy-init-name-hash -d (-s | -m)",
+		"test-lazy-init-name-hash -p [-c c]",
+		"test-lazy-init-name-hash -a a [--step s] [-c c]",
+		"test-lazy-init-name-hash (-s | -m) [-c c]",
+		"test-lazy-init-name-hash -s -m [-c c]",
+		NULL
+	};
+	struct option options[] = {
+		OPT_BOOL('s', "single", &single, "run single-threaded code"),
+		OPT_BOOL('m', "multi", &multi, "run multi-threaded code"),
+		OPT_INTEGER('c', "count", &count, "number of passes"),
+		OPT_BOOL('d', "dump", &dump, "dump hash tables"),
+		OPT_BOOL('p', "perf", &perf, "compare single vs multi"),
+		OPT_INTEGER('a', "analyze", &analyze, "analyze different multi sizes"),
+		OPT_INTEGER(0, "step", &analyze_step, "analyze step factor"),
+		OPT_END(),
+	};
+	const char *prefix;
+	uint64_t avg_single, avg_multi;
+
+	prefix = setup_git_directory();
+
+	argc = parse_options(argc, argv, prefix, options, usage, 0);
+
+	/*
+	 * istate->dir_hash is only created when ignore_case is set.
+	 */
+	ignore_case = 1;
+
+	if (dump) {
+		if (perf || analyze > 0)
+			die("cannot combine dump, perf, or analyze");
+		if (count > 1)
+			die("count not valid with dump");
+		if (single && multi)
+			die("cannot use both single and multi with dump");
+		if (!single && !multi)
+			die("dump requires either single or multi");
+		dump_run();
+		return 0;
+	}
+
+	if (perf) {
+		if (analyze > 0)
+			die("cannot combine dump, perf, or analyze");
+		if (single || multi)
+			die("cannot use single or multi with perf");
+		avg_single = time_runs(0);
+		avg_multi = time_runs(1);
+		if (avg_multi > avg_single)
+			die("multi is slower");
+		return 0;
+	}
+
+	if (analyze) {
+		if (analyze < 500)
+			die("analyze must be at least 500");
+		if (!analyze_step)
+			analyze_step = analyze;
+		if (single || multi)
+			die("cannot use single or multi with analyze");
+		analyze_run();
+		return 0;
+	}
+
+	if (!single && !multi)
+		die("require either -s or -m or both");
+
+	if (single)
+		time_runs(0);
+	if (multi)
+		time_runs(1);
+
+	return 0;
+}
--
cgit v1.2.1


From 89c3b0ad43ec9169aba169b264abf5e6b82d2c06 Mon Sep 17 00:00:00 2001
From: Jeff Hostetler
Date: Thu, 23 Mar 2017 13:47:05 +0000
Subject: name-hash: add perf test for lazy_init_name_hash

Create the t/perf/p0004-lazy-init-name-hash.sh test to demonstrate
the correctness and performance gains of the multi-threaded version
of lazy_init_name_hash().
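For example, it can be run via the usual perf harness (a
hypothetical invocation):

    cd t/perf && ./run p0004-lazy-init-name-hash.sh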
Signed-off-by: Jeff Hostetler
Signed-off-by: Junio C Hamano
---
 t/perf/p0004-lazy-init-name-hash.sh | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)
 create mode 100644 t/perf/p0004-lazy-init-name-hash.sh

diff --git a/t/perf/p0004-lazy-init-name-hash.sh b/t/perf/p0004-lazy-init-name-hash.sh
new file mode 100644
index 0000000000..5afa8c8df3
--- /dev/null
+++ b/t/perf/p0004-lazy-init-name-hash.sh
@@ -0,0 +1,19 @@
+#!/bin/sh
+
+test_description='Tests multi-threaded lazy_init_name_hash'
+. ./perf-lib.sh
+
+test_perf_large_repo
+test_checkout_worktree
+
+test_expect_success 'verify both methods build the same hashmaps' '
+	$GIT_BUILD_DIR/t/helper/test-lazy-init-name-hash$X --dump --single | sort >out.single &&
+	$GIT_BUILD_DIR/t/helper/test-lazy-init-name-hash$X --dump --multi | sort >out.multi &&
+	test_cmp out.single out.multi
+'
+
+test_expect_success 'multithreaded should be faster' '
+	$GIT_BUILD_DIR/t/helper/test-lazy-init-name-hash$X --perf >out.perf
+'
+
+test_done
--
cgit v1.2.1


From 41b3eb4a6bff4d38bb188d28544bf901080d9e96 Mon Sep 17 00:00:00 2001
From: Ramsay Jones
Date: Fri, 24 Mar 2017 17:26:50 +0000
Subject: name-hash: add test-lazy-init-name-hash to .gitignore

Signed-off-by: Ramsay Jones
Signed-off-by: Junio C Hamano
---
 t/helper/.gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/t/helper/.gitignore b/t/helper/.gitignore
index d6e8b36798..758ed2e8fa 100644
--- a/t/helper/.gitignore
+++ b/t/helper/.gitignore
@@ -11,6 +11,7 @@
 /test-genrandom
 /test-hashmap
 /test-index-version
+/test-lazy-init-name-hash
 /test-line-buffer
 /test-match-trees
 /test-mergesort
--
cgit v1.2.1