| field | value | date |
|---|---|---|
| author | Thomas Rast <trast@student.ethz.ch> | 2012-04-06 23:01:23 +0200 |
| committer | Junio C Hamano <gitster@pobox.com> | 2012-04-09 17:03:25 -0700 |
| commit | 6942efcfa9f3083e7c7863348fa5bb1350412595 | |
| tree | 569985c285ef0acea5617a93ebdea46a299f46ee | |
| parent | e8dde3e5f9ddb7cf95a6ff3cea6cf07c3a2db80d | |
xdiff: load full words in the inner loop of xdl_hash_record
Redo the hashing loop in xdl_hash_record in a way that loads an entire
'long' at a time, using masking tricks to see when and where we find
the terminating '\n'.
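For illustration, here is a minimal standalone sketch of that masking trick, assuming a little-endian machine with a 64-bit unsigned long (the same assumption the patch makes); the constants match the ones added to xdiff/xutils.c below, while the rest of the program is made up for the example:

```c
/*
 * Minimal sketch of the word-at-a-time newline scan (assumes
 * little-endian, 64-bit unsigned long).  has_zero() flags the high
 * bit of every zero byte in a word; XOR-ing with NEWLINEBYTES first
 * turns '\n' bytes into zero bytes, so the result marks where the
 * newline sits inside the loaded word.
 */
#include <stdio.h>
#include <string.h>

#define ONEBYTES	0x0101010101010101ul
#define NEWLINEBYTES	0x0a0a0a0a0a0a0a0aul
#define HIGHBITS	0x8080808080808080ul

static unsigned long has_zero(unsigned long a)
{
	return ((a - ONEBYTES) & ~a) & HIGHBITS;
}

int main(void)
{
	const char buf[8] = "abc\ndef";	/* '\n' is byte 3 (0-based) */
	unsigned long word, mask;

	memcpy(&word, buf, sizeof(word));	/* one full-word load */
	mask = has_zero(word ^ NEWLINEBYTES);

	/* On x86_64 this prints 0x80000000: the high bit of byte 3,
	 * i.e. exactly where the terminating '\n' was found. */
	printf("mask = %#lx\n", mask);
	return 0;
}
```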
I stole inspiration and code from the posts by Linus Torvalds around
  https://lkml.org/lkml/2012/3/2/452
  https://lkml.org/lkml/2012/3/5/6
His method reads the buffer in sizeof(long) increments, and may thus
overrun it by at most sizeof(long)-1 bytes before it sees the final
newline (or hits the buffer length check).  I considered padding out
all buffers by a suitable amount to "catch" the overrun, but
* this does not work for mmap()'d buffers: if you map 4096+8 bytes
  from a 4096-byte file, accessing the last 8 bytes results in a
  SIGBUS on my machine (a sketch reproducing this follows the list); and
* it would also be extremely ugly because it intrudes deep into the
  unpacking machinery.
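The SIGBUS from the first bullet can be reproduced with a small standalone program; everything below (scratch file name, 4096-byte page size, Linux/x86_64 behaviour) is an illustrative assumption rather than text from the commit:

```c
/* Map a few bytes more than a one-page file and touch the area past
 * the page that backs it; the access raises SIGBUS, which we catch
 * only so the demo can report it and exit cleanly. */
#include <fcntl.h>
#include <setjmp.h>
#include <signal.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

static sigjmp_buf jump;

static void on_sigbus(int sig)
{
	(void)sig;
	siglongjmp(jump, 1);
}

int main(void)
{
	const char *path = "overrun-demo.tmp";	/* hypothetical scratch file */
	int fd = open(path, O_RDWR | O_CREAT | O_TRUNC, 0600);

	if (fd < 0 || ftruncate(fd, 4096) < 0) {	/* 4096-byte file */
		perror(path);
		return 1;
	}

	/* Map 4096+8 bytes: the last 8 bytes fall on a page with no
	 * backing store, so reading them faults. */
	char *p = mmap(NULL, 4096 + 8, PROT_READ, MAP_PRIVATE, fd, 0);
	if (p == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	signal(SIGBUS, on_sigbus);
	if (sigsetjmp(jump, 1) == 0) {
		volatile char c = p[4096 + 7];	/* past the backed page */
		(void)c;
		puts("no SIGBUS (this platform tolerated the overrun)");
	} else {
		puts("SIGBUS: overrunning the mapping is not safe");
	}

	munmap(p, 4096 + 8);
	close(fd);
	unlink(path);
	return 0;
}
```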
So I adapted it to not read beyond the buffer at all.  Instead, it
reads the final partial word byte-by-byte and strings it together.
Then it can use the same logic as before to finish the hashing.
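A sketch of that byte-by-byte tail assembly, again assuming little-endian byte order; load_partial_word() is a made-up helper name for this example, not a function from the patch:

```c
/* String together the last (top - ptr) bytes of the buffer by hand,
 * producing the same value a little-endian full-word load would have
 * produced, but without reading a single byte past 'top'. */
#include <stdio.h>

static unsigned long load_partial_word(const char *ptr, const char *top)
{
	unsigned long a = 0;
	const char *p;

	/* Walk backwards so the byte at 'ptr' ends up least
	 * significant, as unsigned char to avoid sign extension. */
	for (p = top - 1; p >= ptr; p--)
		a = (a << 8) + *((const unsigned char *)p);
	return a;
}

int main(void)
{
	const char buf[] = "tail";	/* only 4 bytes left of an 8-byte word */
	unsigned long a = load_partial_word(buf, buf + 4);

	/* Prints 0x6c696174: 't' 'a' 'i' 'l' from low byte to high,
	 * ready for the same newline-mask logic as a full word. */
	printf("a = %#lx\n", a);
	return 0;
}
```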
So far we enable this only on x86_64, where it provides a nice speedup
for diff-related work:
```
  Test                                  origin/next      tr/xdiff-fast-hash
  -----------------------------------------------------------------------------
  4000.1: log -3000 (baseline)          0.07(0.05+0.02)  0.08(0.06+0.02) +14.3%
  4000.2: log --raw -3000 (tree-only)   0.37(0.33+0.04)  0.37(0.32+0.04) +0.0%
  4000.3: log -p -3000 (Myers)          1.75(1.65+0.09)  1.60(1.49+0.10) -8.6%
  4000.4: log -p -3000 --histogram      1.73(1.62+0.09)  1.58(1.49+0.08) -8.7%
  4000.5: log -p -3000 --patience       2.11(2.00+0.10)  1.94(1.80+0.11) -8.1%
```
Perhaps other platforms could also benefit.  However it does NOT work
on big-endian systems!
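A hedged illustration of the big-endian caveat (my reading of the mask arithmetic, not part of the commit text): the byte count derived from the newline mask assumes the first buffer byte sits in the least significant byte of the loaded word, so simulating a big-endian load with a byte swap makes the count come out wrong.

```c
#include <stdio.h>
#include <string.h>

#define ONEBYTES	0x0101010101010101ul
#define NEWLINEBYTES	0x0a0a0a0a0a0a0a0aul
#define HIGHBITS	0x8080808080808080ul

static unsigned long has_zero(unsigned long a)
{
	return ((a - ONEBYTES) & ~a) & HIGHBITS;
}

/* 64-bit count_masked_bytes() as in the patch */
static long count_masked_bytes(unsigned long mask)
{
	return mask * 0x0001020304050608 >> 56;
}

/* Same mask post-processing as the patch's xdl_hash_record() */
static long bytes_before_newline(unsigned long word)
{
	unsigned long mask = has_zero(word ^ NEWLINEBYTES);

	mask = (mask - 1) & ~mask;	/* bits below the first high bit */
	mask >>= 7;
	return count_masked_bytes(mask);
}

int main(void)
{
	const char buf[8] = "ab\ncdef";	/* the '\n' comes after 2 bytes */
	unsigned long word;

	memcpy(&word, buf, sizeof(word));

	/* Little-endian load (x86_64): prints 2, the right answer. */
	printf("little-endian view: %ld\n", bytes_before_newline(word));

	/* The byte order a big-endian load would see: prints 5,
	 * because the '\n' now sits in a different byte of the word. */
	printf("big-endian view:    %ld\n",
	       bytes_before_newline(__builtin_bswap64(word)));
	return 0;
}
```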
[jc: minimum style and compilation fixes]
Signed-off-by: Thomas Rast <trast@student.ethz.ch>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | Makefile | 12 |
| -rw-r--r-- | xdiff/xutils.c | 112 |

2 files changed, 124 insertions, 0 deletions
```diff
diff --git a/Makefile b/Makefile
--- a/Makefile
+++ b/Makefile
@@ -288,6 +288,11 @@ all::
 # dependency rules.
 #
 # Define NATIVE_CRLF if your platform uses CRLF for line endings.
+#
+# Define XDL_FAST_HASH to use an alternative line-hashing method in
+# the diff algorithm.  It gives a nice speedup if your processor has
+# fast unaligned word loads.  Does NOT work on big-endian systems!
+# Enabled by default on x86_64.
 
 GIT-VERSION-FILE: FORCE
 	@$(SHELL_PATH) ./GIT-VERSION-GEN
@@ -864,6 +869,9 @@ EXTLIBS =
 # because maintaining the nesting to match is a pain.  If
 # we had "elif" things would have been much nicer...
 
+ifeq ($(uname_M),x86_64)
+	XDL_FAST_HASH = YesPlease
+endif
 ifeq ($(uname_S),OSF1)
 	# Need this for u_short definitions et al
 	BASIC_CFLAGS += -D_OSF_SOURCE
@@ -1737,6 +1745,10 @@ ifndef NO_MSGFMT_EXTENDED_OPTIONS
 	MSGFMT += --check --statistics
 endif
 
+ifneq (,$(XDL_FAST_HASH))
+	BASIC_CFLAGS += -DXDL_FAST_HASH
+endif
+
 ifeq ($(TCLTK_PATH),)
 NO_TCLTK=NoThanks
 endif
diff --git a/xdiff/xutils.c b/xdiff/xutils.c
index 0de084e53f..e05b5c96aa 100644
--- a/xdiff/xutils.c
+++ b/xdiff/xutils.c
@@ -20,6 +20,8 @@
  *
  */
 
+#include <limits.h>
+#include <assert.h>
 #include "xinclude.h"
 
 
@@ -276,6 +278,115 @@ static unsigned long xdl_hash_record_with_whitespace(char const **data,
 	return ha;
 }
 
+#ifdef XDL_FAST_HASH
+
+#define ONEBYTES	0x0101010101010101ul
+#define NEWLINEBYTES	0x0a0a0a0a0a0a0a0aul
+#define HIGHBITS	0x8080808080808080ul
+
+/* Return the high bit set in the first byte that is a zero */
+static inline unsigned long has_zero(unsigned long a)
+{
+	return ((a - ONEBYTES) & ~a) & HIGHBITS;
+}
+
+#if __WORDSIZE == 64
+
+/*
+ * Jan Achrenius on G+: microoptimized version of
+ * the simpler "(mask & ONEBYTES) * ONEBYTES >> 56"
+ * that works for the bytemasks without having to
+ * mask them first.
+ */
+static inline long count_masked_bytes(unsigned long mask)
+{
+	return mask * 0x0001020304050608 >> 56;
+}
+
+#else	/* 32-bit case */
+
+/* Modified Carl Chatfield G+ version for 32-bit */
+static inline long count_masked_bytes(long mask)
+{
+	/*
+	 * (a) gives us
+	 *   -1 (0, ff), 0 (ffff) or 1 (ffffff)
+	 * (b) gives us
+	 *   0 for 0, 1 for (ff ffff ffffff)
+	 * (a+b+1) gives us
+	 *   correct 0-3 bytemask count result
+	 */
+	long a = (mask - 256) >> 23;
+	long b = mask & 1;
+	return a + b + 1;
+}
+
+#endif
+
+unsigned long xdl_hash_record(char const **data, char const *top, long flags)
+{
+	unsigned long hash = 5381;
+	unsigned long a = 0, mask = 0;
+	char const *ptr = *data;
+	char const *end = top - sizeof(unsigned long) + 1;
+
+	if (flags & XDF_WHITESPACE_FLAGS)
+		return xdl_hash_record_with_whitespace(data, top, flags);
+
+	ptr -= sizeof(unsigned long);
+	do {
+		hash += hash << 5;
+		hash ^= a;
+		ptr += sizeof(unsigned long);
+		if (ptr >= end)
+			break;
+		a = *(unsigned long *)ptr;
+		/* Do we have any '\n' bytes in this word? */
+		mask = has_zero(a ^ NEWLINEBYTES);
+	} while (!mask);
+
+	if (ptr >= end) {
+		/*
+		 * There is only a partial word left at the end of the
+		 * buffer. Because we may work with a memory mapping,
+		 * we have to grab the rest byte by byte instead of
+		 * blindly reading it.
+		 *
+		 * To avoid problems with masking in a signed value,
+		 * we use an unsigned char here.
+		 */
+		const char *p;
+		for (p = top - 1; p >= ptr; p--)
+			a = (a << 8) + *((const unsigned char *)p);
+		mask = has_zero(a ^ NEWLINEBYTES);
+		if (!mask)
+			/*
+			 * No '\n' found in the partial word.  Make a
+			 * mask that matches what we read.
+			 */
+			mask = 1UL << (8 * (top - ptr) + 7);
+	}
+
+	/* The mask *below* the first high bit set */
+	mask = (mask - 1) & ~mask;
+	mask >>= 7;
+	hash += hash << 5;
+	hash ^= a & mask;
+
+	/* Advance past the last (possibly partial) word */
+	ptr += count_masked_bytes(mask);
+
+	if (ptr < top) {
+		assert(*ptr == '\n');
+		ptr++;
+	}
+
+	*data = ptr;
+
+	return hash;
+}
+
+#else /* XDL_FAST_HASH */
 
 unsigned long xdl_hash_record(char const **data, char const *top, long flags) {
 	unsigned long ha = 5381;
@@ -293,6 +404,7 @@ unsigned long xdl_hash_record(char const **data, char const *top, long flags) {
 	return ha;
 }
 
+#endif /* XDL_FAST_HASH */
 
 unsigned int xdl_hashbits(unsigned int size) {
 	unsigned int val = 1, bits = 0;
```
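For context, a usage sketch of the calling convention the fast path has to preserve: hash_one_line() below mirrors the plain byte-at-a-time variant kept in the #else branch (whitespace flags omitted), and the driver walking the buffer is made up for illustration.

```c
#include <stdio.h>
#include <string.h>

/* DJB2-style per-line hash, byte at a time; *data is advanced past
 * the line's terminating '\n' (or to 'top' for an unterminated line),
 * which is exactly what the word-at-a-time version must also do. */
static unsigned long hash_one_line(char const **data, char const *top)
{
	unsigned long ha = 5381;
	char const *ptr = *data;

	for (; ptr < top && *ptr != '\n'; ptr++) {
		ha += (ha << 5);
		ha ^= (unsigned long) *ptr;
	}
	*data = ptr < top ? ptr + 1 : ptr;
	return ha;
}

int main(void)
{
	const char buf[] = "one\ntwo\nthree\n";
	char const *data = buf;
	char const *top = buf + strlen(buf);

	/* The fast path must return the same hashes and leave 'data'
	 * at the same positions after every call. */
	while (data < top)
		printf("%#lx\n", hash_one_line(&data, top));
	return 0;
}
```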
