diff options
author | Yves Orton <demerphq@gmail.com> | 2016-12-06 23:48:52 +0100 |
---|---|---|
committer | Yves Orton <demerphq@gmail.com> | 2016-12-07 00:04:30 +0100 |
commit | 6b0260474df579e9412f57249519747ab8bb5c2b (patch) | |
tree | 320cbf33855b5de9da39a7f96bc0c5a3f5031e7d /hv_func.h | |
parent | 86f791091e14932363af3abc0e78844b973b86b5 (diff) | |
download | perl-6b0260474df579e9412f57249519747ab8bb5c2b.tar.gz |
use a hybrid hash function, OAATH for short keys, Siphash 1-3 for longer ones
Switch to an optimized/unrolled variant of OAATH for keys
16 bytes and less, and use Siphash 1-3 for longer keys. (On
64 bit builds, 32 bit is untouched.)
I've done a bunch of benchmarking with Porting/bench.pl to
see at what point the 8 byte reads of Siphash 1-3 start to
win over the low overhead costs of OAATH. It seems to be around
16 bytes. At the same time, we unroll OAATH so that we can save
some instructions per character. The net result is all key lengths
get faster as measured by Porting/bench.pl, and hashing longer keys
becomes *much* faster.
Interestingly the actual crossover point of OAATH being slower
than Siphash 1-3 is much higher than might be guessed from bulk
testing either in a raw benchmark. For instance, basic benchmarks
of the hash function alone predicts that Siphash 1-3 should "win"
at around 4 bytes. However, in practice it is four times longer.
I believe this is because setting up Siphash blows away a fair
amount of CPU state compared to OAATH, which is more important
when the hash is going to be used to manage a data structure.
So it requires a correspondingly longer string before the larger
sized read starts to win.
Summarized stats (higher is better):
AVERAGE
blead yves
------ ------
Ir 100.00 104.47
Dr 100.00 103.97
Dw 100.00 107.63
COND 100.00 112.16
IND 100.00 75.07
COND_m 100.00 125.75
IND_m 100.00 97.42
Ir_m1 100.00 137.11
Dr_m1 100.00 100.10
Dw_m1 100.00 99.62
Ir_mm 100.00 100.00
Dr_mm 100.00 100.00
Dw_mm 100.00 100.00
Diffstat (limited to 'hv_func.h')
-rw-r--r-- | hv_func.h | 240 |
1 files changed, 184 insertions, 56 deletions
@@ -14,6 +14,8 @@ #if !( 0 \ || defined(PERL_HASH_FUNC_SIPHASH) \ + || defined(PERL_HASH_FUNC_SIPHASH13) \ + || defined(PERL_HASH_FUNC_HYBRID_OAATHU_SIPHASH13) \ || defined(PERL_HASH_FUNC_SDBM) \ || defined(PERL_HASH_FUNC_DJB2) \ || defined(PERL_HASH_FUNC_SUPERFAST) \ @@ -24,13 +26,25 @@ || defined(PERL_HASH_FUNC_MURMUR_HASH_64A) \ || defined(PERL_HASH_FUNC_MURMUR_HASH_64B) \ ) +#ifdef HAS_QUAD +#define PERL_HASH_FUNC_HYBRID_OAATHU_SIPHASH13 +#else #define PERL_HASH_FUNC_ONE_AT_A_TIME_HARD #endif +#endif #if defined(PERL_HASH_FUNC_SIPHASH) # define PERL_HASH_FUNC "SIPHASH_2_4" # define PERL_HASH_SEED_BYTES 16 # define PERL_HASH_WITH_SEED(seed,hash,str,len) (hash)= S_perl_hash_siphash_2_4((seed),(U8*)(str),(len)) +#elif defined(PERL_HASH_FUNC_SIPHASH13) +# define PERL_HASH_FUNC "SIPHASH_1_3" +# define PERL_HASH_SEED_BYTES 16 +# define PERL_HASH_WITH_SEED(seed,hash,str,len) (hash)= S_perl_hash_siphash_1_3((seed),(U8*)(str),(len)) +#elif defined(PERL_HASH_FUNC_HYBRID_OAATHU_SIPHASH13) +# define PERL_HASH_FUNC "HYBRID_OAATHU_SIPHASH_1_3" +# define PERL_HASH_SEED_BYTES 24 +# define PERL_HASH_WITH_SEED(seed,hash,str,len) (hash)= S_perl_hash_oaathu_siphash_1_3((seed),(U8*)(str),(len)) #elif defined(PERL_HASH_FUNC_SUPERFAST) # define PERL_HASH_FUNC "SUPERFAST" # define PERL_HASH_SEED_BYTES 4 @@ -192,72 +206,89 @@ ((U64)((p)[7]) << 56)) #define SIPROUND \ - do { \ + STMT_START { \ v0 += v1; v1=ROTL64(v1,13); v1 ^= v0; v0=ROTL64(v0,32); \ v2 += v3; v3=ROTL64(v3,16); v3 ^= v2; \ v0 += v3; v3=ROTL64(v3,21); v3 ^= v0; \ v2 += v1; v1=ROTL64(v1,17); v1 ^= v2; v2=ROTL64(v2,32); \ - } while(0) + } STMT_END /* SipHash-2-4 */ -PERL_STATIC_INLINE U32 -S_perl_hash_siphash_2_4(const unsigned char * const seed, const unsigned char *in, const STRLEN inlen) { - /* "somepseudorandomlygeneratedbytes" */ - U64 v0 = UINT64_C(0x736f6d6570736575); - U64 v1 = UINT64_C(0x646f72616e646f6d); - U64 v2 = UINT64_C(0x6c7967656e657261); - U64 v3 = UINT64_C(0x7465646279746573); - - U64 b; - U64 k0 = ((const U64*)seed)[0]; - U64 k1 = ((const U64*)seed)[1]; - U64 m; - const int left = inlen & 7; - const U8 *end = in + inlen - left; - - b = ( ( U64 )(inlen) ) << 56; - v3 ^= k1; - v2 ^= k0; - v1 ^= k1; - v0 ^= k0; - - for ( ; in != end; in += 8 ) - { - m = U8TO64_LE( in ); - v3 ^= m; - SIPROUND; - SIPROUND; - v0 ^= m; - } - - switch( left ) - { - case 7: b |= ( ( U64 )in[ 6] ) << 48; - case 6: b |= ( ( U64 )in[ 5] ) << 40; - case 5: b |= ( ( U64 )in[ 4] ) << 32; - case 4: b |= ( ( U64 )in[ 3] ) << 24; - case 3: b |= ( ( U64 )in[ 2] ) << 16; - case 2: b |= ( ( U64 )in[ 1] ) << 8; - case 1: b |= ( ( U64 )in[ 0] ); break; - case 0: break; - } - - v3 ^= b; - SIPROUND; - SIPROUND; - v0 ^= b; - - v2 ^= 0xff; - SIPROUND; - SIPROUND; - SIPROUND; - SIPROUND; - b = v0 ^ v1 ^ v2 ^ v3; - return (U32)(b & U32_MAX); + +#define PERL_SIPHASH_FNC(FNC,SIP_ROUNDS,SIP_FINAL_ROUNDS) \ +PERL_STATIC_INLINE U32 \ +FNC(const unsigned char * const seed, const unsigned char *in, const STRLEN inlen) { \ + /* "somepseudorandomlygeneratedbytes" */ \ + U64 v0 = UINT64_C(0x736f6d6570736575); \ + U64 v1 = UINT64_C(0x646f72616e646f6d); \ + U64 v2 = UINT64_C(0x6c7967656e657261); \ + U64 v3 = UINT64_C(0x7465646279746573); \ + \ + U64 b; \ + U64 k0 = ((const U64*)seed)[0]; \ + U64 k1 = ((const U64*)seed)[1]; \ + U64 m; \ + const int left = inlen & 7; \ + const U8 *end = in + inlen - left; \ + \ + b = ( ( U64 )(inlen) ) << 56; \ + v3 ^= k1; \ + v2 ^= k0; \ + v1 ^= k1; \ + v0 ^= k0; \ + \ + for ( ; in != end; in += 8 ) \ + { \ + m = U8TO64_LE( in ); \ + v3 ^= m; \ + \ + SIP_ROUNDS; \ + \ + v0 ^= m; \ + } \ + \ + switch( left ) \ + { \ + case 7: b |= ( ( U64 )in[ 6] ) << 48; \ + case 6: b |= ( ( U64 )in[ 5] ) << 40; \ + case 5: b |= ( ( U64 )in[ 4] ) << 32; \ + case 4: b |= ( ( U64 )in[ 3] ) << 24; \ + case 3: b |= ( ( U64 )in[ 2] ) << 16; \ + case 2: b |= ( ( U64 )in[ 1] ) << 8; \ + case 1: b |= ( ( U64 )in[ 0] ); break; \ + case 0: break; \ + } \ + \ + v3 ^= b; \ + \ + SIP_ROUNDS; \ + \ + v0 ^= b; \ + \ + v2 ^= 0xff; \ + \ + SIP_FINAL_ROUNDS \ + \ + b = v0 ^ v1 ^ v2 ^ v3; \ + return (U32)(b & U32_MAX); \ } + +PERL_SIPHASH_FNC( + S_perl_hash_siphash_1_3 + ,SIPROUND; + ,SIPROUND;SIPROUND;SIPROUND; +) + +PERL_SIPHASH_FNC( + S_perl_hash_siphash_2_4 + ,SIPROUND;SIPROUND; + ,SIPROUND;SIPROUND;SIPROUND;SIPROUND; +) + #endif /* defined(HAS_QUAD) */ + /* FYI: This is the "Super-Fast" algorithm mentioned by Bob Jenkins in * (http://burtleburtle.net/bob/hash/doobs.html) * It is by Paul Hsieh (c) 2004 and is analysed here @@ -550,6 +581,102 @@ S_perl_hash_one_at_a_time_hard(const unsigned char * const seed, const unsigned return (hash + (hash << 15)); } +#ifdef HAS_QUAD +/* For short strings, 16 bytes or shorter, we use an optimised variant + * of One At A Time Hard, and for longer strings, we use siphash_1_3. + */ +PERL_STATIC_INLINE U32 +S_perl_hash_oaathu_siphash_1_3(const unsigned char * const seed, const unsigned char *str, const STRLEN len) { + U32 hash = *((const U32*)seed) + (U32)len; + switch (len) { + case 16: + hash += (hash << 10); + hash ^= (hash >> 6); + hash += str[15]; + case 15: + hash += (hash << 10); + hash ^= (hash >> 6); + hash += str[14]; + case 14: + hash += (hash << 10); + hash ^= (hash >> 6); + hash += str[13]; + case 13: + hash += (hash << 10); + hash ^= (hash >> 6); + hash += str[12]; + case 12: + hash += (hash << 10); + hash ^= (hash >> 6); + hash += str[11]; + case 11: + hash += (hash << 10); + hash ^= (hash >> 6); + hash += str[10]; + case 10: + hash += (hash << 10); + hash ^= (hash >> 6); + hash += str[9]; + case 9: + hash += (hash << 10); + hash ^= (hash >> 6); + hash += str[8]; + case 8: + hash += (hash << 10); + hash ^= (hash >> 6); + hash += str[7]; + case 7: + hash += (hash << 10); + hash ^= (hash >> 6); + hash += str[6]; + case 6: + hash += (hash << 10); + hash ^= (hash >> 6); + hash += str[5]; + case 5: + hash += (hash << 10); + hash ^= (hash >> 6); + hash += str[4]; + case 4: + hash += (hash << 10); + hash ^= (hash >> 6); + hash += str[3]; + case 3: + hash += (hash << 10); + hash ^= (hash >> 6); + hash += str[2]; + case 2: + hash += (hash << 10); + hash ^= (hash >> 6); + hash += str[1]; + case 1: + hash += (hash << 10); + hash ^= (hash >> 6); + hash += str[0]; + case 0: + hash += (hash << 10); + hash ^= (hash >> 6); + hash += seed[4]; + hash += (hash << 10); + hash ^= (hash >> 6); + hash += seed[5]; + hash += (hash << 10); + hash ^= (hash >> 6); + hash += seed[6]; + hash += (hash << 10); + hash ^= (hash >> 6); + hash += seed[7]; + hash += (hash << 10); + hash ^= (hash >> 6); + + hash += (hash << 3); + hash ^= (hash >> 11); + return (hash + (hash << 15)); + } + return S_perl_hash_siphash_1_3(seed+8, str, len); +} +#endif /* defined(HAS_QUAD) */ + PERL_STATIC_INLINE U32 S_perl_hash_old_one_at_a_time(const unsigned char * const seed, const unsigned char *str, const STRLEN len) { const unsigned char * const end = (const unsigned char *)str + len; @@ -692,6 +819,7 @@ S_perl_hash_murmur_hash_64b (const unsigned char * const seed, const unsigned ch } #endif + /* legacy - only mod_perl should be doing this. */ #ifdef PERL_HASH_INTERNAL_ACCESS #define PERL_HASH_INTERNAL(hash,str,len) PERL_HASH(hash,str,len) |