diff options
-rw-r--r-- | MANIFEST | 1 | ||||
-rw-r--r-- | dump.c | 17 | ||||
-rw-r--r-- | embed.fnc | 2 | ||||
-rw-r--r-- | embed.h | 2 | ||||
-rw-r--r-- | embedvar.h | 7 | ||||
-rw-r--r-- | ext/Hash-Util-FieldHash/t/10_hash.t | 110 | ||||
-rw-r--r-- | ext/Hash-Util/Util.xs | 160 | ||||
-rw-r--r-- | ext/Hash-Util/lib/Hash/Util.pm | 103 | ||||
-rw-r--r-- | ext/Hash-Util/t/Util.t | 30 | ||||
-rw-r--r-- | hv.c | 113 | ||||
-rw-r--r-- | hv.h | 536 | ||||
-rw-r--r-- | intrpvar.h | 6 | ||||
-rw-r--r-- | perl.c | 39 | ||||
-rw-r--r-- | perlapi.h | 4 | ||||
-rw-r--r-- | perlvars.h | 3 | ||||
-rw-r--r-- | proto.h | 6 | ||||
-rw-r--r-- | sv.c | 16 | ||||
-rw-r--r-- | sv.h | 2 | ||||
-rw-r--r-- | t/lib/universal.t | 2 | ||||
-rw-r--r-- | t/op/hash.t | 102 | ||||
-rw-r--r-- | t/run/runenv.t | 38 | ||||
-rw-r--r-- | universal.c | 41 | ||||
-rw-r--r-- | util.c | 63 |
23 files changed, 917 insertions, 486 deletions
@@ -3769,7 +3769,6 @@ ext/Hash-Util-FieldHash/t/02_function.t Test script ext/Hash-Util-FieldHash/t/03_class.t Test script ext/Hash-Util-FieldHash/t/04_thread.t Test script ext/Hash-Util-FieldHash/t/05_perlhook.t Test script -ext/Hash-Util-FieldHash/t/10_hash.t Adapted from t/op/hash.t ext/Hash-Util-FieldHash/t/11_hashassign.t Adapted from t/op/hashassign.t ext/Hash-Util-FieldHash/t/12_hashwarn.t Adapted from t/op/hashwarn.t ext/Hash-Util/lib/Hash/Util.pm Hash::Util @@ -1416,7 +1416,6 @@ const struct flag_to_name hv_flags_names[] = { {SVphv_SHAREKEYS, "SHAREKEYS,"}, {SVphv_LAZYDEL, "LAZYDEL,"}, {SVphv_HASKFLAGS, "HASKFLAGS,"}, - {SVphv_REHASH, "REHASH,"}, {SVphv_CLONEABLE, "CLONEABLE,"} }; @@ -1900,7 +1899,7 @@ Perl_do_sv_dump(pTHX_ I32 level, PerlIO *file, SV *sv, I32 nest, I32 maxnest, bo SV * keysv; const char * keypv; SV * elt; - STRLEN len; + STRLEN len; if (count-- <= 0) goto DONEHV; @@ -1909,16 +1908,14 @@ Perl_do_sv_dump(pTHX_ I32 level, PerlIO *file, SV *sv, I32 nest, I32 maxnest, bo keypv = SvPV_const(keysv, len); elt = HeVAL(he); - Perl_dump_indent(aTHX_ level+1, file, "Elt %s ", pv_display(d, keypv, len, 0, pvlim)); - if (SvUTF8(keysv)) - PerlIO_printf(file, "[UTF8 \"%s\"] ", sv_uni_display(d, keysv, 6 * SvCUR(keysv), UNI_DISPLAY_QQ)); + Perl_dump_indent(aTHX_ level+1, file, "Elt %s ", pv_display(d, keypv, len, 0, pvlim)); + if (SvUTF8(keysv)) + PerlIO_printf(file, "[UTF8 \"%s\"] ", sv_uni_display(d, keysv, 6 * SvCUR(keysv), UNI_DISPLAY_QQ)); if (HvEITER_get(hv) == he) PerlIO_printf(file, "[CURRENT] "); - if (HeKREHASH(he)) - PerlIO_printf(file, "[REHASH] "); - PerlIO_printf(file, "HASH = 0x%"UVxf"\n", (UV) hash); - do_sv_dump(level+1, file, elt, nest+1, maxnest, dumpops, pvlim); - } + PerlIO_printf(file, "HASH = 0x%"UVxf"\n", (UV) hash); + do_sv_dump(level+1, file, elt, nest+1, maxnest, dumpops, pvlim); + } } DONEHV:; } @@ -1509,7 +1509,7 @@ p |I32 |wait4pid |Pid_t pid|NN int* statusp|int flags p |U32 |parse_unicode_opts|NN const char **popt Ap |U32 |seed : Only used in perl.c -pR |UV |get_hash_seed +p |void |get_hash_seed |NN unsigned char *seed_buffer : Used in doio.c, pp_hot.c, pp_sys.c p |void |report_evil_fh |NULLOK const GV *gv : Used in doio.c, pp_hot.c, pp_sys.c @@ -1082,7 +1082,7 @@ #define find_rundefsv2(a,b) Perl_find_rundefsv2(aTHX_ a,b) #define find_script(a,b,c,d) Perl_find_script(aTHX_ a,b,c,d) #define free_tied_hv_pool() Perl_free_tied_hv_pool(aTHX) -#define get_hash_seed() Perl_get_hash_seed(aTHX) +#define get_hash_seed(a) Perl_get_hash_seed(aTHX_ a) #define get_no_modify() Perl_get_no_modify(aTHX) #define get_opargs() Perl_get_opargs(aTHX) #define gv_try_downgrade(a) Perl_gv_try_downgrade(aTHX_ a) diff --git a/embedvar.h b/embedvar.h index 6efd53aef4..47e0b32bc8 100644 --- a/embedvar.h +++ b/embedvar.h @@ -181,7 +181,6 @@ #define PL_glob_index (vTHX->Iglob_index) #define PL_globalstash (vTHX->Iglobalstash) #define PL_globhook (vTHX->Iglobhook) -#define PL_hash_seed (vTHX->Ihash_seed) #define PL_hintgv (vTHX->Ihintgv) #define PL_hints (vTHX->Ihints) #define PL_hv_fetch_ent_mh (vTHX->Ihv_fetch_ent_mh) @@ -278,8 +277,6 @@ #define PL_registered_mros (vTHX->Iregistered_mros) #define PL_regmatch_slab (vTHX->Iregmatch_slab) #define PL_regmatch_state (vTHX->Iregmatch_state) -#define PL_rehash_seed (vTHX->Irehash_seed) -#define PL_rehash_seed_set (vTHX->Irehash_seed_set) #define PL_replgv (vTHX->Ireplgv) #define PL_restartjmpenv (vTHX->Irestartjmpenv) #define PL_restartop (vTHX->Irestartop) @@ -407,6 +404,10 @@ #define PL_Gdollarzero_mutex (my_vars->Gdollarzero_mutex) #define PL_fold_locale (my_vars->Gfold_locale) #define PL_Gfold_locale (my_vars->Gfold_locale) +#define PL_hash_seed (my_vars->Ghash_seed) +#define PL_Ghash_seed (my_vars->Ghash_seed) +#define PL_hash_seed_set (my_vars->Ghash_seed_set) +#define PL_Ghash_seed_set (my_vars->Ghash_seed_set) #define PL_hints_mutex (my_vars->Ghints_mutex) #define PL_Ghints_mutex (my_vars->Ghints_mutex) #define PL_keyword_plugin (my_vars->Gkeyword_plugin) diff --git a/ext/Hash-Util-FieldHash/t/10_hash.t b/ext/Hash-Util-FieldHash/t/10_hash.t deleted file mode 100644 index 2cfb4e81fa..0000000000 --- a/ext/Hash-Util-FieldHash/t/10_hash.t +++ /dev/null @@ -1,110 +0,0 @@ -#!./perl -w -use Test::More; - -use strict; -use Hash::Util::FieldHash qw( :all); - -no warnings 'misc'; - -plan tests => 5; - -fieldhash my %h; - -ok (!Internals::HvREHASH(%h), "hash doesn't start with rehash flag on"); - -foreach (1..10) { - $h{"\0"x$_}++; -} - -ok (!Internals::HvREHASH(%h), "10 entries doesn't trigger rehash"); - -foreach (11..20) { - $h{"\0"x$_}++; -} - -ok (Internals::HvREHASH(%h), "20 entries triggers rehash"); - - - - -# second part using an emulation of the PERL_HASH in perl, mounting an -# attack on a pre-populated hash. This is also useful if you need normal -# keys which don't contain \0 -- suitable for stashes - -use constant MASK_U32 => 2**32; -use constant HASH_SEED => 0; -use constant THRESHOLD => 14; -use constant START => "a"; - -# some initial hash data -fieldhash my %h2; -%h2 = map {$_ => 1} 'a'..'cc'; - -ok (!Internals::HvREHASH(%h2), - "starting with pre-populated non-pathological hash (rehash flag if off)"); - -my @keys = get_keys(\%h2); -$h2{$_}++ for @keys; -ok (Internals::HvREHASH(%h2), - scalar(@keys) . " colliding into the same bucket keys are triggering rehash"); - -sub get_keys { - my $hr = shift; - - # the minimum of bits required to mount the attack on a hash - my $min_bits = log(THRESHOLD)/log(2); - - # if the hash has already been populated with a significant amount - # of entries the number of mask bits can be higher - my $keys = scalar keys %$hr; - my $bits = $keys ? log($keys)/log(2) : 0; - $bits = $min_bits if $min_bits > $bits; - - $bits = int($bits) < $bits ? int($bits) + 1 : int($bits); - # need to add 2 bits to cover the internal split cases - $bits += 2; - my $mask = 2**$bits-1; - print "# using mask: $mask ($bits)\n"; - - my @keys; - my $s = START; - my $c = 0; - # get 2 keys on top of the THRESHOLD - my $hash; - while (@keys < THRESHOLD+2) { - # next if exists $hash->{$s}; - $hash = hash($s); - next unless ($hash & $mask) == 0; - $c++; - printf "# %2d: %5s, %10s\n", $c, $s, $hash; - push @keys, $s; - } continue { - $s++; - } - - return @keys; -} - - -# trying to provide the fastest equivalent of C macro's PERL_HASH in -# Perl - the main complication is that it uses U32 integer, which we -# can't do it perl, without doing some tricks -sub hash { - my $s = shift; - my @c = split //, $s; - my $u = HASH_SEED; - for (@c) { - # (A % M) + (B % M) == (A + B) % M - # This works because '+' produces a NV, which is big enough to hold - # the intermediate result. We only need the % before any "^" and "&" - # to get the result in the range for an I32. - # and << doesn't work on NV, so using 1 << 10 - $u += ord; - $u += $u * (1 << 10); $u %= MASK_U32; - $u ^= $u >> 6; - } - $u += $u << 3; $u %= MASK_U32; - $u ^= $u >> 11; $u %= MASK_U32; - $u += $u << 15; $u %= MASK_U32; - $u; -} diff --git a/ext/Hash-Util/Util.xs b/ext/Hash-Util/Util.xs index 678e64d9b7..c8a692f8db 100644 --- a/ext/Hash-Util/Util.xs +++ b/ext/Hash-Util/Util.xs @@ -60,3 +60,163 @@ hv_store(hash, key, val) XSRETURN_YES; } } + +void +hash_seed() + PROTOTYPE: + PPCODE: + mXPUSHs(newSVpvn((char *)PERL_HASH_SEED,PERL_HASH_SEED_BYTES)); + XSRETURN(1); + +void +hash_value(string) + SV* string + PROTOTYPE: $ + PPCODE: + STRLEN len; + char *pv; + UV uv; + + pv= SvPV(string,len); + PERL_HASH(uv,pv,len); + XSRETURN_UV(uv); + + +void +bucket_info(rhv) + SV* rhv + PPCODE: +{ + /* + + Takes a non-magical hash ref as an argument and returns a list of + statistics about the hash. The number and keys and the size of the + array will always be reported as the first two values. If the array is + actually allocated (they are lazily allocated), then additionally + will return a list of counts of bucket lengths. In other words in + + ($keys, $buckets, $used, @length_count)= hash::bucket_info(\%hash); + + $length_count[0] is the number of empty buckets, and $length_count[1] + is the number of buckets with only one key in it, $buckets - $length_count[0] + gives the number of used buckets, and @length_count-1 is the maximum + bucket depth. + + If the argument is not a hash ref, or if it is magical, then returns + nothing (the empty list). + + */ + if (SvROK(rhv) && SvTYPE(SvRV(rhv))==SVt_PVHV && !SvMAGICAL(SvRV(rhv))) { + const HV * const hv = (const HV *) SvRV(rhv); + U32 max_bucket_index= HvMAX(hv); + U32 total_keys= HvUSEDKEYS(hv); + HE **bucket_array= HvARRAY(hv); + mXPUSHi(total_keys); + mXPUSHi(max_bucket_index+1); + mXPUSHi(0); /* for the number of used buckets */ +#define BUCKET_INFO_ITEMS_ON_STACK 3 + if (!bucket_array) { + XSRETURN(BUCKET_INFO_ITEMS_ON_STACK); + } else { + /* we use chain_length to index the stack - we eliminate an add + * by initializing things with the number of items already on the stack. + * If we have 2 items then ST(2+0) (the third stack item) will be the counter + * for empty chains, ST(2+1) will be for chains with one element, etc. + */ + I32 max_chain_length= BUCKET_INFO_ITEMS_ON_STACK - 1; /* so we do not have to do an extra push for the 0 index */ + HE *he; + U32 bucket_index; + for ( bucket_index= 0; bucket_index <= max_bucket_index; bucket_index++ ) { + I32 chain_length= BUCKET_INFO_ITEMS_ON_STACK; + for (he= bucket_array[bucket_index]; he; he= HeNEXT(he) ) { + chain_length++; + } + while ( max_chain_length < chain_length ) { + mXPUSHi(0); + max_chain_length++; + } + SvIVX( ST( chain_length ) )++; + } + /* now set the number of used buckets */ + SvIVX( ST( BUCKET_INFO_ITEMS_ON_STACK - 1 ) ) = max_bucket_index - SvIVX( ST( BUCKET_INFO_ITEMS_ON_STACK ) ) + 1; + XSRETURN( max_chain_length + 1 ); /* max_chain_length is the index of the last item on the stack, so we add 1 */ + } +#undef BUCKET_INFO_ITEMS_ON_STACK + } + XSRETURN(0); +} + +void +bucket_array(rhv) + SV* rhv + PPCODE: +{ + /* Returns an array of arrays representing key/bucket mappings. + * Each element of the array contains either an integer or a reference + * to an array of keys. A plain integer represents K empty buckets. An + * array ref represents a single bucket, with each element being a key in + * the hash. (Note this treats a placeholder as a normal key.) + * + * This allows one to "see" the keyorder. Note the "insert first" nature + * of the hash store, combined with regular remappings means that relative + * order of keys changes each remap. + */ + if (SvROK(rhv) && SvTYPE(SvRV(rhv))==SVt_PVHV && !SvMAGICAL(SvRV(rhv))) { + const HV * const hv = (const HV *) SvRV(rhv); + HE **he_ptr= HvARRAY(hv); + if (!he_ptr) { + XSRETURN(0); + } else { + U32 i, max; + AV *info_av; + HE *he; + I32 empty_count=0; + if (SvMAGICAL(hv)) { + Perl_croak(aTHX_ "hash::bucket_array only works on 'normal' hashes"); + } + info_av= newAV(); + max= HvMAX(hv); + mXPUSHs(newRV_noinc((SV*)info_av)); + for ( i= 0; i <= max; i++ ) { + AV *key_av= NULL; + for (he= he_ptr[i]; he; he= HeNEXT(he) ) { + SV *key_sv; + char *str; + STRLEN len; + char mode; + if (!key_av) { + key_av= newAV(); + if (empty_count) { + av_push(info_av, newSViv(empty_count)); + empty_count= 0; + } + av_push(info_av, (SV *)newRV_noinc((SV *)key_av)); + } + if (HeKLEN(he) == HEf_SVKEY) { + SV *sv= HeSVKEY(he); + SvGETMAGIC(sv); + str= SvPV(sv, len); + mode= SvUTF8(sv) ? 1 : 0; + } else { + str= HeKEY(he); + len= HeKLEN(he); + mode= HeKUTF8(he) ? 1 : 0; + } + key_sv= newSVpvn(str,len); + av_push(key_av,key_sv); + if (mode) { + SvUTF8_on(key_sv); + } + } + if (!key_av) + empty_count++; + } + if (empty_count) { + av_push(info_av, newSViv(empty_count)); + empty_count++; + } + } + XSRETURN(1); + } + XSRETURN(0); +} diff --git a/ext/Hash-Util/lib/Hash/Util.pm b/ext/Hash-Util/lib/Hash/Util.pm index d8a6ec888b..20a730e0c1 100644 --- a/ext/Hash-Util/lib/Hash/Util.pm +++ b/ext/Hash-Util/lib/Hash/Util.pm @@ -28,10 +28,11 @@ our @EXPORT_OK = qw( lock_ref_keys_plus hidden_ref_keys legal_ref_keys - hash_seed hv_store + hash_seed hash_value bucket_stats bucket_info bucket_array + hv_store lock_hash_recurse unlock_hash_recurse ); -our $VERSION = '0.12'; +our $VERSION = '0.13'; require XSLoader; XSLoader::load(); @@ -459,9 +460,7 @@ unrestricted hash. my $hash_seed = hash_seed(); -hash_seed() returns the seed number used to randomise hash ordering. -Zero means the "traditional" random hash ordering, non-zero means the -new even more random hash ordering introduced in Perl 5.8.1. +hash_seed() returns the seed bytes used to randomise hash ordering. B<Note that the hash seed is sensitive information>: by knowing it one can craft a denial-of-service attack against Perl code, even remotely, @@ -469,10 +468,100 @@ see L<perlsec/"Algorithmic Complexity Attacks"> for more information. B<Do not disclose the hash seed> to people who don't need to know it. See also L<perlrun/PERL_HASH_SEED_DEBUG>. +Prior to Perl 5.17.6 this function returned a UV, it now returns a string, +which may be of nearly any size as determined by the hash function your +Perl has been built with. Possible sizes may be but are not limited to +4 bytes (for most hash algorithms) and 16 bytes (for siphash). + +=item B<hash_value> + + my $hash_value = hash_value($string); + +hash_value() returns the current perls internal hash value for a given +string. + +Returns a 32 bit integer representing the hash value of the string passed +in. This value is only reliable for the lifetime of the process. It may +be different depending on invocation, environment variables, perl version, +architectures, and build options. + +B<Note that the hash value of a given string is sensitive information>: +by knowing it one can deduce the hash seed which in turn can allow one to +craft a denial-of-service attack against Perl code, even remotely, +see L<perlsec/"Algorithmic Complexity Attacks"> for more information. +B<Do not disclose the hash value of a string> to people who don't need to +know it. See also L<perlrun/PERL_HASH_SEED_DEBUG>. + +=item B<bucket_info> + +Return a set of basic information about a hash. + + my ($keys, $buckets, $used, @length_counts)= bucket_info($hash); + +Fields are as follows: + + 0: Number of keys in the hash + 1: Number of buckets in the hash + 2: Number of used buckets in the hash + rest : list of counts, Kth element is the number of buckets + with K keys in it. + +See also bucket_stats() and bucket_array(). + +=item B<bucket_stats> + +Returns a list of statistics about a hash. + + my ($keys, buckets, $used, $utilization_ratio, $collision_pct, + $mean, $stddev, @length_counts)= bucket_info($hashref); + + +Fields are as follows: + + + 0: Number of keys in the hash + 1: Number of buckets in the hash + 2: Number of used buckets in the hash + 3: Percent of buckets used + 4: Percent of keys which are in collision + 5: Average bucket length + 6: Standard Deviation of bucket lengths. + rest : list of counts, Kth element is the number of buckets + with K keys in it. + +See also bucket_info() and bucket_array(). + +=item B<bucket_array> + + my $array= bucket_array(\%hash); + +Returns a packed representation of the bucket array associated with a hash. Each element +of the array is either an integer K, in which case it represents K empty buckets, or +a reference to another array which contains the keys that are in that bucket. + +B<Note that the information returned by bucket_array is sensitive information>: +by knowing it one can directly attack perls hash function which in turn may allow +one to craft a denial-of-service attack against Perl code, even remotely, +see L<perlsec/"Algorithmic Complexity Attacks"> for more information. +B<Do not disclose the outputof this function> to people who don't need to +know it. See also L<perlrun/PERL_HASH_SEED_DEBUG>. This function is provided strictly +for debugging and diagnostics purposes only, it is hard to imagine a reason why it +would be used in production code. + =cut -sub hash_seed () { - Internals::rehash_seed(); + +sub bucket_stats { + my ($hash)= @_; + my ($keys, $buckets, $used, @length_counts)= bucket_info($hash); + my $sum; + $sum += ($length_counts[$_] * $_) for 0 .. $#length_counts; + my $mean= $sum/$buckets; + $sum= 0; + $sum += ($length_counts[$_] * (($_-$mean)**2)) for 0 .. $#length_counts; + + my $stddev= sqrt($sum/$buckets); + return $keys, $buckets, $used, $keys ? ($used/$buckets, ($keys-$used)/$keys, $mean, $stddev, @length_counts) : (); } =item B<hv_store> diff --git a/ext/Hash-Util/t/Util.t b/ext/Hash-Util/t/Util.t index d02defe9de..63769b8f02 100644 --- a/ext/Hash-Util/t/Util.t +++ b/ext/Hash-Util/t/Util.t @@ -33,10 +33,11 @@ BEGIN { lock_ref_keys_plus hidden_ref_keys legal_ref_keys - hash_seed hv_store + hash_seed hash_value bucket_stats bucket_info bucket_array + hv_store lock_hash_recurse unlock_hash_recurse ); - plan tests => 226 + @Exported_Funcs; + plan tests => 234 + @Exported_Funcs; use_ok 'Hash::Util', @Exported_Funcs; } foreach my $func (@Exported_Funcs) { @@ -326,7 +327,7 @@ like( } my $hash_seed = hash_seed(); -ok($hash_seed >= 0, "hash_seed $hash_seed"); +ok(defined($hash_seed) && $hash_seed ne '', "hash_seed $hash_seed"); { package Minder; @@ -530,3 +531,26 @@ ok($hash_seed >= 0, "hash_seed $hash_seed"); ok( hash_unlocked(%{$hash{c}[1]}), "unlock_hash_recurse(): element which is hashref in array ref not locked" ); } + +{ + my $h1= hash_value("foo"); + my $h2= hash_value("bar"); + is( $h1, hash_value("foo") ); + is( $h2, hash_value("bar") ); +} +{ + my @info1= bucket_info({}); + my @info2= bucket_info({1..10}); + my @stats1= bucket_stats({}); + my @stats2= bucket_stats({1..10}); + my $array1= bucket_array({}); + my $array2= bucket_array({1..10}); + is("@info1","0 8 0"); + is("@info2[0,1]","5 8"); + is("@stats1","0 8 0"); + is("@stats2[0,1]","5 8"); + my @keys1= sort map { ref $_ ? @$_ : () } @$array1; + my @keys2= sort map { ref $_ ? @$_ : () } @$array2; + is("@keys1",""); + is("@keys2","1 3 5 7 9"); +} @@ -613,18 +613,12 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen, } } - if (HvREHASH(hv) || (!hash && !(keysv && (SvIsCOW_shared_hash(keysv))))) - PERL_HASH_INTERNAL_(hash, key, klen, HvREHASH(hv)); - else if (!hash) - hash = SvSHARED_HASH(keysv); - - /* We don't have a pointer to the hv, so we have to replicate the - flag into every HEK, so that hv_iterkeysv can see it. - And yes, you do need this even though you are not "storing" because - you can flip the flags below if doing an lval lookup. (And that - was put in to give the semantics Andreas was expecting.) */ - if (HvREHASH(hv)) - flags |= HVhek_REHASH; + if (!hash) { + if (keysv && (SvIsCOW_shared_hash(keysv))) + hash = SvSHARED_HASH(keysv); + else + PERL_HASH(hash, key, klen); + } masked_flags = (flags & HVhek_MASK); @@ -813,7 +807,7 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen, as we repeatedly double the number of buckets on every entry. Linear search feels a less worse thing to do. */ hsplit(hv); - } else if(!HvREHASH(hv)) { + } else { U32 n_links = 1; while ((counter = HeNEXT(counter))) @@ -978,10 +972,12 @@ S_hv_delete_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen, HvHASKFLAGS_on(MUTABLE_SV(hv)); } - if (HvREHASH(hv) || (!hash && !(keysv && (SvIsCOW_shared_hash(keysv))))) - PERL_HASH_INTERNAL_(hash, key, klen, HvREHASH(hv)); - else if (!hash) - hash = SvSHARED_HASH(keysv); + if (!hash) { + if (keysv && (SvIsCOW_shared_hash(keysv))) + hash = SvSHARED_HASH(keysv); + else + PERL_HASH(hash, key, klen); + } masked_flags = (k_flags & HVhek_MASK); @@ -1118,8 +1114,6 @@ S_hsplit(pTHX_ HV *hv) I32 i; char *a = (char*) HvARRAY(hv); HE **aep; - int longest_chain = 0; - int was_shared; PERL_ARGS_ASSERT_HSPLIT; @@ -1166,8 +1160,6 @@ S_hsplit(pTHX_ HV *hv) aep = (HE**)a; for (i=0; i<oldsize; i++,aep++) { - int left_length = 0; - int right_length = 0; HE **oentry = aep; HE *entry = *aep; HE **bep; @@ -1180,91 +1172,16 @@ S_hsplit(pTHX_ HV *hv) *oentry = HeNEXT(entry); HeNEXT(entry) = *bep; *bep = entry; - right_length++; } else { oentry = &HeNEXT(entry); - left_length++; } entry = *oentry; } while (entry); /* I think we don't actually need to keep track of the longest length, merely flag if anything is too long. But for the moment while developing this code I'll track it. */ - if (left_length > longest_chain) - longest_chain = left_length; - if (right_length > longest_chain) - longest_chain = right_length; - } - - - /* Pick your policy for "hashing isn't working" here: */ - if (longest_chain <= HV_MAX_LENGTH_BEFORE_SPLIT /* split worked? */ - || HvREHASH(hv)) { - return; - } - - if (hv == PL_strtab) { - /* Urg. Someone is doing something nasty to the string table. - Can't win. */ - return; - } - - /* Awooga. Awooga. Pathological data. */ - /*PerlIO_printf(PerlIO_stderr(), "%p %d of %d with %d/%d buckets\n", (void*)hv, - longest_chain, HvTOTALKEYS(hv), HvFILL(hv), 1+HvMAX(hv));*/ - - ++newsize; - Newxz(a, PERL_HV_ARRAY_ALLOC_BYTES(newsize) - + (SvOOK(hv) ? sizeof(struct xpvhv_aux) : 0), char); - if (SvOOK(hv)) { - Copy(HvAUX(hv), &a[newsize * sizeof(HE*)], 1, struct xpvhv_aux); - } - - was_shared = HvSHAREKEYS(hv); - - HvSHAREKEYS_off(hv); - HvREHASH_on(hv); - - aep = HvARRAY(hv); - - for (i=0; i<newsize; i++,aep++) { - HE *entry = *aep; - while (entry) { - /* We're going to trash this HE's next pointer when we chain it - into the new hash below, so store where we go next. */ - HE * const next = HeNEXT(entry); - UV hash; - HE **bep; - - /* Rehash it */ - PERL_HASH_INTERNAL(hash, HeKEY(entry), HeKLEN(entry)); - - if (was_shared) { - /* Unshare it. */ - HEK * const new_hek - = save_hek_flags(HeKEY(entry), HeKLEN(entry), - hash, HeKFLAGS(entry)); - unshare_hek (HeKEY_hek(entry)); - HeKEY_hek(entry) = new_hek; - } else { - /* Not shared, so simply write the new hash in. */ - HeHASH(entry) = hash; - } - /*PerlIO_printf(PerlIO_stderr(), "%d ", HeKFLAGS(entry));*/ - HEK_REHASH_on(HeKEY_hek(entry)); - /*PerlIO_printf(PerlIO_stderr(), "%d\n", HeKFLAGS(entry));*/ - - /* Copy oentry to the correct new chain. */ - bep = ((HE**)a) + (hash & (I32) xhv->xhv_max); - HeNEXT(entry) = *bep; - *bep = entry; - - entry = next; - } } - Safefree (HvARRAY(hv)); - HvARRAY(hv) = (HE **)a; } void @@ -1606,7 +1523,6 @@ Perl_hv_clear(pTHX_ HV *hv) mg_clear(MUTABLE_SV(hv)); HvHASKFLAGS_off(hv); - HvREHASH_off(hv); } if (SvOOK(hv)) { if(HvENAME_get(hv)) @@ -2478,9 +2394,6 @@ Perl_hv_iternext_flags(pTHX_ HV *hv, I32 flags) hv_free_ent(hv, oldentry); } - /*if (HvREHASH(hv) && entry && !HeKREHASH(entry)) - PerlIO_printf(PerlIO_stderr(), "Awooga %p %p\n", (void*)hv, (void*)entry);*/ - iter->xhv_eiter = entry; /* HvEITER(hv) = entry */ return entry; } @@ -82,6 +82,7 @@ struct xpvhv_aux { AV *xhv_backreferences; /* back references for weak references */ HE *xhv_eiter; /* current entry of iterator */ I32 xhv_riter; /* current root of iterator */ + /* Concerning xhv_name_count: When non-zero, xhv_name_u contains a pointer * to an array of HEK pointers, this being the length. The first element is * the name of the stash, which may be NULL. If xhv_name_count is positive, @@ -103,9 +104,6 @@ struct xpvhv { }; /* hash a key */ -/* FYI: This is the "One-at-a-Time" algorithm by Bob Jenkins - * from requirements by Colin Plumb. - * (http://burtleburtle.net/bob/hash/doobs.html) */ /* The use of a temporary pointer and the casting games * is needed to serve the dual purposes of * (a) the hashed data being interpreted as "unsigned char" (new since 5.8, @@ -118,35 +116,513 @@ struct xpvhv { * If USE_HASH_SEED is defined, hash randomisation is done by default * If USE_HASH_SEED_EXPLICIT is defined, hash randomisation is done * only if the environment variable PERL_HASH_SEED is set. - * For maximal control, one can define PERL_HASH_SEED. - * (see also perl.c:perl_parse()). + * (see also perl.c:perl_parse() and S_init_tls_and_interp() and util.c:get_hash_seed()) */ #ifndef PERL_HASH_SEED # if defined(USE_HASH_SEED) || defined(USE_HASH_SEED_EXPLICIT) -# define PERL_HASH_SEED PL_hash_seed +# define PERL_HASH_SEED PL_hash_seed # else -# define PERL_HASH_SEED 0 +# define PERL_HASH_SEED "PeRlHaShhAcKpErl" # endif #endif -#define PERL_HASH(hash,str,len) PERL_HASH_INTERNAL_(hash,str,len,0) +#define PERL_HASH_SEED_U32 *((U32*)PERL_HASH_SEED) +#define PERL_HASH_SEED_U64_1 (((U64*)PERL_HASH_SEED)[0]) +#define PERL_HASH_SEED_U64_2 (((U64*)PERL_HASH_SEED)[1]) -/* Only hv.c and mod_perl should be doing this. */ +/* legacy - only mod_perl should be doing this. */ #ifdef PERL_HASH_INTERNAL_ACCESS -#define PERL_HASH_INTERNAL(hash,str,len) PERL_HASH_INTERNAL_(hash,str,len,1) +#define PERL_HASH_INTERNAL(hash,str,len) PERL_HASH(hash,str,len) +#endif + +/* Uncomment one of the following lines to use an alternative hash algorithm. +#define PERL_HASH_FUNC_SDBM +#define PERL_HASH_FUNC_DJB2 +#define PERL_HASH_FUNC_SUPERFAST +#define PERL_HASH_FUNC_MURMUR3 +#define PERL_HASH_FUNC_SIPHASH +#define PERL_HASH_FUNC_ONE_AT_A_TIME +*/ + +#if !(defined(PERL_HASH_FUNC_SDBM) || defined(PERL_HASH_FUNC_DJB2) || defined(PERL_HASH_FUNC_SUPERFAST) || defined(PERL_HASH_FUNC_MURMUR3) || defined(PERL_HASH_FUNC_ONE_AT_A_TIME)) +#define PERL_HASH_FUNC_MURMUR3 +#endif + +#if defined(PERL_HASH_FUNC_SIPHASH) +#define PERL_HASH_FUNC "SIPHASH" +#define PERL_HASH_SEED_BYTES 16 + +/* This is SipHash by Jean-Philippe Aumasson and Daniel J. Bernstein. + * The authors claim it is relatively secure compared to the alternatives + * and that performance wise it is a suitable hash for languages like Perl. + * See: + * + * https://www.131002.net/siphash/ + * + * This implementation seems to perform slightly slower than one-at-a-time for + * short keys, but degrades slower for longer keys. Murmur Hash outperforms it + * regardless of keys size. + * + * It is 64 bit only. + */ + +#define PERL_HASH_NEEDS_TWO_SEEDS + +#ifndef U64 +#define U64 uint64_t +#endif + +#define ROTL(x,b) (U64)( ((x) << (b)) | ( (x) >> (64 - (b))) ) + +#define U32TO8_LE(p, v) \ + (p)[0] = (U8)((v) ); (p)[1] = (U8)((v) >> 8); \ + (p)[2] = (U8)((v) >> 16); (p)[3] = (U8)((v) >> 24); + +#define U64TO8_LE(p, v) \ + U32TO8_LE((p), (U32)((v) )); \ + U32TO8_LE((p) + 4, (U32)((v) >> 32)); + +#define U8TO64_LE(p) \ + (((U64)((p)[0]) ) | \ + ((U64)((p)[1]) << 8) | \ + ((U64)((p)[2]) << 16) | \ + ((U64)((p)[3]) << 24) | \ + ((U64)((p)[4]) << 32) | \ + ((U64)((p)[5]) << 40) | \ + ((U64)((p)[6]) << 48) | \ + ((U64)((p)[7]) << 56)) + +#define SIPROUND \ + do { \ + v0_PeRlHaSh += v1_PeRlHaSh; v1_PeRlHaSh=ROTL(v1_PeRlHaSh,13); v1_PeRlHaSh ^= v0_PeRlHaSh; v0_PeRlHaSh=ROTL(v0_PeRlHaSh,32); \ + v2_PeRlHaSh += v3_PeRlHaSh; v3_PeRlHaSh=ROTL(v3_PeRlHaSh,16); v3_PeRlHaSh ^= v2_PeRlHaSh; \ + v0_PeRlHaSh += v3_PeRlHaSh; v3_PeRlHaSh=ROTL(v3_PeRlHaSh,21); v3_PeRlHaSh ^= v0_PeRlHaSh; \ + v2_PeRlHaSh += v1_PeRlHaSh; v1_PeRlHaSh=ROTL(v1_PeRlHaSh,17); v1_PeRlHaSh ^= v2_PeRlHaSh; v2_PeRlHaSh=ROTL(v2_PeRlHaSh,32); \ + } while(0) + +/* SipHash-2-4 */ +#define PERL_HASH(hash,str,len) STMT_START { \ + const char * const strtmp_PeRlHaSh = (str); \ + const unsigned char *in_PeRlHaSh = (const unsigned char *)strtmp_PeRlHaSh; \ + const U32 inlen_PeRlHaSh = (len); \ + /* "somepseudorandomlygeneratedbytes" */ \ + U64 v0_PeRlHaSh = 0x736f6d6570736575ULL; \ + U64 v1_PeRlHaSh = 0x646f72616e646f6dULL; \ + U64 v2_PeRlHaSh = 0x6c7967656e657261ULL; \ + U64 v3_PeRlHaSh = 0x7465646279746573ULL; \ +\ + U64 b_PeRlHaSh; \ + U64 k0_PeRlHaSh = PERL_HASH_SEED_U64_1; \ + U64 k1_PeRlHaSh = PERL_HASH_SEED_U64_2; \ + U64 m_PeRlHaSh; \ + const int left_PeRlHaSh = inlen_PeRlHaSh & 7; \ + const U8 *end_PeRlHaSh = in_PeRlHaSh + inlen_PeRlHaSh - left_PeRlHaSh; \ +\ + b_PeRlHaSh = ( ( U64 )(len) ) << 56; \ + v3_PeRlHaSh ^= k1_PeRlHaSh; \ + v2_PeRlHaSh ^= k0_PeRlHaSh; \ + v1_PeRlHaSh ^= k1_PeRlHaSh; \ + v0_PeRlHaSh ^= k0_PeRlHaSh; \ +\ + for ( ; in_PeRlHaSh != end_PeRlHaSh; in_PeRlHaSh += 8 ) \ + { \ + m_PeRlHaSh = U8TO64_LE( in_PeRlHaSh ); \ + v3_PeRlHaSh ^= m_PeRlHaSh; \ + SIPROUND; \ + SIPROUND; \ + v0_PeRlHaSh ^= m_PeRlHaSh; \ + } \ +\ + switch( left_PeRlHaSh ) \ + { \ + case 7: b_PeRlHaSh |= ( ( U64 )in_PeRlHaSh[ 6] ) << 48; \ + case 6: b_PeRlHaSh |= ( ( U64 )in_PeRlHaSh[ 5] ) << 40; \ + case 5: b_PeRlHaSh |= ( ( U64 )in_PeRlHaSh[ 4] ) << 32; \ + case 4: b_PeRlHaSh |= ( ( U64 )in_PeRlHaSh[ 3] ) << 24; \ + case 3: b_PeRlHaSh |= ( ( U64 )in_PeRlHaSh[ 2] ) << 16; \ + case 2: b_PeRlHaSh |= ( ( U64 )in_PeRlHaSh[ 1] ) << 8; \ + case 1: b_PeRlHaSh |= ( ( U64 )in_PeRlHaSh[ 0] ); break; \ + case 0: break; \ + } \ +\ + v3_PeRlHaSh ^= b_PeRlHaSh; \ + SIPROUND; \ + SIPROUND; \ + v0_PeRlHaSh ^= b_PeRlHaSh; \ +\ + v2_PeRlHaSh ^= 0xff; \ + SIPROUND; \ + SIPROUND; \ + SIPROUND; \ + SIPROUND; \ + b_PeRlHaSh = v0_PeRlHaSh ^ v1_PeRlHaSh ^ v2_PeRlHaSh ^ v3_PeRlHaSh; \ + (hash)= (U32)(b_PeRlHaSh & U32_MAX); \ +} STMT_END + +#elif defined(PERL_HASH_FUNC_SUPERFAST) +#define PERL_HASH_FUNC "SUPERFAST" +/* FYI: This is the "Super-Fast" algorithm mentioned by Bob Jenkins in + * (http://burtleburtle.net/bob/hash/doobs.html) + * It is by Paul Hsieh (c) 2004 and is analysed here + * http://www.azillionmonkeys.com/qed/hash.html + * license terms are here: + * http://www.azillionmonkeys.com/qed/weblicense.html + */ +#undef get16bits +#if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__) \ + || defined(_MSC_VER) || defined (__BORLANDC__) || defined (__TURBOC__) +#define get16bits(d) (*((const U16 *) (d))) #endif -/* Common base for PERL_HASH and PERL_HASH_INTERNAL that parameterises - * the source of the seed. Not for direct use outside of hv.c. */ +#if !defined (get16bits) +#define get16bits(d) ((((const U8 *)(d))[1] << UINT32_C(8))\ + +((const U8 *)(d))[0]) +#endif +#define PERL_HASH(hash,str,len) \ + STMT_START { \ + register const char * const strtmp_PeRlHaSh = (str); \ + register const unsigned char *str_PeRlHaSh = (const unsigned char *)strtmp_PeRlHaSh; \ + register U32 len_PeRlHaSh = (len); \ + register U32 hash_PeRlHaSh = PERL_HASH_SEED_U32 ^ len; \ + register U32 tmp_PeRlHaSh; \ + register int rem_PeRlHaSh= len_PeRlHaSh & 3; \ + len_PeRlHaSh >>= 2; \ + \ + for (;len_PeRlHaSh > 0; len_PeRlHaSh--) { \ + hash_PeRlHaSh += get16bits (str_PeRlHaSh); \ + tmp_PeRlHaSh = (get16bits (str_PeRlHaSh+2) << 11) ^ hash_PeRlHaSh; \ + hash_PeRlHaSh = (hash_PeRlHaSh << 16) ^ tmp_PeRlHaSh; \ + str_PeRlHaSh += 2 * sizeof (U16); \ + hash_PeRlHaSh += hash_PeRlHaSh >> 11; \ + } \ + \ + /* Handle end cases */ \ + switch (rem_PeRlHaSh) { \ + case 3: hash_PeRlHaSh += get16bits (str_PeRlHaSh); \ + hash_PeRlHaSh ^= hash_PeRlHaSh << 16; \ + hash_PeRlHaSh ^= str_PeRlHaSh[sizeof (U16)] << 18; \ + hash_PeRlHaSh += hash_PeRlHaSh >> 11; \ + break; \ + case 2: hash_PeRlHaSh += get16bits (str_PeRlHaSh); \ + hash_PeRlHaSh ^= hash_PeRlHaSh << 11; \ + hash_PeRlHaSh += hash_PeRlHaSh >> 17; \ + break; \ + case 1: hash_PeRlHaSh += *str_PeRlHaSh; \ + hash_PeRlHaSh ^= hash_PeRlHaSh << 10; \ + hash_PeRlHaSh += hash_PeRlHaSh >> 1; \ + } \ + \ + /* Force "avalanching" of final 127 bits */ \ + hash_PeRlHaSh ^= hash_PeRlHaSh << 3; \ + hash_PeRlHaSh += hash_PeRlHaSh >> 5; \ + hash_PeRlHaSh ^= hash_PeRlHaSh << 4; \ + hash_PeRlHaSh += hash_PeRlHaSh >> 17; \ + hash_PeRlHaSh ^= hash_PeRlHaSh << 25; \ + (hash) = (hash_PeRlHaSh + (hash_PeRlHaSh >> 6)); \ + } STMT_END + +#elif defined(PERL_HASH_FUNC_MURMUR3) +#define PERL_HASH_FUNC "MURMUR3" +#define PERL_HASH_SEED_BYTES 4 + +/*----------------------------------------------------------------------------- + * MurmurHash3 was written by Austin Appleby, and is placed in the public + * domain. + * + * This implementation was originally written by Shane Day, and is also public domain, + * and was modified to function as a macro similar to other perl hash functions by + * Yves Orton. + * + * This is a portable ANSI C implementation of MurmurHash3_x86_32 (Murmur3A) + * with support for progressive processing. + * + * If you want to understand the MurmurHash algorithm you would be much better + * off reading the original source. Just point your browser at: + * http://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp + * + * How does it work? + * + * We can only process entire 32 bit chunks of input, except for the very end + * that may be shorter. + * + * To handle endianess I simply use a macro that reads a U32 and define + * that macro to be a direct read on little endian machines, a read and swap + * on big endian machines, or a byte-by-byte read if the endianess is unknown. + */ + + +/*----------------------------------------------------------------------------- + * Endianess, misalignment capabilities and util macros + * + * The following 3 macros are defined in this section. The other macros defined + * are only needed to help derive these 3. + * + * MURMUR_READ_UINT32(x) Read a little endian unsigned 32-bit int + * MURMUR_UNALIGNED_SAFE Defined if READ_UINT32 works on non-word boundaries + * MURMUR_ROTL32(x,r) Rotate x left by r bits + */ -#define PERL_HASH_INTERNAL_(hash,str,len,internal) \ +/* Convention is to define __BYTE_ORDER == to one of these values */ +#if !defined(__BIG_ENDIAN) + #define __BIG_ENDIAN 4321 +#endif +#if !defined(__LITTLE_ENDIAN) + #define __LITTLE_ENDIAN 1234 +#endif + +/* I386 */ +#if defined(_M_IX86) || defined(__i386__) || defined(__i386) || defined(i386) + #define __BYTE_ORDER __LITTLE_ENDIAN + #define MURMUR_UNALIGNED_SAFE +#endif + +/* gcc 'may' define __LITTLE_ENDIAN__ or __BIG_ENDIAN__ to 1 (Note the trailing __), + * or even _LITTLE_ENDIAN or _BIG_ENDIAN (Note the single _ prefix) */ +#if !defined(__BYTE_ORDER) + #if defined(__LITTLE_ENDIAN__) && __LITTLE_ENDIAN__==1 || defined(_LITTLE_ENDIAN) && _LITTLE_ENDIAN==1 + #define __BYTE_ORDER __LITTLE_ENDIAN + #elif defined(__BIG_ENDIAN__) && __BIG_ENDIAN__==1 || defined(_BIG_ENDIAN) && _BIG_ENDIAN==1 + #define __BYTE_ORDER __BIG_ENDIAN + #endif +#endif + +/* gcc (usually) defines xEL/EB macros for ARM and MIPS endianess */ +#if !defined(__BYTE_ORDER) + #if defined(__ARMEL__) || defined(__MIPSEL__) + #define __BYTE_ORDER __LITTLE_ENDIAN + #endif + #if defined(__ARMEB__) || defined(__MIPSEB__) + #define __BYTE_ORDER __BIG_ENDIAN + #endif +#endif + +/* Now find best way we can to READ_UINT32 */ +#if __BYTE_ORDER==__LITTLE_ENDIAN + /* CPU endian matches murmurhash algorithm, so read 32-bit word directly */ + #define MURMUR_READ_UINT32(ptr) (*((U32*)(ptr))) +#elif __BYTE_ORDER==__BIG_ENDIAN + /* TODO: Add additional cases below where a compiler provided bswap32 is available */ + #if defined(__GNUC__) && (__GNUC__>4 || (__GNUC__==4 && __GNUC_MINOR__>=3)) + #define MURMUR_READ_UINT32(ptr) (__builtin_bswap32(*((U32*)(ptr)))) + #else + /* Without a known fast bswap32 we're just as well off doing this */ + #define MURMUR_READ_UINT32(ptr) (ptr[0]|ptr[1]<<8|ptr[2]<<16|ptr[3]<<24) + #define MURMUR_UNALIGNED_SAFE + #endif +#else + /* Unknown endianess so last resort is to read individual bytes */ + #define MURMUR_READ_UINT32(ptr) (ptr[0]|ptr[1]<<8|ptr[2]<<16|ptr[3]<<24) + + /* Since we're not doing word-reads we can skip the messing about with realignment */ + #define MURMUR_UNALIGNED_SAFE +#endif + +/* Find best way to ROTL32 */ +#if defined(_MSC_VER) + #include <stdlib.h> /* Microsoft put _rotl declaration in here */ + #define MURMUR_ROTL32(x,r) _rotl(x,r) +#else + /* gcc recognises this code and generates a rotate instruction for CPUs with one */ + #define MURMUR_ROTL32(x,r) (((U32)x << r) | ((U32)x >> (32 - r))) +#endif + + +/*----------------------------------------------------------------------------- + * Core murmurhash algorithm macros */ + +#define MURMUR_C1 (0xcc9e2d51) +#define MURMUR_C2 (0x1b873593) +#define MURMUR_C3 (0xe6546b64) +#define MURMUR_C4 (0x85ebca6b) +#define MURMUR_C5 (0xc2b2ae35) + +/* This is the main processing body of the algorithm. It operates + * on each full 32-bits of input. */ +#define MURMUR_DOBLOCK(h1, k1) STMT_START { \ + k1 *= MURMUR_C1; \ + k1 = MURMUR_ROTL32(k1,15); \ + k1 *= MURMUR_C2; \ + \ + h1 ^= k1; \ + h1 = MURMUR_ROTL32(h1,13); \ + h1 = h1 * 5 + MURMUR_C3; \ +} STMT_END + + +/* Append unaligned bytes to carry, forcing hash churn if we have 4 bytes */ +/* cnt=bytes to process, h1=name of h1 var, c=carry, n=bytes in c, ptr/len=payload */ +#define MURMUR_DOBYTES(cnt, h1, c, n, ptr, len) STMT_START { \ + int MURMUR_DOBYTES_i = cnt; \ + while(MURMUR_DOBYTES_i--) { \ + c = c>>8 | *ptr++<<24; \ + n++; len--; \ + if(n==4) { \ + MURMUR_DOBLOCK(h1, c); \ + n = 0; \ + } \ + } \ +} STMT_END + +/* process the last 1..3 bytes and finalize */ +#define MURMUR_FINALIZE(hash, PeRlHaSh_len, PeRlHaSh_k1, PeRlHaSh_h1, PeRlHaSh_carry, PeRlHaSh_bytes_in_carry, PeRlHaSh_ptr, PeRlHaSh_total_length) STMT_START { \ + /* Advance over whole 32-bit chunks, possibly leaving 1..3 bytes */\ + PeRlHaSh_len -= PeRlHaSh_len/4*4; \ + \ + /* Append any remaining bytes into carry */ \ + MURMUR_DOBYTES(PeRlHaSh_len, PeRlHaSh_h1, PeRlHaSh_carry, PeRlHaSh_bytes_in_carry, PeRlHaSh_ptr, PeRlHaSh_len); \ + \ + if (PeRlHaSh_bytes_in_carry) { \ + PeRlHaSh_k1 = PeRlHaSh_carry >> ( 4 - PeRlHaSh_bytes_in_carry ) * 8; \ + PeRlHaSh_k1 *= MURMUR_C1; \ + PeRlHaSh_k1 = MURMUR_ROTL32(PeRlHaSh_k1,15); \ + PeRlHaSh_k1 *= MURMUR_C2; \ + PeRlHaSh_h1 ^= PeRlHaSh_k1; \ + } \ + PeRlHaSh_h1 ^= PeRlHaSh_total_length; \ + \ + /* fmix */ \ + PeRlHaSh_h1 ^= PeRlHaSh_h1 >> 16; \ + PeRlHaSh_h1 *= MURMUR_C4; \ + PeRlHaSh_h1 ^= PeRlHaSh_h1 >> 13; \ + PeRlHaSh_h1 *= MURMUR_C5; \ + PeRlHaSh_h1 ^= PeRlHaSh_h1 >> 16; \ + (hash)= PeRlHaSh_h1; \ +} STMT_END + +/* now we create the hash function */ + +#if defined(UNALIGNED_SAFE) +#define PERL_HASH(hash,str,len) STMT_START { \ + register const char * const s_PeRlHaSh_tmp = (str); \ + register const unsigned char *PeRlHaSh_ptr = (const unsigned char *)s_PeRlHaSh_tmp; \ + register I32 PeRlHaSh_len = len; \ + \ + U32 PeRlHaSh_h1 = PERL_HASH_SEED_U32; \ + U32 PeRlHaSh_k1; \ + U32 PeRlHaSh_carry = 0; \ + \ + const unsigned char *PeRlHaSh_end; \ + \ + int PeRlHaSh_bytes_in_carry = 0; /* bytes in carry */ \ + I32 PeRlHaSh_total_length= PeRlHaSh_len; \ + \ + /* This CPU handles unaligned word access */ \ + /* Process 32-bit chunks */ \ + PeRlHaSh_end = PeRlHaSh_ptr + PeRlHaSh_len/4*4; \ + for( ; PeRlHaSh_ptr < PeRlHaSh_end ; PeRlHaSh_ptr+=4) { \ + PeRlHaSh_k1 = MURMUR_READ_UINT32(PeRlHaSh_ptr); \ + MURMUR_DOBLOCK(PeRlHaSh_h1, PeRlHaSh_k1); \ + } \ + \ + MURMUR_FINALIZE(hash, PeRlHaSh_len, PeRlHaSh_k1, PeRlHaSh_h1, PeRlHaSh_carry, PeRlHaSh_bytes_in_carry, PeRlHaSh_ptr, PeRlHaSh_total_length);\ + } STMT_END +#else +#define PERL_HASH(hash,str,len) STMT_START { \ + register const char * const s_PeRlHaSh_tmp = (str); \ + register const unsigned char *PeRlHaSh_ptr = (const unsigned char *)s_PeRlHaSh_tmp; \ + register I32 PeRlHaSh_len = len; \ + \ + U32 PeRlHaSh_h1 = PERL_HASH_SEED_U32; \ + U32 PeRlHaSh_k1; \ + U32 PeRlHaSh_carry = 0; \ + \ + const unsigned char *PeRlHaSh_end; \ + \ + int PeRlHaSh_bytes_in_carry = 0; /* bytes in carry */ \ + I32 PeRlHaSh_total_length= PeRlHaSh_len; \ + \ + /* This CPU does not handle unaligned word access */ \ + \ + /* Consume enough so that the next data byte is word aligned */ \ + int PeRlHaSh_i = -(long)PeRlHaSh_ptr & 3; \ + if(PeRlHaSh_i && PeRlHaSh_i <= PeRlHaSh_len) { \ + MURMUR_DOBYTES(PeRlHaSh_i, PeRlHaSh_h1, PeRlHaSh_carry, PeRlHaSh_bytes_in_carry, PeRlHaSh_ptr, PeRlHaSh_len);\ + } \ + \ + /* We're now aligned. Process in aligned blocks. Specialise for each possible carry count */ \ + PeRlHaSh_end = PeRlHaSh_ptr + PeRlHaSh_len/4*4; \ + switch(PeRlHaSh_bytes_in_carry) { /* how many bytes in carry */ \ + case 0: /* c=[----] w=[3210] b=[3210]=w c'=[----] */ \ + for( ; PeRlHaSh_ptr < PeRlHaSh_end ; PeRlHaSh_ptr+=4) { \ + PeRlHaSh_k1 = MURMUR_READ_UINT32(PeRlHaSh_ptr); \ + MURMUR_DOBLOCK(PeRlHaSh_h1, PeRlHaSh_k1); \ + } \ + break; \ + case 1: /* c=[0---] w=[4321] b=[3210]=c>>24|w<<8 c'=[4---] */ \ + for( ; PeRlHaSh_ptr < PeRlHaSh_end ; PeRlHaSh_ptr+=4) { \ + PeRlHaSh_k1 = PeRlHaSh_carry>>24; \ + PeRlHaSh_carry = MURMUR_READ_UINT32(PeRlHaSh_ptr); \ + PeRlHaSh_k1 |= PeRlHaSh_carry<<8; \ + MURMUR_DOBLOCK(PeRlHaSh_h1, PeRlHaSh_k1); \ + } \ + break; \ + case 2: /* c=[10--] w=[5432] b=[3210]=c>>16|w<<16 c'=[54--] */ \ + for( ; PeRlHaSh_ptr < PeRlHaSh_end ; PeRlHaSh_ptr+=4) { \ + PeRlHaSh_k1 = PeRlHaSh_carry>>16; \ + PeRlHaSh_carry = MURMUR_READ_UINT32(PeRlHaSh_ptr); \ + PeRlHaSh_k1 |= PeRlHaSh_carry<<16; \ + MURMUR_DOBLOCK(PeRlHaSh_h1, PeRlHaSh_k1); \ + } \ + break; \ + case 3: /* c=[210-] w=[6543] b=[3210]=c>>8|w<<24 c'=[654-] */ \ + for( ; PeRlHaSh_ptr < PeRlHaSh_end ; PeRlHaSh_ptr+=4) { \ + PeRlHaSh_k1 = PeRlHaSh_carry>>8; \ + PeRlHaSh_carry = MURMUR_READ_UINT32(PeRlHaSh_ptr); \ + PeRlHaSh_k1 |= PeRlHaSh_carry<<24; \ + MURMUR_DOBLOCK(PeRlHaSh_h1, PeRlHaSh_k1); \ + } \ + } \ + \ + MURMUR_FINALIZE(hash, PeRlHaSh_len, PeRlHaSh_k1, PeRlHaSh_h1, PeRlHaSh_carry, PeRlHaSh_bytes_in_carry, PeRlHaSh_ptr, PeRlHaSh_total_length);\ + } STMT_END +#endif + +#elif defined(PERL_HASH_FUNC_DJB2) +#define PERL_HASH_FUNC "DJB2" +#define PERL_HASH_SEED_BYTES 4 +#define PERL_HASH(hash,str,len) \ + STMT_START { \ + register const char * const s_PeRlHaSh_tmp = (str); \ + register const unsigned char *s_PeRlHaSh = (const unsigned char *)s_PeRlHaSh_tmp; \ + register I32 i_PeRlHaSh = len; \ + register U32 hash_PeRlHaSh = PERL_HASH_SEED_U32 ^ len; \ + while (i_PeRlHaSh--) { \ + hash_PeRlHaSh = ((hash_PeRlHaSh << 5) + hash_PeRlHaSh) + *s_PeRlHaSh++; \ + } \ + (hash) = hash_PeRlHaSh;\ + } STMT_END + +#elif defined(PERL_HASH_FUNC_SDBM) +#define PERL_HASH_FUNC "SDBM" +#define PERL_HASH_SEED_BYTES 4 +#define PERL_HASH(hash,str,len) \ + STMT_START { \ + register const char * const s_PeRlHaSh_tmp = (str); \ + register const unsigned char *s_PeRlHaSh = (const unsigned char *)s_PeRlHaSh_tmp; \ + register I32 i_PeRlHaSh = len; \ + register U32 hash_PeRlHaSh = PERL_HASH_SEED_U32 ^ len; \ + while (i_PeRlHaSh--) { \ + hash_PeRlHaSh = (hash_PeRlHaSh << 6) + (hash_PeRlHaSh << 16) - hash_PeRlHaSh + *s_PeRlHaSh++; \ + } \ + (hash) = hash_PeRlHaSh;\ + } STMT_END + +#elif defined(PERL_HASH_FUNC_ONE_AT_A_TIME) +/* DEFAULT/HISTORIC HASH FUNCTION */ +#define PERL_HASH_FUNC "ONE_AT_A_TIME" +#define PERL_HASH_SEED_BYTES 4 + +/* FYI: This is the "One-at-a-Time" algorithm by Bob Jenkins + * from requirements by Colin Plumb. + * (http://burtleburtle.net/bob/hash/doobs.html) */ +#define PERL_HASH(hash,str,len) \ STMT_START { \ - const char * const s_PeRlHaSh_tmp = str; \ - const unsigned char *s_PeRlHaSh = (const unsigned char *)s_PeRlHaSh_tmp; \ - I32 i_PeRlHaSh = len; \ - U32 hash_PeRlHaSh = (internal ? PL_rehash_seed : PERL_HASH_SEED); \ + register const char * const s_PeRlHaSh_tmp = (str); \ + register const unsigned char *s_PeRlHaSh = (const unsigned char *)s_PeRlHaSh_tmp; \ + register I32 i_PeRlHaSh = len; \ + register U32 hash_PeRlHaSh = PERL_HASH_SEED_U32 ^ len; \ while (i_PeRlHaSh--) { \ - hash_PeRlHaSh += *s_PeRlHaSh++; \ + hash_PeRlHaSh += (U8)*s_PeRlHaSh++; \ hash_PeRlHaSh += (hash_PeRlHaSh << 10); \ hash_PeRlHaSh ^= (hash_PeRlHaSh >> 6); \ } \ @@ -154,7 +630,10 @@ struct xpvhv { hash_PeRlHaSh ^= (hash_PeRlHaSh >> 11); \ (hash) = (hash_PeRlHaSh + (hash_PeRlHaSh << 15)); \ } STMT_END - +#endif +#ifndef PERL_HASH +#error "No hash function defined!" +#endif /* =head1 Hash Manipulation Functions @@ -358,10 +837,6 @@ C<SV*>. #define HvLAZYDEL_on(hv) (SvFLAGS(hv) |= SVphv_LAZYDEL) #define HvLAZYDEL_off(hv) (SvFLAGS(hv) &= ~SVphv_LAZYDEL) -#define HvREHASH(hv) (SvFLAGS(hv) & SVphv_REHASH) -#define HvREHASH_on(hv) (SvFLAGS(hv) |= SVphv_REHASH) -#define HvREHASH_off(hv) (SvFLAGS(hv) &= ~SVphv_REHASH) - #ifndef PERL_CORE # define Nullhe Null(HE*) #endif @@ -372,7 +847,6 @@ C<SV*>. #define HeKLEN(he) HEK_LEN(HeKEY_hek(he)) #define HeKUTF8(he) HEK_UTF8(HeKEY_hek(he)) #define HeKWASUTF8(he) HEK_WASUTF8(HeKEY_hek(he)) -#define HeKREHASH(he) HEK_REHASH(HeKEY_hek(he)) #define HeKLEN_UTF8(he) (HeKUTF8(he) ? -HeKLEN(he) : HeKLEN(he)) #define HeKFLAGS(he) HEK_FLAGS(HeKEY_hek(he)) #define HeVAL(he) (he)->he_valu.hent_val @@ -407,7 +881,6 @@ C<SV*>. #define HVhek_UTF8 0x01 /* Key is utf8 encoded. */ #define HVhek_WASUTF8 0x02 /* Key is bytes here, but was supplied as utf8. */ -#define HVhek_REHASH 0x04 /* This key is in an hv using a custom HASH . */ #define HVhek_UNSHARED 0x08 /* This key isn't a shared hash key. */ #define HVhek_FREEKEY 0x100 /* Internal flag to say key is malloc()ed. */ #define HVhek_PLACEHOLD 0x200 /* Internal flag to create placeholder. @@ -417,16 +890,7 @@ C<SV*>. converted to bytes. */ #define HVhek_MASK 0xFF -/* Which flags enable HvHASKFLAGS? Somewhat a hack on a hack, as - HVhek_REHASH is only needed because the rehash flag has to be duplicated - into all keys as hv_iternext has no access to the hash flags. At this - point Storable's tests get upset, because sometimes hashes are "keyed" - and sometimes not, depending on the order of data insertion, and whether - it triggered rehashing. So currently HVhek_REHASH is exempt. - Similarly UNSHARED -*/ - -#define HVhek_ENABLEHVKFLAGS (HVhek_MASK & ~(HVhek_REHASH|HVhek_UNSHARED)) +#define HVhek_ENABLEHVKFLAGS (HVhek_MASK & ~(HVhek_UNSHARED)) #define HEK_UTF8(hek) (HEK_FLAGS(hek) & HVhek_UTF8) #define HEK_UTF8_on(hek) (HEK_FLAGS(hek) |= HVhek_UTF8) @@ -434,8 +898,6 @@ C<SV*>. #define HEK_WASUTF8(hek) (HEK_FLAGS(hek) & HVhek_WASUTF8) #define HEK_WASUTF8_on(hek) (HEK_FLAGS(hek) |= HVhek_WASUTF8) #define HEK_WASUTF8_off(hek) (HEK_FLAGS(hek) &= ~HVhek_WASUTF8) -#define HEK_REHASH(hek) (HEK_FLAGS(hek) & HVhek_REHASH) -#define HEK_REHASH_on(hek) (HEK_FLAGS(hek) |= HVhek_REHASH) /* calculate HV array allocation */ #ifndef PERL_USE_LARGE_HV_ALLOC diff --git a/intrpvar.h b/intrpvar.h index b6d69ed49d..2a2913c04b 100644 --- a/intrpvar.h +++ b/intrpvar.h @@ -162,7 +162,6 @@ PERLVARI(I, dumpindent, U16, 4) /* number of blanks per dump PERLVAR(I, utf8locale, bool) /* utf8 locale detected */ -PERLVARI(I, rehash_seed_set, bool, FALSE) /* 582 hash initialized? */ PERLVARA(I, colors,6, char *) /* from regcomp.c */ @@ -741,10 +740,6 @@ PERLVARI(I, destroyhook, destroyable_proc_t, Perl_sv_destroyable) PERLVARI(I, signalhook, despatch_signals_proc_t, Perl_despatch_signals) #endif -PERLVARI(I, hash_seed, UV, 0) /* Hash initializer */ - -PERLVARI(I, rehash_seed, UV, 0) /* 582 hash initializer */ - PERLVARI(I, isarev, HV *, NULL) /* Reverse map of @ISA dependencies */ /* Register of known Method Resolution Orders. @@ -770,6 +765,7 @@ PERLVAR(I, custom_ops, HV *) /* custom op registrations */ PERLVARI(I, globhook, globhook_t, NULL) PERLVARI(I, glob_index, int, 0) + PERLVAR(I, reentrant_retint, int) /* Integer return value from reentrant functions */ /* The last unconditional member of the interpreter structure when 5.10.0 was @@ -290,6 +290,19 @@ perl_construct(pTHXx) #ifdef USE_REENTRANT_API Perl_reentrant_init(aTHX); #endif +#if defined(USE_HASH_SEED) || defined(USE_HASH_SEED_EXPLICIT) + /* [perl #22371] Algorimic Complexity Attack on Perl 5.6.1, 5.8.0 + * This MUST be done before any hash stores or fetches take place. + * If you set PL_hash_seed (and presumably also PL_hash_seed_set) + * yourself, it is your responsibility to provide a good random seed! + * You can also define PERL_HASH_SEED in compile time, see hv.h. + * + * XXX: fix this comment */ + if (PL_hash_seed_set == FALSE) { + Perl_get_hash_seed(aTHX_ PL_hash_seed); + PL_hash_seed_set= TRUE; + } +#endif /* #if defined(USE_HASH_SEED) || defined(USE_HASH_SEED_EXPLICIT) */ /* Note that strtab is a rather special HV. Assumptions are made about not iterating on it, and not adding tie magic to it. @@ -1476,23 +1489,21 @@ perl_parse(pTHXx_ XSINIT_t xsinit, int argc, char **argv, char **env) #ifndef MULTIPLICITY PERL_UNUSED_ARG(my_perl); #endif - -#if defined(USE_HASH_SEED) || defined(USE_HASH_SEED_EXPLICIT) - /* [perl #22371] Algorimic Complexity Attack on Perl 5.6.1, 5.8.0 - * This MUST be done before any hash stores or fetches take place. - * If you set PL_rehash_seed (and presumably also PL_rehash_seed_set) - * yourself, it is your responsibility to provide a good random seed! - * You can also define PERL_HASH_SEED in compile time, see hv.h. */ - if (!PL_rehash_seed_set) - PL_rehash_seed = get_hash_seed(); +#if defined(USE_HASH_SEED) || defined(USE_HASH_SEED_EXPLICIT) || defined(USE_HASH_SEED_DEBUG) { - const char * const s = PerlEnv_getenv("PERL_HASH_SEED_DEBUG"); - - if (s && (atoi(s) == 1)) - PerlIO_printf(Perl_debug_log, "HASH_SEED = %"UVuf"\n", PL_rehash_seed); + const char * const s = PerlEnv_getenv("PERL_HASH_SEED_DEBUG"); + + if (s && (atoi(s) == 1)) { + unsigned char *seed= PERL_HASH_SEED; + unsigned char *seed_end= PERL_HASH_SEED + PERL_HASH_SEED_BYTES; + PerlIO_printf(Perl_debug_log, "HASH_FUNCTION = %s HASH_SEED = 0x", PERL_HASH_FUNC); + while (seed < seed_end) { + PerlIO_printf(Perl_debug_log, "%02x", *seed++); + } + PerlIO_printf(Perl_debug_log, "\n"); + } } #endif /* #if defined(USE_HASH_SEED) || defined(USE_HASH_SEED_EXPLICIT) */ - PL_origargc = argc; PL_origargv = argv; @@ -115,6 +115,10 @@ END_EXTERN_C #define PL_dollarzero_mutex (*Perl_Gdollarzero_mutex_ptr(NULL)) #undef PL_fold_locale #define PL_fold_locale (*Perl_Gfold_locale_ptr(NULL)) +#undef PL_hash_seed +#define PL_hash_seed (*Perl_Ghash_seed_ptr(NULL)) +#undef PL_hash_seed_set +#define PL_hash_seed_set (*Perl_Ghash_seed_set_ptr(NULL)) #undef PL_hints_mutex #define PL_hints_mutex (*Perl_Ghints_mutex_ptr(NULL)) #undef PL_keyword_plugin diff --git a/perlvars.h b/perlvars.h index 20c3882fc8..68471a0b80 100644 --- a/perlvars.h +++ b/perlvars.h @@ -231,3 +231,6 @@ PERLVAR(G, sv_placeholder, SV) #if defined(MYMALLOC) && defined(USE_ITHREADS) PERLVAR(G, malloc_mutex, perl_mutex) /* Mutex for malloc */ #endif + +PERLVARI(G, hash_seed_set, bool, FALSE) /* perl.c */ +PERLVARA(G, hash_seed, 8, unsigned char) /* and hv.h */ @@ -1129,8 +1129,10 @@ PERL_CALLCONV void Perl_get_db_sub(pTHX_ SV **svp, CV *cv) #define PERL_ARGS_ASSERT_GET_DB_SUB \ assert(cv) -PERL_CALLCONV UV Perl_get_hash_seed(pTHX) - __attribute__warn_unused_result__; +PERL_CALLCONV void Perl_get_hash_seed(pTHX_ unsigned char *seed_buffer) + __attribute__nonnull__(pTHX_1); +#define PERL_ARGS_ASSERT_GET_HASH_SEED \ + assert(seed_buffer) PERL_CALLCONV HV* Perl_get_hv(pTHX_ const char *name, I32 flags) __attribute__nonnull__(pTHX_1); @@ -4566,7 +4566,7 @@ Perl_sv_sethek(pTHX_ register SV *const sv, const HEK *const hek) sv_usepvn_flags(sv, as_utf8, utf8_len, SV_HAS_TRAILING_NUL); SvUTF8_on(sv); return; - } else if (flags & (HVhek_REHASH|HVhek_UNSHARED)) { + } else if (flags & HVhek_UNSHARED) { sv_setpvn(sv, HEK_KEY(hek), HEK_LEN(hek)); if (HEK_UTF8(hek)) SvUTF8_on(sv); @@ -8457,13 +8457,8 @@ Perl_newSVhek(pTHX_ const HEK *const hek) sv_usepvn_flags(sv, as_utf8, utf8_len, SV_HAS_TRAILING_NUL); SvUTF8_on (sv); return sv; - } else if (flags & (HVhek_REHASH|HVhek_UNSHARED)) { - /* We don't have a pointer to the hv, so we have to replicate the - flag into every HEK. This hv is using custom a hasing - algorithm. Hence we can't return a shared string scalar, as - that would contain the (wrong) hash value, and might get passed - into an hv routine with a regular hash. - Similarly, a hash that isn't using shared hash keys has to have + } else if (flags & HVhek_UNSHARED) { + /* A hash that isn't using shared hash keys has to have the flag in every key so that we know not to try to call share_hek_hek on it. */ @@ -12912,6 +12907,7 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags, PL_Proc = ipP; #endif /* PERL_IMPLICIT_SYS */ + param->flags = flags; /* Nothing in the core code uses this, but we make it available to extensions (using mg_dup). */ @@ -12921,6 +12917,7 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags, param->new_perl = my_perl; param->unreferenced = NULL; + INIT_TRACK_MEMPOOL(my_perl->Imemory_debug_header, my_perl); PL_body_arenas = NULL; @@ -12933,9 +12930,6 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags, PL_debug = proto_perl->Idebug; - PL_hash_seed = proto_perl->Ihash_seed; - PL_rehash_seed = proto_perl->Irehash_seed; - /* dbargs array probably holds garbage */ PL_dbargs = NULL; @@ -360,8 +360,6 @@ perform the upgrade if necessary. See C<svtype>. 3: On a pad name SV, that slot in the frame AV is a REFCNT'ed reference to a lexical from "outside". */ -#define SVphv_REHASH SVf_FAKE /* 4: On a PVHV, hash values are being - recalculated */ #define SVf_OOK 0x02000000 /* has valid offset value. For a PVHV this means that a hv_aux struct is present after the main array */ diff --git a/t/lib/universal.t b/t/lib/universal.t index a52e01972f..71223b4fae 100644 --- a/t/lib/universal.t +++ b/t/lib/universal.t @@ -15,12 +15,10 @@ sub tryit { eval shift or warn \$@ } tryit "&Internals::SvREADONLY($arg)"; tryit "&Internals::SvREFCNT($arg)"; tryit "&Internals::hv_clear_placeholders($arg)"; -tryit "&Internals::HvREHASH($arg)"; ---- Usage: Internals::SvREADONLY(SCALAR[, ON]) at (eval 1) line 1. Usage: Internals::SvREFCNT(SCALAR[, REFCOUNT]) at (eval 2) line 1. Usage: Internals::hv_clear_placeholders(hv) at (eval 3) line 1. -Internals::HvREHASH $hashref at (eval 4) line 1. ==== } diff --git a/t/op/hash.t b/t/op/hash.t index 4093f2e0a0..597301adf5 100644 --- a/t/op/hash.t +++ b/t/op/hash.t @@ -8,107 +8,7 @@ BEGIN { use strict; -plan tests => 15; - -my %h; - -ok (!Internals::HvREHASH(%h), "hash doesn't start with rehash flag on"); - -foreach (1..10) { - $h{"\0"x$_}++; -} - -ok (!Internals::HvREHASH(%h), "10 entries doesn't trigger rehash"); - -foreach (11..20) { - $h{"\0"x$_}++; -} - -ok (Internals::HvREHASH(%h), "20 entries triggers rehash"); - - - - -# second part using an emulation of the PERL_HASH in perl, mounting an -# attack on a pre-populated hash. This is also useful if you need normal -# keys which don't contain \0 -- suitable for stashes - -use constant MASK_U32 => 2**32; -use constant HASH_SEED => 0; -use constant THRESHOLD => 14; -use constant START => "a"; - -# some initial hash data -my %h2 = map {$_ => 1} 'a'..'cc'; - -ok (!Internals::HvREHASH(%h2), - "starting with pre-populated non-pathological hash (rehash flag if off)"); - -my @keys = get_keys(\%h2); -$h2{$_}++ for @keys; -ok (Internals::HvREHASH(%h2), - scalar(@keys) . " colliding into the same bucket keys are triggering rehash"); - -sub get_keys { - my $hr = shift; - - # the minimum of bits required to mount the attack on a hash - my $min_bits = log(THRESHOLD)/log(2); - - # if the hash has already been populated with a significant amount - # of entries the number of mask bits can be higher - my $keys = scalar keys %$hr; - my $bits = $keys ? log($keys)/log(2) : 0; - $bits = $min_bits if $min_bits > $bits; - - $bits = int($bits) < $bits ? int($bits) + 1 : int($bits); - # need to add 2 bits to cover the internal split cases - $bits += 2; - my $mask = 2**$bits-1; - print "# using mask: $mask ($bits)\n"; - - my @keys; - my $s = START; - my $c = 0; - # get 2 keys on top of the THRESHOLD - my $hash; - while (@keys < THRESHOLD+2) { - # next if exists $hash->{$s}; - $hash = hash($s); - next unless ($hash & $mask) == 0; - $c++; - printf "# %2d: %5s, %10s\n", $c, $s, $hash; - push @keys, $s; - } continue { - $s++; - } - - return @keys; -} - - -# trying to provide the fastest equivalent of C macro's PERL_HASH in -# Perl - the main complication is that it uses U32 integer, which we -# can't do in perl, without doing some tricks -sub hash { - my $s = shift; - my @c = split //, $s; - my $u = HASH_SEED; - for (@c) { - # (A % M) + (B % M) == (A + B) % M - # This works because '+' produces a NV, which is big enough to hold - # the intermediate result. We only need the % before any "^" and "&" - # to get the result in the range for an I32. - # and << doesn't work on NV, so using 1 << 10 - $u += ord; - $u += $u * (1 << 10); $u %= MASK_U32; - $u ^= $u >> 6; - } - $u += $u << 3; $u %= MASK_U32; - $u ^= $u >> 11; $u %= MASK_U32; - $u += $u << 15; $u %= MASK_U32; - $u; -} +plan tests => 10; # This will crash perl if it fails diff --git a/t/run/runenv.t b/t/run/runenv.t index cea2590414..521ba8bf80 100644 --- a/t/run/runenv.t +++ b/t/run/runenv.t @@ -12,7 +12,7 @@ BEGIN { skip_all_without_config('d_fork'); } -plan tests => 84; +plan tests => 94; my $STDOUT = tempfile(); my $STDERR = tempfile(); @@ -63,8 +63,16 @@ sub try { my ($env, $args, $stdout, $stderr) = @_; my ($actual_stdout, $actual_stderr) = runperl_and_capture($env, $args); local $::Level = $::Level + 1; - is ($stdout, $actual_stdout); - is ($stderr, $actual_stderr); + if (ref $stdout) { + ok ( $actual_stdout =~/$stdout/ ); + } else { + is ($stdout, $actual_stdout); + } + if (ref $stderr) { + ok ( $actual_stderr =~/$stderr/); + } else { + is ($stderr, $actual_stderr); + } } # PERL5OPT Command-line options (switches). Switches in @@ -191,6 +199,30 @@ try({PERL5LIB => "foo", '', ''); +try({PERL_HASH_SEED_DEBUG => 1}, + ['-e','1'], + '', + qr/HASH_FUNCTION =/); + +try({PERL_HASH_SEED_DEBUG => 1}, + ['-e','1'], + '', + qr/HASH_SEED =/); + +try({PERL_HASH_SEED_DEBUG => 1, PERL_HASH_SEED => "12345678"}, + ['-e','1'], + '', + qr/HASH_SEED = 0x12345678/); + +try({PERL_HASH_SEED_DEBUG => 1, PERL_HASH_SEED => "12"}, + ['-e','1'], + '', + qr/HASH_SEED = 0x12000000/); + +try({PERL_HASH_SEED_DEBUG => 1, PERL_HASH_SEED => "123456789"}, + ['-e','1'], + '', + qr/HASH_SEED = 0x12345678/); # Tests for S_incpush_use_sep(): my @dump_inc = ('-e', 'print "$_\n" foreach @INC'); diff --git a/universal.c b/universal.c index 76b6281441..805f376124 100644 --- a/universal.c +++ b/universal.c @@ -1086,44 +1086,6 @@ XS(XS_PerlIO_get_layers) XSRETURN(0); } -XS(XS_Internals_hash_seed) -{ - dVAR; - /* Using dXSARGS would also have dITEM and dSP, - * which define 2 unused local variables. */ - dAXMARK; - PERL_UNUSED_ARG(cv); - PERL_UNUSED_VAR(mark); - XSRETURN_UV(PERL_HASH_SEED); -} - -XS(XS_Internals_rehash_seed) -{ - dVAR; - /* Using dXSARGS would also have dITEM and dSP, - * which define 2 unused local variables. */ - dAXMARK; - PERL_UNUSED_ARG(cv); - PERL_UNUSED_VAR(mark); - XSRETURN_UV(PL_rehash_seed); -} - -XS(XS_Internals_HvREHASH) /* Subject to change */ -{ - dVAR; - dXSARGS; - PERL_UNUSED_ARG(cv); - if (SvROK(ST(0))) { - const HV * const hv = (const HV *) SvRV(ST(0)); - if (items == 1 && SvTYPE(hv) == SVt_PVHV) { - if (HvREHASH(hv)) - XSRETURN_YES; - else - XSRETURN_NO; - } - } - Perl_croak(aTHX_ "Internals::HvREHASH $hashref"); -} XS(XS_re_is_regexp) { @@ -1403,9 +1365,6 @@ const struct xsub_details details[] = { {"Internals::SvREFCNT", XS_Internals_SvREFCNT, "\\[$%@];$"}, {"Internals::hv_clear_placeholders", XS_Internals_hv_clear_placehold, "\\%"}, {"PerlIO::get_layers", XS_PerlIO_get_layers, "*;@"}, - {"Internals::hash_seed", XS_Internals_hash_seed, ""}, - {"Internals::rehash_seed", XS_Internals_rehash_seed, ""}, - {"Internals::HvREHASH", XS_Internals_HvREHASH, "\\%"}, {"re::is_regexp", XS_re_is_regexp, "$"}, {"re::regname", XS_re_regname, ";$$"}, {"re::regnames", XS_re_regnames, ";$"}, @@ -24,6 +24,7 @@ #include "EXTERN.h" #define PERL_IN_UTIL_C #include "perl.h" +#include "reentr.h" #ifdef USE_PERLIO #include "perliol.h" /* For PerlIOUnix_refcnt */ @@ -5666,43 +5667,41 @@ Perl_seed(pTHX) return u; } -UV -Perl_get_hash_seed(pTHX) +void +Perl_get_hash_seed(pTHX_ unsigned char *seed_buffer) { dVAR; - const char *s = PerlEnv_getenv("PERL_HASH_SEED"); - UV myseed = 0; + const char *s; + const unsigned char * const end= seed_buffer + PERL_HASH_SEED_BYTES; + + PERL_ARGS_ASSERT_GET_HASH_SEED; - if (s) - while (isSPACE(*s)) + s= PerlEnv_getenv("PERL_HASH_SEED"); + + if ( s ) +#ifndef USE_HASH_SEED_EXPLICIT + { + while (isSPACE(*s)) s++; - if (s && isDIGIT(*s)) - myseed = (UV)Atoul(s); - else -#ifdef USE_HASH_SEED_EXPLICIT - if (s) -#endif - { - /* Compute a random seed */ - (void)seedDrand01((Rand_seed_t)seed()); - myseed = (UV)(Drand01() * (NV)UV_MAX); -#if RANDBITS < (UVSIZE * 8) - /* Since there are not enough randbits to to reach all - * the bits of a UV, the low bits might need extra - * help. Sum in another random number that will - * fill in the low bits. */ - myseed += - (UV)(Drand01() * (NV)((((UV)1) << ((UVSIZE * 8 - RANDBITS))) - 1)); -#endif /* RANDBITS < (UVSIZE * 8) */ - if (myseed == 0) { /* Superparanoia. */ - myseed = (UV)(Drand01() * (NV)UV_MAX); /* One more chance. */ - if (myseed == 0) - Perl_croak(aTHX_ "Your random numbers are not that random"); - } - } - PL_rehash_seed_set = TRUE; - return myseed; + while (isXDIGIT(*s) && seed_buffer < end) { + *seed_buffer = READ_XDIGIT(s) << 4; + if (isXDIGIT(*s)) { + *seed_buffer |= READ_XDIGIT(s); + } + seed_buffer++; + } + /* should we check for unparsed crap? */ + } + else +#endif + { + (void)seedDrand01((Rand_seed_t)seed()); + + while (seed_buffer < end) { + *seed_buffer++ = (unsigned char)(Drand01() * (U8_MAX+1)); + } + } } #ifdef PERL_GLOBAL_STRUCT |