summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--MANIFEST1
-rw-r--r--dump.c17
-rw-r--r--embed.fnc2
-rw-r--r--embed.h2
-rw-r--r--embedvar.h7
-rw-r--r--ext/Hash-Util-FieldHash/t/10_hash.t110
-rw-r--r--ext/Hash-Util/Util.xs160
-rw-r--r--ext/Hash-Util/lib/Hash/Util.pm103
-rw-r--r--ext/Hash-Util/t/Util.t30
-rw-r--r--hv.c113
-rw-r--r--hv.h536
-rw-r--r--intrpvar.h6
-rw-r--r--perl.c39
-rw-r--r--perlapi.h4
-rw-r--r--perlvars.h3
-rw-r--r--proto.h6
-rw-r--r--sv.c16
-rw-r--r--sv.h2
-rw-r--r--t/lib/universal.t2
-rw-r--r--t/op/hash.t102
-rw-r--r--t/run/runenv.t38
-rw-r--r--universal.c41
-rw-r--r--util.c63
23 files changed, 917 insertions, 486 deletions
diff --git a/MANIFEST b/MANIFEST
index 722d00215c..17224f59d5 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -3769,7 +3769,6 @@ ext/Hash-Util-FieldHash/t/02_function.t Test script
ext/Hash-Util-FieldHash/t/03_class.t Test script
ext/Hash-Util-FieldHash/t/04_thread.t Test script
ext/Hash-Util-FieldHash/t/05_perlhook.t Test script
-ext/Hash-Util-FieldHash/t/10_hash.t Adapted from t/op/hash.t
ext/Hash-Util-FieldHash/t/11_hashassign.t Adapted from t/op/hashassign.t
ext/Hash-Util-FieldHash/t/12_hashwarn.t Adapted from t/op/hashwarn.t
ext/Hash-Util/lib/Hash/Util.pm Hash::Util
diff --git a/dump.c b/dump.c
index 506f85d5a2..8ba60cf4e9 100644
--- a/dump.c
+++ b/dump.c
@@ -1416,7 +1416,6 @@ const struct flag_to_name hv_flags_names[] = {
{SVphv_SHAREKEYS, "SHAREKEYS,"},
{SVphv_LAZYDEL, "LAZYDEL,"},
{SVphv_HASKFLAGS, "HASKFLAGS,"},
- {SVphv_REHASH, "REHASH,"},
{SVphv_CLONEABLE, "CLONEABLE,"}
};
@@ -1900,7 +1899,7 @@ Perl_do_sv_dump(pTHX_ I32 level, PerlIO *file, SV *sv, I32 nest, I32 maxnest, bo
SV * keysv;
const char * keypv;
SV * elt;
- STRLEN len;
+ STRLEN len;
if (count-- <= 0) goto DONEHV;
@@ -1909,16 +1908,14 @@ Perl_do_sv_dump(pTHX_ I32 level, PerlIO *file, SV *sv, I32 nest, I32 maxnest, bo
keypv = SvPV_const(keysv, len);
elt = HeVAL(he);
- Perl_dump_indent(aTHX_ level+1, file, "Elt %s ", pv_display(d, keypv, len, 0, pvlim));
- if (SvUTF8(keysv))
- PerlIO_printf(file, "[UTF8 \"%s\"] ", sv_uni_display(d, keysv, 6 * SvCUR(keysv), UNI_DISPLAY_QQ));
+ Perl_dump_indent(aTHX_ level+1, file, "Elt %s ", pv_display(d, keypv, len, 0, pvlim));
+ if (SvUTF8(keysv))
+ PerlIO_printf(file, "[UTF8 \"%s\"] ", sv_uni_display(d, keysv, 6 * SvCUR(keysv), UNI_DISPLAY_QQ));
if (HvEITER_get(hv) == he)
PerlIO_printf(file, "[CURRENT] ");
- if (HeKREHASH(he))
- PerlIO_printf(file, "[REHASH] ");
- PerlIO_printf(file, "HASH = 0x%"UVxf"\n", (UV) hash);
- do_sv_dump(level+1, file, elt, nest+1, maxnest, dumpops, pvlim);
- }
+ PerlIO_printf(file, "HASH = 0x%"UVxf"\n", (UV) hash);
+ do_sv_dump(level+1, file, elt, nest+1, maxnest, dumpops, pvlim);
+ }
}
DONEHV:;
}
diff --git a/embed.fnc b/embed.fnc
index d4982b8421..b0b1ce9e58 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -1509,7 +1509,7 @@ p |I32 |wait4pid |Pid_t pid|NN int* statusp|int flags
p |U32 |parse_unicode_opts|NN const char **popt
Ap |U32 |seed
: Only used in perl.c
-pR |UV |get_hash_seed
+p |void |get_hash_seed |NN unsigned char *seed_buffer
: Used in doio.c, pp_hot.c, pp_sys.c
p |void |report_evil_fh |NULLOK const GV *gv
: Used in doio.c, pp_hot.c, pp_sys.c
diff --git a/embed.h b/embed.h
index b8ad138179..2ecd3b31af 100644
--- a/embed.h
+++ b/embed.h
@@ -1082,7 +1082,7 @@
#define find_rundefsv2(a,b) Perl_find_rundefsv2(aTHX_ a,b)
#define find_script(a,b,c,d) Perl_find_script(aTHX_ a,b,c,d)
#define free_tied_hv_pool() Perl_free_tied_hv_pool(aTHX)
-#define get_hash_seed() Perl_get_hash_seed(aTHX)
+#define get_hash_seed(a) Perl_get_hash_seed(aTHX_ a)
#define get_no_modify() Perl_get_no_modify(aTHX)
#define get_opargs() Perl_get_opargs(aTHX)
#define gv_try_downgrade(a) Perl_gv_try_downgrade(aTHX_ a)
diff --git a/embedvar.h b/embedvar.h
index 6efd53aef4..47e0b32bc8 100644
--- a/embedvar.h
+++ b/embedvar.h
@@ -181,7 +181,6 @@
#define PL_glob_index (vTHX->Iglob_index)
#define PL_globalstash (vTHX->Iglobalstash)
#define PL_globhook (vTHX->Iglobhook)
-#define PL_hash_seed (vTHX->Ihash_seed)
#define PL_hintgv (vTHX->Ihintgv)
#define PL_hints (vTHX->Ihints)
#define PL_hv_fetch_ent_mh (vTHX->Ihv_fetch_ent_mh)
@@ -278,8 +277,6 @@
#define PL_registered_mros (vTHX->Iregistered_mros)
#define PL_regmatch_slab (vTHX->Iregmatch_slab)
#define PL_regmatch_state (vTHX->Iregmatch_state)
-#define PL_rehash_seed (vTHX->Irehash_seed)
-#define PL_rehash_seed_set (vTHX->Irehash_seed_set)
#define PL_replgv (vTHX->Ireplgv)
#define PL_restartjmpenv (vTHX->Irestartjmpenv)
#define PL_restartop (vTHX->Irestartop)
@@ -407,6 +404,10 @@
#define PL_Gdollarzero_mutex (my_vars->Gdollarzero_mutex)
#define PL_fold_locale (my_vars->Gfold_locale)
#define PL_Gfold_locale (my_vars->Gfold_locale)
+#define PL_hash_seed (my_vars->Ghash_seed)
+#define PL_Ghash_seed (my_vars->Ghash_seed)
+#define PL_hash_seed_set (my_vars->Ghash_seed_set)
+#define PL_Ghash_seed_set (my_vars->Ghash_seed_set)
#define PL_hints_mutex (my_vars->Ghints_mutex)
#define PL_Ghints_mutex (my_vars->Ghints_mutex)
#define PL_keyword_plugin (my_vars->Gkeyword_plugin)
diff --git a/ext/Hash-Util-FieldHash/t/10_hash.t b/ext/Hash-Util-FieldHash/t/10_hash.t
deleted file mode 100644
index 2cfb4e81fa..0000000000
--- a/ext/Hash-Util-FieldHash/t/10_hash.t
+++ /dev/null
@@ -1,110 +0,0 @@
-#!./perl -w
-use Test::More;
-
-use strict;
-use Hash::Util::FieldHash qw( :all);
-
-no warnings 'misc';
-
-plan tests => 5;
-
-fieldhash my %h;
-
-ok (!Internals::HvREHASH(%h), "hash doesn't start with rehash flag on");
-
-foreach (1..10) {
- $h{"\0"x$_}++;
-}
-
-ok (!Internals::HvREHASH(%h), "10 entries doesn't trigger rehash");
-
-foreach (11..20) {
- $h{"\0"x$_}++;
-}
-
-ok (Internals::HvREHASH(%h), "20 entries triggers rehash");
-
-
-
-
-# second part using an emulation of the PERL_HASH in perl, mounting an
-# attack on a pre-populated hash. This is also useful if you need normal
-# keys which don't contain \0 -- suitable for stashes
-
-use constant MASK_U32 => 2**32;
-use constant HASH_SEED => 0;
-use constant THRESHOLD => 14;
-use constant START => "a";
-
-# some initial hash data
-fieldhash my %h2;
-%h2 = map {$_ => 1} 'a'..'cc';
-
-ok (!Internals::HvREHASH(%h2),
- "starting with pre-populated non-pathological hash (rehash flag if off)");
-
-my @keys = get_keys(\%h2);
-$h2{$_}++ for @keys;
-ok (Internals::HvREHASH(%h2),
- scalar(@keys) . " colliding into the same bucket keys are triggering rehash");
-
-sub get_keys {
- my $hr = shift;
-
- # the minimum of bits required to mount the attack on a hash
- my $min_bits = log(THRESHOLD)/log(2);
-
- # if the hash has already been populated with a significant amount
- # of entries the number of mask bits can be higher
- my $keys = scalar keys %$hr;
- my $bits = $keys ? log($keys)/log(2) : 0;
- $bits = $min_bits if $min_bits > $bits;
-
- $bits = int($bits) < $bits ? int($bits) + 1 : int($bits);
- # need to add 2 bits to cover the internal split cases
- $bits += 2;
- my $mask = 2**$bits-1;
- print "# using mask: $mask ($bits)\n";
-
- my @keys;
- my $s = START;
- my $c = 0;
- # get 2 keys on top of the THRESHOLD
- my $hash;
- while (@keys < THRESHOLD+2) {
- # next if exists $hash->{$s};
- $hash = hash($s);
- next unless ($hash & $mask) == 0;
- $c++;
- printf "# %2d: %5s, %10s\n", $c, $s, $hash;
- push @keys, $s;
- } continue {
- $s++;
- }
-
- return @keys;
-}
-
-
-# trying to provide the fastest equivalent of C macro's PERL_HASH in
-# Perl - the main complication is that it uses U32 integer, which we
-# can't do it perl, without doing some tricks
-sub hash {
- my $s = shift;
- my @c = split //, $s;
- my $u = HASH_SEED;
- for (@c) {
- # (A % M) + (B % M) == (A + B) % M
- # This works because '+' produces a NV, which is big enough to hold
- # the intermediate result. We only need the % before any "^" and "&"
- # to get the result in the range for an I32.
- # and << doesn't work on NV, so using 1 << 10
- $u += ord;
- $u += $u * (1 << 10); $u %= MASK_U32;
- $u ^= $u >> 6;
- }
- $u += $u << 3; $u %= MASK_U32;
- $u ^= $u >> 11; $u %= MASK_U32;
- $u += $u << 15; $u %= MASK_U32;
- $u;
-}
diff --git a/ext/Hash-Util/Util.xs b/ext/Hash-Util/Util.xs
index 678e64d9b7..c8a692f8db 100644
--- a/ext/Hash-Util/Util.xs
+++ b/ext/Hash-Util/Util.xs
@@ -60,3 +60,163 @@ hv_store(hash, key, val)
XSRETURN_YES;
}
}
+
+void
+hash_seed()
+ PROTOTYPE:
+ PPCODE:
+ mXPUSHs(newSVpvn((char *)PERL_HASH_SEED,PERL_HASH_SEED_BYTES));
+ XSRETURN(1);
+
+void
+hash_value(string)
+ SV* string
+ PROTOTYPE: $
+ PPCODE:
+ STRLEN len;
+ char *pv;
+ UV uv;
+
+ pv= SvPV(string,len);
+ PERL_HASH(uv,pv,len);
+ XSRETURN_UV(uv);
+
+
+void
+bucket_info(rhv)
+ SV* rhv
+ PPCODE:
+{
+ /*
+
+ Takes a non-magical hash ref as an argument and returns a list of
+ statistics about the hash. The number and keys and the size of the
+ array will always be reported as the first two values. If the array is
+ actually allocated (they are lazily allocated), then additionally
+ will return a list of counts of bucket lengths. In other words in
+
+ ($keys, $buckets, $used, @length_count)= hash::bucket_info(\%hash);
+
+ $length_count[0] is the number of empty buckets, and $length_count[1]
+ is the number of buckets with only one key in it, $buckets - $length_count[0]
+ gives the number of used buckets, and @length_count-1 is the maximum
+ bucket depth.
+
+ If the argument is not a hash ref, or if it is magical, then returns
+ nothing (the empty list).
+
+ */
+ if (SvROK(rhv) && SvTYPE(SvRV(rhv))==SVt_PVHV && !SvMAGICAL(SvRV(rhv))) {
+ const HV * const hv = (const HV *) SvRV(rhv);
+ U32 max_bucket_index= HvMAX(hv);
+ U32 total_keys= HvUSEDKEYS(hv);
+ HE **bucket_array= HvARRAY(hv);
+ mXPUSHi(total_keys);
+ mXPUSHi(max_bucket_index+1);
+ mXPUSHi(0); /* for the number of used buckets */
+#define BUCKET_INFO_ITEMS_ON_STACK 3
+ if (!bucket_array) {
+ XSRETURN(BUCKET_INFO_ITEMS_ON_STACK);
+ } else {
+ /* we use chain_length to index the stack - we eliminate an add
+ * by initializing things with the number of items already on the stack.
+ * If we have 2 items then ST(2+0) (the third stack item) will be the counter
+ * for empty chains, ST(2+1) will be for chains with one element, etc.
+ */
+ I32 max_chain_length= BUCKET_INFO_ITEMS_ON_STACK - 1; /* so we do not have to do an extra push for the 0 index */
+ HE *he;
+ U32 bucket_index;
+ for ( bucket_index= 0; bucket_index <= max_bucket_index; bucket_index++ ) {
+ I32 chain_length= BUCKET_INFO_ITEMS_ON_STACK;
+ for (he= bucket_array[bucket_index]; he; he= HeNEXT(he) ) {
+ chain_length++;
+ }
+ while ( max_chain_length < chain_length ) {
+ mXPUSHi(0);
+ max_chain_length++;
+ }
+ SvIVX( ST( chain_length ) )++;
+ }
+ /* now set the number of used buckets */
+ SvIVX( ST( BUCKET_INFO_ITEMS_ON_STACK - 1 ) ) = max_bucket_index - SvIVX( ST( BUCKET_INFO_ITEMS_ON_STACK ) ) + 1;
+ XSRETURN( max_chain_length + 1 ); /* max_chain_length is the index of the last item on the stack, so we add 1 */
+ }
+#undef BUCKET_INFO_ITEMS_ON_STACK
+ }
+ XSRETURN(0);
+}
+
+void
+bucket_array(rhv)
+ SV* rhv
+ PPCODE:
+{
+ /* Returns an array of arrays representing key/bucket mappings.
+ * Each element of the array contains either an integer or a reference
+ * to an array of keys. A plain integer represents K empty buckets. An
+ * array ref represents a single bucket, with each element being a key in
+ * the hash. (Note this treats a placeholder as a normal key.)
+ *
+ * This allows one to "see" the keyorder. Note the "insert first" nature
+ * of the hash store, combined with regular remappings means that relative
+ * order of keys changes each remap.
+ */
+ if (SvROK(rhv) && SvTYPE(SvRV(rhv))==SVt_PVHV && !SvMAGICAL(SvRV(rhv))) {
+ const HV * const hv = (const HV *) SvRV(rhv);
+ HE **he_ptr= HvARRAY(hv);
+ if (!he_ptr) {
+ XSRETURN(0);
+ } else {
+ U32 i, max;
+ AV *info_av;
+ HE *he;
+ I32 empty_count=0;
+ if (SvMAGICAL(hv)) {
+ Perl_croak(aTHX_ "hash::bucket_array only works on 'normal' hashes");
+ }
+ info_av= newAV();
+ max= HvMAX(hv);
+ mXPUSHs(newRV_noinc((SV*)info_av));
+ for ( i= 0; i <= max; i++ ) {
+ AV *key_av= NULL;
+ for (he= he_ptr[i]; he; he= HeNEXT(he) ) {
+ SV *key_sv;
+ char *str;
+ STRLEN len;
+ char mode;
+ if (!key_av) {
+ key_av= newAV();
+ if (empty_count) {
+ av_push(info_av, newSViv(empty_count));
+ empty_count= 0;
+ }
+ av_push(info_av, (SV *)newRV_noinc((SV *)key_av));
+ }
+ if (HeKLEN(he) == HEf_SVKEY) {
+ SV *sv= HeSVKEY(he);
+ SvGETMAGIC(sv);
+ str= SvPV(sv, len);
+ mode= SvUTF8(sv) ? 1 : 0;
+ } else {
+ str= HeKEY(he);
+ len= HeKLEN(he);
+ mode= HeKUTF8(he) ? 1 : 0;
+ }
+ key_sv= newSVpvn(str,len);
+ av_push(key_av,key_sv);
+ if (mode) {
+ SvUTF8_on(key_sv);
+ }
+ }
+ if (!key_av)
+ empty_count++;
+ }
+ if (empty_count) {
+ av_push(info_av, newSViv(empty_count));
+ empty_count++;
+ }
+ }
+ XSRETURN(1);
+ }
+ XSRETURN(0);
+}
diff --git a/ext/Hash-Util/lib/Hash/Util.pm b/ext/Hash-Util/lib/Hash/Util.pm
index d8a6ec888b..20a730e0c1 100644
--- a/ext/Hash-Util/lib/Hash/Util.pm
+++ b/ext/Hash-Util/lib/Hash/Util.pm
@@ -28,10 +28,11 @@ our @EXPORT_OK = qw(
lock_ref_keys_plus
hidden_ref_keys legal_ref_keys
- hash_seed hv_store
+ hash_seed hash_value bucket_stats bucket_info bucket_array
+ hv_store
lock_hash_recurse unlock_hash_recurse
);
-our $VERSION = '0.12';
+our $VERSION = '0.13';
require XSLoader;
XSLoader::load();
@@ -459,9 +460,7 @@ unrestricted hash.
my $hash_seed = hash_seed();
-hash_seed() returns the seed number used to randomise hash ordering.
-Zero means the "traditional" random hash ordering, non-zero means the
-new even more random hash ordering introduced in Perl 5.8.1.
+hash_seed() returns the seed bytes used to randomise hash ordering.
B<Note that the hash seed is sensitive information>: by knowing it one
can craft a denial-of-service attack against Perl code, even remotely,
@@ -469,10 +468,100 @@ see L<perlsec/"Algorithmic Complexity Attacks"> for more information.
B<Do not disclose the hash seed> to people who don't need to know it.
See also L<perlrun/PERL_HASH_SEED_DEBUG>.
+Prior to Perl 5.17.6 this function returned a UV, it now returns a string,
+which may be of nearly any size as determined by the hash function your
+Perl has been built with. Possible sizes may be but are not limited to
+4 bytes (for most hash algorithms) and 16 bytes (for siphash).
+
+=item B<hash_value>
+
+ my $hash_value = hash_value($string);
+
+hash_value() returns the current perls internal hash value for a given
+string.
+
+Returns a 32 bit integer representing the hash value of the string passed
+in. This value is only reliable for the lifetime of the process. It may
+be different depending on invocation, environment variables, perl version,
+architectures, and build options.
+
+B<Note that the hash value of a given string is sensitive information>:
+by knowing it one can deduce the hash seed which in turn can allow one to
+craft a denial-of-service attack against Perl code, even remotely,
+see L<perlsec/"Algorithmic Complexity Attacks"> for more information.
+B<Do not disclose the hash value of a string> to people who don't need to
+know it. See also L<perlrun/PERL_HASH_SEED_DEBUG>.
+
+=item B<bucket_info>
+
+Return a set of basic information about a hash.
+
+ my ($keys, $buckets, $used, @length_counts)= bucket_info($hash);
+
+Fields are as follows:
+
+ 0: Number of keys in the hash
+ 1: Number of buckets in the hash
+ 2: Number of used buckets in the hash
+ rest : list of counts, Kth element is the number of buckets
+ with K keys in it.
+
+See also bucket_stats() and bucket_array().
+
+=item B<bucket_stats>
+
+Returns a list of statistics about a hash.
+
+ my ($keys, buckets, $used, $utilization_ratio, $collision_pct,
+ $mean, $stddev, @length_counts)= bucket_info($hashref);
+
+
+Fields are as follows:
+
+
+ 0: Number of keys in the hash
+ 1: Number of buckets in the hash
+ 2: Number of used buckets in the hash
+ 3: Percent of buckets used
+ 4: Percent of keys which are in collision
+ 5: Average bucket length
+ 6: Standard Deviation of bucket lengths.
+ rest : list of counts, Kth element is the number of buckets
+ with K keys in it.
+
+See also bucket_info() and bucket_array().
+
+=item B<bucket_array>
+
+ my $array= bucket_array(\%hash);
+
+Returns a packed representation of the bucket array associated with a hash. Each element
+of the array is either an integer K, in which case it represents K empty buckets, or
+a reference to another array which contains the keys that are in that bucket.
+
+B<Note that the information returned by bucket_array is sensitive information>:
+by knowing it one can directly attack perls hash function which in turn may allow
+one to craft a denial-of-service attack against Perl code, even remotely,
+see L<perlsec/"Algorithmic Complexity Attacks"> for more information.
+B<Do not disclose the outputof this function> to people who don't need to
+know it. See also L<perlrun/PERL_HASH_SEED_DEBUG>. This function is provided strictly
+for debugging and diagnostics purposes only, it is hard to imagine a reason why it
+would be used in production code.
+
=cut
-sub hash_seed () {
- Internals::rehash_seed();
+
+sub bucket_stats {
+ my ($hash)= @_;
+ my ($keys, $buckets, $used, @length_counts)= bucket_info($hash);
+ my $sum;
+ $sum += ($length_counts[$_] * $_) for 0 .. $#length_counts;
+ my $mean= $sum/$buckets;
+ $sum= 0;
+ $sum += ($length_counts[$_] * (($_-$mean)**2)) for 0 .. $#length_counts;
+
+ my $stddev= sqrt($sum/$buckets);
+ return $keys, $buckets, $used, $keys ? ($used/$buckets, ($keys-$used)/$keys, $mean, $stddev, @length_counts) : ();
}
=item B<hv_store>
diff --git a/ext/Hash-Util/t/Util.t b/ext/Hash-Util/t/Util.t
index d02defe9de..63769b8f02 100644
--- a/ext/Hash-Util/t/Util.t
+++ b/ext/Hash-Util/t/Util.t
@@ -33,10 +33,11 @@ BEGIN {
lock_ref_keys_plus
hidden_ref_keys legal_ref_keys
- hash_seed hv_store
+ hash_seed hash_value bucket_stats bucket_info bucket_array
+ hv_store
lock_hash_recurse unlock_hash_recurse
);
- plan tests => 226 + @Exported_Funcs;
+ plan tests => 234 + @Exported_Funcs;
use_ok 'Hash::Util', @Exported_Funcs;
}
foreach my $func (@Exported_Funcs) {
@@ -326,7 +327,7 @@ like(
}
my $hash_seed = hash_seed();
-ok($hash_seed >= 0, "hash_seed $hash_seed");
+ok(defined($hash_seed) && $hash_seed ne '', "hash_seed $hash_seed");
{
package Minder;
@@ -530,3 +531,26 @@ ok($hash_seed >= 0, "hash_seed $hash_seed");
ok( hash_unlocked(%{$hash{c}[1]}),
"unlock_hash_recurse(): element which is hashref in array ref not locked" );
}
+
+{
+ my $h1= hash_value("foo");
+ my $h2= hash_value("bar");
+ is( $h1, hash_value("foo") );
+ is( $h2, hash_value("bar") );
+}
+{
+ my @info1= bucket_info({});
+ my @info2= bucket_info({1..10});
+ my @stats1= bucket_stats({});
+ my @stats2= bucket_stats({1..10});
+ my $array1= bucket_array({});
+ my $array2= bucket_array({1..10});
+ is("@info1","0 8 0");
+ is("@info2[0,1]","5 8");
+ is("@stats1","0 8 0");
+ is("@stats2[0,1]","5 8");
+ my @keys1= sort map { ref $_ ? @$_ : () } @$array1;
+ my @keys2= sort map { ref $_ ? @$_ : () } @$array2;
+ is("@keys1","");
+ is("@keys2","1 3 5 7 9");
+}
diff --git a/hv.c b/hv.c
index ddefd6585e..3069e67bf4 100644
--- a/hv.c
+++ b/hv.c
@@ -613,18 +613,12 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen,
}
}
- if (HvREHASH(hv) || (!hash && !(keysv && (SvIsCOW_shared_hash(keysv)))))
- PERL_HASH_INTERNAL_(hash, key, klen, HvREHASH(hv));
- else if (!hash)
- hash = SvSHARED_HASH(keysv);
-
- /* We don't have a pointer to the hv, so we have to replicate the
- flag into every HEK, so that hv_iterkeysv can see it.
- And yes, you do need this even though you are not "storing" because
- you can flip the flags below if doing an lval lookup. (And that
- was put in to give the semantics Andreas was expecting.) */
- if (HvREHASH(hv))
- flags |= HVhek_REHASH;
+ if (!hash) {
+ if (keysv && (SvIsCOW_shared_hash(keysv)))
+ hash = SvSHARED_HASH(keysv);
+ else
+ PERL_HASH(hash, key, klen);
+ }
masked_flags = (flags & HVhek_MASK);
@@ -813,7 +807,7 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen,
as we repeatedly double the number of buckets on every
entry. Linear search feels a less worse thing to do. */
hsplit(hv);
- } else if(!HvREHASH(hv)) {
+ } else {
U32 n_links = 1;
while ((counter = HeNEXT(counter)))
@@ -978,10 +972,12 @@ S_hv_delete_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen,
HvHASKFLAGS_on(MUTABLE_SV(hv));
}
- if (HvREHASH(hv) || (!hash && !(keysv && (SvIsCOW_shared_hash(keysv)))))
- PERL_HASH_INTERNAL_(hash, key, klen, HvREHASH(hv));
- else if (!hash)
- hash = SvSHARED_HASH(keysv);
+ if (!hash) {
+ if (keysv && (SvIsCOW_shared_hash(keysv)))
+ hash = SvSHARED_HASH(keysv);
+ else
+ PERL_HASH(hash, key, klen);
+ }
masked_flags = (k_flags & HVhek_MASK);
@@ -1118,8 +1114,6 @@ S_hsplit(pTHX_ HV *hv)
I32 i;
char *a = (char*) HvARRAY(hv);
HE **aep;
- int longest_chain = 0;
- int was_shared;
PERL_ARGS_ASSERT_HSPLIT;
@@ -1166,8 +1160,6 @@ S_hsplit(pTHX_ HV *hv)
aep = (HE**)a;
for (i=0; i<oldsize; i++,aep++) {
- int left_length = 0;
- int right_length = 0;
HE **oentry = aep;
HE *entry = *aep;
HE **bep;
@@ -1180,91 +1172,16 @@ S_hsplit(pTHX_ HV *hv)
*oentry = HeNEXT(entry);
HeNEXT(entry) = *bep;
*bep = entry;
- right_length++;
}
else {
oentry = &HeNEXT(entry);
- left_length++;
}
entry = *oentry;
} while (entry);
/* I think we don't actually need to keep track of the longest length,
merely flag if anything is too long. But for the moment while
developing this code I'll track it. */
- if (left_length > longest_chain)
- longest_chain = left_length;
- if (right_length > longest_chain)
- longest_chain = right_length;
- }
-
-
- /* Pick your policy for "hashing isn't working" here: */
- if (longest_chain <= HV_MAX_LENGTH_BEFORE_SPLIT /* split worked? */
- || HvREHASH(hv)) {
- return;
- }
-
- if (hv == PL_strtab) {
- /* Urg. Someone is doing something nasty to the string table.
- Can't win. */
- return;
- }
-
- /* Awooga. Awooga. Pathological data. */
- /*PerlIO_printf(PerlIO_stderr(), "%p %d of %d with %d/%d buckets\n", (void*)hv,
- longest_chain, HvTOTALKEYS(hv), HvFILL(hv), 1+HvMAX(hv));*/
-
- ++newsize;
- Newxz(a, PERL_HV_ARRAY_ALLOC_BYTES(newsize)
- + (SvOOK(hv) ? sizeof(struct xpvhv_aux) : 0), char);
- if (SvOOK(hv)) {
- Copy(HvAUX(hv), &a[newsize * sizeof(HE*)], 1, struct xpvhv_aux);
- }
-
- was_shared = HvSHAREKEYS(hv);
-
- HvSHAREKEYS_off(hv);
- HvREHASH_on(hv);
-
- aep = HvARRAY(hv);
-
- for (i=0; i<newsize; i++,aep++) {
- HE *entry = *aep;
- while (entry) {
- /* We're going to trash this HE's next pointer when we chain it
- into the new hash below, so store where we go next. */
- HE * const next = HeNEXT(entry);
- UV hash;
- HE **bep;
-
- /* Rehash it */
- PERL_HASH_INTERNAL(hash, HeKEY(entry), HeKLEN(entry));
-
- if (was_shared) {
- /* Unshare it. */
- HEK * const new_hek
- = save_hek_flags(HeKEY(entry), HeKLEN(entry),
- hash, HeKFLAGS(entry));
- unshare_hek (HeKEY_hek(entry));
- HeKEY_hek(entry) = new_hek;
- } else {
- /* Not shared, so simply write the new hash in. */
- HeHASH(entry) = hash;
- }
- /*PerlIO_printf(PerlIO_stderr(), "%d ", HeKFLAGS(entry));*/
- HEK_REHASH_on(HeKEY_hek(entry));
- /*PerlIO_printf(PerlIO_stderr(), "%d\n", HeKFLAGS(entry));*/
-
- /* Copy oentry to the correct new chain. */
- bep = ((HE**)a) + (hash & (I32) xhv->xhv_max);
- HeNEXT(entry) = *bep;
- *bep = entry;
-
- entry = next;
- }
}
- Safefree (HvARRAY(hv));
- HvARRAY(hv) = (HE **)a;
}
void
@@ -1606,7 +1523,6 @@ Perl_hv_clear(pTHX_ HV *hv)
mg_clear(MUTABLE_SV(hv));
HvHASKFLAGS_off(hv);
- HvREHASH_off(hv);
}
if (SvOOK(hv)) {
if(HvENAME_get(hv))
@@ -2478,9 +2394,6 @@ Perl_hv_iternext_flags(pTHX_ HV *hv, I32 flags)
hv_free_ent(hv, oldentry);
}
- /*if (HvREHASH(hv) && entry && !HeKREHASH(entry))
- PerlIO_printf(PerlIO_stderr(), "Awooga %p %p\n", (void*)hv, (void*)entry);*/
-
iter->xhv_eiter = entry; /* HvEITER(hv) = entry */
return entry;
}
diff --git a/hv.h b/hv.h
index 1e32ab9b42..6983a8065e 100644
--- a/hv.h
+++ b/hv.h
@@ -82,6 +82,7 @@ struct xpvhv_aux {
AV *xhv_backreferences; /* back references for weak references */
HE *xhv_eiter; /* current entry of iterator */
I32 xhv_riter; /* current root of iterator */
+
/* Concerning xhv_name_count: When non-zero, xhv_name_u contains a pointer
* to an array of HEK pointers, this being the length. The first element is
* the name of the stash, which may be NULL. If xhv_name_count is positive,
@@ -103,9 +104,6 @@ struct xpvhv {
};
/* hash a key */
-/* FYI: This is the "One-at-a-Time" algorithm by Bob Jenkins
- * from requirements by Colin Plumb.
- * (http://burtleburtle.net/bob/hash/doobs.html) */
/* The use of a temporary pointer and the casting games
* is needed to serve the dual purposes of
* (a) the hashed data being interpreted as "unsigned char" (new since 5.8,
@@ -118,35 +116,513 @@ struct xpvhv {
* If USE_HASH_SEED is defined, hash randomisation is done by default
* If USE_HASH_SEED_EXPLICIT is defined, hash randomisation is done
* only if the environment variable PERL_HASH_SEED is set.
- * For maximal control, one can define PERL_HASH_SEED.
- * (see also perl.c:perl_parse()).
+ * (see also perl.c:perl_parse() and S_init_tls_and_interp() and util.c:get_hash_seed())
*/
#ifndef PERL_HASH_SEED
# if defined(USE_HASH_SEED) || defined(USE_HASH_SEED_EXPLICIT)
-# define PERL_HASH_SEED PL_hash_seed
+# define PERL_HASH_SEED PL_hash_seed
# else
-# define PERL_HASH_SEED 0
+# define PERL_HASH_SEED "PeRlHaShhAcKpErl"
# endif
#endif
-#define PERL_HASH(hash,str,len) PERL_HASH_INTERNAL_(hash,str,len,0)
+#define PERL_HASH_SEED_U32 *((U32*)PERL_HASH_SEED)
+#define PERL_HASH_SEED_U64_1 (((U64*)PERL_HASH_SEED)[0])
+#define PERL_HASH_SEED_U64_2 (((U64*)PERL_HASH_SEED)[1])
-/* Only hv.c and mod_perl should be doing this. */
+/* legacy - only mod_perl should be doing this. */
#ifdef PERL_HASH_INTERNAL_ACCESS
-#define PERL_HASH_INTERNAL(hash,str,len) PERL_HASH_INTERNAL_(hash,str,len,1)
+#define PERL_HASH_INTERNAL(hash,str,len) PERL_HASH(hash,str,len)
+#endif
+
+/* Uncomment one of the following lines to use an alternative hash algorithm.
+#define PERL_HASH_FUNC_SDBM
+#define PERL_HASH_FUNC_DJB2
+#define PERL_HASH_FUNC_SUPERFAST
+#define PERL_HASH_FUNC_MURMUR3
+#define PERL_HASH_FUNC_SIPHASH
+#define PERL_HASH_FUNC_ONE_AT_A_TIME
+*/
+
+#if !(defined(PERL_HASH_FUNC_SDBM) || defined(PERL_HASH_FUNC_DJB2) || defined(PERL_HASH_FUNC_SUPERFAST) || defined(PERL_HASH_FUNC_MURMUR3) || defined(PERL_HASH_FUNC_ONE_AT_A_TIME))
+#define PERL_HASH_FUNC_MURMUR3
+#endif
+
+#if defined(PERL_HASH_FUNC_SIPHASH)
+#define PERL_HASH_FUNC "SIPHASH"
+#define PERL_HASH_SEED_BYTES 16
+
+/* This is SipHash by Jean-Philippe Aumasson and Daniel J. Bernstein.
+ * The authors claim it is relatively secure compared to the alternatives
+ * and that performance wise it is a suitable hash for languages like Perl.
+ * See:
+ *
+ * https://www.131002.net/siphash/
+ *
+ * This implementation seems to perform slightly slower than one-at-a-time for
+ * short keys, but degrades slower for longer keys. Murmur Hash outperforms it
+ * regardless of keys size.
+ *
+ * It is 64 bit only.
+ */
+
+#define PERL_HASH_NEEDS_TWO_SEEDS
+
+#ifndef U64
+#define U64 uint64_t
+#endif
+
+#define ROTL(x,b) (U64)( ((x) << (b)) | ( (x) >> (64 - (b))) )
+
+#define U32TO8_LE(p, v) \
+ (p)[0] = (U8)((v) ); (p)[1] = (U8)((v) >> 8); \
+ (p)[2] = (U8)((v) >> 16); (p)[3] = (U8)((v) >> 24);
+
+#define U64TO8_LE(p, v) \
+ U32TO8_LE((p), (U32)((v) )); \
+ U32TO8_LE((p) + 4, (U32)((v) >> 32));
+
+#define U8TO64_LE(p) \
+ (((U64)((p)[0]) ) | \
+ ((U64)((p)[1]) << 8) | \
+ ((U64)((p)[2]) << 16) | \
+ ((U64)((p)[3]) << 24) | \
+ ((U64)((p)[4]) << 32) | \
+ ((U64)((p)[5]) << 40) | \
+ ((U64)((p)[6]) << 48) | \
+ ((U64)((p)[7]) << 56))
+
+#define SIPROUND \
+ do { \
+ v0_PeRlHaSh += v1_PeRlHaSh; v1_PeRlHaSh=ROTL(v1_PeRlHaSh,13); v1_PeRlHaSh ^= v0_PeRlHaSh; v0_PeRlHaSh=ROTL(v0_PeRlHaSh,32); \
+ v2_PeRlHaSh += v3_PeRlHaSh; v3_PeRlHaSh=ROTL(v3_PeRlHaSh,16); v3_PeRlHaSh ^= v2_PeRlHaSh; \
+ v0_PeRlHaSh += v3_PeRlHaSh; v3_PeRlHaSh=ROTL(v3_PeRlHaSh,21); v3_PeRlHaSh ^= v0_PeRlHaSh; \
+ v2_PeRlHaSh += v1_PeRlHaSh; v1_PeRlHaSh=ROTL(v1_PeRlHaSh,17); v1_PeRlHaSh ^= v2_PeRlHaSh; v2_PeRlHaSh=ROTL(v2_PeRlHaSh,32); \
+ } while(0)
+
+/* SipHash-2-4 */
+#define PERL_HASH(hash,str,len) STMT_START { \
+ const char * const strtmp_PeRlHaSh = (str); \
+ const unsigned char *in_PeRlHaSh = (const unsigned char *)strtmp_PeRlHaSh; \
+ const U32 inlen_PeRlHaSh = (len); \
+ /* "somepseudorandomlygeneratedbytes" */ \
+ U64 v0_PeRlHaSh = 0x736f6d6570736575ULL; \
+ U64 v1_PeRlHaSh = 0x646f72616e646f6dULL; \
+ U64 v2_PeRlHaSh = 0x6c7967656e657261ULL; \
+ U64 v3_PeRlHaSh = 0x7465646279746573ULL; \
+\
+ U64 b_PeRlHaSh; \
+ U64 k0_PeRlHaSh = PERL_HASH_SEED_U64_1; \
+ U64 k1_PeRlHaSh = PERL_HASH_SEED_U64_2; \
+ U64 m_PeRlHaSh; \
+ const int left_PeRlHaSh = inlen_PeRlHaSh & 7; \
+ const U8 *end_PeRlHaSh = in_PeRlHaSh + inlen_PeRlHaSh - left_PeRlHaSh; \
+\
+ b_PeRlHaSh = ( ( U64 )(len) ) << 56; \
+ v3_PeRlHaSh ^= k1_PeRlHaSh; \
+ v2_PeRlHaSh ^= k0_PeRlHaSh; \
+ v1_PeRlHaSh ^= k1_PeRlHaSh; \
+ v0_PeRlHaSh ^= k0_PeRlHaSh; \
+\
+ for ( ; in_PeRlHaSh != end_PeRlHaSh; in_PeRlHaSh += 8 ) \
+ { \
+ m_PeRlHaSh = U8TO64_LE( in_PeRlHaSh ); \
+ v3_PeRlHaSh ^= m_PeRlHaSh; \
+ SIPROUND; \
+ SIPROUND; \
+ v0_PeRlHaSh ^= m_PeRlHaSh; \
+ } \
+\
+ switch( left_PeRlHaSh ) \
+ { \
+ case 7: b_PeRlHaSh |= ( ( U64 )in_PeRlHaSh[ 6] ) << 48; \
+ case 6: b_PeRlHaSh |= ( ( U64 )in_PeRlHaSh[ 5] ) << 40; \
+ case 5: b_PeRlHaSh |= ( ( U64 )in_PeRlHaSh[ 4] ) << 32; \
+ case 4: b_PeRlHaSh |= ( ( U64 )in_PeRlHaSh[ 3] ) << 24; \
+ case 3: b_PeRlHaSh |= ( ( U64 )in_PeRlHaSh[ 2] ) << 16; \
+ case 2: b_PeRlHaSh |= ( ( U64 )in_PeRlHaSh[ 1] ) << 8; \
+ case 1: b_PeRlHaSh |= ( ( U64 )in_PeRlHaSh[ 0] ); break; \
+ case 0: break; \
+ } \
+\
+ v3_PeRlHaSh ^= b_PeRlHaSh; \
+ SIPROUND; \
+ SIPROUND; \
+ v0_PeRlHaSh ^= b_PeRlHaSh; \
+\
+ v2_PeRlHaSh ^= 0xff; \
+ SIPROUND; \
+ SIPROUND; \
+ SIPROUND; \
+ SIPROUND; \
+ b_PeRlHaSh = v0_PeRlHaSh ^ v1_PeRlHaSh ^ v2_PeRlHaSh ^ v3_PeRlHaSh; \
+ (hash)= (U32)(b_PeRlHaSh & U32_MAX); \
+} STMT_END
+
+#elif defined(PERL_HASH_FUNC_SUPERFAST)
+#define PERL_HASH_FUNC "SUPERFAST"
+/* FYI: This is the "Super-Fast" algorithm mentioned by Bob Jenkins in
+ * (http://burtleburtle.net/bob/hash/doobs.html)
+ * It is by Paul Hsieh (c) 2004 and is analysed here
+ * http://www.azillionmonkeys.com/qed/hash.html
+ * license terms are here:
+ * http://www.azillionmonkeys.com/qed/weblicense.html
+ */
+#undef get16bits
+#if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__) \
+ || defined(_MSC_VER) || defined (__BORLANDC__) || defined (__TURBOC__)
+#define get16bits(d) (*((const U16 *) (d)))
#endif
-/* Common base for PERL_HASH and PERL_HASH_INTERNAL that parameterises
- * the source of the seed. Not for direct use outside of hv.c. */
+#if !defined (get16bits)
+#define get16bits(d) ((((const U8 *)(d))[1] << UINT32_C(8))\
+ +((const U8 *)(d))[0])
+#endif
+#define PERL_HASH(hash,str,len) \
+ STMT_START { \
+ register const char * const strtmp_PeRlHaSh = (str); \
+ register const unsigned char *str_PeRlHaSh = (const unsigned char *)strtmp_PeRlHaSh; \
+ register U32 len_PeRlHaSh = (len); \
+ register U32 hash_PeRlHaSh = PERL_HASH_SEED_U32 ^ len; \
+ register U32 tmp_PeRlHaSh; \
+ register int rem_PeRlHaSh= len_PeRlHaSh & 3; \
+ len_PeRlHaSh >>= 2; \
+ \
+ for (;len_PeRlHaSh > 0; len_PeRlHaSh--) { \
+ hash_PeRlHaSh += get16bits (str_PeRlHaSh); \
+ tmp_PeRlHaSh = (get16bits (str_PeRlHaSh+2) << 11) ^ hash_PeRlHaSh; \
+ hash_PeRlHaSh = (hash_PeRlHaSh << 16) ^ tmp_PeRlHaSh; \
+ str_PeRlHaSh += 2 * sizeof (U16); \
+ hash_PeRlHaSh += hash_PeRlHaSh >> 11; \
+ } \
+ \
+ /* Handle end cases */ \
+ switch (rem_PeRlHaSh) { \
+ case 3: hash_PeRlHaSh += get16bits (str_PeRlHaSh); \
+ hash_PeRlHaSh ^= hash_PeRlHaSh << 16; \
+ hash_PeRlHaSh ^= str_PeRlHaSh[sizeof (U16)] << 18; \
+ hash_PeRlHaSh += hash_PeRlHaSh >> 11; \
+ break; \
+ case 2: hash_PeRlHaSh += get16bits (str_PeRlHaSh); \
+ hash_PeRlHaSh ^= hash_PeRlHaSh << 11; \
+ hash_PeRlHaSh += hash_PeRlHaSh >> 17; \
+ break; \
+ case 1: hash_PeRlHaSh += *str_PeRlHaSh; \
+ hash_PeRlHaSh ^= hash_PeRlHaSh << 10; \
+ hash_PeRlHaSh += hash_PeRlHaSh >> 1; \
+ } \
+ \
+ /* Force "avalanching" of final 127 bits */ \
+ hash_PeRlHaSh ^= hash_PeRlHaSh << 3; \
+ hash_PeRlHaSh += hash_PeRlHaSh >> 5; \
+ hash_PeRlHaSh ^= hash_PeRlHaSh << 4; \
+ hash_PeRlHaSh += hash_PeRlHaSh >> 17; \
+ hash_PeRlHaSh ^= hash_PeRlHaSh << 25; \
+ (hash) = (hash_PeRlHaSh + (hash_PeRlHaSh >> 6)); \
+ } STMT_END
+
+#elif defined(PERL_HASH_FUNC_MURMUR3)
+#define PERL_HASH_FUNC "MURMUR3"
+#define PERL_HASH_SEED_BYTES 4
+
+/*-----------------------------------------------------------------------------
+ * MurmurHash3 was written by Austin Appleby, and is placed in the public
+ * domain.
+ *
+ * This implementation was originally written by Shane Day, and is also public domain,
+ * and was modified to function as a macro similar to other perl hash functions by
+ * Yves Orton.
+ *
+ * This is a portable ANSI C implementation of MurmurHash3_x86_32 (Murmur3A)
+ * with support for progressive processing.
+ *
+ * If you want to understand the MurmurHash algorithm you would be much better
+ * off reading the original source. Just point your browser at:
+ * http://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp
+ *
+ * How does it work?
+ *
+ * We can only process entire 32 bit chunks of input, except for the very end
+ * that may be shorter.
+ *
+ * To handle endianess I simply use a macro that reads a U32 and define
+ * that macro to be a direct read on little endian machines, a read and swap
+ * on big endian machines, or a byte-by-byte read if the endianess is unknown.
+ */
+
+
+/*-----------------------------------------------------------------------------
+ * Endianess, misalignment capabilities and util macros
+ *
+ * The following 3 macros are defined in this section. The other macros defined
+ * are only needed to help derive these 3.
+ *
+ * MURMUR_READ_UINT32(x) Read a little endian unsigned 32-bit int
+ * MURMUR_UNALIGNED_SAFE Defined if READ_UINT32 works on non-word boundaries
+ * MURMUR_ROTL32(x,r) Rotate x left by r bits
+ */
-#define PERL_HASH_INTERNAL_(hash,str,len,internal) \
+/* Convention is to define __BYTE_ORDER == to one of these values */
+#if !defined(__BIG_ENDIAN)
+ #define __BIG_ENDIAN 4321
+#endif
+#if !defined(__LITTLE_ENDIAN)
+ #define __LITTLE_ENDIAN 1234
+#endif
+
+/* I386 */
+#if defined(_M_IX86) || defined(__i386__) || defined(__i386) || defined(i386)
+ #define __BYTE_ORDER __LITTLE_ENDIAN
+ #define MURMUR_UNALIGNED_SAFE
+#endif
+
+/* gcc 'may' define __LITTLE_ENDIAN__ or __BIG_ENDIAN__ to 1 (Note the trailing __),
+ * or even _LITTLE_ENDIAN or _BIG_ENDIAN (Note the single _ prefix) */
+#if !defined(__BYTE_ORDER)
+ #if defined(__LITTLE_ENDIAN__) && __LITTLE_ENDIAN__==1 || defined(_LITTLE_ENDIAN) && _LITTLE_ENDIAN==1
+ #define __BYTE_ORDER __LITTLE_ENDIAN
+ #elif defined(__BIG_ENDIAN__) && __BIG_ENDIAN__==1 || defined(_BIG_ENDIAN) && _BIG_ENDIAN==1
+ #define __BYTE_ORDER __BIG_ENDIAN
+ #endif
+#endif
+
+/* gcc (usually) defines xEL/EB macros for ARM and MIPS endianess */
+#if !defined(__BYTE_ORDER)
+ #if defined(__ARMEL__) || defined(__MIPSEL__)
+ #define __BYTE_ORDER __LITTLE_ENDIAN
+ #endif
+ #if defined(__ARMEB__) || defined(__MIPSEB__)
+ #define __BYTE_ORDER __BIG_ENDIAN
+ #endif
+#endif
+
+/* Now find best way we can to READ_UINT32 */
+#if __BYTE_ORDER==__LITTLE_ENDIAN
+ /* CPU endian matches murmurhash algorithm, so read 32-bit word directly */
+ #define MURMUR_READ_UINT32(ptr) (*((U32*)(ptr)))
+#elif __BYTE_ORDER==__BIG_ENDIAN
+ /* TODO: Add additional cases below where a compiler provided bswap32 is available */
+ #if defined(__GNUC__) && (__GNUC__>4 || (__GNUC__==4 && __GNUC_MINOR__>=3))
+ #define MURMUR_READ_UINT32(ptr) (__builtin_bswap32(*((U32*)(ptr))))
+ #else
+ /* Without a known fast bswap32 we're just as well off doing this */
+ #define MURMUR_READ_UINT32(ptr) (ptr[0]|ptr[1]<<8|ptr[2]<<16|ptr[3]<<24)
+ #define MURMUR_UNALIGNED_SAFE
+ #endif
+#else
+ /* Unknown endianess so last resort is to read individual bytes */
+ #define MURMUR_READ_UINT32(ptr) (ptr[0]|ptr[1]<<8|ptr[2]<<16|ptr[3]<<24)
+
+ /* Since we're not doing word-reads we can skip the messing about with realignment */
+ #define MURMUR_UNALIGNED_SAFE
+#endif
+
+/* Find best way to ROTL32 */
+#if defined(_MSC_VER)
+ #include <stdlib.h> /* Microsoft put _rotl declaration in here */
+ #define MURMUR_ROTL32(x,r) _rotl(x,r)
+#else
+ /* gcc recognises this code and generates a rotate instruction for CPUs with one */
+ #define MURMUR_ROTL32(x,r) (((U32)x << r) | ((U32)x >> (32 - r)))
+#endif
+
+
+/*-----------------------------------------------------------------------------
+ * Core murmurhash algorithm macros */
+
+#define MURMUR_C1 (0xcc9e2d51)
+#define MURMUR_C2 (0x1b873593)
+#define MURMUR_C3 (0xe6546b64)
+#define MURMUR_C4 (0x85ebca6b)
+#define MURMUR_C5 (0xc2b2ae35)
+
+/* This is the main processing body of the algorithm. It operates
+ * on each full 32-bits of input. */
+#define MURMUR_DOBLOCK(h1, k1) STMT_START { \
+ k1 *= MURMUR_C1; \
+ k1 = MURMUR_ROTL32(k1,15); \
+ k1 *= MURMUR_C2; \
+ \
+ h1 ^= k1; \
+ h1 = MURMUR_ROTL32(h1,13); \
+ h1 = h1 * 5 + MURMUR_C3; \
+} STMT_END
+
+
+/* Append unaligned bytes to carry, forcing hash churn if we have 4 bytes */
+/* cnt=bytes to process, h1=name of h1 var, c=carry, n=bytes in c, ptr/len=payload */
+#define MURMUR_DOBYTES(cnt, h1, c, n, ptr, len) STMT_START { \
+ int MURMUR_DOBYTES_i = cnt; \
+ while(MURMUR_DOBYTES_i--) { \
+ c = c>>8 | *ptr++<<24; \
+ n++; len--; \
+ if(n==4) { \
+ MURMUR_DOBLOCK(h1, c); \
+ n = 0; \
+ } \
+ } \
+} STMT_END
+
+/* process the last 1..3 bytes and finalize */
+#define MURMUR_FINALIZE(hash, PeRlHaSh_len, PeRlHaSh_k1, PeRlHaSh_h1, PeRlHaSh_carry, PeRlHaSh_bytes_in_carry, PeRlHaSh_ptr, PeRlHaSh_total_length) STMT_START { \
+ /* Advance over whole 32-bit chunks, possibly leaving 1..3 bytes */\
+ PeRlHaSh_len -= PeRlHaSh_len/4*4; \
+ \
+ /* Append any remaining bytes into carry */ \
+ MURMUR_DOBYTES(PeRlHaSh_len, PeRlHaSh_h1, PeRlHaSh_carry, PeRlHaSh_bytes_in_carry, PeRlHaSh_ptr, PeRlHaSh_len); \
+ \
+ if (PeRlHaSh_bytes_in_carry) { \
+ PeRlHaSh_k1 = PeRlHaSh_carry >> ( 4 - PeRlHaSh_bytes_in_carry ) * 8; \
+ PeRlHaSh_k1 *= MURMUR_C1; \
+ PeRlHaSh_k1 = MURMUR_ROTL32(PeRlHaSh_k1,15); \
+ PeRlHaSh_k1 *= MURMUR_C2; \
+ PeRlHaSh_h1 ^= PeRlHaSh_k1; \
+ } \
+ PeRlHaSh_h1 ^= PeRlHaSh_total_length; \
+ \
+ /* fmix */ \
+ PeRlHaSh_h1 ^= PeRlHaSh_h1 >> 16; \
+ PeRlHaSh_h1 *= MURMUR_C4; \
+ PeRlHaSh_h1 ^= PeRlHaSh_h1 >> 13; \
+ PeRlHaSh_h1 *= MURMUR_C5; \
+ PeRlHaSh_h1 ^= PeRlHaSh_h1 >> 16; \
+ (hash)= PeRlHaSh_h1; \
+} STMT_END
+
+/* now we create the hash function */
+
+#if defined(UNALIGNED_SAFE)
+#define PERL_HASH(hash,str,len) STMT_START { \
+ register const char * const s_PeRlHaSh_tmp = (str); \
+ register const unsigned char *PeRlHaSh_ptr = (const unsigned char *)s_PeRlHaSh_tmp; \
+ register I32 PeRlHaSh_len = len; \
+ \
+ U32 PeRlHaSh_h1 = PERL_HASH_SEED_U32; \
+ U32 PeRlHaSh_k1; \
+ U32 PeRlHaSh_carry = 0; \
+ \
+ const unsigned char *PeRlHaSh_end; \
+ \
+ int PeRlHaSh_bytes_in_carry = 0; /* bytes in carry */ \
+ I32 PeRlHaSh_total_length= PeRlHaSh_len; \
+ \
+ /* This CPU handles unaligned word access */ \
+ /* Process 32-bit chunks */ \
+ PeRlHaSh_end = PeRlHaSh_ptr + PeRlHaSh_len/4*4; \
+ for( ; PeRlHaSh_ptr < PeRlHaSh_end ; PeRlHaSh_ptr+=4) { \
+ PeRlHaSh_k1 = MURMUR_READ_UINT32(PeRlHaSh_ptr); \
+ MURMUR_DOBLOCK(PeRlHaSh_h1, PeRlHaSh_k1); \
+ } \
+ \
+ MURMUR_FINALIZE(hash, PeRlHaSh_len, PeRlHaSh_k1, PeRlHaSh_h1, PeRlHaSh_carry, PeRlHaSh_bytes_in_carry, PeRlHaSh_ptr, PeRlHaSh_total_length);\
+ } STMT_END
+#else
+#define PERL_HASH(hash,str,len) STMT_START { \
+ register const char * const s_PeRlHaSh_tmp = (str); \
+ register const unsigned char *PeRlHaSh_ptr = (const unsigned char *)s_PeRlHaSh_tmp; \
+ register I32 PeRlHaSh_len = len; \
+ \
+ U32 PeRlHaSh_h1 = PERL_HASH_SEED_U32; \
+ U32 PeRlHaSh_k1; \
+ U32 PeRlHaSh_carry = 0; \
+ \
+ const unsigned char *PeRlHaSh_end; \
+ \
+ int PeRlHaSh_bytes_in_carry = 0; /* bytes in carry */ \
+ I32 PeRlHaSh_total_length= PeRlHaSh_len; \
+ \
+ /* This CPU does not handle unaligned word access */ \
+ \
+ /* Consume enough so that the next data byte is word aligned */ \
+ int PeRlHaSh_i = -(long)PeRlHaSh_ptr & 3; \
+ if(PeRlHaSh_i && PeRlHaSh_i <= PeRlHaSh_len) { \
+ MURMUR_DOBYTES(PeRlHaSh_i, PeRlHaSh_h1, PeRlHaSh_carry, PeRlHaSh_bytes_in_carry, PeRlHaSh_ptr, PeRlHaSh_len);\
+ } \
+ \
+ /* We're now aligned. Process in aligned blocks. Specialise for each possible carry count */ \
+ PeRlHaSh_end = PeRlHaSh_ptr + PeRlHaSh_len/4*4; \
+ switch(PeRlHaSh_bytes_in_carry) { /* how many bytes in carry */ \
+ case 0: /* c=[----] w=[3210] b=[3210]=w c'=[----] */ \
+ for( ; PeRlHaSh_ptr < PeRlHaSh_end ; PeRlHaSh_ptr+=4) { \
+ PeRlHaSh_k1 = MURMUR_READ_UINT32(PeRlHaSh_ptr); \
+ MURMUR_DOBLOCK(PeRlHaSh_h1, PeRlHaSh_k1); \
+ } \
+ break; \
+ case 1: /* c=[0---] w=[4321] b=[3210]=c>>24|w<<8 c'=[4---] */ \
+ for( ; PeRlHaSh_ptr < PeRlHaSh_end ; PeRlHaSh_ptr+=4) { \
+ PeRlHaSh_k1 = PeRlHaSh_carry>>24; \
+ PeRlHaSh_carry = MURMUR_READ_UINT32(PeRlHaSh_ptr); \
+ PeRlHaSh_k1 |= PeRlHaSh_carry<<8; \
+ MURMUR_DOBLOCK(PeRlHaSh_h1, PeRlHaSh_k1); \
+ } \
+ break; \
+ case 2: /* c=[10--] w=[5432] b=[3210]=c>>16|w<<16 c'=[54--] */ \
+ for( ; PeRlHaSh_ptr < PeRlHaSh_end ; PeRlHaSh_ptr+=4) { \
+ PeRlHaSh_k1 = PeRlHaSh_carry>>16; \
+ PeRlHaSh_carry = MURMUR_READ_UINT32(PeRlHaSh_ptr); \
+ PeRlHaSh_k1 |= PeRlHaSh_carry<<16; \
+ MURMUR_DOBLOCK(PeRlHaSh_h1, PeRlHaSh_k1); \
+ } \
+ break; \
+ case 3: /* c=[210-] w=[6543] b=[3210]=c>>8|w<<24 c'=[654-] */ \
+ for( ; PeRlHaSh_ptr < PeRlHaSh_end ; PeRlHaSh_ptr+=4) { \
+ PeRlHaSh_k1 = PeRlHaSh_carry>>8; \
+ PeRlHaSh_carry = MURMUR_READ_UINT32(PeRlHaSh_ptr); \
+ PeRlHaSh_k1 |= PeRlHaSh_carry<<24; \
+ MURMUR_DOBLOCK(PeRlHaSh_h1, PeRlHaSh_k1); \
+ } \
+ } \
+ \
+ MURMUR_FINALIZE(hash, PeRlHaSh_len, PeRlHaSh_k1, PeRlHaSh_h1, PeRlHaSh_carry, PeRlHaSh_bytes_in_carry, PeRlHaSh_ptr, PeRlHaSh_total_length);\
+ } STMT_END
+#endif
+
+#elif defined(PERL_HASH_FUNC_DJB2)
+#define PERL_HASH_FUNC "DJB2"
+#define PERL_HASH_SEED_BYTES 4
+#define PERL_HASH(hash,str,len) \
+ STMT_START { \
+ register const char * const s_PeRlHaSh_tmp = (str); \
+ register const unsigned char *s_PeRlHaSh = (const unsigned char *)s_PeRlHaSh_tmp; \
+ register I32 i_PeRlHaSh = len; \
+ register U32 hash_PeRlHaSh = PERL_HASH_SEED_U32 ^ len; \
+ while (i_PeRlHaSh--) { \
+ hash_PeRlHaSh = ((hash_PeRlHaSh << 5) + hash_PeRlHaSh) + *s_PeRlHaSh++; \
+ } \
+ (hash) = hash_PeRlHaSh;\
+ } STMT_END
+
+#elif defined(PERL_HASH_FUNC_SDBM)
+#define PERL_HASH_FUNC "SDBM"
+#define PERL_HASH_SEED_BYTES 4
+#define PERL_HASH(hash,str,len) \
+ STMT_START { \
+ register const char * const s_PeRlHaSh_tmp = (str); \
+ register const unsigned char *s_PeRlHaSh = (const unsigned char *)s_PeRlHaSh_tmp; \
+ register I32 i_PeRlHaSh = len; \
+ register U32 hash_PeRlHaSh = PERL_HASH_SEED_U32 ^ len; \
+ while (i_PeRlHaSh--) { \
+ hash_PeRlHaSh = (hash_PeRlHaSh << 6) + (hash_PeRlHaSh << 16) - hash_PeRlHaSh + *s_PeRlHaSh++; \
+ } \
+ (hash) = hash_PeRlHaSh;\
+ } STMT_END
+
+#elif defined(PERL_HASH_FUNC_ONE_AT_A_TIME)
+/* DEFAULT/HISTORIC HASH FUNCTION */
+#define PERL_HASH_FUNC "ONE_AT_A_TIME"
+#define PERL_HASH_SEED_BYTES 4
+
+/* FYI: This is the "One-at-a-Time" algorithm by Bob Jenkins
+ * from requirements by Colin Plumb.
+ * (http://burtleburtle.net/bob/hash/doobs.html) */
+#define PERL_HASH(hash,str,len) \
STMT_START { \
- const char * const s_PeRlHaSh_tmp = str; \
- const unsigned char *s_PeRlHaSh = (const unsigned char *)s_PeRlHaSh_tmp; \
- I32 i_PeRlHaSh = len; \
- U32 hash_PeRlHaSh = (internal ? PL_rehash_seed : PERL_HASH_SEED); \
+ register const char * const s_PeRlHaSh_tmp = (str); \
+ register const unsigned char *s_PeRlHaSh = (const unsigned char *)s_PeRlHaSh_tmp; \
+ register I32 i_PeRlHaSh = len; \
+ register U32 hash_PeRlHaSh = PERL_HASH_SEED_U32 ^ len; \
while (i_PeRlHaSh--) { \
- hash_PeRlHaSh += *s_PeRlHaSh++; \
+ hash_PeRlHaSh += (U8)*s_PeRlHaSh++; \
hash_PeRlHaSh += (hash_PeRlHaSh << 10); \
hash_PeRlHaSh ^= (hash_PeRlHaSh >> 6); \
} \
@@ -154,7 +630,10 @@ struct xpvhv {
hash_PeRlHaSh ^= (hash_PeRlHaSh >> 11); \
(hash) = (hash_PeRlHaSh + (hash_PeRlHaSh << 15)); \
} STMT_END
-
+#endif
+#ifndef PERL_HASH
+#error "No hash function defined!"
+#endif
/*
=head1 Hash Manipulation Functions
@@ -358,10 +837,6 @@ C<SV*>.
#define HvLAZYDEL_on(hv) (SvFLAGS(hv) |= SVphv_LAZYDEL)
#define HvLAZYDEL_off(hv) (SvFLAGS(hv) &= ~SVphv_LAZYDEL)
-#define HvREHASH(hv) (SvFLAGS(hv) & SVphv_REHASH)
-#define HvREHASH_on(hv) (SvFLAGS(hv) |= SVphv_REHASH)
-#define HvREHASH_off(hv) (SvFLAGS(hv) &= ~SVphv_REHASH)
-
#ifndef PERL_CORE
# define Nullhe Null(HE*)
#endif
@@ -372,7 +847,6 @@ C<SV*>.
#define HeKLEN(he) HEK_LEN(HeKEY_hek(he))
#define HeKUTF8(he) HEK_UTF8(HeKEY_hek(he))
#define HeKWASUTF8(he) HEK_WASUTF8(HeKEY_hek(he))
-#define HeKREHASH(he) HEK_REHASH(HeKEY_hek(he))
#define HeKLEN_UTF8(he) (HeKUTF8(he) ? -HeKLEN(he) : HeKLEN(he))
#define HeKFLAGS(he) HEK_FLAGS(HeKEY_hek(he))
#define HeVAL(he) (he)->he_valu.hent_val
@@ -407,7 +881,6 @@ C<SV*>.
#define HVhek_UTF8 0x01 /* Key is utf8 encoded. */
#define HVhek_WASUTF8 0x02 /* Key is bytes here, but was supplied as utf8. */
-#define HVhek_REHASH 0x04 /* This key is in an hv using a custom HASH . */
#define HVhek_UNSHARED 0x08 /* This key isn't a shared hash key. */
#define HVhek_FREEKEY 0x100 /* Internal flag to say key is malloc()ed. */
#define HVhek_PLACEHOLD 0x200 /* Internal flag to create placeholder.
@@ -417,16 +890,7 @@ C<SV*>.
converted to bytes. */
#define HVhek_MASK 0xFF
-/* Which flags enable HvHASKFLAGS? Somewhat a hack on a hack, as
- HVhek_REHASH is only needed because the rehash flag has to be duplicated
- into all keys as hv_iternext has no access to the hash flags. At this
- point Storable's tests get upset, because sometimes hashes are "keyed"
- and sometimes not, depending on the order of data insertion, and whether
- it triggered rehashing. So currently HVhek_REHASH is exempt.
- Similarly UNSHARED
-*/
-
-#define HVhek_ENABLEHVKFLAGS (HVhek_MASK & ~(HVhek_REHASH|HVhek_UNSHARED))
+#define HVhek_ENABLEHVKFLAGS (HVhek_MASK & ~(HVhek_UNSHARED))
#define HEK_UTF8(hek) (HEK_FLAGS(hek) & HVhek_UTF8)
#define HEK_UTF8_on(hek) (HEK_FLAGS(hek) |= HVhek_UTF8)
@@ -434,8 +898,6 @@ C<SV*>.
#define HEK_WASUTF8(hek) (HEK_FLAGS(hek) & HVhek_WASUTF8)
#define HEK_WASUTF8_on(hek) (HEK_FLAGS(hek) |= HVhek_WASUTF8)
#define HEK_WASUTF8_off(hek) (HEK_FLAGS(hek) &= ~HVhek_WASUTF8)
-#define HEK_REHASH(hek) (HEK_FLAGS(hek) & HVhek_REHASH)
-#define HEK_REHASH_on(hek) (HEK_FLAGS(hek) |= HVhek_REHASH)
/* calculate HV array allocation */
#ifndef PERL_USE_LARGE_HV_ALLOC
diff --git a/intrpvar.h b/intrpvar.h
index b6d69ed49d..2a2913c04b 100644
--- a/intrpvar.h
+++ b/intrpvar.h
@@ -162,7 +162,6 @@ PERLVARI(I, dumpindent, U16, 4) /* number of blanks per dump
PERLVAR(I, utf8locale, bool) /* utf8 locale detected */
-PERLVARI(I, rehash_seed_set, bool, FALSE) /* 582 hash initialized? */
PERLVARA(I, colors,6, char *) /* from regcomp.c */
@@ -741,10 +740,6 @@ PERLVARI(I, destroyhook, destroyable_proc_t, Perl_sv_destroyable)
PERLVARI(I, signalhook, despatch_signals_proc_t, Perl_despatch_signals)
#endif
-PERLVARI(I, hash_seed, UV, 0) /* Hash initializer */
-
-PERLVARI(I, rehash_seed, UV, 0) /* 582 hash initializer */
-
PERLVARI(I, isarev, HV *, NULL) /* Reverse map of @ISA dependencies */
/* Register of known Method Resolution Orders.
@@ -770,6 +765,7 @@ PERLVAR(I, custom_ops, HV *) /* custom op registrations */
PERLVARI(I, globhook, globhook_t, NULL)
PERLVARI(I, glob_index, int, 0)
+
PERLVAR(I, reentrant_retint, int) /* Integer return value from reentrant functions */
/* The last unconditional member of the interpreter structure when 5.10.0 was
diff --git a/perl.c b/perl.c
index 7bd9ab96cd..d7767b10c6 100644
--- a/perl.c
+++ b/perl.c
@@ -290,6 +290,19 @@ perl_construct(pTHXx)
#ifdef USE_REENTRANT_API
Perl_reentrant_init(aTHX);
#endif
+#if defined(USE_HASH_SEED) || defined(USE_HASH_SEED_EXPLICIT)
+ /* [perl #22371] Algorimic Complexity Attack on Perl 5.6.1, 5.8.0
+ * This MUST be done before any hash stores or fetches take place.
+ * If you set PL_hash_seed (and presumably also PL_hash_seed_set)
+ * yourself, it is your responsibility to provide a good random seed!
+ * You can also define PERL_HASH_SEED in compile time, see hv.h.
+ *
+ * XXX: fix this comment */
+ if (PL_hash_seed_set == FALSE) {
+ Perl_get_hash_seed(aTHX_ PL_hash_seed);
+ PL_hash_seed_set= TRUE;
+ }
+#endif /* #if defined(USE_HASH_SEED) || defined(USE_HASH_SEED_EXPLICIT) */
/* Note that strtab is a rather special HV. Assumptions are made
about not iterating on it, and not adding tie magic to it.
@@ -1476,23 +1489,21 @@ perl_parse(pTHXx_ XSINIT_t xsinit, int argc, char **argv, char **env)
#ifndef MULTIPLICITY
PERL_UNUSED_ARG(my_perl);
#endif
-
-#if defined(USE_HASH_SEED) || defined(USE_HASH_SEED_EXPLICIT)
- /* [perl #22371] Algorimic Complexity Attack on Perl 5.6.1, 5.8.0
- * This MUST be done before any hash stores or fetches take place.
- * If you set PL_rehash_seed (and presumably also PL_rehash_seed_set)
- * yourself, it is your responsibility to provide a good random seed!
- * You can also define PERL_HASH_SEED in compile time, see hv.h. */
- if (!PL_rehash_seed_set)
- PL_rehash_seed = get_hash_seed();
+#if defined(USE_HASH_SEED) || defined(USE_HASH_SEED_EXPLICIT) || defined(USE_HASH_SEED_DEBUG)
{
- const char * const s = PerlEnv_getenv("PERL_HASH_SEED_DEBUG");
-
- if (s && (atoi(s) == 1))
- PerlIO_printf(Perl_debug_log, "HASH_SEED = %"UVuf"\n", PL_rehash_seed);
+ const char * const s = PerlEnv_getenv("PERL_HASH_SEED_DEBUG");
+
+ if (s && (atoi(s) == 1)) {
+ unsigned char *seed= PERL_HASH_SEED;
+ unsigned char *seed_end= PERL_HASH_SEED + PERL_HASH_SEED_BYTES;
+ PerlIO_printf(Perl_debug_log, "HASH_FUNCTION = %s HASH_SEED = 0x", PERL_HASH_FUNC);
+ while (seed < seed_end) {
+ PerlIO_printf(Perl_debug_log, "%02x", *seed++);
+ }
+ PerlIO_printf(Perl_debug_log, "\n");
+ }
}
#endif /* #if defined(USE_HASH_SEED) || defined(USE_HASH_SEED_EXPLICIT) */
-
PL_origargc = argc;
PL_origargv = argv;
diff --git a/perlapi.h b/perlapi.h
index 80425c368c..910f789540 100644
--- a/perlapi.h
+++ b/perlapi.h
@@ -115,6 +115,10 @@ END_EXTERN_C
#define PL_dollarzero_mutex (*Perl_Gdollarzero_mutex_ptr(NULL))
#undef PL_fold_locale
#define PL_fold_locale (*Perl_Gfold_locale_ptr(NULL))
+#undef PL_hash_seed
+#define PL_hash_seed (*Perl_Ghash_seed_ptr(NULL))
+#undef PL_hash_seed_set
+#define PL_hash_seed_set (*Perl_Ghash_seed_set_ptr(NULL))
#undef PL_hints_mutex
#define PL_hints_mutex (*Perl_Ghints_mutex_ptr(NULL))
#undef PL_keyword_plugin
diff --git a/perlvars.h b/perlvars.h
index 20c3882fc8..68471a0b80 100644
--- a/perlvars.h
+++ b/perlvars.h
@@ -231,3 +231,6 @@ PERLVAR(G, sv_placeholder, SV)
#if defined(MYMALLOC) && defined(USE_ITHREADS)
PERLVAR(G, malloc_mutex, perl_mutex) /* Mutex for malloc */
#endif
+
+PERLVARI(G, hash_seed_set, bool, FALSE) /* perl.c */
+PERLVARA(G, hash_seed, 8, unsigned char) /* and hv.h */
diff --git a/proto.h b/proto.h
index 5bb335217c..3fd54470ec 100644
--- a/proto.h
+++ b/proto.h
@@ -1129,8 +1129,10 @@ PERL_CALLCONV void Perl_get_db_sub(pTHX_ SV **svp, CV *cv)
#define PERL_ARGS_ASSERT_GET_DB_SUB \
assert(cv)
-PERL_CALLCONV UV Perl_get_hash_seed(pTHX)
- __attribute__warn_unused_result__;
+PERL_CALLCONV void Perl_get_hash_seed(pTHX_ unsigned char *seed_buffer)
+ __attribute__nonnull__(pTHX_1);
+#define PERL_ARGS_ASSERT_GET_HASH_SEED \
+ assert(seed_buffer)
PERL_CALLCONV HV* Perl_get_hv(pTHX_ const char *name, I32 flags)
__attribute__nonnull__(pTHX_1);
diff --git a/sv.c b/sv.c
index 75577907a7..b034f4e437 100644
--- a/sv.c
+++ b/sv.c
@@ -4566,7 +4566,7 @@ Perl_sv_sethek(pTHX_ register SV *const sv, const HEK *const hek)
sv_usepvn_flags(sv, as_utf8, utf8_len, SV_HAS_TRAILING_NUL);
SvUTF8_on(sv);
return;
- } else if (flags & (HVhek_REHASH|HVhek_UNSHARED)) {
+ } else if (flags & HVhek_UNSHARED) {
sv_setpvn(sv, HEK_KEY(hek), HEK_LEN(hek));
if (HEK_UTF8(hek))
SvUTF8_on(sv);
@@ -8457,13 +8457,8 @@ Perl_newSVhek(pTHX_ const HEK *const hek)
sv_usepvn_flags(sv, as_utf8, utf8_len, SV_HAS_TRAILING_NUL);
SvUTF8_on (sv);
return sv;
- } else if (flags & (HVhek_REHASH|HVhek_UNSHARED)) {
- /* We don't have a pointer to the hv, so we have to replicate the
- flag into every HEK. This hv is using custom a hasing
- algorithm. Hence we can't return a shared string scalar, as
- that would contain the (wrong) hash value, and might get passed
- into an hv routine with a regular hash.
- Similarly, a hash that isn't using shared hash keys has to have
+ } else if (flags & HVhek_UNSHARED) {
+ /* A hash that isn't using shared hash keys has to have
the flag in every key so that we know not to try to call
share_hek_hek on it. */
@@ -12912,6 +12907,7 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags,
PL_Proc = ipP;
#endif /* PERL_IMPLICIT_SYS */
+
param->flags = flags;
/* Nothing in the core code uses this, but we make it available to
extensions (using mg_dup). */
@@ -12921,6 +12917,7 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags,
param->new_perl = my_perl;
param->unreferenced = NULL;
+
INIT_TRACK_MEMPOOL(my_perl->Imemory_debug_header, my_perl);
PL_body_arenas = NULL;
@@ -12933,9 +12930,6 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags,
PL_debug = proto_perl->Idebug;
- PL_hash_seed = proto_perl->Ihash_seed;
- PL_rehash_seed = proto_perl->Irehash_seed;
-
/* dbargs array probably holds garbage */
PL_dbargs = NULL;
diff --git a/sv.h b/sv.h
index 5e41ecbef4..902cae7176 100644
--- a/sv.h
+++ b/sv.h
@@ -360,8 +360,6 @@ perform the upgrade if necessary. See C<svtype>.
3: On a pad name SV, that slot in the
frame AV is a REFCNT'ed reference
to a lexical from "outside". */
-#define SVphv_REHASH SVf_FAKE /* 4: On a PVHV, hash values are being
- recalculated */
#define SVf_OOK 0x02000000 /* has valid offset value. For a PVHV this
means that a hv_aux struct is present
after the main array */
diff --git a/t/lib/universal.t b/t/lib/universal.t
index a52e01972f..71223b4fae 100644
--- a/t/lib/universal.t
+++ b/t/lib/universal.t
@@ -15,12 +15,10 @@ sub tryit { eval shift or warn \$@ }
tryit "&Internals::SvREADONLY($arg)";
tryit "&Internals::SvREFCNT($arg)";
tryit "&Internals::hv_clear_placeholders($arg)";
-tryit "&Internals::HvREHASH($arg)";
----
Usage: Internals::SvREADONLY(SCALAR[, ON]) at (eval 1) line 1.
Usage: Internals::SvREFCNT(SCALAR[, REFCOUNT]) at (eval 2) line 1.
Usage: Internals::hv_clear_placeholders(hv) at (eval 3) line 1.
-Internals::HvREHASH $hashref at (eval 4) line 1.
====
}
diff --git a/t/op/hash.t b/t/op/hash.t
index 4093f2e0a0..597301adf5 100644
--- a/t/op/hash.t
+++ b/t/op/hash.t
@@ -8,107 +8,7 @@ BEGIN {
use strict;
-plan tests => 15;
-
-my %h;
-
-ok (!Internals::HvREHASH(%h), "hash doesn't start with rehash flag on");
-
-foreach (1..10) {
- $h{"\0"x$_}++;
-}
-
-ok (!Internals::HvREHASH(%h), "10 entries doesn't trigger rehash");
-
-foreach (11..20) {
- $h{"\0"x$_}++;
-}
-
-ok (Internals::HvREHASH(%h), "20 entries triggers rehash");
-
-
-
-
-# second part using an emulation of the PERL_HASH in perl, mounting an
-# attack on a pre-populated hash. This is also useful if you need normal
-# keys which don't contain \0 -- suitable for stashes
-
-use constant MASK_U32 => 2**32;
-use constant HASH_SEED => 0;
-use constant THRESHOLD => 14;
-use constant START => "a";
-
-# some initial hash data
-my %h2 = map {$_ => 1} 'a'..'cc';
-
-ok (!Internals::HvREHASH(%h2),
- "starting with pre-populated non-pathological hash (rehash flag if off)");
-
-my @keys = get_keys(\%h2);
-$h2{$_}++ for @keys;
-ok (Internals::HvREHASH(%h2),
- scalar(@keys) . " colliding into the same bucket keys are triggering rehash");
-
-sub get_keys {
- my $hr = shift;
-
- # the minimum of bits required to mount the attack on a hash
- my $min_bits = log(THRESHOLD)/log(2);
-
- # if the hash has already been populated with a significant amount
- # of entries the number of mask bits can be higher
- my $keys = scalar keys %$hr;
- my $bits = $keys ? log($keys)/log(2) : 0;
- $bits = $min_bits if $min_bits > $bits;
-
- $bits = int($bits) < $bits ? int($bits) + 1 : int($bits);
- # need to add 2 bits to cover the internal split cases
- $bits += 2;
- my $mask = 2**$bits-1;
- print "# using mask: $mask ($bits)\n";
-
- my @keys;
- my $s = START;
- my $c = 0;
- # get 2 keys on top of the THRESHOLD
- my $hash;
- while (@keys < THRESHOLD+2) {
- # next if exists $hash->{$s};
- $hash = hash($s);
- next unless ($hash & $mask) == 0;
- $c++;
- printf "# %2d: %5s, %10s\n", $c, $s, $hash;
- push @keys, $s;
- } continue {
- $s++;
- }
-
- return @keys;
-}
-
-
-# trying to provide the fastest equivalent of C macro's PERL_HASH in
-# Perl - the main complication is that it uses U32 integer, which we
-# can't do in perl, without doing some tricks
-sub hash {
- my $s = shift;
- my @c = split //, $s;
- my $u = HASH_SEED;
- for (@c) {
- # (A % M) + (B % M) == (A + B) % M
- # This works because '+' produces a NV, which is big enough to hold
- # the intermediate result. We only need the % before any "^" and "&"
- # to get the result in the range for an I32.
- # and << doesn't work on NV, so using 1 << 10
- $u += ord;
- $u += $u * (1 << 10); $u %= MASK_U32;
- $u ^= $u >> 6;
- }
- $u += $u << 3; $u %= MASK_U32;
- $u ^= $u >> 11; $u %= MASK_U32;
- $u += $u << 15; $u %= MASK_U32;
- $u;
-}
+plan tests => 10;
# This will crash perl if it fails
diff --git a/t/run/runenv.t b/t/run/runenv.t
index cea2590414..521ba8bf80 100644
--- a/t/run/runenv.t
+++ b/t/run/runenv.t
@@ -12,7 +12,7 @@ BEGIN {
skip_all_without_config('d_fork');
}
-plan tests => 84;
+plan tests => 94;
my $STDOUT = tempfile();
my $STDERR = tempfile();
@@ -63,8 +63,16 @@ sub try {
my ($env, $args, $stdout, $stderr) = @_;
my ($actual_stdout, $actual_stderr) = runperl_and_capture($env, $args);
local $::Level = $::Level + 1;
- is ($stdout, $actual_stdout);
- is ($stderr, $actual_stderr);
+ if (ref $stdout) {
+ ok ( $actual_stdout =~/$stdout/ );
+ } else {
+ is ($stdout, $actual_stdout);
+ }
+ if (ref $stderr) {
+ ok ( $actual_stderr =~/$stderr/);
+ } else {
+ is ($stderr, $actual_stderr);
+ }
}
# PERL5OPT Command-line options (switches). Switches in
@@ -191,6 +199,30 @@ try({PERL5LIB => "foo",
'',
'');
+try({PERL_HASH_SEED_DEBUG => 1},
+ ['-e','1'],
+ '',
+ qr/HASH_FUNCTION =/);
+
+try({PERL_HASH_SEED_DEBUG => 1},
+ ['-e','1'],
+ '',
+ qr/HASH_SEED =/);
+
+try({PERL_HASH_SEED_DEBUG => 1, PERL_HASH_SEED => "12345678"},
+ ['-e','1'],
+ '',
+ qr/HASH_SEED = 0x12345678/);
+
+try({PERL_HASH_SEED_DEBUG => 1, PERL_HASH_SEED => "12"},
+ ['-e','1'],
+ '',
+ qr/HASH_SEED = 0x12000000/);
+
+try({PERL_HASH_SEED_DEBUG => 1, PERL_HASH_SEED => "123456789"},
+ ['-e','1'],
+ '',
+ qr/HASH_SEED = 0x12345678/);
# Tests for S_incpush_use_sep():
my @dump_inc = ('-e', 'print "$_\n" foreach @INC');
diff --git a/universal.c b/universal.c
index 76b6281441..805f376124 100644
--- a/universal.c
+++ b/universal.c
@@ -1086,44 +1086,6 @@ XS(XS_PerlIO_get_layers)
XSRETURN(0);
}
-XS(XS_Internals_hash_seed)
-{
- dVAR;
- /* Using dXSARGS would also have dITEM and dSP,
- * which define 2 unused local variables. */
- dAXMARK;
- PERL_UNUSED_ARG(cv);
- PERL_UNUSED_VAR(mark);
- XSRETURN_UV(PERL_HASH_SEED);
-}
-
-XS(XS_Internals_rehash_seed)
-{
- dVAR;
- /* Using dXSARGS would also have dITEM and dSP,
- * which define 2 unused local variables. */
- dAXMARK;
- PERL_UNUSED_ARG(cv);
- PERL_UNUSED_VAR(mark);
- XSRETURN_UV(PL_rehash_seed);
-}
-
-XS(XS_Internals_HvREHASH) /* Subject to change */
-{
- dVAR;
- dXSARGS;
- PERL_UNUSED_ARG(cv);
- if (SvROK(ST(0))) {
- const HV * const hv = (const HV *) SvRV(ST(0));
- if (items == 1 && SvTYPE(hv) == SVt_PVHV) {
- if (HvREHASH(hv))
- XSRETURN_YES;
- else
- XSRETURN_NO;
- }
- }
- Perl_croak(aTHX_ "Internals::HvREHASH $hashref");
-}
XS(XS_re_is_regexp)
{
@@ -1403,9 +1365,6 @@ const struct xsub_details details[] = {
{"Internals::SvREFCNT", XS_Internals_SvREFCNT, "\\[$%@];$"},
{"Internals::hv_clear_placeholders", XS_Internals_hv_clear_placehold, "\\%"},
{"PerlIO::get_layers", XS_PerlIO_get_layers, "*;@"},
- {"Internals::hash_seed", XS_Internals_hash_seed, ""},
- {"Internals::rehash_seed", XS_Internals_rehash_seed, ""},
- {"Internals::HvREHASH", XS_Internals_HvREHASH, "\\%"},
{"re::is_regexp", XS_re_is_regexp, "$"},
{"re::regname", XS_re_regname, ";$$"},
{"re::regnames", XS_re_regnames, ";$"},
diff --git a/util.c b/util.c
index 6ed1fbb4aa..69fe9a9cff 100644
--- a/util.c
+++ b/util.c
@@ -24,6 +24,7 @@
#include "EXTERN.h"
#define PERL_IN_UTIL_C
#include "perl.h"
+#include "reentr.h"
#ifdef USE_PERLIO
#include "perliol.h" /* For PerlIOUnix_refcnt */
@@ -5666,43 +5667,41 @@ Perl_seed(pTHX)
return u;
}
-UV
-Perl_get_hash_seed(pTHX)
+void
+Perl_get_hash_seed(pTHX_ unsigned char *seed_buffer)
{
dVAR;
- const char *s = PerlEnv_getenv("PERL_HASH_SEED");
- UV myseed = 0;
+ const char *s;
+ const unsigned char * const end= seed_buffer + PERL_HASH_SEED_BYTES;
+
+ PERL_ARGS_ASSERT_GET_HASH_SEED;
- if (s)
- while (isSPACE(*s))
+ s= PerlEnv_getenv("PERL_HASH_SEED");
+
+ if ( s )
+#ifndef USE_HASH_SEED_EXPLICIT
+ {
+ while (isSPACE(*s))
s++;
- if (s && isDIGIT(*s))
- myseed = (UV)Atoul(s);
- else
-#ifdef USE_HASH_SEED_EXPLICIT
- if (s)
-#endif
- {
- /* Compute a random seed */
- (void)seedDrand01((Rand_seed_t)seed());
- myseed = (UV)(Drand01() * (NV)UV_MAX);
-#if RANDBITS < (UVSIZE * 8)
- /* Since there are not enough randbits to to reach all
- * the bits of a UV, the low bits might need extra
- * help. Sum in another random number that will
- * fill in the low bits. */
- myseed +=
- (UV)(Drand01() * (NV)((((UV)1) << ((UVSIZE * 8 - RANDBITS))) - 1));
-#endif /* RANDBITS < (UVSIZE * 8) */
- if (myseed == 0) { /* Superparanoia. */
- myseed = (UV)(Drand01() * (NV)UV_MAX); /* One more chance. */
- if (myseed == 0)
- Perl_croak(aTHX_ "Your random numbers are not that random");
- }
- }
- PL_rehash_seed_set = TRUE;
- return myseed;
+ while (isXDIGIT(*s) && seed_buffer < end) {
+ *seed_buffer = READ_XDIGIT(s) << 4;
+ if (isXDIGIT(*s)) {
+ *seed_buffer |= READ_XDIGIT(s);
+ }
+ seed_buffer++;
+ }
+ /* should we check for unparsed crap? */
+ }
+ else
+#endif
+ {
+ (void)seedDrand01((Rand_seed_t)seed());
+
+ while (seed_buffer < end) {
+ *seed_buffer++ = (unsigned char)(Drand01() * (U8_MAX+1));
+ }
+ }
}
#ifdef PERL_GLOBAL_STRUCT