summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorYves Orton <demerphq@gmail.com>2013-02-12 10:53:05 +0100
committerDavid Mitchell <davem@iabyn.com>2013-03-03 20:05:08 +0000
commitd59e31fc729d8a39a774f03bc6bc457029a7aef2 (patch)
tree827a91a04cbe5ddaf0f0c3b0ea382aa32b6e1858
parentaa489f198c5a1d90ce4dde17566ccbb61c7eb942 (diff)
downloadperl-d59e31fc729d8a39a774f03bc6bc457029a7aef2.tar.gz
Prevent premature hsplit() calls, and only trigger REHASH after hsplit()
Triggering a hsplit due to long chain length allows an attacker to create a carefully chosen set of keys which can cause the hash to use 2 * (2**32) * sizeof(void *) bytes ram. AKA a DOS via memory exhaustion. Doing so also takes non trivial time. Eliminating this check, and only inspecting chain length after a normal hsplit() (triggered when keys>buckets) prevents the attack entirely, and makes such attacks relatively benign. (cherry picked from commit f1220d61455253b170e81427c9d0357831ca0fac)
-rw-r--r--ext/Hash-Util-FieldHash/t/10_hash.t18
-rw-r--r--hv.c35
-rw-r--r--t/op/hash.t20
3 files changed, 41 insertions, 32 deletions
diff --git a/ext/Hash-Util-FieldHash/t/10_hash.t b/ext/Hash-Util-FieldHash/t/10_hash.t
index 2cfb4e81fa..d58f053d97 100644
--- a/ext/Hash-Util-FieldHash/t/10_hash.t
+++ b/ext/Hash-Util-FieldHash/t/10_hash.t
@@ -38,15 +38,29 @@ use constant START => "a";
# some initial hash data
fieldhash my %h2;
-%h2 = map {$_ => 1} 'a'..'cc';
+my $counter= "a";
+$h2{$counter++}++ while $counter ne 'cd';
ok (!Internals::HvREHASH(%h2),
"starting with pre-populated non-pathological hash (rehash flag if off)");
my @keys = get_keys(\%h2);
+my $buckets= buckets(\%h2);
$h2{$_}++ for @keys;
+$h2{$counter++}++ while buckets(\%h2) == $buckets; # force a split
ok (Internals::HvREHASH(%h2),
- scalar(@keys) . " colliding into the same bucket keys are triggering rehash");
+ scalar(@keys) . " colliding into the same bucket keys are triggering rehash after split");
+
+# returns the number of buckets in a hash
+sub buckets {
+ my $hr = shift;
+ my $keys_buckets= scalar(%$hr);
+ if ($keys_buckets=~m!/([0-9]+)\z!) {
+ return 0+$1;
+ } else {
+ return 8;
+ }
+}
sub get_keys {
my $hr = shift;
diff --git a/hv.c b/hv.c
index 2be1feb7f4..abb9d7648f 100644
--- a/hv.c
+++ b/hv.c
@@ -35,7 +35,8 @@ holds the key and hash value.
#define PERL_HASH_INTERNAL_ACCESS
#include "perl.h"
-#define HV_MAX_LENGTH_BEFORE_SPLIT 14
+#define HV_MAX_LENGTH_BEFORE_REHASH 14
+#define SHOULD_DO_HSPLIT(xhv) ((xhv)->xhv_keys > (xhv)->xhv_max) /* HvTOTALKEYS(hv) > HvMAX(hv) */
static const char S_strtab_error[]
= "Cannot modify shared string table in hv_%s";
@@ -794,29 +795,9 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen,
if (masked_flags & HVhek_ENABLEHVKFLAGS)
HvHASKFLAGS_on(hv);
- {
- const HE *counter = HeNEXT(entry);
-
- xhv->xhv_keys++; /* HvTOTALKEYS(hv)++ */
- if (!counter) { /* initial entry? */
- } else if (xhv->xhv_keys > xhv->xhv_max) {
- /* Use only the old HvKEYS(hv) > HvMAX(hv) condition to limit
- bucket splits on a rehashed hash, as we're not going to
- split it again, and if someone is lucky (evil) enough to
- get all the keys in one list they could exhaust our memory
- as we repeatedly double the number of buckets on every
- entry. Linear search feels a less worse thing to do. */
- hsplit(hv);
- } else if(!HvREHASH(hv)) {
- U32 n_links = 1;
-
- while ((counter = HeNEXT(counter)))
- n_links++;
-
- if (n_links > HV_MAX_LENGTH_BEFORE_SPLIT) {
- hsplit(hv);
- }
- }
+ xhv->xhv_keys++; /* HvTOTALKEYS(hv)++ */
+ if ( SHOULD_DO_HSPLIT(xhv) ) {
+ hsplit(hv);
}
if (return_svp) {
@@ -1192,7 +1173,7 @@ S_hsplit(pTHX_ HV *hv)
/* Pick your policy for "hashing isn't working" here: */
- if (longest_chain <= HV_MAX_LENGTH_BEFORE_SPLIT /* split worked? */
+ if (longest_chain <= HV_MAX_LENGTH_BEFORE_REHASH /* split worked? */
|| HvREHASH(hv)) {
return;
}
@@ -2831,8 +2812,8 @@ S_share_hek_flags(pTHX_ const char *str, I32 len, register U32 hash, int flags)
xhv->xhv_keys++; /* HvTOTALKEYS(hv)++ */
if (!next) { /* initial entry? */
- } else if (xhv->xhv_keys > xhv->xhv_max /* HvKEYS(hv) > HvMAX(hv) */) {
- hsplit(PL_strtab);
+ } else if ( SHOULD_DO_HSPLIT(xhv) ) {
+ hsplit(PL_strtab);
}
}
diff --git a/t/op/hash.t b/t/op/hash.t
index 278bea7e14..201260acbc 100644
--- a/t/op/hash.t
+++ b/t/op/hash.t
@@ -39,22 +39,36 @@ use constant THRESHOLD => 14;
use constant START => "a";
# some initial hash data
-my %h2 = map {$_ => 1} 'a'..'cc';
+my %h2;
+my $counter= "a";
+$h2{$counter++}++ while $counter ne 'cd';
ok (!Internals::HvREHASH(%h2),
"starting with pre-populated non-pathological hash (rehash flag if off)");
my @keys = get_keys(\%h2);
+my $buckets= buckets(\%h2);
$h2{$_}++ for @keys;
+$h2{$counter++}++ while buckets(\%h2) == $buckets; # force a split
ok (Internals::HvREHASH(%h2),
- scalar(@keys) . " colliding into the same bucket keys are triggering rehash");
+ scalar(@keys) . " colliding into the same bucket keys are triggering rehash after split");
+
+# returns the number of buckets in a hash
+sub buckets {
+ my $hr = shift;
+ my $keys_buckets= scalar(%$hr);
+ if ($keys_buckets=~m!/([0-9]+)\z!) {
+ return 0+$1;
+ } else {
+ return 8;
+ }
+}
sub get_keys {
my $hr = shift;
# the minimum of bits required to mount the attack on a hash
my $min_bits = log(THRESHOLD)/log(2);
-
# if the hash has already been populated with a significant amount
# of entries the number of mask bits can be higher
my $keys = scalar keys %$hr;