Merge from 3.1: Issue #13703: add a way to randomize the hash values of basic types (str, bytes, datetime)

in order to make algorithmic complexity attacks on (e.g.) web apps much more complicated. The environment variable PYTHONHASHSEED and the new command line flag -R control this behavior.
author: Georg Brandl <georg@python.org> 2012-02-20 21:31:46 +0100
committer: Georg Brandl <georg@python.org> 2012-02-20 21:31:46 +0100
commit: 09a7c72cad48f568e0781541167cf9ea6a3f0760 (patch)
tree: d925894bfc3662e33c03ff7b6b2c5e9e38749b73 /Lib/test
parent: fee358b0df547e9451cfb0b3d25980e6cc7177cc (diff)
parent: 2daf6ae2495c862adf8bc717bfe9964081ea0b10 (diff)
download: cpython-git-09a7c72cad48f568e0781541167cf9ea6a3f0760.tar.gz
12 files changed, 196 insertions, 28 deletions
diff --git a/Lib/test/mapping_tests.py b/Lib/test/mapping_tests.py
index d2b7a59a30..bc12c7756f 100644
--- a/Lib/test/mapping_tests.py
+++ b/Lib/test/mapping_tests.py
@@ -14,7 +14,7 @@ class BasicTestMappingProtocol(unittest.TestCase):
     def _reference(self):
         """Return a dictionary of values which are invariant by storage
         in the object under test."""
-        return {1:2, "key1":"value1", "key2":(1,2,3)}
+        return {"1": "2", "key1":"value1", "key2":(1,2,3)}
     def _empty_mapping(self):
         """Return an empty mapping object"""
         return self.type2test()
diff --git a/Lib/test/regrtest.py b/Lib/test/regrtest.py
index 135a90e772..26ba9820d7 100755
--- a/Lib/test/regrtest.py
+++ b/Lib/test/regrtest.py
@@ -496,6 +496,11 @@ def main(tests=None, testdir=None, verbose=0, quiet=False,
         except ValueError:
             print("Couldn't find starting test (%s), using all tests" % start)
     if randomize:
+        hashseed = os.getenv('PYTHONHASHSEED')
+        if not hashseed:
+            os.environ['PYTHONHASHSEED'] = str(random_seed)
+            os.execv(sys.executable, [sys.executable] + sys.argv)
+            return
         random.seed(random_seed)
         print("Using random seed", random_seed)
         random.shuffle(selected)
diff --git a/Lib/test/script_helper.py b/Lib/test/script_helper.py
index 371c33d138..ba446cd69b 100644
--- a/Lib/test/script_helper.py
+++ b/Lib/test/script_helper.py
@@ -3,7 +3,6 @@
 
 import sys
 import os
-import re
 import os.path
 import tempfile
 import subprocess
@@ -20,11 +19,15 @@ def _assert_python(expected_success, *args, **env_vars):
     cmd_line = [sys.executable]
     if not env_vars:
         cmd_line.append('-E')
-    cmd_line.extend(args)
     # Need to preserve the original environment, for in-place testing of
     # shared library builds.
     env = os.environ.copy()
+    # But a special flag can be set to override this -- in that case, the
+    # caller is responsible for passing the full environment.
+    if env_vars.pop('__cleanenv', None):
+        env = {}
     env.update(env_vars)
+    cmd_line.extend(args)
     p = subprocess.Popen(cmd_line, stdin=subprocess.PIPE,
                          stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                          env=env)
diff --git a/Lib/test/test_cmd_line.py b/Lib/test/test_cmd_line.py
index 2fca25ea08..1a21281e6d 100644
--- a/Lib/test/test_cmd_line.py
+++ b/Lib/test/test_cmd_line.py
@@ -330,6 +330,22 @@ class CmdLineTest(unittest.TestCase):
     def test_no_std_streams(self):
         self._test_no_stdio(['stdin', 'stdout', 'stderr'])
 
+    def test_hash_randomization(self):
+        # Verify that -R enables hash randomization:
+        self.verify_valid_flag('-R')
+        hashes = []
+        for i in range(2):
+            code = 'print(hash("spam"))'
+            rc, out, err = assert_python_ok('-R', '-c', code)
+            self.assertEqual(rc, 0)
+            hashes.append(out)
+        self.assertNotEqual(hashes[0], hashes[1])
+
+        # Verify that sys.flags contains hash_randomization
+        code = 'import sys; print("random is", sys.flags.hash_randomization)'
+        rc, out, err = assert_python_ok('-R', '-c', code)
+        self.assertEqual(rc, 0)
+        self.assertIn(b'random is 1', out)
 
 def test_main():
     test.support.run_unittest(CmdLineTest)
diff --git a/Lib/test/test_descr.py b/Lib/test/test_descr.py
index b214996aa9..77cadc01d9 100644
--- a/Lib/test/test_descr.py
+++ b/Lib/test/test_descr.py
@@ -4474,8 +4474,18 @@ class DictProxyTests(unittest.TestCase):
 
     def test_repr(self):
         # Testing dict_proxy.__repr__
+        def sorted_dict_repr(repr_):
+            # Given the repr of a dict, sort the keys
+            assert repr_.startswith('{')
+            assert repr_.endswith('}')
+            kvs = repr_[1:-1].split(', ')
+            return '{' + ', '.join(sorted(kvs)) + '}'
         dict_ = {k: v for k, v in self.C.__dict__.items()}
-        self.assertEqual(repr(self.C.__dict__), 'dict_proxy({!r})'.format(dict_))
+        repr_ = repr(self.C.__dict__)
+        self.assert_(repr_.startswith('dict_proxy('))
+        self.assert_(repr_.endswith(')'))
+        self.assertEqual(sorted_dict_repr(repr_[len('dict_proxy('):-len(')')]),
+                         sorted_dict_repr('{!r}'.format(dict_)))
 
 
 class PTypesLongInitTest(unittest.TestCase):
diff --git a/Lib/test/test_gdb.py b/Lib/test/test_gdb.py
index 651aaeccfc..aea7c0c3ee 100644
--- a/Lib/test/test_gdb.py
+++ b/Lib/test/test_gdb.py
@@ -52,13 +52,18 @@ class DebuggerTests(unittest.TestCase):
 
     """Test that the debugger can debug Python."""
 
-    def run_gdb(self, *args):
+    def run_gdb(self, *args, **env_vars):
         """Runs gdb with the command line given by *args.
 
         Returns its stdout, stderr
         """
+        if env_vars:
+            env = os.environ.copy()
+            env.update(env_vars)
+        else:
+            env = None
         out, err = subprocess.Popen(
-            args, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
+            args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=env,
             ).communicate()
         return out.decode('utf-8', 'replace'), err.decode('utf-8', 'replace')
 
@@ -118,7 +123,7 @@ class DebuggerTests(unittest.TestCase):
         # print ' '.join(args)
 
         # Use "args" to invoke gdb, capturing stdout, stderr:
-        out, err = self.run_gdb(*args)
+        out, err = self.run_gdb(*args, PYTHONHASHSEED='0')
 
         # Ignore some noise on stderr due to the pending breakpoint:
         err = err.replace('Function "%s" not defined.\n' % breakpoint, '')
@@ -207,7 +212,8 @@ class PrettyPrintTests(DebuggerTests):
         'Verify the pretty-printing of dictionaries'
         self.assertGdbRepr({})
         self.assertGdbRepr({'foo': 'bar'})
-        self.assertGdbRepr({'foo': 'bar', 'douglas':42})
+        self.assertGdbRepr({'foo': 'bar', 'douglas': 42},
+                           "{'foo': 'bar', 'douglas': 42}")
 
     def test_lists(self):
         'Verify the pretty-printing of lists'
@@ -269,8 +275,8 @@ class PrettyPrintTests(DebuggerTests):
     def test_sets(self):
         'Verify the pretty-printing of sets'
         self.assertGdbRepr(set())
-        self.assertGdbRepr(set(['a', 'b']))
-        self.assertGdbRepr(set([4, 5, 6]))
+        self.assertGdbRepr(set(['a', 'b']), "{'a', 'b'}")
+        self.assertGdbRepr(set([4, 5, 6]), "{4, 5, 6}")
 
         # Ensure that we handle sets containing the "dummy" key value,
         # which happens on deletion:
@@ -282,8 +288,8 @@ id(s)''')
     def test_frozensets(self):
         'Verify the pretty-printing of frozensets'
         self.assertGdbRepr(frozenset())
-        self.assertGdbRepr(frozenset(['a', 'b']))
-        self.assertGdbRepr(frozenset([4, 5, 6]))
+        self.assertGdbRepr(frozenset(['a', 'b']), "frozenset({'a', 'b'})")
+        self.assertGdbRepr(frozenset([4, 5, 6]), "frozenset({4, 5, 6})")
 
     def test_exceptions(self):
         # Test a RuntimeError
diff --git a/Lib/test/test_hash.py b/Lib/test/test_hash.py
index fea1025e91..8253610c87 100644
--- a/Lib/test/test_hash.py
+++ b/Lib/test/test_hash.py
@@ -3,10 +3,16 @@
 #
 # Also test that hash implementations are inherited as expected
 
+import datetime
+import os
+import sys
 import unittest
 from test import support
+from test.script_helper import assert_python_ok
 from collections import Hashable
 
+IS_64BIT = sys.maxsize > 2**32
+
 
 class HashEqualityTestCase(unittest.TestCase):
 
@@ -118,10 +124,92 @@ class HashBuiltinsTestCase(unittest.TestCase):
         for obj in self.hashes_to_check:
             self.assertEqual(hash(obj), _default_hash(obj))
 
+class HashRandomizationTests(unittest.TestCase):
+
+    # Each subclass should define a field "repr_", containing the repr() of
+    # an object to be tested
+
+    def get_hash_command(self, repr_):
+        return 'print(hash(%s))' % repr_
+
+    def get_hash(self, repr_, seed=None):
+        env = os.environ.copy()
+        env['__cleanenv'] = True  # signal to assert_python not to do a copy
+                                  # of os.environ on its own
+        if seed is not None:
+            env['PYTHONHASHSEED'] = str(seed)
+        else:
+            env.pop('PYTHONHASHSEED', None)
+        out = assert_python_ok(
+            '-c', self.get_hash_command(repr_),
+            **env)
+        stdout = out[1].strip()
+        return int(stdout)
+
+    def test_randomized_hash(self):
+        # two runs should return different hashes
+        run1 = self.get_hash(self.repr_, seed='random')
+        run2 = self.get_hash(self.repr_, seed='random')
+        self.assertNotEqual(run1, run2)
+
+class StringlikeHashRandomizationTests(HashRandomizationTests):
+    def test_null_hash(self):
+        # PYTHONHASHSEED=0 disables the randomized hash
+        if IS_64BIT:
+            known_hash_of_obj = 1453079729188098211
+        else:
+            known_hash_of_obj = -1600925533
+
+        # Randomization is disabled by default:
+        self.assertEqual(self.get_hash(self.repr_), known_hash_of_obj)
+
+        # It can also be disabled by setting the seed to 0:
+        self.assertEqual(self.get_hash(self.repr_, seed=0), known_hash_of_obj)
+
+    def test_fixed_hash(self):
+        # test a fixed seed for the randomized hash
+        # Note that all types share the same values:
+        if IS_64BIT:
+            h = -4410911502303878509
+        else:
+            h = -206076799
+        self.assertEqual(self.get_hash(self.repr_, seed=42), h)
+
+class StrHashRandomizationTests(StringlikeHashRandomizationTests):
+    repr_ = repr('abc')
+
+    def test_empty_string(self):
+        self.assertEqual(hash(""), 0)
+
+class BytesHashRandomizationTests(StringlikeHashRandomizationTests):
+    repr_ = repr(b'abc')
+
+    def test_empty_string(self):
+        self.assertEqual(hash(b""), 0)
+
+class DatetimeTests(HashRandomizationTests):
+    def get_hash_command(self, repr_):
+        return 'import datetime; print(hash(%s))' % repr_
+
+class DatetimeDateTests(DatetimeTests):
+    repr_ = repr(datetime.date(1066, 10, 14))
+
+class DatetimeDatetimeTests(DatetimeTests):
+    repr_ = repr(datetime.datetime(1, 2, 3, 4, 5, 6, 7))
+
+class DatetimeTimeTests(DatetimeTests):
+    repr_ = repr(datetime.time(0))
+
+
 def test_main():
     support.run_unittest(HashEqualityTestCase,
-                              HashInheritanceTestCase,
-                              HashBuiltinsTestCase)
+                         HashInheritanceTestCase,
+                         HashBuiltinsTestCase,
+                         StrHashRandomizationTests,
+                         BytesHashRandomizationTests,
+                         DatetimeDateTests,
+                         DatetimeDatetimeTests,
+                         DatetimeTimeTests)
 
 
 if __name__ == "__main__":
diff --git a/Lib/test/test_os.py b/Lib/test/test_os.py
index e573bd2a56..8bc8ba9fa6 100644
--- a/Lib/test/test_os.py
+++ b/Lib/test/test_os.py
@@ -15,6 +15,7 @@ from test import support
 import contextlib
 import mmap
 import uuid
+from test.script_helper import assert_python_ok
 
 # Detect whether we're on a Linux system that uses the (now outdated
 # and unmaintained) linuxthreads threading library.  There's an issue
@@ -611,14 +612,33 @@ class DevNullTests(unittest.TestCase):
             self.assertEqual(f.read(), b'')
 
 class URandomTests(unittest.TestCase):
-    def test_urandom(self):
-        try:
-            self.assertEqual(len(os.urandom(1)), 1)
-            self.assertEqual(len(os.urandom(10)), 10)
-            self.assertEqual(len(os.urandom(100)), 100)
-            self.assertEqual(len(os.urandom(1000)), 1000)
-        except NotImplementedError:
-            pass
+    def test_urandom_length(self):
+        self.assertEqual(len(os.urandom(0)), 0)
+        self.assertEqual(len(os.urandom(1)), 1)
+        self.assertEqual(len(os.urandom(10)), 10)
+        self.assertEqual(len(os.urandom(100)), 100)
+        self.assertEqual(len(os.urandom(1000)), 1000)
+
+    def test_urandom_value(self):
+        data1 = os.urandom(16)
+        data2 = os.urandom(16)
+        self.assertNotEqual(data1, data2)
+
+    def get_urandom_subprocess(self, count):
+        code = '\n'.join((
+            'import os, sys',
+            'data = os.urandom(%s)' % count,
+            'sys.stdout.buffer.write(data)',
+            'sys.stdout.buffer.flush()'))
+        out = assert_python_ok('-c', code)
+        stdout = out[1]
+        self.assertEqual(len(stdout), 16)
+        return stdout
+
+    def test_urandom_subprocess(self):
+        data1 = self.get_urandom_subprocess(16)
+        data2 = self.get_urandom_subprocess(16)
+        self.assertNotEqual(data1, data2)
 
 @contextlib.contextmanager
 def _execvpe_mockup(defpath=None):
diff --git a/Lib/test/test_set.py b/Lib/test/test_set.py
index 07bfe0657e..6642440dea 100644
--- a/Lib/test/test_set.py
+++ b/Lib/test/test_set.py
@@ -733,6 +733,17 @@ class TestBasicOps(unittest.TestCase):
         if self.repr is not None:
             self.assertEqual(repr(self.set), self.repr)
 
+    def check_repr_against_values(self):
+        text = repr(self.set)
+        self.assertTrue(text.startswith('{'))
+        self.assertTrue(text.endswith('}'))
+
+        result = text[1:-1].split(', ')
+        result.sort()
+        sorted_repr_values = [repr(value) for value in self.values]
+        sorted_repr_values.sort()
+        self.assertEqual(result, sorted_repr_values)
+
     def test_print(self):
         try:
             fo = open(support.TESTFN, "w")
@@ -891,7 +902,9 @@ class TestBasicOpsString(TestBasicOps):
         self.set    = set(self.values)
         self.dup    = set(self.values)
         self.length = 3
-        self.repr   = "{'a', 'c', 'b'}"
+
+    def test_repr(self):
+        self.check_repr_against_values()
 
 #------------------------------------------------------------------------------
 
@@ -902,7 +915,9 @@ class TestBasicOpsBytes(TestBasicOps):
         self.set    = set(self.values)
         self.dup    = set(self.values)
         self.length = 3
-        self.repr   = "{b'a', b'c', b'b'}"
+
+    def test_repr(self):
+        self.check_repr_against_values()
 
 #------------------------------------------------------------------------------
 
@@ -916,11 +931,13 @@ class TestBasicOpsMixedStringBytes(TestBasicOps):
         self.set    = set(self.values)
         self.dup    = set(self.values)
         self.length = 4
-        self.repr   = "{'a', b'a', 'b', b'b'}"
 
     def tearDown(self):
         self._warning_filters.__exit__(None, None, None)
 
+    def test_repr(self):
+        self.check_repr_against_values()
+
 #==============================================================================
 
 def baditer():
diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py
index 5d3404f75e..3268b1a141 100644
--- a/Lib/test/test_sys.py
+++ b/Lib/test/test_sys.py
@@ -503,7 +503,7 @@ class SysModuleTest(unittest.TestCase):
         attrs = ("debug", "division_warning",
                  "inspect", "interactive", "optimize", "dont_write_bytecode",
                  "no_user_site", "no_site", "ignore_environment", "verbose",
-                 "bytes_warning", "quiet")
+                 "bytes_warning", "quiet", "hash_randomization")
         for attr in attrs:
             self.assertTrue(hasattr(sys.flags, attr), attr)
             self.assertEqual(type(getattr(sys.flags, attr)), int, attr)
diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py
index f6b48cba23..c6f6f6121f 100644
--- a/Lib/test/test_urllib.py
+++ b/Lib/test/test_urllib.py
@@ -13,6 +13,7 @@ import sys
 import tempfile
 
 from base64 import b64encode
+import collections
 
 def hexescape(char):
     """Escape char as RFC 2396 specifies"""
@@ -953,8 +954,9 @@ class urlencode_Tests(unittest.TestCase):
         self.assertEqual("a=1&a=2", urllib.parse.urlencode({"a": [1, 2]}, True))
         self.assertEqual("a=None&a=a",
                          urllib.parse.urlencode({"a": [None, "a"]}, True))
+        data = collections.OrderedDict([("a", 1), ("b", 1)])
         self.assertEqual("a=a&a=b",
-                         urllib.parse.urlencode({"a": {"a": 1, "b": 1}}, True))
+                         urllib.parse.urlencode({"a": data}, True))
 
     def test_urlencode_encoding(self):
         # ASCII encoding. Expect %3F with errors="replace'
diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py
index a6e7ee8e1c..ada0ca8788 100644..100755
--- a/Lib/test/test_urlparse.py
+++ b/Lib/test/test_urlparse.py
@@ -769,7 +769,8 @@ class UrlParseTestCase(unittest.TestCase):
         # Other tests incidentally urlencode things; test non-covered cases:
         # Sequence and object values.
         result = urllib.parse.urlencode({'a': [1, 2], 'b': (3, 4, 5)}, True)
-        self.assertEqual(result, 'a=1&a=2&b=3&b=4&b=5')
+        # we cannot rely on ordering here
+        assert set(result.split('&')) == {'a=1', 'a=2', 'b=3', 'b=4', 'b=5'}
 
         class Trivial:
             def __str__(self):
author	Georg Brandl <georg@python.org>	2012-02-20 21:31:46 +0100
committer	Georg Brandl <georg@python.org>	2012-02-20 21:31:46 +0100
commit	09a7c72cad48f568e0781541167cf9ea6a3f0760 (patch)
tree	d925894bfc3662e33c03ff7b6b2c5e9e38749b73 /Lib/test
parent	fee358b0df547e9451cfb0b3d25980e6cc7177cc (diff)
parent	2daf6ae2495c862adf8bc717bfe9964081ea0b10 (diff)
download	cpython-git-09a7c72cad48f568e0781541167cf9ea6a3f0760.tar.gz