diff options
| -rw-r--r-- | Doc/library/pickle.rst | 51 | ||||
| -rw-r--r-- | Lib/_compat_pickle.py | 81 | ||||
| -rw-r--r-- | Lib/pickle.py | 56 | ||||
| -rw-r--r-- | Lib/pickletools.py | 170 | ||||
| -rw-r--r-- | Lib/test/pickletester.py | 42 | ||||
| -rw-r--r-- | Lib/test/test_pickletools.py | 3 | ||||
| -rw-r--r-- | Misc/NEWS | 5 | ||||
| -rw-r--r-- | Modules/_pickle.c | 281 | 
8 files changed, 532 insertions, 157 deletions
| diff --git a/Doc/library/pickle.rst b/Doc/library/pickle.rst index 1c70196dbb..21e4001139 100644 --- a/Doc/library/pickle.rst +++ b/Doc/library/pickle.rst @@ -141,7 +141,7 @@ an unpickler, then you call the unpickler's :meth:`load` method.  The  The :mod:`pickle` module provides the following functions to make the pickling  process more convenient: -.. function:: dump(obj, file[, protocol]) +.. function:: dump(obj, file[, protocol, \*, fix_imports=True])     Write a pickled representation of *obj* to the open file object *file*.  This     is equivalent to ``Pickler(file, protocol).dump(obj)``. @@ -158,7 +158,11 @@ process more convenient:     argument.  It can thus be a file object opened for binary writing, a     io.BytesIO instance, or any other custom object that meets this interface. -.. function:: dumps(obj[, protocol]) +   If *fix_imports* is True and *protocol* is less than 3, pickle will try to +   map the new Python 3.x names to the old module names used in Python 2.x, +   so that the pickle data stream is readable with Python 2.x. + +.. function:: dumps(obj[, protocol, \*, fix_imports=True])     Return the pickled representation of the object as a :class:`bytes`     object, instead of writing it to a file. @@ -171,7 +175,11 @@ process more convenient:     supported.  The higher the protocol used, the more recent the version of     Python needed to read the pickle produced. -.. function:: load(file, [\*, encoding="ASCII", errors="strict"]) +   If *fix_imports* is True and *protocol* is less than 3, pickle will try to +   map the new Python 3.x names to the old module names used in Python 2.x, +   so that the pickle data stream is readable with Python 2.x. + +.. function:: load(file, [\*, fix_imports=True, encoding="ASCII", errors="strict"])     Read a pickled object representation from the open file object *file* and     return the reconstituted object hierarchy specified therein.  
This is @@ -187,11 +195,14 @@ process more convenient:     for reading, a BytesIO object, or any other custom object that meets this     interface. -   Optional keyword arguments are encoding and errors, which are used to decode -   8-bit string instances pickled by Python 2.x.  These default to 'ASCII' and -   'strict', respectively. +   Optional keyword arguments are *fix_imports*, *encoding* and *errors*, +   which are used to control compatibility support for pickle streams generated +   by Python 2.x.  If *fix_imports* is True, pickle will try to map the old +   Python 2.x names to the new names used in Python 3.x.  The *encoding* and +   *errors* tell pickle how to decode 8-bit string instances pickled by Python +   2.x; these default to 'ASCII' and 'strict', respectively. -.. function:: loads(bytes_object, [\*, encoding="ASCII", errors="strict"]) +.. function:: loads(bytes_object, [\*, fix_imports=True, encoding="ASCII", errors="strict"])     Read a pickled object hierarchy from a :class:`bytes` object and return the     reconstituted object hierarchy specified therein @@ -200,9 +211,12 @@ process more convenient:     argument is needed.  Bytes past the pickled object's representation are     ignored. -   Optional keyword arguments are encoding and errors, which are used to decode -   8-bit string instances pickled by Python 2.x.  These default to 'ASCII' and -   'strict', respectively. +   Optional keyword arguments are *fix_imports*, *encoding* and *errors*, +   which are used to control compatibility support for pickle streams generated +   by Python 2.x.  If *fix_imports* is True, pickle will try to map the old +   Python 2.x names to the new names used in Python 3.x.  The *encoding* and +   *errors* tell pickle how to decode 8-bit string instances pickled by Python +   2.x; these default to 'ASCII' and 'strict', respectively.  
The :mod:`pickle` module defines three exceptions: @@ -233,7 +247,7 @@ The :mod:`pickle` module defines three exceptions:  The :mod:`pickle` module exports two classes, :class:`Pickler` and  :class:`Unpickler`: -.. class:: Pickler(file[, protocol]) +.. class:: Pickler(file[, protocol, \*, fix_imports=True])     This takes a binary file for writing a pickle data stream. @@ -249,6 +263,10 @@ The :mod:`pickle` module exports two classes, :class:`Pickler` and     argument.  It can thus be a file object opened for binary writing, a     io.BytesIO instance, or any other custom object that meets this interface. +   If *fix_imports* is True and *protocol* is less than 3, pickle will try to +   map the new Python 3.x names to the old module names used in Python 2.x, +   so that the pickle data stream is readable with Python 2.x. +     .. method:: dump(obj)        Write a pickled representation of *obj* to the open file object given in @@ -277,7 +295,7 @@ The :mod:`pickle` module exports two classes, :class:`Pickler` and        Use :func:`pickletools.optimize` if you need more compact pickles. -.. class:: Unpickler(file, [\*, encoding="ASCII", errors="strict"]) +.. class:: Unpickler(file, [\*, fix_imports=True, encoding="ASCII", errors="strict"])     This takes a binary file for reading a pickle data stream. @@ -290,9 +308,12 @@ The :mod:`pickle` module exports two classes, :class:`Pickler` and     for reading, a BytesIO object, or any other custom object that meets this     interface. -   Optional keyword arguments are encoding and errors, which are used to decode -   8-bit string instances pickled by Python 2.x.  These default to 'ASCII' and -   'strict', respectively. +   Optional keyword arguments are *fix_imports*, *encoding* and *errors*, +   which are used to control compatibility support for pickle streams generated +   by Python 2.x.  If *fix_imports* is True, pickle will try to map the old +   Python 2.x names to the new names used in Python 3.x.  
The *encoding* and +   *errors* tell pickle how to decode 8-bit string instances pickled by Python +   2.x; these default to 'ASCII' and 'strict', respectively.     .. method:: load() diff --git a/Lib/_compat_pickle.py b/Lib/_compat_pickle.py new file mode 100644 index 0000000000..700c80cd57 --- /dev/null +++ b/Lib/_compat_pickle.py @@ -0,0 +1,81 @@ +# This module is used to map the old Python 2 names to the new names used in +# Python 3 for the pickle module.  This needed to make pickle streams +# generated with Python 2 loadable by Python 3. + +# This is a copy of lib2to3.fixes.fix_imports.MAPPING.  We cannot import +# lib2to3 and use the mapping defined there, because lib2to3 uses pickle. +# Thus, this could cause the module to be imported recursively. +IMPORT_MAPPING = { +    'StringIO':  'io', +    'cStringIO': 'io', +    'cPickle': 'pickle', +    '__builtin__' : 'builtins', +    'copy_reg': 'copyreg', +    'Queue': 'queue', +    'SocketServer': 'socketserver', +    'ConfigParser': 'configparser', +    'repr': 'reprlib', +    'FileDialog': 'tkinter.filedialog', +    'tkFileDialog': 'tkinter.filedialog', +    'SimpleDialog': 'tkinter.simpledialog', +    'tkSimpleDialog': 'tkinter.simpledialog', +    'tkColorChooser': 'tkinter.colorchooser', +    'tkCommonDialog': 'tkinter.commondialog', +    'Dialog': 'tkinter.dialog', +    'Tkdnd': 'tkinter.dnd', +    'tkFont': 'tkinter.font', +    'tkMessageBox': 'tkinter.messagebox', +    'ScrolledText': 'tkinter.scrolledtext', +    'Tkconstants': 'tkinter.constants', +    'Tix': 'tkinter.tix', +    'ttk': 'tkinter.ttk', +    'Tkinter': 'tkinter', +    'markupbase': '_markupbase', +    '_winreg': 'winreg', +    'thread': '_thread', +    'dummy_thread': '_dummy_thread', +    'dbhash': 'dbm.bsd', +    'dumbdbm': 'dbm.dumb', +    'dbm': 'dbm.ndbm', +    'gdbm': 'dbm.gnu', +    'xmlrpclib': 'xmlrpc.client', +    'DocXMLRPCServer': 'xmlrpc.server', +    'SimpleXMLRPCServer': 'xmlrpc.server', +    'httplib': 'http.client', +    
'htmlentitydefs' : 'html.entities', +    'HTMLParser' : 'html.parser', +    'Cookie': 'http.cookies', +    'cookielib': 'http.cookiejar', +    'BaseHTTPServer': 'http.server', +    'SimpleHTTPServer': 'http.server', +    'CGIHTTPServer': 'http.server', +    'test.test_support': 'test.support', +    'commands': 'subprocess', +    'UserString' : 'collections', +    'UserList' : 'collections', +    'urlparse' : 'urllib.parse', +    'robotparser' : 'urllib.robotparser', +    'whichdb': 'dbm', +    'anydbm': 'dbm' +} + + +# This contains rename rules that are easy to handle.  We ignore the more +# complex stuff (e.g. mapping the names in the urllib and types modules). +# These rules should be run before import names are fixed. +NAME_MAPPING = { +    ('__builtin__', 'xrange'):     ('builtins', 'range'), +    ('__builtin__', 'reduce'):     ('functools', 'reduce'), +    ('__builtin__', 'intern'):     ('sys', 'intern'), +    ('__builtin__', 'unichr'):     ('builtins', 'chr'), +    ('__builtin__', 'basestring'): ('builtins', 'str'), +    ('__builtin__', 'long'):       ('builtins', 'int'), +    ('itertools', 'izip'):         ('builtins', 'zip'), +    ('itertools', 'imap'):         ('builtins', 'map'), +    ('itertools', 'ifilter'):      ('builtins', 'filter'), +    ('itertools', 'ifilterfalse'): ('itertools', 'filterfalse'), +} + +# Same, but for 3.x to 2.x +REVERSE_IMPORT_MAPPING = dict((v, k) for (k, v) in IMPORT_MAPPING.items()) +REVERSE_NAME_MAPPING = dict((v, k) for (k, v) in NAME_MAPPING.items()) diff --git a/Lib/pickle.py b/Lib/pickle.py index 720c1a00e6..7af4ce969a 100644 --- a/Lib/pickle.py +++ b/Lib/pickle.py @@ -34,6 +34,7 @@ import struct  import re  import io  import codecs +import _compat_pickle  __all__ = ["PickleError", "PicklingError", "UnpicklingError", "Pickler",             "Unpickler", "dump", "dumps", "load", "loads"] @@ -171,12 +172,11 @@ SHORT_BINBYTES = b'C'   #  "     "   ;    "      "       "      " < 256 bytes  __all__.extend([x for x in dir() if 
re.match("[A-Z][A-Z0-9_]+$",x)]) -  # Pickling machinery  class _Pickler: -    def __init__(self, file, protocol=None): +    def __init__(self, file, protocol=None, *, fix_imports=True):          """This takes a binary file for writing a pickle data stream.          The optional protocol argument tells the pickler to use the @@ -193,6 +193,10 @@ class _Pickler:          bytes argument. It can thus be a file object opened for binary          writing, a io.BytesIO instance, or any other custom object that          meets this interface. + +        If fix_imports is True and protocol is less than 3, pickle will try to +        map the new Python 3.x names to the old module names used in Python +        2.x, so that the pickle data stream is readable with Python 2.x.          """          if protocol is None:              protocol = DEFAULT_PROTOCOL @@ -208,6 +212,7 @@ class _Pickler:          self.proto = int(protocol)          self.bin = protocol >= 1          self.fast = 0 +        self.fix_imports = fix_imports and protocol < 3      def clear_memo(self):          """Clears the pickler's "memo". 
@@ -698,6 +703,11 @@ class _Pickler:              write(GLOBAL + bytes(module, "utf-8") + b'\n' +                    bytes(name, "utf-8") + b'\n')          else: +            if self.fix_imports: +                if (module, name) in _compat_pickle.REVERSE_NAME_MAPPING: +                    module, name = _compat_pickle.REVERSE_NAME_MAPPING[(module, name)] +                if module in _compat_pickle.REVERSE_IMPORT_MAPPING: +                    module = _compat_pickle.REVERSE_IMPORT_MAPPING[module]              try:                  write(GLOBAL + bytes(module, "ascii") + b'\n' +                        bytes(name, "ascii") + b'\n') @@ -766,7 +776,8 @@ def whichmodule(func, funcname):  class _Unpickler: -    def __init__(self, file, *, encoding="ASCII", errors="strict"): +    def __init__(self, file, *, fix_imports=True, +                 encoding="ASCII", errors="strict"):          """This takes a binary file for reading a pickle data stream.          The protocol version of the pickle is detected automatically, so no @@ -779,15 +790,21 @@ class _Unpickler:          reading, a BytesIO object, or any other custom object that          meets this interface. -        Optional keyword arguments are encoding and errors, which are -        used to decode 8-bit string instances pickled by Python 2.x. -        These default to 'ASCII' and 'strict', respectively. +        Optional keyword arguments are *fix_imports*, *encoding* and *errors*, +        which are used to control compatiblity support for pickle stream +        generated by Python 2.x.  If *fix_imports* is True, pickle will try to +        map the old Python 2.x names to the new names used in Python 3.x.  The +        *encoding* and *errors* tell pickle how to decode 8-bit string +        instances pickled by Python 2.x; these default to 'ASCII' and +        'strict', respectively.          
"""          self.readline = file.readline          self.read = file.read          self.memo = {}          self.encoding = encoding          self.errors = errors +        self.proto = 0 +        self.fix_imports = fix_imports      def load(self):          """Read a pickled object representation from the open file. @@ -838,6 +855,7 @@ class _Unpickler:          proto = ord(self.read(1))          if not 0 <= proto <= HIGHEST_PROTOCOL:              raise ValueError("unsupported pickle protocol: %d" % proto) +        self.proto = proto      dispatch[PROTO[0]] = load_proto      def load_persid(self): @@ -1088,7 +1106,12 @@ class _Unpickler:          self.append(obj)      def find_class(self, module, name): -        # Subclasses may override this +        # Subclasses may override this. +        if self.proto < 3 and self.fix_imports: +            if (module, name) in _compat_pickle.NAME_MAPPING: +                module, name = _compat_pickle.NAME_MAPPING[(module, name)] +            if module in _compat_pickle.IMPORT_MAPPING: +                module = _compat_pickle.IMPORT_MAPPING[module]          __import__(module, level=0)          mod = sys.modules[module]          klass = getattr(mod, name) @@ -1327,27 +1350,28 @@ except ImportError:  # Shorthands -def dump(obj, file, protocol=None): -    Pickler(file, protocol).dump(obj) +def dump(obj, file, protocol=None, *, fix_imports=True): +    Pickler(file, protocol, fix_imports=fix_imports).dump(obj) -def dumps(obj, protocol=None): +def dumps(obj, protocol=None, *, fix_imports=True):      f = io.BytesIO() -    Pickler(f, protocol).dump(obj) +    Pickler(f, protocol, fix_imports=fix_imports).dump(obj)      res = f.getvalue()      assert isinstance(res, bytes_types)      return res -def load(file, *, encoding="ASCII", errors="strict"): -    return Unpickler(file, encoding=encoding, errors=errors).load() +def load(file, *, fix_imports=True, encoding="ASCII", errors="strict"): +    return Unpickler(file, fix_imports=fix_imports, 
+                     encoding=encoding, errors=errors).load() -def loads(s, *, encoding="ASCII", errors="strict"): +def loads(s, *, fix_imports=True, encoding="ASCII", errors="strict"):      if isinstance(s, str):          raise TypeError("Can't load pickle from unicode string")      file = io.BytesIO(s) -    return Unpickler(file, encoding=encoding, errors=errors).load() +    return Unpickler(file, fix_imports=fix_imports, +                     encoding=encoding, errors=errors).load()  # Doctest -  def _test():      import doctest      return doctest.testmod() diff --git a/Lib/pickletools.py b/Lib/pickletools.py index 2bb69d1320..ca11aa3871 100644 --- a/Lib/pickletools.py +++ b/Lib/pickletools.py @@ -2066,27 +2066,27 @@ _dis_test = r"""     29: (    MARK     30: d        DICT       (MARK at 29)     31: p    PUT        2 -   34: c    GLOBAL     'builtins bytes' -   50: p    PUT        3 -   53: (    MARK -   54: (        MARK -   55: l            LIST       (MARK at 54) -   56: p        PUT        4 -   59: L        LONG       97 -   64: a        APPEND -   65: L        LONG       98 -   70: a        APPEND -   71: L        LONG       99 -   76: a        APPEND -   77: t        TUPLE      (MARK at 53) -   78: p    PUT        5 -   81: R    REDUCE -   82: p    PUT        6 -   85: V    UNICODE    'def' -   90: p    PUT        7 -   93: s    SETITEM -   94: a    APPEND -   95: .    STOP +   34: c    GLOBAL     '__builtin__ bytes' +   53: p    PUT        3 +   56: (    MARK +   57: (        MARK +   58: l            LIST       (MARK at 57) +   59: p        PUT        4 +   62: L        LONG       97 +   67: a        APPEND +   68: L        LONG       98 +   73: a        APPEND +   74: L        LONG       99 +   79: a        APPEND +   80: t        TUPLE      (MARK at 56) +   81: p    PUT        5 +   84: R    REDUCE +   85: p    PUT        6 +   88: V    UNICODE    'def' +   93: p    PUT        7 +   96: s    SETITEM +   97: a    APPEND +   98: .    
STOP  highest protocol among opcodes = 0  Try again with a "binary" pickle. @@ -2105,25 +2105,25 @@ Try again with a "binary" pickle.     14: q        BINPUT     1     16: }        EMPTY_DICT     17: q        BINPUT     2 -   19: c        GLOBAL     'builtins bytes' -   35: q        BINPUT     3 -   37: (        MARK -   38: ]            EMPTY_LIST -   39: q            BINPUT     4 -   41: (            MARK -   42: K                BININT1    97 -   44: K                BININT1    98 -   46: K                BININT1    99 -   48: e                APPENDS    (MARK at 41) -   49: t            TUPLE      (MARK at 37) -   50: q        BINPUT     5 -   52: R        REDUCE -   53: q        BINPUT     6 -   55: X        BINUNICODE 'def' -   63: q        BINPUT     7 -   65: s        SETITEM -   66: e        APPENDS    (MARK at 3) -   67: .    STOP +   19: c        GLOBAL     '__builtin__ bytes' +   38: q        BINPUT     3 +   40: (        MARK +   41: ]            EMPTY_LIST +   42: q            BINPUT     4 +   44: (            MARK +   45: K                BININT1    97 +   47: K                BININT1    98 +   49: K                BININT1    99 +   51: e                APPENDS    (MARK at 44) +   52: t            TUPLE      (MARK at 40) +   53: q        BINPUT     5 +   55: R        REDUCE +   56: q        BINPUT     6 +   58: X        BINUNICODE 'def' +   66: q        BINPUT     7 +   68: s        SETITEM +   69: e        APPENDS    (MARK at 3) +   70: .    STOP  highest protocol among opcodes = 1  Exercise the INST/OBJ/BUILD family. 
@@ -2141,58 +2141,58 @@ highest protocol among opcodes = 0      0: (    MARK      1: l        LIST       (MARK at 0)      2: p    PUT        0 -    5: c    GLOBAL     'copyreg _reconstructor' -   29: p    PUT        1 -   32: (    MARK -   33: c        GLOBAL     'pickletools _Example' -   55: p        PUT        2 -   58: c        GLOBAL     'builtins object' -   75: p        PUT        3 -   78: N        NONE -   79: t        TUPLE      (MARK at 32) -   80: p    PUT        4 -   83: R    REDUCE -   84: p    PUT        5 -   87: (    MARK -   88: d        DICT       (MARK at 87) -   89: p    PUT        6 -   92: V    UNICODE    'value' -   99: p    PUT        7 -  102: L    LONG       42 -  107: s    SETITEM -  108: b    BUILD -  109: a    APPEND -  110: g    GET        5 +    5: c    GLOBAL     'copy_reg _reconstructor' +   30: p    PUT        1 +   33: (    MARK +   34: c        GLOBAL     'pickletools _Example' +   56: p        PUT        2 +   59: c        GLOBAL     '__builtin__ object' +   79: p        PUT        3 +   82: N        NONE +   83: t        TUPLE      (MARK at 33) +   84: p    PUT        4 +   87: R    REDUCE +   88: p    PUT        5 +   91: (    MARK +   92: d        DICT       (MARK at 91) +   93: p    PUT        6 +   96: V    UNICODE    'value' +  103: p    PUT        7 +  106: L    LONG       42 +  111: s    SETITEM +  112: b    BUILD    113: a    APPEND -  114: .    STOP +  114: g    GET        5 +  117: a    APPEND +  118: .    
STOP  highest protocol among opcodes = 0  >>> dis(pickle.dumps(x, 1))      0: ]    EMPTY_LIST      1: q    BINPUT     0      3: (    MARK -    4: c        GLOBAL     'copyreg _reconstructor' -   28: q        BINPUT     1 -   30: (        MARK -   31: c            GLOBAL     'pickletools _Example' -   53: q            BINPUT     2 -   55: c            GLOBAL     'builtins object' -   72: q            BINPUT     3 -   74: N            NONE -   75: t            TUPLE      (MARK at 30) -   76: q        BINPUT     4 -   78: R        REDUCE -   79: q        BINPUT     5 -   81: }        EMPTY_DICT -   82: q        BINPUT     6 -   84: X        BINUNICODE 'value' -   94: q        BINPUT     7 -   96: K        BININT1    42 -   98: s        SETITEM -   99: b        BUILD -  100: h        BINGET     5 -  102: e        APPENDS    (MARK at 3) -  103: .    STOP +    4: c        GLOBAL     'copy_reg _reconstructor' +   29: q        BINPUT     1 +   31: (        MARK +   32: c            GLOBAL     'pickletools _Example' +   54: q            BINPUT     2 +   56: c            GLOBAL     '__builtin__ object' +   76: q            BINPUT     3 +   78: N            NONE +   79: t            TUPLE      (MARK at 31) +   80: q        BINPUT     4 +   82: R        REDUCE +   83: q        BINPUT     5 +   85: }        EMPTY_DICT +   86: q        BINPUT     6 +   88: X        BINUNICODE 'value' +   98: q        BINPUT     7 +  100: K        BININT1    42 +  102: s        SETITEM +  103: b        BUILD +  104: h        BINGET     5 +  106: e        APPENDS    (MARK at 3) +  107: .    STOP  highest protocol among opcodes = 1  Try "the canonical" recursive-object test. 
diff --git a/Lib/test/pickletester.py b/Lib/test/pickletester.py index 58ce3b5cf9..3ed26b8346 100644 --- a/Lib/test/pickletester.py +++ b/Lib/test/pickletester.py @@ -3,6 +3,7 @@ import unittest  import pickle  import pickletools  import copyreg +from http.cookies import SimpleCookie  from test.support import TestFailed, TESTFN, run_with_locale @@ -342,6 +343,24 @@ DATA2_DIS = """\  highest protocol among opcodes = 2  """ +# set([1,2]) pickled from 2.x with protocol 2 +DATA3 = b'\x80\x02c__builtin__\nset\nq\x00]q\x01(K\x01K\x02e\x85q\x02Rq\x03.' + +# xrange(5) pickled from 2.x with protocol 2 +DATA4 = b'\x80\x02c__builtin__\nxrange\nq\x00K\x00K\x05K\x01\x87q\x01Rq\x02.' + +# a SimpleCookie() object pickled from 2.x with protocol 2 +DATA5 = (b'\x80\x02cCookie\nSimpleCookie\nq\x00)\x81q\x01U\x03key' +         b'q\x02cCookie\nMorsel\nq\x03)\x81q\x04(U\x07commentq\x05U' +         b'\x00q\x06U\x06domainq\x07h\x06U\x06secureq\x08h\x06U\x07' +         b'expiresq\th\x06U\x07max-ageq\nh\x06U\x07versionq\x0bh\x06U' +         b'\x04pathq\x0ch\x06U\x08httponlyq\rh\x06u}q\x0e(U\x0b' +         b'coded_valueq\x0fU\x05valueq\x10h\x10h\x10h\x02h\x02ubs}q\x11b.') + +# set([3]) pickled from 2.x with protocol 2 +DATA6 = b'\x80\x02c__builtin__\nset\nq\x00]q\x01K\x03a\x85q\x02Rq\x03.' + +  def create_data():      c = C()      c.foo = 1 @@ -956,6 +975,29 @@ class AbstractPickleTests(unittest.TestCase):              for x_key, y_key in zip(x_keys, y_keys):                  self.assertIs(x_key, y_key) +    def test_unpickle_from_2x(self): +        # Unpickle non-trivial data from Python 2.x. 
+        loaded = self.loads(DATA3) +        self.assertEqual(loaded, set([1, 2])) +        loaded = self.loads(DATA4) +        self.assertEqual(type(loaded), type(range(0))) +        self.assertEqual(list(loaded), list(range(5))) +        loaded = self.loads(DATA5) +        self.assertEqual(type(loaded), SimpleCookie) +        self.assertEqual(list(loaded.keys()), ["key"]) +        self.assertEqual(loaded["key"].value, "Set-Cookie: key=value") + +    def test_pickle_to_2x(self): +        # Pickle non-trivial data with protocol 2, expecting that it yields +        # the same result as Python 2.x did. +        # NOTE: this test is a bit too strong since we can produce different +        # bytecode that 2.x will still understand. +        dumped = self.dumps(range(5), 2) +        self.assertEqual(dumped, DATA4) +        dumped = self.dumps(set([3]), 2) +        self.assertEqual(dumped, DATA6) + +  # Test classes for reduce_ex  class REX_one(object): diff --git a/Lib/test/test_pickletools.py b/Lib/test/test_pickletools.py index 3e701b0909..823b0c2621 100644 --- a/Lib/test/test_pickletools.py +++ b/Lib/test/test_pickletools.py @@ -12,6 +12,9 @@ class OptimizedPickleTests(AbstractPickleTests, AbstractPickleModuleTests):      def loads(self, buf):          return pickle.loads(buf) +    # Test relies on precise output of dumps() +    test_pickle_to_2x = None +  def test_main():      support.run_unittest(OptimizedPickleTests) @@ -21,6 +21,11 @@ Core and Builtins  Library  ------- +- Issue #6137: The pickle module now translates module names when loading +  or dumping pickles with a 2.x-compatible protocol, in order to make data +  sharing and migration easier. This behaviour can be disabled using the +  new `fix_imports` optional argument. +  - Removed the ipaddr module.  
- Issue #3613: base64.{encode,decode}string are now called diff --git a/Modules/_pickle.c b/Modules/_pickle.c index 3ad55b587f..0e6df34bf1 100644 --- a/Modules/_pickle.c +++ b/Modules/_pickle.c @@ -103,25 +103,33 @@ enum {  /* Exception classes for pickle. These should override the ones defined in     pickle.py, when the C-optimized Pickler and Unpickler are used. */ -static PyObject *PickleError; -static PyObject *PicklingError; -static PyObject *UnpicklingError; +static PyObject *PickleError = NULL; +static PyObject *PicklingError = NULL; +static PyObject *UnpicklingError = NULL;  /* copyreg.dispatch_table, {type_object: pickling_function} */ -static PyObject *dispatch_table; +static PyObject *dispatch_table = NULL;  /* For EXT[124] opcodes. */  /* copyreg._extension_registry, {(module_name, function_name): code} */ -static PyObject *extension_registry; +static PyObject *extension_registry = NULL;  /* copyreg._inverted_registry, {code: (module_name, function_name)} */ -static PyObject *inverted_registry; +static PyObject *inverted_registry = NULL;  /* copyreg._extension_cache, {code: object} */ -static PyObject *extension_cache; +static PyObject *extension_cache = NULL; + +/* _compat_pickle.NAME_MAPPING, {(oldmodule, oldname): (newmodule, newname)} */ +static PyObject *name_mapping_2to3 = NULL; +/* _compat_pickle.IMPORT_MAPPING, {oldmodule: newmodule} */ +static PyObject *import_mapping_2to3 = NULL; +/* Same, but with REVERSE_NAME_MAPPING / REVERSE_IMPORT_MAPPING */ +static PyObject *name_mapping_3to2 = NULL; +static PyObject *import_mapping_3to2 = NULL;  /* XXX: Are these really nescessary? */  /* As the name says, an empty tuple. */ -static PyObject *empty_tuple; +static PyObject *empty_tuple = NULL;  /* For looking up name pairs in copyreg._extension_registry. 
*/ -static PyObject *two_tuple; +static PyObject *two_tuple = NULL;  static int  stack_underflow(void) @@ -315,6 +323,8 @@ typedef struct PicklerObject {                                     should not be used if with self-referential                                     objects. */      int fast_nesting; +    int fix_imports;            /* Indicate whether Pickler should fix +                                   the name of globals for Python 2.x. */      PyObject *fast_memo;  } PicklerObject; @@ -340,6 +350,9 @@ typedef struct UnpicklerObject {                                     objects. */      Py_ssize_t num_marks;       /* Number of marks in the mark stack. */      Py_ssize_t marks_size;      /* Current allocated size of the mark stack. */ +    int proto;                  /* Protocol of the pickle loaded. */ +    int fix_imports;            /* Indicate whether Unpickler should fix +                                   the name of globals pickled by Python 2.x. */  } UnpicklerObject;  /* Forward declarations */ @@ -1972,6 +1985,63 @@ save_global(PicklerObject *self, PyObject *obj, PyObject *name)              unicode_encoder = PyUnicode_AsASCIIString;          } +        /* For protocol < 3 and if the user didn't request against doing so, +           we convert module names to the old 2.x module names. 
*/ +        if (self->fix_imports) { +            PyObject *key; +            PyObject *item; + +            key = PyTuple_Pack(2, module_name, global_name); +            if (key == NULL) +                goto error; +            item = PyDict_GetItemWithError(name_mapping_3to2, key); +            Py_DECREF(key); +            if (item) { +                if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) { +                    PyErr_Format(PyExc_RuntimeError, +                                 "_compat_pickle.REVERSE_NAME_MAPPING values " +                                 "should be 2-tuples, not %.200s", +                                 Py_TYPE(item)->tp_name); +                    goto error; +                } +                Py_CLEAR(module_name); +                Py_CLEAR(global_name); +                module_name = PyTuple_GET_ITEM(item, 0); +                global_name = PyTuple_GET_ITEM(item, 1); +                if (!PyUnicode_Check(module_name) || +                    !PyUnicode_Check(global_name)) { +                    PyErr_Format(PyExc_RuntimeError, +                                 "_compat_pickle.REVERSE_NAME_MAPPING values " +                                 "should be pairs of str, not (%.200s, %.200s)", +                                 Py_TYPE(module_name)->tp_name, +                                 Py_TYPE(global_name)->tp_name); +                    goto error; +                } +                Py_INCREF(module_name); +                Py_INCREF(global_name); +            } +            else if (PyErr_Occurred()) { +                goto error; +            } + +            item = PyDict_GetItemWithError(import_mapping_3to2, module_name); +            if (item) { +                if (!PyUnicode_Check(item)) { +                    PyErr_Format(PyExc_RuntimeError, +                                 "_compat_pickle.REVERSE_IMPORT_MAPPING values " +                                 "should be strings, not %.200s", +                            
     Py_TYPE(item)->tp_name); +                    goto error; +                } +                Py_CLEAR(module_name); +                module_name = item; +                Py_INCREF(module_name); +            } +            else if (PyErr_Occurred()) { +                goto error; +            } +        } +          /* Save the name of the module. */          encoded = unicode_encoder(module_name);          if (encoded == NULL) { @@ -2608,18 +2678,23 @@ PyDoc_STRVAR(Pickler_doc,  "The file argument must have a write() method that accepts a single\n"  "bytes argument. It can thus be a file object opened for binary\n"  "writing, a io.BytesIO instance, or any other custom object that\n" -"meets this interface.\n"); +"meets this interface.\n" +"\n" +"If fix_imports is True and protocol is less than 3, pickle will try to\n" +"map the new Python 3.x names to the old module names used in Python\n" +"2.x, so that the pickle data stream is readable with Python 2.x.\n");  static int  Pickler_init(PicklerObject *self, PyObject *args, PyObject *kwds)  { -    static char *kwlist[] = {"file", "protocol", 0}; +    static char *kwlist[] = {"file", "protocol", "fix_imports", 0};      PyObject *file;      PyObject *proto_obj = NULL;      long proto = 0; +    int fix_imports = 1; -    if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:Pickler", -                                     kwlist, &file, &proto_obj)) +    if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|Oi:Pickler", +                                     kwlist, &file, &proto_obj, &fix_imports))          return -1;      /* In case of multiple __init__() calls, clear previous content. 
*/ @@ -2628,8 +2703,11 @@ Pickler_init(PicklerObject *self, PyObject *args, PyObject *kwds)      if (proto_obj == NULL || proto_obj == Py_None)          proto = DEFAULT_PROTOCOL; -    else +    else {          proto = PyLong_AsLong(proto_obj); +        if (proto == -1 && PyErr_Occurred()) +            return -1; +    }      if (proto < 0)          proto = HIGHEST_PROTOCOL; @@ -2639,12 +2717,13 @@ Pickler_init(PicklerObject *self, PyObject *args, PyObject *kwds)          return -1;      } -	self->proto = proto; -	self->bin = proto > 0; -	self->arg = NULL; -	self->fast = 0; -	self->fast_nesting = 0; -	self->fast_memo = NULL; +    self->proto = proto; +    self->bin = proto > 0; +    self->arg = NULL; +    self->fast = 0; +    self->fast_nesting = 0; +    self->fast_memo = NULL; +    self->fix_imports = fix_imports && proto < 3;      if (!PyObject_HasAttrString(file, "write")) {          PyErr_SetString(PyExc_TypeError, @@ -4220,8 +4299,10 @@ load_proto(UnpicklerObject *self)          return -1;      i = (unsigned char)s[0]; -    if (i <= HIGHEST_PROTOCOL) +    if (i <= HIGHEST_PROTOCOL) { +        self->proto = i;          return 0; +    }      PyErr_Format(PyExc_ValueError, "unsupported pickle protocol: %d", i);      return -1; @@ -4383,12 +4464,67 @@ Unpickler_find_class(UnpicklerObject *self, PyObject *args)                             &module_name, &global_name))          return NULL; +    /* Try to map the old names used in Python 2.x to the new ones used in +       Python 3.x.  We do this only with old pickle protocols and when the +       user has not disabled the feature. */ +    if (self->proto < 3 && self->fix_imports) { +        PyObject *key; +        PyObject *item; + +        /* Check if the global (i.e., a function or a class) was renamed +           or moved to another module. 
*/ +        key = PyTuple_Pack(2, module_name, global_name); +        if (key == NULL) +            return NULL; +        item = PyDict_GetItemWithError(name_mapping_2to3, key); +        Py_DECREF(key); +        if (item) { +            if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) { +                PyErr_Format(PyExc_RuntimeError, +                             "_compat_pickle.NAME_MAPPING values should be " +                             "2-tuples, not %.200s", Py_TYPE(item)->tp_name); +                return NULL; +            } +            module_name = PyTuple_GET_ITEM(item, 0); +            global_name = PyTuple_GET_ITEM(item, 1); +            if (!PyUnicode_Check(module_name) || +                !PyUnicode_Check(global_name)) { +                PyErr_Format(PyExc_RuntimeError, +                             "_compat_pickle.NAME_MAPPING values should be " +                             "pairs of str, not (%.200s, %.200s)", +                             Py_TYPE(module_name)->tp_name, +                             Py_TYPE(global_name)->tp_name); +                return NULL; +            } +        } +        else if (PyErr_Occurred()) { +            return NULL; +        } + +        /* Check if the module was renamed. 
*/ +        item = PyDict_GetItemWithError(import_mapping_2to3, module_name); +        if (item) { +            if (!PyUnicode_Check(item)) { +                PyErr_Format(PyExc_RuntimeError, +                             "_compat_pickle.IMPORT_MAPPING values should be " +                             "strings, not %.200s", Py_TYPE(item)->tp_name); +                return NULL; +            } +            module_name = item; +        } +        else if (PyErr_Occurred()) { +            return NULL; +        } +    } +      modules_dict = PySys_GetObject("modules");      if (modules_dict == NULL)          return NULL; -    module = PyDict_GetItem(modules_dict, module_name); +    module = PyDict_GetItemWithError(modules_dict, module_name);      if (module == NULL) { +        if (PyErr_Occurred()) +            return NULL;          module = PyImport_Import(module_name);          if (module == NULL)              return NULL; @@ -4477,15 +4613,20 @@ PyDoc_STRVAR(Unpickler_doc,  "reading, a BytesIO object, or any other custom object that\n"  "meets this interface.\n"  "\n" -"Optional keyword arguments are encoding and errors, which are\n" -"used to decode 8-bit string instances pickled by Python 2.x.\n" -"These default to 'ASCII' and 'strict', respectively.\n"); +"Optional keyword arguments are *fix_imports*, *encoding* and *errors*,\n" +"which are used to control compatibility support for pickle stream\n" +"generated by Python 2.x.  If *fix_imports* is True, pickle will try to\n" +"map the old Python 2.x names to the new names used in Python 3.x.  
The\n" +"*encoding* and *errors* tell pickle how to decode 8-bit string\n" +"instances pickled by Python 2.x; these default to 'ASCII' and\n" +"'strict', respectively.\n");  static int  Unpickler_init(UnpicklerObject *self, PyObject *args, PyObject *kwds)  { -    static char *kwlist[] = {"file", "encoding", "errors", 0}; +    static char *kwlist[] = {"file", "fix_imports", "encoding", "errors", 0};      PyObject *file; +    int fix_imports = 1;      char *encoding = NULL;      char *errors = NULL; @@ -4504,8 +4645,8 @@ Unpickler_init(UnpicklerObject *self, PyObject *args, PyObject *kwds)         extra careful in the other Unpickler methods, since a subclass could         forget to call Unpickler.__init__() thus breaking our internal         invariants. */ -    if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|ss:Unpickler", kwlist, -                                     &file, &encoding, &errors)) +    if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|iss:Unpickler", kwlist, +                                     &file, &fix_imports, &encoding, &errors))          return -1;      /* In case of multiple __init__() calls, clear previous content. */ @@ -4549,6 +4690,8 @@ Unpickler_init(UnpicklerObject *self, PyObject *args, PyObject *kwds)      self->last_string = NULL;      self->arg = NULL; +    self->proto = 0; +    self->fix_imports = fix_imports;      return 0;  } @@ -4672,40 +4815,85 @@ static PyTypeObject Unpickler_Type = {  };  static int -init_stuff(void) +initmodule(void)  { -    PyObject *copyreg; +    PyObject *copyreg = NULL; +    PyObject *compat_pickle = NULL; + +    /* XXX: We should ensure that the types of the dictionaries imported are +       exactly PyDict objects. Otherwise, it is possible to crash the pickle +       since we use the PyDict API directly to access these dictionaries. 
*/      copyreg = PyImport_ImportModule("copyreg");      if (!copyreg) -        return -1; - +        goto error;      dispatch_table = PyObject_GetAttrString(copyreg, "dispatch_table");      if (!dispatch_table)          goto error; -      extension_registry = \          PyObject_GetAttrString(copyreg, "_extension_registry");      if (!extension_registry)          goto error; -      inverted_registry = PyObject_GetAttrString(copyreg, "_inverted_registry");      if (!inverted_registry)          goto error; -      extension_cache = PyObject_GetAttrString(copyreg, "_extension_cache");      if (!extension_cache)          goto error; +    Py_CLEAR(copyreg); -    Py_DECREF(copyreg); +    /* Load the 2.x -> 3.x stdlib module mapping tables */ +    compat_pickle = PyImport_ImportModule("_compat_pickle"); +    if (!compat_pickle) +        goto error; +    name_mapping_2to3 = PyObject_GetAttrString(compat_pickle, "NAME_MAPPING"); +    if (!name_mapping_2to3) +        goto error; +    if (!PyDict_CheckExact(name_mapping_2to3)) { +        PyErr_Format(PyExc_RuntimeError, +                     "_compat_pickle.NAME_MAPPING should be a dict, not %.200s", +                     Py_TYPE(name_mapping_2to3)->tp_name); +        goto error; +    } +    import_mapping_2to3 = PyObject_GetAttrString(compat_pickle, +                                                 "IMPORT_MAPPING"); +    if (!import_mapping_2to3) +        goto error; +    if (!PyDict_CheckExact(import_mapping_2to3)) { +        PyErr_Format(PyExc_RuntimeError, +                     "_compat_pickle.IMPORT_MAPPING should be a dict, " +                     "not %.200s", Py_TYPE(import_mapping_2to3)->tp_name); +        goto error; +    } +    /* ... 
and the 3.x -> 2.x mapping tables */ +    name_mapping_3to2 = PyObject_GetAttrString(compat_pickle, +                                               "REVERSE_NAME_MAPPING"); +    if (!name_mapping_3to2) +        goto error; +    if (!PyDict_CheckExact(name_mapping_3to2)) { +        PyErr_Format(PyExc_RuntimeError, +                     "_compat_pickle.REVERSE_NAME_MAPPING should be a dict, " +                     "not %.200s", Py_TYPE(name_mapping_3to2)->tp_name); +        goto error; +    } +    import_mapping_3to2 = PyObject_GetAttrString(compat_pickle, +                                                 "REVERSE_IMPORT_MAPPING"); +    if (!import_mapping_3to2) +        goto error; +    if (!PyDict_CheckExact(import_mapping_3to2)) { +        PyErr_Format(PyExc_RuntimeError, +                     "_compat_pickle.REVERSE_IMPORT_MAPPING should be a dict, " +                     "not %.200s", Py_TYPE(import_mapping_3to2)->tp_name); +        goto error; +    } +    Py_CLEAR(compat_pickle);      empty_tuple = PyTuple_New(0);      if (empty_tuple == NULL) -        return -1; - +        goto error;      two_tuple = PyTuple_New(2);      if (two_tuple == NULL) -        return -1; +        goto error;      /* We use this temp container with no regard to refcounts, or to       * keeping containees alive.  Exempt from GC, because we don't       * want anything looking at two_tuple() by magic. 
@@ -4715,7 +4903,18 @@ init_stuff(void)      return 0;    error: -    Py_DECREF(copyreg); +    Py_CLEAR(copyreg); +    Py_CLEAR(dispatch_table); +    Py_CLEAR(extension_registry); +    Py_CLEAR(inverted_registry); +    Py_CLEAR(extension_cache); +    Py_CLEAR(compat_pickle); +    Py_CLEAR(name_mapping_2to3); +    Py_CLEAR(import_mapping_2to3); +    Py_CLEAR(name_mapping_3to2); +    Py_CLEAR(import_mapping_3to2); +    Py_CLEAR(empty_tuple); +    Py_CLEAR(two_tuple);      return -1;  } @@ -4773,7 +4972,7 @@ PyInit__pickle(void)      if (PyModule_AddObject(m, "UnpicklingError", UnpicklingError) < 0)          return NULL; -    if (init_stuff() < 0) +    if (initmodule() < 0)          return NULL;      return m; | 
