summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Doc/library/pickle.rst51
-rw-r--r--Lib/_compat_pickle.py81
-rw-r--r--Lib/pickle.py56
-rw-r--r--Lib/pickletools.py170
-rw-r--r--Lib/test/pickletester.py42
-rw-r--r--Lib/test/test_pickletools.py3
-rw-r--r--Misc/NEWS5
-rw-r--r--Modules/_pickle.c281
8 files changed, 532 insertions, 157 deletions
diff --git a/Doc/library/pickle.rst b/Doc/library/pickle.rst
index 1c70196dbb..21e4001139 100644
--- a/Doc/library/pickle.rst
+++ b/Doc/library/pickle.rst
@@ -141,7 +141,7 @@ an unpickler, then you call the unpickler's :meth:`load` method. The
The :mod:`pickle` module provides the following functions to make the pickling
process more convenient:
-.. function:: dump(obj, file[, protocol])
+.. function:: dump(obj, file[, protocol, \*, fix_imports=True])
Write a pickled representation of *obj* to the open file object *file*. This
is equivalent to ``Pickler(file, protocol).dump(obj)``.
@@ -158,7 +158,11 @@ process more convenient:
argument. It can thus be a file object opened for binary writing, a
io.BytesIO instance, or any other custom object that meets this interface.
-.. function:: dumps(obj[, protocol])
+ If *fix_imports* is True and *protocol* is less than 3, pickle will try to
+ map the new Python 3.x names to the old module names used in Python 2.x,
+ so that the pickle data stream is readable with Python 2.x.
+
+.. function:: dumps(obj[, protocol, \*, fix_imports=True])
Return the pickled representation of the object as a :class:`bytes`
object, instead of writing it to a file.
@@ -171,7 +175,11 @@ process more convenient:
supported. The higher the protocol used, the more recent the version of
Python needed to read the pickle produced.
-.. function:: load(file, [\*, encoding="ASCII", errors="strict"])
+ If *fix_imports* is True and *protocol* is less than 3, pickle will try to
+ map the new Python 3.x names to the old module names used in Python 2.x,
+ so that the pickle data stream is readable with Python 2.x.
+
+.. function:: load(file, [\*, fix_imports=True, encoding="ASCII", errors="strict"])
Read a pickled object representation from the open file object *file* and
return the reconstituted object hierarchy specified therein. This is
@@ -187,11 +195,14 @@ process more convenient:
for reading, a BytesIO object, or any other custom object that meets this
interface.
- Optional keyword arguments are encoding and errors, which are used to decode
- 8-bit string instances pickled by Python 2.x. These default to 'ASCII' and
- 'strict', respectively.
+ Optional keyword arguments are *fix_imports*, *encoding* and *errors*,
+ which are used to control compatibility support for pickle streams generated
+ by Python 2.x. If *fix_imports* is True, pickle will try to map the old
+ Python 2.x names to the new names used in Python 3.x. The *encoding* and
+ *errors* tell pickle how to decode 8-bit string instances pickled by Python
+ 2.x; these default to 'ASCII' and 'strict', respectively.
-.. function:: loads(bytes_object, [\*, encoding="ASCII", errors="strict"])
+.. function:: loads(bytes_object, [\*, fix_imports=True, encoding="ASCII", errors="strict"])
Read a pickled object hierarchy from a :class:`bytes` object and return the
reconstituted object hierarchy specified therein
@@ -200,9 +211,12 @@ process more convenient:
argument is needed. Bytes past the pickled object's representation are
ignored.
- Optional keyword arguments are encoding and errors, which are used to decode
- 8-bit string instances pickled by Python 2.x. These default to 'ASCII' and
- 'strict', respectively.
+ Optional keyword arguments are *fix_imports*, *encoding* and *errors*,
+ which are used to control compatibility support for pickle streams generated
+ by Python 2.x. If *fix_imports* is True, pickle will try to map the old
+ Python 2.x names to the new names used in Python 3.x. The *encoding* and
+ *errors* tell pickle how to decode 8-bit string instances pickled by Python
+ 2.x; these default to 'ASCII' and 'strict', respectively.
The :mod:`pickle` module defines three exceptions:
@@ -233,7 +247,7 @@ The :mod:`pickle` module defines three exceptions:
The :mod:`pickle` module exports two classes, :class:`Pickler` and
:class:`Unpickler`:
-.. class:: Pickler(file[, protocol])
+.. class:: Pickler(file[, protocol, \*, fix_imports=True])
This takes a binary file for writing a pickle data stream.
@@ -249,6 +263,10 @@ The :mod:`pickle` module exports two classes, :class:`Pickler` and
argument. It can thus be a file object opened for binary writing, a
io.BytesIO instance, or any other custom object that meets this interface.
+ If *fix_imports* is True and *protocol* is less than 3, pickle will try to
+ map the new Python 3.x names to the old module names used in Python 2.x,
+ so that the pickle data stream is readable with Python 2.x.
+
.. method:: dump(obj)
Write a pickled representation of *obj* to the open file object given in
@@ -277,7 +295,7 @@ The :mod:`pickle` module exports two classes, :class:`Pickler` and
Use :func:`pickletools.optimize` if you need more compact pickles.
-.. class:: Unpickler(file, [\*, encoding="ASCII", errors="strict"])
+.. class:: Unpickler(file, [\*, fix_imports=True, encoding="ASCII", errors="strict"])
This takes a binary file for reading a pickle data stream.
@@ -290,9 +308,12 @@ The :mod:`pickle` module exports two classes, :class:`Pickler` and
for reading, a BytesIO object, or any other custom object that meets this
interface.
- Optional keyword arguments are encoding and errors, which are used to decode
- 8-bit string instances pickled by Python 2.x. These default to 'ASCII' and
- 'strict', respectively.
+ Optional keyword arguments are *fix_imports*, *encoding* and *errors*,
+ which are used to control compatibility support for pickle streams generated
+ by Python 2.x. If *fix_imports* is True, pickle will try to map the old
+ Python 2.x names to the new names used in Python 3.x. The *encoding* and
+ *errors* tell pickle how to decode 8-bit string instances pickled by Python
+ 2.x; these default to 'ASCII' and 'strict', respectively.
.. method:: load()
diff --git a/Lib/_compat_pickle.py b/Lib/_compat_pickle.py
new file mode 100644
index 0000000000..700c80cd57
--- /dev/null
+++ b/Lib/_compat_pickle.py
@@ -0,0 +1,81 @@
+# This module is used to map the old Python 2 names to the new names used in
+# Python 3 for the pickle module. This is needed to make pickle streams
+# generated with Python 2 loadable by Python 3.
+
+# This is a copy of lib2to3.fixes.fix_imports.MAPPING. We cannot import
+# lib2to3 and use the mapping defined there, because lib2to3 uses pickle.
+# Thus, this could cause the module to be imported recursively.
+IMPORT_MAPPING = {
+ 'StringIO': 'io',
+ 'cStringIO': 'io',
+ 'cPickle': 'pickle',
+ '__builtin__' : 'builtins',
+ 'copy_reg': 'copyreg',
+ 'Queue': 'queue',
+ 'SocketServer': 'socketserver',
+ 'ConfigParser': 'configparser',
+ 'repr': 'reprlib',
+ 'FileDialog': 'tkinter.filedialog',
+ 'tkFileDialog': 'tkinter.filedialog',
+ 'SimpleDialog': 'tkinter.simpledialog',
+ 'tkSimpleDialog': 'tkinter.simpledialog',
+ 'tkColorChooser': 'tkinter.colorchooser',
+ 'tkCommonDialog': 'tkinter.commondialog',
+ 'Dialog': 'tkinter.dialog',
+ 'Tkdnd': 'tkinter.dnd',
+ 'tkFont': 'tkinter.font',
+ 'tkMessageBox': 'tkinter.messagebox',
+ 'ScrolledText': 'tkinter.scrolledtext',
+ 'Tkconstants': 'tkinter.constants',
+ 'Tix': 'tkinter.tix',
+ 'ttk': 'tkinter.ttk',
+ 'Tkinter': 'tkinter',
+ 'markupbase': '_markupbase',
+ '_winreg': 'winreg',
+ 'thread': '_thread',
+ 'dummy_thread': '_dummy_thread',
+ 'dbhash': 'dbm.bsd',
+ 'dumbdbm': 'dbm.dumb',
+ 'dbm': 'dbm.ndbm',
+ 'gdbm': 'dbm.gnu',
+ 'xmlrpclib': 'xmlrpc.client',
+ 'DocXMLRPCServer': 'xmlrpc.server',
+ 'SimpleXMLRPCServer': 'xmlrpc.server',
+ 'httplib': 'http.client',
+ 'htmlentitydefs' : 'html.entities',
+ 'HTMLParser' : 'html.parser',
+ 'Cookie': 'http.cookies',
+ 'cookielib': 'http.cookiejar',
+ 'BaseHTTPServer': 'http.server',
+ 'SimpleHTTPServer': 'http.server',
+ 'CGIHTTPServer': 'http.server',
+ 'test.test_support': 'test.support',
+ 'commands': 'subprocess',
+ 'UserString' : 'collections',
+ 'UserList' : 'collections',
+ 'urlparse' : 'urllib.parse',
+ 'robotparser' : 'urllib.robotparser',
+ 'whichdb': 'dbm',
+ 'anydbm': 'dbm'
+}
+
+
+# This contains rename rules that are easy to handle. We ignore the more
+# complex stuff (e.g. mapping the names in the urllib and types modules).
+# These rules should be run before import names are fixed.
+NAME_MAPPING = {
+ ('__builtin__', 'xrange'): ('builtins', 'range'),
+ ('__builtin__', 'reduce'): ('functools', 'reduce'),
+ ('__builtin__', 'intern'): ('sys', 'intern'),
+ ('__builtin__', 'unichr'): ('builtins', 'chr'),
+ ('__builtin__', 'basestring'): ('builtins', 'str'),
+ ('__builtin__', 'long'): ('builtins', 'int'),
+ ('itertools', 'izip'): ('builtins', 'zip'),
+ ('itertools', 'imap'): ('builtins', 'map'),
+ ('itertools', 'ifilter'): ('builtins', 'filter'),
+ ('itertools', 'ifilterfalse'): ('itertools', 'filterfalse'),
+}
+
+# Same, but for 3.x to 2.x
+REVERSE_IMPORT_MAPPING = dict((v, k) for (k, v) in IMPORT_MAPPING.items())
+REVERSE_NAME_MAPPING = dict((v, k) for (k, v) in NAME_MAPPING.items())
diff --git a/Lib/pickle.py b/Lib/pickle.py
index 720c1a00e6..7af4ce969a 100644
--- a/Lib/pickle.py
+++ b/Lib/pickle.py
@@ -34,6 +34,7 @@ import struct
import re
import io
import codecs
+import _compat_pickle
__all__ = ["PickleError", "PicklingError", "UnpicklingError", "Pickler",
"Unpickler", "dump", "dumps", "load", "loads"]
@@ -171,12 +172,11 @@ SHORT_BINBYTES = b'C' # " " ; " " " " < 256 bytes
__all__.extend([x for x in dir() if re.match("[A-Z][A-Z0-9_]+$",x)])
-
# Pickling machinery
class _Pickler:
- def __init__(self, file, protocol=None):
+ def __init__(self, file, protocol=None, *, fix_imports=True):
"""This takes a binary file for writing a pickle data stream.
The optional protocol argument tells the pickler to use the
@@ -193,6 +193,10 @@ class _Pickler:
bytes argument. It can thus be a file object opened for binary
writing, a io.BytesIO instance, or any other custom object that
meets this interface.
+
+ If fix_imports is True and protocol is less than 3, pickle will try to
+ map the new Python 3.x names to the old module names used in Python
+ 2.x, so that the pickle data stream is readable with Python 2.x.
"""
if protocol is None:
protocol = DEFAULT_PROTOCOL
@@ -208,6 +212,7 @@ class _Pickler:
self.proto = int(protocol)
self.bin = protocol >= 1
self.fast = 0
+ self.fix_imports = fix_imports and protocol < 3
def clear_memo(self):
"""Clears the pickler's "memo".
@@ -698,6 +703,11 @@ class _Pickler:
write(GLOBAL + bytes(module, "utf-8") + b'\n' +
bytes(name, "utf-8") + b'\n')
else:
+ if self.fix_imports:
+ if (module, name) in _compat_pickle.REVERSE_NAME_MAPPING:
+ module, name = _compat_pickle.REVERSE_NAME_MAPPING[(module, name)]
+ if module in _compat_pickle.REVERSE_IMPORT_MAPPING:
+ module = _compat_pickle.REVERSE_IMPORT_MAPPING[module]
try:
write(GLOBAL + bytes(module, "ascii") + b'\n' +
bytes(name, "ascii") + b'\n')
@@ -766,7 +776,8 @@ def whichmodule(func, funcname):
class _Unpickler:
- def __init__(self, file, *, encoding="ASCII", errors="strict"):
+ def __init__(self, file, *, fix_imports=True,
+ encoding="ASCII", errors="strict"):
"""This takes a binary file for reading a pickle data stream.
The protocol version of the pickle is detected automatically, so no
@@ -779,15 +790,21 @@ class _Unpickler:
reading, a BytesIO object, or any other custom object that
meets this interface.
- Optional keyword arguments are encoding and errors, which are
- used to decode 8-bit string instances pickled by Python 2.x.
- These default to 'ASCII' and 'strict', respectively.
+ Optional keyword arguments are *fix_imports*, *encoding* and *errors*,
+ which are used to control compatiblity support for pickle stream
+ generated by Python 2.x. If *fix_imports* is True, pickle will try to
+ map the old Python 2.x names to the new names used in Python 3.x. The
+ *encoding* and *errors* tell pickle how to decode 8-bit string
+ instances pickled by Python 2.x; these default to 'ASCII' and
+ 'strict', respectively.
"""
self.readline = file.readline
self.read = file.read
self.memo = {}
self.encoding = encoding
self.errors = errors
+ self.proto = 0
+ self.fix_imports = fix_imports
def load(self):
"""Read a pickled object representation from the open file.
@@ -838,6 +855,7 @@ class _Unpickler:
proto = ord(self.read(1))
if not 0 <= proto <= HIGHEST_PROTOCOL:
raise ValueError("unsupported pickle protocol: %d" % proto)
+ self.proto = proto
dispatch[PROTO[0]] = load_proto
def load_persid(self):
@@ -1088,7 +1106,12 @@ class _Unpickler:
self.append(obj)
def find_class(self, module, name):
- # Subclasses may override this
+ # Subclasses may override this.
+ if self.proto < 3 and self.fix_imports:
+ if (module, name) in _compat_pickle.NAME_MAPPING:
+ module, name = _compat_pickle.NAME_MAPPING[(module, name)]
+ if module in _compat_pickle.IMPORT_MAPPING:
+ module = _compat_pickle.IMPORT_MAPPING[module]
__import__(module, level=0)
mod = sys.modules[module]
klass = getattr(mod, name)
@@ -1327,27 +1350,28 @@ except ImportError:
# Shorthands
-def dump(obj, file, protocol=None):
- Pickler(file, protocol).dump(obj)
+def dump(obj, file, protocol=None, *, fix_imports=True):
+ Pickler(file, protocol, fix_imports=fix_imports).dump(obj)
-def dumps(obj, protocol=None):
+def dumps(obj, protocol=None, *, fix_imports=True):
f = io.BytesIO()
- Pickler(f, protocol).dump(obj)
+ Pickler(f, protocol, fix_imports=fix_imports).dump(obj)
res = f.getvalue()
assert isinstance(res, bytes_types)
return res
-def load(file, *, encoding="ASCII", errors="strict"):
- return Unpickler(file, encoding=encoding, errors=errors).load()
+def load(file, *, fix_imports=True, encoding="ASCII", errors="strict"):
+ return Unpickler(file, fix_imports=fix_imports,
+ encoding=encoding, errors=errors).load()
-def loads(s, *, encoding="ASCII", errors="strict"):
+def loads(s, *, fix_imports=True, encoding="ASCII", errors="strict"):
if isinstance(s, str):
raise TypeError("Can't load pickle from unicode string")
file = io.BytesIO(s)
- return Unpickler(file, encoding=encoding, errors=errors).load()
+ return Unpickler(file, fix_imports=fix_imports,
+ encoding=encoding, errors=errors).load()
# Doctest
-
def _test():
import doctest
return doctest.testmod()
diff --git a/Lib/pickletools.py b/Lib/pickletools.py
index 2bb69d1320..ca11aa3871 100644
--- a/Lib/pickletools.py
+++ b/Lib/pickletools.py
@@ -2066,27 +2066,27 @@ _dis_test = r"""
29: ( MARK
30: d DICT (MARK at 29)
31: p PUT 2
- 34: c GLOBAL 'builtins bytes'
- 50: p PUT 3
- 53: ( MARK
- 54: ( MARK
- 55: l LIST (MARK at 54)
- 56: p PUT 4
- 59: L LONG 97
- 64: a APPEND
- 65: L LONG 98
- 70: a APPEND
- 71: L LONG 99
- 76: a APPEND
- 77: t TUPLE (MARK at 53)
- 78: p PUT 5
- 81: R REDUCE
- 82: p PUT 6
- 85: V UNICODE 'def'
- 90: p PUT 7
- 93: s SETITEM
- 94: a APPEND
- 95: . STOP
+ 34: c GLOBAL '__builtin__ bytes'
+ 53: p PUT 3
+ 56: ( MARK
+ 57: ( MARK
+ 58: l LIST (MARK at 57)
+ 59: p PUT 4
+ 62: L LONG 97
+ 67: a APPEND
+ 68: L LONG 98
+ 73: a APPEND
+ 74: L LONG 99
+ 79: a APPEND
+ 80: t TUPLE (MARK at 56)
+ 81: p PUT 5
+ 84: R REDUCE
+ 85: p PUT 6
+ 88: V UNICODE 'def'
+ 93: p PUT 7
+ 96: s SETITEM
+ 97: a APPEND
+ 98: . STOP
highest protocol among opcodes = 0
Try again with a "binary" pickle.
@@ -2105,25 +2105,25 @@ Try again with a "binary" pickle.
14: q BINPUT 1
16: } EMPTY_DICT
17: q BINPUT 2
- 19: c GLOBAL 'builtins bytes'
- 35: q BINPUT 3
- 37: ( MARK
- 38: ] EMPTY_LIST
- 39: q BINPUT 4
- 41: ( MARK
- 42: K BININT1 97
- 44: K BININT1 98
- 46: K BININT1 99
- 48: e APPENDS (MARK at 41)
- 49: t TUPLE (MARK at 37)
- 50: q BINPUT 5
- 52: R REDUCE
- 53: q BINPUT 6
- 55: X BINUNICODE 'def'
- 63: q BINPUT 7
- 65: s SETITEM
- 66: e APPENDS (MARK at 3)
- 67: . STOP
+ 19: c GLOBAL '__builtin__ bytes'
+ 38: q BINPUT 3
+ 40: ( MARK
+ 41: ] EMPTY_LIST
+ 42: q BINPUT 4
+ 44: ( MARK
+ 45: K BININT1 97
+ 47: K BININT1 98
+ 49: K BININT1 99
+ 51: e APPENDS (MARK at 44)
+ 52: t TUPLE (MARK at 40)
+ 53: q BINPUT 5
+ 55: R REDUCE
+ 56: q BINPUT 6
+ 58: X BINUNICODE 'def'
+ 66: q BINPUT 7
+ 68: s SETITEM
+ 69: e APPENDS (MARK at 3)
+ 70: . STOP
highest protocol among opcodes = 1
Exercise the INST/OBJ/BUILD family.
@@ -2141,58 +2141,58 @@ highest protocol among opcodes = 0
0: ( MARK
1: l LIST (MARK at 0)
2: p PUT 0
- 5: c GLOBAL 'copyreg _reconstructor'
- 29: p PUT 1
- 32: ( MARK
- 33: c GLOBAL 'pickletools _Example'
- 55: p PUT 2
- 58: c GLOBAL 'builtins object'
- 75: p PUT 3
- 78: N NONE
- 79: t TUPLE (MARK at 32)
- 80: p PUT 4
- 83: R REDUCE
- 84: p PUT 5
- 87: ( MARK
- 88: d DICT (MARK at 87)
- 89: p PUT 6
- 92: V UNICODE 'value'
- 99: p PUT 7
- 102: L LONG 42
- 107: s SETITEM
- 108: b BUILD
- 109: a APPEND
- 110: g GET 5
+ 5: c GLOBAL 'copy_reg _reconstructor'
+ 30: p PUT 1
+ 33: ( MARK
+ 34: c GLOBAL 'pickletools _Example'
+ 56: p PUT 2
+ 59: c GLOBAL '__builtin__ object'
+ 79: p PUT 3
+ 82: N NONE
+ 83: t TUPLE (MARK at 33)
+ 84: p PUT 4
+ 87: R REDUCE
+ 88: p PUT 5
+ 91: ( MARK
+ 92: d DICT (MARK at 91)
+ 93: p PUT 6
+ 96: V UNICODE 'value'
+ 103: p PUT 7
+ 106: L LONG 42
+ 111: s SETITEM
+ 112: b BUILD
113: a APPEND
- 114: . STOP
+ 114: g GET 5
+ 117: a APPEND
+ 118: . STOP
highest protocol among opcodes = 0
>>> dis(pickle.dumps(x, 1))
0: ] EMPTY_LIST
1: q BINPUT 0
3: ( MARK
- 4: c GLOBAL 'copyreg _reconstructor'
- 28: q BINPUT 1
- 30: ( MARK
- 31: c GLOBAL 'pickletools _Example'
- 53: q BINPUT 2
- 55: c GLOBAL 'builtins object'
- 72: q BINPUT 3
- 74: N NONE
- 75: t TUPLE (MARK at 30)
- 76: q BINPUT 4
- 78: R REDUCE
- 79: q BINPUT 5
- 81: } EMPTY_DICT
- 82: q BINPUT 6
- 84: X BINUNICODE 'value'
- 94: q BINPUT 7
- 96: K BININT1 42
- 98: s SETITEM
- 99: b BUILD
- 100: h BINGET 5
- 102: e APPENDS (MARK at 3)
- 103: . STOP
+ 4: c GLOBAL 'copy_reg _reconstructor'
+ 29: q BINPUT 1
+ 31: ( MARK
+ 32: c GLOBAL 'pickletools _Example'
+ 54: q BINPUT 2
+ 56: c GLOBAL '__builtin__ object'
+ 76: q BINPUT 3
+ 78: N NONE
+ 79: t TUPLE (MARK at 31)
+ 80: q BINPUT 4
+ 82: R REDUCE
+ 83: q BINPUT 5
+ 85: } EMPTY_DICT
+ 86: q BINPUT 6
+ 88: X BINUNICODE 'value'
+ 98: q BINPUT 7
+ 100: K BININT1 42
+ 102: s SETITEM
+ 103: b BUILD
+ 104: h BINGET 5
+ 106: e APPENDS (MARK at 3)
+ 107: . STOP
highest protocol among opcodes = 1
Try "the canonical" recursive-object test.
diff --git a/Lib/test/pickletester.py b/Lib/test/pickletester.py
index 58ce3b5cf9..3ed26b8346 100644
--- a/Lib/test/pickletester.py
+++ b/Lib/test/pickletester.py
@@ -3,6 +3,7 @@ import unittest
import pickle
import pickletools
import copyreg
+from http.cookies import SimpleCookie
from test.support import TestFailed, TESTFN, run_with_locale
@@ -342,6 +343,24 @@ DATA2_DIS = """\
highest protocol among opcodes = 2
"""
+# set([1,2]) pickled from 2.x with protocol 2
+DATA3 = b'\x80\x02c__builtin__\nset\nq\x00]q\x01(K\x01K\x02e\x85q\x02Rq\x03.'
+
+# xrange(5) pickled from 2.x with protocol 2
+DATA4 = b'\x80\x02c__builtin__\nxrange\nq\x00K\x00K\x05K\x01\x87q\x01Rq\x02.'
+
+# a SimpleCookie() object pickled from 2.x with protocol 2
+DATA5 = (b'\x80\x02cCookie\nSimpleCookie\nq\x00)\x81q\x01U\x03key'
+ b'q\x02cCookie\nMorsel\nq\x03)\x81q\x04(U\x07commentq\x05U'
+ b'\x00q\x06U\x06domainq\x07h\x06U\x06secureq\x08h\x06U\x07'
+ b'expiresq\th\x06U\x07max-ageq\nh\x06U\x07versionq\x0bh\x06U'
+ b'\x04pathq\x0ch\x06U\x08httponlyq\rh\x06u}q\x0e(U\x0b'
+ b'coded_valueq\x0fU\x05valueq\x10h\x10h\x10h\x02h\x02ubs}q\x11b.')
+
+# set([3]) pickled from 2.x with protocol 2
+DATA6 = b'\x80\x02c__builtin__\nset\nq\x00]q\x01K\x03a\x85q\x02Rq\x03.'
+
+
def create_data():
c = C()
c.foo = 1
@@ -956,6 +975,29 @@ class AbstractPickleTests(unittest.TestCase):
for x_key, y_key in zip(x_keys, y_keys):
self.assertIs(x_key, y_key)
+ def test_unpickle_from_2x(self):
+ # Unpickle non-trivial data from Python 2.x.
+ loaded = self.loads(DATA3)
+ self.assertEqual(loaded, set([1, 2]))
+ loaded = self.loads(DATA4)
+ self.assertEqual(type(loaded), type(range(0)))
+ self.assertEqual(list(loaded), list(range(5)))
+ loaded = self.loads(DATA5)
+ self.assertEqual(type(loaded), SimpleCookie)
+ self.assertEqual(list(loaded.keys()), ["key"])
+ self.assertEqual(loaded["key"].value, "Set-Cookie: key=value")
+
+ def test_pickle_to_2x(self):
+ # Pickle non-trivial data with protocol 2, expecting that it yields
+ # the same result as Python 2.x did.
+ # NOTE: this test is a bit too strong since we can produce different
+ # bytecode that 2.x will still understand.
+ dumped = self.dumps(range(5), 2)
+ self.assertEqual(dumped, DATA4)
+ dumped = self.dumps(set([3]), 2)
+ self.assertEqual(dumped, DATA6)
+
+
# Test classes for reduce_ex
class REX_one(object):
diff --git a/Lib/test/test_pickletools.py b/Lib/test/test_pickletools.py
index 3e701b0909..823b0c2621 100644
--- a/Lib/test/test_pickletools.py
+++ b/Lib/test/test_pickletools.py
@@ -12,6 +12,9 @@ class OptimizedPickleTests(AbstractPickleTests, AbstractPickleModuleTests):
def loads(self, buf):
return pickle.loads(buf)
+ # Test relies on precise output of dumps()
+ test_pickle_to_2x = None
+
def test_main():
support.run_unittest(OptimizedPickleTests)
diff --git a/Misc/NEWS b/Misc/NEWS
index d540c9cb52..2c37a808cc 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -21,6 +21,11 @@ Core and Builtins
Library
-------
+- Issue #6137: The pickle module now translates module names when loading
+ or dumping pickles with a 2.x-compatible protocol, in order to make data
+ sharing and migration easier. This behaviour can be disabled using the
+ new `fix_imports` optional argument.
+
- Removed the ipaddr module.
- Issue #3613: base64.{encode,decode}string are now called
diff --git a/Modules/_pickle.c b/Modules/_pickle.c
index 3ad55b587f..0e6df34bf1 100644
--- a/Modules/_pickle.c
+++ b/Modules/_pickle.c
@@ -103,25 +103,33 @@ enum {
/* Exception classes for pickle. These should override the ones defined in
pickle.py, when the C-optimized Pickler and Unpickler are used. */
-static PyObject *PickleError;
-static PyObject *PicklingError;
-static PyObject *UnpicklingError;
+static PyObject *PickleError = NULL;
+static PyObject *PicklingError = NULL;
+static PyObject *UnpicklingError = NULL;
/* copyreg.dispatch_table, {type_object: pickling_function} */
-static PyObject *dispatch_table;
+static PyObject *dispatch_table = NULL;
/* For EXT[124] opcodes. */
/* copyreg._extension_registry, {(module_name, function_name): code} */
-static PyObject *extension_registry;
+static PyObject *extension_registry = NULL;
/* copyreg._inverted_registry, {code: (module_name, function_name)} */
-static PyObject *inverted_registry;
+static PyObject *inverted_registry = NULL;
/* copyreg._extension_cache, {code: object} */
-static PyObject *extension_cache;
+static PyObject *extension_cache = NULL;
+
+/* _compat_pickle.NAME_MAPPING, {(oldmodule, oldname): (newmodule, newname)} */
+static PyObject *name_mapping_2to3 = NULL;
+/* _compat_pickle.IMPORT_MAPPING, {oldmodule: newmodule} */
+static PyObject *import_mapping_2to3 = NULL;
+/* Same, but with REVERSE_NAME_MAPPING / REVERSE_IMPORT_MAPPING */
+static PyObject *name_mapping_3to2 = NULL;
+static PyObject *import_mapping_3to2 = NULL;
/* XXX: Are these really nescessary? */
/* As the name says, an empty tuple. */
-static PyObject *empty_tuple;
+static PyObject *empty_tuple = NULL;
/* For looking up name pairs in copyreg._extension_registry. */
-static PyObject *two_tuple;
+static PyObject *two_tuple = NULL;
static int
stack_underflow(void)
@@ -315,6 +323,8 @@ typedef struct PicklerObject {
should not be used if with self-referential
objects. */
int fast_nesting;
+ int fix_imports; /* Indicate whether Pickler should fix
+ the name of globals for Python 2.x. */
PyObject *fast_memo;
} PicklerObject;
@@ -340,6 +350,9 @@ typedef struct UnpicklerObject {
objects. */
Py_ssize_t num_marks; /* Number of marks in the mark stack. */
Py_ssize_t marks_size; /* Current allocated size of the mark stack. */
+ int proto; /* Protocol of the pickle loaded. */
+ int fix_imports; /* Indicate whether Unpickler should fix
+ the name of globals pickled by Python 2.x. */
} UnpicklerObject;
/* Forward declarations */
@@ -1972,6 +1985,63 @@ save_global(PicklerObject *self, PyObject *obj, PyObject *name)
unicode_encoder = PyUnicode_AsASCIIString;
}
+ /* For protocol < 3 and if the user didn't request against doing so,
+ we convert module names to the old 2.x module names. */
+ if (self->fix_imports) {
+ PyObject *key;
+ PyObject *item;
+
+ key = PyTuple_Pack(2, module_name, global_name);
+ if (key == NULL)
+ goto error;
+ item = PyDict_GetItemWithError(name_mapping_3to2, key);
+ Py_DECREF(key);
+ if (item) {
+ if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
+ PyErr_Format(PyExc_RuntimeError,
+ "_compat_pickle.REVERSE_NAME_MAPPING values "
+ "should be 2-tuples, not %.200s",
+ Py_TYPE(item)->tp_name);
+ goto error;
+ }
+ Py_CLEAR(module_name);
+ Py_CLEAR(global_name);
+ module_name = PyTuple_GET_ITEM(item, 0);
+ global_name = PyTuple_GET_ITEM(item, 1);
+ if (!PyUnicode_Check(module_name) ||
+ !PyUnicode_Check(global_name)) {
+ PyErr_Format(PyExc_RuntimeError,
+ "_compat_pickle.REVERSE_NAME_MAPPING values "
+ "should be pairs of str, not (%.200s, %.200s)",
+ Py_TYPE(module_name)->tp_name,
+ Py_TYPE(global_name)->tp_name);
+ goto error;
+ }
+ Py_INCREF(module_name);
+ Py_INCREF(global_name);
+ }
+ else if (PyErr_Occurred()) {
+ goto error;
+ }
+
+ item = PyDict_GetItemWithError(import_mapping_3to2, module_name);
+ if (item) {
+ if (!PyUnicode_Check(item)) {
+ PyErr_Format(PyExc_RuntimeError,
+ "_compat_pickle.REVERSE_IMPORT_MAPPING values "
+ "should be strings, not %.200s",
+ Py_TYPE(item)->tp_name);
+ goto error;
+ }
+ Py_CLEAR(module_name);
+ module_name = item;
+ Py_INCREF(module_name);
+ }
+ else if (PyErr_Occurred()) {
+ goto error;
+ }
+ }
+
/* Save the name of the module. */
encoded = unicode_encoder(module_name);
if (encoded == NULL) {
@@ -2608,18 +2678,23 @@ PyDoc_STRVAR(Pickler_doc,
"The file argument must have a write() method that accepts a single\n"
"bytes argument. It can thus be a file object opened for binary\n"
"writing, a io.BytesIO instance, or any other custom object that\n"
-"meets this interface.\n");
+"meets this interface.\n"
+"\n"
+"If fix_imports is True and protocol is less than 3, pickle will try to\n"
+"map the new Python 3.x names to the old module names used in Python\n"
+"2.x, so that the pickle data stream is readable with Python 2.x.\n");
static int
Pickler_init(PicklerObject *self, PyObject *args, PyObject *kwds)
{
- static char *kwlist[] = {"file", "protocol", 0};
+ static char *kwlist[] = {"file", "protocol", "fix_imports", 0};
PyObject *file;
PyObject *proto_obj = NULL;
long proto = 0;
+ int fix_imports = 1;
- if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:Pickler",
- kwlist, &file, &proto_obj))
+ if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|Oi:Pickler",
+ kwlist, &file, &proto_obj, &fix_imports))
return -1;
/* In case of multiple __init__() calls, clear previous content. */
@@ -2628,8 +2703,11 @@ Pickler_init(PicklerObject *self, PyObject *args, PyObject *kwds)
if (proto_obj == NULL || proto_obj == Py_None)
proto = DEFAULT_PROTOCOL;
- else
+ else {
proto = PyLong_AsLong(proto_obj);
+ if (proto == -1 && PyErr_Occurred())
+ return -1;
+ }
if (proto < 0)
proto = HIGHEST_PROTOCOL;
@@ -2639,12 +2717,13 @@ Pickler_init(PicklerObject *self, PyObject *args, PyObject *kwds)
return -1;
}
- self->proto = proto;
- self->bin = proto > 0;
- self->arg = NULL;
- self->fast = 0;
- self->fast_nesting = 0;
- self->fast_memo = NULL;
+ self->proto = proto;
+ self->bin = proto > 0;
+ self->arg = NULL;
+ self->fast = 0;
+ self->fast_nesting = 0;
+ self->fast_memo = NULL;
+ self->fix_imports = fix_imports && proto < 3;
if (!PyObject_HasAttrString(file, "write")) {
PyErr_SetString(PyExc_TypeError,
@@ -4220,8 +4299,10 @@ load_proto(UnpicklerObject *self)
return -1;
i = (unsigned char)s[0];
- if (i <= HIGHEST_PROTOCOL)
+ if (i <= HIGHEST_PROTOCOL) {
+ self->proto = i;
return 0;
+ }
PyErr_Format(PyExc_ValueError, "unsupported pickle protocol: %d", i);
return -1;
@@ -4383,12 +4464,67 @@ Unpickler_find_class(UnpicklerObject *self, PyObject *args)
&module_name, &global_name))
return NULL;
+ /* Try to map the old names used in Python 2.x to the new ones used in
+ Python 3.x. We do this only with old pickle protocols and when the
+ user has not disabled the feature. */
+ if (self->proto < 3 && self->fix_imports) {
+ PyObject *key;
+ PyObject *item;
+
+ /* Check if the global (i.e., a function or a class) was renamed
+ or moved to another module. */
+ key = PyTuple_Pack(2, module_name, global_name);
+ if (key == NULL)
+ return NULL;
+ item = PyDict_GetItemWithError(name_mapping_2to3, key);
+ Py_DECREF(key);
+ if (item) {
+ if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
+ PyErr_Format(PyExc_RuntimeError,
+ "_compat_pickle.NAME_MAPPING values should be "
+ "2-tuples, not %.200s", Py_TYPE(item)->tp_name);
+ return NULL;
+ }
+ module_name = PyTuple_GET_ITEM(item, 0);
+ global_name = PyTuple_GET_ITEM(item, 1);
+ if (!PyUnicode_Check(module_name) ||
+ !PyUnicode_Check(global_name)) {
+ PyErr_Format(PyExc_RuntimeError,
+ "_compat_pickle.NAME_MAPPING values should be "
+ "pairs of str, not (%.200s, %.200s)",
+ Py_TYPE(module_name)->tp_name,
+ Py_TYPE(global_name)->tp_name);
+ return NULL;
+ }
+ }
+ else if (PyErr_Occurred()) {
+ return NULL;
+ }
+
+ /* Check if the module was renamed. */
+ item = PyDict_GetItemWithError(import_mapping_2to3, module_name);
+ if (item) {
+ if (!PyUnicode_Check(item)) {
+ PyErr_Format(PyExc_RuntimeError,
+ "_compat_pickle.IMPORT_MAPPING values should be "
+ "strings, not %.200s", Py_TYPE(item)->tp_name);
+ return NULL;
+ }
+ module_name = item;
+ }
+ else if (PyErr_Occurred()) {
+ return NULL;
+ }
+ }
+
modules_dict = PySys_GetObject("modules");
if (modules_dict == NULL)
return NULL;
- module = PyDict_GetItem(modules_dict, module_name);
+ module = PyDict_GetItemWithError(modules_dict, module_name);
if (module == NULL) {
+ if (PyErr_Occurred())
+ return NULL;
module = PyImport_Import(module_name);
if (module == NULL)
return NULL;
@@ -4477,15 +4613,20 @@ PyDoc_STRVAR(Unpickler_doc,
"reading, a BytesIO object, or any other custom object that\n"
"meets this interface.\n"
"\n"
-"Optional keyword arguments are encoding and errors, which are\n"
-"used to decode 8-bit string instances pickled by Python 2.x.\n"
-"These default to 'ASCII' and 'strict', respectively.\n");
+"Optional keyword arguments are *fix_imports*, *encoding* and *errors*,\n"
+"which are used to control compatiblity support for pickle stream\n"
+"generated by Python 2.x. If *fix_imports* is True, pickle will try to\n"
+"map the old Python 2.x names to the new names used in Python 3.x. The\n"
+"*encoding* and *errors* tell pickle how to decode 8-bit string\n"
+"instances pickled by Python 2.x; these default to 'ASCII' and\n"
+"'strict', respectively.\n");
static int
Unpickler_init(UnpicklerObject *self, PyObject *args, PyObject *kwds)
{
- static char *kwlist[] = {"file", "encoding", "errors", 0};
+ static char *kwlist[] = {"file", "fix_imports", "encoding", "errors", 0};
PyObject *file;
+ int fix_imports = 1;
char *encoding = NULL;
char *errors = NULL;
@@ -4504,8 +4645,8 @@ Unpickler_init(UnpicklerObject *self, PyObject *args, PyObject *kwds)
extra careful in the other Unpickler methods, since a subclass could
forget to call Unpickler.__init__() thus breaking our internal
invariants. */
- if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|ss:Unpickler", kwlist,
- &file, &encoding, &errors))
+ if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|iss:Unpickler", kwlist,
+ &file, &fix_imports, &encoding, &errors))
return -1;
/* In case of multiple __init__() calls, clear previous content. */
@@ -4549,6 +4690,8 @@ Unpickler_init(UnpicklerObject *self, PyObject *args, PyObject *kwds)
self->last_string = NULL;
self->arg = NULL;
+ self->proto = 0;
+ self->fix_imports = fix_imports;
return 0;
}
@@ -4672,40 +4815,85 @@ static PyTypeObject Unpickler_Type = {
};
static int
-init_stuff(void)
+initmodule(void)
{
- PyObject *copyreg;
+ PyObject *copyreg = NULL;
+ PyObject *compat_pickle = NULL;
+
+ /* XXX: We should ensure that the types of the dictionaries imported are
+ exactly PyDict objects. Otherwise, it is possible to crash the pickle
+ since we use the PyDict API directly to access these dictionaries. */
copyreg = PyImport_ImportModule("copyreg");
if (!copyreg)
- return -1;
-
+ goto error;
dispatch_table = PyObject_GetAttrString(copyreg, "dispatch_table");
if (!dispatch_table)
goto error;
-
extension_registry = \
PyObject_GetAttrString(copyreg, "_extension_registry");
if (!extension_registry)
goto error;
-
inverted_registry = PyObject_GetAttrString(copyreg, "_inverted_registry");
if (!inverted_registry)
goto error;
-
extension_cache = PyObject_GetAttrString(copyreg, "_extension_cache");
if (!extension_cache)
goto error;
+ Py_CLEAR(copyreg);
- Py_DECREF(copyreg);
+ /* Load the 2.x -> 3.x stdlib module mapping tables */
+ compat_pickle = PyImport_ImportModule("_compat_pickle");
+ if (!compat_pickle)
+ goto error;
+ name_mapping_2to3 = PyObject_GetAttrString(compat_pickle, "NAME_MAPPING");
+ if (!name_mapping_2to3)
+ goto error;
+ if (!PyDict_CheckExact(name_mapping_2to3)) {
+ PyErr_Format(PyExc_RuntimeError,
+ "_compat_pickle.NAME_MAPPING should be a dict, not %.200s",
+ Py_TYPE(name_mapping_2to3)->tp_name);
+ goto error;
+ }
+ import_mapping_2to3 = PyObject_GetAttrString(compat_pickle,
+ "IMPORT_MAPPING");
+ if (!import_mapping_2to3)
+ goto error;
+ if (!PyDict_CheckExact(import_mapping_2to3)) {
+ PyErr_Format(PyExc_RuntimeError,
+ "_compat_pickle.IMPORT_MAPPING should be a dict, "
+ "not %.200s", Py_TYPE(import_mapping_2to3)->tp_name);
+ goto error;
+ }
+ /* ... and the 3.x -> 2.x mapping tables */
+ name_mapping_3to2 = PyObject_GetAttrString(compat_pickle,
+ "REVERSE_NAME_MAPPING");
+ if (!name_mapping_3to2)
+ goto error;
+ if (!PyDict_CheckExact(name_mapping_3to2)) {
+ PyErr_Format(PyExc_RuntimeError,
+ "_compat_pickle.REVERSE_NAME_MAPPING shouldbe a dict, "
+ "not %.200s", Py_TYPE(name_mapping_3to2)->tp_name);
+ goto error;
+ }
+ import_mapping_3to2 = PyObject_GetAttrString(compat_pickle,
+ "REVERSE_IMPORT_MAPPING");
+ if (!import_mapping_3to2)
+ goto error;
+ if (!PyDict_CheckExact(import_mapping_3to2)) {
+ PyErr_Format(PyExc_RuntimeError,
+ "_compat_pickle.REVERSE_IMPORT_MAPPING should be a dict, "
+ "not %.200s", Py_TYPE(import_mapping_3to2)->tp_name);
+ goto error;
+ }
+ Py_CLEAR(compat_pickle);
empty_tuple = PyTuple_New(0);
if (empty_tuple == NULL)
- return -1;
-
+ goto error;
two_tuple = PyTuple_New(2);
if (two_tuple == NULL)
- return -1;
+ goto error;
/* We use this temp container with no regard to refcounts, or to
* keeping containees alive. Exempt from GC, because we don't
* want anything looking at two_tuple() by magic.
@@ -4715,7 +4903,18 @@ init_stuff(void)
return 0;
error:
- Py_DECREF(copyreg);
+ Py_CLEAR(copyreg);
+ Py_CLEAR(dispatch_table);
+ Py_CLEAR(extension_registry);
+ Py_CLEAR(inverted_registry);
+ Py_CLEAR(extension_cache);
+ Py_CLEAR(compat_pickle);
+ Py_CLEAR(name_mapping_2to3);
+ Py_CLEAR(import_mapping_2to3);
+ Py_CLEAR(name_mapping_3to2);
+ Py_CLEAR(import_mapping_3to2);
+ Py_CLEAR(empty_tuple);
+ Py_CLEAR(two_tuple);
return -1;
}
@@ -4773,7 +4972,7 @@ PyInit__pickle(void)
if (PyModule_AddObject(m, "UnpicklingError", UnpicklingError) < 0)
return NULL;
- if (init_stuff() < 0)
+ if (initmodule() < 0)
return NULL;
return m;