summaryrefslogtreecommitdiff
path: root/Tools/scripts/umarshal.py
diff options
context:
space:
mode:
authorVictor Stinner <vstinner@python.org>2022-10-17 12:01:00 +0200
committerGitHub <noreply@github.com>2022-10-17 12:01:00 +0200
commit1863302d61a7a5dd8b8d345a00f0ee242c7c10bf (patch)
treea1e41af02147e2a14155d5b19d7b68bbb31c3f6f /Tools/scripts/umarshal.py
parenteae7dad40255bad42e4abce53ff8143dcbc66af5 (diff)
downloadcpython-git-1863302d61a7a5dd8b8d345a00f0ee242c7c10bf.tar.gz
gh-97669: Create Tools/build/ directory (#97963)
Create Tools/build/ directory. Move the following scripts from Tools/scripts/ to Tools/build/: * check_extension_modules.py * deepfreeze.py * freeze_modules.py * generate_global_objects.py * generate_levenshtein_examples.py * generate_opcode_h.py * generate_re_casefix.py * generate_sre_constants.py * generate_stdlib_module_names.py * generate_token.py * parse_html5_entities.py * smelly.py * stable_abi.py * umarshal.py * update_file.py * verify_ensurepip_wheels.py Update references to these scripts.
Diffstat (limited to 'Tools/scripts/umarshal.py')
-rw-r--r--Tools/scripts/umarshal.py325
1 files changed, 0 insertions, 325 deletions
diff --git a/Tools/scripts/umarshal.py b/Tools/scripts/umarshal.py
deleted file mode 100644
index f61570cbaf..0000000000
--- a/Tools/scripts/umarshal.py
+++ /dev/null
@@ -1,325 +0,0 @@
-# Implementat marshal.loads() in pure Python
-
-import ast
-
-from typing import Any, Tuple
-
-
-class Type:
- # Adapted from marshal.c
- NULL = ord('0')
- NONE = ord('N')
- FALSE = ord('F')
- TRUE = ord('T')
- STOPITER = ord('S')
- ELLIPSIS = ord('.')
- INT = ord('i')
- INT64 = ord('I')
- FLOAT = ord('f')
- BINARY_FLOAT = ord('g')
- COMPLEX = ord('x')
- BINARY_COMPLEX = ord('y')
- LONG = ord('l')
- STRING = ord('s')
- INTERNED = ord('t')
- REF = ord('r')
- TUPLE = ord('(')
- LIST = ord('[')
- DICT = ord('{')
- CODE = ord('c')
- UNICODE = ord('u')
- UNKNOWN = ord('?')
- SET = ord('<')
- FROZENSET = ord('>')
- ASCII = ord('a')
- ASCII_INTERNED = ord('A')
- SMALL_TUPLE = ord(')')
- SHORT_ASCII = ord('z')
- SHORT_ASCII_INTERNED = ord('Z')
-
-
-FLAG_REF = 0x80 # with a type, add obj to index
-
-NULL = object() # marker
-
-# Cell kinds
-CO_FAST_LOCAL = 0x20
-CO_FAST_CELL = 0x40
-CO_FAST_FREE = 0x80
-
-
-class Code:
- def __init__(self, **kwds: Any):
- self.__dict__.update(kwds)
-
- def __repr__(self) -> str:
- return f"Code(**{self.__dict__})"
-
- co_localsplusnames: Tuple[str]
- co_localspluskinds: Tuple[int]
-
- def get_localsplus_names(self, select_kind: int) -> Tuple[str, ...]:
- varnames: list[str] = []
- for name, kind in zip(self.co_localsplusnames,
- self.co_localspluskinds):
- if kind & select_kind:
- varnames.append(name)
- return tuple(varnames)
-
- @property
- def co_varnames(self) -> Tuple[str, ...]:
- return self.get_localsplus_names(CO_FAST_LOCAL)
-
- @property
- def co_cellvars(self) -> Tuple[str, ...]:
- return self.get_localsplus_names(CO_FAST_CELL)
-
- @property
- def co_freevars(self) -> Tuple[str, ...]:
- return self.get_localsplus_names(CO_FAST_FREE)
-
- @property
- def co_nlocals(self) -> int:
- return len(self.co_varnames)
-
-
-class Reader:
- # A fairly literal translation of the marshal reader.
-
- def __init__(self, data: bytes):
- self.data: bytes = data
- self.end: int = len(self.data)
- self.pos: int = 0
- self.refs: list[Any] = []
- self.level: int = 0
-
- def r_string(self, n: int) -> bytes:
- assert 0 <= n <= self.end - self.pos
- buf = self.data[self.pos : self.pos + n]
- self.pos += n
- return buf
-
- def r_byte(self) -> int:
- buf = self.r_string(1)
- return buf[0]
-
- def r_short(self) -> int:
- buf = self.r_string(2)
- x = buf[0]
- x |= buf[1] << 8
- x |= -(x & (1<<15)) # Sign-extend
- return x
-
- def r_long(self) -> int:
- buf = self.r_string(4)
- x = buf[0]
- x |= buf[1] << 8
- x |= buf[2] << 16
- x |= buf[3] << 24
- x |= -(x & (1<<31)) # Sign-extend
- return x
-
- def r_long64(self) -> int:
- buf = self.r_string(8)
- x = buf[0]
- x |= buf[1] << 8
- x |= buf[2] << 16
- x |= buf[3] << 24
- x |= buf[1] << 32
- x |= buf[1] << 40
- x |= buf[1] << 48
- x |= buf[1] << 56
- x |= -(x & (1<<63)) # Sign-extend
- return x
-
- def r_PyLong(self) -> int:
- n = self.r_long()
- size = abs(n)
- x = 0
- # Pray this is right
- for i in range(size):
- x |= self.r_short() << i*15
- if n < 0:
- x = -x
- return x
-
- def r_float_bin(self) -> float:
- buf = self.r_string(8)
- import struct # Lazy import to avoid breaking UNIX build
- return struct.unpack("d", buf)[0]
-
- def r_float_str(self) -> float:
- n = self.r_byte()
- buf = self.r_string(n)
- return ast.literal_eval(buf.decode("ascii"))
-
- def r_ref_reserve(self, flag: int) -> int:
- if flag:
- idx = len(self.refs)
- self.refs.append(None)
- return idx
- else:
- return 0
-
- def r_ref_insert(self, obj: Any, idx: int, flag: int) -> Any:
- if flag:
- self.refs[idx] = obj
- return obj
-
- def r_ref(self, obj: Any, flag: int) -> Any:
- assert flag & FLAG_REF
- self.refs.append(obj)
- return obj
-
- def r_object(self) -> Any:
- old_level = self.level
- try:
- return self._r_object()
- finally:
- self.level = old_level
-
- def _r_object(self) -> Any:
- code = self.r_byte()
- flag = code & FLAG_REF
- type = code & ~FLAG_REF
- # print(" "*self.level + f"{code} {flag} {type} {chr(type)!r}")
- self.level += 1
-
- def R_REF(obj: Any) -> Any:
- if flag:
- obj = self.r_ref(obj, flag)
- return obj
-
- if type == Type.NULL:
- return NULL
- elif type == Type.NONE:
- return None
- elif type == Type.ELLIPSIS:
- return Ellipsis
- elif type == Type.FALSE:
- return False
- elif type == Type.TRUE:
- return True
- elif type == Type.INT:
- return R_REF(self.r_long())
- elif type == Type.INT64:
- return R_REF(self.r_long64())
- elif type == Type.LONG:
- return R_REF(self.r_PyLong())
- elif type == Type.FLOAT:
- return R_REF(self.r_float_str())
- elif type == Type.BINARY_FLOAT:
- return R_REF(self.r_float_bin())
- elif type == Type.COMPLEX:
- return R_REF(complex(self.r_float_str(),
- self.r_float_str()))
- elif type == Type.BINARY_COMPLEX:
- return R_REF(complex(self.r_float_bin(),
- self.r_float_bin()))
- elif type == Type.STRING:
- n = self.r_long()
- return R_REF(self.r_string(n))
- elif type == Type.ASCII_INTERNED or type == Type.ASCII:
- n = self.r_long()
- return R_REF(self.r_string(n).decode("ascii"))
- elif type == Type.SHORT_ASCII_INTERNED or type == Type.SHORT_ASCII:
- n = self.r_byte()
- return R_REF(self.r_string(n).decode("ascii"))
- elif type == Type.INTERNED or type == Type.UNICODE:
- n = self.r_long()
- return R_REF(self.r_string(n).decode("utf8", "surrogatepass"))
- elif type == Type.SMALL_TUPLE:
- n = self.r_byte()
- idx = self.r_ref_reserve(flag)
- retval: Any = tuple(self.r_object() for _ in range(n))
- self.r_ref_insert(retval, idx, flag)
- return retval
- elif type == Type.TUPLE:
- n = self.r_long()
- idx = self.r_ref_reserve(flag)
- retval = tuple(self.r_object() for _ in range(n))
- self.r_ref_insert(retval, idx, flag)
- return retval
- elif type == Type.LIST:
- n = self.r_long()
- retval = R_REF([])
- for _ in range(n):
- retval.append(self.r_object())
- return retval
- elif type == Type.DICT:
- retval = R_REF({})
- while True:
- key = self.r_object()
- if key == NULL:
- break
- val = self.r_object()
- retval[key] = val
- return retval
- elif type == Type.SET:
- n = self.r_long()
- retval = R_REF(set())
- for _ in range(n):
- v = self.r_object()
- retval.add(v)
- return retval
- elif type == Type.FROZENSET:
- n = self.r_long()
- s: set[Any] = set()
- idx = self.r_ref_reserve(flag)
- for _ in range(n):
- v = self.r_object()
- s.add(v)
- retval = frozenset(s)
- self.r_ref_insert(retval, idx, flag)
- return retval
- elif type == Type.CODE:
- retval = R_REF(Code())
- retval.co_argcount = self.r_long()
- retval.co_posonlyargcount = self.r_long()
- retval.co_kwonlyargcount = self.r_long()
- retval.co_stacksize = self.r_long()
- retval.co_flags = self.r_long()
- retval.co_code = self.r_object()
- retval.co_consts = self.r_object()
- retval.co_names = self.r_object()
- retval.co_localsplusnames = self.r_object()
- retval.co_localspluskinds = self.r_object()
- retval.co_filename = self.r_object()
- retval.co_name = self.r_object()
- retval.co_qualname = self.r_object()
- retval.co_firstlineno = self.r_long()
- retval.co_linetable = self.r_object()
- retval.co_exceptiontable = self.r_object()
- return retval
- elif type == Type.REF:
- n = self.r_long()
- retval = self.refs[n]
- assert retval is not None
- return retval
- else:
- breakpoint()
- raise AssertionError(f"Unknown type {type} {chr(type)!r}")
-
-
-def loads(data: bytes) -> Any:
- assert isinstance(data, bytes)
- r = Reader(data)
- return r.r_object()
-
-
-def main():
- # Test
- import marshal, pprint
- sample = {'foo': {(42, "bar", 3.14)}}
- data = marshal.dumps(sample)
- retval = loads(data)
- assert retval == sample, retval
- sample = main.__code__
- data = marshal.dumps(sample)
- retval = loads(data)
- assert isinstance(retval, Code), retval
- pprint.pprint(retval.__dict__)
-
-
-if __name__ == "__main__":
- main()