diff options
author | Anthon van der Neut <anthon@mnt.org> | 2018-06-16 23:41:43 +0200 |
---|---|---|
committer | Anthon van der Neut <anthon@mnt.org> | 2018-06-16 23:41:43 +0200 |
commit | 15008733ed560cb506871f1c4fac99c99e53bfe6 (patch) | |
tree | a30c9db2939a72720bac131b8b2075c9d72be500 | |
parent | 6805e6c06a7255f737d13efc9e1002ff6f254039 (diff) | |
parent | 13cd9b56e285a0ddf4c64e4c5347485bd9ec6546 (diff) | |
download | ruamel.yaml-15008733ed560cb506871f1c4fac99c99e53bfe6.tar.gz |
merge PR27 improving startup time (0.15.39)
-rw-r--r-- | CHANGES | 5 |
-rw-r--r-- | README.rst | 5 |
-rw-r--r-- | __init__.py | 4 |
-rw-r--r-- | constructor.py | 3 |
-rw-r--r-- | reader.py | 53 |
-rw-r--r-- | resolver.py | 39 |
-rw-r--r-- | serializer.py | 5 |
-rw-r--r-- | util.py | 33 |
8 files changed, 111 insertions, 36 deletions
@@ -1,3 +1,8 @@ +[0, 15, 39]: 2018-06-16 + - merge PR27 improving package startup time (and loading when regexp not + actually used), provided by + `Marcel Bargull <https://bitbucket.org/mbargull/>`__ + [0, 15, 37]: 2018-06-13 - fix for losing precision when roundtripping floats by `Rolf Wojtech <https://bitbucket.org/asomov/>`__ @@ -35,6 +35,11 @@ ChangeLog .. should insert NEXT: at the beginning of line for next key (with empty line) +0.15.39 (2018-06-16): + - merge PR27 improving package startup time (and loading when regexp not + actually used), provided by + `Marcel Bargull <https://bitbucket.org/mbargull/>`__ + 0.15.38 (2018-06-13): - fix for losing precision when roundtripping floats by `Rolf Wojtech <https://bitbucket.org/asomov/>`__ diff --git a/__init__.py b/__init__.py index 79f46f3..0474160 100644 --- a/__init__.py +++ b/__init__.py @@ -7,8 +7,8 @@ if False: # MYPY _package_data = dict( full_package_name='ruamel.yaml', - version_info=(0, 15, 38), - __version__='0.15.38', + version_info=(0, 15, 39), + __version__='0.15.39', author='Anthon van der Neut', author_email='a.van.der.neut@ruamel.eu', description='ruamel.yaml is a YAML parser/emitter that supports roundtrip preservation of comments, seq/map flow style, and map key order', # NOQA diff --git a/constructor.py b/constructor.py index 9706bbe..7769874 100644 --- a/constructor.py +++ b/constructor.py @@ -26,6 +26,7 @@ from ruamel.yaml.scalarstring import (PreservedScalarString, SingleQuotedScalarS from ruamel.yaml.scalarint import ScalarInt, BinaryInt, OctalInt, HexInt, HexCapsInt from ruamel.yaml.scalarfloat import ScalarFloat from ruamel.yaml.timestamp import TimeStamp +from ruamel.yaml.util import RegExp if False: # MYPY from typing import Any, Dict, List, Set, Generator # NOQA @@ -471,7 +472,7 @@ class SafeConstructor(BaseConstructor): None, None, "failed to decode base64 data: %s" % exc, node.start_mark) - timestamp_regexp = re.compile( + timestamp_regexp = RegExp( 
u'''^(?P<year>[0-9][0-9][0-9][0-9]) -(?P<month>[0-9][0-9]?) -(?P<day>[0-9][0-9]?) @@ -25,9 +25,10 @@ import re from ruamel.yaml.error import YAMLError, FileMark, StringMark, YAMLStreamError from ruamel.yaml.compat import text_type, binary_type, PY3 +from ruamel.yaml.util import RegExp if False: # MYPY - from typing import Any, Dict, Optional, List, Union, Text # NOQA + from typing import Any, Dict, Optional, List, Union, Text, Tuple # NOQA from ruamel.yaml.compat import StreamTextType # NOQA __all__ = ['Reader', 'ReaderError'] @@ -181,29 +182,59 @@ class Reader(object): # 4 if 32 bit unicode supported, 2 e.g. on MacOS (issue 56) try: - NON_PRINTABLE = re.compile( + re.compile(u'[^\U00010000]') + except: + NON_PRINTABLE = RegExp( u'[^\x09\x0A\x0D\x20-\x7E\x85' u'\xA0-\uD7FF' u'\uE000-\uFFFD' - u'\U00010000-\U0010FFFF' u']' ) - UNICODE_SIZE = 4 - except: - NON_PRINTABLE = re.compile( + UNICODE_SIZE = 2 + else: + NON_PRINTABLE = RegExp( u'[^\x09\x0A\x0D\x20-\x7E\x85' u'\xA0-\uD7FF' u'\uE000-\uFFFD' + u'\U00010000-\U0010FFFF' u']' ) - UNICODE_SIZE = 2 + UNICODE_SIZE = 4 + + _printable_ascii = ('\x09\x0A\x0D' + ''.join(map(chr, range(0x20, 0x7F)))).encode('ascii') + + @classmethod + def _get_non_printable_ascii(cls, data): + # type: (Text, bytes) -> Union[None, Tuple[int, Text]] + ascii_bytes = data.encode('ascii') + non_printables = ascii_bytes.translate(None, cls._printable_ascii) + if not non_printables: + return None + non_printable = non_printables[:1] + return ascii_bytes.index(non_printable), non_printable.decode('ascii') + + @classmethod + def _get_non_printable_regex(cls, data): + # type: (Text) -> Union[None, Tuple[int, Text]] + match = cls.NON_PRINTABLE.search(data) + if not bool(match): + return None + return match.start(), match.group() + + @classmethod + def _get_non_printable(cls, data): + # type: (Text) -> Union[None, Tuple[int, Text]] + try: + return cls._get_non_printable_ascii(data) + except UnicodeEncodeError: + return 
cls._get_non_printable_regex(data) def check_printable(self, data): # type: (Any) -> None - match = self.NON_PRINTABLE.search(data) - if bool(match): - character = match.group() - position = self.index + (len(self.buffer) - self.pointer) + match.start() + non_printable_match = self._get_non_printable(data) + if non_printable_match is not None: + start, character = non_printable_match + position = self.index + (len(self.buffer) - self.pointer) + start raise ReaderError(self.name, position, ord(character), 'unicode', "special characters are not allowed") diff --git a/resolver.py b/resolver.py index 85e640d..9e5d320 100644 --- a/resolver.py +++ b/resolver.py @@ -11,6 +11,7 @@ if False: # MYPY from ruamel.yaml.compat import string_types, _DEFAULT_YAML_VERSION # NOQA from ruamel.yaml.error import * # NOQA from ruamel.yaml.nodes import * # NOQA +from ruamel.yaml.util import RegExp # NOQA __all__ = ['BaseResolver', 'Resolver', 'VersionedResolver'] @@ -23,17 +24,17 @@ __all__ = ['BaseResolver', 'Resolver', 'VersionedResolver'] implicit_resolvers = [ ([(1, 2)], u'tag:yaml.org,2002:bool', - re.compile(u'''^(?:true|True|TRUE|false|False|FALSE)$''', re.X), + RegExp(u'''^(?:true|True|TRUE|false|False|FALSE)$''', re.X), list(u'tTfF')), ([(1, 1)], u'tag:yaml.org,2002:bool', - re.compile(u'''^(?:yes|Yes|YES|no|No|NO + RegExp(u'''^(?:yes|Yes|YES|no|No|NO |true|True|TRUE|false|False|FALSE |on|On|ON|off|Off|OFF)$''', re.X), list(u'yYnNtTfFoO')), ([(1, 2)], u'tag:yaml.org,2002:float', - re.compile(u'''^(?: + RegExp(u'''^(?: [-+]?(?:[0-9][0-9_]*)\\.[0-9_]*(?:[eE][-+]?[0-9]+)? |[-+]?(?:[0-9][0-9_]*)(?:[eE][-+]?[0-9]+) |\\.[0-9_]+(?:[eE][-+][0-9]+)? @@ -42,7 +43,7 @@ implicit_resolvers = [ list(u'-+0123456789.')), ([(1, 1)], u'tag:yaml.org,2002:float', - re.compile(u'''^(?: + RegExp(u'''^(?: [-+]?(?:[0-9][0-9_]*)\\.[0-9_]*(?:[eE][-+]?[0-9]+)? |[-+]?(?:[0-9][0-9_]*)(?:[eE][-+]?[0-9]+) |\\.[0-9_]+(?:[eE][-+][0-9]+)? 
@@ -52,14 +53,14 @@ implicit_resolvers = [ list(u'-+0123456789.')), ([(1, 2)], u'tag:yaml.org,2002:int', - re.compile(u'''^(?:[-+]?0b[0-1_]+ + RegExp(u'''^(?:[-+]?0b[0-1_]+ |[-+]?0o?[0-7_]+ |[-+]?(?:0|[1-9][0-9_]*) |[-+]?0x[0-9a-fA-F_]+)$''', re.X), list(u'-+0123456789')), ([(1, 1)], u'tag:yaml.org,2002:int', - re.compile(u'''^(?:[-+]?0b[0-1_]+ + RegExp(u'''^(?:[-+]?0b[0-1_]+ |[-+]?0?[0-7_]+ |[-+]?(?:0|[1-9][0-9_]*) |[-+]?0x[0-9a-fA-F_]+ @@ -67,17 +68,17 @@ implicit_resolvers = [ list(u'-+0123456789')), ([(1, 2), (1, 1)], u'tag:yaml.org,2002:merge', - re.compile(u'^(?:<<)$'), + RegExp(u'^(?:<<)$'), [u'<']), ([(1, 2), (1, 1)], u'tag:yaml.org,2002:null', - re.compile(u'''^(?: ~ + RegExp(u'''^(?: ~ |null|Null|NULL | )$''', re.X), [u'~', u'n', u'N', u'']), ([(1, 2), (1, 1)], u'tag:yaml.org,2002:timestamp', - re.compile(u'''^(?:[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9] + RegExp(u'''^(?:[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9] |[0-9][0-9][0-9][0-9] -[0-9][0-9]? -[0-9][0-9]? (?:[Tt]|[ \\t]+)[0-9][0-9]? :[0-9][0-9] :[0-9][0-9] (?:\\.[0-9]*)? @@ -85,13 +86,13 @@ implicit_resolvers = [ list(u'0123456789')), ([(1, 2), (1, 1)], u'tag:yaml.org,2002:value', - re.compile(u'^(?:=)$'), + RegExp(u'^(?:=)$'), [u'=']), # The following resolver is only for documentation purposes. It cannot work # because plain scalars cannot start with '!', '&', or '*'. ([(1, 2), (1, 1)], u'tag:yaml.org,2002:yaml', - re.compile(u'^(?:!|&|\\*)$'), + RegExp(u'^(?:!|&|\\*)$'), list(u'!&*')), ] @@ -308,14 +309,14 @@ class Resolver(BaseResolver): Resolver.add_implicit_resolver_base( u'tag:yaml.org,2002:bool', - re.compile(u'''^(?:yes|Yes|YES|no|No|NO + RegExp(u'''^(?:yes|Yes|YES|no|No|NO |true|True|TRUE|false|False|FALSE |on|On|ON|off|Off|OFF)$''', re.X), list(u'yYnNtTfFoO')) Resolver.add_implicit_resolver_base( u'tag:yaml.org,2002:float', - re.compile(u'''^(?: + RegExp(u'''^(?: [-+]?(?:[0-9][0-9_]*)\\.[0-9_]*(?:[eE][-+]?[0-9]+)? |[-+]?(?:[0-9][0-9_]*)(?:[eE][-+]?[0-9]+) |\\.[0-9_]+(?:[eE][-+][0-9]+)? 
@@ -326,7 +327,7 @@ Resolver.add_implicit_resolver_base( Resolver.add_implicit_resolver_base( u'tag:yaml.org,2002:int', - re.compile(u'''^(?:[-+]?0b[0-1_]+ + RegExp(u'''^(?:[-+]?0b[0-1_]+ |[-+]?0o?[0-7_]+ |[-+]?(?:0|[1-9][0-9_]*) |[-+]?0x[0-9a-fA-F_]+ @@ -335,19 +336,19 @@ Resolver.add_implicit_resolver_base( Resolver.add_implicit_resolver_base( u'tag:yaml.org,2002:merge', - re.compile(u'^(?:<<)$'), + RegExp(u'^(?:<<)$'), [u'<']) Resolver.add_implicit_resolver_base( u'tag:yaml.org,2002:null', - re.compile(u'''^(?: ~ + RegExp(u'''^(?: ~ |null|Null|NULL | )$''', re.X), [u'~', u'n', u'N', u'']) Resolver.add_implicit_resolver_base( u'tag:yaml.org,2002:timestamp', - re.compile(u'''^(?:[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9] + RegExp(u'''^(?:[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9] |[0-9][0-9][0-9][0-9] -[0-9][0-9]? -[0-9][0-9]? (?:[Tt]|[ \\t]+)[0-9][0-9]? :[0-9][0-9] :[0-9][0-9] (?:\\.[0-9]*)? @@ -356,14 +357,14 @@ Resolver.add_implicit_resolver_base( Resolver.add_implicit_resolver_base( u'tag:yaml.org,2002:value', - re.compile(u'^(?:=)$'), + RegExp(u'^(?:=)$'), [u'=']) # The following resolver is only for documentation purposes. It cannot work # because plain scalars cannot start with '!', '&', or '*'. 
Resolver.add_implicit_resolver_base( u'tag:yaml.org,2002:yaml', - re.compile(u'^(?:!|&|\\*)$'), + RegExp(u'^(?:!|&|\\*)$'), list(u'!&*')) diff --git a/serializer.py b/serializer.py index 46884b5..d6012db 100644 --- a/serializer.py +++ b/serializer.py @@ -2,10 +2,9 @@ from __future__ import absolute_import -import re - from ruamel.yaml.error import YAMLError from ruamel.yaml.compat import nprint, DBG_NODE, dbg, string_types +from ruamel.yaml.util import RegExp from ruamel.yaml.events import ( StreamStartEvent, StreamEndEvent, MappingStartEvent, MappingEndEvent, @@ -31,7 +30,7 @@ class Serializer(object): # 'id' and 3+ numbers, but not 000 ANCHOR_TEMPLATE = u'id%03d' - ANCHOR_RE = re.compile(u'id(?!000$)\\d{3,}') + ANCHOR_RE = RegExp(u'id(?!000$)\\d{3,}') def __init__(self, encoding=None, explicit_start=None, explicit_end=None, version=None, tags=None, dumper=None): @@ -6,6 +6,9 @@ some helper functions that might be generally useful from __future__ import absolute_import, print_function +from functools import partial +import re + from .compat import text_type, binary_type if False: # MYPY @@ -13,6 +16,36 @@ if False: # MYPY from .compat import StreamTextType # NOQA +class LazyEval(object): + """ + Lightweight wrapper around lazily evaluated func(*args, **kwargs). + + func is only evaluated when any attribute of its return value is accessed. + Every attribute access is passed through to the wrapped value. + (This only excludes special cases like method-wrappers, e.g., __hash__.) + The sole additional attribute is the lazy_self function which holds the + return value (or, prior to evaluation, func and arguments), in its closure. 
+ """ + def __init__(self, func, *args, **kwargs): + def lazy_self(): + return_value = func(*args, **kwargs) + object.__setattr__(self, "lazy_self", lambda: return_value) + return return_value + object.__setattr__(self, "lazy_self", lazy_self) + + def __getattribute__(self, name): + lazy_self = object.__getattribute__(self, "lazy_self") + if name == "lazy_self": + return lazy_self + return getattr(lazy_self(), name) + + def __setattr__(self, name, value): + setattr(self.lazy_self(), name, value) + + +RegExp = partial(LazyEval, re.compile) + + # originally as comment # https://github.com/pre-commit/pre-commit/pull/211#issuecomment-186466605 # if you use this in your code, I suggest adding a test in your test suite |