From 118e8b9dd0ba3bc4c8e2052decacf31f0471bd58 Mon Sep 17 00:00:00 2001 From: Max Fischer Date: Mon, 28 Jun 2021 15:22:32 +0200 Subject: left recursion memo size may be limited --- pyparsing/core.py | 13 ++++++++++++- pyparsing/util.py | 46 ++++++++++++++++++++++++++++++++++++++++++++++ tests/test_unit.py | 26 ++++++++++++++++++++++++-- 3 files changed, 82 insertions(+), 3 deletions(-) diff --git a/pyparsing/core.py b/pyparsing/core.py index 4b4a437..c346aeb 100644 --- a/pyparsing/core.py +++ b/pyparsing/core.py @@ -1,6 +1,7 @@ # # core.py # +from typing import Optional from abc import ABC, abstractmethod from enum import Enum import string @@ -23,6 +24,8 @@ from .util import ( _escapeRegexRangeChars, _bslash, _flatten, + LRUMemo as _LRUMemo, + UnboundedMemo as _UnboundedMemo, ) from .exceptions import * from .actions import * @@ -790,7 +793,7 @@ class ParserElement(ABC): ParserElement._parse = ParserElement._parseNoCache @staticmethod - def enable_left_recursion(*, force=False): + def enable_left_recursion(cache_size_limit: Optional[int] = None, *, force=False): """ Enables "bounded recursion" parsing, which allows for both direct and indirect left-recursion. During parsing, left-recursive :class:`Forward` elements are @@ -821,6 +824,12 @@ class ParserElement(ABC): ParserElement.disable_memoization() elif ParserElement._packratEnabled: raise RuntimeError("Packrat and Bounded Recursion are not compatible") + if cache_size_limit is None: + ParserElement.recursion_memos = _UnboundedMemo() + elif cache_size_limit > 0: + ParserElement.recursion_memos = _LRUMemo(capacity=cache_size_limit) + else: + raise NotImplementedError("Memo size of %s" % cache_size_limit) ParserElement._left_recursion_enabled = True @staticmethod @@ -4505,7 +4514,9 @@ class Forward(ParseElementEnhance): # replace the match for doActions=False as well, # in case the action did backtrack prev_loc, prev_result = memo[peek_key] = memo[act_key] + del memo[peek_key], memo[act_key] return prev_loc, prev_result.copy() + del memo[peek_key] return prev_loc, prev_peek.copy() # the match did get better: see if we can improve further else: diff --git a/pyparsing/util.py b/pyparsing/util.py index 3cb69d2..875799d 100644 --- a/pyparsing/util.py +++ b/pyparsing/util.py @@ -119,6 +119,52 @@ class _FifoCache: self.clear = types.MethodType(clear, self) +class LRUMemo: + """ + A memoizing mapping that retains `capacity` deleted items + + The memo tracks retained items by their access order; once `capacity` items + are retained, the least recently used item is discarded. + """ + def __init__(self, capacity): + self._capacity = capacity + self._active = {} + self._memory = collections.OrderedDict() + + def __getitem__(self, key): + try: + return self._active[key] + except KeyError: + self._memory.move_to_end(key) + return self._memory[key] + + def __setitem__(self, key, value): + self._memory.pop(key, None) + self._active[key] = value + + def __delitem__(self, key): + try: + value = self._active.pop(key) + except KeyError: + pass + else: + while len(self._memory) >= self._capacity: + self._memory.popitem(last=False) + self._memory[key] = value + + def clear(self): + self._active.clear() + self._memory.clear() + + +class UnboundedMemo(dict): + """ + A memoizing mapping that retains all deleted items + """ + def __delitem__(self, key): + pass + + def _escapeRegexRangeChars(s): # escape these chars: ^-[] for c in r"\^-[]": diff --git a/tests/test_unit.py b/tests/test_unit.py index e431b87..aed2db3 100644 --- a/tests/test_unit.py +++ b/tests/test_unit.py @@ -8076,11 +8076,28 @@ class Test8_WithUnboundedPackrat(Test2_WithoutPackrat): class Test9_WithLeftRecursionParsing(Test2_WithoutPackrat): """ - rerun Test2 tests, now with unbounded packrat cache + rerun Test2 tests, now with unbounded left recursion cache """ def setUp(self): - recursion_suite_context.restore() + ParserElement.enable_left_recursion(force=True) + + def tearDown(self): + default_suite_context.restore() + + def test000_assert_packrat_status(self): + print("Left-Recursion enabled:", ParserElement._left_recursion_enabled) + self.assertTrue(ParserElement._left_recursion_enabled, "left recursion not enabled") + self.assertIsInstance(ParserElement.recursion_memos, pp.util.UnboundedMemo) + + +class Test10_WithLeftRecursionParsingBoundedMemo(Test2_WithoutPackrat): + """ + rerun Test2 tests, now with bounded left recursion cache + """ + + def setUp(self): + ParserElement.enable_left_recursion(cache_size_limit=4, force=True) def tearDown(self): default_suite_context.restore() @@ -8088,6 +8105,11 @@ class Test9_WithLeftRecursionParsing(Test2_WithoutPackrat): def test000_assert_packrat_status(self): print("Left-Recursion enabled:", ParserElement._left_recursion_enabled) self.assertTrue(ParserElement._left_recursion_enabled, "left recursion not enabled") + self.assertIsInstance(ParserElement.recursion_memos, pp.util.LRUMemo) + # check that the cache matches roughly what we expect + # – it may be larger due to action handling + self.assertLessEqual(ParserElement.recursion_memos._capacity, 4) + self.assertGreater(ParserElement.recursion_memos._capacity * 3, 4) class TestLR1_Recursion(ppt.TestParseResultsAsserts, TestCase): -- cgit v1.2.1