summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMax Fischer <maxfischer2781@gmail.com>2021-06-28 15:22:32 +0200
committerMax Fischer <maxfischer2781@gmail.com>2021-06-28 15:22:32 +0200
commit118e8b9dd0ba3bc4c8e2052decacf31f0471bd58 (patch)
tree582c66146c17515eb1e213c76900bde3acb9b0af
parent5a4d58c7ca4b1f35f60f8ba0b8774e6cb3d24cf6 (diff)
downloadpyparsing-git-118e8b9dd0ba3bc4c8e2052decacf31f0471bd58.tar.gz
left recursion memo size may be limited
-rw-r--r--pyparsing/core.py13
-rw-r--r--pyparsing/util.py46
-rw-r--r--tests/test_unit.py26
3 files changed, 82 insertions, 3 deletions
diff --git a/pyparsing/core.py b/pyparsing/core.py
index 4b4a437..c346aeb 100644
--- a/pyparsing/core.py
+++ b/pyparsing/core.py
@@ -1,6 +1,7 @@
#
# core.py
#
+from typing import Optional
from abc import ABC, abstractmethod
from enum import Enum
import string
@@ -23,6 +24,8 @@ from .util import (
_escapeRegexRangeChars,
_bslash,
_flatten,
+ LRUMemo as _LRUMemo,
+ UnboundedMemo as _UnboundedMemo,
)
from .exceptions import *
from .actions import *
@@ -790,7 +793,7 @@ class ParserElement(ABC):
ParserElement._parse = ParserElement._parseNoCache
@staticmethod
- def enable_left_recursion(*, force=False):
+ def enable_left_recursion(cache_size_limit: Optional[int] = None, *, force=False):
"""
Enables "bounded recursion" parsing, which allows for both direct and indirect
left-recursion. During parsing, left-recursive :class:`Forward` elements are
@@ -821,6 +824,12 @@ class ParserElement(ABC):
ParserElement.disable_memoization()
elif ParserElement._packratEnabled:
raise RuntimeError("Packrat and Bounded Recursion are not compatible")
+ if cache_size_limit is None:
+ ParserElement.recursion_memos = _UnboundedMemo()
+ elif cache_size_limit > 0:
+ ParserElement.recursion_memos = _LRUMemo(capacity=cache_size_limit)
+ else:
+ raise NotImplementedError("Memo size of %s" % cache_size_limit)
ParserElement._left_recursion_enabled = True
@staticmethod
@@ -4505,7 +4514,9 @@ class Forward(ParseElementEnhance):
# replace the match for doActions=False as well,
# in case the action did backtrack
prev_loc, prev_result = memo[peek_key] = memo[act_key]
+ del memo[peek_key], memo[act_key]
return prev_loc, prev_result.copy()
+ del memo[peek_key]
return prev_loc, prev_peek.copy()
# the match did get better: see if we can improve further
else:
diff --git a/pyparsing/util.py b/pyparsing/util.py
index 3cb69d2..875799d 100644
--- a/pyparsing/util.py
+++ b/pyparsing/util.py
@@ -119,6 +119,52 @@ class _FifoCache:
self.clear = types.MethodType(clear, self)
+class LRUMemo:
+ """
+ A memoizing mapping that retains `capacity` deleted items
+
+ The memo tracks retained items by their access order; once `capacity` items
+ are retained, the least recently used item is discarded.
+ """
+ def __init__(self, capacity):
+ self._capacity = capacity
+ self._active = {}
+ self._memory = collections.OrderedDict()
+
+ def __getitem__(self, key):
+ try:
+ return self._active[key]
+ except KeyError:
+ self._memory.move_to_end(key)
+ return self._memory[key]
+
+ def __setitem__(self, key, value):
+ self._memory.pop(key, None)
+ self._active[key] = value
+
+ def __delitem__(self, key):
+ try:
+ value = self._active.pop(key)
+ except KeyError:
+ pass
+ else:
+ while len(self._memory) >= self._capacity:
+ self._memory.popitem(last=False)
+ self._memory[key] = value
+
+ def clear(self):
+ self._active.clear()
+ self._memory.clear()
+
+
+class UnboundedMemo(dict):
+ """
+ A memoizing mapping that retains all deleted items
+ """
+ def __delitem__(self, key):
+ pass
+
+
def _escapeRegexRangeChars(s):
# escape these chars: ^-[]
for c in r"\^-[]":
diff --git a/tests/test_unit.py b/tests/test_unit.py
index e431b87..aed2db3 100644
--- a/tests/test_unit.py
+++ b/tests/test_unit.py
@@ -8076,11 +8076,28 @@ class Test8_WithUnboundedPackrat(Test2_WithoutPackrat):
class Test9_WithLeftRecursionParsing(Test2_WithoutPackrat):
"""
- rerun Test2 tests, now with unbounded packrat cache
+ rerun Test2 tests, now with unbounded left recursion cache
"""
def setUp(self):
- recursion_suite_context.restore()
+ ParserElement.enable_left_recursion(force=True)
+
+ def tearDown(self):
+ default_suite_context.restore()
+
+ def test000_assert_packrat_status(self):
+ print("Left-Recursion enabled:", ParserElement._left_recursion_enabled)
+ self.assertTrue(ParserElement._left_recursion_enabled, "left recursion not enabled")
+ self.assertIsInstance(ParserElement.recursion_memos, pp.util.UnboundedMemo)
+
+
+class Test10_WithLeftRecursionParsingBoundedMemo(Test2_WithoutPackrat):
+ """
+ rerun Test2 tests, now with bounded left recursion cache
+ """
+
+ def setUp(self):
+ ParserElement.enable_left_recursion(cache_size_limit=4, force=True)
def tearDown(self):
default_suite_context.restore()
@@ -8088,6 +8105,11 @@ class Test9_WithLeftRecursionParsing(Test2_WithoutPackrat):
def test000_assert_packrat_status(self):
print("Left-Recursion enabled:", ParserElement._left_recursion_enabled)
self.assertTrue(ParserElement._left_recursion_enabled, "left recursion not enabled")
+ self.assertIsInstance(ParserElement.recursion_memos, pp.util.LRUMemo)
+ # check that the cache matches roughly what we expect
+ # – it may be larger due to action handling
+ self.assertLessEqual(ParserElement.recursion_memos._capacity, 4)
+ self.assertGreater(ParserElement.recursion_memos._capacity * 3, 4)
class TestLR1_Recursion(ppt.TestParseResultsAsserts, TestCase):