go back to generating regex, simplified [feature/kill-stringdefs]

new version uses ~2KB vs 200KB memory, is ~100x faster to load
move script to generate pattern to scripts directory
add more tests
author: David Lord <davidism@gmail.com> 2017-07-04 09:54:18 -0700
committer: David Lord <davidism@gmail.com> 2017-07-04 10:00:32 -0700
commit: fb1e45315ddde51916cba01ed378fb9b8d12323e (patch)
tree: 4ca9c2a04732fafeb8ebb69104d82338cfc4dd07 /jinja2/lexer.py
parent: 1f1f031c2ec7524fd7fb6b3ba264f045704f432d (diff)
download: jinja2-feature/kill-stringdefs.tar.gz
1 files changed, 12 insertions, 9 deletions
diff --git a/jinja2/lexer.py b/jinja2/lexer.py
index 6c2b232..679e3f2 100644
--- a/jinja2/lexer.py
+++ b/jinja2/lexer.py
@@ -32,22 +32,25 @@ string_re = re.compile(r"('([^'\\]*(?:\\.[^'\\]*)*)'"
                        r'|"([^"\\]*(?:\\.[^"\\]*)*)")', re.S)
 integer_re = re.compile(r'\d+')
 
-# we use the unicode identifier rule if this python version is able
-# to handle unicode identifiers, otherwise the standard ASCII one.
 try:
+    # check if this Python supports Unicode identifiers
     compile('föö', '<unknown>', 'eval')
 except SyntaxError:
+    # no Unicode support, use ASCII identifiers
     name_re = re.compile(r'[a-zA-Z_][a-zA-Z0-9_]*')
     check_ident = False
 else:
-    # for whatever reason these do not exist in 'some' verisons of
-    # python 3 within \w so we just add them.  These are
-    #   0x1885 MONGOLIAN LETTER ALI GALI BALUDA
-    #   0x1886 MONGOLIAN LETTER ALI GALI THREE BALUDA
-    #   0x2118 SCRIPT CAPITAL P
-    #   0x212e ESTIMATED SYMBOL
-    name_re = re.compile(r'[\w\u1885\u1886\u2118\u212e]+')
+    # Unicode support, build a pattern to match valid characters, and set flag
+    # to use str.isidentifier to validate during lexing
+    from jinja2 import _identifier
+    name_re = re.compile(r'[\w{0}]+'.format(_identifier.pattern))
     check_ident = True
+    # remove the pattern from memory after building the regex
+    import sys
+    del sys.modules['jinja2._identifier']
+    import jinja2
+    del jinja2._identifier
+    del _identifier
 
 float_re = re.compile(r'(?<!\.)\d+\.\d+')
 newline_re = re.compile(r'(\r\n|\r|\n)')
author	David Lord <davidism@gmail.com>	2017-07-04 09:54:18 -0700
committer	David Lord <davidism@gmail.com>	2017-07-04 10:00:32 -0700
commit	fb1e45315ddde51916cba01ed378fb9b8d12323e (patch)
tree	4ca9c2a04732fafeb8ebb69104d82338cfc4dd07 /jinja2/lexer.py
parent	1f1f031c2ec7524fd7fb6b3ba264f045704f432d (diff)
download	jinja2-feature/kill-stringdefs.tar.gz