summaryrefslogtreecommitdiff
path: root/jinja2/lexer.py
diff options
context:
space:
mode:
authorDavid Lord <davidism@gmail.com>2017-07-04 09:54:18 -0700
committerDavid Lord <davidism@gmail.com>2017-07-04 10:00:32 -0700
commitfb1e45315ddde51916cba01ed378fb9b8d12323e (patch)
tree4ca9c2a04732fafeb8ebb69104d82338cfc4dd07 /jinja2/lexer.py
parent1f1f031c2ec7524fd7fb6b3ba264f045704f432d (diff)
downloadjinja2-feature/kill-stringdefs.tar.gz
go back to generating regex, simplified (branch: feature/kill-stringdefs)
new version uses ~2KB vs 200KB memory, is ~100x faster to load; move script to generate pattern to scripts directory; add more tests
Diffstat (limited to 'jinja2/lexer.py')
-rw-r--r--jinja2/lexer.py21
1 files changed, 12 insertions, 9 deletions
diff --git a/jinja2/lexer.py b/jinja2/lexer.py
index 6c2b232..679e3f2 100644
--- a/jinja2/lexer.py
+++ b/jinja2/lexer.py
@@ -32,22 +32,25 @@ string_re = re.compile(r"('([^'\\]*(?:\\.[^'\\]*)*)'"
r'|"([^"\\]*(?:\\.[^"\\]*)*)")', re.S)
integer_re = re.compile(r'\d+')
-# we use the unicode identifier rule if this python version is able
-# to handle unicode identifiers, otherwise the standard ASCII one.
try:
+ # check if this Python supports Unicode identifiers
compile('föö', '<unknown>', 'eval')
except SyntaxError:
+ # no Unicode support, use ASCII identifiers
name_re = re.compile(r'[a-zA-Z_][a-zA-Z0-9_]*')
check_ident = False
else:
- # for whatever reason these do not exist in 'some' verisons of
- # python 3 within \w so we just add them. These are
- # 0x1885 MONGOLIAN LETTER ALI GALI BALUDA
- # 0x1886 MONGOLIAN LETTER ALI GALI THREE BALUDA
- # 0x2118 SCRIPT CAPITAL P
- # 0x212e ESTIMATED SYMBOL
- name_re = re.compile(r'[\w\u1885\u1886\u2118\u212e]+')
+ # Unicode support, build a pattern to match valid characters, and set flag
+ # to use str.isidentifier to validate during lexing
+ from jinja2 import _identifier
+ name_re = re.compile(r'[\w{0}]+'.format(_identifier.pattern))
check_ident = True
+ # remove the pattern from memory after building the regex
+ import sys
+ del sys.modules['jinja2._identifier']
+ import jinja2
+ del jinja2._identifier
+ del _identifier
float_re = re.compile(r'(?<!\.)\d+\.\d+')
newline_re = re.compile(r'(\r\n|\r|\n)')