summary refs log tree commit diff
diff options
context:
space:
mode:
author    Kenn Knowles <kenn.knowles@gmail.com>  2013-12-11 14:43:05 -0500
committer Kenn Knowles <kenn.knowles@gmail.com>  2013-12-11 14:43:05 -0500
commit   7abd71bdb348e82cbf2ff2f5de095aa078ff5457 (patch)
tree     f29700108c66456cf71dfd44235954466e76b84d
parent   d2d39b3d8c7cd2224ec8b1428dac4f70c2288c39 (diff)
download jsonpath-rw-7abd71bdb348e82cbf2ff2f5de095aa078ff5457.tar.gz
Add tests for escaped string delimiters (tag: 1.2.2)
-rw-r--r--  jsonpath_rw/lexer.py  61
-rw-r--r--  tests/test_lexer.py    9
2 files changed, 57 insertions(+), 13 deletions(-)
diff --git a/jsonpath_rw/lexer.py b/jsonpath_rw/lexer.py
index bfa570a..aa28ff5 100644
--- a/jsonpath_rw/lexer.py
+++ b/jsonpath_rw/lexer.py
@@ -26,6 +26,7 @@ class JsonPathLexer(object):
new_lexer = ply.lex.lex(module=self, debug=self.debug, errorlog=logger)
new_lexer.latest_newline = 0
+ new_lexer.string_value = None
new_lexer.input(string)
while True:
@@ -34,6 +35,9 @@ class JsonPathLexer(object):
t.col = t.lexpos - new_lexer.latest_newline
yield t
+ if new_lexer.string_value is not None:
+ raise JsonPathLexerError('Unexpected EOF in string literal or identifier')
+
# ============== PLY Lexer specification ==================
#
# This probably should be private but:
@@ -66,17 +70,28 @@ class JsonPathLexer(object):
t.value = int(t.value)
return t
+
# Single-quoted strings
t_singlequote_ignore = ''
- def t_SINGLEQUOTE(self, t):
- r'\''
+ def t_singlequote(self, t):
+ r"'"
t.lexer.string_start = t.lexer.lexpos
+ t.lexer.string_value = ''
t.lexer.push_state('singlequote')
- def t_singlequote_SINGLEQUOTE(self, t):
- r"([^']|\\')*'"
- t.value = t.value[:-1]
+ def t_singlequote_content(self, t):
+ r"[^'\\]+"
+ t.lexer.string_value += t.value
+
+ def t_singlequote_escape(self, t):
+ r'\\.'
+ t.lexer.string_value += t.value[1]
+
+ def t_singlequote_end(self, t):
+ r"'"
+ t.value = t.lexer.string_value
t.type = 'ID'
+ t.lexer.string_value = None
t.lexer.pop_state()
return t
@@ -86,15 +101,25 @@ class JsonPathLexer(object):
# Double-quoted strings
t_doublequote_ignore = ''
- def t_DOUBLEQUOTE(self, t):
+ def t_doublequote(self, t):
r'"'
t.lexer.string_start = t.lexer.lexpos
+ t.lexer.string_value = ''
t.lexer.push_state('doublequote')
- def t_doublequote_DOUBLEQUOTE(self, t):
- r'([^"]|\\")*"'
- t.value = t.value[:-1]
+ def t_doublequote_content(self, t):
+ r'[^"\\]+'
+ t.lexer.string_value += t.value
+
+ def t_doublequote_escape(self, t):
+ r'\\.'
+ t.lexer.string_value += t.value[1]
+
+ def t_doublequote_end(self, t):
+ r'"'
+ t.value = t.lexer.string_value
t.type = 'ID'
+ t.lexer.string_value = None
t.lexer.pop_state()
return t
@@ -104,15 +129,25 @@ class JsonPathLexer(object):
# Back-quoted "magic" operators
t_backquote_ignore = ''
- def t_BACKQUOTE(self, t):
+ def t_backquote(self, t):
r'`'
t.lexer.string_start = t.lexer.lexpos
+ t.lexer.string_value = ''
t.lexer.push_state('backquote')
- def t_backquote_BACKQUOTE(self, t):
- r'([^`]|\\`)*`'
- t.value = t.value[:-1]
+ def t_backquote_escape(self, t):
+ r'\\.'
+ t.lexer.string_value += t.value[1]
+
+ def t_backquote_content(self, t):
+ r"[^`\\]+"
+ t.lexer.string_value += t.value
+
+ def t_backquote_end(self, t):
+ r'`'
+ t.value = t.lexer.string_value
t.type = 'NAMED_OPERATOR'
+ t.lexer.string_value = None
t.lexer.pop_state()
return t
diff --git a/tests/test_lexer.py b/tests/test_lexer.py
index 64d26c9..9d9fe38 100644
--- a/tests/test_lexer.py
+++ b/tests/test_lexer.py
@@ -35,6 +35,10 @@ class TestLexer(unittest.TestCase):
self.assert_lex_equiv('$', [self.token('$', '$')])
self.assert_lex_equiv('"hello"', [self.token('hello', 'ID')])
self.assert_lex_equiv("'goodbye'", [self.token('goodbye', 'ID')])
+ self.assert_lex_equiv("'doublequote\"'", [self.token('doublequote"', 'ID')])
+ self.assert_lex_equiv(r'"doublequote\""', [self.token('doublequote"', 'ID')])
+ self.assert_lex_equiv(r"'singlequote\''", [self.token("singlequote'", 'ID')])
+ self.assert_lex_equiv('"singlequote\'"', [self.token("singlequote'", 'ID')])
self.assert_lex_equiv('fuzz', [self.token('fuzz', 'ID')])
self.assert_lex_equiv('1', [self.token(1, 'NUMBER')])
self.assert_lex_equiv('45', [self.token(45, 'NUMBER')])
@@ -54,7 +58,12 @@ class TestLexer(unittest.TestCase):
def tokenize(s):
l = JsonPathLexer(debug=True)
return list(l.tokenize(s))
+
self.assertRaises(JsonPathLexerError, tokenize, "'\"")
self.assertRaises(JsonPathLexerError, tokenize, '"\'')
+ self.assertRaises(JsonPathLexerError, tokenize, '`"')
+ self.assertRaises(JsonPathLexerError, tokenize, "`'")
+ self.assertRaises(JsonPathLexerError, tokenize, '"`')
+ self.assertRaises(JsonPathLexerError, tokenize, "'`")
self.assertRaises(JsonPathLexerError, tokenize, '?')
self.assertRaises(JsonPathLexerError, tokenize, '$.foo.bar.#')