diff options
author | Kenn Knowles <kenn.knowles@gmail.com> | 2013-12-11 14:43:05 -0500 |
---|---|---|
committer | Kenn Knowles <kenn.knowles@gmail.com> | 2013-12-11 14:43:05 -0500 |
commit | 7abd71bdb348e82cbf2ff2f5de095aa078ff5457 (patch) | |
tree | f29700108c66456cf71dfd44235954466e76b84d | |
parent | d2d39b3d8c7cd2224ec8b1428dac4f70c2288c39 (diff) | |
download | jsonpath-rw-7abd71bdb348e82cbf2ff2f5de095aa078ff5457.tar.gz |
Add tests for escaped string delimiters (tag: 1.2.2)
-rw-r--r-- | jsonpath_rw/lexer.py | 61 | ||||
-rw-r--r-- | tests/test_lexer.py | 9 |
2 files changed, 57 insertions, 13 deletions
diff --git a/jsonpath_rw/lexer.py b/jsonpath_rw/lexer.py index bfa570a..aa28ff5 100644 --- a/jsonpath_rw/lexer.py +++ b/jsonpath_rw/lexer.py @@ -26,6 +26,7 @@ class JsonPathLexer(object): new_lexer = ply.lex.lex(module=self, debug=self.debug, errorlog=logger) new_lexer.latest_newline = 0 + new_lexer.string_value = None new_lexer.input(string) while True: @@ -34,6 +35,9 @@ class JsonPathLexer(object): t.col = t.lexpos - new_lexer.latest_newline yield t + if new_lexer.string_value is not None: + raise JsonPathLexerError('Unexpected EOF in string literal or identifier') + # ============== PLY Lexer specification ================== # # This probably should be private but: @@ -66,17 +70,28 @@ class JsonPathLexer(object): t.value = int(t.value) return t + # Single-quoted strings t_singlequote_ignore = '' - def t_SINGLEQUOTE(self, t): - r'\'' + def t_singlequote(self, t): + r"'" t.lexer.string_start = t.lexer.lexpos + t.lexer.string_value = '' t.lexer.push_state('singlequote') - def t_singlequote_SINGLEQUOTE(self, t): - r"([^']|\\')*'" - t.value = t.value[:-1] + def t_singlequote_content(self, t): + r"[^'\\]+" + t.lexer.string_value += t.value + + def t_singlequote_escape(self, t): + r'\\.' + t.lexer.string_value += t.value[1] + + def t_singlequote_end(self, t): + r"'" + t.value = t.lexer.string_value t.type = 'ID' + t.lexer.string_value = None t.lexer.pop_state() return t @@ -86,15 +101,25 @@ class JsonPathLexer(object): # Double-quoted strings t_doublequote_ignore = '' - def t_DOUBLEQUOTE(self, t): + def t_doublequote(self, t): r'"' t.lexer.string_start = t.lexer.lexpos + t.lexer.string_value = '' t.lexer.push_state('doublequote') - def t_doublequote_DOUBLEQUOTE(self, t): - r'([^"]|\\")*"' - t.value = t.value[:-1] + def t_doublequote_content(self, t): + r'[^"\\]+' + t.lexer.string_value += t.value + + def t_doublequote_escape(self, t): + r'\\.' 
+ t.lexer.string_value += t.value[1] + + def t_doublequote_end(self, t): + r'"' + t.value = t.lexer.string_value t.type = 'ID' + t.lexer.string_value = None t.lexer.pop_state() return t @@ -104,15 +129,25 @@ class JsonPathLexer(object): # Back-quoted "magic" operators t_backquote_ignore = '' - def t_BACKQUOTE(self, t): + def t_backquote(self, t): r'`' t.lexer.string_start = t.lexer.lexpos + t.lexer.string_value = '' t.lexer.push_state('backquote') - def t_backquote_BACKQUOTE(self, t): - r'([^`]|\\`)*`' - t.value = t.value[:-1] + def t_backquote_escape(self, t): + r'\\.' + t.lexer.string_value += t.value[1] + + def t_backquote_content(self, t): + r"[^`\\]+" + t.lexer.string_value += t.value + + def t_backquote_end(self, t): + r'`' + t.value = t.lexer.string_value t.type = 'NAMED_OPERATOR' + t.lexer.string_value = None t.lexer.pop_state() return t diff --git a/tests/test_lexer.py b/tests/test_lexer.py index 64d26c9..9d9fe38 100644 --- a/tests/test_lexer.py +++ b/tests/test_lexer.py @@ -35,6 +35,10 @@ class TestLexer(unittest.TestCase): self.assert_lex_equiv('$', [self.token('$', '$')]) self.assert_lex_equiv('"hello"', [self.token('hello', 'ID')]) self.assert_lex_equiv("'goodbye'", [self.token('goodbye', 'ID')]) + self.assert_lex_equiv("'doublequote\"'", [self.token('doublequote"', 'ID')]) + self.assert_lex_equiv(r'"doublequote\""', [self.token('doublequote"', 'ID')]) + self.assert_lex_equiv(r"'singlequote\''", [self.token("singlequote'", 'ID')]) + self.assert_lex_equiv('"singlequote\'"', [self.token("singlequote'", 'ID')]) self.assert_lex_equiv('fuzz', [self.token('fuzz', 'ID')]) self.assert_lex_equiv('1', [self.token(1, 'NUMBER')]) self.assert_lex_equiv('45', [self.token(45, 'NUMBER')]) @@ -54,7 +58,12 @@ class TestLexer(unittest.TestCase): def tokenize(s): l = JsonPathLexer(debug=True) return list(l.tokenize(s)) + self.assertRaises(JsonPathLexerError, tokenize, "'\"") self.assertRaises(JsonPathLexerError, tokenize, '"\'') + self.assertRaises(JsonPathLexerError, 
tokenize, '`"') + self.assertRaises(JsonPathLexerError, tokenize, "`'") + self.assertRaises(JsonPathLexerError, tokenize, '"`') + self.assertRaises(JsonPathLexerError, tokenize, "'`") self.assertRaises(JsonPathLexerError, tokenize, '?') self.assertRaises(JsonPathLexerError, tokenize, '$.foo.bar.#') |