From 843499915e91e0ee324a0407c78ac6f570806370 Mon Sep 17 00:00:00 2001 From: Oleg Broytman Date: Wed, 31 Aug 2016 16:10:35 +0300 Subject: Decode bytes to unicode in Lexer.get_tokens(). Raise TypeError if the input is neither bytes in a known encoding nor unicode nor a file-like object (file, StringIO). Remove function u(). Add bytes_type to compat. Add tests for non-ascii. --- tests/test_parse.py | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) (limited to 'tests') diff --git a/tests/test_parse.py b/tests/test_parse.py index 8dd1150..0632889 100644 --- a/tests/test_parse.py +++ b/tests/test_parse.py @@ -6,7 +6,7 @@ import pytest import sqlparse from sqlparse import sql, tokens as T -from sqlparse.compat import StringIO +from sqlparse.compat import StringIO, text_type def test_parse_tokenize(): @@ -403,3 +403,21 @@ def test_dbldollar_as_literal(sql, is_literal): else: for token in p.tokens: assert token.ttype != T.Literal + + +def test_non_ascii(): + _test_non_ascii = u"insert into test (id, name) values (1, 'тест');" + + s = _test_non_ascii + stmts = sqlparse.parse(s) + assert len(stmts) == 1 + statement = stmts[0] + assert text_type(statement) == s + assert statement._pprint_tree() is None + + s = _test_non_ascii.encode('utf-8') + stmts = sqlparse.parse(s, 'utf-8') + assert len(stmts) == 1 + statement = stmts[0] + assert text_type(statement) == _test_non_ascii + assert statement._pprint_tree() is None -- cgit v1.2.1