From 843499915e91e0ee324a0407c78ac6f570806370 Mon Sep 17 00:00:00 2001
From: Oleg Broytman <phd@phdru.name>
Date: Wed, 31 Aug 2016 16:10:35 +0300
Subject: Decode bytes to unicode in Lexer.get_tokens().

Raise TypeError if the input is neither bytes in a known encoding nor
unicode nor a file-like object (file, StringIO).

Remove function u(). Add bytes_type to compat. Add tests for non-ASCII input.
---
 tests/test_parse.py | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

(limited to 'tests')

diff --git a/tests/test_parse.py b/tests/test_parse.py
index 8dd1150..0632889 100644
--- a/tests/test_parse.py
+++ b/tests/test_parse.py
@@ -6,7 +6,7 @@ import pytest
 
 import sqlparse
 from sqlparse import sql, tokens as T
-from sqlparse.compat import StringIO
+from sqlparse.compat import StringIO, text_type
 
 
 def test_parse_tokenize():
@@ -403,3 +403,21 @@ def test_dbldollar_as_literal(sql, is_literal):
     else:
         for token in p.tokens:
             assert token.ttype != T.Literal
+
+
+def test_non_ascii():
+    _test_non_ascii = u"insert into test (id, name) values (1, 'тест');"
+    # Unicode text input: the parsed statement must stringify back to it.
+    s = _test_non_ascii
+    stmts = sqlparse.parse(s)
+    assert len(stmts) == 1
+    statement = stmts[0]
+    assert text_type(statement) == s
+    assert statement._pprint_tree() is None
+    # UTF-8 bytes plus an explicit encoding must yield the same unicode text.
+    s = _test_non_ascii.encode('utf-8')
+    stmts = sqlparse.parse(s, 'utf-8')
+    assert len(stmts) == 1
+    statement = stmts[0]
+    assert text_type(statement) == _test_non_ascii
+    assert statement._pprint_tree() is None
-- 
cgit v1.2.1