summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Victor Uriarte <vmuriart@gmail.com> 2017-01-10 21:18:08 -0500
committer GitHub <noreply@github.com> 2017-01-10 21:18:08 -0500
commit 58fae0db7c38382a9a96187213169f7b474d62ff (patch)
tree 595e8a0949a7b65cf041b4d72cdf4752222c4d74
parent f776dde633acfd846e209b209fcef55051849c1a (diff)
parent 66b36af84fbe6d546b73a207e687234f28bb00a0 (diff)
download sqlparse-58fae0db7c38382a9a96187213169f7b474d62ff.tar.gz
Merge pull request #316 from twang2218/fix-issue-315-utf8-support
Fix #315 support utf-8 by default
-rw-r--r-- sqlparse/lexer.py 11
-rw-r--r-- tests/test_regressions.py 19
2 files changed, 25 insertions, 5 deletions
diff --git a/sqlparse/lexer.py b/sqlparse/lexer.py
index 914b520..60e43da 100644
--- a/sqlparse/lexer.py
+++ b/sqlparse/lexer.py
@@ -43,12 +43,13 @@ class Lexer(object):
if isinstance(text, text_type):
pass
elif isinstance(text, bytes_type):
- try:
- text = text.decode()
- except UnicodeDecodeError:
- if not encoding:
- encoding = 'unicode-escape'
+ if encoding:
text = text.decode(encoding)
+ else:
+ try:
+ text = text.decode('utf-8')
+ except UnicodeDecodeError:
+ text = text.decode('unicode-escape')
else:
raise TypeError(u"Expected text or file-like object, got {!r}".
format(type(text)))
diff --git a/tests/test_regressions.py b/tests/test_regressions.py
index d646325..b9a73a2 100644
--- a/tests/test_regressions.py
+++ b/tests/test_regressions.py
@@ -323,3 +323,22 @@ def test_token_next_doesnt_ignore_skip_cm():
def test_issue284_as_grouping(s):
p = sqlparse.parse(s)[0]
assert s == str(p)
+
+
+def test_issue315_utf8_by_default():
+ # Make sure the lexer can handle utf-8 string by default correctly
+ # digest = '齐天大圣.カラフルな雲.사랑해요'
+ # The digest contains Chinese, Japanese and Korean characters
+ # All in 'utf-8' encoding.
+ digest = (
+ '\xe9\xbd\x90\xe5\xa4\xa9\xe5\xa4\xa7\xe5\x9c\xa3.'
+ '\xe3\x82\xab\xe3\x83\xa9\xe3\x83\x95\xe3\x83\xab\xe3\x81\xaa\xe9'
+ '\x9b\xb2.'
+ '\xec\x82\xac\xeb\x9e\x91\xed\x95\xb4\xec\x9a\x94'
+ )
+ sql = "select * from foo where bar = '{0}'".format(digest)
+ formatted = sqlparse.format(sql, reindent=True)
+ tformatted = "select *\nfrom foo\nwhere bar = '{0}'".format(digest)
+ if PY2:
+ tformatted = tformatted.decode('utf-8')
+ assert formatted == tformatted