summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Andi Albrecht <albrecht.andi@gmail.com> 2022-09-10 10:08:02 +0200
committer: Andi Albrecht <albrecht.andi@gmail.com> 2022-09-10 10:08:02 +0200
commit: b72a8ff42f53cba0517b1dd9e8af051b4a060ecf (patch)
tree: d193827bbf5e85809b56cbbeefab07419164cf59
parent: 07a2e81532daf62f1f4360e48ff322abeade7315 (diff)
download: sqlparse-b72a8ff42f53cba0517b1dd9e8af051b4a060ecf.tar.gz
Allow any unicode character as identifier name (fixes #641).
-rw-r--r--  CHANGELOG             1
-rw-r--r--  sqlparse/keywords.py  2
-rw-r--r--  tests/test_parse.py   1
3 files changed, 3 insertions, 1 deletions
diff --git a/CHANGELOG b/CHANGELOG
index e1b3ae6..a363b22 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -16,6 +16,7 @@ Bug Fixes
* Fix formatting error in EXTRACT function (issue562, issue670, pr676, by ecederstrand).
* Fix bad parsing of create table statements that use lower case (issue217, pr642, by mrmasterplan).
* Handle backtick as valid quote char (issue628, pr629, by codenamelxl).
+* Allow any unicode character as valid identifier name (issue641).
Other
diff --git a/sqlparse/keywords.py b/sqlparse/keywords.py
index 3aa6c63..d73e114 100644
--- a/sqlparse/keywords.py
+++ b/sqlparse/keywords.py
@@ -99,7 +99,7 @@ SQL_REGEX = {
(r'(NOT\s+)?(REGEXP)\b', tokens.Operator.Comparison),
# Check for keywords, also returns tokens.Name if regex matches
# but the match isn't a keyword.
- (r'[0-9_A-ZÀ-Ü][_$#\w]*', is_keyword),
+ (r'[0-9_\w][_$#\w]*', is_keyword),
(r'[;:()\[\],\.]', tokens.Punctuation),
(r'[<>=~!]+', tokens.Operator.Comparison),
(r'[+/@#%^&|^-]+', tokens.Operator),
diff --git a/tests/test_parse.py b/tests/test_parse.py
index caba537..ec327ac 100644
--- a/tests/test_parse.py
+++ b/tests/test_parse.py
@@ -148,6 +148,7 @@ def test_quoted_identifier():
@pytest.mark.parametrize('name', [
'foo', '_foo', # issue175
'1_data', # valid MySQL table name, see issue337
+ '業者名稱', # valid at least for SQLite3, see issue641
])
def test_valid_identifier_names(name):
t = sqlparse.parse(name)[0].tokens