diff options
author | da-woods <dw-git@d-woods.co.uk> | 2021-04-12 19:24:32 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-04-12 20:24:32 +0200 |
commit | cd6e29d0251b21d6b1b45f3de8eb452bcfe2cf68 (patch) | |
tree | 87459e40cdb207295216f1315514bfb5f150ca73 | |
parent | 7590de7e940cbbf733f78d8d4f5322242418ee21 (diff) | |
download | cython-cd6e29d0251b21d6b1b45f3de8eb452bcfe2cf68.tar.gz |
Fix string constant folding with language_level=2 (GH-4083)
* Handle constant folding for language_level=2 on Python 3. Ensure that when a StringNode's value is a BytesLiteral, we don't coerce it to unicode.
* Add test for string multiplication bug. This required a small change to TreePath so that attribute comparisons also allow a bytes-to-str match.
Fixes https://github.com/cython/cython/issues/3951
-rw-r--r-- | Cython/Compiler/Optimize.py | 8 | ||||
-rw-r--r-- | Cython/Compiler/TreePath.py | 11 | ||||
-rw-r--r-- | Cython/Compiler/Visitor.py | 4 | ||||
-rw-r--r-- | tests/run/cstringmul.pyx | 12 |
4 files changed, 32 insertions, 3 deletions
diff --git a/Cython/Compiler/Optimize.py b/Cython/Compiler/Optimize.py
index 59e979b8b..846b389d1 100644
--- a/Cython/Compiler/Optimize.py
+++ b/Cython/Compiler/Optimize.py
@@ -4425,6 +4425,7 @@ class ConstantFolding(Visitor.VisitorTransform, SkipDeclarations):
                 string_node.unicode_value = encoded_string(
                     string_node.unicode_value * multiplier,
                     string_node.unicode_value.encoding)
+            build_string = encoded_string if string_node.value.is_unicode else bytes_literal
         elif isinstance(string_node, ExprNodes.UnicodeNode):
             if string_node.bytes_value is not None:
                 string_node.bytes_value = bytes_literal(
@@ -4432,9 +4433,14 @@ class ConstantFolding(Visitor.VisitorTransform, SkipDeclarations):
                     string_node.bytes_value.encoding)
         else:
             assert False, "unknown string node type: %s" % type(string_node)
-        string_node.constant_result = string_node.value = build_string(
+        string_node.value = build_string(
             string_node.value * multiplier,
             string_node.value.encoding)
+        # follow constant-folding and use unicode_value in preference
+        if isinstance(string_node, ExprNodes.StringNode) and string_node.unicode_value is not None:
+            string_node.constant_result = string_node.unicode_value
+        else:
+            string_node.constant_result = string_node.value
         return string_node
 
     def _calculate_constant_seq(self, node, sequence_node, factor):
diff --git a/Cython/Compiler/TreePath.py b/Cython/Compiler/TreePath.py
index 272570a03..858590555 100644
--- a/Cython/Compiler/TreePath.py
+++ b/Cython/Compiler/TreePath.py
@@ -10,6 +10,12 @@ from __future__ import absolute_import
 
 import re
 import operator
+import sys
+
+if sys.version_info[0] >= 3:
+    _unicode = str
+else:
+    _unicode = unicode
 
 path_tokenizer = re.compile(
     r"("
@@ -167,6 +173,11 @@ def handle_attribute(next, token):
                     continue
                 if attr_value == value:
                     yield attr_value
+                elif (isinstance(attr_value, bytes) and isinstance(value, _unicode) and
+                        attr_value == value.encode()):
+                    # allow a bytes-to-string comparison too
+                    yield attr_value
+
     return select
 
 
diff --git a/Cython/Compiler/Visitor.py b/Cython/Compiler/Visitor.py
index 368c38058..0cf5ee1eb 100644
--- a/Cython/Compiler/Visitor.py
+++ b/Cython/Compiler/Visitor.py
@@ -834,8 +834,8 @@ class PrintTree(TreeVisitor):
                 result += "(name=\"%s\")" % node.name
             elif isinstance(node, ExprNodes.AttributeNode):
                 result += "(type=%s, attribute=\"%s\")" % (repr(node.type), node.attribute)
-            elif isinstance(node, ExprNodes.ConstNode):
-                result += "(type=%s, value=\"%s\")" % (repr(node.type), node.value)
+            elif isinstance(node, (ExprNodes.ConstNode, ExprNodes.PyConstNode)):
+                result += "(type=%s, value=%r)" % (repr(node.type), node.value)
             elif isinstance(node, ExprNodes.ExprNode):
                 t = node.type
                 result += "(type=%s)" % repr(t)
diff --git a/tests/run/cstringmul.pyx b/tests/run/cstringmul.pyx
index 1c3b79302..1932e8d61 100644
--- a/tests/run/cstringmul.pyx
+++ b/tests/run/cstringmul.pyx
@@ -31,3 +31,15 @@ grail_long = 700 * "tomato"
 uspam = u"eggs" * 4
 ugrail = 7 * u"tomato"
 ugrail_long = 700 * u"tomato"
+
+cimport cython
+
+@cython.test_assert_path_exists("//StringNode[@value = '-----']")
+@cython.test_assert_path_exists("//StringNode[@unicode_value = '-----']")
+def gh3951():
+    """
+    Bug occurs with language_level=2 and affects StringNode.value
+    >>> gh3951()
+    '-----'
+    """
+    return "-"*5