summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: da-woods <dw-git@d-woods.co.uk> 2021-04-12 19:24:32 +0100
committer: GitHub <noreply@github.com> 2021-04-12 20:24:32 +0200
commit: cd6e29d0251b21d6b1b45f3de8eb452bcfe2cf68 (patch)
tree: 87459e40cdb207295216f1315514bfb5f150ca73
parent: 7590de7e940cbbf733f78d8d4f5322242418ee21 (diff)
download: cython-cd6e29d0251b21d6b1b45f3de8eb452bcfe2cf68.tar.gz
Fix string constant folding with language_level=2 (GH-4083)
* Handle constant folding for LanguageLevel 2 on Python 3. Ensure that when StrNode is a BytesLiteral, we don't coerce it to unicode.
* Add test for string multiplication bug. Needed to change the TreePath slightly to allow bytes-to-str comparison.

Fixes https://github.com/cython/cython/issues/3951
-rw-r--r--  Cython/Compiler/Optimize.py  8
-rw-r--r--  Cython/Compiler/TreePath.py  11
-rw-r--r--  Cython/Compiler/Visitor.py   4
-rw-r--r--  tests/run/cstringmul.pyx     12
4 files changed, 32 insertions, 3 deletions
diff --git a/Cython/Compiler/Optimize.py b/Cython/Compiler/Optimize.py
index 59e979b8b..846b389d1 100644
--- a/Cython/Compiler/Optimize.py
+++ b/Cython/Compiler/Optimize.py
@@ -4425,6 +4425,7 @@ class ConstantFolding(Visitor.VisitorTransform, SkipDeclarations):
string_node.unicode_value = encoded_string(
string_node.unicode_value * multiplier,
string_node.unicode_value.encoding)
+ build_string = encoded_string if string_node.value.is_unicode else bytes_literal
elif isinstance(string_node, ExprNodes.UnicodeNode):
if string_node.bytes_value is not None:
string_node.bytes_value = bytes_literal(
@@ -4432,9 +4433,14 @@ class ConstantFolding(Visitor.VisitorTransform, SkipDeclarations):
string_node.bytes_value.encoding)
else:
assert False, "unknown string node type: %s" % type(string_node)
- string_node.constant_result = string_node.value = build_string(
+ string_node.value = build_string(
string_node.value * multiplier,
string_node.value.encoding)
+ # follow constant-folding and use unicode_value in preference
+ if isinstance(string_node, ExprNodes.StringNode) and string_node.unicode_value is not None:
+ string_node.constant_result = string_node.unicode_value
+ else:
+ string_node.constant_result = string_node.value
return string_node
def _calculate_constant_seq(self, node, sequence_node, factor):
diff --git a/Cython/Compiler/TreePath.py b/Cython/Compiler/TreePath.py
index 272570a03..858590555 100644
--- a/Cython/Compiler/TreePath.py
+++ b/Cython/Compiler/TreePath.py
@@ -10,6 +10,12 @@ from __future__ import absolute_import
import re
import operator
+import sys
+
+if sys.version_info[0] >= 3:
+ _unicode = str
+else:
+ _unicode = unicode
path_tokenizer = re.compile(
r"("
@@ -167,6 +173,11 @@ def handle_attribute(next, token):
continue
if attr_value == value:
yield attr_value
+ elif (isinstance(attr_value, bytes) and isinstance(value, _unicode) and
+ attr_value == value.encode()):
+ # allow a bytes-to-string comparison too
+ yield attr_value
+
return select
diff --git a/Cython/Compiler/Visitor.py b/Cython/Compiler/Visitor.py
index 368c38058..0cf5ee1eb 100644
--- a/Cython/Compiler/Visitor.py
+++ b/Cython/Compiler/Visitor.py
@@ -834,8 +834,8 @@ class PrintTree(TreeVisitor):
result += "(name=\"%s\")" % node.name
elif isinstance(node, ExprNodes.AttributeNode):
result += "(type=%s, attribute=\"%s\")" % (repr(node.type), node.attribute)
- elif isinstance(node, ExprNodes.ConstNode):
- result += "(type=%s, value=\"%s\")" % (repr(node.type), node.value)
+ elif isinstance(node, (ExprNodes.ConstNode, ExprNodes.PyConstNode)):
+ result += "(type=%s, value=%r)" % (repr(node.type), node.value)
elif isinstance(node, ExprNodes.ExprNode):
t = node.type
result += "(type=%s)" % repr(t)
diff --git a/tests/run/cstringmul.pyx b/tests/run/cstringmul.pyx
index 1c3b79302..1932e8d61 100644
--- a/tests/run/cstringmul.pyx
+++ b/tests/run/cstringmul.pyx
@@ -31,3 +31,15 @@ grail_long = 700 * "tomato"
uspam = u"eggs" * 4
ugrail = 7 * u"tomato"
ugrail_long = 700 * u"tomato"
+
+cimport cython
+
+@cython.test_assert_path_exists("//StringNode[@value = '-----']")
+@cython.test_assert_path_exists("//StringNode[@unicode_value = '-----']")
+def gh3951():
+ """
+ Bug occurs with language_level=2 and affects StringNode.value
+ >>> gh3951()
+ '-----'
+ """
+ return "-"*5