summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r--  Lib/sre_parse.py      8
-rw-r--r--  Lib/test/test_re.py  18
-rw-r--r--  Misc/NEWS             3
3 files changed, 28 insertions, 1 deletion
diff --git a/Lib/sre_parse.py b/Lib/sre_parse.py
index bc71b58775..13737ca12f 100644
--- a/Lib/sre_parse.py
+++ b/Lib/sre_parse.py
@@ -786,12 +786,18 @@ def parse_template(source, pattern):
groups = []
groupsappend = groups.append
literals = [None] * len(p)
+ if isinstance(source, str):
+ encode = lambda x: x
+ else:
+ # The tokenizer implicitly decodes bytes objects as latin-1, we must
+ # therefore re-encode the final representation.
+ encode = lambda x: x.encode('latin1')
for c, s in p:
if c is MARK:
groupsappend((i, s))
# literal[i] is already None
else:
- literals[i] = s
+ literals[i] = encode(s)
i = i + 1
return groups, literals
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py
index d2f7f6e52e..5eb94056d6 100644
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -717,6 +717,24 @@ class ReTests(unittest.TestCase):
self.assertRaises(ValueError, re.compile, '(?a)\w', re.UNICODE)
self.assertRaises(ValueError, re.compile, '(?au)\w')
+ def test_bug_6509(self):
+ # Replacement strings of both types must parse properly.
+ # all strings
+ pat = re.compile('a(\w)')
+ self.assertEqual(pat.sub('b\\1', 'ac'), 'bc')
+ pat = re.compile('a(.)')
+ self.assertEqual(pat.sub('b\\1', 'a\u1234'), 'b\u1234')
+ pat = re.compile('..')
+ self.assertEqual(pat.sub(lambda m: 'str', 'a5'), 'str')
+
+ # all bytes
+ pat = re.compile(b'a(\w)')
+ self.assertEqual(pat.sub(b'b\\1', b'ac'), b'bc')
+ pat = re.compile(b'a(.)')
+ self.assertEqual(pat.sub(b'b\\1', b'a\xCD'), b'b\xCD')
+ pat = re.compile(b'..')
+ self.assertEqual(pat.sub(lambda m: b'bytes', b'a5'), b'bytes')
+
def test_dealloc(self):
# issue 3299: check for segfault in debug build
import _sre
diff --git a/Misc/NEWS b/Misc/NEWS
index 0aaf558ddc..12a8f38e77 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -268,6 +268,9 @@ C-API
Library
-------
+- Issue #6509: fix re.sub to work properly when the pattern, the string, and
+ the replacement were all bytes. Patch by Antoine Pitrou.
+
- The sqlite3 module was updated to pysqlite 2.6.0. This fixes several obscure
bugs and allows loading SQLite extensions from shared libraries.