#6509: fix re.sub to work properly when the pattern, the string, and the replacement are all bytes. Patch by Antoine Pitrou.

author: Ezio Melotti <ezio.melotti@gmail.com> 2010-03-06 15:24:08 +0000
committer: Ezio Melotti <ezio.melotti@gmail.com> 2010-03-06 15:24:08 +0000
commit: b92ed7cf3673cd9902b785febb895b4e0c7a55ff (patch)
tree: 7e0b6a4e0e746f6b102bf41859642107c413761a /Lib/sre_parse.py
parent: 64fb18e1921d18c8801e85bf8fc429a4be2160e9 (diff)
download: cpython-git-b92ed7cf3673cd9902b785febb895b4e0c7a55ff.tar.gz
1 file changed, 7 insertions, 1 deletion
diff --git a/Lib/sre_parse.py b/Lib/sre_parse.py
index bc71b58775..13737ca12f 100644
--- a/Lib/sre_parse.py
+++ b/Lib/sre_parse.py
@@ -786,12 +786,18 @@ def parse_template(source, pattern):
     groups = []
     groupsappend = groups.append
     literals = [None] * len(p)
+    if isinstance(source, str):
+        encode = lambda x: x
+    else:
+        # The tokenizer implicitly decodes bytes objects as latin-1, we must
+        # therefore re-encode the final representation.
+        encode = lambda x: x.encode('latin1')
     for c, s in p:
         if c is MARK:
             groupsappend((i, s))
             # literal[i] is already None
         else:
-            literals[i] = s
+            literals[i] = encode(s)
         i = i + 1
     return groups, literals
author	Ezio Melotti <ezio.melotti@gmail.com>	2010-03-06 15:24:08 +0000
committer	Ezio Melotti <ezio.melotti@gmail.com>	2010-03-06 15:24:08 +0000
commit	b92ed7cf3673cd9902b785febb895b4e0c7a55ff (patch)
tree	7e0b6a4e0e746f6b102bf41859642107c413761a /Lib/sre_parse.py
parent	64fb18e1921d18c8801e85bf8fc429a4be2160e9 (diff)
download	cpython-git-b92ed7cf3673cd9902b785febb895b4e0c7a55ff.tar.gz