diff options
| author | Serhiy Storchaka <storchaka@gmail.com> | 2013-10-03 12:08:38 +0300 | 
|---|---|---|
| committer | Serhiy Storchaka <storchaka@gmail.com> | 2013-10-03 12:08:38 +0300 | 
| commit | def0a4c298358c50b0b53a8113551e9aee3532e5 (patch) | |
| tree | 7fc497bed47259280aa94fc9d6fcd34388a93882 /Lib/lib2to3/fixes/fix_unicode.py | |
| parent | 2a8b3f26b91d6f1774c73fe4f545d2eb94475525 (diff) | |
| download | cpython-git-def0a4c298358c50b0b53a8113551e9aee3532e5.tar.gz | |
Issue #18037: 2to3 now escapes '\u' and '\U' in native strings.
Diffstat (limited to 'Lib/lib2to3/fixes/fix_unicode.py')
| -rw-r--r-- | Lib/lib2to3/fixes/fix_unicode.py | 32 | 
1 files changed, 25 insertions, 7 deletions
```diff
diff --git a/Lib/lib2to3/fixes/fix_unicode.py b/Lib/lib2to3/fixes/fix_unicode.py
index d2b3ceeb91..6555397da6 100644
--- a/Lib/lib2to3/fixes/fix_unicode.py
+++ b/Lib/lib2to3/fixes/fix_unicode.py
@@ -1,25 +1,43 @@
-"""Fixer that changes unicode to str, unichr to chr, and u"..." into "...".
+r"""Fixer for unicode.
+
+* Changes unicode to str and unichr to chr.
+
+* If "...\u..." is not unicode literal change it into "...\\u...".
+
+* Change u"..." into "...".
 
 """
 
-import re
 from ..pgen2 import token
 from .. import fixer_base
 
 _mapping = {"unichr" : "chr", "unicode" : "str"}
-_literal_re = re.compile(r"[uU][rR]?[\'\"]")
 
 class FixUnicode(fixer_base.BaseFix):
     BM_compatible = True
     PATTERN = "STRING | 'unicode' | 'unichr'"
 
+    def start_tree(self, tree, filename):
+        super(FixUnicode, self).start_tree(tree, filename)
+        self.unicode_literals = 'unicode_literals' in tree.future_features
+
     def transform(self, node, results):
         if node.type == token.NAME:
             new = node.clone()
             new.value = _mapping[node.value]
             return new
         elif node.type == token.STRING:
-            if _literal_re.match(node.value):
-                new = node.clone()
-                new.value = new.value[1:]
-                return new
+            val = node.value
+            if (not self.unicode_literals and val[0] in 'rR\'"' and
+                '\\' in val):
+                val = r'\\'.join([
+                    v.replace('\\u', r'\\u').replace('\\U', r'\\U')
+                    for v in val.split(r'\\')
+                ])
+            if val[0] in 'uU':
+                val = val[1:]
+            if val == node.value:
+                return node
+            new = node.clone()
+            new.value = val
+            return new
```
