diff options
Diffstat (limited to 'Lib/encodings/idna.py')
-rw-r--r-- | Lib/encodings/idna.py | 100 |
1 files changed, 83 insertions, 17 deletions
diff --git a/Lib/encodings/idna.py b/Lib/encodings/idna.py index 8bdae32cea..ea90d67142 100644 --- a/Lib/encodings/idna.py +++ b/Lib/encodings/idna.py @@ -35,7 +35,7 @@ def nameprep(label): stringprep.in_table_c7(c) or \ stringprep.in_table_c8(c) or \ stringprep.in_table_c9(c): - raise UnicodeError, "Invalid character %s" % repr(c) + raise UnicodeError("Invalid character %r" % c) # Check bidi RandAL = map(stringprep.in_table_d1, label) @@ -48,14 +48,14 @@ def nameprep(label): # 2) If a string contains any RandALCat character, the string # MUST NOT contain any LCat character. if filter(stringprep.in_table_d2, label): - raise UnicodeError, "Violation of BIDI requirement 2" + raise UnicodeError("Violation of BIDI requirement 2") # 3) If a string contains any RandALCat character, a # RandALCat character MUST be the first character of the # string, and a RandALCat character MUST be the last # character of the string. if not RandAL[0] or not RandAL[-1]: - raise UnicodeError, "Violation of BIDI requirement 3" + raise UnicodeError("Violation of BIDI requirement 3") return label @@ -70,7 +70,7 @@ def ToASCII(label): # Skip to step 8. if 0 < len(label) < 64: return label - raise UnicodeError, "label too long" + raise UnicodeError("label empty or too long") # Step 2: nameprep label = nameprep(label) @@ -85,11 +85,11 @@ def ToASCII(label): # Skip to step 8. if 0 < len(label) < 64: return label - raise UnicodeError, "label too long" + raise UnicodeError("label empty or too long") # Step 5: Check ACE prefix if label.startswith(uace_prefix): - raise UnicodeError, "Label starts with ACE prefix" + raise UnicodeError("Label starts with ACE prefix") # Step 6: Encode with PUNYCODE label = label.encode("punycode") @@ -100,7 +100,7 @@ def ToASCII(label): # Step 8: Check size if 0 < len(label) < 64: return label - raise UnicodeError, "label too long" + raise UnicodeError("label empty or too long") def ToUnicode(label): # Step 1: Check for ASCII @@ -119,7 +119,7 @@ def ToUnicode(label): try: label = label.encode("ascii") except UnicodeError: - raise UnicodeError, "Invalid character in IDN label" + raise UnicodeError("Invalid character in IDN label") # Step 3: Check for ACE prefix if not label.startswith(ace_prefix): return unicode(label, "ascii") @@ -136,7 +136,7 @@ def ToUnicode(label): # Step 7: Compare the result of step 6 with the one of step 3 # label2 will already be in lower case. if label.lower() != label2: - raise UnicodeError, ("IDNA does not round-trip", label, label2) + raise UnicodeError("IDNA does not round-trip", label, label2) # Step 8: return the result of step 5 return result @@ -148,7 +148,7 @@ class Codec(codecs.Codec): if errors != 'strict': # IDNA is quite clear that implementations must be strict - raise UnicodeError, "unsupported error handling "+errors + raise UnicodeError("unsupported error handling "+errors) if not input: return "", 0 @@ -168,7 +168,7 @@ class Codec(codecs.Codec): def decode(self,input,errors='strict'): if errors != 'strict': - raise UnicodeError, "Unsupported error handling "+errors + raise UnicodeError("Unsupported error handling "+errors) if not input: return u"", 0 @@ -194,13 +194,79 @@ class Codec(codecs.Codec): return u".".join(result)+trailing_dot, len(input) -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return Codec().encode(input, self.errors)[0] +class IncrementalEncoder(codecs.BufferedIncrementalEncoder): + def _buffer_encode(self, input, errors, final): + if errors != 'strict': + # IDNA is quite clear that implementations must be strict + raise UnicodeError("unsupported error handling "+errors) + + if not input: + return ("", 0) + + labels = dots.split(input) + trailing_dot = u'' + if labels: + if not labels[-1]: + trailing_dot = '.' + del labels[-1] + elif not final: + # Keep potentially unfinished label until the next call + del labels[-1] + if labels: + trailing_dot = '.' + + result = [] + size = 0 + for label in labels: + result.append(ToASCII(label)) + if size: + size += 1 + size += len(label) + + # Join with U+002E + result = ".".join(result) + trailing_dot + size += len(trailing_dot) + return (result, size) + +class IncrementalDecoder(codecs.BufferedIncrementalDecoder): + def _buffer_decode(self, input, errors, final): + if errors != 'strict': + raise UnicodeError("Unsupported error handling "+errors) + + if not input: + return (u"", 0) + + # IDNA allows decoding to operate on Unicode strings, too. + if isinstance(input, unicode): + labels = dots.split(input) + else: + # Must be ASCII string + input = str(input) + unicode(input, "ascii") + labels = input.split(".") + + trailing_dot = u'' + if labels: + if not labels[-1]: + trailing_dot = u'.' + del labels[-1] + elif not final: + # Keep potentially unfinished label until the next call + del labels[-1] + if labels: + trailing_dot = u'.' + + result = [] + size = 0 + for label in labels: + result.append(ToUnicode(label)) + if size: + size += 1 + size += len(label) -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return Codec().decode(input, self.errors)[0] + result = u".".join(result) + trailing_dot + size += len(trailing_dot) + return (result, size) class StreamWriter(Codec,codecs.StreamWriter): pass |