diff options
Diffstat (limited to 'Lib/test/test_unicode.py')
| -rw-r--r-- | Lib/test/test_unicode.py | 90 |
1 files changed, 65 insertions, 25 deletions
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py index f046938b36..a38e7b1610 100644 --- a/Lib/test/test_unicode.py +++ b/Lib/test/test_unicode.py @@ -8,6 +8,7 @@ Written by Marc-Andre Lemburg (mal@lemburg.com). import _string import codecs import itertools +import operator import struct import sys import unittest @@ -318,6 +319,7 @@ class UnicodeTest(string_tests.CommonTest, {ord('a'): None, ord('b'): ''}) self.checkequalnofix('xyyx', 'xzx', 'translate', {ord('z'): 'yy'}) + # this needs maketrans() self.checkequalnofix('abababc', 'abababc', 'translate', {'b': '<i>'}) @@ -327,6 +329,43 @@ class UnicodeTest(string_tests.CommonTest, tbl = self.type2test.maketrans('abc', 'xyz', 'd') self.checkequalnofix('xyzzy', 'abdcdcbdddd', 'translate', tbl) + # various tests switching from ASCII to latin1 or the opposite; + # same length, remove a letter, or replace with a longer string. + self.assertEqual("[a]".translate(str.maketrans('a', 'X')), + "[X]") + self.assertEqual("[a]".translate(str.maketrans({'a': 'X'})), + "[X]") + self.assertEqual("[a]".translate(str.maketrans({'a': None})), + "[]") + self.assertEqual("[a]".translate(str.maketrans({'a': 'XXX'})), + "[XXX]") + self.assertEqual("[a]".translate(str.maketrans({'a': '\xe9'})), + "[\xe9]") + self.assertEqual('axb'.translate(str.maketrans({'a': None, 'b': '123'})), + "x123") + self.assertEqual('axb'.translate(str.maketrans({'a': None, 'b': '\xe9'})), + "x\xe9") + + # test non-ASCII (don't take the fast-path) + self.assertEqual("[a]".translate(str.maketrans({'a': '<\xe9>'})), + "[<\xe9>]") + self.assertEqual("[\xe9]".translate(str.maketrans({'\xe9': 'a'})), + "[a]") + self.assertEqual("[\xe9]".translate(str.maketrans({'\xe9': None})), + "[]") + self.assertEqual("[\xe9]".translate(str.maketrans({'\xe9': '123'})), + "[123]") + self.assertEqual("[a\xe9]".translate(str.maketrans({'a': '<\u20ac>'})), + "[<\u20ac>\xe9]") + + # invalid Unicode characters + invalid_char = 0x10ffff+1 + for before in "a\xe9\u20ac\U0010ffff": + mapping = str.maketrans({before: invalid_char}) + text = "[%s]" % before + self.assertRaises(ValueError, text.translate, mapping) + + # errors self.assertRaises(TypeError, self.type2test.maketrans) self.assertRaises(ValueError, self.type2test.maketrans, 'abc', 'defg') self.assertRaises(TypeError, self.type2test.maketrans, 2, 'def') @@ -341,10 +380,6 @@ class UnicodeTest(string_tests.CommonTest, def test_split(self): string_tests.CommonTest.test_split(self) - # Mixed arguments - self.checkequalnofix(['a', 'b', 'c', 'd'], 'a//b//c//d', 'split', '//') - self.checkequalnofix(['a', 'b', 'c', 'd'], 'a//b//c//d', 'split', '//') - self.checkequalnofix(['endcase ', ''], 'endcase test', 'split', 'test') # test mixed kinds for left, right in ('ba', '\u0101\u0100', '\U00010301\U00010300'): left *= 9 @@ -526,7 +561,7 @@ class UnicodeTest(string_tests.CommonTest, self.assertTrue('\ud800\udc02' < '\ud84d\udc56') def test_islower(self): - string_tests.MixinStrUnicodeUserStringTest.test_islower(self) + super().test_islower() self.checkequalnofix(False, '\u1FFc', 'islower') self.assertFalse('\u2167'.islower()) self.assertTrue('\u2177'.islower()) @@ -541,7 +576,7 @@ class UnicodeTest(string_tests.CommonTest, self.assertFalse('\U0001F46F'.islower()) def test_isupper(self): - string_tests.MixinStrUnicodeUserStringTest.test_isupper(self) + super().test_isupper() if not sys.platform.startswith('java'): self.checkequalnofix(False, '\u1FFc', 'isupper') self.assertTrue('\u2167'.isupper()) @@ -557,7 +592,7 @@ class UnicodeTest(string_tests.CommonTest, self.assertFalse('\U0001F46F'.isupper()) def test_istitle(self): - string_tests.MixinStrUnicodeUserStringTest.test_istitle(self) + super().test_istitle() self.checkequalnofix(True, '\u1FFc', 'istitle') self.checkequalnofix(True, 'Greek \u1FFcitlecases ...', 'istitle') @@ -569,7 +604,7 @@ class UnicodeTest(string_tests.CommonTest, self.assertFalse(ch.istitle(), '{!a} is not title'.format(ch)) def test_isspace(self): - string_tests.MixinStrUnicodeUserStringTest.test_isspace(self) + super().test_isspace() self.checkequalnofix(True, '\u2000', 'isspace') self.checkequalnofix(True, '\u200a', 'isspace') self.checkequalnofix(False, '\u2014', 'isspace') @@ -579,13 +614,13 @@ class UnicodeTest(string_tests.CommonTest, self.assertFalse(ch.isspace(), '{!a} is not space.'.format(ch)) def test_isalnum(self): - string_tests.MixinStrUnicodeUserStringTest.test_isalnum(self) + super().test_isalnum() for ch in ['\U00010401', '\U00010427', '\U00010429', '\U0001044E', '\U0001D7F6', '\U00011066', '\U000104A0', '\U0001F107']: self.assertTrue(ch.isalnum(), '{!a} is alnum.'.format(ch)) def test_isalpha(self): - string_tests.MixinStrUnicodeUserStringTest.test_isalpha(self) + super().test_isalpha() self.checkequalnofix(True, '\u1FFc', 'isalpha') # non-BMP, cased self.assertTrue('\U00010401'.isalpha()) @@ -615,7 +650,7 @@ class UnicodeTest(string_tests.CommonTest, self.assertTrue(ch.isdecimal(), '{!a} is decimal.'.format(ch)) def test_isdigit(self): - string_tests.MixinStrUnicodeUserStringTest.test_isdigit(self) + super().test_isdigit() self.checkequalnofix(True, '\u2460', 'isdigit') self.checkequalnofix(False, '\xbc', 'isdigit') self.checkequalnofix(True, '\u0660', 'isdigit') @@ -768,7 +803,7 @@ class UnicodeTest(string_tests.CommonTest, self.assertEqual('A\u0345\u03a3'.capitalize(), 'A\u0345\u03c2') def test_title(self): - string_tests.MixinStrUnicodeUserStringTest.test_title(self) + super().test_title() self.assertEqual('\U0001044F'.title(), '\U00010427') self.assertEqual('\U0001044F\U0001044F'.title(), '\U00010427\U0001044F') @@ -1317,20 +1352,20 @@ class UnicodeTest(string_tests.CommonTest, self.assertEqual('%.2s' % "a\xe9\u20ac", 'a\xe9') #issue 19995 - class PsuedoInt: + class PseudoInt: def __init__(self, value): self.value = int(value) def __int__(self): return self.value def __index__(self): return self.value - class PsuedoFloat: + class PseudoFloat: def __init__(self, value): self.value = float(value) def __int__(self): return int(self.value) - pi = PsuedoFloat(3.1415) - letter_m = PsuedoInt(109) + pi = PseudoFloat(3.1415) + letter_m = PseudoInt(109) self.assertEqual('%x' % 42, '2a') self.assertEqual('%X' % 15, 'F') self.assertEqual('%o' % 9, '11') @@ -1339,11 +1374,11 @@ class UnicodeTest(string_tests.CommonTest, self.assertEqual('%X' % letter_m, '6D') self.assertEqual('%o' % letter_m, '155') self.assertEqual('%c' % letter_m, 'm') - self.assertWarns(DeprecationWarning, '%x'.__mod__, pi), - self.assertWarns(DeprecationWarning, '%x'.__mod__, 3.14), - self.assertWarns(DeprecationWarning, '%X'.__mod__, 2.11), - self.assertWarns(DeprecationWarning, '%o'.__mod__, 1.79), - self.assertWarns(DeprecationWarning, '%c'.__mod__, pi), + self.assertRaisesRegex(TypeError, '%x format: an integer is required, not float', operator.mod, '%x', 3.14), + self.assertRaisesRegex(TypeError, '%X format: an integer is required, not float', operator.mod, '%X', 2.11), + self.assertRaisesRegex(TypeError, '%o format: an integer is required, not float', operator.mod, '%o', 1.79), + self.assertRaisesRegex(TypeError, '%x format: an integer is required, not PseudoFloat', operator.mod, '%x', pi), + self.assertRaises(TypeError, operator.mod, '%c', pi), def test_formatting_with_enum(self): # issue18780 @@ -1739,7 +1774,7 @@ class UnicodeTest(string_tests.CommonTest, def assertCorrectUTF8Decoding(self, seq, res, err): """ - Check that an invalid UTF-8 sequence raises an UnicodeDecodeError when + Check that an invalid UTF-8 sequence raises a UnicodeDecodeError when 'strict' is used, returns res when 'replace' is used, and that doesn't return anything when 'ignore' is used. """ @@ -2061,7 +2096,8 @@ class UnicodeTest(string_tests.CommonTest, 'cp863', 'cp865', 'cp866', 'cp1125', 'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15', 'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6', - 'iso8859_7', 'iso8859_9', 'koi8_r', 'latin_1', + 'iso8859_7', 'iso8859_9', + 'koi8_r', 'koi8_t', 'koi8_u', 'kz1048', 'latin_1', 'mac_cyrillic', 'mac_latin2', 'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255', @@ -2089,14 +2125,14 @@ class UnicodeTest(string_tests.CommonTest, 'cp863', 'cp865', 'cp866', 'cp1125', 'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15', 'iso8859_2', 'iso8859_4', 'iso8859_5', - 'iso8859_9', 'koi8_r', 'latin_1', + 'iso8859_9', 'koi8_r', 'koi8_u', 'latin_1', 'mac_cyrillic', 'mac_latin2', ### These have undefined mappings: #'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255', #'cp1256', 'cp1257', 'cp1258', #'cp424', 'cp856', 'cp857', 'cp864', 'cp869', 'cp874', - #'iso8859_3', 'iso8859_6', 'iso8859_7', + #'iso8859_3', 'iso8859_6', 'iso8859_7', 'koi8_t', 'kz1048', #'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish', ### These fail the round-trip: @@ -2689,6 +2725,10 @@ class UnicodeTest(string_tests.CommonTest, # Check that the second call returns the same result self.assertEqual(getargs_s_hash(s), chr(k).encode() * (i + 1)) + def test_free_after_iterating(self): + support.check_free_after_iterating(self, iter, str) + support.check_free_after_iterating(self, reversed, str) + class StringModuleTest(unittest.TestCase): def test_formatter_parser(self): |
