summary | refs | log | tree | commit | diff
path: root/Lib/test/test_unicode.py
diff options
context:
space:
mode:
Diffstat (limited to 'Lib/test/test_unicode.py')
-rw-r--r-- Lib/test/test_unicode.py 90
1 files changed, 65 insertions, 25 deletions
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py
index f046938b36..a38e7b1610 100644
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -8,6 +8,7 @@ Written by Marc-Andre Lemburg (mal@lemburg.com).
import _string
import codecs
import itertools
+import operator
import struct
import sys
import unittest
@@ -318,6 +319,7 @@ class UnicodeTest(string_tests.CommonTest,
{ord('a'): None, ord('b'): ''})
self.checkequalnofix('xyyx', 'xzx', 'translate',
{ord('z'): 'yy'})
+
# this needs maketrans()
self.checkequalnofix('abababc', 'abababc', 'translate',
{'b': '<i>'})
@@ -327,6 +329,43 @@ class UnicodeTest(string_tests.CommonTest,
tbl = self.type2test.maketrans('abc', 'xyz', 'd')
self.checkequalnofix('xyzzy', 'abdcdcbdddd', 'translate', tbl)
+ # various tests switching from ASCII to latin1 or the opposite;
+ # same length, remove a letter, or replace with a longer string.
+ self.assertEqual("[a]".translate(str.maketrans('a', 'X')),
+ "[X]")
+ self.assertEqual("[a]".translate(str.maketrans({'a': 'X'})),
+ "[X]")
+ self.assertEqual("[a]".translate(str.maketrans({'a': None})),
+ "[]")
+ self.assertEqual("[a]".translate(str.maketrans({'a': 'XXX'})),
+ "[XXX]")
+ self.assertEqual("[a]".translate(str.maketrans({'a': '\xe9'})),
+ "[\xe9]")
+ self.assertEqual('axb'.translate(str.maketrans({'a': None, 'b': '123'})),
+ "x123")
+ self.assertEqual('axb'.translate(str.maketrans({'a': None, 'b': '\xe9'})),
+ "x\xe9")
+
+ # test non-ASCII (don't take the fast-path)
+ self.assertEqual("[a]".translate(str.maketrans({'a': '<\xe9>'})),
+ "[<\xe9>]")
+ self.assertEqual("[\xe9]".translate(str.maketrans({'\xe9': 'a'})),
+ "[a]")
+ self.assertEqual("[\xe9]".translate(str.maketrans({'\xe9': None})),
+ "[]")
+ self.assertEqual("[\xe9]".translate(str.maketrans({'\xe9': '123'})),
+ "[123]")
+ self.assertEqual("[a\xe9]".translate(str.maketrans({'a': '<\u20ac>'})),
+ "[<\u20ac>\xe9]")
+
+ # invalid Unicode characters
+ invalid_char = 0x10ffff+1
+ for before in "a\xe9\u20ac\U0010ffff":
+ mapping = str.maketrans({before: invalid_char})
+ text = "[%s]" % before
+ self.assertRaises(ValueError, text.translate, mapping)
+
+ # errors
self.assertRaises(TypeError, self.type2test.maketrans)
self.assertRaises(ValueError, self.type2test.maketrans, 'abc', 'defg')
self.assertRaises(TypeError, self.type2test.maketrans, 2, 'def')
@@ -341,10 +380,6 @@ class UnicodeTest(string_tests.CommonTest,
def test_split(self):
string_tests.CommonTest.test_split(self)
- # Mixed arguments
- self.checkequalnofix(['a', 'b', 'c', 'd'], 'a//b//c//d', 'split', '//')
- self.checkequalnofix(['a', 'b', 'c', 'd'], 'a//b//c//d', 'split', '//')
- self.checkequalnofix(['endcase ', ''], 'endcase test', 'split', 'test')
# test mixed kinds
for left, right in ('ba', '\u0101\u0100', '\U00010301\U00010300'):
left *= 9
@@ -526,7 +561,7 @@ class UnicodeTest(string_tests.CommonTest,
self.assertTrue('\ud800\udc02' < '\ud84d\udc56')
def test_islower(self):
- string_tests.MixinStrUnicodeUserStringTest.test_islower(self)
+ super().test_islower()
self.checkequalnofix(False, '\u1FFc', 'islower')
self.assertFalse('\u2167'.islower())
self.assertTrue('\u2177'.islower())
@@ -541,7 +576,7 @@ class UnicodeTest(string_tests.CommonTest,
self.assertFalse('\U0001F46F'.islower())
def test_isupper(self):
- string_tests.MixinStrUnicodeUserStringTest.test_isupper(self)
+ super().test_isupper()
if not sys.platform.startswith('java'):
self.checkequalnofix(False, '\u1FFc', 'isupper')
self.assertTrue('\u2167'.isupper())
@@ -557,7 +592,7 @@ class UnicodeTest(string_tests.CommonTest,
self.assertFalse('\U0001F46F'.isupper())
def test_istitle(self):
- string_tests.MixinStrUnicodeUserStringTest.test_istitle(self)
+ super().test_istitle()
self.checkequalnofix(True, '\u1FFc', 'istitle')
self.checkequalnofix(True, 'Greek \u1FFcitlecases ...', 'istitle')
@@ -569,7 +604,7 @@ class UnicodeTest(string_tests.CommonTest,
self.assertFalse(ch.istitle(), '{!a} is not title'.format(ch))
def test_isspace(self):
- string_tests.MixinStrUnicodeUserStringTest.test_isspace(self)
+ super().test_isspace()
self.checkequalnofix(True, '\u2000', 'isspace')
self.checkequalnofix(True, '\u200a', 'isspace')
self.checkequalnofix(False, '\u2014', 'isspace')
@@ -579,13 +614,13 @@ class UnicodeTest(string_tests.CommonTest,
self.assertFalse(ch.isspace(), '{!a} is not space.'.format(ch))
def test_isalnum(self):
- string_tests.MixinStrUnicodeUserStringTest.test_isalnum(self)
+ super().test_isalnum()
for ch in ['\U00010401', '\U00010427', '\U00010429', '\U0001044E',
'\U0001D7F6', '\U00011066', '\U000104A0', '\U0001F107']:
self.assertTrue(ch.isalnum(), '{!a} is alnum.'.format(ch))
def test_isalpha(self):
- string_tests.MixinStrUnicodeUserStringTest.test_isalpha(self)
+ super().test_isalpha()
self.checkequalnofix(True, '\u1FFc', 'isalpha')
# non-BMP, cased
self.assertTrue('\U00010401'.isalpha())
@@ -615,7 +650,7 @@ class UnicodeTest(string_tests.CommonTest,
self.assertTrue(ch.isdecimal(), '{!a} is decimal.'.format(ch))
def test_isdigit(self):
- string_tests.MixinStrUnicodeUserStringTest.test_isdigit(self)
+ super().test_isdigit()
self.checkequalnofix(True, '\u2460', 'isdigit')
self.checkequalnofix(False, '\xbc', 'isdigit')
self.checkequalnofix(True, '\u0660', 'isdigit')
@@ -768,7 +803,7 @@ class UnicodeTest(string_tests.CommonTest,
self.assertEqual('A\u0345\u03a3'.capitalize(), 'A\u0345\u03c2')
def test_title(self):
- string_tests.MixinStrUnicodeUserStringTest.test_title(self)
+ super().test_title()
self.assertEqual('\U0001044F'.title(), '\U00010427')
self.assertEqual('\U0001044F\U0001044F'.title(),
'\U00010427\U0001044F')
@@ -1317,20 +1352,20 @@ class UnicodeTest(string_tests.CommonTest,
self.assertEqual('%.2s' % "a\xe9\u20ac", 'a\xe9')
#issue 19995
- class PsuedoInt:
+ class PseudoInt:
def __init__(self, value):
self.value = int(value)
def __int__(self):
return self.value
def __index__(self):
return self.value
- class PsuedoFloat:
+ class PseudoFloat:
def __init__(self, value):
self.value = float(value)
def __int__(self):
return int(self.value)
- pi = PsuedoFloat(3.1415)
- letter_m = PsuedoInt(109)
+ pi = PseudoFloat(3.1415)
+ letter_m = PseudoInt(109)
self.assertEqual('%x' % 42, '2a')
self.assertEqual('%X' % 15, 'F')
self.assertEqual('%o' % 9, '11')
@@ -1339,11 +1374,11 @@ class UnicodeTest(string_tests.CommonTest,
self.assertEqual('%X' % letter_m, '6D')
self.assertEqual('%o' % letter_m, '155')
self.assertEqual('%c' % letter_m, 'm')
- self.assertWarns(DeprecationWarning, '%x'.__mod__, pi),
- self.assertWarns(DeprecationWarning, '%x'.__mod__, 3.14),
- self.assertWarns(DeprecationWarning, '%X'.__mod__, 2.11),
- self.assertWarns(DeprecationWarning, '%o'.__mod__, 1.79),
- self.assertWarns(DeprecationWarning, '%c'.__mod__, pi),
+ self.assertRaisesRegex(TypeError, '%x format: an integer is required, not float', operator.mod, '%x', 3.14),
+ self.assertRaisesRegex(TypeError, '%X format: an integer is required, not float', operator.mod, '%X', 2.11),
+ self.assertRaisesRegex(TypeError, '%o format: an integer is required, not float', operator.mod, '%o', 1.79),
+ self.assertRaisesRegex(TypeError, '%x format: an integer is required, not PseudoFloat', operator.mod, '%x', pi),
+ self.assertRaises(TypeError, operator.mod, '%c', pi),
def test_formatting_with_enum(self):
# issue18780
@@ -1739,7 +1774,7 @@ class UnicodeTest(string_tests.CommonTest,
def assertCorrectUTF8Decoding(self, seq, res, err):
"""
- Check that an invalid UTF-8 sequence raises an UnicodeDecodeError when
+ Check that an invalid UTF-8 sequence raises a UnicodeDecodeError when
'strict' is used, returns res when 'replace' is used, and that doesn't
return anything when 'ignore' is used.
"""
@@ -2061,7 +2096,8 @@ class UnicodeTest(string_tests.CommonTest,
'cp863', 'cp865', 'cp866', 'cp1125',
'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6',
- 'iso8859_7', 'iso8859_9', 'koi8_r', 'latin_1',
+ 'iso8859_7', 'iso8859_9',
+ 'koi8_r', 'koi8_t', 'koi8_u', 'kz1048', 'latin_1',
'mac_cyrillic', 'mac_latin2',
'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
@@ -2089,14 +2125,14 @@ class UnicodeTest(string_tests.CommonTest,
'cp863', 'cp865', 'cp866', 'cp1125',
'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
'iso8859_2', 'iso8859_4', 'iso8859_5',
- 'iso8859_9', 'koi8_r', 'latin_1',
+ 'iso8859_9', 'koi8_r', 'koi8_u', 'latin_1',
'mac_cyrillic', 'mac_latin2',
### These have undefined mappings:
#'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
#'cp1256', 'cp1257', 'cp1258',
#'cp424', 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
- #'iso8859_3', 'iso8859_6', 'iso8859_7',
+ #'iso8859_3', 'iso8859_6', 'iso8859_7', 'koi8_t', 'kz1048',
#'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
### These fail the round-trip:
@@ -2689,6 +2725,10 @@ class UnicodeTest(string_tests.CommonTest,
# Check that the second call returns the same result
self.assertEqual(getargs_s_hash(s), chr(k).encode() * (i + 1))
+ def test_free_after_iterating(self):
+ support.check_free_after_iterating(self, iter, str)
+ support.check_free_after_iterating(self, reversed, str)
+
class StringModuleTest(unittest.TestCase):
def test_formatter_parser(self):