From 7b67e09e06d8d9c96b9ff36934ca7427036c3a74 Mon Sep 17 00:00:00 2001 From: Adrien Di Mascio Date: Wed, 2 Mar 2011 09:13:40 +0100 Subject: [textutils] fix unormalize implementation Give NFKD decomposition a chance even if ord(c) > 2**8; this works in a few more cases. Add an optional substitute parameter to provide a replacement char if decomposition fails. --- test/unittest_textutils.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'test') diff --git a/test/unittest_textutils.py b/test/unittest_textutils.py index 75b9cbb..d72a4a1 100644 --- a/test/unittest_textutils.py +++ b/test/unittest_textutils.py @@ -228,7 +228,7 @@ class ColorizeAnsiTC(TestCase): class UnormalizeTC(TestCase): - def test_unormalize(self): + def test_unormalize_no_substitute(self): data = [(u'\u0153nologie', u'oenologie'), (u'\u0152nologie', u'OEnologie'), (u'l\xf8to', u'loto'), @@ -236,11 +236,19 @@ class UnormalizeTC(TestCase): (u'àèùéïîôêç', u'aeueiioec'), (u'ÀÈÙÉÏÎÔÊÇ', u'AEUEIIOEC'), (u'\xa0', u' '), # NO-BREAK SPACE managed by NFKD decomposition + (u'\u0154', u'R'), ] for input, output in data: yield self.assertEqual, tu.unormalize(input), output - self.assertRaises(ValueError, tu.unormalize, u"non ascii char is \u0154", + + def test_unormalize_substitute(self): + self.assertEqual(tu.unormalize(u'ab \u8000 cd', substitute='_'), + 'ab _ cd') + + def test_unormalize_backward_compat(self): + self.assertRaises(ValueError, tu.unormalize, u"\u8000", ignorenonascii=False) + self.assertEqual(tu.unormalize(u"\u8000", ignorenonascii=True), u'') class ModuleDocTest(DocTest): -- cgit v1.2.1