From 7e8c50863b305a6cb01f715d1f8f41042d919ccf Mon Sep 17 00:00:00 2001
From: Julien Jehannet
Date: Thu, 23 Sep 2010 14:52:49 +0200
Subject: [textutils] use NFKD decomposition in unormalize()

The normal form KD (NFKD) will apply the compatibility decomposition, i.e.
replace all compatibility characters with their equivalents.
---
 test/unittest_textutils.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'test')

diff --git a/test/unittest_textutils.py b/test/unittest_textutils.py
index 33f73ec..07f0fa4 100644
--- a/test/unittest_textutils.py
+++ b/test/unittest_textutils.py
@@ -233,9 +233,15 @@ class UnormalizeTC(TestCase):
                 (u'\u0152nologie', u'OEnologie'),
                 (u'l\xf8to', u'loto'),
                 (u'été', u'ete'),
-                ]
+                (u'àèùéïîôêç', u'aeueiioec'),
+                (u'ÀÈÙÉÏÎÔÊÇ', u'AEUEIIOEC'),
+                (u'\xa0', u' '), # NO-BREAK SPACE managed by NFKD decomposition
+               ]
         for input, output in data:
-            yield self.assertEquals, tu.unormalize(input), output
+            yield self.assertEqual, tu.unormalize(input), output
+        self.assertRaises(ValueError, tu.unormalize, u"non ascii char is \u0154",
+                          ignorenonascii=False)
+
 
 class ModuleDocTest(DocTest):
     """test doc test in this module"""
-- 
cgit v1.2.1