diff options
author | Julien Jehannet <julien.jehannet@logilab.fr> | 2010-09-23 14:52:49 +0200 |
---|---|---|
committer | Julien Jehannet <julien.jehannet@logilab.fr> | 2010-09-23 14:52:49 +0200 |
commit | 7e8c50863b305a6cb01f715d1f8f41042d919ccf (patch) | |
tree | 95a500796ddec9b7d5c74ed9213cc967f330881e /test | |
parent | a4d541d67062fe234be812c7ee83a8f48440916b (diff) | |
download | logilab-common-7e8c50863b305a6cb01f715d1f8f41042d919ccf.tar.gz |
[textutils] use NFKD decomposition in unormalize()
The normal form KD (NFKD) will apply the compatibility decomposition, i.e.
replace all compatibility characters with their equivalents.
Diffstat (limited to 'test')
-rw-r--r-- | test/unittest_textutils.py | 10 |
1 file changed, 8 insertions, 2 deletions
```diff
diff --git a/test/unittest_textutils.py b/test/unittest_textutils.py
index 33f73ec..07f0fa4 100644
--- a/test/unittest_textutils.py
+++ b/test/unittest_textutils.py
@@ -233,9 +233,15 @@ class UnormalizeTC(TestCase):
                 (u'\u0152nologie', u'OEnologie'),
                 (u'l\xf8to', u'loto'),
                 (u'été', u'ete'),
-                ]
+                (u'àèùéïîôêç', u'aeueiioec'),
+                (u'ÀÈÙÉÏÎÔÊÇ', u'AEUEIIOEC'),
+                (u'\xa0', u' '), # NO-BREAK SPACE managed by NFKD decomposition
+               ]
         for input, output in data:
-            yield self.assertEquals, tu.unormalize(input), output
+            yield self.assertEqual, tu.unormalize(input), output
+        self.assertRaises(ValueError, tu.unormalize, u"non ascii char is \u0154",
+                          ignorenonascii=False)
+
 
 class ModuleDocTest(DocTest):
     """test doc test in this module"""
```