summary | refs | log | tree | commit | diff
path: root/test
diff options
context:
space:
mode:
author: Adrien Di Mascio <Adrien.DiMascio@logilab.fr> 2011-03-02 09:13:40 +0100
committer: Adrien Di Mascio <Adrien.DiMascio@logilab.fr> 2011-03-02 09:13:40 +0100
commit: 7b67e09e06d8d9c96b9ff36934ca7427036c3a74 (patch)
tree: f8b1454445ccba94629c6297b2e0a60483b6b408 /test
parent: 4484aa1875d2cddb9afea97bd4d7069aec066825 (diff)
download: logilab-common-7b67e09e06d8d9c96b9ff36934ca7427036c3a74.tar.gz
[textutils] fix unormalize implementation
Give NFKD decomposition a chance even if ord(c) > 2**8; this handles a few more cases. Add an optional `substitute` parameter to provide a replacement character if decomposition fails.
Diffstat (limited to 'test')
-rw-r--r-- test/unittest_textutils.py | 12
1 files changed, 10 insertions, 2 deletions
diff --git a/test/unittest_textutils.py b/test/unittest_textutils.py
index 75b9cbb..d72a4a1 100644
--- a/test/unittest_textutils.py
+++ b/test/unittest_textutils.py
@@ -228,7 +228,7 @@ class ColorizeAnsiTC(TestCase):
class UnormalizeTC(TestCase):
- def test_unormalize(self):
+ def test_unormalize_no_substitute(self):
data = [(u'\u0153nologie', u'oenologie'),
(u'\u0152nologie', u'OEnologie'),
(u'l\xf8to', u'loto'),
@@ -236,11 +236,19 @@ class UnormalizeTC(TestCase):
(u'àèùéïîôêç', u'aeueiioec'),
(u'ÀÈÙÉÏÎÔÊÇ', u'AEUEIIOEC'),
(u'\xa0', u' '), # NO-BREAK SPACE managed by NFKD decomposition
+ (u'\u0154', u'R'),
]
for input, output in data:
yield self.assertEqual, tu.unormalize(input), output
- self.assertRaises(ValueError, tu.unormalize, u"non ascii char is \u0154",
+
+ def test_unormalize_substitute(self):
+ self.assertEqual(tu.unormalize(u'ab \u8000 cd', substitute='_'),
+ 'ab _ cd')
+
+ def test_unormalize_backward_compat(self):
+ self.assertRaises(ValueError, tu.unormalize, u"\u8000",
ignorenonascii=False)
+ self.assertEqual(tu.unormalize(u"\u8000", ignorenonascii=True), u'')
class ModuleDocTest(DocTest):