summaryrefslogtreecommitdiff
path: root/Lib/test/test_multibytecodec.py
diff options
context:
space:
mode:
authorHye-Shik Chang <hyeshik@gmail.com>2004-01-17 14:29:29 +0000
committerHye-Shik Chang <hyeshik@gmail.com>2004-01-17 14:29:29 +0000
commit3e2a30692085d32ac63f72b35da39158a471fc68 (patch)
tree4cbe735f61eae87ac56a13ca6bd32113b98bd03d /Lib/test/test_multibytecodec.py
parentcd1f7430cb8f48de970021071d7683054c23b10f (diff)
downloadcpython-git-3e2a30692085d32ac63f72b35da39158a471fc68.tar.gz
Add CJK codecs support as discussed on python-dev. (SF #873597)
Several style fixes are suggested by Martin v. Loewis and Marc-Andre Lemburg. Thanks!
Diffstat (limited to 'Lib/test/test_multibytecodec.py')
-rw-r--r--Lib/test/test_multibytecodec.py79
1 files changed, 79 insertions, 0 deletions
diff --git a/Lib/test/test_multibytecodec.py b/Lib/test/test_multibytecodec.py
new file mode 100644
index 0000000000..f686b702da
--- /dev/null
+++ b/Lib/test/test_multibytecodec.py
@@ -0,0 +1,79 @@
+#!/usr/bin/env python
+#
+# test_multibytecodec.py
+# Unit test for multibytecodec itself
+#
+# $CJKCodecs: test_multibytecodec.py,v 1.5 2004/01/06 02:26:28 perky Exp $
+
+from test import test_support
+from test import test_multibytecodec_support
+import unittest, StringIO, codecs
+
+class Test_StreamWriter(unittest.TestCase):
+    # Tests for codec StreamWriter objects.  Comments are used instead of
+    # docstrings so that unittest keeps reporting the test method names.
+
+    if len(u'\U00012345') == 2: # UCS2
+        # Here u'\U00012345' occupies two code units, so indexing it with
+        # [0] and [1] feeds the writer one half of the pair at a time.
+        # When the length is not 2 (UCS4) these tests are not defined.
+
+        def test_gb18030(self):
+            s = StringIO.StringIO()
+            # codecs.getwriter() is the documented way to obtain the
+            # StreamWriter factory (the 4th item of the lookup() result).
+            c = codecs.getwriter('gb18030')(s)
+            c.write(u'123')
+            self.assertEqual(s.getvalue(), '123')
+            c.write(u'\U00012345')
+            self.assertEqual(s.getvalue(), '123\x907\x959')
+            # A lone first half must not produce any output yet ...
+            c.write(u'\U00012345'[0])
+            self.assertEqual(s.getvalue(), '123\x907\x959')
+            # ... and is emitted once the second half arrives.
+            c.write(u'\U00012345'[1] + u'\U00012345' + u'\uac00\u00ac')
+            self.assertEqual(s.getvalue(),
+                    '123\x907\x959\x907\x959\x907\x959\x827\xcf5\x810\x851')
+            c.write(u'\U00012345'[0])
+            self.assertEqual(s.getvalue(),
+                    '123\x907\x959\x907\x959\x907\x959\x827\xcf5\x810\x851')
+            # reset() with a pending first half is an error for gb18030
+            # and must leave the stream contents untouched.
+            self.assertRaises(UnicodeError, c.reset)
+            self.assertEqual(s.getvalue(),
+                    '123\x907\x959\x907\x959\x907\x959\x827\xcf5\x810\x851')
+
+        # Only defined when test_multibytecodec_support.__cjkcodecs__ is
+        # true.  NOTE(review): the original remark here read "standard
+        # utf-8 codecs has broken StreamReader"; this test exercises a
+        # StreamWriter, so presumably the standard utf-8 writer is what
+        # cannot pass it -- confirm.
+        if test_multibytecodec_support.__cjkcodecs__:
+            def test_utf_8(self):
+                s = StringIO.StringIO()
+                c = codecs.getwriter('utf-8')(s)
+                c.write(u'123')
+                self.assertEqual(s.getvalue(), '123')
+                c.write(u'\U00012345')
+                self.assertEqual(s.getvalue(), '123\xf0\x92\x8d\x85')
+                # A lone first half must not produce any output yet ...
+                c.write(u'\U00012345'[0])
+                self.assertEqual(s.getvalue(), '123\xf0\x92\x8d\x85')
+                # ... and is emitted once the second half arrives.
+                c.write(u'\U00012345'[1] + u'\U00012345' + u'\uac00\u00ac')
+                self.assertEqual(s.getvalue(),
+                        '123\xf0\x92\x8d\x85\xf0\x92\x8d\x85\xf0\x92\x8d\x85'
+                        '\xea\xb0\x80\xc2\xac')
+                c.write(u'\U00012345'[0])
+                self.assertEqual(s.getvalue(),
+                        '123\xf0\x92\x8d\x85\xf0\x92\x8d\x85\xf0\x92\x8d\x85'
+                        '\xea\xb0\x80\xc2\xac')
+                # Unlike gb18030, reset() here does not raise: the pending
+                # first half is written out on its own ...
+                c.reset()
+                self.assertEqual(s.getvalue(),
+                        '123\xf0\x92\x8d\x85\xf0\x92\x8d\x85\xf0\x92\x8d\x85'
+                        '\xea\xb0\x80\xc2\xac\xed\xa0\x88')
+                # ... and a second half written afterwards is likewise
+                # encoded by itself.
+                c.write(u'\U00012345'[1])
+                self.assertEqual(s.getvalue(),
+                        '123\xf0\x92\x8d\x85\xf0\x92\x8d\x85\xf0\x92\x8d\x85'
+                        '\xea\xb0\x80\xc2\xac\xed\xa0\x88\xed\xbd\x85')
+
+    def test_nullcoding(self):
+        # Empty input must round-trip to empty output in both directions.
+        self.assertEqual(''.decode('utf-8'), u'')
+        self.assertEqual(unicode('', 'utf-8'), u'')
+        self.assertEqual(u''.encode('utf-8'), '')
+
+    def test_str_decode(self):
+        # Calls encode() on a byte string; presumably named "decode"
+        # because the str has to be decoded implicitly first -- confirm.
+        self.assertEqual('abcd'.encode('utf-8'), 'abcd')
+
+def test_main():
+    # Wrap the Test_StreamWriter tests in a suite and hand it to the
+    # regression-test runner.
+    writer_tests = unittest.makeSuite(Test_StreamWriter)
+    test_support.run_suite(unittest.TestSuite([writer_tests]))
+
+# Allow running this test module directly as a script.
+if __name__ == "__main__":
+    test_main()
+