diff options
author | Victor Stinner <vstinner@redhat.com> | 2016-01-20 16:36:52 +0100 |
---|---|---|
committer | Victor Stinner <vstinner@redhat.com> | 2016-01-20 16:47:23 +0100 |
commit | de84b5ba03f10158c16142ef5410b584867e3e95 (patch) | |
tree | 9218db24e06bcce7bf964500cf1e2a93481b23bb | |
parent | 68d55698549511094612b7206dc2456701aaf087 (diff) | |
download | oslo-utils-de84b5ba03f10158c16142ef5410b584867e3e95.tar.gz |
Add encodeutils.to_utf8() function
The function replaces a very common pattern in code base supporting
Python 2 and Python 3:
    if isinstance(text, six.text_type):
        text = text.encode('utf-8')
to_utf8() accepts subtypes of bytes and six.text_type. For example,
oslo.i18n Message objects are accepted and encoded to UTF-8 as
expected.
Using encodeutils.safe_encode(text) is not reliable because it relies
on the current locale encoding which can be ASCII, whereas UTF-8 is
expected. Having to write encodeutils.safe_encode(text,
incoming='utf-8') is not obvious and error-prone (it's easy to forget
the incoming parameter).
Change-Id: I00463716b6012cbef383855999f63f99f2f52540
-rw-r--r-- | oslo_utils/encodeutils.py | 19 | ||||
-rw-r--r-- | oslo_utils/tests/tests_encodeutils.py | 14 |
2 files changed, 33 insertions, 0 deletions
diff --git a/oslo_utils/encodeutils.py b/oslo_utils/encodeutils.py index 7b87226..3827631 100644 --- a/oslo_utils/encodeutils.py +++ b/oslo_utils/encodeutils.py @@ -71,6 +71,9 @@ def safe_encode(text, incoming=None, :returns: text or a bytestring `encoding` encoded representation of it. :raises TypeError: If text is not an instance of str + + See also to_utf8() function which is simpler and don't depend on + the locale encoding. """ if not isinstance(text, (six.string_types, six.binary_type)): raise TypeError("%s can't be encoded" % type(text)) @@ -95,6 +98,22 @@ def safe_encode(text, incoming=None, return text +def to_utf8(text): + """Encode Unicode to UTF-8, return bytes unchanged. + + Raise TypeError if text is not a bytes string or a Unicode string. + + .. versionadded:: 3.5 + """ + if isinstance(text, bytes): + return text + elif isinstance(text, six.text_type): + return text.encode('utf-8') + else: + raise TypeError("bytes or Unicode expected, got %s" + % type(text).__name__) + + def exception_to_unicode(exc): """Get the message of an exception as a Unicode string. diff --git a/oslo_utils/tests/tests_encodeutils.py b/oslo_utils/tests/tests_encodeutils.py index 984ebc3..8cceb8e 100644 --- a/oslo_utils/tests/tests_encodeutils.py +++ b/oslo_utils/tests/tests_encodeutils.py @@ -108,6 +108,20 @@ class EncodeUtilsTest(test_base.BaseTestCase): self.assertNotEqual(text, result) self.assertNotEqual(six.b("foo\xf1bar"), result) + def test_to_utf8(self): + self.assertEqual(encodeutils.to_utf8(b'a\xe9\xff'), # bytes + b'a\xe9\xff') + self.assertEqual(encodeutils.to_utf8(u'a\xe9\xff\u20ac'), # Unicode + b'a\xc3\xa9\xc3\xbf\xe2\x82\xac') + self.assertRaises(TypeError, encodeutils.to_utf8, 123) # invalid + + # oslo.i18n Message objects should also be accepted for convenience. + # It works because Message is a subclass of six.text_type. Use the + # lazy translation to get a Message instance of oslo_i18n. 
+ msg = oslo_i18n.fixture.Translation().lazy("test") + self.assertEqual(encodeutils.to_utf8(msg), + b'test') + class ExceptionToUnicodeTest(test_base.BaseTestCase): |