summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Victor Stinner <vstinner@redhat.com> 2016-01-20 16:36:52 +0100
committer: Victor Stinner <vstinner@redhat.com> 2016-01-20 16:47:23 +0100
commit: de84b5ba03f10158c16142ef5410b584867e3e95 (patch)
tree: 9218db24e06bcce7bf964500cf1e2a93481b23bb
parent: 68d55698549511094612b7206dc2456701aaf087 (diff)
download: oslo-utils-de84b5ba03f10158c16142ef5410b584867e3e95.tar.gz
Add encodeutils.to_utf8() function
The function replaces a very common pattern in code bases supporting both Python 2 and Python 3:

    if isinstance(text, six.text_type):
        text = text.encode('utf-8')

to_utf8() accepts subtypes of bytes and six.text_type. For example, oslo.i18n Message objects are accepted and encoded to UTF-8 as expected.

Using encodeutils.safe_encode(text) is not reliable because it relies on the current locale encoding, which can be ASCII, whereas UTF-8 is expected. Having to write encodeutils.safe_encode(text, incoming='utf-8') is not obvious and is error-prone (it's easy to forget the incoming parameter).

Change-Id: I00463716b6012cbef383855999f63f99f2f52540
-rw-r--r-- oslo_utils/encodeutils.py 19
-rw-r--r-- oslo_utils/tests/tests_encodeutils.py 14
2 files changed, 33 insertions, 0 deletions
diff --git a/oslo_utils/encodeutils.py b/oslo_utils/encodeutils.py
index 7b87226..3827631 100644
--- a/oslo_utils/encodeutils.py
+++ b/oslo_utils/encodeutils.py
@@ -71,6 +71,9 @@ def safe_encode(text, incoming=None,
:returns: text or a bytestring `encoding` encoded
representation of it.
:raises TypeError: If text is not an instance of str
+
+ See also the to_utf8() function, which is simpler and doesn't depend on
+ the locale encoding.
"""
if not isinstance(text, (six.string_types, six.binary_type)):
raise TypeError("%s can't be encoded" % type(text))
@@ -95,6 +98,22 @@ def safe_encode(text, incoming=None,
return text
+def to_utf8(text):
+ """Encode Unicode to UTF-8, return bytes unchanged.
+
+ Raise TypeError if text is not a bytes string or a Unicode string.
+
+ .. versionadded:: 3.5
+ """
+ if isinstance(text, bytes):
+ return text
+ elif isinstance(text, six.text_type):
+ return text.encode('utf-8')
+ else:
+ raise TypeError("bytes or Unicode expected, got %s"
+ % type(text).__name__)
+
+
def exception_to_unicode(exc):
"""Get the message of an exception as a Unicode string.
diff --git a/oslo_utils/tests/tests_encodeutils.py b/oslo_utils/tests/tests_encodeutils.py
index 984ebc3..8cceb8e 100644
--- a/oslo_utils/tests/tests_encodeutils.py
+++ b/oslo_utils/tests/tests_encodeutils.py
@@ -108,6 +108,20 @@ class EncodeUtilsTest(test_base.BaseTestCase):
self.assertNotEqual(text, result)
self.assertNotEqual(six.b("foo\xf1bar"), result)
+ def test_to_utf8(self):
+ self.assertEqual(encodeutils.to_utf8(b'a\xe9\xff'), # bytes
+ b'a\xe9\xff')
+ self.assertEqual(encodeutils.to_utf8(u'a\xe9\xff\u20ac'), # Unicode
+ b'a\xc3\xa9\xc3\xbf\xe2\x82\xac')
+ self.assertRaises(TypeError, encodeutils.to_utf8, 123) # invalid
+
+ # oslo.i18n Message objects should also be accepted for convenience.
+ # It works because Message is a subclass of six.text_type. Use the
+ # lazy translation to get a Message instance of oslo_i18n.
+ msg = oslo_i18n.fixture.Translation().lazy("test")
+ self.assertEqual(encodeutils.to_utf8(msg),
+ b'test')
+
class ExceptionToUnicodeTest(test_base.BaseTestCase):