Add encodeutils.to_utf8() function

The function replaces a very common pattern in code bases supporting both Python 2 and Python 3:

    if isinstance(text, six.text_type):
        text = text.encode('utf-8')

to_utf8() accepts subtypes of bytes and six.text_type. For example, oslo.i18n Message objects are accepted and encoded to UTF-8 as expected.

Using encodeutils.safe_encode(text) is not reliable because it relies on the current locale encoding, which can be ASCII, whereas UTF-8 is expected.

Having to write encodeutils.safe_encode(text, incoming='utf-8') is not obvious and is error-prone (it's easy to forget the incoming parameter).

Change-Id: I00463716b6012cbef383855999f63f99f2f52540
author: Victor Stinner <vstinner@redhat.com> 2016-01-20 16:36:52 +0100
committer: Victor Stinner <vstinner@redhat.com> 2016-01-20 16:47:23 +0100
commit: de84b5ba03f10158c16142ef5410b584867e3e95 (patch)
tree: 9218db24e06bcce7bf964500cf1e2a93481b23bb
parent: 68d55698549511094612b7206dc2456701aaf087 (diff)
download: oslo-utils-de84b5ba03f10158c16142ef5410b584867e3e95.tar.gz
2 files changed, 33 insertions, 0 deletions
diff --git a/oslo_utils/encodeutils.py b/oslo_utils/encodeutils.py
index 7b87226..3827631 100644
--- a/oslo_utils/encodeutils.py
+++ b/oslo_utils/encodeutils.py
@@ -71,6 +71,9 @@ def safe_encode(text, incoming=None,
     :returns: text or a bytestring `encoding` encoded
                 representation of it.
     :raises TypeError: If text is not an instance of str
+
+    See also the to_utf8() function, which is simpler and doesn't depend
+    on the locale encoding.
     """
     if not isinstance(text, (six.string_types, six.binary_type)):
         raise TypeError("%s can't be encoded" % type(text))
@@ -95,6 +98,22 @@ def safe_encode(text, incoming=None,
         return text
 
 
+def to_utf8(text):
+    """Encode Unicode to UTF-8, return bytes unchanged.
+
+    Raise TypeError if text is not a bytes string or a Unicode string.
+
+    .. versionadded:: 3.5
+    """
+    if isinstance(text, bytes):
+        return text
+    elif isinstance(text, six.text_type):
+        return text.encode('utf-8')
+    else:
+        raise TypeError("bytes or Unicode expected, got %s"
+                        % type(text).__name__)
+
+
 def exception_to_unicode(exc):
     """Get the message of an exception as a Unicode string.
 
diff --git a/oslo_utils/tests/tests_encodeutils.py b/oslo_utils/tests/tests_encodeutils.py
index 984ebc3..8cceb8e 100644
--- a/oslo_utils/tests/tests_encodeutils.py
+++ b/oslo_utils/tests/tests_encodeutils.py
@@ -108,6 +108,20 @@ class EncodeUtilsTest(test_base.BaseTestCase):
         self.assertNotEqual(text, result)
         self.assertNotEqual(six.b("foo\xf1bar"), result)
 
+    def test_to_utf8(self):
+        self.assertEqual(encodeutils.to_utf8(b'a\xe9\xff'),        # bytes
+                         b'a\xe9\xff')
+        self.assertEqual(encodeutils.to_utf8(u'a\xe9\xff\u20ac'),  # Unicode
+                         b'a\xc3\xa9\xc3\xbf\xe2\x82\xac')
+        self.assertRaises(TypeError, encodeutils.to_utf8, 123)     # invalid
+
+        # oslo.i18n Message objects should also be accepted for convenience.
+        # It works because Message is a subclass of six.text_type. Use the
+        # lazy translation to get a Message instance of oslo_i18n.
+        msg = oslo_i18n.fixture.Translation().lazy("test")
+        self.assertEqual(encodeutils.to_utf8(msg),
+                         b'test')
+
 
 class ExceptionToUnicodeTest(test_base.BaseTestCase):
author	Victor Stinner <vstinner@redhat.com>	2016-01-20 16:36:52 +0100
committer	Victor Stinner <vstinner@redhat.com>	2016-01-20 16:47:23 +0100
commit	de84b5ba03f10158c16142ef5410b584867e3e95 (patch)
tree	9218db24e06bcce7bf964500cf1e2a93481b23bb
parent	68d55698549511094612b7206dc2456701aaf087 (diff)
download	oslo-utils-de84b5ba03f10158c16142ef5410b584867e3e95.tar.gz