Split strutils into 2 different modules

This patch pulls the encoding-related functions out of strutils into their own encodeutils module. We could probably find a better name for strutils now, although the current name seems short and contextualized enough. Partially-implements blueprint: graduate-oslo-utils Change-Id: Ib76065823c8a1b56020f14cea80b6d73e150aa49
author: Flavio Percoco <flaper87@gmail.com> 2014-07-11 19:38:42 +0200
committer: Flavio Percoco <flaper87@gmail.com> 2014-07-11 19:41:31 +0200
commit: 7687a04ea44cb9a0a40b6ba794160ffe5e1adf90 (patch)
tree: 8accf4fea9b046a93d0ae4d9392b99a87b246efa
parent: 5621114c62c9feaa53daec91c5d682b5c0cda40c (diff)
download: oslo-utils-7687a04ea44cb9a0a40b6ba794160ffe5e1adf90.tar.gz
4 files changed, 157 insertions, 115 deletions
diff --git a/oslo/utils/encodeutils.py b/oslo/utils/encodeutils.py
new file mode 100644
index 0000000..c8a6cb8
--- /dev/null
+++ b/oslo/utils/encodeutils.py
@@ -0,0 +1,89 @@
+# Copyright 2014 Red Hat, Inc.
+# All Rights Reserved.
+#
+#    Licensed under the Apache License, Version 2.0 (the "License"); you may
+#    not use this file except in compliance with the License. You may obtain
+#    a copy of the License at
+#
+#         http://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+#    License for the specific language governing permissions and limitations
+#    under the License.
+
+import sys
+
+import six
+
+
+def safe_decode(text, incoming=None, errors='strict'):
+    """Decodes incoming text/bytes string using `incoming` if they're not
+       already unicode.
+
+    :param incoming: Text's current encoding
+    :param errors: Errors handling policy. See here for valid
+        values http://docs.python.org/2/library/codecs.html
+    :returns: text or a unicode `incoming` encoded
+                representation of it.
+    :raises TypeError: If text is not an instance of str
+    """
+    if not isinstance(text, (six.string_types, six.binary_type)):
+        raise TypeError("%s can't be decoded" % type(text))
+
+    if isinstance(text, six.text_type):
+        return text
+
+    if not incoming:
+        incoming = (sys.stdin.encoding or
+                    sys.getdefaultencoding())
+
+    try:
+        return text.decode(incoming, errors)
+    except UnicodeDecodeError:
+        # Note(flaper87) If we get here, it means that
+        # sys.stdin.encoding / sys.getdefaultencoding
+        # didn't return a suitable encoding to decode
+        # text. This happens mostly when global LANG
+        # var is not set correctly and there's no
+        # default encoding. In this case, most likely
+        # python will use ASCII or ANSI encoders as
+        # default encodings but they won't be capable
+        # of decoding non-ASCII characters.
+        #
+        # Also, UTF-8 is being used since it's an ASCII
+        # extension.
+        return text.decode('utf-8', errors)
+
+
+def safe_encode(text, incoming=None,
+                encoding='utf-8', errors='strict'):
+    """Encodes incoming text/bytes string using `encoding`.
+
+    If incoming is not specified, text is expected to be encoded with
+    current python's default encoding. (`sys.getdefaultencoding`)
+
+    :param incoming: Text's current encoding
+    :param encoding: Expected encoding for text (Default UTF-8)
+    :param errors: Errors handling policy. See here for valid
+        values http://docs.python.org/2/library/codecs.html
+    :returns: text or a bytestring `encoding` encoded
+                representation of it.
+    :raises TypeError: If text is not an instance of str
+    """
+    if not isinstance(text, (six.string_types, six.binary_type)):
+        raise TypeError("%s can't be encoded" % type(text))
+
+    if not incoming:
+        incoming = (sys.stdin.encoding or
+                    sys.getdefaultencoding())
+
+    if isinstance(text, six.text_type):
+        return text.encode(encoding, errors)
+    elif text and encoding != incoming:
+        # Decode text before encoding it with `encoding`
+        text = safe_decode(text, incoming, errors)
+        return text.encode(encoding, errors)
+    else:
+        return text
diff --git a/oslo/utils/strutils.py b/oslo/utils/strutils.py
index bcb9626..2714a1e 100644
--- a/oslo/utils/strutils.py
+++ b/oslo/utils/strutils.py
@@ -19,11 +19,11 @@ System-level utilities and helper functions.
 
 import math
 import re
-import sys
 import unicodedata
 
 import six
 
+from oslo.utils import encodeutils
 from oslo.utils.openstack.common.gettextutils import _
 
 
@@ -97,77 +97,6 @@ def bool_from_string(subject, strict=False, default=False):
         return default
 
 
-def safe_decode(text, incoming=None, errors='strict'):
-    """Decodes incoming text/bytes string using `incoming` if they're not
-       already unicode.
-
-    :param incoming: Text's current encoding
-    :param errors: Errors handling policy. See here for valid
-        values http://docs.python.org/2/library/codecs.html
-    :returns: text or a unicode `incoming` encoded
-                representation of it.
-    :raises TypeError: If text is not an instance of str
-    """
-    if not isinstance(text, (six.string_types, six.binary_type)):
-        raise TypeError("%s can't be decoded" % type(text))
-
-    if isinstance(text, six.text_type):
-        return text
-
-    if not incoming:
-        incoming = (sys.stdin.encoding or
-                    sys.getdefaultencoding())
-
-    try:
-        return text.decode(incoming, errors)
-    except UnicodeDecodeError:
-        # Note(flaper87) If we get here, it means that
-        # sys.stdin.encoding / sys.getdefaultencoding
-        # didn't return a suitable encoding to decode
-        # text. This happens mostly when global LANG
-        # var is not set correctly and there's no
-        # default encoding. In this case, most likely
-        # python will use ASCII or ANSI encoders as
-        # default encodings but they won't be capable
-        # of decoding non-ASCII characters.
-        #
-        # Also, UTF-8 is being used since it's an ASCII
-        # extension.
-        return text.decode('utf-8', errors)
-
-
-def safe_encode(text, incoming=None,
-                encoding='utf-8', errors='strict'):
-    """Encodes incoming text/bytes string using `encoding`.
-
-    If incoming is not specified, text is expected to be encoded with
-    current python's default encoding. (`sys.getdefaultencoding`)
-
-    :param incoming: Text's current encoding
-    :param encoding: Expected encoding for text (Default UTF-8)
-    :param errors: Errors handling policy. See here for valid
-        values http://docs.python.org/2/library/codecs.html
-    :returns: text or a bytestring `encoding` encoded
-                representation of it.
-    :raises TypeError: If text is not an instance of str
-    """
-    if not isinstance(text, (six.string_types, six.binary_type)):
-        raise TypeError("%s can't be encoded" % type(text))
-
-    if not incoming:
-        incoming = (sys.stdin.encoding or
-                    sys.getdefaultencoding())
-
-    if isinstance(text, six.text_type):
-        return text.encode(encoding, errors)
-    elif text and encoding != incoming:
-        # Decode text before encoding it with `encoding`
-        text = safe_decode(text, incoming, errors)
-        return text.encode(encoding, errors)
-    else:
-        return text
-
-
 def string_to_bytes(text, unit_system='IEC', return_int=False):
     """Converts a string into an float representation of bytes.
 
@@ -229,7 +158,7 @@ def to_slug(value, incoming=None, errors="strict"):
     :returns: slugified unicode representation of `value`
     :raises TypeError: If text is not an instance of str
     """
-    value = safe_decode(value, incoming, errors)
+    value = encodeutils.safe_decode(value, incoming, errors)
     # NOTE(aababilov): no need to use safe_(encode|decode) here:
     # encodings are always "ascii", error handling is always "ignore"
     # and types are always known (first: unicode; second: str)
diff --git a/tests/test_strutils.py b/tests/test_strutils.py
index 0ee6bc6..4b37b21 100644
--- a/tests/test_strutils.py
+++ b/tests/test_strutils.py
@@ -143,48 +143,6 @@ class StrUtilsTest(test_base.BaseTestCase):
         self.assertEqual(1, strutils.int_from_bool_as_string(True))
         self.assertEqual(0, strutils.int_from_bool_as_string(False))
 
-    def test_safe_decode(self):
-        safe_decode = strutils.safe_decode
-        self.assertRaises(TypeError, safe_decode, True)
-        self.assertEqual(six.u('ni\xf1o'), safe_decode(six.b("ni\xc3\xb1o"),
-                         incoming="utf-8"))
-        if six.PY2:
-            # In Python 3, bytes.decode() doesn't support anymore
-            # bytes => bytes encodings like base64
-            self.assertEqual(six.u("test"), safe_decode("dGVzdA==",
-                             incoming='base64'))
-
-        self.assertEqual(six.u("strange"), safe_decode(six.b('\x80strange'),
-                         errors='ignore'))
-
-        self.assertEqual(six.u('\xc0'), safe_decode(six.b('\xc0'),
-                         incoming='iso-8859-1'))
-
-        # Forcing incoming to ascii so it falls back to utf-8
-        self.assertEqual(six.u('ni\xf1o'), safe_decode(six.b('ni\xc3\xb1o'),
-                         incoming='ascii'))
-
-        self.assertEqual(six.u('foo'), safe_decode(b'foo'))
-
-    def test_safe_encode(self):
-        safe_encode = strutils.safe_encode
-        self.assertRaises(TypeError, safe_encode, True)
-        self.assertEqual(six.b("ni\xc3\xb1o"), safe_encode(six.u('ni\xf1o'),
-                                                           encoding="utf-8"))
-        if six.PY2:
-            # In Python 3, str.encode() doesn't support anymore
-            # text => text encodings like base64
-            self.assertEqual(six.b("dGVzdA==\n"),
-                             safe_encode("test", encoding='base64'))
-        self.assertEqual(six.b('ni\xf1o'), safe_encode(six.b("ni\xc3\xb1o"),
-                                                       encoding="iso-8859-1",
-                                                       incoming="utf-8"))
-
-        # Forcing incoming to ascii so it falls back to utf-8
-        self.assertEqual(six.b('ni\xc3\xb1o'),
-                         safe_encode(six.b('ni\xc3\xb1o'), incoming='ascii'))
-        self.assertEqual(six.b('foo'), safe_encode(six.u('foo')))
-
     def test_slugify(self):
         to_slug = strutils.to_slug
         self.assertRaises(TypeError, to_slug, True)
diff --git a/tests/tests_encodeutils.py b/tests/tests_encodeutils.py
new file mode 100644
index 0000000..f3bb09c
--- /dev/null
+++ b/tests/tests_encodeutils.py
@@ -0,0 +1,66 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2014 Red Hat, Inc.
+# All Rights Reserved.
+#
+#    Licensed under the Apache License, Version 2.0 (the "License"); you may
+#    not use this file except in compliance with the License. You may obtain
+#    a copy of the License at
+#
+#         http://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+#    License for the specific language governing permissions and limitations
+#    under the License.
+
+from oslotest import base as test_base
+import six
+
+from oslo.utils import encodeutils
+
+
+class EncodeUtilsTest(test_base.BaseTestCase):
+
+    def test_safe_decode(self):
+        safe_decode = encodeutils.safe_decode
+        self.assertRaises(TypeError, safe_decode, True)
+        self.assertEqual(six.u('ni\xf1o'), safe_decode(six.b("ni\xc3\xb1o"),
+                         incoming="utf-8"))
+        if six.PY2:
+            # In Python 3, bytes.decode() doesn't support anymore
+            # bytes => bytes encodings like base64
+            self.assertEqual(six.u("test"), safe_decode("dGVzdA==",
+                             incoming='base64'))
+
+        self.assertEqual(six.u("strange"), safe_decode(six.b('\x80strange'),
+                         errors='ignore'))
+
+        self.assertEqual(six.u('\xc0'), safe_decode(six.b('\xc0'),
+                         incoming='iso-8859-1'))
+
+        # Forcing incoming to ascii so it falls back to utf-8
+        self.assertEqual(six.u('ni\xf1o'), safe_decode(six.b('ni\xc3\xb1o'),
+                         incoming='ascii'))
+
+        self.assertEqual(six.u('foo'), safe_decode(b'foo'))
+
+    def test_safe_encode(self):
+        safe_encode = encodeutils.safe_encode
+        self.assertRaises(TypeError, safe_encode, True)
+        self.assertEqual(six.b("ni\xc3\xb1o"), safe_encode(six.u('ni\xf1o'),
+                                                           encoding="utf-8"))
+        if six.PY2:
+            # In Python 3, str.encode() doesn't support anymore
+            # text => text encodings like base64
+            self.assertEqual(six.b("dGVzdA==\n"),
+                             safe_encode("test", encoding='base64'))
+        self.assertEqual(six.b('ni\xf1o'), safe_encode(six.b("ni\xc3\xb1o"),
+                                                       encoding="iso-8859-1",
+                                                       incoming="utf-8"))
+
+        # Forcing incoming to ascii so it falls back to utf-8
+        self.assertEqual(six.b('ni\xc3\xb1o'),
+                         safe_encode(six.b('ni\xc3\xb1o'), incoming='ascii'))
+        self.assertEqual(six.b('foo'), safe_encode(six.u('foo')))
author	Flavio Percoco <flaper87@gmail.com>	2014-07-11 19:38:42 +0200
committer	Flavio Percoco <flaper87@gmail.com>	2014-07-11 19:41:31 +0200
commit	7687a04ea44cb9a0a40b6ba794160ffe5e1adf90 (patch)
tree	8accf4fea9b046a93d0ae4d9392b99a87b246efa
parent	5621114c62c9feaa53daec91c5d682b5c0cda40c (diff)
download	oslo-utils-7687a04ea44cb9a0a40b6ba794160ffe5e1adf90.tar.gz