summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Flavio Percoco <flaper87@gmail.com> 2014-07-11 19:38:42 +0200
committer Flavio Percoco <flaper87@gmail.com> 2014-07-11 19:41:31 +0200
commit 7687a04ea44cb9a0a40b6ba794160ffe5e1adf90 (patch)
tree 8accf4fea9b046a93d0ae4d9392b99a87b246efa
parent 5621114c62c9feaa53daec91c5d682b5c0cda40c (diff)
download oslo-utils-7687a04ea44cb9a0a40b6ba794160ffe5e1adf90.tar.gz
Split strutils into 2 different modules
This patch pulls the encoding-related functions (safe_decode and safe_encode) out of strutils into their own encodeutils module. We could probably find a better name for strutils now, although the current name seems short and contextualized enough.

Partially-implements blueprint: graduate-oslo-utils
Change-Id: Ib76065823c8a1b56020f14cea80b6d73e150aa49
-rw-r--r-- oslo/utils/encodeutils.py 89
-rw-r--r-- oslo/utils/strutils.py 75
-rw-r--r-- tests/test_strutils.py 42
-rw-r--r-- tests/tests_encodeutils.py 66
4 files changed, 157 insertions, 115 deletions
diff --git a/oslo/utils/encodeutils.py b/oslo/utils/encodeutils.py
new file mode 100644
index 0000000..c8a6cb8
--- /dev/null
+++ b/oslo/utils/encodeutils.py
@@ -0,0 +1,89 @@
+# Copyright 2014 Red Hat, Inc.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import sys
+
+import six
+
+
+def safe_decode(text, incoming=None, errors='strict'):
+ """Decodes incoming text/bytes string using `incoming` if they're not
+ already unicode.
+
+ :param incoming: Text's current encoding
+ :param errors: Errors handling policy. See here for valid
+ values http://docs.python.org/2/library/codecs.html
+ :returns: text or a unicode `incoming` encoded
+ representation of it.
+ :raises TypeError: If text is not an instance of str
+ """
+ if not isinstance(text, (six.string_types, six.binary_type)):
+ raise TypeError("%s can't be decoded" % type(text))
+
+ if isinstance(text, six.text_type):
+ return text
+
+ if not incoming:
+ incoming = (sys.stdin.encoding or
+ sys.getdefaultencoding())
+
+ try:
+ return text.decode(incoming, errors)
+ except UnicodeDecodeError:
+ # Note(flaper87) If we get here, it means that
+ # sys.stdin.encoding / sys.getdefaultencoding
+ # didn't return a suitable encoding to decode
+ # text. This happens mostly when global LANG
+ # var is not set correctly and there's no
+ # default encoding. In this case, most likely
+ # python will use ASCII or ANSI encoders as
+ # default encodings but they won't be capable
+ # of decoding non-ASCII characters.
+ #
+ # Also, UTF-8 is being used since it's an ASCII
+ # extension.
+ return text.decode('utf-8', errors)
+
+
+def safe_encode(text, incoming=None,
+ encoding='utf-8', errors='strict'):
+ """Encodes incoming text/bytes string using `encoding`.
+
+ If incoming is not specified, text is expected to be encoded with
+ current python's default encoding. (`sys.getdefaultencoding`)
+
+ :param incoming: Text's current encoding
+ :param encoding: Expected encoding for text (Default UTF-8)
+ :param errors: Errors handling policy. See here for valid
+ values http://docs.python.org/2/library/codecs.html
+ :returns: text or a bytestring `encoding` encoded
+ representation of it.
+ :raises TypeError: If text is not an instance of str
+ """
+ if not isinstance(text, (six.string_types, six.binary_type)):
+ raise TypeError("%s can't be encoded" % type(text))
+
+ if not incoming:
+ incoming = (sys.stdin.encoding or
+ sys.getdefaultencoding())
+
+ if isinstance(text, six.text_type):
+ return text.encode(encoding, errors)
+ elif text and encoding != incoming:
+ # Decode text before encoding it with `encoding`
+ text = safe_decode(text, incoming, errors)
+ return text.encode(encoding, errors)
+ else:
+ return text
diff --git a/oslo/utils/strutils.py b/oslo/utils/strutils.py
index bcb9626..2714a1e 100644
--- a/oslo/utils/strutils.py
+++ b/oslo/utils/strutils.py
@@ -19,11 +19,11 @@ System-level utilities and helper functions.
import math
import re
-import sys
import unicodedata
import six
+from oslo.utils import encodeutils
from oslo.utils.openstack.common.gettextutils import _
@@ -97,77 +97,6 @@ def bool_from_string(subject, strict=False, default=False):
return default
-def safe_decode(text, incoming=None, errors='strict'):
- """Decodes incoming text/bytes string using `incoming` if they're not
- already unicode.
-
- :param incoming: Text's current encoding
- :param errors: Errors handling policy. See here for valid
- values http://docs.python.org/2/library/codecs.html
- :returns: text or a unicode `incoming` encoded
- representation of it.
- :raises TypeError: If text is not an instance of str
- """
- if not isinstance(text, (six.string_types, six.binary_type)):
- raise TypeError("%s can't be decoded" % type(text))
-
- if isinstance(text, six.text_type):
- return text
-
- if not incoming:
- incoming = (sys.stdin.encoding or
- sys.getdefaultencoding())
-
- try:
- return text.decode(incoming, errors)
- except UnicodeDecodeError:
- # Note(flaper87) If we get here, it means that
- # sys.stdin.encoding / sys.getdefaultencoding
- # didn't return a suitable encoding to decode
- # text. This happens mostly when global LANG
- # var is not set correctly and there's no
- # default encoding. In this case, most likely
- # python will use ASCII or ANSI encoders as
- # default encodings but they won't be capable
- # of decoding non-ASCII characters.
- #
- # Also, UTF-8 is being used since it's an ASCII
- # extension.
- return text.decode('utf-8', errors)
-
-
-def safe_encode(text, incoming=None,
- encoding='utf-8', errors='strict'):
- """Encodes incoming text/bytes string using `encoding`.
-
- If incoming is not specified, text is expected to be encoded with
- current python's default encoding. (`sys.getdefaultencoding`)
-
- :param incoming: Text's current encoding
- :param encoding: Expected encoding for text (Default UTF-8)
- :param errors: Errors handling policy. See here for valid
- values http://docs.python.org/2/library/codecs.html
- :returns: text or a bytestring `encoding` encoded
- representation of it.
- :raises TypeError: If text is not an instance of str
- """
- if not isinstance(text, (six.string_types, six.binary_type)):
- raise TypeError("%s can't be encoded" % type(text))
-
- if not incoming:
- incoming = (sys.stdin.encoding or
- sys.getdefaultencoding())
-
- if isinstance(text, six.text_type):
- return text.encode(encoding, errors)
- elif text and encoding != incoming:
- # Decode text before encoding it with `encoding`
- text = safe_decode(text, incoming, errors)
- return text.encode(encoding, errors)
- else:
- return text
-
-
def string_to_bytes(text, unit_system='IEC', return_int=False):
"""Converts a string into an float representation of bytes.
@@ -229,7 +158,7 @@ def to_slug(value, incoming=None, errors="strict"):
:returns: slugified unicode representation of `value`
:raises TypeError: If text is not an instance of str
"""
- value = safe_decode(value, incoming, errors)
+ value = encodeutils.safe_decode(value, incoming, errors)
# NOTE(aababilov): no need to use safe_(encode|decode) here:
# encodings are always "ascii", error handling is always "ignore"
# and types are always known (first: unicode; second: str)
diff --git a/tests/test_strutils.py b/tests/test_strutils.py
index 0ee6bc6..4b37b21 100644
--- a/tests/test_strutils.py
+++ b/tests/test_strutils.py
@@ -143,48 +143,6 @@ class StrUtilsTest(test_base.BaseTestCase):
self.assertEqual(1, strutils.int_from_bool_as_string(True))
self.assertEqual(0, strutils.int_from_bool_as_string(False))
- def test_safe_decode(self):
- safe_decode = strutils.safe_decode
- self.assertRaises(TypeError, safe_decode, True)
- self.assertEqual(six.u('ni\xf1o'), safe_decode(six.b("ni\xc3\xb1o"),
- incoming="utf-8"))
- if six.PY2:
- # In Python 3, bytes.decode() doesn't support anymore
- # bytes => bytes encodings like base64
- self.assertEqual(six.u("test"), safe_decode("dGVzdA==",
- incoming='base64'))
-
- self.assertEqual(six.u("strange"), safe_decode(six.b('\x80strange'),
- errors='ignore'))
-
- self.assertEqual(six.u('\xc0'), safe_decode(six.b('\xc0'),
- incoming='iso-8859-1'))
-
- # Forcing incoming to ascii so it falls back to utf-8
- self.assertEqual(six.u('ni\xf1o'), safe_decode(six.b('ni\xc3\xb1o'),
- incoming='ascii'))
-
- self.assertEqual(six.u('foo'), safe_decode(b'foo'))
-
- def test_safe_encode(self):
- safe_encode = strutils.safe_encode
- self.assertRaises(TypeError, safe_encode, True)
- self.assertEqual(six.b("ni\xc3\xb1o"), safe_encode(six.u('ni\xf1o'),
- encoding="utf-8"))
- if six.PY2:
- # In Python 3, str.encode() doesn't support anymore
- # text => text encodings like base64
- self.assertEqual(six.b("dGVzdA==\n"),
- safe_encode("test", encoding='base64'))
- self.assertEqual(six.b('ni\xf1o'), safe_encode(six.b("ni\xc3\xb1o"),
- encoding="iso-8859-1",
- incoming="utf-8"))
-
- # Forcing incoming to ascii so it falls back to utf-8
- self.assertEqual(six.b('ni\xc3\xb1o'),
- safe_encode(six.b('ni\xc3\xb1o'), incoming='ascii'))
- self.assertEqual(six.b('foo'), safe_encode(six.u('foo')))
-
def test_slugify(self):
to_slug = strutils.to_slug
self.assertRaises(TypeError, to_slug, True)
diff --git a/tests/tests_encodeutils.py b/tests/tests_encodeutils.py
new file mode 100644
index 0000000..f3bb09c
--- /dev/null
+++ b/tests/tests_encodeutils.py
@@ -0,0 +1,66 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2014 Red Hat, Inc.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+from oslotest import base as test_base
+import six
+
+from oslo.utils import encodeutils
+
+
+class EncodeUtilsTest(test_base.BaseTestCase):
+
+ def test_safe_decode(self):
+ safe_decode = encodeutils.safe_decode
+ self.assertRaises(TypeError, safe_decode, True)
+ self.assertEqual(six.u('ni\xf1o'), safe_decode(six.b("ni\xc3\xb1o"),
+ incoming="utf-8"))
+ if six.PY2:
+ # In Python 3, bytes.decode() doesn't support anymore
+ # bytes => bytes encodings like base64
+ self.assertEqual(six.u("test"), safe_decode("dGVzdA==",
+ incoming='base64'))
+
+ self.assertEqual(six.u("strange"), safe_decode(six.b('\x80strange'),
+ errors='ignore'))
+
+ self.assertEqual(six.u('\xc0'), safe_decode(six.b('\xc0'),
+ incoming='iso-8859-1'))
+
+ # Forcing incoming to ascii so it falls back to utf-8
+ self.assertEqual(six.u('ni\xf1o'), safe_decode(six.b('ni\xc3\xb1o'),
+ incoming='ascii'))
+
+ self.assertEqual(six.u('foo'), safe_decode(b'foo'))
+
+ def test_safe_encode(self):
+ safe_encode = encodeutils.safe_encode
+ self.assertRaises(TypeError, safe_encode, True)
+ self.assertEqual(six.b("ni\xc3\xb1o"), safe_encode(six.u('ni\xf1o'),
+ encoding="utf-8"))
+ if six.PY2:
+ # In Python 3, str.encode() doesn't support anymore
+ # text => text encodings like base64
+ self.assertEqual(six.b("dGVzdA==\n"),
+ safe_encode("test", encoding='base64'))
+ self.assertEqual(six.b('ni\xf1o'), safe_encode(six.b("ni\xc3\xb1o"),
+ encoding="iso-8859-1",
+ incoming="utf-8"))
+
+ # Forcing incoming to ascii so it falls back to utf-8
+ self.assertEqual(six.b('ni\xc3\xb1o'),
+ safe_encode(six.b('ni\xc3\xb1o'), incoming='ascii'))
+ self.assertEqual(six.b('foo'), safe_encode(six.u('foo')))