diff options
author | Flavio Percoco <flaper87@gmail.com> | 2014-07-11 19:38:42 +0200 |
---|---|---|
committer | Flavio Percoco <flaper87@gmail.com> | 2014-07-11 19:41:31 +0200 |
commit | 7687a04ea44cb9a0a40b6ba794160ffe5e1adf90 (patch) | |
tree | 8accf4fea9b046a93d0ae4d9392b99a87b246efa | |
parent | 5621114c62c9feaa53daec91c5d682b5c0cda40c (diff) | |
download | oslo-utils-7687a04ea44cb9a0a40b6ba794160ffe5e1adf90.tar.gz |
Split strutils into 2 different modules
This patch pulls the encoding-related functions out of strutils into their
own encodeutils module. We could probably find a better name for strutils
now, although it seems short and contextualized enough.
Partially-implements blueprint: graduate-oslo-utils
Change-Id: Ib76065823c8a1b56020f14cea80b6d73e150aa49
-rw-r--r-- | oslo/utils/encodeutils.py | 89 | ||||
-rw-r--r-- | oslo/utils/strutils.py | 75 | ||||
-rw-r--r-- | tests/test_strutils.py | 42 | ||||
-rw-r--r-- | tests/tests_encodeutils.py | 66 |
4 files changed, 157 insertions, 115 deletions
diff --git a/oslo/utils/encodeutils.py b/oslo/utils/encodeutils.py new file mode 100644 index 0000000..c8a6cb8 --- /dev/null +++ b/oslo/utils/encodeutils.py @@ -0,0 +1,89 @@ +# Copyright 2014 Red Hat, Inc. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import sys + +import six + + +def safe_decode(text, incoming=None, errors='strict'): + """Decodes incoming text/bytes string using `incoming` if they're not + already unicode. + + :param incoming: Text's current encoding + :param errors: Errors handling policy. See here for valid + values http://docs.python.org/2/library/codecs.html + :returns: text or a unicode `incoming` encoded + representation of it. + :raises TypeError: If text is not an instance of str + """ + if not isinstance(text, (six.string_types, six.binary_type)): + raise TypeError("%s can't be decoded" % type(text)) + + if isinstance(text, six.text_type): + return text + + if not incoming: + incoming = (sys.stdin.encoding or + sys.getdefaultencoding()) + + try: + return text.decode(incoming, errors) + except UnicodeDecodeError: + # Note(flaper87) If we get here, it means that + # sys.stdin.encoding / sys.getdefaultencoding + # didn't return a suitable encoding to decode + # text. This happens mostly when global LANG + # var is not set correctly and there's no + # default encoding. 
In this case, most likely + # python will use ASCII or ANSI encoders as + # default encodings but they won't be capable + # of decoding non-ASCII characters. + # + # Also, UTF-8 is being used since it's an ASCII + # extension. + return text.decode('utf-8', errors) + + +def safe_encode(text, incoming=None, + encoding='utf-8', errors='strict'): + """Encodes incoming text/bytes string using `encoding`. + + If incoming is not specified, text is expected to be encoded with + current python's default encoding. (`sys.getdefaultencoding`) + + :param incoming: Text's current encoding + :param encoding: Expected encoding for text (Default UTF-8) + :param errors: Errors handling policy. See here for valid + values http://docs.python.org/2/library/codecs.html + :returns: text or a bytestring `encoding` encoded + representation of it. + :raises TypeError: If text is not an instance of str + """ + if not isinstance(text, (six.string_types, six.binary_type)): + raise TypeError("%s can't be encoded" % type(text)) + + if not incoming: + incoming = (sys.stdin.encoding or + sys.getdefaultencoding()) + + if isinstance(text, six.text_type): + return text.encode(encoding, errors) + elif text and encoding != incoming: + # Decode text before encoding it with `encoding` + text = safe_decode(text, incoming, errors) + return text.encode(encoding, errors) + else: + return text diff --git a/oslo/utils/strutils.py b/oslo/utils/strutils.py index bcb9626..2714a1e 100644 --- a/oslo/utils/strutils.py +++ b/oslo/utils/strutils.py @@ -19,11 +19,11 @@ System-level utilities and helper functions. import math import re -import sys import unicodedata import six +from oslo.utils import encodeutils from oslo.utils.openstack.common.gettextutils import _ @@ -97,77 +97,6 @@ def bool_from_string(subject, strict=False, default=False): return default -def safe_decode(text, incoming=None, errors='strict'): - """Decodes incoming text/bytes string using `incoming` if they're not - already unicode. 
- - :param incoming: Text's current encoding - :param errors: Errors handling policy. See here for valid - values http://docs.python.org/2/library/codecs.html - :returns: text or a unicode `incoming` encoded - representation of it. - :raises TypeError: If text is not an instance of str - """ - if not isinstance(text, (six.string_types, six.binary_type)): - raise TypeError("%s can't be decoded" % type(text)) - - if isinstance(text, six.text_type): - return text - - if not incoming: - incoming = (sys.stdin.encoding or - sys.getdefaultencoding()) - - try: - return text.decode(incoming, errors) - except UnicodeDecodeError: - # Note(flaper87) If we get here, it means that - # sys.stdin.encoding / sys.getdefaultencoding - # didn't return a suitable encoding to decode - # text. This happens mostly when global LANG - # var is not set correctly and there's no - # default encoding. In this case, most likely - # python will use ASCII or ANSI encoders as - # default encodings but they won't be capable - # of decoding non-ASCII characters. - # - # Also, UTF-8 is being used since it's an ASCII - # extension. - return text.decode('utf-8', errors) - - -def safe_encode(text, incoming=None, - encoding='utf-8', errors='strict'): - """Encodes incoming text/bytes string using `encoding`. - - If incoming is not specified, text is expected to be encoded with - current python's default encoding. (`sys.getdefaultencoding`) - - :param incoming: Text's current encoding - :param encoding: Expected encoding for text (Default UTF-8) - :param errors: Errors handling policy. See here for valid - values http://docs.python.org/2/library/codecs.html - :returns: text or a bytestring `encoding` encoded - representation of it. 
- :raises TypeError: If text is not an instance of str - """ - if not isinstance(text, (six.string_types, six.binary_type)): - raise TypeError("%s can't be encoded" % type(text)) - - if not incoming: - incoming = (sys.stdin.encoding or - sys.getdefaultencoding()) - - if isinstance(text, six.text_type): - return text.encode(encoding, errors) - elif text and encoding != incoming: - # Decode text before encoding it with `encoding` - text = safe_decode(text, incoming, errors) - return text.encode(encoding, errors) - else: - return text - - def string_to_bytes(text, unit_system='IEC', return_int=False): """Converts a string into an float representation of bytes. @@ -229,7 +158,7 @@ def to_slug(value, incoming=None, errors="strict"): :returns: slugified unicode representation of `value` :raises TypeError: If text is not an instance of str """ - value = safe_decode(value, incoming, errors) + value = encodeutils.safe_decode(value, incoming, errors) # NOTE(aababilov): no need to use safe_(encode|decode) here: # encodings are always "ascii", error handling is always "ignore" # and types are always known (first: unicode; second: str) diff --git a/tests/test_strutils.py b/tests/test_strutils.py index 0ee6bc6..4b37b21 100644 --- a/tests/test_strutils.py +++ b/tests/test_strutils.py @@ -143,48 +143,6 @@ class StrUtilsTest(test_base.BaseTestCase): self.assertEqual(1, strutils.int_from_bool_as_string(True)) self.assertEqual(0, strutils.int_from_bool_as_string(False)) - def test_safe_decode(self): - safe_decode = strutils.safe_decode - self.assertRaises(TypeError, safe_decode, True) - self.assertEqual(six.u('ni\xf1o'), safe_decode(six.b("ni\xc3\xb1o"), - incoming="utf-8")) - if six.PY2: - # In Python 3, bytes.decode() doesn't support anymore - # bytes => bytes encodings like base64 - self.assertEqual(six.u("test"), safe_decode("dGVzdA==", - incoming='base64')) - - self.assertEqual(six.u("strange"), safe_decode(six.b('\x80strange'), - errors='ignore')) - - 
self.assertEqual(six.u('\xc0'), safe_decode(six.b('\xc0'), - incoming='iso-8859-1')) - - # Forcing incoming to ascii so it falls back to utf-8 - self.assertEqual(six.u('ni\xf1o'), safe_decode(six.b('ni\xc3\xb1o'), - incoming='ascii')) - - self.assertEqual(six.u('foo'), safe_decode(b'foo')) - - def test_safe_encode(self): - safe_encode = strutils.safe_encode - self.assertRaises(TypeError, safe_encode, True) - self.assertEqual(six.b("ni\xc3\xb1o"), safe_encode(six.u('ni\xf1o'), - encoding="utf-8")) - if six.PY2: - # In Python 3, str.encode() doesn't support anymore - # text => text encodings like base64 - self.assertEqual(six.b("dGVzdA==\n"), - safe_encode("test", encoding='base64')) - self.assertEqual(six.b('ni\xf1o'), safe_encode(six.b("ni\xc3\xb1o"), - encoding="iso-8859-1", - incoming="utf-8")) - - # Forcing incoming to ascii so it falls back to utf-8 - self.assertEqual(six.b('ni\xc3\xb1o'), - safe_encode(six.b('ni\xc3\xb1o'), incoming='ascii')) - self.assertEqual(six.b('foo'), safe_encode(six.u('foo'))) - def test_slugify(self): to_slug = strutils.to_slug self.assertRaises(TypeError, to_slug, True) diff --git a/tests/tests_encodeutils.py b/tests/tests_encodeutils.py new file mode 100644 index 0000000..f3bb09c --- /dev/null +++ b/tests/tests_encodeutils.py @@ -0,0 +1,66 @@ +# -*- coding: utf-8 -*- + +# Copyright 2014 Red Hat, Inc. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+ +from oslotest import base as test_base +import six + +from oslo.utils import encodeutils + + +class EncodeUtilsTest(test_base.BaseTestCase): + + def test_safe_decode(self): + safe_decode = encodeutils.safe_decode + self.assertRaises(TypeError, safe_decode, True) + self.assertEqual(six.u('ni\xf1o'), safe_decode(six.b("ni\xc3\xb1o"), + incoming="utf-8")) + if six.PY2: + # In Python 3, bytes.decode() doesn't support anymore + # bytes => bytes encodings like base64 + self.assertEqual(six.u("test"), safe_decode("dGVzdA==", + incoming='base64')) + + self.assertEqual(six.u("strange"), safe_decode(six.b('\x80strange'), + errors='ignore')) + + self.assertEqual(six.u('\xc0'), safe_decode(six.b('\xc0'), + incoming='iso-8859-1')) + + # Forcing incoming to ascii so it falls back to utf-8 + self.assertEqual(six.u('ni\xf1o'), safe_decode(six.b('ni\xc3\xb1o'), + incoming='ascii')) + + self.assertEqual(six.u('foo'), safe_decode(b'foo')) + + def test_safe_encode(self): + safe_encode = encodeutils.safe_encode + self.assertRaises(TypeError, safe_encode, True) + self.assertEqual(six.b("ni\xc3\xb1o"), safe_encode(six.u('ni\xf1o'), + encoding="utf-8")) + if six.PY2: + # In Python 3, str.encode() doesn't support anymore + # text => text encodings like base64 + self.assertEqual(six.b("dGVzdA==\n"), + safe_encode("test", encoding='base64')) + self.assertEqual(six.b('ni\xf1o'), safe_encode(six.b("ni\xc3\xb1o"), + encoding="iso-8859-1", + incoming="utf-8")) + + # Forcing incoming to ascii so it falls back to utf-8 + self.assertEqual(six.b('ni\xc3\xb1o'), + safe_encode(six.b('ni\xc3\xb1o'), incoming='ascii')) + self.assertEqual(six.b('foo'), safe_encode(six.u('foo'))) |