diff options
author | Flaper Fesp <flaper87@gmail.com> | 2013-01-30 15:18:44 +0100 |
---|---|---|
committer | Flaper Fesp <flaper87@gmail.com> | 2013-02-13 21:53:11 +0100 |
commit | 55cb4f4473a6fc429524e7c4848379013a4d2d1d (patch) | |
tree | 102a6749c19a23515164ce7d2f3e95e594a9d59c /glanceclient/common/utils.py | |
parent | 542a45bd2881b84d845ab167095e40d562d274a2 (diff) | |
download | python-glanceclient-55cb4f4473a6fc429524e7c4848379013a4d2d1d.tar.gz |
Decode input and encode output
Currently glanceclient doesn't support non-ASCII characters in image
names and properties (both property names and values). This patch
introduces two functions (in utils.py) that help encode and decode
strings in a more "secure" way.
About the ensure_(str|unicode) functions:
They both first try the encoding used by stdin (or Python's
default encoding if that's None) and fall back to UTF-8 if those
encodings fail to decode the given text.
About the changes in glanceclient:
The major change is that all inputs will be decoded and kept as
such inside the client's functions, and will then be encoded before
being printed or sent out of the client.
There are other small changes throughout, all related to encoding to
str, made in order to avoid failures during some conversions, e.g. when
quoting URL-encoded parameters.
Fixes bug: 1061150
Change-Id: I5c3ea93a716edfe284d19f6291d4e36028f91eb2
Diffstat (limited to 'glanceclient/common/utils.py')
-rw-r--r-- | glanceclient/common/utils.py | 81 |
1 files changed, 77 insertions, 4 deletions
diff --git a/glanceclient/common/utils.py b/glanceclient/common/utils.py
index 0159129..08e047b 100644
--- a/glanceclient/common/utils.py
+++ b/glanceclient/common/utils.py
@@ -54,14 +54,14 @@ def print_list(objs, fields, formatters={}):
                 row.append(data)
         pt.add_row(row)
 
-    print pt.get_string()
+    print ensure_str(pt.get_string())
 
 
 def print_dict(d):
     pt = prettytable.PrettyTable(['Property', 'Value'], caching=False)
     pt.align = 'l'
     [pt.add_row(list(r)) for r in d.iteritems()]
-    print pt.get_string(sortby='Property')
+    print ensure_str(pt.get_string(sortby='Property'))
 
 
 def find_resource(manager, name_or_id):
@@ -75,7 +75,7 @@ def find_resource(manager, name_or_id):
 
     # now try to get entity as uuid
     try:
-        uuid.UUID(str(name_or_id))
+        uuid.UUID(ensure_str(name_or_id))
         return manager.get(name_or_id)
     except (ValueError, exc.NotFound):
         pass
@@ -137,7 +137,7 @@ def import_versioned_module(version, submodule=None):
 
 def exit(msg=''):
     if msg:
-        print >> sys.stderr, msg
+        print >> sys.stderr, ensure_str(msg)
     sys.exit(1)
 
 
@@ -190,3 +190,76 @@ def make_size_human_readable(size):
     stripped = padded.rstrip('0').rstrip('.')
 
     return '%s%s' % (stripped, suffix[index])
+
+
+def ensure_unicode(text, incoming=None, errors='strict'):
+    """
+    Decodes incoming objects using `incoming` if they're
+    not already unicode.
+
+    :param incoming: Text's current encoding
+    :param errors: Errors handling policy.
+    :returns: text or a unicode `incoming` encoded
+                representation of it.
+    """
+    if isinstance(text, unicode):
+        return text
+
+    if not incoming:
+        incoming = sys.stdin.encoding or \
+            sys.getdefaultencoding()
+
+    # Calling `str` in case text is a non str
+    # object.
+    text = str(text)
+    try:
+        return text.decode(incoming, errors)
+    except UnicodeDecodeError:
+        # Note(flaper87) If we get here, it means that
+        # sys.stdin.encoding / sys.getdefaultencoding
+        # didn't return a suitable encoding to decode
+        # text. This happens mostly when global LANG
+        # var is not set correctly and there's no
+        # default encoding. In this case, most likely
+        # python will use ASCII or ANSI encoders as
+        # default encodings but they won't be capable
+        # of decoding non-ASCII characters.
+        #
+        # Also, UTF-8 is being used since it's an ASCII
+        # extension.
+        return text.decode('utf-8', errors)
+
+
+def ensure_str(text, incoming=None,
+               encoding='utf-8', errors='strict'):
+    """
+    Encodes incoming objects using `encoding`. If
+    incoming is not specified, text is expected to
+    be encoded with current python's default encoding.
+    (`sys.getdefaultencoding`)
+
+    :param incoming: Text's current encoding
+    :param encoding: Expected encoding for text (Default UTF-8)
+    :param errors: Errors handling policy.
+    :returns: text or a bytestring `encoding` encoded
+                representation of it.
+    """
+
+    if not incoming:
+        incoming = sys.stdin.encoding or \
+            sys.getdefaultencoding()
+
+    if not isinstance(text, basestring):
+        # try to convert `text` to string
+        # This allows this method for receiving
+        # objs that can be converted to string
+        text = str(text)
+
+    if isinstance(text, unicode):
+        return text.encode(encoding, errors)
+    elif text and encoding != incoming:
+        # Decode text before encoding it with `encoding`
+        text = ensure_unicode(text, incoming, errors)
+        return text.encode(encoding, errors)
+
+    return text