summaryrefslogtreecommitdiff
path: root/glanceclient/common/utils.py
diff options
context:
space:
mode:
authorFlaper Fesp <flaper87@gmail.com>2013-01-30 15:18:44 +0100
committerFlaper Fesp <flaper87@gmail.com>2013-02-13 21:53:11 +0100
commit55cb4f4473a6fc429524e7c4848379013a4d2d1d (patch)
tree102a6749c19a23515164ce7d2f3e95e594a9d59c /glanceclient/common/utils.py
parent542a45bd2881b84d845ab167095e40d562d274a2 (diff)
downloadpython-glanceclient-55cb4f4473a6fc429524e7c4848379013a4d2d1d.tar.gz
Decode input and encode output
Currently glanceclient doesn't support non-ASCII characters in image names and properties (names and values alike). This patch introduces 2 functions (in utils.py) that help encode and decode strings in a more "secure" way. About the ensure_(str|unicode) functions: They both first try the encoding used by stdin (or Python's default encoding if that's None) and fall back to utf-8 if that encoding fails to decode a given text. About the changes in glanceclient: The major change is that all inputs will be decoded and kept as such inside the client's functions, and will then be encoded before being printed / sent out of the client. There are other small changes, all related to encoding to str, made in order to avoid failures during some conversions, e.g. quoting URL-encoded parameters. Fixes bug: 1061150 Change-Id: I5c3ea93a716edfe284d19f6291d4e36028f91eb2
Diffstat (limited to 'glanceclient/common/utils.py')
-rw-r--r--glanceclient/common/utils.py81
1 files changed, 77 insertions, 4 deletions
diff --git a/glanceclient/common/utils.py b/glanceclient/common/utils.py
index 0159129..08e047b 100644
--- a/glanceclient/common/utils.py
+++ b/glanceclient/common/utils.py
@@ -54,14 +54,14 @@ def print_list(objs, fields, formatters={}):
row.append(data)
pt.add_row(row)
- print pt.get_string()
+ print ensure_str(pt.get_string())
def print_dict(d):
pt = prettytable.PrettyTable(['Property', 'Value'], caching=False)
pt.align = 'l'
[pt.add_row(list(r)) for r in d.iteritems()]
- print pt.get_string(sortby='Property')
+ print ensure_str(pt.get_string(sortby='Property'))
def find_resource(manager, name_or_id):
@@ -75,7 +75,7 @@ def find_resource(manager, name_or_id):
# now try to get entity as uuid
try:
- uuid.UUID(str(name_or_id))
+ uuid.UUID(ensure_str(name_or_id))
return manager.get(name_or_id)
except (ValueError, exc.NotFound):
pass
@@ -137,7 +137,7 @@ def import_versioned_module(version, submodule=None):
def exit(msg=''):
if msg:
- print >> sys.stderr, msg
+ print >> sys.stderr, ensure_str(msg)
sys.exit(1)
@@ -190,3 +190,76 @@ def make_size_human_readable(size):
stripped = padded.rstrip('0').rstrip('.')
return '%s%s' % (stripped, suffix[index])
+
+
+def ensure_unicode(text, incoming=None, errors='strict'):
+ """
+ Decodes incoming objects using `incoming` if they're
+ not already unicode.
+
+ :param incoming: Text's current encoding
+ :param errors: Errors handling policy.
+ :returns: text or a unicode `incoming` encoded
+ representation of it.
+ """
+ if isinstance(text, unicode):
+ return text
+
+ if not incoming:
+ incoming = sys.stdin.encoding or \
+ sys.getdefaultencoding()
+
+ # Calling `str` in case text is a non str
+ # object.
+ text = str(text)
+ try:
+ return text.decode(incoming, errors)
+ except UnicodeDecodeError:
+ # Note(flaper87) If we get here, it means that
+ # sys.stdin.encoding / sys.getdefaultencoding
+ # didn't return a suitable encoding to decode
+ # text. This happens mostly when global LANG
+ # var is not set correctly and there's no
+ # default encoding. In this case, most likely
+ # python will use ASCII or ANSI encoders as
+ # default encodings but they won't be capable
+ # of decoding non-ASCII characters.
+ #
+ # Also, UTF-8 is being used since it's an ASCII
+ # extension.
+ return text.decode('utf-8', errors)
+
+
+def ensure_str(text, incoming=None,
+ encoding='utf-8', errors='strict'):
+ """
+ Encodes incoming objects using `encoding`. If
+ incoming is not specified, text is expected to
+ be encoded with stdin's encoding (`sys.stdin.encoding`)
+ or, if that is None, `sys.getdefaultencoding`.
+
+ :param incoming: Text's current encoding
+ :param encoding: Expected encoding for text (Default UTF-8)
+ :param errors: Errors handling policy.
+ :returns: text or a bytestring `encoding` encoded
+ representation of it.
+ """
+
+ if not incoming:
+ incoming = sys.stdin.encoding or \
+ sys.getdefaultencoding()
+
+ if not isinstance(text, basestring):
+ # try to convert `text` to string
+ # This allows this method for receiving
+ # objs that can be converted to string
+ text = str(text)
+
+ if isinstance(text, unicode):
+ return text.encode(encoding, errors)
+ elif text and encoding != incoming:
+ # Decode text before encoding it with `encoding`
+ text = ensure_unicode(text, incoming, errors)
+ return text.encode(encoding, errors)
+
+ return text