From b59290b5ddb988f91eba5911d0a33d20571f904b Mon Sep 17 00:00:00 2001 From: Stephen Hatch Date: Thu, 26 Oct 2017 08:19:23 -0400 Subject: Python 3 UTF-8 Encode/Decode w/ Surrogates (#168) * ensure that all call sites for decoding bytes to str allow surrogates, as the encoding mechanism now supports * tests <3 * fix tests * test updates * thinking maybe this dependency is out of date * revert --- amqp/serialization.py | 4 ++-- amqp/utils.py | 6 +++--- t/unit/test_utils.py | 7 +++++++ 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/amqp/serialization.py b/amqp/serialization.py index 5ef508a..2166eba 100644 --- a/amqp/serialization.py +++ b/amqp/serialization.py @@ -194,13 +194,13 @@ def loads(format, buf, offset=0, bitcount = bits = 0 slen, = unpack_from('B', buf, offset) offset += 1 - val = buf[offset:offset + slen].decode('utf-8') + val = buf[offset:offset + slen].decode('utf-8', 'surrogatepass') offset += slen elif p == 'S': bitcount = bits = 0 slen, = unpack_from('>I', buf, offset) offset += 4 - val = buf[offset:offset + slen].decode('utf-8') + val = buf[offset:offset + slen].decode('utf-8', 'surrogatepass') offset += slen elif p == 'F': bitcount = bits = 0 diff --git a/amqp/utils.py b/amqp/utils.py index cf4b911..52f7717 100644 --- a/amqp/utils.py +++ b/amqp/utils.py @@ -73,20 +73,20 @@ if is_py3k: # pragma: no cover def str_to_bytes(s): """Convert str to bytes.""" if isinstance(s, str): - return s.encode() + return s.encode('utf-8', 'surrogatepass') return s def bytes_to_str(s): """Convert bytes to str.""" if isinstance(s, bytes): - return s.decode() + return s.decode('utf-8', 'surrogatepass') return s else: def str_to_bytes(s): # noqa """Convert str to bytes.""" if isinstance(s, unicode): - return s.encode() + return s.encode('utf-8') return s def bytes_to_str(s): # noqa diff --git a/t/unit/test_utils.py b/t/unit/test_utils.py index a2b126c..c9bb2e8 100644 --- a/t/unit/test_utils.py +++ b/t/unit/test_utils.py @@ -47,6 +47,10 @@ class test_str_to_bytes: def test_from_bytes(self): assert isinstance(str_to_bytes(b'foo'), bytes) + def test_supports_surrogates(self): + bytes_with_surrogates = '\ud83d\ude4f'.encode('utf-8', 'surrogatepass') + assert str_to_bytes('\ud83d\ude4f') == bytes_with_surrogates + class test_bytes_to_str: @@ -56,6 +60,9 @@ class test_bytes_to_str: def test_from_bytes(self): assert bytes_to_str(b'foo') + def test_support_surrogates(self): + assert bytes_to_str(u'\ud83d\ude4f') == u'\ud83d\ude4f' + class test_NullHandler: -- cgit v1.2.1