Python 3 UTF-8 Encode/Decode w/ Surrogates (#168)

* ensure that all call sites for decoding bytes to str allow surrogates, as the encoding mechanism now supports them
* tests <3
* fix tests
* test updates
* thinking maybe this dependency is out of date
* revert
author: Stephen Hatch <stephen.hatch@vertical-knowledge.com> 2017-10-26 08:19:23 -0400
committer: Omer Katz <omer.drow@gmail.com> 2017-10-26 15:19:23 +0300
commit: b59290b5ddb988f91eba5911d0a33d20571f904b (patch)
tree: 31f791a798dcee801432dd4e4d6c7e1879fe6bcc
parent: 3faa7598536d8d8ccdf07722f5af8f2e44d9d0a1 (diff)
download: py-amqp-b59290b5ddb988f91eba5911d0a33d20571f904b.tar.gz
3 files changed, 12 insertions, 5 deletions
diff --git a/amqp/serialization.py b/amqp/serialization.py
index 5ef508a..2166eba 100644
--- a/amqp/serialization.py
+++ b/amqp/serialization.py
@@ -194,13 +194,13 @@ def loads(format, buf, offset=0,
             bitcount = bits = 0
             slen, = unpack_from('B', buf, offset)
             offset += 1
-            val = buf[offset:offset + slen].decode('utf-8')
+            val = buf[offset:offset + slen].decode('utf-8', 'surrogatepass')
             offset += slen
         elif p == 'S':
             bitcount = bits = 0
             slen, = unpack_from('>I', buf, offset)
             offset += 4
-            val = buf[offset:offset + slen].decode('utf-8')
+            val = buf[offset:offset + slen].decode('utf-8', 'surrogatepass')
             offset += slen
         elif p == 'F':
             bitcount = bits = 0
diff --git a/amqp/utils.py b/amqp/utils.py
index cf4b911..52f7717 100644
--- a/amqp/utils.py
+++ b/amqp/utils.py
@@ -73,20 +73,20 @@ if is_py3k:  # pragma: no cover
     def str_to_bytes(s):
         """Convert str to bytes."""
         if isinstance(s, str):
-            return s.encode()
+            return s.encode('utf-8', 'surrogatepass')
         return s
 
     def bytes_to_str(s):
         """Convert bytes to str."""
         if isinstance(s, bytes):
-            return s.decode()
+            return s.decode('utf-8', 'surrogatepass')
         return s
 else:
 
     def str_to_bytes(s):                # noqa
         """Convert str to bytes."""
         if isinstance(s, unicode):
-            return s.encode()
+            return s.encode('utf-8')
         return s
 
     def bytes_to_str(s):                # noqa
diff --git a/t/unit/test_utils.py b/t/unit/test_utils.py
index a2b126c..c9bb2e8 100644
--- a/t/unit/test_utils.py
+++ b/t/unit/test_utils.py
@@ -47,6 +47,10 @@ class test_str_to_bytes:
     def test_from_bytes(self):
         assert isinstance(str_to_bytes(b'foo'), bytes)
 
+    def test_supports_surrogates(self):
+        bytes_with_surrogates = '\ud83d\ude4f'.encode('utf-8', 'surrogatepass')
+        assert str_to_bytes('\ud83d\ude4f') == bytes_with_surrogates
+
 
 class test_bytes_to_str:
 
@@ -56,6 +60,9 @@ class test_bytes_to_str:
     def test_from_bytes(self):
         assert bytes_to_str(b'foo')
 
+    def test_support_surrogates(self):
+        assert bytes_to_str(u'\ud83d\ude4f') == u'\ud83d\ude4f'
+
 
 class test_NullHandler:
author	Stephen Hatch <stephen.hatch@vertical-knowledge.com>	2017-10-26 08:19:23 -0400
committer	Omer Katz <omer.drow@gmail.com>	2017-10-26 15:19:23 +0300
commit	b59290b5ddb988f91eba5911d0a33d20571f904b (patch)
tree	31f791a798dcee801432dd4e4d6c7e1879fe6bcc
parent	3faa7598536d8d8ccdf07722f5af8f2e44d9d0a1 (diff)
download	py-amqp-b59290b5ddb988f91eba5911d0a33d20571f904b.tar.gz