summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com>, 2019-06-14 09:30:27 -0700
committer: GitHub <noreply@github.com>, 2019-06-14 09:30:27 -0700
commit: 0b592d513b073cd3a4ba7632907c25b8282f15ce (patch)
tree: dfc0dd591a0c6843ee34a1a7a78618b2c29fec17
parent: 33feb2e1a391cde91aefcb8d9cf5144b5fbc5d87 (diff)
download: cpython-git-0b592d513b073cd3a4ba7632907c25b8282f15ce.tar.gz
bpo-19865: ctypes.create_unicode_buffer() supports non-BMP strings on Windows (GH-14081)
(cherry picked from commit 9765efcb39fc03d5b1abec3924388974470a8bd5)
Co-authored-by: Zackery Spytz <zspytz@gmail.com>
-rw-r--r--  Lib/ctypes/__init__.py  10
-rw-r--r--  Lib/ctypes/test/test_buffers.py  9
-rw-r--r--  Misc/NEWS.d/next/Library/2019-06-14-08-30-16.bpo-19865.FRGH4I.rst  2
3 files changed, 20 insertions, 1 deletion
diff --git a/Lib/ctypes/__init__.py b/Lib/ctypes/__init__.py
index 6146773988..dae408a867 100644
--- a/Lib/ctypes/__init__.py
+++ b/Lib/ctypes/__init__.py
@@ -279,7 +279,15 @@ def create_unicode_buffer(init, size=None):
"""
if isinstance(init, str):
if size is None:
- size = len(init)+1
+ if sizeof(c_wchar) == 2:
+ # UTF-16 requires a surrogate pair (2 wchar_t) for non-BMP
+ # characters (outside [U+0000; U+FFFF] range). +1 for trailing
+ # NUL character.
+ size = sum(2 if ord(c) > 0xFFFF else 1 for c in init) + 1
+ else:
+ # 32-bit wchar_t (1 wchar_t per Unicode character). +1 for
+ # trailing NUL character.
+ size = len(init) + 1
buftype = c_wchar * size
buf = buftype()
buf.value = init
diff --git a/Lib/ctypes/test/test_buffers.py b/Lib/ctypes/test/test_buffers.py
index 166faaf4e4..15782be757 100644
--- a/Lib/ctypes/test/test_buffers.py
+++ b/Lib/ctypes/test/test_buffers.py
@@ -60,5 +60,14 @@ class StringBufferTestCase(unittest.TestCase):
self.assertEqual(b[::2], "ac")
self.assertEqual(b[::5], "a")
+ @need_symbol('c_wchar')
+ def test_create_unicode_buffer_non_bmp(self):
+ expected = 5 if sizeof(c_wchar) == 2 else 3
+ for s in '\U00010000\U00100000', '\U00010000\U0010ffff':
+ b = create_unicode_buffer(s)
+ self.assertEqual(len(b), expected)
+ self.assertEqual(b[-1], '\0')
+
+
if __name__ == "__main__":
unittest.main()
diff --git a/Misc/NEWS.d/next/Library/2019-06-14-08-30-16.bpo-19865.FRGH4I.rst b/Misc/NEWS.d/next/Library/2019-06-14-08-30-16.bpo-19865.FRGH4I.rst
new file mode 100644
index 0000000000..efd1f55c01
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2019-06-14-08-30-16.bpo-19865.FRGH4I.rst
@@ -0,0 +1,2 @@
+:func:`ctypes.create_unicode_buffer()` now also supports non-BMP characters
+on platforms with 16-bit :c:type:`wchar_t` (for example, Windows and AIX).