From 0b592d513b073cd3a4ba7632907c25b8282f15ce Mon Sep 17 00:00:00 2001
From: "Miss Islington (bot)" <31488909+miss-islington@users.noreply.github.com>
Date: Fri, 14 Jun 2019 09:30:27 -0700
Subject: bpo-19865: ctypes.create_unicode_buffer() supports non-BMP strings on Windows (GH-14081)

(cherry picked from commit 9765efcb39fc03d5b1abec3924388974470a8bd5)

Co-authored-by: Zackery Spytz
---
 Lib/ctypes/__init__.py                                    | 10 +++++++++-
 Lib/ctypes/test/test_buffers.py                           |  9 +++++++++
 .../next/Library/2019-06-14-08-30-16.bpo-19865.FRGH4I.rst |  2 ++
 3 files changed, 20 insertions(+), 1 deletion(-)
 create mode 100644 Misc/NEWS.d/next/Library/2019-06-14-08-30-16.bpo-19865.FRGH4I.rst

diff --git a/Lib/ctypes/__init__.py b/Lib/ctypes/__init__.py
index 6146773988..dae408a867 100644
--- a/Lib/ctypes/__init__.py
+++ b/Lib/ctypes/__init__.py
@@ -279,7 +279,15 @@ def create_unicode_buffer(init, size=None):
     """
     if isinstance(init, str):
         if size is None:
-            size = len(init)+1
+            if sizeof(c_wchar) == 2:
+                # UTF-16 requires a surrogate pair (2 wchar_t) for non-BMP
+                # characters (outside [U+0000; U+FFFF] range). +1 for trailing
+                # NUL character.
+                size = sum(2 if ord(c) > 0xFFFF else 1 for c in init) + 1
+            else:
+                # 32-bit wchar_t (1 wchar_t per Unicode character). +1 for
+                # trailing NUL character.
+                size = len(init) + 1
         buftype = c_wchar * size
         buf = buftype()
         buf.value = init
diff --git a/Lib/ctypes/test/test_buffers.py b/Lib/ctypes/test/test_buffers.py
index 166faaf4e4..15782be757 100644
--- a/Lib/ctypes/test/test_buffers.py
+++ b/Lib/ctypes/test/test_buffers.py
@@ -60,5 +60,14 @@ class StringBufferTestCase(unittest.TestCase):
         self.assertEqual(b[::2], "ac")
         self.assertEqual(b[::5], "a")
 
+    @need_symbol('c_wchar')
+    def test_create_unicode_buffer_non_bmp(self):
+        expected = 5 if sizeof(c_wchar) == 2 else 3
+        for s in '\U00010000\U00100000', '\U00010000\U0010ffff':
+            b = create_unicode_buffer(s)
+            self.assertEqual(len(b), expected)
+            self.assertEqual(b[-1], '\0')
+
+
 if __name__ == "__main__":
     unittest.main()
diff --git a/Misc/NEWS.d/next/Library/2019-06-14-08-30-16.bpo-19865.FRGH4I.rst b/Misc/NEWS.d/next/Library/2019-06-14-08-30-16.bpo-19865.FRGH4I.rst
new file mode 100644
index 0000000000..efd1f55c01
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2019-06-14-08-30-16.bpo-19865.FRGH4I.rst
@@ -0,0 +1,2 @@
+:func:`ctypes.create_unicode_buffer()` now also supports non-BMP characters
+on platforms with 16-bit :c:type:`wchar_t` (for example, Windows and AIX).
-- 
cgit v1.2.1