diff options
author | Jonathan Wakely <jwakely@redhat.com> | 2017-03-16 15:27:45 +0000 |
---|---|---|
committer | Jonathan Wakely <redi@gcc.gnu.org> | 2017-03-16 15:27:45 +0000 |
commit | 02e12bda2df86491a5d9c7294550fd4cd4968aa1 (patch) | |
tree | 33f94cc1c2cd76b91fa4b4764e91ca08e07ae7f2 | |
parent | 8d85abab445d4f458826f4ef3acbc83091257c8b (diff) | |
download | gcc-02e12bda2df86491a5d9c7294550fd4cd4968aa1.tar.gz |
PR libstdc++/79511 fix endianness of UTF-16 data
PR libstdc++/79511
* src/c++11/codecvt.cc (write_utf16_code_point): Don't write 0xffff
as a surrogate pair.
(__codecvt_utf8_utf16_base<char32_t>::do_in): Use native endianness
for internal representation.
(__codecvt_utf8_utf16_base<wchar_t>::do_in): Likewise.
* testsuite/22_locale/codecvt/codecvt_utf8_utf16/79511.cc: New test.
From-SVN: r246199
-rw-r--r-- | libstdc++-v3/ChangeLog | 8 | ||||
-rw-r--r-- | libstdc++-v3/src/c++11/codecvt.cc | 14 | ||||
-rw-r--r-- | libstdc++-v3/testsuite/22_locale/codecvt/codecvt_utf8_utf16/79511.cc | 60 |
3 files changed, 79 insertions, 3 deletions
```diff
diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog
index 63ed2daeed5..98735ca0c38 100644
--- a/libstdc++-v3/ChangeLog
+++ b/libstdc++-v3/ChangeLog
@@ -1,5 +1,13 @@
 2017-03-16 Jonathan Wakely <jwakely@redhat.com>
 
+        PR libstdc++/79511
+        * src/c++11/codecvt.cc (write_utf16_code_point): Don't write 0xffff
+        as a surrogate pair.
+        (__codecvt_utf8_utf16_base<char32_t>::do_in): Use native endianness
+        for internal representation.
+        (__codecvt_utf8_utf16_base<wchar_t>::do_in): Likewise.
+        * testsuite/22_locale/codecvt/codecvt_utf8_utf16/79511.cc: New test.
+
         PR libstdc++/80064
         * include/bits/stl_heap.h (__is_heap, push_heap, __adjust_heap)
         (pop_heap, make_heap, sort_heap, is_heap_until, is_heap): Cope with
diff --git a/libstdc++-v3/src/c++11/codecvt.cc b/libstdc++-v3/src/c++11/codecvt.cc
index 12a4d4f88b5..9b63e2b79f9 100644
--- a/libstdc++-v3/src/c++11/codecvt.cc
+++ b/libstdc++-v3/src/c++11/codecvt.cc
@@ -315,7 +315,7 @@ namespace
   {
     static_assert(sizeof(C) >= 2, "a code unit must be at least 16-bit");
 
-    if (codepoint < max_single_utf16_unit)
+    if (codepoint <= max_single_utf16_unit)
       {
         if (to.size() > 0)
           {
@@ -1341,7 +1341,11 @@ do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
 {
   range<const char> from{ __from, __from_end };
   range<char32_t> to{ __to, __to_end };
-  auto res = utf16_in(from, to, _M_maxcode, _M_mode);
+  codecvt_mode mode = codecvt_mode(_M_mode & (consume_header|generate_header));
+#if __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__
+  mode = codecvt_mode(mode | little_endian);
+#endif
+  auto res = utf16_in(from, to, _M_maxcode, mode);
   __from_next = from.next;
   __to_next = to.next;
   return res;
@@ -1411,7 +1415,11 @@ do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
 {
   range<const char> from{ __from, __from_end };
   range<wchar_t> to{ __to, __to_end };
-  auto res = utf16_in(from, to, _M_maxcode, _M_mode);
+  codecvt_mode mode = codecvt_mode(_M_mode & (consume_header|generate_header));
+#if __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__
+  mode = codecvt_mode(mode | little_endian);
+#endif
+  auto res = utf16_in(from, to, _M_maxcode, mode);
   __from_next = from.next;
   __to_next = to.next;
   return res;
diff --git a/libstdc++-v3/testsuite/22_locale/codecvt/codecvt_utf8_utf16/79511.cc b/libstdc++-v3/testsuite/22_locale/codecvt/codecvt_utf8_utf16/79511.cc
new file mode 100644
index 00000000000..5555bcba6fe
--- /dev/null
+++ b/libstdc++-v3/testsuite/22_locale/codecvt/codecvt_utf8_utf16/79511.cc
@@ -0,0 +1,60 @@
+// Copyright (C) 2017 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library. This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3. If not see
+// <http://www.gnu.org/licenses/>.
+
+// { dg-do run { target c++11 } }
+
+#include <locale>
+#include <codecvt>
+#include <testsuite_hooks.h>
+
+// PR libstdc++/79511
+
+template<typename ElemT>
+  std::basic_string<ElemT> conv(const char* src)
+  {
+    std::wstring_convert<std::codecvt_utf8_utf16<ElemT>, ElemT> conv;
+    return conv.from_bytes(src);
+  }
+
+void
+test01()
+{
+  static char const src[] = "\xEF\xBF\xBF";
+  VERIFY( conv<char16_t>(src) == u"\xffff" );
+  VERIFY( conv<char32_t>(src) == U"\xffff" );
+#ifdef _GLIBCXX_USE_WCHAR_T
+  VERIFY( conv<wchar_t>(src) == L"\xffff" );
+#endif
+}
+
+void
+test02()
+{
+  static char const src[] = "\xE2\x82\xAC";
+  VERIFY( conv<char16_t>(src) == u"\x20ac" );
+  VERIFY( conv<char32_t>(src) == U"\x20ac" );
+#ifdef _GLIBCXX_USE_WCHAR_T
+  VERIFY( conv<wchar_t>(src) == L"\x20ac" );
+#endif
+}
+
+int
+main()
+{
+  test01();
+  test02();
+}
```