diff options
author | Jonathan Wakely <jwakely@redhat.com> | 2017-03-17 19:28:05 +0000 |
---|---|---|
committer | Jonathan Wakely <redi@gcc.gnu.org> | 2017-03-17 19:28:05 +0000 |
commit | d951e75dfe83b86dd2c46c7835e03bbf04b29278 (patch) | |
tree | 0e22d701b0437cb98ff145680df2d6dab207e8bb /libstdc++-v3/testsuite/22_locale/codecvt | |
parent | d1a73b0baead836a8d813a6a63459ef87a270bba (diff) | |
download | gcc-d951e75dfe83b86dd2c46c7835e03bbf04b29278.tar.gz |
Fix alignment bugs in std::codecvt_utf16
* src/c++11/codecvt.cc (range): Add non-type template parameter and
define oerloaded operators for reading and writing code units.
(range<Elem, false>): Define partial specialization for accessing
wide characters in potentially unaligned byte ranges.
(ucs2_span(const char16_t*, const char16_t*, ...))
(ucs4_span(const char16_t*, const char16_t*, ...)): Change parameters
to range<const char16_t, false> in order to avoid unaligned reads.
(__codecvt_utf16_base<char16_t>::do_out)
(__codecvt_utf16_base<char32_t>::do_out)
(__codecvt_utf16_base<wchar_t>::do_out): Use range specialization for
unaligned data to avoid unaligned writes.
(__codecvt_utf16_base<char16_t>::do_in)
(__codecvt_utf16_base<char32_t>::do_in)
(__codecvt_utf16_base<wchar_t>::do_in): Likewise for writes. Return
error if there are unprocessable trailing bytes.
(__codecvt_utf16_base<char16_t>::do_length)
(__codecvt_utf16_base<char32_t>::do_length)
(__codecvt_utf16_base<wchar_t>::do_length): Pass arguments of type
range<const char16_t, false> to span functions.
* testsuite/22_locale/codecvt/codecvt_utf16/misaligned.cc: New test.
From-SVN: r246245
Diffstat (limited to 'libstdc++-v3/testsuite/22_locale/codecvt')
-rw-r--r-- | libstdc++-v3/testsuite/22_locale/codecvt/codecvt_utf16/79980.cc | 27 | ||||
-rw-r--r-- | libstdc++-v3/testsuite/22_locale/codecvt/codecvt_utf16/misaligned.cc | 289 |
2 files changed, 316 insertions, 0 deletions
diff --git a/libstdc++-v3/testsuite/22_locale/codecvt/codecvt_utf16/79980.cc b/libstdc++-v3/testsuite/22_locale/codecvt/codecvt_utf16/79980.cc index 9383818d86b..d8b9729ed5b 100644 --- a/libstdc++-v3/testsuite/22_locale/codecvt/codecvt_utf16/79980.cc +++ b/libstdc++-v3/testsuite/22_locale/codecvt/codecvt_utf16/79980.cc @@ -103,6 +103,31 @@ test07() VERIFY( conv.converted() == 5 ); } +void +test08() +{ + // Read/write UTF-16 code units from data not correctly aligned for char16_t + Conv<char16_t, 0x10FFFF, std::generate_header> conv; + const char src[] = "-\xFE\xFF\0\x61\xAB\xCD"; + auto out = conv.from_bytes(src + 1, src + 7); + VERIFY( out[0] == 0x0061 ); + VERIFY( out[1] == 0xabcd ); + auto bytes = conv.to_bytes(out); + VERIFY( bytes == std::string(src + 1, 6) ); +} + +void +test09() +{ + // Read/write UTF-16 code units from data not correctly aligned for char16_t + Conv<char32_t, 0x10FFFF, std::generate_header> conv; + const char src[] = "-\xFE\xFF\xD8\x08\xDF\x45"; + auto out = conv.from_bytes(src + 1, src + 7); + VERIFY( out == U"\U00012345" ); + auto bytes = conv.to_bytes(out); + VERIFY( bytes == std::string(src + 1, 6) ); +} + int main() { test01(); @@ -112,4 +137,6 @@ int main() test05(); test06(); test07(); + test08(); + test09(); } diff --git a/libstdc++-v3/testsuite/22_locale/codecvt/codecvt_utf16/misaligned.cc b/libstdc++-v3/testsuite/22_locale/codecvt/codecvt_utf16/misaligned.cc new file mode 100644 index 00000000000..0179c184c20 --- /dev/null +++ b/libstdc++-v3/testsuite/22_locale/codecvt/codecvt_utf16/misaligned.cc @@ -0,0 +1,289 @@ +// Copyright (C) 2017 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the +// Free Software Foundation; either version 3, or (at your option) +// any later version. + +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License along +// with this library; see the file COPYING3. If not see +// <http://www.gnu.org/licenses/>. + +// { dg-do run { target c++11 } } + +#include <locale> +#include <codecvt> +#include <testsuite_hooks.h> + +using std::codecvt_base; +using std::codecvt_mode; +using std::codecvt_utf16; +using std::wstring_convert; +using std::mbstate_t; + +constexpr codecvt_mode +operator|(codecvt_mode m1, codecvt_mode m2) +{ + using underlying = std::underlying_type<codecvt_mode>::type; + return static_cast<codecvt_mode>(static_cast<underlying>(m1) | m2); +} + +// Read/write UTF-16 code units from data not correctly aligned for char16_t + +void +test01() +{ + mbstate_t st; + constexpr codecvt_mode m = std::consume_header|std::generate_header; + codecvt_utf16<char16_t, 0x10FFFF, m> conv; + const char src[] = "-\xFE\xFF\0\x61\xAB\xCD"; + const char* const src_end = src + 7; + + int len = conv.length(st, src + 1, src_end, 1); + VERIFY( len == 4 ); + len = conv.length(st, src + 1, src_end, 2); + VERIFY( len == 6 ); + + char16_t dst[2]; + char16_t* const dst_end = dst + 2; + char16_t* dst_next; + const char* src_cnext; + auto res = conv.in(st, src + 1, src_end, src_cnext, dst, dst_end, dst_next); + VERIFY( res == codecvt_base::ok ); + VERIFY( dst[0] == 0x0061 ); + VERIFY( dst[1] == 0xabcd ); + VERIFY( src_cnext == src_end ); + VERIFY( dst_next == dst_end ); + + char out[sizeof(src)] = { src[0] }; + char* const out_end = out + 7; + char* out_next; + const char16_t* dst_cnext; + res = conv.out(st, dst, dst_end, dst_cnext, out + 1, out_end, out_next); + VERIFY( res == codecvt_base::ok ); + VERIFY( out_next == out_end ); + VERIFY( dst_cnext == dst_end ); + VERIFY( out[1] == src[1] ); + VERIFY( out[2] == src[2] ); + VERIFY( out[3] == src[3] ); + VERIFY( out[4] == src[4] ); + VERIFY( out[5] == src[5] ); + VERIFY( out[6] == src[6] ); + + codecvt_utf16<char16_t, 0x10FFFF, m|std::little_endian> conv_le; + + len = conv_le.length(st, src + 1, src_end, 1); + VERIFY( len == 4 ); + len = conv_le.length(st, src + 1, src_end, 2); + VERIFY( len == 6 ); + + res = conv_le.in(st, src + 1, src_end, src_cnext, dst, dst_end, dst_next); + VERIFY( res == codecvt_base::ok ); + VERIFY( dst[0] == 0x0061 ); + VERIFY( dst[1] == 0xabcd ); + VERIFY( src_cnext == src_end ); + VERIFY( dst_next == dst_end ); + + res = conv_le.out(st, dst, dst_end, dst_cnext, out + 1, out_end, out_next); + VERIFY( res == codecvt_base::ok ); + VERIFY( out_next == out_end ); + VERIFY( dst_cnext == dst_end ); + VERIFY( out[1] == src[2] ); + VERIFY( out[2] == src[1] ); + VERIFY( out[3] == src[4] ); + VERIFY( out[4] == src[3] ); + VERIFY( out[5] == src[6] ); + VERIFY( out[6] == src[5] ); +} + +void +test02() +{ + mbstate_t st; + constexpr codecvt_mode m = std::consume_header|std::generate_header; + codecvt_utf16<char32_t, 0x10FFFF, m> conv; + const char src[] = "-\xFE\xFF\0\x61\xAB\xCD\xD8\x08\xDF\x45"; + const char* const src_end = src + 11; + + int len = conv.length(st, src + 1, src_end, 1); + VERIFY( len == 4 ); + len = conv.length(st, src + 1, src_end, 2); + VERIFY( len == 6 ); + len = conv.length(st, src + 1, src_end, -1ul); + VERIFY( len == 10 ); + + char32_t dst[3]; + char32_t* const dst_end = dst + 3; + char32_t* dst_next; + const char* src_cnext; + auto res = conv.in(st, src + 1, src_end, src_cnext, dst, dst_end, dst_next); + VERIFY( res == codecvt_base::ok ); + VERIFY( dst[0] == 0x0061 ); + VERIFY( dst[1] == 0xabcd ); + VERIFY( dst[2] == 0x012345 ); + VERIFY( src_cnext == src_end ); + VERIFY( dst_next == dst_end ); + + char out[sizeof(src)] = { src[0] }; + char* const out_end = out + 11; + char* out_next; + const char32_t* dst_cnext; + res = conv.out(st, dst, dst_end, dst_cnext, out + 1, out_end, out_next); + VERIFY( res == codecvt_base::ok ); + VERIFY( out_next == out_end ); + VERIFY( dst_cnext == dst_end ); + VERIFY( out[1] == src[1] ); + VERIFY( out[2] == src[2] ); + VERIFY( out[3] == src[3] ); + VERIFY( out[4] == src[4] ); + VERIFY( out[5] == src[5] ); + VERIFY( out[6] == src[6] ); + VERIFY( out[7] == src[7] ); + VERIFY( out[8] == src[8] ); + VERIFY( out[9] == src[9] ); + VERIFY( out[10] == src[10] ); + + codecvt_utf16<char32_t, 0x10FFFF, m|std::little_endian> conv_le; + + len = conv_le.length(st, src + 1, src_end, 1); + VERIFY( len == 4 ); + len = conv_le.length(st, src + 1, src_end, 2); + VERIFY( len == 6 ); + len = conv.length(st, src + 1, src_end, -1ul); + VERIFY( len == 10 ); + + res = conv_le.in(st, src + 1, src_end, src_cnext, dst, dst_end, dst_next); + VERIFY( res == codecvt_base::ok ); + VERIFY( dst[0] == 0x0061 ); + VERIFY( dst[1] == 0xabcd ); + VERIFY( dst[2] == 0x012345 ); + VERIFY( src_cnext == src_end ); + VERIFY( dst_next == dst_end ); + + res = conv_le.out(st, dst, dst_end, dst_cnext, out + 1, out_end, out_next); + VERIFY( res == codecvt_base::ok ); + VERIFY( out_next == out_end ); + VERIFY( dst_cnext == dst_end ); + VERIFY( out[1] == src[2] ); + VERIFY( out[2] == src[1] ); + VERIFY( out[3] == src[4] ); + VERIFY( out[4] == src[3] ); + VERIFY( out[5] == src[6] ); + VERIFY( out[6] == src[5] ); + VERIFY( out[7] == src[8] ); + VERIFY( out[8] == src[7] ); + VERIFY( out[9] == src[10] ); + VERIFY( out[10] == src[9] ); +} + +void +test03() +{ +#ifdef _GLIBCXX_USE_WCHAR_T + mbstate_t st; + constexpr codecvt_mode m = std::consume_header|std::generate_header; + codecvt_utf16<wchar_t, 0x10FFFF, m> conv; + const char src[] = "-\xFE\xFF\0\x61\xAB\xCD\xD8\x08\xDF\x45"; + const size_t in_len = sizeof(wchar_t) == 4 ? 11 : 7; + const size_t out_len = sizeof(wchar_t) == 4 ? 3 : 2; + const char* const src_end = src + in_len; + + int len = conv.length(st, src + 1, src_end, 1); + VERIFY( len == 4 ); + len = conv.length(st, src + 1, src_end, 2); + VERIFY( len == 6 ); + if (sizeof(wchar_t) == 4) + { + len = conv.length(st, src + 1, src_end, -1ul); + VERIFY( len == 10 ); + } + + wchar_t dst[out_len]; + wchar_t* const dst_end = dst + out_len; + wchar_t* dst_next; + const char* src_cnext; + auto res = conv.in(st, src + 1, src_end, src_cnext, dst, dst_end, dst_next); + VERIFY( res == codecvt_base::ok ); + VERIFY( dst[0] == 0x0061 ); + VERIFY( dst[1] == 0xabcd ); + if (sizeof(wchar_t) == 4) + VERIFY( dst[2] == 0x012345 ); + VERIFY( src_cnext == src_end ); + VERIFY( dst_next == dst_end ); + + char out[sizeof(src)] = { src[0] }; + char* const out_end = out + in_len; + char* out_next; + const wchar_t* dst_cnext; + res = conv.out(st, dst, dst_end, dst_cnext, out + 1, out_end, out_next); + VERIFY( res == codecvt_base::ok ); + VERIFY( out_next == out_end ); + VERIFY( dst_cnext == dst_end ); + VERIFY( out[1] == src[1] ); + VERIFY( out[2] == src[2] ); + VERIFY( out[3] == src[3] ); + VERIFY( out[4] == src[4] ); + VERIFY( out[5] == src[5] ); + VERIFY( out[6] == src[6] ); + if (sizeof(wchar_t) == 4) + { + VERIFY( out[7] == src[7] ); + VERIFY( out[8] == src[8] ); + VERIFY( out[9] == src[9] ); + VERIFY( out[10] == src[10] ); + } + + codecvt_utf16<wchar_t, 0x10FFFF, m|std::little_endian> conv_le; + + len = conv_le.length(st, src + 1, src_end, 1); + VERIFY( len == 4 ); + len = conv_le.length(st, src + 1, src_end, 2); + VERIFY( len == 6 ); + if (sizeof(wchar_t) == 4) + { + len = conv.length(st, src + 1, src_end, -1ul); + VERIFY( len == 10 ); + } + + res = conv_le.in(st, src + 1, src_end, src_cnext, dst, dst_end, dst_next); + VERIFY( res == codecvt_base::ok ); + VERIFY( dst[0] == 0x0061 ); + VERIFY( dst[1] == 0xabcd ); + if (sizeof(wchar_t) == 4) + VERIFY( dst[2] == 0x012345 ); + VERIFY( src_cnext == src_end ); + VERIFY( dst_next == dst_end ); + + res = conv_le.out(st, dst, dst_end, dst_cnext, out + 1, out_end, out_next); + VERIFY( res == codecvt_base::ok ); + VERIFY( out_next == out_end ); + VERIFY( dst_cnext == dst_end ); + VERIFY( out[1] == src[2] ); + VERIFY( out[2] == src[1] ); + VERIFY( out[3] == src[4] ); + VERIFY( out[4] == src[3] ); + VERIFY( out[5] == src[6] ); + VERIFY( out[6] == src[5] ); + if (sizeof(wchar_t) == 4) + { + VERIFY( out[7] == src[8] ); + VERIFY( out[8] == src[7] ); + VERIFY( out[9] == src[10] ); + VERIFY( out[10] == src[9] ); + } +#endif +} + +int +main() +{ + test01(); + test02(); + test03(); +} |