summaryrefslogtreecommitdiff
path: root/libstdc++-v3
diff options
context:
space:
mode:
Diffstat (limited to 'libstdc++-v3')
-rw-r--r--libstdc++-v3/ChangeLog13
-rw-r--r--libstdc++-v3/config/abi/pre/gnu.ver11
-rw-r--r--libstdc++-v3/include/Makefile.am1
-rw-r--r--libstdc++-v3/include/Makefile.in1
-rw-r--r--libstdc++-v3/include/std/codecvt179
-rw-r--r--libstdc++-v3/src/c++11/codecvt.cc1029
-rw-r--r--libstdc++-v3/testsuite/22_locale/codecvt/codecvt_utf16/requirements/1.cc37
-rw-r--r--libstdc++-v3/testsuite/22_locale/codecvt/codecvt_utf8/requirements/1.cc37
-rw-r--r--libstdc++-v3/testsuite/22_locale/codecvt/codecvt_utf8_utf16/requirements/1.cc37
9 files changed, 1304 insertions, 41 deletions
diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog
index 4cd62b0c725..cd4f0891ad4 100644
--- a/libstdc++-v3/ChangeLog
+++ b/libstdc++-v3/ChangeLog
@@ -1,3 +1,16 @@
+2015-01-16 Jonathan Wakely <jwakely@redhat.com>
+
+ * config/abi/pre/gnu.ver: Export new symbols.
+ * include/Makefile.am: Add codecvt.
+ * include/Makefile.in: Regenerate.
+ * include/std/codecvt: New header.
+ * src/c++11/codecvt.cc (__codecvt_utf8_base, __codecvt_utf16_base,
+ __codecvt_utf8_utf16_base): Define specializations.
+ * testsuite/22_locale/codecvt/codecvt_utf8/requirements/1.cc: New.
+ * testsuite/22_locale/codecvt/codecvt_utf16/requirements/1.cc: New.
+ * testsuite/22_locale/codecvt/codecvt_utf8_utf16/requirements/1.cc:
+ New.
+
2015-01-16 Torvald Riegel <triegel@redhat.com>
* src/c++11/futex.cc: New file.
diff --git a/libstdc++-v3/config/abi/pre/gnu.ver b/libstdc++-v3/config/abi/pre/gnu.ver
index dc83ad4f70c..d23306e7c76 100644
--- a/libstdc++-v3/config/abi/pre/gnu.ver
+++ b/libstdc++-v3/config/abi/pre/gnu.ver
@@ -1769,6 +1769,17 @@ GLIBCXX_3.4.21 {
std::__atomic_futex_unsigned_base*;
};
+ # codecvt_utf8 etc.
+ _ZNKSt19__codecvt_utf8_base*;
+ _ZNSt19__codecvt_utf8_base*;
+ _ZT[ISV]St19__codecvt_utf8_base*;
+ _ZNKSt20__codecvt_utf16_base*;
+ _ZNSt20__codecvt_utf16_base*;
+ _ZT[ISV]St20__codecvt_utf16_base*;
+ _ZNKSt25__codecvt_utf8_utf16_base*;
+ _ZNSt25__codecvt_utf8_utf16_base*;
+ _ZT[ISV]St25__codecvt_utf8_utf16_base*;
+
} GLIBCXX_3.4.20;
diff --git a/libstdc++-v3/include/Makefile.am b/libstdc++-v3/include/Makefile.am
index 4772950124a..285a504dbf9 100644
--- a/libstdc++-v3/include/Makefile.am
+++ b/libstdc++-v3/include/Makefile.am
@@ -31,6 +31,7 @@ std_headers = \
${std_srcdir}/atomic \
${std_srcdir}/bitset \
${std_srcdir}/chrono \
+ ${std_srcdir}/codecvt \
${std_srcdir}/complex \
${std_srcdir}/condition_variable \
${std_srcdir}/deque \
diff --git a/libstdc++-v3/include/Makefile.in b/libstdc++-v3/include/Makefile.in
index ebcaa9663fe..5c2ea1496c9 100644
--- a/libstdc++-v3/include/Makefile.in
+++ b/libstdc++-v3/include/Makefile.in
@@ -299,6 +299,7 @@ std_headers = \
${std_srcdir}/atomic \
${std_srcdir}/bitset \
${std_srcdir}/chrono \
+ ${std_srcdir}/codecvt \
${std_srcdir}/complex \
${std_srcdir}/condition_variable \
${std_srcdir}/deque \
diff --git a/libstdc++-v3/include/std/codecvt b/libstdc++-v3/include/std/codecvt
new file mode 100644
index 00000000000..d58a0ecd673
--- /dev/null
+++ b/libstdc++-v3/include/std/codecvt
@@ -0,0 +1,179 @@
+// <codecvt> -*- C++ -*-
+
+// Copyright (C) 2015 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library. This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+
+// Under Section 7 of GPL version 3, you are granted additional
+// permissions described in the GCC Runtime Library Exception, version
+// 3.1, as published by the Free Software Foundation.
+
+// You should have received a copy of the GNU General Public License and
+// a copy of the GCC Runtime Library Exception along with this program;
+// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+// <http://www.gnu.org/licenses/>.
+
+// ISO C++ 14882: 22.5 Standard code conversion facets
+
+/** @file include/codecvt
+ * This is a Standard C++ Library header.
+ */
+
+#ifndef _GLIBCXX_CODECVT
+#define _GLIBCXX_CODECVT 1
+
+#pragma GCC system_header
+
+#if __cplusplus < 201103L
+# include <bits/c++0x_warning.h>
+#else
+
+#include <bits/locale_classes.h>
+#include <bits/codecvt.h>
+
+#ifdef _GLIBCXX_USE_C99_STDINT_TR1
+
+namespace std _GLIBCXX_VISIBILITY(default)
+{
+_GLIBCXX_BEGIN_NAMESPACE_VERSION
+
+ enum codecvt_mode
+ {
+ consume_header = 4,
+ generate_header = 2,
+ little_endian = 1
+ };
+
+ template<typename _Elem, unsigned long _Maxcode = 0x10ffff,
+ codecvt_mode _Mode = (codecvt_mode)0>
+ class codecvt_utf8 : public codecvt<_Elem, char, mbstate_t>
+ {
+ public:
+ explicit
+ codecvt_utf8(size_t __refs = 0);
+
+ ~codecvt_utf8();
+ };
+
+ template<typename _Elem, unsigned long _Maxcode = 0x10ffff,
+ codecvt_mode _Mode = (codecvt_mode)0>
+ class codecvt_utf16 : public codecvt<_Elem, char, mbstate_t>
+ {
+ public:
+ explicit
+ codecvt_utf16(size_t __refs = 0);
+
+ ~codecvt_utf16();
+ };
+
+ template<typename _Elem, unsigned long _Maxcode = 0x10ffff,
+ codecvt_mode _Mode = (codecvt_mode)0>
+ class codecvt_utf8_utf16 : public codecvt<_Elem, char, mbstate_t>
+ {
+ public:
+ explicit
+ codecvt_utf8_utf16(size_t __refs = 0);
+
+ ~codecvt_utf8_utf16();
+ };
+
+#define _GLIBCXX_CODECVT_SPECIALIZATION2(_NAME, _ELEM) \
+ template<> \
+ class _NAME<_ELEM> \
+ : public codecvt<_ELEM, char, mbstate_t> \
+ { \
+ public: \
+ typedef _ELEM intern_type; \
+ typedef char extern_type; \
+ typedef mbstate_t state_type; \
+ \
+ protected: \
+ _NAME(unsigned long __maxcode, codecvt_mode __mode, size_t __refs) \
+ : codecvt(__refs), _M_maxcode(__maxcode), _M_mode(__mode) { } \
+ \
+ virtual \
+ ~_NAME(); \
+ \
+ virtual result \
+ do_out(state_type& __state, const intern_type* __from, \
+ const intern_type* __from_end, const intern_type*& __from_next, \
+ extern_type* __to, extern_type* __to_end, \
+ extern_type*& __to_next) const; \
+ \
+ virtual result \
+ do_unshift(state_type& __state, \
+ extern_type* __to, extern_type* __to_end, \
+ extern_type*& __to_next) const; \
+ \
+ virtual result \
+ do_in(state_type& __state, \
+ const extern_type* __from, const extern_type* __from_end, \
+ const extern_type*& __from_next, \
+ intern_type* __to, intern_type* __to_end, \
+ intern_type*& __to_next) const; \
+ \
+ virtual \
+ int do_encoding() const throw(); \
+ \
+ virtual \
+ bool do_always_noconv() const throw(); \
+ \
+ virtual \
+ int do_length(state_type&, const extern_type* __from, \
+ const extern_type* __end, size_t __max) const; \
+ \
+ virtual int \
+ do_max_length() const throw(); \
+ \
+ private: \
+ unsigned long _M_maxcode; \
+ codecvt_mode _M_mode; \
+ }
+
+#define _GLIBCXX_CODECVT_SPECIALIZATION(_NAME, _ELEM) \
+ _GLIBCXX_CODECVT_SPECIALIZATION2(__ ## _NAME ## _base, _ELEM); \
+ template<unsigned long _Maxcode, codecvt_mode _Mode> \
+ class _NAME<_ELEM, _Maxcode, _Mode> \
+ : public __ ## _NAME ## _base<_ELEM> \
+ { \
+ public: \
+ explicit \
+ _NAME(size_t __refs = 0) \
+ : __ ## _NAME ## _base<_ELEM>(_Maxcode, _Mode, __refs) { } \
+ }
+
+ template<typename _Elem> class __codecvt_utf8_base;
+ template<typename _Elem> class __codecvt_utf16_base;
+ template<typename _Elem> class __codecvt_utf8_utf16_base;
+
+ _GLIBCXX_CODECVT_SPECIALIZATION(codecvt_utf8, char16_t);
+ _GLIBCXX_CODECVT_SPECIALIZATION(codecvt_utf16, char16_t);
+ _GLIBCXX_CODECVT_SPECIALIZATION(codecvt_utf8_utf16, char16_t);
+
+ _GLIBCXX_CODECVT_SPECIALIZATION(codecvt_utf8, char32_t);
+ _GLIBCXX_CODECVT_SPECIALIZATION(codecvt_utf16, char32_t);
+ _GLIBCXX_CODECVT_SPECIALIZATION(codecvt_utf8_utf16, char32_t);
+
+#ifdef _GLIBCXX_USE_WCHAR_T
+ _GLIBCXX_CODECVT_SPECIALIZATION(codecvt_utf8, wchar_t);
+ _GLIBCXX_CODECVT_SPECIALIZATION(codecvt_utf16, wchar_t);
+ _GLIBCXX_CODECVT_SPECIALIZATION(codecvt_utf8_utf16, wchar_t);
+#endif
+
+_GLIBCXX_END_NAMESPACE_VERSION
+} // namespace
+
+#endif // _GLIBCXX_USE_C99_STDINT_TR1
+
+#endif
+
+#endif /* _GLIBCXX_CODECVT */
diff --git a/libstdc++-v3/src/c++11/codecvt.cc b/libstdc++-v3/src/c++11/codecvt.cc
index fdd49720384..7eed903bc0c 100644
--- a/libstdc++-v3/src/c++11/codecvt.cc
+++ b/libstdc++-v3/src/c++11/codecvt.cc
@@ -22,10 +22,9 @@
// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
// <http://www.gnu.org/licenses/>.
-#include <bits/locale_classes.h>
-#include <bits/codecvt.h>
-#include <bits/stl_algobase.h> // std::max
+#include <codecvt>
#include <cstring> // std::memcpy, std::memcmp
+#include <bits/stl_algobase.h> // std::max
#ifdef _GLIBCXX_USE_C99_STDINT_TR1
namespace std _GLIBCXX_VISIBILITY(default)
@@ -51,6 +50,88 @@ namespace
size_t size() const { return end - next; }
};
+ // Multibyte sequences can have "header" consisting of Byte Order Mark
+ const unsigned char utf8_bom[3] = { 0xEF, 0xBB, 0xBF };
+ const unsigned char utf16_bom[4] = { 0xFE, 0xFF };
+ const unsigned char utf16le_bom[4] = { 0xFF, 0xFE };
+
+ template<size_t N>
+ inline bool
+ write_bom(range<char>& to, const unsigned char (&bom)[N])
+ {
+ if (to.size() < N)
+ return false;
+ memcpy(to.next, bom, N);
+ to.next += N;
+ return true;
+ }
+
+ // If generate_header is set in mode write out UTF-8 BOM.
+ bool
+ write_utf8_bom(range<char>& to, codecvt_mode mode)
+ {
+ if (mode & generate_header)
+ return write_bom(to, utf8_bom);
+ return true;
+ }
+
+ // If generate_header is set in mode write out the UTF-16 BOM indicated
+ // by whether little_endian is set in mode.
+ bool
+ write_utf16_bom(range<char16_t>& to, codecvt_mode mode)
+ {
+ if (mode & generate_header)
+ {
+ if (!to.size())
+ return false;
+ auto* bom = (mode & little_endian) ? utf16le_bom : utf16_bom;
+ std::memcpy(to.next, bom, 2);
+ ++to.next;
+ }
+ return true;
+ }
+
+ template<size_t N>
+ inline bool
+ read_bom(range<const char>& from, const unsigned char (&bom)[N])
+ {
+ if (from.size() >= N && !memcmp(from.next, bom, N))
+ {
+ from.next += N;
+ return true;
+ }
+ return false;
+ }
+
+ // If consume_header is set in mode update from.next to after any BOM.
+ void
+ read_utf8_bom(range<const char>& from, codecvt_mode mode)
+ {
+ if (mode & consume_header)
+ read_bom(from, utf8_bom);
+ }
+
+ // If consume_header is set in mode update from.next to after any BOM.
+ // Return little_endian iff the UTF-16LE BOM was present.
+ codecvt_mode
+ read_utf16_bom(range<const char16_t>& from, codecvt_mode mode)
+ {
+ if (mode & consume_header && from.size())
+ {
+ if (*from.next == 0xFEFF)
+ ++from.next;
+ else if (*from.next == 0xFFFE)
+ {
+ ++from.next;
+ return little_endian;
+ }
+ }
+ return {};
+ }
+
+ // Read a codepoint from a UTF-8 multibyte sequence.
+ // Updates from.next if the codepoint is not greater than maxcode.
+ // Returns -1 if there is an invalid or incomplete multibyte character.
char32_t
read_utf8_code_point(range<const char>& from, unsigned long maxcode)
{
@@ -74,9 +155,8 @@ namespace
if ((c2 & 0xC0) != 0x80)
return -1;
char32_t c = (c1 << 6) + c2 - 0x3080;
- if (c > maxcode)
- return -1;
- from.next += 2;
+ if (c <= maxcode)
+ from.next += 2;
return c;
}
else if (c1 < 0xF0) // 3-byte sequence
@@ -92,9 +172,8 @@ namespace
if ((c3 & 0xC0) != 0x80)
return -1;
char32_t c = (c1 << 12) + (c2 << 6) + c3 - 0xE2080;
- if (c > maxcode)
- return -1;
- from.next += 3;
+ if (c <= maxcode)
+ from.next += 3;
return c;
}
else if (c1 < 0xF5) // 4-byte sequence
@@ -115,9 +194,8 @@ namespace
if ((c4 & 0xC0) != 0x80)
return -1;
char32_t c = (c1 << 18) + (c2 << 12) + (c3 << 6) + c4 - 0x3C82080;
- if (c > maxcode)
- return -1;
- from.next += 4;
+ if (c <= maxcode)
+ from.next += 4;
return c;
}
else // > U+10FFFF
@@ -162,9 +240,48 @@ namespace
return true;
}
+ inline char16_t
+ adjust_byte_order(char16_t c, codecvt_mode mode)
+ {
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+ return (mode & little_endian) ? __builtin_bswap16(c) : c;
+#else
+ return (mode & little_endian) ? c : __builtin_bswap16(c);
+#endif
+ }
+
+ // Read a codepoint from a UTF-16 multibyte sequence.
+ // The sequence's endianness is indicated by (mode & little_endian).
+ // Updates from.next if the codepoint is not greater than maxcode.
+ // Returns -1 if there is an incomplete multibyte character.
+ char32_t
+ read_utf16_code_point(range<const char16_t>& from, unsigned long maxcode,
+ codecvt_mode mode)
+ {
+ int inc = 1;
+ char32_t c = adjust_byte_order(from.next[0], mode);
+ if (c >= 0xD800 && c <= 0xDBFF)
+ {
+ if (from.size() < 2)
+ return -1;
+ const char16_t c2 = adjust_byte_order(from.next[1], mode);
+ if (c2 >= 0xDC00 && c2 <= 0xDFFF)
+ {
+ c = (c << 10) + c2 - 0x35FDC00;
+ inc = 2;
+ }
+ }
+ if (c <= maxcode)
+ from.next += inc;
+ return c;
+ }
+
+ template<typename C>
bool
- write_utf16_code_point(range<char16_t>& to, char32_t codepoint)
+ write_utf16_code_point(range<C>& to, char32_t codepoint, codecvt_mode mode)
{
+ static_assert(sizeof(C) >= 2, "a code unit must be at least 16-bit");
+
if (codepoint < max_single_utf16_unit)
{
if (to.size() > 0)
@@ -183,8 +300,8 @@ namespace
char16_t trail = 0xDC00 + (codepoint & 0x3FF);
char32_t utf16bytes = (lead << 10) + trail + SURROGATE_OFFSET;
- to.next[0] = utf16bytes >> 16;
- to.next[1] = utf16bytes & 0xFFFF;
+ to.next[0] = adjust_byte_order(utf16bytes >> 16, mode);
+ to.next[1] = adjust_byte_order(utf16bytes & 0xFFFF, mode);
to.next += 2;
return true;
}
@@ -194,12 +311,15 @@ namespace
// utf8 -> ucs4
codecvt_base::result
ucs4_in(range<const char>& from, range<char32_t>& to,
- unsigned long maxcode = max_code_point)
+ unsigned long maxcode = max_code_point, codecvt_mode mode = {})
{
+ read_utf8_bom(from, mode);
while (from.size() && to.size())
{
const char32_t codepoint = read_utf8_code_point(from, maxcode);
- if (codepoint == char32_t(-1) || codepoint > maxcode)
+ if (codepoint == char32_t(-1))
+ break;
+ if (codepoint > maxcode)
return codecvt_base::error;
*to.next++ = codepoint;
}
@@ -209,8 +329,10 @@ namespace
// ucs4 -> utf8
codecvt_base::result
ucs4_out(range<const char32_t>& from, range<char>& to,
- unsigned long maxcode = max_code_point)
+ unsigned long maxcode = max_code_point, codecvt_mode mode = {})
{
+ if (!write_utf8_bom(to, mode))
+ return codecvt_base::partial;
while (from.size())
{
const char32_t c = from.next[0];
@@ -223,20 +345,62 @@ namespace
return codecvt_base::ok;
}
+ // utf16 -> ucs4
+ codecvt_base::result
+ ucs4_in(range<const char16_t>& from, range<char32_t>& to,
+ unsigned long maxcode = max_code_point, codecvt_mode mode = {})
+ {
+ if (read_utf16_bom(from, mode) == little_endian)
+ mode = codecvt_mode(mode & little_endian);
+ while (from.size() && to.size())
+ {
+ const char32_t codepoint = read_utf16_code_point(from, maxcode, mode);
+ if (codepoint == char32_t(-1))
+ break;
+ if (codepoint > maxcode)
+ return codecvt_base::error;
+ *to.next++ = codepoint;
+ }
+ return from.size() ? codecvt_base::partial : codecvt_base::ok;
+ }
+
+ // ucs4 -> utf16
+ codecvt_base::result
+ ucs4_out(range<const char32_t>& from, range<char16_t>& to,
+ unsigned long maxcode = max_code_point, codecvt_mode mode = {})
+ {
+ if (!write_utf16_bom(to, mode))
+ return codecvt_base::partial;
+ while (from.size())
+ {
+ const char32_t c = from.next[0];
+ if (c > maxcode)
+ return codecvt_base::error;
+ if (!write_utf16_code_point(to, c, mode))
+ return codecvt_base::partial;
+ ++from.next;
+ }
+ return codecvt_base::ok;
+ }
+
// utf8 -> utf16
+ template<typename C>
codecvt_base::result
- utf16_in(range<const char>& from, range<char16_t>& to,
- unsigned long maxcode = max_code_point)
+ utf16_in(range<const char>& from, range<C>& to,
+ unsigned long maxcode = max_code_point, codecvt_mode mode = {})
{
+ read_utf8_bom(from, mode);
while (from.size() && to.size())
{
const char* first = from.next;
if ((unsigned char)*first >= 0xF0 && to.size() < 2)
return codecvt_base::partial;
const char32_t codepoint = read_utf8_code_point(from, maxcode);
- if (codepoint == char32_t(-1) || codepoint > maxcode)
+ if (codepoint == char32_t(-1))
+ return codecvt_base::partial;
+ if (codepoint > maxcode)
return codecvt_base::error;
- if (!write_utf16_code_point(to, codepoint))
+ if (!write_utf16_code_point(to, codepoint, {}))
{
from.next = first;
return codecvt_base::partial;
@@ -246,15 +410,18 @@ namespace
}
// utf16 -> utf8
+ template<typename C>
codecvt_base::result
- utf16_out(range<const char16_t>& from, range<char>& to,
- unsigned long maxcode = max_code_point)
+ utf16_out(range<const C>& from, range<char>& to,
+ unsigned long maxcode = max_code_point, codecvt_mode mode = {})
{
+ if (!write_utf8_bom(to, mode))
+ return codecvt_base::partial;
while (from.size())
{
char32_t c = from.next[0];
int inc = 1;
- if (c >= 0xD800 && c < 0xDBFF) // start of surrogate pair
+ if (c >= 0xD800 && c <= 0xDBFF) // start of surrogate pair
{
if (from.size() < 2)
return codecvt_base::ok; // stop converting at this point
@@ -278,11 +445,12 @@ namespace
}
// return pos such that [begin,pos) is valid UTF-16 string no longer than max
- int
- utf16_len(const char* begin, const char* end, size_t max,
- char32_t maxcode = max_code_point)
+ const char*
+ utf16_span(const char* begin, const char* end, size_t max,
+ char32_t maxcode = max_code_point, codecvt_mode mode = {})
{
range<const char> from{ begin, end };
+ read_utf8_bom(from, mode);
size_t count = 0;
while (count+1 < max)
{
@@ -295,24 +463,117 @@ namespace
}
if (count+1 == max) // take one more character if it fits in a single unit
read_utf8_code_point(from, std::max(max_single_utf16_unit, maxcode));
- return from.next - begin;
+ return from.next;
}
- // return pos such that [begin,pos) is valid UCS-4 string no longer than max
- int
- ucs4_len(const char* begin, const char* end, size_t max,
- char32_t maxcode = max_code_point)
+ // utf8 -> ucs2
+ codecvt_base::result
+ ucs2_in(range<const char>& from, range<char16_t>& to,
+ char32_t maxcode = max_code_point, codecvt_mode mode = {})
{
- range<const char> from{ begin, end };
- size_t count = 0;
- while (count < max)
+ return utf16_in(from, to, std::max(max_single_utf16_unit, maxcode), mode);
+ }
+
+ // ucs2 -> utf8
+ codecvt_base::result
+ ucs2_out(range<const char16_t>& from, range<char>& to,
+ char32_t maxcode = max_code_point, codecvt_mode mode = {})
+ {
+ return utf16_out(from, to, std::max(max_single_utf16_unit, maxcode), mode);
+ }
+
+ // ucs2 -> utf16
+ codecvt_base::result
+ ucs2_out(range<const char16_t>& from, range<char16_t>& to,
+ char32_t maxcode = max_code_point, codecvt_mode mode = {})
+ {
+ if (!write_utf16_bom(to, mode))
+ return codecvt_base::partial;
+ while (from.size() && to.size())
{
- char32_t c = read_utf8_code_point(from, maxcode);
+ char16_t c = from.next[0];
+ if (c >= 0xD800 && c <= 0xDBFF) // start of surrogate pair
+ return codecvt_base::error;
+ if (c > maxcode)
+ return codecvt_base::error;
+ *to.next++ = adjust_byte_order(c, mode);
+ ++from.next;
+ }
+ return from.size() == 0 ? codecvt_base::ok : codecvt_base::partial;
+ }
+
+ // utf16 -> ucs2
+ codecvt_base::result
+ ucs2_in(range<const char16_t>& from, range<char16_t>& to,
+ char32_t maxcode = max_code_point, codecvt_mode mode = {})
+ {
+ if (read_utf16_bom(from, mode) == little_endian)
+ mode = codecvt_mode(mode & little_endian);
+ maxcode = std::max(max_single_utf16_unit, maxcode);
+ while (from.size() && to.size())
+ {
+ const char32_t c = read_utf16_code_point(from, maxcode, mode);
if (c == char32_t(-1))
break;
- ++count;
+ if (c >= maxcode)
+ return codecvt_base::error;
+ *to.next++ = c;
}
- return from.next - begin;
+ return from.size() == 0 ? codecvt_base::ok : codecvt_base::partial;
+ }
+
+ const char16_t*
+ ucs2_span(const char16_t* begin, const char16_t* end, size_t max,
+ char32_t maxcode, codecvt_mode mode)
+ {
+ range<const char16_t> from{ begin, end };
+ if (read_utf16_bom(from, mode) == little_endian)
+ mode = codecvt_mode(mode & little_endian);
+ maxcode = std::max(max_single_utf16_unit, maxcode);
+ char32_t c = 0;
+ while (max-- && c <= maxcode)
+ c = read_utf16_code_point(from, maxcode, mode);
+ return from.next;
+ }
+
+ const char*
+ ucs2_span(const char* begin, const char* end, size_t max,
+ char32_t maxcode, codecvt_mode mode)
+ {
+ range<const char> from{ begin, end };
+ read_utf8_bom(from, mode);
+ maxcode = std::max(max_single_utf16_unit, maxcode);
+ char32_t c = 0;
+ while (max-- && c <= maxcode)
+ c = read_utf8_code_point(from, maxcode);
+ return from.next;
+ }
+
+ // return pos such that [begin,pos) is valid UCS-4 string no longer than max
+ const char*
+ ucs4_span(const char* begin, const char* end, size_t max,
+ char32_t maxcode = max_code_point, codecvt_mode mode = {})
+ {
+ range<const char> from{ begin, end };
+ read_utf8_bom(from, mode);
+ char32_t c = 0;
+ while (max-- && c <= maxcode)
+ c = read_utf8_code_point(from, maxcode);
+ return from.next;
+ }
+
+ // return pos such that [begin,pos) is valid UCS-4 string no longer than max
+ const char16_t*
+ ucs4_span(const char16_t* begin, const char16_t* end, size_t max,
+ char32_t maxcode = max_code_point, codecvt_mode mode = {})
+ {
+ range<const char16_t> from{ begin, end };
+ if (read_utf16_bom(from, mode) == little_endian)
+ mode = codecvt_mode(mode & little_endian);
+ char32_t c = 0;
+ while (max-- && c <= maxcode)
+ c = read_utf16_code_point(from, maxcode, mode);
+ return from.next;
}
}
@@ -376,7 +637,8 @@ codecvt<char16_t, char, mbstate_t>::
do_length(state_type&, const extern_type* __from,
const extern_type* __end, size_t __max) const
{
- return utf16_len(__from, __end, __max);
+ __end = utf16_span(__from, __end, __max);
+ return __end - __from;
}
int
@@ -446,13 +708,698 @@ codecvt<char32_t, char, mbstate_t>::
do_length(state_type&, const extern_type* __from,
const extern_type* __end, size_t __max) const
{
- return ucs4_len(__from, __end, __max);
+ __end = ucs4_span(__from, __end, __max);
+ return __end - __from;
}
int
codecvt<char32_t, char, mbstate_t>::do_max_length() const throw()
{ return 4; }
+// Define members of codecvt_utf8<char16_t> base class implementation.
+// Converts from UTF-8 to UCS-2.
+
+__codecvt_utf8_base<char16_t>::~__codecvt_utf8_base() { }
+
+codecvt_base::result
+__codecvt_utf8_base<char16_t>::
+do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
+ const intern_type*& __from_next,
+ extern_type* __to, extern_type* __to_end,
+ extern_type*& __to_next) const
+{
+ range<const char16_t> from{ __from, __from_end };
+ range<char> to{ __to, __to_end };
+ auto res = ucs2_out(from, to, _M_maxcode, _M_mode);
+ __from_next = from.next;
+ __to_next = to.next;
+ return res;
+}
+
+codecvt_base::result
+__codecvt_utf8_base<char16_t>::
+do_unshift(state_type&, extern_type* __to, extern_type*,
+ extern_type*& __to_next) const
+{
+ __to_next = __to;
+ return noconv;
+}
+
+codecvt_base::result
+__codecvt_utf8_base<char16_t>::
+do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
+ const extern_type*& __from_next,
+ intern_type* __to, intern_type* __to_end,
+ intern_type*& __to_next) const
+{
+ range<const char> from{ __from, __from_end };
+ range<char16_t> to{ __to, __to_end };
+ auto res = ucs2_in(from, to, _M_maxcode, _M_mode);
+ __from_next = from.next;
+ __to_next = to.next;
+ return res;
+}
+
+int
+__codecvt_utf8_base<char16_t>::do_encoding() const throw()
+{ return 0; }
+
+bool
+__codecvt_utf8_base<char16_t>::do_always_noconv() const throw()
+{ return false; }
+
+int
+__codecvt_utf8_base<char16_t>::
+do_length(state_type&, const extern_type* __from,
+ const extern_type* __end, size_t __max) const
+{
+ __end = ucs2_span(__from, __end, __max, _M_maxcode, _M_mode);
+ return __end - __from;
+}
+
+int
+__codecvt_utf8_base<char16_t>::do_max_length() const throw()
+{ return 3; }
+
+// Define members of codecvt_utf8<char32_t> base class implementation.
+// Converts from UTF-8 to UTF-32 (aka UCS-4).
+
+__codecvt_utf8_base<char32_t>::~__codecvt_utf8_base() { }
+
+codecvt_base::result
+__codecvt_utf8_base<char32_t>::
+do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
+ const intern_type*& __from_next,
+ extern_type* __to, extern_type* __to_end,
+ extern_type*& __to_next) const
+{
+ range<const char32_t> from{ __from, __from_end };
+ range<char> to{ __to, __to_end };
+ auto res = ucs4_out(from, to, _M_maxcode, _M_mode);
+ __from_next = from.next;
+ __to_next = to.next;
+ return res;
+}
+
+codecvt_base::result
+__codecvt_utf8_base<char32_t>::
+do_unshift(state_type&, extern_type* __to, extern_type*,
+ extern_type*& __to_next) const
+{
+ __to_next = __to;
+ return noconv;
+}
+
+codecvt_base::result
+__codecvt_utf8_base<char32_t>::
+do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
+ const extern_type*& __from_next,
+ intern_type* __to, intern_type* __to_end,
+ intern_type*& __to_next) const
+{
+ range<const char> from{ __from, __from_end };
+ range<char32_t> to{ __to, __to_end };
+ auto res = ucs4_in(from, to, _M_maxcode, _M_mode);
+ __from_next = from.next;
+ __to_next = to.next;
+ return res;
+}
+
+int
+__codecvt_utf8_base<char32_t>::do_encoding() const throw()
+{ return 0; }
+
+bool
+__codecvt_utf8_base<char32_t>::do_always_noconv() const throw()
+{ return false; }
+
+int
+__codecvt_utf8_base<char32_t>::
+do_length(state_type&, const extern_type* __from,
+ const extern_type* __end, size_t __max) const
+{
+ __end = ucs4_span(__from, __end, __max, _M_maxcode, _M_mode);
+ return __end - __from;
+}
+
+int
+__codecvt_utf8_base<char32_t>::do_max_length() const throw()
+{ return 4; }
+
+#ifdef _GLIBCXX_USE_WCHAR_T
+// Define members of codecvt_utf8<wchar_t> base class implementation.
+// Converts from UTF-8 to UCS-2 or UCS-4 depending on sizeof(wchar_t).
+
+__codecvt_utf8_base<wchar_t>::~__codecvt_utf8_base() { }
+
+codecvt_base::result
+__codecvt_utf8_base<wchar_t>::
+do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
+ const intern_type*& __from_next,
+ extern_type* __to, extern_type* __to_end,
+ extern_type*& __to_next) const
+{
+ range<char> to{ __to, __to_end };
+#if __SIZEOF_WCHAR_T__ == 2
+ range<const char16_t> from{
+ reinterpret_cast<const char16_t*>(__from),
+ reinterpret_cast<const char16_t*>(__from_end)
+ };
+ auto res = ucs2_out(from, to, _M_maxcode, _M_mode);
+#elif __SIZEOF_WCHAR_T__ == 4
+ range<const char32_t> from{
+ reinterpret_cast<const char32_t*>(__from),
+ reinterpret_cast<const char32_t*>(__from_end)
+ };
+ auto res = ucs4_out(from, to, _M_maxcode, _M_mode);
+#else
+ return codecvt_base::error;
+#endif
+ __from_next = reinterpret_cast<const wchar_t*>(from.next);
+ __to_next = to.next;
+ return res;
+}
+
+codecvt_base::result
+__codecvt_utf8_base<wchar_t>::
+do_unshift(state_type&, extern_type* __to, extern_type*,
+ extern_type*& __to_next) const
+{
+ __to_next = __to;
+ return noconv;
+}
+
+codecvt_base::result
+__codecvt_utf8_base<wchar_t>::
+do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
+ const extern_type*& __from_next,
+ intern_type* __to, intern_type* __to_end,
+ intern_type*& __to_next) const
+{
+ range<const char> from{ __from, __from_end };
+#if __SIZEOF_WCHAR_T__ == 2
+ range<char16_t> to{
+ reinterpret_cast<char16_t*>(__to),
+ reinterpret_cast<char16_t*>(__to_end)
+ };
+ auto res = ucs2_in(from, to, _M_maxcode, _M_mode);
+#elif __SIZEOF_WCHAR_T__ == 4
+ range<char32_t> to{
+ reinterpret_cast<char32_t*>(__to),
+ reinterpret_cast<char32_t*>(__to_end)
+ };
+ auto res = ucs4_in(from, to, _M_maxcode, _M_mode);
+#else
+ return codecvt_base::error;
+#endif
+ __from_next = from.next;
+ __to_next = reinterpret_cast<wchar_t*>(to.next);
+ return res;
+}
+
+int
+__codecvt_utf8_base<wchar_t>::do_encoding() const throw()
+{ return 0; }
+
+bool
+__codecvt_utf8_base<wchar_t>::do_always_noconv() const throw()
+{ return false; }
+
+int
+__codecvt_utf8_base<wchar_t>::
+do_length(state_type&, const extern_type* __from,
+ const extern_type* __end, size_t __max) const
+{
+#if __SIZEOF_WCHAR_T__ == 2
+ __end = ucs2_span(__from, __end, __max, _M_maxcode, _M_mode);
+#elif __SIZEOF_WCHAR_T__ == 4
+ __end = ucs4_span(__from, __end, __max, _M_maxcode, _M_mode);
+#else
+ __end = __from;
+#endif
+ return __end - __from;
+}
+
+int
+__codecvt_utf8_base<wchar_t>::do_max_length() const throw()
+{ return 4; }
+#endif
+
+// Define members of codecvt_utf16<char16_t> base class implementation.
+// Converts from UTF-16 to UCS-2.
+
+__codecvt_utf16_base<char16_t>::~__codecvt_utf16_base() { }
+
+codecvt_base::result
+__codecvt_utf16_base<char16_t>::
+do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
+ const intern_type*& __from_next,
+ extern_type* __to, extern_type* __to_end,
+ extern_type*& __to_next) const
+{
+ range<const char16_t> from{ __from, __from_end };
+ range<char16_t> to{
+ reinterpret_cast<char16_t*>(__to),
+ reinterpret_cast<char16_t*>(__to_end)
+ };
+ auto res = ucs2_out(from, to, _M_maxcode, _M_mode);
+ __from_next = from.next;
+ __to_next = reinterpret_cast<char*>(to.next);
+ return res;
+}
+
+codecvt_base::result
+__codecvt_utf16_base<char16_t>::
+do_unshift(state_type&, extern_type* __to, extern_type*,
+ extern_type*& __to_next) const
+{
+ __to_next = __to;
+ return noconv;
+}
+
+codecvt_base::result
+__codecvt_utf16_base<char16_t>::
+do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
+ const extern_type*& __from_next,
+ intern_type* __to, intern_type* __to_end,
+ intern_type*& __to_next) const
+{
+ range<const char16_t> from{
+ reinterpret_cast<const char16_t*>(__from),
+ reinterpret_cast<const char16_t*>(__from_end)
+ };
+ range<char16_t> to{ __to, __to_end };
+ auto res = ucs2_in(from, to, _M_maxcode, _M_mode);
+ __from_next = reinterpret_cast<const char*>(from.next);
+ __to_next = to.next;
+ return res;
+}
+
+int
+__codecvt_utf16_base<char16_t>::do_encoding() const throw()
+{ return 1; }
+
+bool
+__codecvt_utf16_base<char16_t>::do_always_noconv() const throw()
+{ return false; }
+
+int
+__codecvt_utf16_base<char16_t>::
+do_length(state_type&, const extern_type* __from,
+ const extern_type* __end, size_t __max) const
+{
+ auto next = reinterpret_cast<const char16_t*>(__from);
+ next = ucs2_span(next, reinterpret_cast<const char16_t*>(__end), __max,
+ _M_maxcode, _M_mode);
+ return reinterpret_cast<const char*>(next) - __from;
+}
+
+int
+__codecvt_utf16_base<char16_t>::do_max_length() const throw()
+{ return 3; }
+
+// Define members of codecvt_utf16<char32_t> base class implementation.
+// Converts from UTF-16 to UTF-32 (aka UCS-4).
+
+__codecvt_utf16_base<char32_t>::~__codecvt_utf16_base() { }
+
+codecvt_base::result
+__codecvt_utf16_base<char32_t>::
+do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
+ const intern_type*& __from_next,
+ extern_type* __to, extern_type* __to_end,
+ extern_type*& __to_next) const
+{
+ range<const char32_t> from{ __from, __from_end };
+ range<char16_t> to{
+ reinterpret_cast<char16_t*>(__to),
+ reinterpret_cast<char16_t*>(__to_end)
+ };
+ auto res = ucs4_out(from, to, _M_maxcode, _M_mode);
+ __from_next = from.next;
+ __to_next = reinterpret_cast<char*>(to.next);
+ return res;
+}
+
+codecvt_base::result
+__codecvt_utf16_base<char32_t>::
+do_unshift(state_type&, extern_type* __to, extern_type*,
+ extern_type*& __to_next) const
+{
+ __to_next = __to;
+ return noconv;
+}
+
+codecvt_base::result
+__codecvt_utf16_base<char32_t>::
+do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
+ const extern_type*& __from_next,
+ intern_type* __to, intern_type* __to_end,
+ intern_type*& __to_next) const
+{
+ range<const char16_t> from{
+ reinterpret_cast<const char16_t*>(__from),
+ reinterpret_cast<const char16_t*>(__from_end)
+ };
+ range<char32_t> to{ __to, __to_end };
+ auto res = ucs4_in(from, to, _M_maxcode, _M_mode);
+ __from_next = reinterpret_cast<const char*>(from.next);
+ __to_next = to.next;
+ return res;
+}
+
+int
+__codecvt_utf16_base<char32_t>::do_encoding() const throw()
+{ return 0; }
+
+bool
+__codecvt_utf16_base<char32_t>::do_always_noconv() const throw()
+{ return false; }
+
+int
+__codecvt_utf16_base<char32_t>::
+do_length(state_type&, const extern_type* __from,
+ const extern_type* __end, size_t __max) const
+{
+ auto next = reinterpret_cast<const char16_t*>(__from);
+ next = ucs4_span(next, reinterpret_cast<const char16_t*>(__end), __max,
+ _M_maxcode, _M_mode);
+ return reinterpret_cast<const char*>(next) - __from;
+}
+
+int
+__codecvt_utf16_base<char32_t>::do_max_length() const throw()
+{ return 3; }
+
+#ifdef _GLIBCXX_USE_WCHAR_T
+// Define members of codecvt_utf16<wchar_t> base class implementation.
+// Converts from UTF-8 to UCS-2 or UCS-4 depending on sizeof(wchar_t).
+
+__codecvt_utf16_base<wchar_t>::~__codecvt_utf16_base() { }
+
+codecvt_base::result
+__codecvt_utf16_base<wchar_t>::
+do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
+ const intern_type*& __from_next,
+ extern_type* __to, extern_type* __to_end,
+ extern_type*& __to_next) const
+{
+ range<char> to{ __to, __to_end };
+#if __SIZEOF_WCHAR_T__ == 2
+ range<const char16_t> from{
+ reinterpret_cast<const char16_t*>(__from),
+ reinterpret_cast<const char16_t*>(__from_end)
+ };
+ auto res = ucs2_out(from, to, _M_maxcode, _M_mode);
+#elif __SIZEOF_WCHAR_T__ == 4
+ range<const char32_t> from{
+ reinterpret_cast<const char32_t*>(__from),
+ reinterpret_cast<const char32_t*>(__from_end)
+ };
+ auto res = ucs4_out(from, to, _M_maxcode, _M_mode);
+#else
+ return codecvt_base::error;
+#endif
+ __from_next = reinterpret_cast<const wchar_t*>(from.next);
+ __to_next = to.next;
+ return res;
+}
+
+codecvt_base::result
+__codecvt_utf16_base<wchar_t>::
+do_unshift(state_type&, extern_type* __to, extern_type*,
+ extern_type*& __to_next) const
+{
+ __to_next = __to;
+ return noconv;
+}
+
+codecvt_base::result
+__codecvt_utf16_base<wchar_t>::
+do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
+ const extern_type*& __from_next,
+ intern_type* __to, intern_type* __to_end,
+ intern_type*& __to_next) const
+{
+ range<const char> from{ __from, __from_end };
+#if __SIZEOF_WCHAR_T__ == 2
+ range<char16_t> to{
+ reinterpret_cast<char16_t*>(__to),
+ reinterpret_cast<char16_t*>(__to_end)
+ };
+ auto res = ucs2_in(from, to, _M_maxcode, _M_mode);
+#elif __SIZEOF_WCHAR_T__ == 4
+ range<char32_t> to{
+ reinterpret_cast<char32_t*>(__to),
+ reinterpret_cast<char32_t*>(__to_end)
+ };
+ auto res = ucs4_in(from, to, _M_maxcode, _M_mode);
+#else
+ return codecvt_base::error;
+#endif
+ __from_next = from.next;
+ __to_next = reinterpret_cast<wchar_t*>(to.next);
+ return res;
+}
+
+int
+__codecvt_utf16_base<wchar_t>::do_encoding() const throw()
+{ return 0; }
+
+bool
+__codecvt_utf16_base<wchar_t>::do_always_noconv() const throw()
+{ return false; }
+
+int
+__codecvt_utf16_base<wchar_t>::
+do_length(state_type&, const extern_type* __from,
+ const extern_type* __end, size_t __max) const
+{
+ auto next = reinterpret_cast<const char16_t*>(__from);
+#if __SIZEOF_WCHAR_T__ == 2
+ next = ucs2_span(next, reinterpret_cast<const char16_t*>(__end), __max,
+ _M_maxcode, _M_mode);
+#elif __SIZEOF_WCHAR_T__ == 4
+ next = ucs4_span(next, reinterpret_cast<const char16_t*>(__end), __max,
+ _M_maxcode, _M_mode);
+#endif
+ return reinterpret_cast<const char*>(next) - __from;
+}
+
+int
+__codecvt_utf16_base<wchar_t>::do_max_length() const throw()
+{ return 4; }
+#endif
+
+// Define members of codecvt_utf8_utf16<char16_t> base class implementation.
+// Converts from UTF-8 to UTF-16.
+
+__codecvt_utf8_utf16_base<char16_t>::~__codecvt_utf8_utf16_base() { }
+
+codecvt_base::result
+__codecvt_utf8_utf16_base<char16_t>::
+do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
+ const intern_type*& __from_next,
+ extern_type* __to, extern_type* __to_end,
+ extern_type*& __to_next) const
+{
+ range<const char16_t> from{ __from, __from_end };
+ range<char> to{ __to, __to_end };
+ auto res = utf16_out(from, to, _M_maxcode, _M_mode);
+ __from_next = from.next;
+ __to_next = to.next;
+ return res;
+}
+
+codecvt_base::result
+__codecvt_utf8_utf16_base<char16_t>::
+do_unshift(state_type&, extern_type* __to, extern_type*,
+ extern_type*& __to_next) const
+{
+ __to_next = __to;
+ return noconv;
+}
+
+codecvt_base::result
+__codecvt_utf8_utf16_base<char16_t>::
+do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
+ const extern_type*& __from_next,
+ intern_type* __to, intern_type* __to_end,
+ intern_type*& __to_next) const
+{
+ range<const char> from{ __from, __from_end };
+ range<char16_t> to{ __to, __to_end };
+ auto res = utf16_in(from, to, _M_maxcode, _M_mode);
+ __from_next = from.next;
+ __to_next = to.next;
+ return res;
+}
+
+int
+__codecvt_utf8_utf16_base<char16_t>::do_encoding() const throw()
+{ return 0; }
+
+bool
+__codecvt_utf8_utf16_base<char16_t>::do_always_noconv() const throw()
+{ return false; }
+
+int
+__codecvt_utf8_utf16_base<char16_t>::
+do_length(state_type&, const extern_type* __from,
+ const extern_type* __end, size_t __max) const
+{
+ __end = utf16_span(__from, __end, __max, _M_maxcode, _M_mode);
+ return __end - __from;
+}
+
+int
+__codecvt_utf8_utf16_base<char16_t>::do_max_length() const throw()
+{
+ // Any valid UTF-8 sequence of 3 bytes fits in a single 16-bit code unit,
+ // whereas 4 byte sequences require two 16-bit code units.
+ return 3;
+}
+
+// Define members of codecvt_utf8_utf16<char32_t> base class implementation.
+// Converts from UTF-8 to UTF-16.
+
+__codecvt_utf8_utf16_base<char32_t>::~__codecvt_utf8_utf16_base() { }
+
+codecvt_base::result
+__codecvt_utf8_utf16_base<char32_t>::
+do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
+ const intern_type*& __from_next,
+ extern_type* __to, extern_type* __to_end,
+ extern_type*& __to_next) const
+{
+ range<const char32_t> from{ __from, __from_end };
+ range<char> to{ __to, __to_end };
+ auto res = utf16_out(from, to, _M_maxcode, _M_mode);
+ __from_next = from.next;
+ __to_next = to.next;
+ return res;
+}
+
+codecvt_base::result
+__codecvt_utf8_utf16_base<char32_t>::
+do_unshift(state_type&, extern_type* __to, extern_type*,
+ extern_type*& __to_next) const
+{
+ __to_next = __to;
+ return noconv;
+}
+
+codecvt_base::result
+__codecvt_utf8_utf16_base<char32_t>::
+do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
+ const extern_type*& __from_next,
+ intern_type* __to, intern_type* __to_end,
+ intern_type*& __to_next) const
+{
+ range<const char> from{ __from, __from_end };
+ range<char32_t> to{ __to, __to_end };
+ auto res = utf16_in(from, to, _M_maxcode, _M_mode);
+ __from_next = from.next;
+ __to_next = to.next;
+ return res;
+}
+
+int
+__codecvt_utf8_utf16_base<char32_t>::do_encoding() const throw()
+{ return 0; }
+
+bool
+__codecvt_utf8_utf16_base<char32_t>::do_always_noconv() const throw()
+{ return false; }
+
+int
+__codecvt_utf8_utf16_base<char32_t>::
+do_length(state_type&, const extern_type* __from,
+ const extern_type* __end, size_t __max) const
+{
+ __end = utf16_span(__from, __end, __max, _M_maxcode, _M_mode);
+ return __end - __from;
+}
+
+int
+__codecvt_utf8_utf16_base<char32_t>::do_max_length() const throw()
+{
+ // Any valid UTF-8 sequence of 3 bytes fits in a single 16-bit code unit,
+ // whereas 4 byte sequences require two 16-bit code units.
+ return 3;
+}
+
+#ifdef _GLIBCXX_USE_WCHAR_T
+// Define members of codecvt_utf8_utf16<wchar_t> base class implementation.
+// Converts from UTF-8 to UTF-16.
+
+__codecvt_utf8_utf16_base<wchar_t>::~__codecvt_utf8_utf16_base() { }
+
+codecvt_base::result
+__codecvt_utf8_utf16_base<wchar_t>::
+do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
+ const intern_type*& __from_next,
+ extern_type* __to, extern_type* __to_end,
+ extern_type*& __to_next) const
+{
+ range<const wchar_t> from{ __from, __from_end };
+ range<char> to{ __to, __to_end };
+ auto res = utf16_out(from, to, _M_maxcode, _M_mode);
+ __from_next = from.next;
+ __to_next = to.next;
+ return res;
+}
+
+codecvt_base::result
+__codecvt_utf8_utf16_base<wchar_t>::
+do_unshift(state_type&, extern_type* __to, extern_type*,
+ extern_type*& __to_next) const
+{
+ __to_next = __to;
+ return noconv;
+}
+
+codecvt_base::result
+__codecvt_utf8_utf16_base<wchar_t>::
+do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
+ const extern_type*& __from_next,
+ intern_type* __to, intern_type* __to_end,
+ intern_type*& __to_next) const
+{
+ range<const char> from{ __from, __from_end };
+ range<wchar_t> to{ __to, __to_end };
+ auto res = utf16_in(from, to, _M_maxcode, _M_mode);
+ __from_next = from.next;
+ __to_next = to.next;
+ return res;
+}
+
+int
+__codecvt_utf8_utf16_base<wchar_t>::do_encoding() const throw()
+{ return 0; }
+
+bool
+__codecvt_utf8_utf16_base<wchar_t>::do_always_noconv() const throw()
+{ return false; }
+
+int
+__codecvt_utf8_utf16_base<wchar_t>::
+do_length(state_type&, const extern_type* __from,
+ const extern_type* __end, size_t __max) const
+{
+ __end = utf16_span(__from, __end, __max, _M_maxcode, _M_mode);
+ return __end - __from;
+}
+
+int
+__codecvt_utf8_utf16_base<wchar_t>::do_max_length() const throw()
+{
+ // Any valid UTF-8 sequence of 3 bytes fits in a single 16-bit code unit,
+ // whereas 4 byte sequences require two 16-bit code units.
+ return 3;
+}
+#endif
+
inline template class __codecvt_abstract_base<char16_t, char, mbstate_t>;
inline template class __codecvt_abstract_base<char32_t, char, mbstate_t>;
diff --git a/libstdc++-v3/testsuite/22_locale/codecvt/codecvt_utf16/requirements/1.cc b/libstdc++-v3/testsuite/22_locale/codecvt/codecvt_utf16/requirements/1.cc
new file mode 100644
index 00000000000..38bb393aed8
--- /dev/null
+++ b/libstdc++-v3/testsuite/22_locale/codecvt/codecvt_utf16/requirements/1.cc
@@ -0,0 +1,37 @@
+// Copyright (C) 2015 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library. This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3. If not see
+// <http://www.gnu.org/licenses/>.
+
+// { dg-require-cstdint "" }
+// { dg-options "-std=gnu++11" }
+// { dg-do compile }
+
+#include <codecvt>
+#include <type_traits>
+#include <testsuite_hooks.h>
+
+template<typename C>
+ using codecvt = std::codecvt<C, char, std::mbstate_t>;
+
+using std::is_base_of;
+
+static_assert(
+ is_base_of<codecvt<char16_t>, std::codecvt_utf16<char16_t>>::value,
+ "codecvt_utf16<char16_t> has wrong base class");
+
+static_assert(
+ is_base_of<codecvt<char32_t>, std::codecvt_utf16<char32_t>>::value,
+ "codecvt_utf16<char32_t> has wrong base class");
diff --git a/libstdc++-v3/testsuite/22_locale/codecvt/codecvt_utf8/requirements/1.cc b/libstdc++-v3/testsuite/22_locale/codecvt/codecvt_utf8/requirements/1.cc
new file mode 100644
index 00000000000..6bc241858ad
--- /dev/null
+++ b/libstdc++-v3/testsuite/22_locale/codecvt/codecvt_utf8/requirements/1.cc
@@ -0,0 +1,37 @@
+// Copyright (C) 2015 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library. This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3. If not see
+// <http://www.gnu.org/licenses/>.
+
+// { dg-require-cstdint "" }
+// { dg-options "-std=gnu++11" }
+// { dg-do compile }
+
+#include <codecvt>
+#include <type_traits>
+#include <testsuite_hooks.h>
+
+template<typename C>
+ using codecvt = std::codecvt<C, char, std::mbstate_t>;
+
+using std::is_base_of;
+
+static_assert(
+ is_base_of<codecvt<char16_t>, std::codecvt_utf8<char16_t>>::value,
+ "codecvt_utf8<char16_t> has wrong base class");
+
+static_assert(
+ is_base_of<codecvt<char32_t>, std::codecvt_utf8<char32_t>>::value,
+ "codecvt_utf8<char32_t> has wrong base class");
diff --git a/libstdc++-v3/testsuite/22_locale/codecvt/codecvt_utf8_utf16/requirements/1.cc b/libstdc++-v3/testsuite/22_locale/codecvt/codecvt_utf8_utf16/requirements/1.cc
new file mode 100644
index 00000000000..5e5f8dd9fee
--- /dev/null
+++ b/libstdc++-v3/testsuite/22_locale/codecvt/codecvt_utf8_utf16/requirements/1.cc
@@ -0,0 +1,37 @@
+// Copyright (C) 2015 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library. This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3. If not see
+// <http://www.gnu.org/licenses/>.
+
+// { dg-require-cstdint "" }
+// { dg-options "-std=gnu++11" }
+// { dg-do compile }
+
+#include <codecvt>
+#include <type_traits>
+#include <testsuite_hooks.h>
+
+template<typename C>
+ using codecvt = std::codecvt<C, char, std::mbstate_t>;
+
+using std::is_base_of;
+
+static_assert(
+ is_base_of<codecvt<char16_t>, std::codecvt_utf8_utf16<char16_t>>::value,
+ "codecvt_utf8_utf16<char16_t> has wrong base class");
+
+static_assert(
+ is_base_of<codecvt<char32_t>, std::codecvt_utf8_utf16<char32_t>>::value,
+ "codecvt_utf8_utf16<char32_t> has wrong base class");