summary refs log tree commit diff
diff options
context:
space:
mode:
author Jonathan Wakely <jwakely@redhat.com> 2015-04-30 01:27:30 +0100
committer Jonathan Wakely <jwakely@redhat.com> 2015-04-30 01:27:30 +0100
commit 4b45e9e80ea7f599b2e0c1bde8327bfe4f6a7f6d (patch)
tree 5f2f4a48b3d6de2aba2d80bd2ba48db05f661ba7
parent fc0eb7fba787c96bf64c5351eb44d16544c2a537 (diff)
download gcc-4b45e9e80ea7f599b2e0c1bde8327bfe4f6a7f6d.tar.gz
Refactor wstring_convert for FS path conversions
-rw-r--r-- libstdc++-v3/include/bits/locale_conv.h 173
-rw-r--r-- libstdc++-v3/include/experimental/fs_path.h 170
-rw-r--r-- libstdc++-v3/src/filesystem/path.cc 25
3 files changed, 262 insertions, 106 deletions
diff --git a/libstdc++-v3/include/bits/locale_conv.h b/libstdc++-v3/include/bits/locale_conv.h
index 9b49617b7a9..8878aa1310b 100644
--- a/libstdc++-v3/include/bits/locale_conv.h
+++ b/libstdc++-v3/include/bits/locale_conv.h
@@ -51,6 +51,105 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
* @{
*/
+ // Shared engine for __str_codecvt_in and __str_codecvt_out.
+ //
+ // Converts the characters in [__first, __last) by calling the codecvt
+ // member function __fn on __cvt and stores the result in __outstr.
+ // The output buffer is enlarged and the call repeated for as long as the
+ // facet reports codecvt_base::partial, input remains, and fewer than
+ // max_length() output positions are left unused.
+ //
+ // __state is passed to the facet on every call.
+ // __count receives the number of input characters consumed; it is set
+ // on failure as well as on success.
+ // Returns false if the facet reported codecvt_base::error, else true.
+ template<typename _OutStr, typename _InChar, typename _Codecvt,
+ typename _State, typename _Fn>
+ bool
+ __do_str_codecvt(const _InChar* __first, const _InChar* __last,
+ _OutStr& __outstr, const _Codecvt& __cvt, _State& __state,
+ size_t& __count, _Fn __fn)
+ {
+ size_t __outchars = 0; // output characters produced so far
+ auto __next = __first; // first input character not yet consumed
+ const auto __maxlen = __cvt.max_length();
+
+ codecvt_base::result __result;
+ do
+ {
+ // Make room for the unconsumed input plus one maximal sequence.
+ __outstr.resize(__outstr.size() + (__last - __next) + __maxlen);
+ // resize() may reallocate, so recompute the output pointers.
+ auto __outnext = &__outstr.front() + __outchars;
+ auto const __outlast = &__outstr.back() + 1;
+ __result = (__cvt.*__fn)(__state, __next, __last, __next,
+ __outnext, __outlast, __outnext);
+ __outchars = __outnext - &__outstr.front();
+ }
+ while (__result == codecvt_base::partial && __next != __last
+ && (__outstr.size() - __outchars) < __maxlen);
+
+ if (__result == codecvt_base::error)
+ {
+ // Report how far the conversion got before failing, so that callers
+ // (e.g. wstring_convert, which passes _M_count) do not keep a
+ // stale value from an earlier conversion.
+ __count = __next - __first;
+ return false;
+ }
+
+ if (__result == codecvt_base::noconv)
+ {
+ // No conversion was needed: the output is a copy of the input.
+ __outstr.assign(__first, __last);
+ __count = __last - __first;
+ }
+ else
+ {
+ // Drop the unused tail of the over-allocated buffer.
+ __outstr.resize(__outchars);
+ __count = __next - __first;
+ }
+
+ return true;
+ }
+
+ // Convert narrow character string to wide: runs the facet's in() over [__first, __last) via __do_str_codecvt, storing the result in __outstr; on success __count receives the number of input characters consumed. Returns that helper's bool result.
+ template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
+ inline bool
+ __str_codecvt_in(const char* __first, const char* __last,
+ basic_string<_CharT, _Traits, _Alloc>& __outstr,
+ const codecvt<_CharT, char, _State>& __cvt,
+ _State& __state, size_t& __count)
+ {
+ using _Codecvt = codecvt<_CharT, char, _State>;
+ using _ConvFn // pointer-to-const-member type matching the signature of in()
+ = codecvt_base::result
+ (_Codecvt::*)(_State&, const char*, const char*, const char*&,
+ _CharT*, _CharT*, _CharT*&) const;
+ _ConvFn __fn = &codecvt<_CharT, char, _State>::in; // external (char) -> internal (_CharT)
+ return __do_str_codecvt(__first, __last, __outstr, __cvt, __state,
+ __count, __fn);
+ }
+
+ template<typename _CharT, typename _Traits, typename _Alloc, typename _State> // Convenience overload: narrow-to-wide conversion with a fresh state and no count reported.
+ inline bool
+ __str_codecvt_in(const char* __first, const char* __last,
+ basic_string<_CharT, _Traits, _Alloc>& __outstr,
+ const codecvt<_CharT, char, _State>& __cvt)
+ {
+ _State __state = {}; // value-initialized conversion state
+ size_t __n; // receives the consumed-input count, which is then discarded
+ return __str_codecvt_in(__first, __last, __outstr, __cvt, __state, __n);
+ }
+
+ // Convert wide character string to narrow: runs the facet's out() over [__first, __last) via __do_str_codecvt, storing the result in __outstr; on success __count receives the number of input characters consumed. Returns that helper's bool result.
+ template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
+ inline bool
+ __str_codecvt_out(const _CharT* __first, const _CharT* __last,
+ basic_string<char, _Traits, _Alloc>& __outstr,
+ const codecvt<_CharT, char, _State>& __cvt,
+ _State& __state, size_t& __count)
+ {
+ using _Codecvt = codecvt<_CharT, char, _State>;
+ using _ConvFn // pointer-to-const-member type matching the signature of out()
+ = codecvt_base::result
+ (_Codecvt::*)(_State&, const _CharT*, const _CharT*, const _CharT*&,
+ char*, char*, char*&) const;
+ _ConvFn __fn = &codecvt<_CharT, char, _State>::out; // internal (_CharT) -> external (char)
+ return __do_str_codecvt(__first, __last, __outstr, __cvt, __state,
+ __count, __fn);
+ }
+
+ template<typename _CharT, typename _Traits, typename _Alloc, typename _State> // Convenience overload: wide-to-narrow conversion with a fresh state and no count reported.
+ inline bool
+ __str_codecvt_out(const _CharT* __first, const _CharT* __last,
+ basic_string<char, _Traits, _Alloc>& __outstr,
+ const codecvt<_CharT, char, _State>& __cvt)
+ {
+ _State __state = {}; // value-initialized conversion state
+ size_t __n; // receives the consumed-input count, which is then discarded
+ return __str_codecvt_out(__first, __last, __outstr, __cvt, __state, __n);
+ }
+
/// String conversions
template<typename _Codecvt, typename _Elem = wchar_t,
typename _Wide_alloc = allocator<_Elem>,
@@ -136,9 +235,15 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
wide_string
from_bytes(const char* __first, const char* __last)
{
- auto __errstr = _M_with_strings ? &_M_wide_err_string : nullptr;
- _ConvFn<char, _Elem> __fn = &_Codecvt::in;
- return _M_conv(__first, __last, __errstr, __fn);
+ if (!_M_with_cvtstate) // no caller-supplied state: start each conversion from a default state
+ _M_state = state_type();
+ wide_string __out{ _M_wide_err_string.get_allocator() }; // result shares the error string's allocator
+ if (__str_codecvt_in(__first, __last, __out, *_M_cvt, _M_state,
+ _M_count))
+ return __out;
+ if (_M_with_strings) // conversion failed: return the configured error string if there is one
+ return _M_wide_err_string;
+ __throw_range_error("wstring_convert::from_bytes"); // otherwise report the failure as range_error
}
/// @}
@@ -166,9 +271,15 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
byte_string
to_bytes(const _Elem* __first, const _Elem* __last)
{
- auto __errstr = _M_with_strings ? &_M_byte_err_string : nullptr;
- _ConvFn<_Elem, char> __fn = &_Codecvt::out;
- return _M_conv(__first, __last, __errstr, __fn);
+ if (!_M_with_cvtstate) // no caller-supplied state: start each conversion from a default state
+ _M_state = state_type();
+ byte_string __out{ _M_byte_err_string.get_allocator() }; // result shares the error string's allocator
+ if (__str_codecvt_out(__first, __last, __out, *_M_cvt, _M_state,
+ _M_count))
+ return __out;
+ if (_M_with_strings) // conversion failed: return the configured error string if there is one
+ return _M_byte_err_string;
+ __throw_range_error("wstring_convert::to_bytes"); // otherwise report the failure as range_error
}
/// @}
@@ -181,56 +292,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
state_type state() const { return _M_state; }
private:
- template<typename _InC, typename _OutC>
- using _ConvFn
- = codecvt_base::result
- (_Codecvt::*)(state_type&, const _InC*, const _InC*, const _InC*&,
- _OutC*, _OutC*, _OutC*&) const;
-
- template<typename _InChar, typename _OutStr, typename _MemFn>
- _OutStr
- _M_conv(const _InChar* __first, const _InChar* __last,
- const _OutStr* __err, _MemFn __memfn)
- {
- if (!_M_with_cvtstate)
- _M_state = state_type();
-
- auto __outstr = __err ? _OutStr(__err->get_allocator()) : _OutStr();
- size_t __outchars = 0;
- auto __next = __first;
- const auto __maxlen = _M_cvt->max_length();
-
- codecvt_base::result __result;
- do
- {
- __outstr.resize(__outstr.size() + (__last - __next) + __maxlen);
- auto __outnext = &__outstr.front() + __outchars;
- auto const __outlast = &__outstr.back() + 1;
- __result = ((*_M_cvt).*__memfn)(_M_state, __next, __last, __next,
- __outnext, __outlast, __outnext);
- __outchars = __outnext - &__outstr.front();
- }
- while (__result == codecvt_base::partial && __next != __last
- && (__outstr.size() - __outchars) < __maxlen);
-
- if (__result == codecvt_base::noconv)
- {
- __outstr.assign(__first, __last);
- _M_count = __outstr.size();
- return __outstr;
- }
-
- __outstr.resize(__outchars);
- _M_count = __next - __first;
-
- if (__result != codecvt_base::error)
- return __outstr;
- else if (__err)
- return *__err;
- else
- __throw_range_error("wstring_convert");
- }
-
unique_ptr<_Codecvt> _M_cvt;
byte_string _M_byte_err_string;
wide_string _M_wide_err_string;
diff --git a/libstdc++-v3/include/experimental/fs_path.h b/libstdc++-v3/include/experimental/fs_path.h
index 51118a54291..8ab499ad0bb 100644
--- a/libstdc++-v3/include/experimental/fs_path.h
+++ b/libstdc++-v3/include/experimental/fs_path.h
@@ -39,9 +39,11 @@
#include <list>
#include <locale>
#include <iosfwd>
+#include <codecvt>
#include <system_error>
#include <bits/stl_algobase.h>
#include <bits/quoted_string.h>
+#include <bits/locale_conv.h>
#if defined(_WIN32) && !defined(__CYGWIN__)
# define _GLIBCXX_FILESYSTEM_IS_WINDOWS
@@ -578,50 +580,91 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
template<typename _CharT>
struct path::_Cvt<_CharT, char>
{
- template<typename _Tp>
- static std::string
- _S_convert(_Tp* __f, _Tp* __l)
- {
- /* TODO use wstring_convert and codecvt_utf8
- std::wstring_convert<std::codecvt_utf8<_CharT>, _CharT> __cvt;
- return __cvt.to_bytes(__f, __l);
- */
- const _Tp* __first = __f;
- const _Tp* __last = __l;
- std::locale __loc;
- auto& __cvt
- = use_facet<std::codecvt<_CharT, char, mbstate_t>>(__loc);
- mbstate_t __st = mbstate_t();
- size_t __len = __last - __first;
- std::string __out;
- codecvt_base::result __res;
- do {
- __out.resize(__out.size() + __len);
- auto __outnext = &__out.front() + (__first - __f);
- __res = __cvt.out(__st, __first, __last, __first,
- __outnext, &__out.back() + 1, __outnext);
- } while (__res == codecvt_base::partial && __first != __last);
- if (__res == codecvt_base::ok && __first == __last)
- return __out;
- _GLIBCXX_THROW_OR_ABORT(filesystem_error(
- "Cannot convert character sequence",
- std::make_error_code(errc::illegal_byte_sequence)));
- }
+ static std::string // Converts [__f, __l) from _CharT to a narrow string using codecvt_utf8<_CharT>.
+ _S_convert(const _CharT* __f, const _CharT* __l)
+ {
+ std::string __str;
+ codecvt_utf8<_CharT> __cvt; // local facet object, independent of any std::locale
+ if (__str_codecvt_out(__f, __l, __str, __cvt))
+ return __str;
+ _GLIBCXX_THROW_OR_ABORT(filesystem_error( // reached only when the conversion reported failure
+ "Cannot convert character sequence",
+ std::make_error_code(errc::illegal_byte_sequence)));
+ }
+
+ static std::string // Overload for pointers to non-const _CharT; forwards to the const-pointer overload.
+ _S_convert(_CharT* __f, _CharT* __l)
+ {
+ return _S_convert(const_cast<const _CharT*>(__f), // the casts only add const
+ const_cast<const _CharT*>(__l));
+ }
template<typename _Iter>
- static string_type
+ static std::string // Generic iterator overload: the body copies the range into a basic_string<_CharT> and converts that buffer.
_S_convert(_Iter __first, _Iter __last)
{
const std::basic_string<_CharT> __str(__first, __last);
return _S_convert(__str.data(), __str.data() + __str.size());
}
+
+ template<typename _Iter, typename _Cont> // Overload for __normal_iterator: converts directly from the wrapped iterators, avoiding the copy made by the generic overload.
+ static std::string
+ _S_convert(__gnu_cxx::__normal_iterator<_Iter, _Cont> __first,
+ __gnu_cxx::__normal_iterator<_Iter, _Cont> __last)
+ { return _S_convert(__first.base(), __last.base()); } // base() yields the underlying iterator
};
#ifdef _GLIBCXX_FILESYSTEM_IS_WINDOWS
template<typename _CharT>
- struct path::_Cvt<_CharT, wchar_t>; // TODO
-#endif
+ struct path::_Cvt<char, wchar_t> // NOTE(review): the preceding template<typename _CharT> header leaves _CharT unused here, which is not a valid partial specialization; presumably this should be an explicit specialization (template<>) -- confirm.
+ {
+ // TODO: conversion from char to wchar_t is not implemented; this specialization has no _S_convert members.
+ };
+
+ template<typename _CharT> // Conversions from _CharT sequences to std::wstring (this code is inside the _GLIBCXX_FILESYSTEM_IS_WINDOWS section).
+ struct path::_Cvt<_CharT, wchar_t>
+ {
+ static std::wstring // Two-step conversion: _CharT -> UTF-8 bytes -> wchar_t.
+ _S_convert(const _CharT* __f, const _CharT* __l)
+ {
+ std::codecvt_utf8<_CharT> __narrow_cvt;
+ std::string __u8str; // intermediate UTF-8 form of the input
+ if (__str_codecvt_out(__f, __l, __u8str, __narrow_cvt))
+ {
+ const char* __f2 = __u8str.data();
+ const char* __l2 = __f2 + __u8str.size();
+ std::codecvt_utf8<wchar_t> __widen_cvt; // NOTE(review): with a 16-bit wchar_t this facet presumably handles UCS-2 only -- confirm whether codecvt_utf8_utf16 is wanted.
+ std::wstring __str;
+ if (__str_codecvt_in(__f2, __l2, __str, __widen_cvt))
+ return __str;
+ }
+ _GLIBCXX_THROW_OR_ABORT(filesystem_error( // reached when either conversion step failed
+ "Cannot convert character sequence",
+ std::make_error_code(errc::illegal_byte_sequence)));
+ }
+ static std::wstring // Overload for pointers to non-const _CharT; forwards to the const-pointer overload.
+ _S_convert(_CharT* __f, _CharT* __l)
+ {
+ return _S_convert(const_cast<const _CharT*>(__f),
+ const_cast<const _CharT*>(__l));
+ }
+
+ template<typename _Iter> // Generic iterator overload.
+ static std::wstring
+ _S_convert(_Iter __first, _Iter __last)
+ {
+ const std::basic_string<_CharT> __str(__first, __last); // copy the range into contiguous storage first
+ return _S_convert(__str.data(), __str.data() + __str.size());
+ }
+
+ template<typename _Iter, typename _Cont> // Overload for __normal_iterator: converts directly from the wrapped iterators.
+ static std::wstring
+ _S_convert(__gnu_cxx::__normal_iterator<_Iter, _Cont> __first,
+ __gnu_cxx::__normal_iterator<_Iter, _Cont> __last)
+ { return _S_convert(__first.base(), __last.base()); }
+ };
+#endif
/// An iterator for the components of a path
class path::iterator
@@ -740,8 +783,54 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
template<typename _CharT, typename _Traits, typename _Allocator>
inline std::basic_string<_CharT, _Traits, _Allocator>
path::string(const _Allocator& __a) const
- { // TODO need to use codecvt (or wstring_convert)
- return { _M_pathname.begin(), _M_pathname.end(), __a };
+ { // Returns the pathname as a basic_string<_CharT, _Traits, _Allocator> built with allocator __a, converting the encoding when _CharT differs from value_type.
+ if (is_same<_CharT, value_type>::value) // run-time branch: the conversion code below is still instantiated for every _CharT
+ return { _M_pathname.begin(), _M_pathname.end(), __a };
+
+ basic_string<_CharT, _Traits, _Allocator> __str{__a};
+ const value_type* __first = _M_pathname.data();
+ const value_type* __last = __first + _M_pathname.size();
+
+#ifdef _GLIBCXX_FILESYSTEM_IS_WINDOWS
+ // use codecvt_utf8<wchar_t> to convert native string to UTF-8
+ codecvt_utf8<value_type> __cvt;
+ basic_string<char, _Traits, _Allocator> __u8str{__a}; // NOTE(review): reuses the caller's _Traits/_Allocator (chosen for _CharT) with char -- confirm this is valid when _CharT is not char.
+ if (__str_codecvt_out(__first, __last, __u8str, __cvt))
+ {
+ struct // local overload set choosing between "already UTF-8 char" and "needs a second conversion"
+ {
+ // This overload will be used when is_same<_CharT, char>
+ const basic_string<char, _Traits, _Allocator>*
+ operator()(const basic_string<char, _Traits, _Allocator>& __instr,
+ basic_string<char, _Traits, _Allocator>&,
+ int)
+ { return std::__addressof(__instr); } // the UTF-8 string is already the result
+
+ basic_string<_CharT, _Traits, _Allocator>*
+ operator()(const basic_string<char, _Traits, _Allocator>& __instr,
+ basic_string<_CharT, _Traits, _Allocator>& __outstr,
+ ...)
+ {
+ // use codecvt_utf8<_CharT> to convert UTF-8 to target string
+ codecvt_utf8<_CharT> __cvt;
+ const char* __f = __instr.data();
+ const char* __l = __f + __instr.size();
+ if (__str_codecvt_in(__f, __l, __outstr, __cvt))
+ return std::__addressof(__outstr);
+ return nullptr; // second conversion failed; the caller falls through to the throw below
+ }
+ } __dispatch;
+ if (auto* __p = __dispatch(__u8str, __str, 1)) // the int argument prefers the (…, int) overload over the (…, ...) one when both are viable
+ return *__p;
+ }
+#else
+ codecvt_utf8<_CharT> __cvt; // the native pathname bytes are fed to the facet's in() as UTF-8 input
+ if (__str_codecvt_in(__first, __last, __str, __cvt))
+ return __str;
+#endif
+ _GLIBCXX_THROW_OR_ABORT(filesystem_error( // reached only when a conversion above reported failure
+ "Cannot convert character sequence",
+ std::make_error_code(errc::illegal_byte_sequence)));
}
inline std::string
@@ -754,9 +843,18 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
path::u8string() const
{
#ifdef _GLIBCXX_FILESYSTEM_IS_WINDOWS
- // TODO ???
+ string __str;
+ // convert from native encoding to UTF-8
+ codecvt_utf8<value_type> __cvt; // NOTE(review): if value_type is a 16-bit wchar_t this facet presumably handles UCS-2 only -- confirm whether codecvt_utf8_utf16 is wanted.
+ const value_type* __first = _M_pathname.data();
+ const value_type* __last = __first + _M_pathname.size();
+ if (__str_codecvt_out(__first, __last, __str, __cvt))
+ return __str;
+ _GLIBCXX_THROW_OR_ABORT(filesystem_error( // reached only when the conversion reported failure
+ "Cannot convert character sequence",
+ std::make_error_code(errc::illegal_byte_sequence)));
#else
- return string<char>();
+ return _M_pathname; // returned unconverted; presumably the native narrow string is treated as UTF-8 here -- confirm
#endif
}
diff --git a/libstdc++-v3/src/filesystem/path.cc b/libstdc++-v3/src/filesystem/path.cc
index 6f8abb39738..db58f3bdb70 100644
--- a/libstdc++-v3/src/filesystem/path.cc
+++ b/libstdc++-v3/src/filesystem/path.cc
@@ -432,20 +432,17 @@ path::string_type
path::_S_convert_loc(const char* __first, const char* __last,
const std::locale& __loc)
{
- auto& __cvt = use_facet<std::codecvt<wchar_t, char, mbstate_t>>(__loc);
- mbstate_t __st = mbstate_t();
- size_t __len = __cvt.length(__st, __first, __last,
- (__last - __first) * __cvt.max_length());
- std::wstring __out(__len, L'\0');
- auto __outnext = &__out.front();
- __st = mbstate_t();
- auto __res = __cvt.in(__st, __first, __last, __first,
- __outnext, &__out.back() + 1, __outnext);
- if (__res == codecvt_base::ok && __first == __last)
- return _Cvt<wchar_t>::_S_convert(&__out.front(), __outnext);
- _GLIBCXX_THROW_OR_ABORT(filesystem_error(
- "Cannot convert character sequence",
- std::make_error_code(errc::illegal_byte_sequence)));
+ auto& __cvt = std::use_facet<codecvt<wchar_t, char, mbstate_t>>(__loc); // the locale's narrow <-> wide conversion facet
+ basic_string<wchar_t> __ws; // wide form of [__first, __last) according to __loc
+ if (!__str_codecvt_in(__first, __last, __ws, __cvt))
+ _GLIBCXX_THROW_OR_ABORT(filesystem_error( // widening with the locale's facet failed
+ "Cannot convert character sequence",
+ std::make_error_code(errc::illegal_byte_sequence)));
+#ifdef _GLIBCXX_FILESYSTEM_IS_WINDOWS
+ return __ws; // returned as is; presumably string_type is a wide string on this target -- confirm
+#else
+ return _Cvt<wchar_t>::_S_convert(__ws.data(), __ws.data() + __ws.size()); // second step: wide string -> string_type via _Cvt
+#endif
}
std::size_t