diff options
author | Jonathan Wakely <jwakely@redhat.com> | 2015-04-30 01:27:30 +0100 |
---|---|---|
committer | Jonathan Wakely <jwakely@redhat.com> | 2015-04-30 01:27:30 +0100 |
commit | 4b45e9e80ea7f599b2e0c1bde8327bfe4f6a7f6d (patch) | |
tree | 5f2f4a48b3d6de2aba2d80bd2ba48db05f661ba7 | |
parent | fc0eb7fba787c96bf64c5351eb44d16544c2a537 (diff) | |
download | gcc-4b45e9e80ea7f599b2e0c1bde8327bfe4f6a7f6d.tar.gz |
Refactor wstring_convert for FS path conversions
-rw-r--r-- | libstdc++-v3/include/bits/locale_conv.h | 173 | ||||
-rw-r--r-- | libstdc++-v3/include/experimental/fs_path.h | 170 | ||||
-rw-r--r-- | libstdc++-v3/src/filesystem/path.cc | 25 |
3 files changed, 262 insertions, 106 deletions
diff --git a/libstdc++-v3/include/bits/locale_conv.h b/libstdc++-v3/include/bits/locale_conv.h index 9b49617b7a9..8878aa1310b 100644 --- a/libstdc++-v3/include/bits/locale_conv.h +++ b/libstdc++-v3/include/bits/locale_conv.h @@ -51,6 +51,105 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION * @{ */ + template<typename _OutStr, typename _InChar, typename _Codecvt, + typename _State, typename _Fn> + bool + __do_str_codecvt(const _InChar* __first, const _InChar* __last, + _OutStr& __outstr, const _Codecvt& __cvt, _State& __state, + size_t& __count, _Fn __fn) + { + size_t __outchars = 0; + auto __next = __first; + const auto __maxlen = __cvt.max_length(); + + codecvt_base::result __result; + do + { + __outstr.resize(__outstr.size() + (__last - __next) + __maxlen); + auto __outnext = &__outstr.front() + __outchars; + auto const __outlast = &__outstr.back() + 1; + __result = (__cvt.*__fn)(__state, __next, __last, __next, + __outnext, __outlast, __outnext); + __outchars = __outnext - &__outstr.front(); + } + while (__result == codecvt_base::partial && __next != __last + && (__outstr.size() - __outchars) < __maxlen); + + if (__result == codecvt_base::error) + return false; + + if (__result == codecvt_base::noconv) + { + __outstr.assign(__first, __last); + __count = __last - __first; + } + else + { + __outstr.resize(__outchars); + __count = __next - __first; + } + + return true; + } + + // Convert narrow character string to wide. + template<typename _CharT, typename _Traits, typename _Alloc, typename _State> + inline bool + __str_codecvt_in(const char* __first, const char* __last, + basic_string<_CharT, _Traits, _Alloc>& __outstr, + const codecvt<_CharT, char, _State>& __cvt, + _State& __state, size_t& __count) + { + using _Codecvt = codecvt<_CharT, char, _State>; + using _ConvFn + = codecvt_base::result + (_Codecvt::*)(_State&, const char*, const char*, const char*&, + _CharT*, _CharT*, _CharT*&) const; + _ConvFn __fn = &codecvt<_CharT, char, _State>::in; + return __do_str_codecvt(__first, __last, __outstr, __cvt, __state, + __count, __fn); + } + + template<typename _CharT, typename _Traits, typename _Alloc, typename _State> + inline bool + __str_codecvt_in(const char* __first, const char* __last, + basic_string<_CharT, _Traits, _Alloc>& __outstr, + const codecvt<_CharT, char, _State>& __cvt) + { + _State __state = {}; + size_t __n; + return __str_codecvt_in(__first, __last, __outstr, __cvt, __state, __n); + } + + // Convert wide character string to narrow. + template<typename _CharT, typename _Traits, typename _Alloc, typename _State> + inline bool + __str_codecvt_out(const _CharT* __first, const _CharT* __last, + basic_string<char, _Traits, _Alloc>& __outstr, + const codecvt<_CharT, char, _State>& __cvt, + _State& __state, size_t& __count) + { + using _Codecvt = codecvt<_CharT, char, _State>; + using _ConvFn + = codecvt_base::result + (_Codecvt::*)(_State&, const _CharT*, const _CharT*, const _CharT*&, + char*, char*, char*&) const; + _ConvFn __fn = &codecvt<_CharT, char, _State>::out; + return __do_str_codecvt(__first, __last, __outstr, __cvt, __state, + __count, __fn); + } + + template<typename _CharT, typename _Traits, typename _Alloc, typename _State> + inline bool + __str_codecvt_out(const _CharT* __first, const _CharT* __last, + basic_string<char, _Traits, _Alloc>& __outstr, + const codecvt<_CharT, char, _State>& __cvt) + { + _State __state = {}; + size_t __n; + return __str_codecvt_out(__first, __last, __outstr, __cvt, __state, __n); + } + /// String conversions template<typename _Codecvt, typename _Elem = wchar_t, typename _Wide_alloc = allocator<_Elem>, @@ -136,9 +235,15 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION wide_string from_bytes(const char* __first, const char* __last) { - auto __errstr = _M_with_strings ? &_M_wide_err_string : nullptr; - _ConvFn<char, _Elem> __fn = &_Codecvt::in; - return _M_conv(__first, __last, __errstr, __fn); + if (!_M_with_cvtstate) + _M_state = state_type(); + wide_string __out{ _M_wide_err_string.get_allocator() }; + if (__str_codecvt_in(__first, __last, __out, *_M_cvt, _M_state, + _M_count)) + return __out; + if (_M_with_strings) + return _M_wide_err_string; + __throw_range_error("wstring_convert::from_bytes"); } /// @} @@ -166,9 +271,15 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION byte_string to_bytes(const _Elem* __first, const _Elem* __last) { - auto __errstr = _M_with_strings ? &_M_byte_err_string : nullptr; - _ConvFn<_Elem, char> __fn = &_Codecvt::out; - return _M_conv(__first, __last, __errstr, __fn); + if (!_M_with_cvtstate) + _M_state = state_type(); + byte_string __out{ _M_byte_err_string.get_allocator() }; + if (__str_codecvt_out(__first, __last, __out, *_M_cvt, _M_state, + _M_count)) + return __out; + if (_M_with_strings) + return _M_byte_err_string; + __throw_range_error("wstring_convert::to_bytes"); } /// @} @@ -181,56 +292,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION state_type state() const { return _M_state; } private: - template<typename _InC, typename _OutC> - using _ConvFn - = codecvt_base::result - (_Codecvt::*)(state_type&, const _InC*, const _InC*, const _InC*&, - _OutC*, _OutC*, _OutC*&) const; - - template<typename _InChar, typename _OutStr, typename _MemFn> - _OutStr - _M_conv(const _InChar* __first, const _InChar* __last, - const _OutStr* __err, _MemFn __memfn) - { - if (!_M_with_cvtstate) - _M_state = state_type(); - - auto __outstr = __err ? _OutStr(__err->get_allocator()) : _OutStr(); - size_t __outchars = 0; - auto __next = __first; - const auto __maxlen = _M_cvt->max_length(); - - codecvt_base::result __result; - do - { - __outstr.resize(__outstr.size() + (__last - __next) + __maxlen); - auto __outnext = &__outstr.front() + __outchars; - auto const __outlast = &__outstr.back() + 1; - __result = ((*_M_cvt).*__memfn)(_M_state, __next, __last, __next, - __outnext, __outlast, __outnext); - __outchars = __outnext - &__outstr.front(); - } - while (__result == codecvt_base::partial && __next != __last - && (__outstr.size() - __outchars) < __maxlen); - - if (__result == codecvt_base::noconv) - { - __outstr.assign(__first, __last); - _M_count = __outstr.size(); - return __outstr; - } - - __outstr.resize(__outchars); - _M_count = __next - __first; - - if (__result != codecvt_base::error) - return __outstr; - else if (__err) - return *__err; - else - __throw_range_error("wstring_convert"); - } - unique_ptr<_Codecvt> _M_cvt; byte_string _M_byte_err_string; wide_string _M_wide_err_string; diff --git a/libstdc++-v3/include/experimental/fs_path.h b/libstdc++-v3/include/experimental/fs_path.h index 51118a54291..8ab499ad0bb 100644 --- a/libstdc++-v3/include/experimental/fs_path.h +++ b/libstdc++-v3/include/experimental/fs_path.h @@ -39,9 +39,11 @@ #include <list> #include <locale> #include <iosfwd> +#include <codecvt> #include <system_error> #include <bits/stl_algobase.h> #include <bits/quoted_string.h> +#include <bits/locale_conv.h> #if defined(_WIN32) && !defined(__CYGWIN__) # define _GLIBCXX_FILESYSTEM_IS_WINDOWS @@ -578,50 +580,91 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION template<typename _CharT> struct path::_Cvt<_CharT, char> { - template<typename _Tp> - static std::string - _S_convert(_Tp* __f, _Tp* __l) - { - /* TODO use wstring_convert and codecvt_utf8 - std::wstring_convert<std::codecvt_utf8<_CharT>, _CharT> __cvt; - return __cvt.to_bytes(__f, __l); - */ - const _Tp* __first = __f; - const _Tp* __last = __l; - std::locale __loc; - auto& __cvt - = use_facet<std::codecvt<_CharT, char, mbstate_t>>(__loc); - mbstate_t __st = mbstate_t(); - size_t __len = __last - __first; - std::string __out; - codecvt_base::result __res; - do { - __out.resize(__out.size() + __len); - auto __outnext = &__out.front() + (__first - __f); - __res = __cvt.out(__st, __first, __last, __first, - __outnext, &__out.back() + 1, __outnext); - } while (__res == codecvt_base::partial && __first != __last); - if (__res == codecvt_base::ok && __first == __last) - return __out; - _GLIBCXX_THROW_OR_ABORT(filesystem_error( - "Cannot convert character sequence", - std::make_error_code(errc::illegal_byte_sequence))); - } + static std::string + _S_convert(const _CharT* __f, const _CharT* __l) + { + std::string __str; + codecvt_utf8<_CharT> __cvt; + if (__str_codecvt_out(__f, __l, __str, __cvt)) + return __str; + _GLIBCXX_THROW_OR_ABORT(filesystem_error( + "Cannot convert character sequence", + std::make_error_code(errc::illegal_byte_sequence))); + } + + static std::string + _S_convert(_CharT* __f, _CharT* __l) + { + return _S_convert(const_cast<const _CharT*>(__f), + const_cast<const _CharT*>(__l)); + } template<typename _Iter> - static string_type + static std::string _S_convert(_Iter __first, _Iter __last) { const std::basic_string<_CharT> __str(__first, __last); return _S_convert(__str.data(), __str.data() + __str.size()); } + + template<typename _Iter, typename _Cont> + static std::string + _S_convert(__gnu_cxx::__normal_iterator<_Iter, _Cont> __first, + __gnu_cxx::__normal_iterator<_Iter, _Cont> __last) + { return _S_convert(__first.base(), __last.base()); } }; #ifdef _GLIBCXX_FILESYSTEM_IS_WINDOWS template<typename _CharT> - struct path::_Cvt<_CharT, wchar_t>; // TODO -#endif + struct path::_Cvt<char, wchar_t> + { + // TODO + }; + + template<typename _CharT> + struct path::_Cvt<_CharT, wchar_t> + { + static std::wstring + _S_convert(const _CharT* __f, const _CharT* __l) + { + std::codecvt_utf8<_CharT> __narrow_cvt; + std::string __u8str; + if (__str_codecvt_out(__f, __l, __u8str, __narrow_cvt)) + { + const char* __f2 = __u8str.data(); + const char* __l2 = __f2 + __u8str.size(); + std::codecvt_utf8<wchar_t> __widen_cvt; + std::wstring __str; + if (__str_codecvt_in(__f2, __l2, __str, __widen_cvt)) + return __str; + } + _GLIBCXX_THROW_OR_ABORT(filesystem_error( + "Cannot convert character sequence", + std::make_error_code(errc::illegal_byte_sequence))); + } + static std::wstring + _S_convert(_CharT* __f, _CharT* __l) + { + return _S_convert(const_cast<const _CharT*>(__f), + const_cast<const _CharT*>(__l)); + } + + template<typename _Iter> + static std::wstring + _S_convert(_Iter __first, _Iter __last) + { + const std::basic_string<_CharT> __str(__first, __last); + return _S_convert(__str.data(), __str.data() + __str.size()); + } + + template<typename _Iter, typename _Cont> + static std::wstring + _S_convert(__gnu_cxx::__normal_iterator<_Iter, _Cont> __first, + __gnu_cxx::__normal_iterator<_Iter, _Cont> __last) + { return _S_convert(__first.base(), __last.base()); } + }; +#endif /// An iterator for the components of a path class path::iterator @@ -740,8 +783,54 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION template<typename _CharT, typename _Traits, typename _Allocator> inline std::basic_string<_CharT, _Traits, _Allocator> path::string(const _Allocator& __a) const - { // TODO need to use codecvt (or wstring_convert) - return { _M_pathname.begin(), _M_pathname.end(), __a }; + { + if (is_same<_CharT, value_type>::value) + return { _M_pathname.begin(), _M_pathname.end(), __a }; + + basic_string<_CharT, _Traits, _Allocator> __str{__a}; + const value_type* __first = _M_pathname.data(); + const value_type* __last = __first + _M_pathname.size(); + +#ifdef _GLIBCXX_FILESYSTEM_IS_WINDOWS + // use codecvt_utf8<wchar_t> to convert native string to UTF-8 + codecvt_utf8<value_type> __cvt; + basic_string<char, _Traits, _Allocator> __u8str{__a}; + if (__str_codecvt_out(__first, __last, __u8str, __cvt)) + { + struct + { + // This overload will be used when is_same<_CharT, char> + const basic_string<char, _Traits, _Allocator>* + operator()(const basic_string<char, _Traits, _Allocator>& __instr, + basic_string<char, _Traits, _Allocator>&, + int) + { return std::__addressof(__instr); } + + basic_string<_CharT, _Traits, _Allocator>* + operator()(const basic_string<char, _Traits, _Allocator>& __instr, + basic_string<_CharT, _Traits, _Allocator>& __outstr, + ...) + { + // use codecvt_utf8<_CharT> to convert UTF-8 to target string + codecvt_utf8<_CharT> __cvt; + const char* __f = __instr.data(); + const char* __l = __f + __instr.size(); + if (__str_codecvt_in(__f, __l, __outstr, __cvt)) + return std::__addressof(__outstr); + return nullptr; + } + } __dispatch; + if (auto* __p = __dispatch(__u8str, __str, 1)) + return *__p; + } +#else + codecvt_utf8<_CharT> __cvt; + if (__str_codecvt_in(__first, __last, __str, __cvt)) + return __str; +#endif + _GLIBCXX_THROW_OR_ABORT(filesystem_error( + "Cannot convert character sequence", + std::make_error_code(errc::illegal_byte_sequence))); } inline std::string @@ -754,9 +843,18 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION path::u8string() const { #ifdef _GLIBCXX_FILESYSTEM_IS_WINDOWS - // TODO ??? + string __str; + // convert from native encoding to UTF-8 + codecvt_utf8<value_type> __cvt; + const value_type* __first = _M_pathname.data(); + const value_type* __last = __first + _M_pathname.size(); + if (__str_codecvt_out(__first, __last, __str, __cvt)) + return __str; + _GLIBCXX_THROW_OR_ABORT(filesystem_error( + "Cannot convert character sequence", + std::make_error_code(errc::illegal_byte_sequence))); #else - return string<char>(); + return _M_pathname; #endif } diff --git a/libstdc++-v3/src/filesystem/path.cc b/libstdc++-v3/src/filesystem/path.cc index 6f8abb39738..db58f3bdb70 100644 --- a/libstdc++-v3/src/filesystem/path.cc +++ b/libstdc++-v3/src/filesystem/path.cc @@ -432,20 +432,17 @@ path::string_type path::_S_convert_loc(const char* __first, const char* __last, const std::locale& __loc) { - auto& __cvt = use_facet<std::codecvt<wchar_t, char, mbstate_t>>(__loc); - mbstate_t __st = mbstate_t(); - size_t __len = __cvt.length(__st, __first, __last, - (__last - __first) * __cvt.max_length()); - std::wstring __out(__len, L'\0'); - auto __outnext = &__out.front(); - __st = mbstate_t(); - auto __res = __cvt.in(__st, __first, __last, __first, - __outnext, &__out.back() + 1, __outnext); - if (__res == codecvt_base::ok && __first == __last) - return _Cvt<wchar_t>::_S_convert(&__out.front(), __outnext); - _GLIBCXX_THROW_OR_ABORT(filesystem_error( - "Cannot convert character sequence", - std::make_error_code(errc::illegal_byte_sequence))); + auto& __cvt = std::use_facet<codecvt<wchar_t, char, mbstate_t>>(__loc); + basic_string<wchar_t> __ws; + if (!__str_codecvt_in(__first, __last, __ws, __cvt)) + _GLIBCXX_THROW_OR_ABORT(filesystem_error( + "Cannot convert character sequence", + std::make_error_code(errc::illegal_byte_sequence))); +#ifdef _GLIBCXX_FILESYSTEM_IS_WINDOWS + return __ws; +#else + return _Cvt<wchar_t>::_S_convert(__ws.data(), __ws.data() + __ws.size()); +#endif } std::size_t |