diff options
author | Allan Sandfeld Jensen <allan.jensen@qt.io> | 2020-10-12 14:27:29 +0200 |
---|---|---|
committer | Allan Sandfeld Jensen <allan.jensen@qt.io> | 2020-10-13 09:35:20 +0000 |
commit | c30a6232df03e1efbd9f3b226777b07e087a1122 (patch) | |
tree | e992f45784689f373bcc38d1b79a239ebe17ee23 /chromium/base/strings | |
parent | 7b5b123ac58f58ffde0f4f6e488bcd09aa4decd3 (diff) | |
download | qtwebengine-chromium-c30a6232df03e1efbd9f3b226777b07e087a1122.tar.gz |
BASELINE: Update Chromium to 85.0.4183.14085-based
Change-Id: Iaa42f4680837c57725b1344f108c0196741f6057
Reviewed-by: Allan Sandfeld Jensen <allan.jensen@qt.io>
Diffstat (limited to 'chromium/base/strings')
26 files changed, 1958 insertions, 1515 deletions
diff --git a/chromium/base/strings/no_trigraphs_unittest.cc b/chromium/base/strings/no_trigraphs_unittest.cc new file mode 100644 index 00000000000..736679b9055 --- /dev/null +++ b/chromium/base/strings/no_trigraphs_unittest.cc @@ -0,0 +1,10 @@ +// Copyright 2020 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "base/strings/strcat.h" +#include "testing/gtest/include/gtest/gtest.h" + +TEST(NoTrigraphs, Basic) { + EXPECT_EQ("??=", base::StrCat({"?", "?", "="})); +} diff --git a/chromium/base/strings/strcat.cc b/chromium/base/strings/strcat.cc index 35231ef691d..d94c2ea9148 100644 --- a/chromium/base/strings/strcat.cc +++ b/chromium/base/strings/strcat.cc @@ -4,82 +4,42 @@ #include "base/strings/strcat.h" -namespace base { - -namespace { - -// Reserves an additional amount of capacity in the given string, growing by at -// least 2x if necessary. Used by StrAppendT(). -// -// The "at least 2x" growing rule duplicates the exponential growth of -// std::string. The problem is that most implementations of reserve() will grow -// exactly to the requested amount instead of exponentially growing like would -// happen when appending normally. If we didn't do this, an append after the -// call to StrAppend() would definitely cause a reallocation, and loops with -// StrAppend() calls would have O(n^2) complexity to execute. Instead, we want -// StrAppend() to have the same semantics as std::string::append(). -template <typename String> -void ReserveAdditionalIfNeeded(String* str, - typename String::size_type additional) { - const size_t required = str->size() + additional; - // Check whether we need to reserve additional capacity at all. 
- if (required <= str->capacity()) - return; - - str->reserve(std::max(required, str->capacity() * 2)); -} - -template <typename DestString, typename InputString> -void StrAppendT(DestString* dest, span<const InputString> pieces) { - size_t additional_size = 0; - for (const auto& cur : pieces) - additional_size += cur.size(); - ReserveAdditionalIfNeeded(dest, additional_size); +#include <string> - for (const auto& cur : pieces) - dest->append(cur.data(), cur.size()); -} +#include "base/strings/strcat_internal.h" -} // namespace +namespace base { std::string StrCat(span<const StringPiece> pieces) { - std::string result; - StrAppendT(&result, pieces); - return result; + return internal::StrCatT(pieces); } string16 StrCat(span<const StringPiece16> pieces) { - string16 result; - StrAppendT(&result, pieces); - return result; + return internal::StrCatT(pieces); } std::string StrCat(span<const std::string> pieces) { - std::string result; - StrAppendT(&result, pieces); - return result; + return internal::StrCatT(pieces); } string16 StrCat(span<const string16> pieces) { - string16 result; - StrAppendT(&result, pieces); - return result; + return internal::StrCatT(pieces); } void StrAppend(std::string* dest, span<const StringPiece> pieces) { - StrAppendT(dest, pieces); + internal::StrAppendT(dest, pieces); } void StrAppend(string16* dest, span<const StringPiece16> pieces) { - StrAppendT(dest, pieces); + internal::StrAppendT(dest, pieces); } void StrAppend(std::string* dest, span<const std::string> pieces) { - StrAppendT(dest, pieces); + internal::StrAppendT(dest, pieces); } void StrAppend(string16* dest, span<const string16> pieces) { - StrAppendT(dest, pieces); + internal::StrAppendT(dest, pieces); } } // namespace base diff --git a/chromium/base/strings/strcat.h b/chromium/base/strings/strcat.h index b7c76215ab2..5d9c38a075c 100644 --- a/chromium/base/strings/strcat.h +++ b/chromium/base/strings/strcat.h @@ -69,10 +69,11 @@ BASE_EXPORT string16 StrCat(span<const string16> 
pieces) WARN_UNUSED_RESULT; // Initializer list forwards to the array version. inline std::string StrCat(std::initializer_list<StringPiece> pieces) { - return StrCat(make_span(pieces.begin(), pieces.size())); + return StrCat(make_span(pieces)); } + inline string16 StrCat(std::initializer_list<StringPiece16> pieces) { - return StrCat(make_span(pieces.begin(), pieces.size())); + return StrCat(make_span(pieces)); } // StrAppend ------------------------------------------------------------------- @@ -91,13 +92,18 @@ BASE_EXPORT void StrAppend(string16* dest, span<const string16> pieces); // Initializer list forwards to the array version. inline void StrAppend(std::string* dest, std::initializer_list<StringPiece> pieces) { - return StrAppend(dest, make_span(pieces.begin(), pieces.size())); + StrAppend(dest, make_span(pieces)); } + inline void StrAppend(string16* dest, std::initializer_list<StringPiece16> pieces) { - return StrAppend(dest, make_span(pieces.begin(), pieces.size())); + StrAppend(dest, make_span(pieces)); } } // namespace base +#if defined(OS_WIN) +#include "base/strings/strcat_win.h" +#endif + #endif // BASE_STRINGS_STRCAT_H_ diff --git a/chromium/base/strings/strcat_internal.h b/chromium/base/strings/strcat_internal.h new file mode 100644 index 00000000000..f5e52f08667 --- /dev/null +++ b/chromium/base/strings/strcat_internal.h @@ -0,0 +1,60 @@ +// Copyright 2020 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef BASE_STRINGS_STRCAT_INTERNAL_H_ +#define BASE_STRINGS_STRCAT_INTERNAL_H_ + +#include <string> + +#include "base/containers/span.h" + +namespace base { + +namespace internal { + +// Reserves an additional amount of capacity in the given string, growing by at +// least 2x if necessary. Used by StrAppendT(). +// +// The "at least 2x" growing rule duplicates the exponential growth of +// std::string. 
The problem is that most implementations of reserve() will grow +// exactly to the requested amount instead of exponentially growing like would +// happen when appending normally. If we didn't do this, an append after the +// call to StrAppend() would definitely cause a reallocation, and loops with +// StrAppend() calls would have O(n^2) complexity to execute. Instead, we want +// StrAppend() to have the same semantics as std::string::append(). +template <typename String> +void ReserveAdditionalIfNeeded(String* str, + typename String::size_type additional) { + const size_t required = str->size() + additional; + // Check whether we need to reserve additional capacity at all. + if (required <= str->capacity()) + return; + + str->reserve(std::max(required, str->capacity() * 2)); +} + +template <typename DestString, typename InputString> +void StrAppendT(DestString* dest, span<const InputString> pieces) { + size_t additional_size = 0; + for (const auto& cur : pieces) + additional_size += cur.size(); + ReserveAdditionalIfNeeded(dest, additional_size); + + for (const auto& cur : pieces) + dest->append(cur.data(), cur.size()); +} + +template <typename StringT> +auto StrCatT(span<const StringT> pieces) { + std::basic_string<typename StringT::value_type, typename StringT::traits_type> + result; + StrAppendT(&result, pieces); + return result; +} + +} // namespace internal + +} // namespace base + +#endif // BASE_STRINGS_STRCAT_INTERNAL_H_ diff --git a/chromium/base/strings/strcat_win.cc b/chromium/base/strings/strcat_win.cc new file mode 100644 index 00000000000..ad2f2e16ab1 --- /dev/null +++ b/chromium/base/strings/strcat_win.cc @@ -0,0 +1,35 @@ +// Copyright 2020 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "base/strings/strcat_win.h" + +#include <string> + +#include "base/containers/span.h" +#include "base/strings/strcat_internal.h" +#include "base/strings/string_piece.h" + +namespace base { + +#if defined(BASE_STRING16_IS_STD_U16STRING) + +std::wstring StrCat(span<const WStringPiece> pieces) { + return internal::StrCatT(pieces); +} + +std::wstring StrCat(span<const std::wstring> pieces) { + return internal::StrCatT(pieces); +} + +void StrAppend(std::wstring* dest, span<const WStringPiece> pieces) { + internal::StrAppendT(dest, pieces); +} + +void StrAppend(std::wstring* dest, span<const std::wstring> pieces) { + internal::StrAppendT(dest, pieces); +} + +#endif + +} // namespace base diff --git a/chromium/base/strings/strcat_win.h b/chromium/base/strings/strcat_win.h new file mode 100644 index 00000000000..4b8f0290e51 --- /dev/null +++ b/chromium/base/strings/strcat_win.h @@ -0,0 +1,45 @@ +// Copyright 2020 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef BASE_STRINGS_STRCAT_WIN_H_ +#define BASE_STRINGS_STRCAT_WIN_H_ + +#include <initializer_list> +#include <string> + +#include "base/base_export.h" +#include "base/compiler_specific.h" +#include "base/containers/span.h" +#include "base/strings/string_piece.h" + +namespace base { + +// The following section contains overloads of the cross-platform APIs for +// std::wstring and base::WStringPiece. These are only enabled if std::wstring +// and base::string16 are distinct types, as otherwise this would result in an +// ODR violation. +// TODO(crbug.com/911896): Remove those guards once base::string16 is +// std::u16string. 
+#if defined(BASE_STRING16_IS_STD_U16STRING) +BASE_EXPORT void StrAppend(std::wstring* dest, span<const WStringPiece> pieces); +BASE_EXPORT void StrAppend(std::wstring* dest, span<const std::wstring> pieces); + +inline void StrAppend(std::wstring* dest, + std::initializer_list<WStringPiece> pieces) { + StrAppend(dest, make_span(pieces)); +} + +BASE_EXPORT std::wstring StrCat(span<const WStringPiece> pieces) + WARN_UNUSED_RESULT; +BASE_EXPORT std::wstring StrCat(span<const std::wstring> pieces) + WARN_UNUSED_RESULT; + +inline std::wstring StrCat(std::initializer_list<WStringPiece> pieces) { + return StrCat(make_span(pieces)); +} +#endif // defined(BASE_STRING16_IS_STD_U16STRING) + +} // namespace base + +#endif // BASE_STRINGS_STRCAT_WIN_H_ diff --git a/chromium/base/strings/string_number_conversions.cc b/chromium/base/strings/string_number_conversions.cc index bd0a7e435f6..e2ef8acf66f 100644 --- a/chromium/base/strings/string_number_conversions.cc +++ b/chromium/base/strings/string_number_conversions.cc @@ -4,458 +4,120 @@ #include "base/strings/string_number_conversions.h" -#include <ctype.h> -#include <errno.h> -#include <stdlib.h> -#include <wctype.h> +#include <iterator> +#include <string> -#include <limits> -#include <type_traits> - -#include "base/check_op.h" -#include "base/no_destructor.h" -#include "base/numerics/safe_math.h" -#include "base/strings/string_util.h" -#include "base/strings/utf_string_conversions.h" -#include "base/third_party/double_conversion/double-conversion/double-conversion.h" +#include "base/containers/span.h" +#include "base/logging.h" +#include "base/strings/string16.h" +#include "base/strings/string_number_conversions_internal.h" +#include "base/strings/string_piece.h" namespace base { -namespace { - -template <typename STR, typename INT> -struct IntToStringT { - static STR IntToString(INT value) { - // log10(2) ~= 0.3 bytes needed per bit or per byte log10(2**8) ~= 2.4. 
- // So round up to allocate 3 output characters per byte, plus 1 for '-'. - const size_t kOutputBufSize = - 3 * sizeof(INT) + std::numeric_limits<INT>::is_signed; - - // Create the string in a temporary buffer, write it back to front, and - // then return the substr of what we ended up using. - using CHR = typename STR::value_type; - CHR outbuf[kOutputBufSize]; - - // The ValueOrDie call below can never fail, because UnsignedAbs is valid - // for all valid inputs. - typename std::make_unsigned<INT>::type res = - CheckedNumeric<INT>(value).UnsignedAbs().ValueOrDie(); - - CHR* end = outbuf + kOutputBufSize; - CHR* i = end; - do { - --i; - DCHECK(i != outbuf); - *i = static_cast<CHR>((res % 10) + '0'); - res /= 10; - } while (res != 0); - if (IsValueNegative(value)) { - --i; - DCHECK(i != outbuf); - *i = static_cast<CHR>('-'); - } - return STR(i, end); - } -}; - -// Utility to convert a character to a digit in a given base -template<typename CHAR, int BASE, bool BASE_LTE_10> class BaseCharToDigit { -}; - -// Faster specialization for bases <= 10 -template<typename CHAR, int BASE> class BaseCharToDigit<CHAR, BASE, true> { - public: - static bool Convert(CHAR c, uint8_t* digit) { - if (c >= '0' && c < '0' + BASE) { - *digit = static_cast<uint8_t>(c - '0'); - return true; - } - return false; - } -}; - -// Specialization for bases where 10 < base <= 36 -template<typename CHAR, int BASE> class BaseCharToDigit<CHAR, BASE, false> { - public: - static bool Convert(CHAR c, uint8_t* digit) { - if (c >= '0' && c <= '9') { - *digit = c - '0'; - } else if (c >= 'a' && c < 'a' + BASE - 10) { - *digit = c - 'a' + 10; - } else if (c >= 'A' && c < 'A' + BASE - 10) { - *digit = c - 'A' + 10; - } else { - return false; - } - return true; - } -}; - -template <int BASE, typename CHAR> -bool CharToDigit(CHAR c, uint8_t* digit) { - return BaseCharToDigit<CHAR, BASE, BASE <= 10>::Convert(c, digit); -} - -// There is an IsUnicodeWhitespace for wchars defined in string_util.h, but it -// is 
locale independent, whereas the functions we are replacing were -// locale-dependent. TBD what is desired, but for the moment let's not -// introduce a change in behaviour. -template<typename CHAR> class WhitespaceHelper { -}; - -template<> class WhitespaceHelper<char> { - public: - static bool Invoke(char c) { - return 0 != isspace(static_cast<unsigned char>(c)); - } -}; - -template<> class WhitespaceHelper<char16> { - public: - static bool Invoke(char16 c) { - return 0 != iswspace(c); - } -}; - -template<typename CHAR> bool LocalIsWhitespace(CHAR c) { - return WhitespaceHelper<CHAR>::Invoke(c); -} - -// IteratorRangeToNumberTraits should provide: -// - a typedef for iterator_type, the iterator type used as input. -// - a typedef for value_type, the target numeric type. -// - static functions min, max (returning the minimum and maximum permitted -// values) -// - constant kBase, the base in which to interpret the input -template<typename IteratorRangeToNumberTraits> -class IteratorRangeToNumber { - public: - typedef IteratorRangeToNumberTraits traits; - typedef typename traits::iterator_type const_iterator; - typedef typename traits::value_type value_type; - - // Generalized iterator-range-to-number conversion. 
- // - static bool Invoke(const_iterator begin, - const_iterator end, - value_type* output) { - bool valid = true; - - while (begin != end && LocalIsWhitespace(*begin)) { - valid = false; - ++begin; - } - - if (begin != end && *begin == '-') { - if (!std::numeric_limits<value_type>::is_signed) { - *output = 0; - valid = false; - } else if (!Negative::Invoke(begin + 1, end, output)) { - valid = false; - } - } else { - if (begin != end && *begin == '+') { - ++begin; - } - if (!Positive::Invoke(begin, end, output)) { - valid = false; - } - } - - return valid; - } - - private: - // Sign provides: - // - a static function, CheckBounds, that determines whether the next digit - // causes an overflow/underflow - // - a static function, Increment, that appends the next digit appropriately - // according to the sign of the number being parsed. - template<typename Sign> - class Base { - public: - static bool Invoke(const_iterator begin, const_iterator end, - typename traits::value_type* output) { - *output = 0; - - if (begin == end) { - return false; - } - - // Note: no performance difference was found when using template - // specialization to remove this check in bases other than 16 - if (traits::kBase == 16 && end - begin > 2 && *begin == '0' && - (*(begin + 1) == 'x' || *(begin + 1) == 'X')) { - begin += 2; - } - - for (const_iterator current = begin; current != end; ++current) { - uint8_t new_digit = 0; - - if (!CharToDigit<traits::kBase>(*current, &new_digit)) { - return false; - } - - if (current != begin) { - if (!Sign::CheckBounds(output, new_digit)) { - return false; - } - *output *= traits::kBase; - } - - Sign::Increment(new_digit, output); - } - return true; - } - }; - - class Positive : public Base<Positive> { - public: - static bool CheckBounds(value_type* output, uint8_t new_digit) { - if (*output > static_cast<value_type>(traits::max() / traits::kBase) || - (*output == static_cast<value_type>(traits::max() / traits::kBase) && - new_digit > traits::max() % 
traits::kBase)) { - *output = traits::max(); - return false; - } - return true; - } - static void Increment(uint8_t increment, value_type* output) { - *output += increment; - } - }; - - class Negative : public Base<Negative> { - public: - static bool CheckBounds(value_type* output, uint8_t new_digit) { - if (*output < traits::min() / traits::kBase || - (*output == traits::min() / traits::kBase && - new_digit > 0 - traits::min() % traits::kBase)) { - *output = traits::min(); - return false; - } - return true; - } - static void Increment(uint8_t increment, value_type* output) { - *output -= increment; - } - }; -}; - -template<typename ITERATOR, typename VALUE, int BASE> -class BaseIteratorRangeToNumberTraits { - public: - typedef ITERATOR iterator_type; - typedef VALUE value_type; - static value_type min() { - return std::numeric_limits<value_type>::min(); - } - static value_type max() { - return std::numeric_limits<value_type>::max(); - } - static const int kBase = BASE; -}; - -template<typename ITERATOR> -class BaseHexIteratorRangeToIntTraits - : public BaseIteratorRangeToNumberTraits<ITERATOR, int, 16> { -}; - -template <typename ITERATOR> -class BaseHexIteratorRangeToUIntTraits - : public BaseIteratorRangeToNumberTraits<ITERATOR, uint32_t, 16> {}; - -template <typename ITERATOR> -class BaseHexIteratorRangeToInt64Traits - : public BaseIteratorRangeToNumberTraits<ITERATOR, int64_t, 16> {}; - -template <typename ITERATOR> -class BaseHexIteratorRangeToUInt64Traits - : public BaseIteratorRangeToNumberTraits<ITERATOR, uint64_t, 16> {}; - -typedef BaseHexIteratorRangeToIntTraits<StringPiece::const_iterator> - HexIteratorRangeToIntTraits; - -typedef BaseHexIteratorRangeToUIntTraits<StringPiece::const_iterator> - HexIteratorRangeToUIntTraits; - -typedef BaseHexIteratorRangeToInt64Traits<StringPiece::const_iterator> - HexIteratorRangeToInt64Traits; - -typedef BaseHexIteratorRangeToUInt64Traits<StringPiece::const_iterator> - HexIteratorRangeToUInt64Traits; - -template 
<typename VALUE, int BASE> -class StringPieceToNumberTraits - : public BaseIteratorRangeToNumberTraits<StringPiece::const_iterator, - VALUE, - BASE> { -}; - -template <typename VALUE> -bool StringToIntImpl(StringPiece input, VALUE* output) { - return IteratorRangeToNumber<StringPieceToNumberTraits<VALUE, 10> >::Invoke( - input.begin(), input.end(), output); -} - -template <typename VALUE, int BASE> -class StringPiece16ToNumberTraits - : public BaseIteratorRangeToNumberTraits<StringPiece16::const_iterator, - VALUE, - BASE> { -}; - -template <typename VALUE> -bool String16ToIntImpl(StringPiece16 input, VALUE* output) { - return IteratorRangeToNumber<StringPiece16ToNumberTraits<VALUE, 10> >::Invoke( - input.begin(), input.end(), output); -} - -} // namespace - std::string NumberToString(int value) { - return IntToStringT<std::string, int>::IntToString(value); + return internal::IntToStringT<std::string>(value); } string16 NumberToString16(int value) { - return IntToStringT<string16, int>::IntToString(value); + return internal::IntToStringT<string16>(value); } std::string NumberToString(unsigned value) { - return IntToStringT<std::string, unsigned>::IntToString(value); + return internal::IntToStringT<std::string>(value); } string16 NumberToString16(unsigned value) { - return IntToStringT<string16, unsigned>::IntToString(value); + return internal::IntToStringT<string16>(value); } std::string NumberToString(long value) { - return IntToStringT<std::string, long>::IntToString(value); + return internal::IntToStringT<std::string>(value); } string16 NumberToString16(long value) { - return IntToStringT<string16, long>::IntToString(value); + return internal::IntToStringT<string16>(value); } std::string NumberToString(unsigned long value) { - return IntToStringT<std::string, unsigned long>::IntToString(value); + return internal::IntToStringT<std::string>(value); } string16 NumberToString16(unsigned long value) { - return IntToStringT<string16, unsigned long>::IntToString(value); 
+ return internal::IntToStringT<string16>(value); } std::string NumberToString(long long value) { - return IntToStringT<std::string, long long>::IntToString(value); + return internal::IntToStringT<std::string>(value); } string16 NumberToString16(long long value) { - return IntToStringT<string16, long long>::IntToString(value); + return internal::IntToStringT<string16>(value); } std::string NumberToString(unsigned long long value) { - return IntToStringT<std::string, unsigned long long>::IntToString(value); + return internal::IntToStringT<std::string>(value); } string16 NumberToString16(unsigned long long value) { - return IntToStringT<string16, unsigned long long>::IntToString(value); -} - -static const double_conversion::DoubleToStringConverter* -GetDoubleToStringConverter() { - static NoDestructor<double_conversion::DoubleToStringConverter> converter( - double_conversion::DoubleToStringConverter::EMIT_POSITIVE_EXPONENT_SIGN, - nullptr, nullptr, 'e', -6, 12, 0, 0); - return converter.get(); + return internal::IntToStringT<string16>(value); } std::string NumberToString(double value) { - char buffer[32]; - double_conversion::StringBuilder builder(buffer, sizeof(buffer)); - GetDoubleToStringConverter()->ToShortest(value, &builder); - return std::string(buffer, builder.position()); + return internal::DoubleToStringT<std::string>(value); } -base::string16 NumberToString16(double value) { - char buffer[32]; - double_conversion::StringBuilder builder(buffer, sizeof(buffer)); - GetDoubleToStringConverter()->ToShortest(value, &builder); - - // The number will be ASCII. This creates the string using the "input - // iterator" variant which promotes from 8-bit to 16-bit via "=". 
- return base::string16(&buffer[0], &buffer[builder.position()]); +string16 NumberToString16(double value) { + return internal::DoubleToStringT<string16>(value); } bool StringToInt(StringPiece input, int* output) { - return StringToIntImpl(input, output); + return internal::StringToIntImpl(input, *output); } bool StringToInt(StringPiece16 input, int* output) { - return String16ToIntImpl(input, output); + return internal::StringToIntImpl(input, *output); } bool StringToUint(StringPiece input, unsigned* output) { - return StringToIntImpl(input, output); + return internal::StringToIntImpl(input, *output); } bool StringToUint(StringPiece16 input, unsigned* output) { - return String16ToIntImpl(input, output); + return internal::StringToIntImpl(input, *output); } bool StringToInt64(StringPiece input, int64_t* output) { - return StringToIntImpl(input, output); + return internal::StringToIntImpl(input, *output); } bool StringToInt64(StringPiece16 input, int64_t* output) { - return String16ToIntImpl(input, output); + return internal::StringToIntImpl(input, *output); } bool StringToUint64(StringPiece input, uint64_t* output) { - return StringToIntImpl(input, output); + return internal::StringToIntImpl(input, *output); } bool StringToUint64(StringPiece16 input, uint64_t* output) { - return String16ToIntImpl(input, output); + return internal::StringToIntImpl(input, *output); } bool StringToSizeT(StringPiece input, size_t* output) { - return StringToIntImpl(input, output); + return internal::StringToIntImpl(input, *output); } bool StringToSizeT(StringPiece16 input, size_t* output) { - return String16ToIntImpl(input, output); -} - -template <typename STRING, typename CHAR> -bool StringToDoubleImpl(STRING input, const CHAR* data, double* output) { - static NoDestructor<double_conversion::StringToDoubleConverter> converter( - double_conversion::StringToDoubleConverter::ALLOW_LEADING_SPACES | - double_conversion::StringToDoubleConverter::ALLOW_TRAILING_JUNK, - 0.0, 0, nullptr, 
nullptr); - - int processed_characters_count; - *output = converter->StringToDouble(data, input.size(), - &processed_characters_count); - - // Cases to return false: - // - If the input string is empty, there was nothing to parse. - // - If the value saturated to HUGE_VAL. - // - If the entire string was not processed, there are either characters - // remaining in the string after a parsed number, or the string does not - // begin with a parseable number. - // - If the first character is a space, there was leading whitespace - return !input.empty() && *output != HUGE_VAL && *output != -HUGE_VAL && - static_cast<size_t>(processed_characters_count) == input.size() && - !IsUnicodeWhitespace(input[0]); + return internal::StringToIntImpl(input, *output); } bool StringToDouble(StringPiece input, double* output) { - return StringToDoubleImpl(input, input.data(), output); + return internal::StringToDoubleImpl(input, input.data(), *output); } bool StringToDouble(StringPiece16 input, double* output) { - return StringToDoubleImpl( - input, reinterpret_cast<const uint16_t*>(input.data()), output); + return internal::StringToDoubleImpl( + input, reinterpret_cast<const uint16_t*>(input.data()), *output); } std::string HexEncode(const void* bytes, size_t size) { @@ -477,69 +139,36 @@ std::string HexEncode(base::span<const uint8_t> bytes) { } bool HexStringToInt(StringPiece input, int* output) { - return IteratorRangeToNumber<HexIteratorRangeToIntTraits>::Invoke( - input.begin(), input.end(), output); + return internal::HexStringToIntImpl(input, *output); } bool HexStringToUInt(StringPiece input, uint32_t* output) { - return IteratorRangeToNumber<HexIteratorRangeToUIntTraits>::Invoke( - input.begin(), input.end(), output); + return internal::HexStringToIntImpl(input, *output); } bool HexStringToInt64(StringPiece input, int64_t* output) { - return IteratorRangeToNumber<HexIteratorRangeToInt64Traits>::Invoke( - input.begin(), input.end(), output); + return 
internal::HexStringToIntImpl(input, *output); } bool HexStringToUInt64(StringPiece input, uint64_t* output) { - return IteratorRangeToNumber<HexIteratorRangeToUInt64Traits>::Invoke( - input.begin(), input.end(), output); -} - -template <typename Container> -static bool HexStringToByteContainer(StringPiece input, Container* output) { - DCHECK_EQ(output->size(), 0u); - size_t count = input.size(); - if (count == 0 || (count % 2) != 0) - return false; - for (uintptr_t i = 0; i < count / 2; ++i) { - uint8_t msb = 0; // most significant 4 bits - uint8_t lsb = 0; // least significant 4 bits - if (!CharToDigit<16>(input[i * 2], &msb) || - !CharToDigit<16>(input[i * 2 + 1], &lsb)) { - return false; - } - output->push_back((msb << 4) | lsb); - } - return true; + return internal::HexStringToIntImpl(input, *output); } bool HexStringToBytes(StringPiece input, std::vector<uint8_t>* output) { - return HexStringToByteContainer(input, output); + DCHECK(output->empty()); + return internal::HexStringToByteContainer(input, std::back_inserter(*output)); } bool HexStringToString(StringPiece input, std::string* output) { - return HexStringToByteContainer(input, output); + DCHECK(output->empty()); + return internal::HexStringToByteContainer(input, std::back_inserter(*output)); } bool HexStringToSpan(StringPiece input, base::span<uint8_t> output) { - size_t count = input.size(); - if (count == 0 || (count % 2) != 0) + if (input.size() / 2 != output.size()) return false; - if (count / 2 != output.size()) - return false; - - for (uintptr_t i = 0; i < count / 2; ++i) { - uint8_t msb = 0; // most significant 4 bits - uint8_t lsb = 0; // least significant 4 bits - if (!CharToDigit<16>(input[i * 2], &msb) || - !CharToDigit<16>(input[i * 2 + 1], &lsb)) { - return false; - } - output[i] = (msb << 4) | lsb; - } - return true; + return internal::HexStringToByteContainer(input, output.begin()); } } // namespace base diff --git a/chromium/base/strings/string_number_conversions.h 
b/chromium/base/strings/string_number_conversions.h index 87df24e21c9..f001641d186 100644 --- a/chromium/base/strings/string_number_conversions.h +++ b/chromium/base/strings/string_number_conversions.h @@ -20,10 +20,6 @@ // ---------------------------------------------------------------------------- // IMPORTANT MESSAGE FROM YOUR SPONSOR // -// This file contains no "wstring" variants. New code should use string16. If -// you need to make old code work, use the UTF8 version and convert. Please do -// not add wstring variants. -// // Please do not add "convenience" functions for converting strings to integers // that return the value and ignore success/failure. That encourages people to // write code that doesn't properly handle the error conditions. @@ -154,4 +150,8 @@ BASE_EXPORT bool HexStringToSpan(StringPiece input, base::span<uint8_t> output); } // namespace base +#if defined(OS_WIN) +#include "base/strings/string_number_conversions_win.h" +#endif + #endif // BASE_STRINGS_STRING_NUMBER_CONVERSIONS_H_ diff --git a/chromium/base/strings/string_number_conversions_internal.h b/chromium/base/strings/string_number_conversions_internal.h new file mode 100644 index 00000000000..1aa0c74c0b5 --- /dev/null +++ b/chromium/base/strings/string_number_conversions_internal.h @@ -0,0 +1,303 @@ +// Copyright 2020 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#ifndef BASE_STRINGS_STRING_NUMBER_CONVERSIONS_INTERNAL_H_ +#define BASE_STRINGS_STRING_NUMBER_CONVERSIONS_INTERNAL_H_ + +#include <ctype.h> +#include <errno.h> +#include <stdlib.h> +#include <wctype.h> + +#include <limits> + +#include "base/check_op.h" +#include "base/logging.h" +#include "base/no_destructor.h" +#include "base/numerics/safe_math.h" +#include "base/strings/string_util.h" +#include "base/third_party/double_conversion/double-conversion/double-conversion.h" + +namespace base { + +namespace internal { + +template <typename STR, typename INT> +static STR IntToStringT(INT value) { + // log10(2) ~= 0.3 bytes needed per bit or per byte log10(2**8) ~= 2.4. + // So round up to allocate 3 output characters per byte, plus 1 for '-'. + const size_t kOutputBufSize = + 3 * sizeof(INT) + std::numeric_limits<INT>::is_signed; + + // Create the string in a temporary buffer, write it back to front, and + // then return the substr of what we ended up using. + using CHR = typename STR::value_type; + CHR outbuf[kOutputBufSize]; + + // The ValueOrDie call below can never fail, because UnsignedAbs is valid + // for all valid inputs. 
+ std::make_unsigned_t<INT> res = + CheckedNumeric<INT>(value).UnsignedAbs().ValueOrDie(); + + CHR* end = outbuf + kOutputBufSize; + CHR* i = end; + do { + --i; + DCHECK(i != outbuf); + *i = static_cast<CHR>((res % 10) + '0'); + res /= 10; + } while (res != 0); + if (IsValueNegative(value)) { + --i; + DCHECK(i != outbuf); + *i = static_cast<CHR>('-'); + } + return STR(i, end); +} + +// Utility to convert a character to a digit in a given base +template <int BASE, typename CHAR> +Optional<uint8_t> CharToDigit(CHAR c) { + static_assert(1 <= BASE && BASE <= 36, "BASE needs to be in [1, 36]"); + if (c >= '0' && c < '0' + std::min(BASE, 10)) + return c - '0'; + + if (c >= 'a' && c < 'a' + BASE - 10) + return c - 'a' + 10; + + if (c >= 'A' && c < 'A' + BASE - 10) + return c - 'A' + 10; + + return base::nullopt; +} + +// There is an IsUnicodeWhitespace for wchars defined in string_util.h, but it +// is locale independent, whereas the functions we are replacing were +// locale-dependent. TBD what is desired, but for the moment let's not +// introduce a change in behaviour. 
+template <typename CHAR> +class WhitespaceHelper {}; + +template <> +class WhitespaceHelper<char> { + public: + static bool Invoke(char c) { + return 0 != isspace(static_cast<unsigned char>(c)); + } +}; + +template <> +class WhitespaceHelper<char16> { + public: + static bool Invoke(char16 c) { return 0 != iswspace(c); } +}; + +template <typename CHAR> +bool LocalIsWhitespace(CHAR c) { + return WhitespaceHelper<CHAR>::Invoke(c); +} + +template <typename Number, int kBase> +class StringToNumberParser { + public: + struct Result { + Number value = 0; + bool valid = false; + }; + + static constexpr Number kMin = std::numeric_limits<Number>::min(); + static constexpr Number kMax = std::numeric_limits<Number>::max(); + + // Sign provides: + // - a static function, CheckBounds, that determines whether the next digit + // causes an overflow/underflow + // - a static function, Increment, that appends the next digit appropriately + // according to the sign of the number being parsed. + template <typename Sign> + class Base { + public: + template <typename Iter> + static Result Invoke(Iter begin, Iter end) { + Number value = 0; + + if (begin == end) { + return {value, false}; + } + + // Note: no performance difference was found when using template + // specialization to remove this check in bases other than 16 + if (kBase == 16 && end - begin > 2 && *begin == '0' && + (*(begin + 1) == 'x' || *(begin + 1) == 'X')) { + begin += 2; + } + + for (Iter current = begin; current != end; ++current) { + Optional<uint8_t> new_digit = CharToDigit<kBase>(*current); + + if (!new_digit) { + return {value, false}; + } + + if (current != begin) { + Result result = Sign::CheckBounds(value, *new_digit); + if (!result.valid) + return result; + + value *= kBase; + } + + value = Sign::Increment(value, *new_digit); + } + return {value, true}; + } + }; + + class Positive : public Base<Positive> { + public: + static Result CheckBounds(Number value, uint8_t new_digit) { + if (value > 
static_cast<Number>(kMax / kBase) || + (value == static_cast<Number>(kMax / kBase) && + new_digit > kMax % kBase)) { + return {kMax, false}; + } + return {value, true}; + } + static Number Increment(Number lhs, uint8_t rhs) { return lhs + rhs; } + }; + + class Negative : public Base<Negative> { + public: + static Result CheckBounds(Number value, uint8_t new_digit) { + if (value < kMin / kBase || + (value == kMin / kBase && new_digit > 0 - kMin % kBase)) { + return {kMin, false}; + } + return {value, true}; + } + static Number Increment(Number lhs, uint8_t rhs) { return lhs - rhs; } + }; +}; + +template <typename Number, int kBase, typename Str> +auto StringToNumber(BasicStringPiece<Str> input) { + using Parser = StringToNumberParser<Number, kBase>; + using Result = typename Parser::Result; + + bool has_leading_whitespace = false; + auto begin = input.begin(); + auto end = input.end(); + + while (begin != end && LocalIsWhitespace(*begin)) { + has_leading_whitespace = true; + ++begin; + } + + if (begin != end && *begin == '-') { + if (!std::numeric_limits<Number>::is_signed) { + return Result{0, false}; + } + + Result result = Parser::Negative::Invoke(begin + 1, end); + result.valid &= !has_leading_whitespace; + return result; + } + + if (begin != end && *begin == '+') { + ++begin; + } + + Result result = Parser::Positive::Invoke(begin, end); + result.valid &= !has_leading_whitespace; + return result; +} + +template <typename STR, typename VALUE> +bool StringToIntImpl(BasicStringPiece<STR> input, VALUE& output) { + auto result = StringToNumber<VALUE, 10>(input); + output = result.value; + return result.valid; +} + +template <typename STR, typename VALUE> +bool HexStringToIntImpl(BasicStringPiece<STR> input, VALUE& output) { + auto result = StringToNumber<VALUE, 16>(input); + output = result.value; + return result.valid; +} + +static const double_conversion::DoubleToStringConverter* +GetDoubleToStringConverter() { + static 
NoDestructor<double_conversion::DoubleToStringConverter> converter( + double_conversion::DoubleToStringConverter::EMIT_POSITIVE_EXPONENT_SIGN, + nullptr, nullptr, 'e', -6, 12, 0, 0); + return converter.get(); +} + +// Converts a given (data, size) pair to a desired string type. For +// performance reasons, this dispatches to a different constructor if the +// passed-in data matches the string's value_type. +template <typename StringT> +StringT ToString(const typename StringT::value_type* data, size_t size) { + return StringT(data, size); +} + +template <typename StringT, typename CharT> +StringT ToString(const CharT* data, size_t size) { + return StringT(data, data + size); +} + +template <typename StringT> +StringT DoubleToStringT(double value) { + char buffer[32]; + double_conversion::StringBuilder builder(buffer, sizeof(buffer)); + GetDoubleToStringConverter()->ToShortest(value, &builder); + return ToString<StringT>(buffer, builder.position()); +} + +template <typename STRING, typename CHAR> +bool StringToDoubleImpl(STRING input, const CHAR* data, double& output) { + static NoDestructor<double_conversion::StringToDoubleConverter> converter( + double_conversion::StringToDoubleConverter::ALLOW_LEADING_SPACES | + double_conversion::StringToDoubleConverter::ALLOW_TRAILING_JUNK, + 0.0, 0, nullptr, nullptr); + + int processed_characters_count; + output = converter->StringToDouble(data, input.size(), + &processed_characters_count); + + // Cases to return false: + // - If the input string is empty, there was nothing to parse. + // - If the value saturated to HUGE_VAL. + // - If the entire string was not processed, there are either characters + // remaining in the string after a parsed number, or the string does not + // begin with a parseable number. 
+ // - If the first character is a space, there was leading whitespace + return !input.empty() && output != HUGE_VAL && output != -HUGE_VAL && + static_cast<size_t>(processed_characters_count) == input.size() && + !IsUnicodeWhitespace(input[0]); +} + +template <typename OutIter> +static bool HexStringToByteContainer(StringPiece input, OutIter output) { + size_t count = input.size(); + if (count == 0 || (count % 2) != 0) + return false; + for (uintptr_t i = 0; i < count / 2; ++i) { + // most significant 4 bits + Optional<uint8_t> msb = CharToDigit<16>(input[i * 2]); + // least significant 4 bits + Optional<uint8_t> lsb = CharToDigit<16>(input[i * 2 + 1]); + if (!msb || !lsb) { + return false; + } + *(output++) = (*msb << 4) | *lsb; + } + return true; +} + +} // namespace internal + +} // namespace base + +#endif // BASE_STRINGS_STRING_NUMBER_CONVERSIONS_INTERNAL_H_ diff --git a/chromium/base/strings/string_number_conversions_win.cc b/chromium/base/strings/string_number_conversions_win.cc new file mode 100644 index 00000000000..8a1a3bea917 --- /dev/null +++ b/chromium/base/strings/string_number_conversions_win.cc @@ -0,0 +1,79 @@ +// Copyright 2020 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "base/strings/string_number_conversions_win.h" + +#include <string> + +#include "base/strings/string_number_conversions_internal.h" +#include "base/strings/string_piece.h" + +namespace base { + +std::wstring NumberToWString(int value) { + return internal::IntToStringT<std::wstring>(value); +} + +std::wstring NumberToWString(unsigned value) { + return internal::IntToStringT<std::wstring>(value); +} + +std::wstring NumberToWString(long value) { + return internal::IntToStringT<std::wstring>(value); +} + +std::wstring NumberToWString(unsigned long value) { + return internal::IntToStringT<std::wstring>(value); +} + +std::wstring NumberToWString(long long value) { + return internal::IntToStringT<std::wstring>(value); +} + +std::wstring NumberToWString(unsigned long long value) { + return internal::IntToStringT<std::wstring>(value); +} + +std::wstring NumberToWString(double value) { + return internal::DoubleToStringT<std::wstring>(value); +} + +#if defined(BASE_STRING16_IS_STD_U16STRING) +namespace internal { + +template <> +class WhitespaceHelper<wchar_t> { + public: + static bool Invoke(wchar_t c) { return 0 != iswspace(c); } +}; + +} // namespace internal + +bool StringToInt(WStringPiece input, int* output) { + return internal::StringToIntImpl(input, *output); +} + +bool StringToUint(WStringPiece input, unsigned* output) { + return internal::StringToIntImpl(input, *output); +} + +bool StringToInt64(WStringPiece input, int64_t* output) { + return internal::StringToIntImpl(input, *output); +} + +bool StringToUint64(WStringPiece input, uint64_t* output) { + return internal::StringToIntImpl(input, *output); +} + +bool StringToSizeT(WStringPiece input, size_t* output) { + return internal::StringToIntImpl(input, *output); +} + +bool StringToDouble(WStringPiece input, double* output) { + return internal::StringToDoubleImpl( + input, reinterpret_cast<const uint16_t*>(input.data()), *output); +} +#endif // defined(BASE_STRING16_IS_STD_U16STRING) + +} // namespace 
base diff --git a/chromium/base/strings/string_number_conversions_win.h b/chromium/base/strings/string_number_conversions_win.h new file mode 100644 index 00000000000..5abcc291130 --- /dev/null +++ b/chromium/base/strings/string_number_conversions_win.h @@ -0,0 +1,40 @@ +// Copyright 2020 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef BASE_STRINGS_STRING_NUMBER_CONVERSIONS_WIN_H_ +#define BASE_STRINGS_STRING_NUMBER_CONVERSIONS_WIN_H_ + +#include <string> + +#include "base/base_export.h" +#include "base/strings/string_piece.h" + +namespace base { + +BASE_EXPORT std::wstring NumberToWString(int value); +BASE_EXPORT std::wstring NumberToWString(unsigned int value); +BASE_EXPORT std::wstring NumberToWString(long value); +BASE_EXPORT std::wstring NumberToWString(unsigned long value); +BASE_EXPORT std::wstring NumberToWString(long long value); +BASE_EXPORT std::wstring NumberToWString(unsigned long long value); +BASE_EXPORT std::wstring NumberToWString(double value); + +// The following section contains overloads of the cross-platform APIs for +// std::wstring and base::WStringPiece. These are only enabled if std::wstring +// and base::string16 are distinct types, as otherwise this would result in an +// ODR violation. +// TODO(crbug.com/911896): Remove those guards once base::string16 is +// std::u16string. 
+#if defined(BASE_STRING16_IS_STD_U16STRING) +BASE_EXPORT bool StringToInt(WStringPiece input, int* output); +BASE_EXPORT bool StringToUint(WStringPiece input, unsigned* output); +BASE_EXPORT bool StringToInt64(WStringPiece input, int64_t* output); +BASE_EXPORT bool StringToUint64(WStringPiece input, uint64_t* output); +BASE_EXPORT bool StringToSizeT(WStringPiece input, size_t* output); +BASE_EXPORT bool StringToDouble(WStringPiece input, double* output); +#endif // defined(BASE_STRING16_IS_STD_U16STRING) + +} // namespace base + +#endif // BASE_STRINGS_STRING_NUMBER_CONVERSIONS_WIN_H_ diff --git a/chromium/base/strings/string_piece.h b/chromium/base/strings/string_piece.h index bc24b4d1e90..f60af47177b 100644 --- a/chromium/base/strings/string_piece.h +++ b/chromium/base/strings/string_piece.h @@ -25,11 +25,12 @@ #include <stddef.h> #include <iosfwd> +#include <ostream> #include <string> #include <type_traits> #include "base/base_export.h" -#include "base/logging.h" +#include "base/check_op.h" #include "base/strings/char_traits.h" #include "base/strings/string16.h" #include "base/strings/string_piece_forward.h" @@ -148,6 +149,7 @@ template <typename STRING_TYPE> class BasicStringPiece { public: // Standard STL container boilerplate. typedef size_t size_type; + typedef typename STRING_TYPE::traits_type traits_type; typedef typename STRING_TYPE::value_type value_type; typedef const value_type* pointer; typedef const value_type& reference; @@ -162,7 +164,7 @@ template <typename STRING_TYPE> class BasicStringPiece { // We provide non-explicit singleton constructors so users can pass // in a "const char*" or a "string" wherever a "StringPiece" is // expected (likewise for char16, string16, StringPiece16). - constexpr BasicStringPiece() : ptr_(NULL), length_(0) {} + constexpr BasicStringPiece() : ptr_(nullptr), length_(0) {} // TODO(crbug.com/1049498): Construction from nullptr is not allowed for // std::basic_string_view, so remove the special handling for it. 
// Note: This doesn't just use STRING_TYPE::traits_type::length(), since that diff --git a/chromium/base/strings/string_split.cc b/chromium/base/strings/string_split.cc index a968e802e8c..4ba0412cc2c 100644 --- a/chromium/base/strings/string_split.cc +++ b/chromium/base/strings/string_split.cc @@ -7,6 +7,7 @@ #include <stddef.h> #include "base/logging.h" +#include "base/strings/string_split_internal.h" #include "base/strings/string_util.h" #include "base/third_party/icu/icu_utf.h" @@ -14,56 +15,6 @@ namespace base { namespace { -// Returns either the ASCII or UTF-16 whitespace. -template<typename Str> BasicStringPiece<Str> WhitespaceForType(); -#if defined(OS_WIN) && defined(BASE_STRING16_IS_STD_U16STRING) -template <> -WStringPiece WhitespaceForType<std::wstring>() { - return kWhitespaceWide; -} -#endif - -template<> StringPiece16 WhitespaceForType<string16>() { - return kWhitespaceUTF16; -} -template<> StringPiece WhitespaceForType<std::string>() { - return kWhitespaceASCII; -} - -// General string splitter template. Can take 8- or 16-bit input, can produce -// the corresponding string or StringPiece output. 
-template <typename OutputStringType, typename Str> -static std::vector<OutputStringType> SplitStringT( - BasicStringPiece<Str> str, - BasicStringPiece<Str> delimiter, - WhitespaceHandling whitespace, - SplitResult result_type) { - std::vector<OutputStringType> result; - if (str.empty()) - return result; - - size_t start = 0; - while (start != Str::npos) { - size_t end = str.find_first_of(delimiter, start); - - BasicStringPiece<Str> piece; - if (end == Str::npos) { - piece = str.substr(start); - start = Str::npos; - } else { - piece = str.substr(start, end - start); - start = end + 1; - } - - if (whitespace == TRIM_WHITESPACE) - piece = TrimString(piece, WhitespaceForType<Str>(), TRIM_ALL); - - if (result_type == SPLIT_WANT_ALL || !piece.empty()) - result.emplace_back(piece); - } - return result; -} - bool AppendStringKeyValue(StringPiece input, char delimiter, StringPairs* result) { @@ -94,67 +45,38 @@ bool AppendStringKeyValue(StringPiece input, return true; } -template <typename OutputStringType, typename Str> -std::vector<OutputStringType> SplitStringUsingSubstrT( - BasicStringPiece<Str> input, - BasicStringPiece<Str> delimiter, - WhitespaceHandling whitespace, - SplitResult result_type) { - using Piece = BasicStringPiece<Str>; - using size_type = typename Piece::size_type; - - std::vector<OutputStringType> result; - if (delimiter.size() == 0) { - result.emplace_back(input); - return result; - } - - for (size_type begin_index = 0, end_index = 0; end_index != Piece::npos; - begin_index = end_index + delimiter.size()) { - end_index = input.find(delimiter, begin_index); - Piece term = end_index == Piece::npos - ? 
input.substr(begin_index) - : input.substr(begin_index, end_index - begin_index); - - if (whitespace == TRIM_WHITESPACE) - term = TrimString(term, WhitespaceForType<Str>(), TRIM_ALL); - - if (result_type == SPLIT_WANT_ALL || !term.empty()) - result.emplace_back(term); - } - - return result; -} - } // namespace std::vector<std::string> SplitString(StringPiece input, StringPiece separators, WhitespaceHandling whitespace, SplitResult result_type) { - return SplitStringT<std::string>(input, separators, whitespace, result_type); + return internal::SplitStringT<std::string>(input, separators, whitespace, + result_type); } std::vector<string16> SplitString(StringPiece16 input, StringPiece16 separators, WhitespaceHandling whitespace, SplitResult result_type) { - return SplitStringT<string16>(input, separators, whitespace, result_type); + return internal::SplitStringT<string16>(input, separators, whitespace, + result_type); } std::vector<StringPiece> SplitStringPiece(StringPiece input, StringPiece separators, WhitespaceHandling whitespace, SplitResult result_type) { - return SplitStringT<StringPiece>(input, separators, whitespace, result_type); + return internal::SplitStringT<StringPiece>(input, separators, whitespace, + result_type); } std::vector<StringPiece16> SplitStringPiece(StringPiece16 input, StringPiece16 separators, WhitespaceHandling whitespace, SplitResult result_type) { - return SplitStringT<StringPiece16>(input, separators, whitespace, - result_type); + return internal::SplitStringT<StringPiece16>(input, separators, whitespace, + result_type); } bool SplitStringIntoKeyValuePairs(StringPiece input, @@ -192,16 +114,16 @@ std::vector<string16> SplitStringUsingSubstr(StringPiece16 input, StringPiece16 delimiter, WhitespaceHandling whitespace, SplitResult result_type) { - return SplitStringUsingSubstrT<string16>(input, delimiter, whitespace, - result_type); + return internal::SplitStringUsingSubstrT<string16>(input, delimiter, + whitespace, result_type); } 
std::vector<std::string> SplitStringUsingSubstr(StringPiece input, StringPiece delimiter, WhitespaceHandling whitespace, SplitResult result_type) { - return SplitStringUsingSubstrT<std::string>(input, delimiter, whitespace, - result_type); + return internal::SplitStringUsingSubstrT<std::string>( + input, delimiter, whitespace, result_type); } std::vector<StringPiece16> SplitStringPieceUsingSubstr( @@ -210,8 +132,8 @@ std::vector<StringPiece16> SplitStringPieceUsingSubstr( WhitespaceHandling whitespace, SplitResult result_type) { std::vector<StringPiece16> result; - return SplitStringUsingSubstrT<StringPiece16>(input, delimiter, whitespace, - result_type); + return internal::SplitStringUsingSubstrT<StringPiece16>( + input, delimiter, whitespace, result_type); } std::vector<StringPiece> SplitStringPieceUsingSubstr( @@ -219,41 +141,8 @@ std::vector<StringPiece> SplitStringPieceUsingSubstr( StringPiece delimiter, WhitespaceHandling whitespace, SplitResult result_type) { - return SplitStringUsingSubstrT<StringPiece>(input, delimiter, whitespace, - result_type); -} - -#if defined(OS_WIN) && defined(BASE_STRING16_IS_STD_U16STRING) -std::vector<std::wstring> SplitString(WStringPiece input, - WStringPiece separators, - WhitespaceHandling whitespace, - SplitResult result_type) { - return SplitStringT<std::wstring>(input, separators, whitespace, result_type); -} - -std::vector<WStringPiece> SplitStringPiece(WStringPiece input, - WStringPiece separators, - WhitespaceHandling whitespace, - SplitResult result_type) { - return SplitStringT<WStringPiece>(input, separators, whitespace, result_type); -} - -std::vector<std::wstring> SplitStringUsingSubstr(WStringPiece input, - WStringPiece delimiter, - WhitespaceHandling whitespace, - SplitResult result_type) { - return SplitStringUsingSubstrT<std::wstring>(input, delimiter, whitespace, - result_type); -} - -std::vector<WStringPiece> SplitStringPieceUsingSubstr( - WStringPiece input, - WStringPiece delimiter, - WhitespaceHandling 
whitespace, - SplitResult result_type) { - return SplitStringUsingSubstrT<WStringPiece>(input, delimiter, whitespace, - result_type); + return internal::SplitStringUsingSubstrT<StringPiece>( + input, delimiter, whitespace, result_type); } -#endif } // namespace base diff --git a/chromium/base/strings/string_split.h b/chromium/base/strings/string_split.h index efa8b199fe0..73c15d79f1b 100644 --- a/chromium/base/strings/string_split.h +++ b/chromium/base/strings/string_split.h @@ -138,32 +138,10 @@ BASE_EXPORT std::vector<StringPiece> SplitStringPieceUsingSubstr( WhitespaceHandling whitespace, SplitResult result_type) WARN_UNUSED_RESULT; -#if defined(OS_WIN) && defined(BASE_STRING16_IS_STD_U16STRING) -BASE_EXPORT std::vector<std::wstring> SplitString(WStringPiece input, - WStringPiece separators, - WhitespaceHandling whitespace, - SplitResult result_type) - WARN_UNUSED_RESULT; - -BASE_EXPORT std::vector<WStringPiece> SplitStringPiece( - WStringPiece input, - WStringPiece separators, - WhitespaceHandling whitespace, - SplitResult result_type) WARN_UNUSED_RESULT; - -BASE_EXPORT std::vector<std::wstring> SplitStringUsingSubstr( - WStringPiece input, - WStringPiece delimiter, - WhitespaceHandling whitespace, - SplitResult result_type) WARN_UNUSED_RESULT; +} // namespace base -BASE_EXPORT std::vector<WStringPiece> SplitStringPieceUsingSubstr( - WStringPiece input, - WStringPiece delimiter, - WhitespaceHandling whitespace, - SplitResult result_type) WARN_UNUSED_RESULT; +#if defined(OS_WIN) +#include "base/strings/string_split_win.h" #endif -} // namespace base - #endif // BASE_STRINGS_STRING_SPLIT_H_ diff --git a/chromium/base/strings/string_split_internal.h b/chromium/base/strings/string_split_internal.h new file mode 100644 index 00000000000..71d8030b3d2 --- /dev/null +++ b/chromium/base/strings/string_split_internal.h @@ -0,0 +1,100 @@ +// Copyright 2020 The Chromium Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef BASE_STRINGS_STRING_SPLIT_INTERNAL_H_ +#define BASE_STRINGS_STRING_SPLIT_INTERNAL_H_ + +#include <vector> + +#include "base/strings/string_piece.h" +#include "base/strings/string_util.h" + +namespace base { + +namespace internal { + +// Returns either the ASCII or UTF-16 whitespace. +template <typename Str> +BasicStringPiece<Str> WhitespaceForType(); + +template <> +inline StringPiece16 WhitespaceForType<string16>() { + return kWhitespaceUTF16; +} +template <> +inline StringPiece WhitespaceForType<std::string>() { + return kWhitespaceASCII; +} + +// General string splitter template. Can take 8- or 16-bit input, can produce +// the corresponding string or StringPiece output. +template <typename OutputStringType, typename Str> +static std::vector<OutputStringType> SplitStringT( + BasicStringPiece<Str> str, + BasicStringPiece<Str> delimiter, + WhitespaceHandling whitespace, + SplitResult result_type) { + std::vector<OutputStringType> result; + if (str.empty()) + return result; + + size_t start = 0; + while (start != Str::npos) { + size_t end = str.find_first_of(delimiter, start); + + BasicStringPiece<Str> piece; + if (end == Str::npos) { + piece = str.substr(start); + start = Str::npos; + } else { + piece = str.substr(start, end - start); + start = end + 1; + } + + if (whitespace == TRIM_WHITESPACE) + piece = TrimString(piece, WhitespaceForType<Str>(), TRIM_ALL); + + if (result_type == SPLIT_WANT_ALL || !piece.empty()) + result.emplace_back(piece); + } + return result; +} + +template <typename OutputStringType, typename Str> +std::vector<OutputStringType> SplitStringUsingSubstrT( + BasicStringPiece<Str> input, + BasicStringPiece<Str> delimiter, + WhitespaceHandling whitespace, + SplitResult result_type) { + using Piece = BasicStringPiece<Str>; + using size_type = typename Piece::size_type; + + std::vector<OutputStringType> result; + if (delimiter.size() 
== 0) { + result.emplace_back(input); + return result; + } + + for (size_type begin_index = 0, end_index = 0; end_index != Piece::npos; + begin_index = end_index + delimiter.size()) { + end_index = input.find(delimiter, begin_index); + Piece term = end_index == Piece::npos + ? input.substr(begin_index) + : input.substr(begin_index, end_index - begin_index); + + if (whitespace == TRIM_WHITESPACE) + term = TrimString(term, WhitespaceForType<Str>(), TRIM_ALL); + + if (result_type == SPLIT_WANT_ALL || !term.empty()) + result.emplace_back(term); + } + + return result; +} + +} // namespace internal + +} // namespace base + +#endif // BASE_STRINGS_STRING_SPLIT_INTERNAL_H_ diff --git a/chromium/base/strings/string_split_win.cc b/chromium/base/strings/string_split_win.cc new file mode 100644 index 00000000000..91184bd058e --- /dev/null +++ b/chromium/base/strings/string_split_win.cc @@ -0,0 +1,59 @@ +// Copyright 2020 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "base/strings/string_split_win.h" + +#include <string> +#include <vector> + +#include "base/strings/string_piece.h" +#include "base/strings/string_split_internal.h" + +namespace base { + +#if defined(BASE_STRING16_IS_STD_U16STRING) +namespace internal { + +template <> +inline WStringPiece WhitespaceForType<std::wstring>() { + return kWhitespaceWide; +} + +} // namespace internal + +std::vector<std::wstring> SplitString(WStringPiece input, + WStringPiece separators, + WhitespaceHandling whitespace, + SplitResult result_type) { + return internal::SplitStringT<std::wstring>(input, separators, whitespace, + result_type); +} + +std::vector<WStringPiece> SplitStringPiece(WStringPiece input, + WStringPiece separators, + WhitespaceHandling whitespace, + SplitResult result_type) { + return internal::SplitStringT<WStringPiece>(input, separators, whitespace, + result_type); +} + +std::vector<std::wstring> SplitStringUsingSubstr(WStringPiece input, + WStringPiece delimiter, + WhitespaceHandling whitespace, + SplitResult result_type) { + return internal::SplitStringUsingSubstrT<std::wstring>( + input, delimiter, whitespace, result_type); +} + +std::vector<WStringPiece> SplitStringPieceUsingSubstr( + WStringPiece input, + WStringPiece delimiter, + WhitespaceHandling whitespace, + SplitResult result_type) { + return internal::SplitStringUsingSubstrT<WStringPiece>( + input, delimiter, whitespace, result_type); +} +#endif + +} // namespace base diff --git a/chromium/base/strings/string_split_win.h b/chromium/base/strings/string_split_win.h new file mode 100644 index 00000000000..51627d9eeb8 --- /dev/null +++ b/chromium/base/strings/string_split_win.h @@ -0,0 +1,53 @@ +// Copyright 2020 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#ifndef BASE_STRINGS_STRING_SPLIT_WIN_H_ +#define BASE_STRINGS_STRING_SPLIT_WIN_H_ + +#include <string> +#include <vector> + +#include "base/base_export.h" +#include "base/compiler_specific.h" +#include "base/strings/string16.h" +#include "base/strings/string_piece.h" +#include "base/strings/string_split.h" + +namespace base { + +// The following section contains overloads of the cross-platform APIs for +// std::wstring and base::WStringPiece. These are only enabled if std::wstring +// and base::string16 are distinct types, as otherwise this would result in an +// ODR violation. +// TODO(crbug.com/911896): Remove those guards once base::string16 is +// std::u16string. +#if defined(BASE_STRING16_IS_STD_U16STRING) +BASE_EXPORT std::vector<std::wstring> SplitString(WStringPiece input, + WStringPiece separators, + WhitespaceHandling whitespace, + SplitResult result_type) + WARN_UNUSED_RESULT; + +BASE_EXPORT std::vector<WStringPiece> SplitStringPiece( + WStringPiece input, + WStringPiece separators, + WhitespaceHandling whitespace, + SplitResult result_type) WARN_UNUSED_RESULT; + +BASE_EXPORT std::vector<std::wstring> SplitStringUsingSubstr( + WStringPiece input, + WStringPiece delimiter, + WhitespaceHandling whitespace, + SplitResult result_type) WARN_UNUSED_RESULT; + +BASE_EXPORT std::vector<WStringPiece> SplitStringPieceUsingSubstr( + WStringPiece input, + WStringPiece delimiter, + WhitespaceHandling whitespace, + SplitResult result_type) WARN_UNUSED_RESULT; +#endif + +} // namespace base + +#endif // BASE_STRINGS_STRING_SPLIT_WIN_H_ diff --git a/chromium/base/strings/string_util.cc b/chromium/base/strings/string_util.cc index 924455491a6..a883c97eca4 100644 --- a/chromium/base/strings/string_util.cc +++ b/chromium/base/strings/string_util.cc @@ -18,11 +18,13 @@ #include <algorithm> #include <limits> +#include <type_traits> #include <vector> -#include "base/logging.h" +#include "base/check_op.h" #include "base/no_destructor.h" #include "base/stl_util.h" +#include 
"base/strings/string_util_internal.h" #include "base/strings/utf_string_conversion_utils.h" #include "base/strings/utf_string_conversions.h" #include "base/third_party/icu/icu_utf.h" @@ -30,60 +32,6 @@ namespace base { -namespace { - -// Used by ReplaceStringPlaceholders to track the position in the string of -// replaced parameters. -struct ReplacementOffset { - ReplacementOffset(uintptr_t parameter, size_t offset) - : parameter(parameter), - offset(offset) {} - - // Index of the parameter. - uintptr_t parameter; - - // Starting position in the string. - size_t offset; -}; - -static bool CompareParameter(const ReplacementOffset& elem1, - const ReplacementOffset& elem2) { - return elem1.parameter < elem2.parameter; -} - -// Assuming that a pointer is the size of a "machine word", then -// uintptr_t is an integer type that is also a machine word. -using MachineWord = uintptr_t; - -inline bool IsMachineWordAligned(const void* pointer) { - return !(reinterpret_cast<MachineWord>(pointer) & (sizeof(MachineWord) - 1)); -} - -template <typename CharacterType> -struct NonASCIIMask; -template <> -struct NonASCIIMask<char> { - static constexpr MachineWord value() { - return static_cast<MachineWord>(0x8080808080808080ULL); - } -}; -template <> -struct NonASCIIMask<char16> { - static constexpr MachineWord value() { - return static_cast<MachineWord>(0xFF80FF80FF80FF80ULL); - } -}; -#if defined(WCHAR_T_IS_UTF32) -template <> -struct NonASCIIMask<wchar_t> { - static constexpr MachineWord value() { - return static_cast<MachineWord>(0xFFFFFF80FFFFFF80ULL); - } -}; -#endif // WCHAR_T_IS_UTF32 - -} // namespace - bool IsWprintfFormatPortable(const wchar_t* format) { for (const wchar_t* position = format; *position != '\0'; ++position) { if (*position == '%') { @@ -119,89 +67,38 @@ bool IsWprintfFormatPortable(const wchar_t* format) { return true; } -namespace { - -template<typename StringType> -StringType ToLowerASCIIImpl(BasicStringPiece<StringType> str) { - StringType ret; - 
ret.reserve(str.size()); - for (size_t i = 0; i < str.size(); i++) - ret.push_back(ToLowerASCII(str[i])); - return ret; -} - -template<typename StringType> -StringType ToUpperASCIIImpl(BasicStringPiece<StringType> str) { - StringType ret; - ret.reserve(str.size()); - for (size_t i = 0; i < str.size(); i++) - ret.push_back(ToUpperASCII(str[i])); - return ret; -} - -} // namespace - std::string ToLowerASCII(StringPiece str) { - return ToLowerASCIIImpl<std::string>(str); + return internal::ToLowerASCIIImpl(str); } string16 ToLowerASCII(StringPiece16 str) { - return ToLowerASCIIImpl<string16>(str); + return internal::ToLowerASCIIImpl(str); } std::string ToUpperASCII(StringPiece str) { - return ToUpperASCIIImpl<std::string>(str); + return internal::ToUpperASCIIImpl(str); } string16 ToUpperASCII(StringPiece16 str) { - return ToUpperASCIIImpl<string16>(str); -} - -template<class StringType> -int CompareCaseInsensitiveASCIIT(BasicStringPiece<StringType> a, - BasicStringPiece<StringType> b) { - // Find the first characters that aren't equal and compare them. If the end - // of one of the strings is found before a nonequal character, the lengths - // of the strings are compared. - size_t i = 0; - while (i < a.length() && i < b.length()) { - typename StringType::value_type lower_a = ToLowerASCII(a[i]); - typename StringType::value_type lower_b = ToLowerASCII(b[i]); - if (lower_a < lower_b) - return -1; - if (lower_a > lower_b) - return 1; - i++; - } - - // End of one string hit before finding a different character. Expect the - // common case to be "strings equal" at this point so check that first. 
- if (a.length() == b.length()) - return 0; - - if (a.length() < b.length()) - return -1; - return 1; + return internal::ToUpperASCIIImpl(str); } int CompareCaseInsensitiveASCII(StringPiece a, StringPiece b) { - return CompareCaseInsensitiveASCIIT<std::string>(a, b); + return internal::CompareCaseInsensitiveASCIIT(a, b); } int CompareCaseInsensitiveASCII(StringPiece16 a, StringPiece16 b) { - return CompareCaseInsensitiveASCIIT<string16>(a, b); + return internal::CompareCaseInsensitiveASCIIT(a, b); } bool EqualsCaseInsensitiveASCII(StringPiece a, StringPiece b) { - if (a.length() != b.length()) - return false; - return CompareCaseInsensitiveASCIIT<std::string>(a, b) == 0; + return a.size() == b.size() && + internal::CompareCaseInsensitiveASCIIT(a, b) == 0; } bool EqualsCaseInsensitiveASCII(StringPiece16 a, StringPiece16 b) { - if (a.length() != b.length()) - return false; - return CompareCaseInsensitiveASCIIT<string16>(a, b) == 0; + return a.size() == b.size() && + internal::CompareCaseInsensitiveASCIIT(a, b) == 0; } const std::string& EmptyString() { @@ -214,107 +111,56 @@ const string16& EmptyString16() { return *s16; } -template <class StringType> -bool ReplaceCharsT(const StringType& input, - BasicStringPiece<StringType> find_any_of_these, - BasicStringPiece<StringType> replace_with, - StringType* output); - -bool ReplaceChars(const string16& input, +bool ReplaceChars(StringPiece16 input, StringPiece16 replace_chars, StringPiece16 replace_with, string16* output) { - return ReplaceCharsT(input, replace_chars, replace_with, output); + return internal::ReplaceCharsT(input, replace_chars, replace_with, output); } -bool ReplaceChars(const std::string& input, +bool ReplaceChars(StringPiece input, StringPiece replace_chars, StringPiece replace_with, std::string* output) { - return ReplaceCharsT(input, replace_chars, replace_with, output); + return internal::ReplaceCharsT(input, replace_chars, replace_with, output); } -bool RemoveChars(const string16& input, +bool 
RemoveChars(StringPiece16 input, StringPiece16 remove_chars, string16* output) { - return ReplaceCharsT(input, remove_chars, StringPiece16(), output); + return internal::ReplaceCharsT(input, remove_chars, StringPiece16(), output); } -bool RemoveChars(const std::string& input, +bool RemoveChars(StringPiece input, StringPiece remove_chars, std::string* output) { - return ReplaceCharsT(input, remove_chars, StringPiece(), output); -} - -template <typename Str> -TrimPositions TrimStringT(BasicStringPiece<Str> input, - BasicStringPiece<Str> trim_chars, - TrimPositions positions, - Str* output) { - // Find the edges of leading/trailing whitespace as desired. Need to use - // a StringPiece version of input to be able to call find* on it with the - // StringPiece version of trim_chars (normally the trim_chars will be a - // constant so avoid making a copy). - const size_t last_char = input.length() - 1; - const size_t first_good_char = - (positions & TRIM_LEADING) ? input.find_first_not_of(trim_chars) : 0; - const size_t last_good_char = (positions & TRIM_TRAILING) - ? input.find_last_not_of(trim_chars) - : last_char; - - // When the string was all trimmed, report that we stripped off characters - // from whichever position the caller was interested in. For empty input, we - // stripped no characters, but we still need to clear |output|. - if (input.empty() || first_good_char == Str::npos || - last_good_char == Str::npos) { - bool input_was_empty = input.empty(); // in case output == &input - output->clear(); - return input_was_empty ? TRIM_NONE : positions; - } - - // Trim. - output->assign(input.data() + first_good_char, - last_good_char - first_good_char + 1); - - // Return where we trimmed from. - return static_cast<TrimPositions>( - (first_good_char == 0 ? TRIM_NONE : TRIM_LEADING) | - (last_good_char == last_char ? 
TRIM_NONE : TRIM_TRAILING)); + return internal::ReplaceCharsT(input, remove_chars, StringPiece(), output); } bool TrimString(StringPiece16 input, StringPiece16 trim_chars, string16* output) { - return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE; + return internal::TrimStringT(input, trim_chars, TRIM_ALL, output) != + TRIM_NONE; } bool TrimString(StringPiece input, StringPiece trim_chars, std::string* output) { - return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE; -} - -template<typename Str> -BasicStringPiece<Str> TrimStringPieceT(BasicStringPiece<Str> input, - BasicStringPiece<Str> trim_chars, - TrimPositions positions) { - size_t begin = (positions & TRIM_LEADING) ? - input.find_first_not_of(trim_chars) : 0; - size_t end = (positions & TRIM_TRAILING) ? - input.find_last_not_of(trim_chars) + 1 : input.size(); - return input.substr(begin, end - begin); + return internal::TrimStringT(input, trim_chars, TRIM_ALL, output) != + TRIM_NONE; } StringPiece16 TrimString(StringPiece16 input, StringPiece16 trim_chars, TrimPositions positions) { - return TrimStringPieceT(input, trim_chars, positions); + return internal::TrimStringPieceT(input, trim_chars, positions); } StringPiece TrimString(StringPiece input, StringPiece trim_chars, TrimPositions positions) { - return TrimStringPieceT(input, trim_chars, positions); + return internal::TrimStringPieceT(input, trim_chars, positions); } void TruncateUTF8ToByteSize(const std::string& input, @@ -357,74 +203,36 @@ void TruncateUTF8ToByteSize(const std::string& input, TrimPositions TrimWhitespace(StringPiece16 input, TrimPositions positions, string16* output) { - return TrimStringT(input, StringPiece16(kWhitespaceUTF16), positions, output); + return internal::TrimStringT(input, StringPiece16(kWhitespaceUTF16), + positions, output); } StringPiece16 TrimWhitespace(StringPiece16 input, TrimPositions positions) { - return TrimStringPieceT(input, StringPiece16(kWhitespaceUTF16), positions); + return 
internal::TrimStringPieceT(input, StringPiece16(kWhitespaceUTF16), + positions); } TrimPositions TrimWhitespaceASCII(StringPiece input, TrimPositions positions, std::string* output) { - return TrimStringT(input, StringPiece(kWhitespaceASCII), positions, output); + return internal::TrimStringT(input, StringPiece(kWhitespaceASCII), positions, + output); } StringPiece TrimWhitespaceASCII(StringPiece input, TrimPositions positions) { - return TrimStringPieceT(input, StringPiece(kWhitespaceASCII), positions); + return internal::TrimStringPieceT(input, StringPiece(kWhitespaceASCII), + positions); } -template<typename STR> -STR CollapseWhitespaceT(const STR& text, - bool trim_sequences_with_line_breaks) { - STR result; - result.resize(text.size()); - - // Set flags to pretend we're already in a trimmed whitespace sequence, so we - // will trim any leading whitespace. - bool in_whitespace = true; - bool already_trimmed = true; - - int chars_written = 0; - for (typename STR::const_iterator i(text.begin()); i != text.end(); ++i) { - if (IsUnicodeWhitespace(*i)) { - if (!in_whitespace) { - // Reduce all whitespace sequences to a single space. - in_whitespace = true; - result[chars_written++] = L' '; - } - if (trim_sequences_with_line_breaks && !already_trimmed && - ((*i == '\n') || (*i == '\r'))) { - // Whitespace sequences containing CR or LF are eliminated entirely. - already_trimmed = true; - --chars_written; - } - } else { - // Non-whitespace chracters are copied straight across. - in_whitespace = false; - already_trimmed = false; - result[chars_written++] = *i; - } - } - - if (in_whitespace && !already_trimmed) { - // Any trailing whitespace is eliminated. 
- --chars_written; - } - - result.resize(chars_written); - return result; -} - -string16 CollapseWhitespace(const string16& text, +string16 CollapseWhitespace(StringPiece16 text, bool trim_sequences_with_line_breaks) { - return CollapseWhitespaceT(text, trim_sequences_with_line_breaks); + return internal::CollapseWhitespaceT(text, trim_sequences_with_line_breaks); } -std::string CollapseWhitespaceASCII(const std::string& text, +std::string CollapseWhitespaceASCII(StringPiece text, bool trim_sequences_with_line_breaks) { - return CollapseWhitespaceT(text, trim_sequences_with_line_breaks); + return internal::CollapseWhitespaceT(text, trim_sequences_with_line_breaks); } bool ContainsOnlyChars(StringPiece input, StringPiece characters) { @@ -435,198 +243,63 @@ bool ContainsOnlyChars(StringPiece16 input, StringPiece16 characters) { return input.find_first_not_of(characters) == StringPiece16::npos; } -template <class Char> -inline bool DoIsStringASCII(const Char* characters, size_t length) { - if (!length) - return true; - constexpr MachineWord non_ascii_bit_mask = NonASCIIMask<Char>::value(); - MachineWord all_char_bits = 0; - const Char* end = characters + length; - - // Prologue: align the input. - while (!IsMachineWordAligned(characters) && characters < end) - all_char_bits |= *characters++; - if (all_char_bits & non_ascii_bit_mask) - return false; - - // Compare the values of CPU word size. - constexpr size_t chars_per_word = sizeof(MachineWord) / sizeof(Char); - constexpr int batch_count = 16; - while (characters <= end - batch_count * chars_per_word) { - all_char_bits = 0; - for (int i = 0; i < batch_count; ++i) { - all_char_bits |= *(reinterpret_cast<const MachineWord*>(characters)); - characters += chars_per_word; - } - if (all_char_bits & non_ascii_bit_mask) - return false; - } - - // Process the remaining words. 
- all_char_bits = 0; - while (characters <= end - chars_per_word) { - all_char_bits |= *(reinterpret_cast<const MachineWord*>(characters)); - characters += chars_per_word; - } - - // Process the remaining bytes. - while (characters < end) - all_char_bits |= *characters++; - - return !(all_char_bits & non_ascii_bit_mask); -} bool IsStringASCII(StringPiece str) { - return DoIsStringASCII(str.data(), str.length()); + return internal::DoIsStringASCII(str.data(), str.length()); } bool IsStringASCII(StringPiece16 str) { - return DoIsStringASCII(str.data(), str.length()); + return internal::DoIsStringASCII(str.data(), str.length()); } #if defined(WCHAR_T_IS_UTF32) bool IsStringASCII(WStringPiece str) { - return DoIsStringASCII(str.data(), str.length()); + return internal::DoIsStringASCII(str.data(), str.length()); } #endif -template <bool (*Validator)(uint32_t)> -inline static bool DoIsStringUTF8(StringPiece str) { - const char* src = str.data(); - int32_t src_len = static_cast<int32_t>(str.length()); - int32_t char_index = 0; - - while (char_index < src_len) { - int32_t code_point; - CBU8_NEXT(src, char_index, src_len, code_point); - if (!Validator(code_point)) - return false; - } - return true; -} - bool IsStringUTF8(StringPiece str) { - return DoIsStringUTF8<IsValidCharacter>(str); + return internal::DoIsStringUTF8<IsValidCharacter>(str); } bool IsStringUTF8AllowingNoncharacters(StringPiece str) { - return DoIsStringUTF8<IsValidCodepoint>(str); -} - -// Implementation note: Normally this function will be called with a hardcoded -// constant for the lowercase_ascii parameter. Constructing a StringPiece from -// a C constant requires running strlen, so the result will be two passes -// through the buffers, one to file the length of lowercase_ascii, and one to -// compare each letter. -// -// This function could have taken a const char* to avoid this and only do one -// pass through the string. 
But the strlen is faster than the case-insensitive -// compares and lets us early-exit in the case that the strings are different -// lengths (will often be the case for non-matches). So whether one approach or -// the other will be faster depends on the case. -// -// The hardcoded strings are typically very short so it doesn't matter, and the -// string piece gives additional flexibility for the caller (doesn't have to be -// null terminated) so we choose the StringPiece route. -template<typename Str> -static inline bool DoLowerCaseEqualsASCII(BasicStringPiece<Str> str, - StringPiece lowercase_ascii) { - if (str.size() != lowercase_ascii.size()) - return false; - for (size_t i = 0; i < str.size(); i++) { - if (ToLowerASCII(str[i]) != lowercase_ascii[i]) - return false; - } - return true; + return internal::DoIsStringUTF8<IsValidCodepoint>(str); } bool LowerCaseEqualsASCII(StringPiece str, StringPiece lowercase_ascii) { - return DoLowerCaseEqualsASCII<std::string>(str, lowercase_ascii); + return internal::DoLowerCaseEqualsASCII(str, lowercase_ascii); } bool LowerCaseEqualsASCII(StringPiece16 str, StringPiece lowercase_ascii) { - return DoLowerCaseEqualsASCII<string16>(str, lowercase_ascii); + return internal::DoLowerCaseEqualsASCII(str, lowercase_ascii); } bool EqualsASCII(StringPiece16 str, StringPiece ascii) { - if (str.length() != ascii.length()) - return false; - return std::equal(ascii.begin(), ascii.end(), str.begin()); -} - -template<typename Str> -bool StartsWithT(BasicStringPiece<Str> str, - BasicStringPiece<Str> search_for, - CompareCase case_sensitivity) { - if (search_for.size() > str.size()) - return false; - - BasicStringPiece<Str> source = str.substr(0, search_for.size()); - - switch (case_sensitivity) { - case CompareCase::SENSITIVE: - return source == search_for; - - case CompareCase::INSENSITIVE_ASCII: - return std::equal( - search_for.begin(), search_for.end(), - source.begin(), - CaseInsensitiveCompareASCII<typename Str::value_type>()); - - 
default: - NOTREACHED(); - return false; - } + return std::equal(ascii.begin(), ascii.end(), str.begin(), str.end()); } bool StartsWith(StringPiece str, StringPiece search_for, CompareCase case_sensitivity) { - return StartsWithT<std::string>(str, search_for, case_sensitivity); + return internal::StartsWithT(str, search_for, case_sensitivity); } bool StartsWith(StringPiece16 str, StringPiece16 search_for, CompareCase case_sensitivity) { - return StartsWithT<string16>(str, search_for, case_sensitivity); -} - -template <typename Str> -bool EndsWithT(BasicStringPiece<Str> str, - BasicStringPiece<Str> search_for, - CompareCase case_sensitivity) { - if (search_for.size() > str.size()) - return false; - - BasicStringPiece<Str> source = str.substr(str.size() - search_for.size(), - search_for.size()); - - switch (case_sensitivity) { - case CompareCase::SENSITIVE: - return source == search_for; - - case CompareCase::INSENSITIVE_ASCII: - return std::equal( - source.begin(), source.end(), - search_for.begin(), - CaseInsensitiveCompareASCII<typename Str::value_type>()); - - default: - NOTREACHED(); - return false; - } + return internal::StartsWithT(str, search_for, case_sensitivity); } bool EndsWith(StringPiece str, StringPiece search_for, CompareCase case_sensitivity) { - return EndsWithT<std::string>(str, search_for, case_sensitivity); + return internal::EndsWithT(str, search_for, case_sensitivity); } bool EndsWith(StringPiece16 str, StringPiece16 search_for, CompareCase case_sensitivity) { - return EndsWithT<string16>(str, search_for, case_sensitivity); + return internal::EndsWithT(str, search_for, case_sensitivity); } char HexDigitToInt(wchar_t c) { @@ -680,384 +353,93 @@ string16 FormatBytesUnlocalized(int64_t bytes) { return ASCIIToUTF16(buf); } -// A Matcher for DoReplaceMatchesAfterOffset() that matches substrings. 
-template <class StringType> -struct SubstringMatcher { - BasicStringPiece<StringType> find_this; - - size_t Find(const StringType& input, size_t pos) { - return input.find(find_this.data(), pos, find_this.length()); - } - size_t MatchSize() { return find_this.length(); } -}; - -// A Matcher for DoReplaceMatchesAfterOffset() that matches single characters. -template <class StringType> -struct CharacterMatcher { - BasicStringPiece<StringType> find_any_of_these; - - size_t Find(const StringType& input, size_t pos) { - return input.find_first_of(find_any_of_these.data(), pos, - find_any_of_these.length()); - } - constexpr size_t MatchSize() { return 1; } -}; - -enum class ReplaceType { REPLACE_ALL, REPLACE_FIRST }; - -// Runs in O(n) time in the length of |str|, and transforms the string without -// reallocating when possible. Returns |true| if any matches were found. -// -// This is parameterized on a |Matcher| traits type, so that it can be the -// implementation for both ReplaceChars() and ReplaceSubstringsAfterOffset(). -template <class StringType, class Matcher> -bool DoReplaceMatchesAfterOffset(StringType* str, - size_t initial_offset, - Matcher matcher, - BasicStringPiece<StringType> replace_with, - ReplaceType replace_type) { - using CharTraits = typename StringType::traits_type; - - const size_t find_length = matcher.MatchSize(); - if (!find_length) - return false; - - // If the find string doesn't appear, there's nothing to do. - size_t first_match = matcher.Find(*str, initial_offset); - if (first_match == StringType::npos) - return false; - - // If we're only replacing one instance, there's no need to do anything - // complicated. 
- const size_t replace_length = replace_with.length(); - if (replace_type == ReplaceType::REPLACE_FIRST) { - str->replace(first_match, find_length, replace_with.data(), replace_length); - return true; - } - - // If the find and replace strings are the same length, we can simply use - // replace() on each instance, and finish the entire operation in O(n) time. - if (find_length == replace_length) { - auto* buffer = &((*str)[0]); - for (size_t offset = first_match; offset != StringType::npos; - offset = matcher.Find(*str, offset + replace_length)) { - CharTraits::copy(buffer + offset, replace_with.data(), replace_length); - } - return true; - } - - // Since the find and replace strings aren't the same length, a loop like the - // one above would be O(n^2) in the worst case, as replace() will shift the - // entire remaining string each time. We need to be more clever to keep things - // O(n). - // - // When the string is being shortened, it's possible to just shift the matches - // down in one pass while finding, and truncate the length at the end of the - // search. - // - // If the string is being lengthened, more work is required. The strategy used - // here is to make two find() passes through the string. The first pass counts - // the number of matches to determine the new size. The second pass will - // either construct the new string into a new buffer (if the existing buffer - // lacked capacity), or else -- if there is room -- create a region of scratch - // space after |first_match| by shifting the tail of the string to a higher - // index, and doing in-place moves from the tail to lower indices thereafter. - size_t str_length = str->length(); - size_t expansion = 0; - if (replace_length > find_length) { - // This operation lengthens the string; determine the new length by counting - // matches. 
- const size_t expansion_per_match = (replace_length - find_length); - size_t num_matches = 0; - for (size_t match = first_match; match != StringType::npos; - match = matcher.Find(*str, match + find_length)) { - expansion += expansion_per_match; - ++num_matches; - } - const size_t final_length = str_length + expansion; - - if (str->capacity() < final_length) { - // If we'd have to allocate a new buffer to grow the string, build the - // result directly into the new allocation via append(). - StringType src(str->get_allocator()); - str->swap(src); - str->reserve(final_length); - - size_t pos = 0; - for (size_t match = first_match;; match = matcher.Find(src, pos)) { - str->append(src, pos, match - pos); - str->append(replace_with.data(), replace_length); - pos = match + find_length; - - // A mid-loop test/break enables skipping the final Find() call; the - // number of matches is known, so don't search past the last one. - if (!--num_matches) - break; - } - - // Handle substring after the final match. - str->append(src, pos, str_length - pos); - return true; - } - - // Prepare for the copy/move loop below -- expand the string to its final - // size by shifting the data after the first match to the end of the resized - // string. - size_t shift_src = first_match + find_length; - size_t shift_dst = shift_src + expansion; - - // Big |expansion| factors (relative to |str_length|) require padding up to - // |shift_dst|. - if (shift_dst > str_length) - str->resize(shift_dst); - - str->replace(shift_dst, str_length - shift_src, *str, shift_src, - str_length - shift_src); - str_length = final_length; - } - - // We can alternate replacement and move operations. This won't overwrite the - // unsearched region of the string so long as |write_offset| <= |read_offset|; - // that condition is always satisfied because: - // - // (a) If the string is being shortened, |expansion| is zero and - // |write_offset| grows slower than |read_offset|. 
- // - // (b) If the string is being lengthened, |write_offset| grows faster than - // |read_offset|, but |expansion| is big enough so that |write_offset| - // will only catch up to |read_offset| at the point of the last match. - auto* buffer = &((*str)[0]); - size_t write_offset = first_match; - size_t read_offset = first_match + expansion; - do { - if (replace_length) { - CharTraits::copy(buffer + write_offset, replace_with.data(), - replace_length); - write_offset += replace_length; - } - read_offset += find_length; - - // min() clamps StringType::npos (the largest unsigned value) to str_length. - size_t match = std::min(matcher.Find(*str, read_offset), str_length); - - size_t length = match - read_offset; - if (length) { - CharTraits::move(buffer + write_offset, buffer + read_offset, length); - write_offset += length; - read_offset += length; - } - } while (read_offset < str_length); - - // If we're shortening the string, truncate it now. - str->resize(write_offset); - return true; -} - -template <class StringType> -bool ReplaceCharsT(const StringType& input, - BasicStringPiece<StringType> find_any_of_these, - BasicStringPiece<StringType> replace_with, - StringType* output) { - // Commonly, this is called with output and input being the same string; in - // that case, this assignment is inexpensive. 
- *output = input; - - return DoReplaceMatchesAfterOffset( - output, 0, CharacterMatcher<StringType>{find_any_of_these}, replace_with, - ReplaceType::REPLACE_ALL); -} - void ReplaceFirstSubstringAfterOffset(string16* str, size_t start_offset, StringPiece16 find_this, StringPiece16 replace_with) { - DoReplaceMatchesAfterOffset(str, start_offset, - SubstringMatcher<string16>{find_this}, - replace_with, ReplaceType::REPLACE_FIRST); + internal::DoReplaceMatchesAfterOffset( + str, start_offset, internal::SubstringMatcher<string16>{find_this}, + replace_with, internal::ReplaceType::REPLACE_FIRST); } void ReplaceFirstSubstringAfterOffset(std::string* str, size_t start_offset, StringPiece find_this, StringPiece replace_with) { - DoReplaceMatchesAfterOffset(str, start_offset, - SubstringMatcher<std::string>{find_this}, - replace_with, ReplaceType::REPLACE_FIRST); + internal::DoReplaceMatchesAfterOffset( + str, start_offset, internal::SubstringMatcher<std::string>{find_this}, + replace_with, internal::ReplaceType::REPLACE_FIRST); } void ReplaceSubstringsAfterOffset(string16* str, size_t start_offset, StringPiece16 find_this, StringPiece16 replace_with) { - DoReplaceMatchesAfterOffset(str, start_offset, - SubstringMatcher<string16>{find_this}, - replace_with, ReplaceType::REPLACE_ALL); + internal::DoReplaceMatchesAfterOffset( + str, start_offset, internal::SubstringMatcher<string16>{find_this}, + replace_with, internal::ReplaceType::REPLACE_ALL); } void ReplaceSubstringsAfterOffset(std::string* str, size_t start_offset, StringPiece find_this, StringPiece replace_with) { - DoReplaceMatchesAfterOffset(str, start_offset, - SubstringMatcher<std::string>{find_this}, - replace_with, ReplaceType::REPLACE_ALL); -} - -template <class string_type> -inline typename string_type::value_type* WriteIntoT(string_type* str, - size_t length_with_null) { - DCHECK_GE(length_with_null, 1u); - str->reserve(length_with_null); - str->resize(length_with_null - 1); - return &((*str)[0]); + 
internal::DoReplaceMatchesAfterOffset( + str, start_offset, internal::SubstringMatcher<std::string>{find_this}, + replace_with, internal::ReplaceType::REPLACE_ALL); } char* WriteInto(std::string* str, size_t length_with_null) { - return WriteIntoT(str, length_with_null); + return internal::WriteIntoT(str, length_with_null); } char16* WriteInto(string16* str, size_t length_with_null) { - return WriteIntoT(str, length_with_null); + return internal::WriteIntoT(str, length_with_null); } -// Generic version for all JoinString overloads. |list_type| must be a sequence -// (std::vector or std::initializer_list) of strings/StringPieces (std::string, -// string16, StringPiece or StringPiece16). |string_type| is either std::string -// or string16. -template <typename list_type, typename string_type> -static string_type JoinStringT(const list_type& parts, - BasicStringPiece<string_type> sep) { - if (base::empty(parts)) - return string_type(); - - // Pre-allocate the eventual size of the string. Start with the size of all of - // the separators (note that this *assumes* parts.size() > 0). - size_t total_size = (parts.size() - 1) * sep.size(); - for (const auto& part : parts) - total_size += part.size(); - string_type result; - result.reserve(total_size); - - auto iter = parts.begin(); - DCHECK(iter != parts.end()); - result.append(iter->data(), iter->size()); - ++iter; - - for (; iter != parts.end(); ++iter) { - result.append(sep.data(), sep.size()); - result.append(iter->data(), iter->size()); - } - - // Sanity-check that we pre-allocated correctly. 
- DCHECK_EQ(total_size, result.size()); - - return result; +std::string JoinString(span<const std::string> parts, StringPiece separator) { + return internal::JoinStringT(parts, separator); } -std::string JoinString(const std::vector<std::string>& parts, - StringPiece separator) { - return JoinStringT(parts, separator); +string16 JoinString(span<const string16> parts, StringPiece16 separator) { + return internal::JoinStringT(parts, separator); } -string16 JoinString(const std::vector<string16>& parts, - StringPiece16 separator) { - return JoinStringT(parts, separator); -} - -std::string JoinString(const std::vector<StringPiece>& parts, - StringPiece separator) { - return JoinStringT(parts, separator); +std::string JoinString(span<const StringPiece> parts, StringPiece separator) { + return internal::JoinStringT(parts, separator); } -string16 JoinString(const std::vector<StringPiece16>& parts, - StringPiece16 separator) { - return JoinStringT(parts, separator); +string16 JoinString(span<const StringPiece16> parts, StringPiece16 separator) { + return internal::JoinStringT(parts, separator); } std::string JoinString(std::initializer_list<StringPiece> parts, StringPiece separator) { - return JoinStringT(parts, separator); + return internal::JoinStringT(parts, separator); } string16 JoinString(std::initializer_list<StringPiece16> parts, StringPiece16 separator) { - return JoinStringT(parts, separator); + return internal::JoinStringT(parts, separator); } -template<class FormatStringType, class OutStringType> -OutStringType DoReplaceStringPlaceholders( - const FormatStringType& format_string, - const std::vector<OutStringType>& subst, - std::vector<size_t>* offsets) { - size_t substitutions = subst.size(); - DCHECK_LT(substitutions, 10U); - - size_t sub_length = 0; - for (const auto& cur : subst) - sub_length += cur.length(); - - OutStringType formatted; - formatted.reserve(format_string.length() + sub_length); - - std::vector<ReplacementOffset> r_offsets; - for (auto i = 
format_string.begin(); i != format_string.end(); ++i) { - if ('$' == *i) { - if (i + 1 != format_string.end()) { - ++i; - if ('$' == *i) { - while (i != format_string.end() && '$' == *i) { - formatted.push_back('$'); - ++i; - } - --i; - } else { - if (*i < '1' || *i > '9') { - DLOG(ERROR) << "Invalid placeholder: $" << *i; - continue; - } - uintptr_t index = *i - '1'; - if (offsets) { - ReplacementOffset r_offset(index, - static_cast<int>(formatted.size())); - r_offsets.insert( - std::upper_bound(r_offsets.begin(), r_offsets.end(), r_offset, - &CompareParameter), - r_offset); - } - if (index < substitutions) - formatted.append(subst.at(index)); - } - } - } else { - formatted.push_back(*i); - } - } - if (offsets) { - for (const auto& cur : r_offsets) - offsets->push_back(cur.offset); - } - return formatted; -} - -string16 ReplaceStringPlaceholders(const string16& format_string, +string16 ReplaceStringPlaceholders(StringPiece16 format_string, const std::vector<string16>& subst, std::vector<size_t>* offsets) { - return DoReplaceStringPlaceholders(format_string, subst, offsets); + return internal::DoReplaceStringPlaceholders(format_string, subst, offsets); } std::string ReplaceStringPlaceholders(StringPiece format_string, const std::vector<std::string>& subst, std::vector<size_t>* offsets) { - return DoReplaceStringPlaceholders(format_string, subst, offsets); + return internal::DoReplaceStringPlaceholders(format_string, subst, offsets); } string16 ReplaceStringPlaceholders(const string16& format_string, const string16& a, size_t* offset) { std::vector<size_t> offsets; - std::vector<string16> subst; - subst.push_back(a); - string16 result = ReplaceStringPlaceholders(format_string, subst, &offsets); + string16 result = ReplaceStringPlaceholders(format_string, {a}, &offsets); DCHECK_EQ(1U, offsets.size()); if (offset) @@ -1065,65 +447,11 @@ string16 ReplaceStringPlaceholders(const string16& format_string, return result; } -#if defined(OS_WIN) && 
defined(BASE_STRING16_IS_STD_U16STRING) - -TrimPositions TrimWhitespace(WStringPiece input, - TrimPositions positions, - std::wstring* output) { - return TrimStringT(input, WStringPiece(kWhitespaceWide), positions, output); -} - -WStringPiece TrimWhitespace(WStringPiece input, TrimPositions positions) { - return TrimStringPieceT(input, WStringPiece(kWhitespaceWide), positions); -} - -bool TrimString(WStringPiece input, - WStringPiece trim_chars, - std::wstring* output) { - return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE; -} - -WStringPiece TrimString(WStringPiece input, - WStringPiece trim_chars, - TrimPositions positions) { - return TrimStringPieceT(input, trim_chars, positions); -} - -wchar_t* WriteInto(std::wstring* str, size_t length_with_null) { - return WriteIntoT(str, length_with_null); -} - -#endif - -// The following code is compatible with the OpenBSD lcpy interface. See: -// http://www.gratisoft.us/todd/papers/strlcpy.html -// ftp://ftp.openbsd.org/pub/OpenBSD/src/lib/libc/string/{wcs,str}lcpy.c - -namespace { - -template <typename CHAR> -size_t lcpyT(CHAR* dst, const CHAR* src, size_t dst_size) { - for (size_t i = 0; i < dst_size; ++i) { - if ((dst[i] = src[i]) == 0) // We hit and copied the terminating NULL. - return i; - } - - // We were left off at dst_size. We over copied 1 byte. Null terminate. - if (dst_size != 0) - dst[dst_size - 1] = 0; - - // Count the rest of the |src|, and return it's length in characters. 
- while (src[dst_size]) ++dst_size; - return dst_size; -} - -} // namespace - size_t strlcpy(char* dst, const char* src, size_t dst_size) { - return lcpyT<char>(dst, src, dst_size); + return internal::lcpyT(dst, src, dst_size); } size_t wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size) { - return lcpyT<wchar_t>(dst, src, dst_size); + return internal::lcpyT(dst, src, dst_size); } } // namespace base diff --git a/chromium/base/strings/string_util.h b/chromium/base/strings/string_util.h index f9f5e10ade9..e5a0487cff1 100644 --- a/chromium/base/strings/string_util.h +++ b/chromium/base/strings/string_util.h @@ -14,10 +14,12 @@ #include <initializer_list> #include <string> +#include <type_traits> #include <vector> #include "base/base_export.h" #include "base/compiler_specific.h" +#include "base/containers/span.h" #include "base/stl_util.h" #include "base/strings/string16.h" #include "base/strings/string_piece.h" // For implicit conversions. @@ -84,19 +86,17 @@ BASE_EXPORT bool IsWprintfFormatPortable(const wchar_t* format); // ASCII-specific tolower. The standard library's tolower is locale sensitive, // so we don't want to use it here. -inline char ToLowerASCII(char c) { - return (c >= 'A' && c <= 'Z') ? (c + ('a' - 'A')) : c; -} -inline char16 ToLowerASCII(char16 c) { +template <typename CharT, + typename = std::enable_if_t<std::is_integral<CharT>::value>> +CharT ToLowerASCII(CharT c) { return (c >= 'A' && c <= 'Z') ? (c + ('a' - 'A')) : c; } // ASCII-specific toupper. The standard library's toupper is locale sensitive, // so we don't want to use it here. -inline char ToUpperASCII(char c) { - return (c >= 'a' && c <= 'z') ? (c + ('A' - 'a')) : c; -} -inline char16 ToUpperASCII(char16 c) { +template <typename CharT, + typename = std::enable_if_t<std::is_integral<CharT>::value>> +CharT ToUpperASCII(CharT c) { return (c >= 'a' && c <= 'z') ? 
(c + ('A' - 'a')) : c; } @@ -170,10 +170,10 @@ BASE_EXPORT extern const char kUtf8ByteOrderMark[]; // Removes characters in |remove_chars| from anywhere in |input|. Returns true // if any characters were removed. |remove_chars| must be null-terminated. // NOTE: Safe to use the same variable for both |input| and |output|. -BASE_EXPORT bool RemoveChars(const string16& input, +BASE_EXPORT bool RemoveChars(StringPiece16 input, StringPiece16 remove_chars, string16* output); -BASE_EXPORT bool RemoveChars(const std::string& input, +BASE_EXPORT bool RemoveChars(StringPiece input, StringPiece remove_chars, std::string* output); @@ -182,11 +182,11 @@ BASE_EXPORT bool RemoveChars(const std::string& input, // the |replace_with| string. Returns true if any characters were replaced. // |replace_chars| must be null-terminated. // NOTE: Safe to use the same variable for both |input| and |output|. -BASE_EXPORT bool ReplaceChars(const string16& input, +BASE_EXPORT bool ReplaceChars(StringPiece16 input, StringPiece16 replace_chars, StringPiece16 replace_with, string16* output); -BASE_EXPORT bool ReplaceChars(const std::string& input, +BASE_EXPORT bool ReplaceChars(StringPiece input, StringPiece replace_chars, StringPiece replace_with, std::string* output); @@ -226,69 +226,6 @@ BASE_EXPORT void TruncateUTF8ToByteSize(const std::string& input, const size_t byte_size, std::string* output); -#if defined(WCHAR_T_IS_UTF16) -// Utility functions to access the underlying string buffer as a wide char -// pointer. -// -// Note: These functions violate strict aliasing when char16 and wchar_t are -// unrelated types. We thus pass -fno-strict-aliasing to the compiler on -// non-Windows platforms [1], and rely on it being off in Clang's CL mode [2]. 
-// -// [1] https://crrev.com/b9a0976622/build/config/compiler/BUILD.gn#244 -// [2] -// https://github.com/llvm/llvm-project/blob/1e28a66/clang/lib/Driver/ToolChains/Clang.cpp#L3949 -inline wchar_t* as_writable_wcstr(char16* str) { - return reinterpret_cast<wchar_t*>(str); -} - -inline wchar_t* as_writable_wcstr(string16& str) { - return reinterpret_cast<wchar_t*>(data(str)); -} - -inline const wchar_t* as_wcstr(const char16* str) { - return reinterpret_cast<const wchar_t*>(str); -} - -inline const wchar_t* as_wcstr(StringPiece16 str) { - return reinterpret_cast<const wchar_t*>(str.data()); -} - -// Utility functions to access the underlying string buffer as a char16 pointer. -inline char16* as_writable_u16cstr(wchar_t* str) { - return reinterpret_cast<char16*>(str); -} - -inline char16* as_writable_u16cstr(std::wstring& str) { - return reinterpret_cast<char16*>(data(str)); -} - -inline const char16* as_u16cstr(const wchar_t* str) { - return reinterpret_cast<const char16*>(str); -} - -inline const char16* as_u16cstr(WStringPiece str) { - return reinterpret_cast<const char16*>(str.data()); -} - -// Utility functions to convert between base::WStringPiece and -// base::StringPiece16. -inline WStringPiece AsWStringPiece(StringPiece16 str) { - return WStringPiece(as_wcstr(str.data()), str.size()); -} - -inline StringPiece16 AsStringPiece16(WStringPiece str) { - return StringPiece16(as_u16cstr(str.data()), str.size()); -} - -inline std::wstring AsWString(StringPiece16 str) { - return std::wstring(as_wcstr(str.data()), str.size()); -} - -inline string16 AsString16(WStringPiece str) { - return string16(as_u16cstr(str.data()), str.size()); -} -#endif // defined(WCHAR_T_IS_UTF16) - // Trims any whitespace from either end of the input string. // // The StringPiece versions return a substring referencing the input buffer. 
@@ -315,11 +252,10 @@ BASE_EXPORT StringPiece TrimWhitespaceASCII(StringPiece input, // (2) If |trim_sequences_with_line_breaks| is true, any other whitespace // sequences containing a CR or LF are trimmed. // (3) All other whitespace sequences are converted to single spaces. -BASE_EXPORT string16 CollapseWhitespace( - const string16& text, - bool trim_sequences_with_line_breaks); +BASE_EXPORT string16 CollapseWhitespace(StringPiece16 text, + bool trim_sequences_with_line_breaks); BASE_EXPORT std::string CollapseWhitespaceASCII( - const std::string& text, + StringPiece text, bool trim_sequences_with_line_breaks); // Returns true if |input| is empty or contains only characters found in @@ -347,6 +283,7 @@ BASE_EXPORT bool IsStringUTF8AllowingNoncharacters(StringPiece str); // does not leave early if it is not the case. BASE_EXPORT bool IsStringASCII(StringPiece str); BASE_EXPORT bool IsStringASCII(StringPiece16 str); + #if defined(WCHAR_T_IS_UTF32) BASE_EXPORT bool IsStringASCII(WStringPiece str); #endif @@ -488,8 +425,8 @@ BASE_EXPORT void ReplaceSubstringsAfterOffset( BASE_EXPORT char* WriteInto(std::string* str, size_t length_with_null); BASE_EXPORT char16* WriteInto(string16* str, size_t length_with_null); -// Joins a vector or list of strings into a single string, inserting |separator| -// (which may be empty) in between all elements. +// Joins a list of strings into a single string, inserting |separator| (which +// may be empty) in between all elements. // // Note this is inverse of SplitString()/SplitStringPiece() defined in // string_split.h. @@ -501,13 +438,13 @@ BASE_EXPORT char16* WriteInto(string16* str, size_t length_with_null); // copies of those strings are created until the final join operation. // // Use StrCat (in base/strings/strcat.h) if you don't need a separator. 
-BASE_EXPORT std::string JoinString(const std::vector<std::string>& parts, +BASE_EXPORT std::string JoinString(span<const std::string> parts, StringPiece separator); -BASE_EXPORT string16 JoinString(const std::vector<string16>& parts, +BASE_EXPORT string16 JoinString(span<const string16> parts, StringPiece16 separator); -BASE_EXPORT std::string JoinString(const std::vector<StringPiece>& parts, +BASE_EXPORT std::string JoinString(span<const StringPiece> parts, StringPiece separator); -BASE_EXPORT string16 JoinString(const std::vector<StringPiece16>& parts, +BASE_EXPORT string16 JoinString(span<const StringPiece16> parts, StringPiece16 separator); // Explicit initializer_list overloads are required to break ambiguity when used // with a literal initializer list (otherwise the compiler would not be able to @@ -521,10 +458,10 @@ BASE_EXPORT string16 JoinString(std::initializer_list<StringPiece16> parts, // Additionally, any number of consecutive '$' characters is replaced by that // number less one. Eg $$->$, $$$->$$, etc. The offsets parameter here can be // NULL. This only allows you to use up to nine replacements. 
-BASE_EXPORT string16 ReplaceStringPlaceholders( - const string16& format_string, - const std::vector<string16>& subst, - std::vector<size_t>* offsets); +BASE_EXPORT string16 +ReplaceStringPlaceholders(StringPiece16 format_string, + const std::vector<string16>& subst, + std::vector<size_t>* offsets); BASE_EXPORT std::string ReplaceStringPlaceholders( StringPiece format_string, @@ -536,25 +473,6 @@ BASE_EXPORT string16 ReplaceStringPlaceholders(const string16& format_string, const string16& a, size_t* offset); -#if defined(OS_WIN) && defined(BASE_STRING16_IS_STD_U16STRING) -BASE_EXPORT TrimPositions TrimWhitespace(WStringPiece input, - TrimPositions positions, - std::wstring* output); - -BASE_EXPORT WStringPiece TrimWhitespace(WStringPiece input, - TrimPositions positions); - -BASE_EXPORT bool TrimString(WStringPiece input, - WStringPiece trim_chars, - std::wstring* output); - -BASE_EXPORT WStringPiece TrimString(WStringPiece input, - WStringPiece trim_chars, - TrimPositions positions); - -BASE_EXPORT wchar_t* WriteInto(std::wstring* str, size_t length_with_null); -#endif - } // namespace base #if defined(OS_WIN) diff --git a/chromium/base/strings/string_util_internal.h b/chromium/base/strings/string_util_internal.h new file mode 100644 index 00000000000..da3fb07cc30 --- /dev/null +++ b/chromium/base/strings/string_util_internal.h @@ -0,0 +1,625 @@ +// Copyright 2020 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef BASE_STRINGS_STRING_UTIL_INTERNAL_H_ +#define BASE_STRINGS_STRING_UTIL_INTERNAL_H_ + +#include "base/logging.h" +#include "base/notreached.h" +#include "base/strings/string_piece.h" +#include "base/third_party/icu/icu_utf.h" + +namespace base { + +namespace internal { + +// Used by ReplaceStringPlaceholders to track the position in the string of +// replaced parameters. 
+struct ReplacementOffset { + ReplacementOffset(uintptr_t parameter, size_t offset) + : parameter(parameter), offset(offset) {} + + // Index of the parameter. + uintptr_t parameter; + + // Starting position in the string. + size_t offset; +}; + +static bool CompareParameter(const ReplacementOffset& elem1, + const ReplacementOffset& elem2) { + return elem1.parameter < elem2.parameter; +} + +// Assuming that a pointer is the size of a "machine word", then +// uintptr_t is an integer type that is also a machine word. +using MachineWord = uintptr_t; + +inline bool IsMachineWordAligned(const void* pointer) { + return !(reinterpret_cast<MachineWord>(pointer) & (sizeof(MachineWord) - 1)); +} + +template <typename StringType> +StringType ToLowerASCIIImpl(BasicStringPiece<StringType> str) { + StringType ret; + ret.reserve(str.size()); + for (size_t i = 0; i < str.size(); i++) + ret.push_back(ToLowerASCII(str[i])); + return ret; +} + +template <typename StringType> +StringType ToUpperASCIIImpl(BasicStringPiece<StringType> str) { + StringType ret; + ret.reserve(str.size()); + for (size_t i = 0; i < str.size(); i++) + ret.push_back(ToUpperASCII(str[i])); + return ret; +} + +template <class StringType> +int CompareCaseInsensitiveASCIIT(BasicStringPiece<StringType> a, + BasicStringPiece<StringType> b) { + // Find the first characters that aren't equal and compare them. If the end + // of one of the strings is found before a nonequal character, the lengths + // of the strings are compared. + size_t i = 0; + while (i < a.length() && i < b.length()) { + typename StringType::value_type lower_a = ToLowerASCII(a[i]); + typename StringType::value_type lower_b = ToLowerASCII(b[i]); + if (lower_a < lower_b) + return -1; + if (lower_a > lower_b) + return 1; + i++; + } + + // End of one string hit before finding a different character. Expect the + // common case to be "strings equal" at this point so check that first. 
+ if (a.length() == b.length()) + return 0; + + if (a.length() < b.length()) + return -1; + return 1; +} + +template <typename Str> +TrimPositions TrimStringT(BasicStringPiece<Str> input, + BasicStringPiece<Str> trim_chars, + TrimPositions positions, + Str* output) { + // Find the edges of leading/trailing whitespace as desired. Need to use + // a StringPiece version of input to be able to call find* on it with the + // StringPiece version of trim_chars (normally the trim_chars will be a + // constant so avoid making a copy). + const size_t last_char = input.length() - 1; + const size_t first_good_char = + (positions & TRIM_LEADING) ? input.find_first_not_of(trim_chars) : 0; + const size_t last_good_char = (positions & TRIM_TRAILING) + ? input.find_last_not_of(trim_chars) + : last_char; + + // When the string was all trimmed, report that we stripped off characters + // from whichever position the caller was interested in. For empty input, we + // stripped no characters, but we still need to clear |output|. + if (input.empty() || first_good_char == Str::npos || + last_good_char == Str::npos) { + bool input_was_empty = input.empty(); // in case output == &input + output->clear(); + return input_was_empty ? TRIM_NONE : positions; + } + + // Trim. + output->assign(input.data() + first_good_char, + last_good_char - first_good_char + 1); + + // Return where we trimmed from. + return static_cast<TrimPositions>( + (first_good_char == 0 ? TRIM_NONE : TRIM_LEADING) | + (last_good_char == last_char ? TRIM_NONE : TRIM_TRAILING)); +} + +template <typename Str> +BasicStringPiece<Str> TrimStringPieceT(BasicStringPiece<Str> input, + BasicStringPiece<Str> trim_chars, + TrimPositions positions) { + size_t begin = + (positions & TRIM_LEADING) ? input.find_first_not_of(trim_chars) : 0; + size_t end = (positions & TRIM_TRAILING) + ? 
input.find_last_not_of(trim_chars) + 1 + : input.size(); + return input.substr(begin, end - begin); +} + +template <typename STR> +STR CollapseWhitespaceT(BasicStringPiece<STR> text, + bool trim_sequences_with_line_breaks) { + STR result; + result.resize(text.size()); + + // Set flags to pretend we're already in a trimmed whitespace sequence, so we + // will trim any leading whitespace. + bool in_whitespace = true; + bool already_trimmed = true; + + int chars_written = 0; + for (auto c : text) { + if (IsUnicodeWhitespace(c)) { + if (!in_whitespace) { + // Reduce all whitespace sequences to a single space. + in_whitespace = true; + result[chars_written++] = L' '; + } + if (trim_sequences_with_line_breaks && !already_trimmed && + ((c == '\n') || (c == '\r'))) { + // Whitespace sequences containing CR or LF are eliminated entirely. + already_trimmed = true; + --chars_written; + } + } else { + // Non-whitespace characters are copied straight across. + in_whitespace = false; + already_trimmed = false; + result[chars_written++] = c; + } + } + + if (in_whitespace && !already_trimmed) { + // Any trailing whitespace is eliminated. + --chars_written; + } + + result.resize(chars_written); + return result; +} + +template <class Char> +bool DoIsStringASCII(const Char* characters, size_t length) { + // Bitmasks to detect non ASCII characters for character sizes of 8, 16 and 32 + // bits. + constexpr MachineWord NonASCIIMasks[] = { + 0, MachineWord(0x8080808080808080ULL), MachineWord(0xFF80FF80FF80FF80ULL), + 0, MachineWord(0xFFFFFF80FFFFFF80ULL), + }; + + if (!length) + return true; + constexpr MachineWord non_ascii_bit_mask = NonASCIIMasks[sizeof(Char)]; + static_assert(non_ascii_bit_mask, "Error: Invalid Mask"); + MachineWord all_char_bits = 0; + const Char* end = characters + length; + + // Prologue: align the input. 
+ while (!IsMachineWordAligned(characters) && characters < end) + all_char_bits |= *characters++; + if (all_char_bits & non_ascii_bit_mask) + return false; + + // Compare the values of CPU word size. + constexpr size_t chars_per_word = sizeof(MachineWord) / sizeof(Char); + constexpr int batch_count = 16; + while (characters <= end - batch_count * chars_per_word) { + all_char_bits = 0; + for (int i = 0; i < batch_count; ++i) { + all_char_bits |= *(reinterpret_cast<const MachineWord*>(characters)); + characters += chars_per_word; + } + if (all_char_bits & non_ascii_bit_mask) + return false; + } + + // Process the remaining words. + all_char_bits = 0; + while (characters <= end - chars_per_word) { + all_char_bits |= *(reinterpret_cast<const MachineWord*>(characters)); + characters += chars_per_word; + } + + // Process the remaining bytes. + while (characters < end) + all_char_bits |= *characters++; + + return !(all_char_bits & non_ascii_bit_mask); +} + +template <bool (*Validator)(uint32_t)> +inline static bool DoIsStringUTF8(StringPiece str) { + const char* src = str.data(); + int32_t src_len = static_cast<int32_t>(str.length()); + int32_t char_index = 0; + + while (char_index < src_len) { + int32_t code_point; + CBU8_NEXT(src, char_index, src_len, code_point); + if (!Validator(code_point)) + return false; + } + return true; +} + +// Implementation note: Normally this function will be called with a hardcoded +// constant for the lowercase_ascii parameter. Constructing a StringPiece from +// a C constant requires running strlen, so the result will be two passes +// through the buffers, one to find the length of lowercase_ascii, and one to +// compare each letter. +// +// This function could have taken a const char* to avoid this and only do one +// pass through the string. But the strlen is faster than the case-insensitive +// compares and lets us early-exit in the case that the strings are different +// lengths (will often be the case for non-matches). 
So whether one approach or +// the other will be faster depends on the case. +// +// The hardcoded strings are typically very short so it doesn't matter, and the +// string piece gives additional flexibility for the caller (doesn't have to be +// null terminated) so we choose the StringPiece route. +template <typename Str> +static inline bool DoLowerCaseEqualsASCII(BasicStringPiece<Str> str, + StringPiece lowercase_ascii) { + return std::equal( + str.begin(), str.end(), lowercase_ascii.begin(), lowercase_ascii.end(), + [](auto lhs, auto rhs) { return ToLowerASCII(lhs) == rhs; }); +} + +template <typename Str> +bool StartsWithT(BasicStringPiece<Str> str, + BasicStringPiece<Str> search_for, + CompareCase case_sensitivity) { + if (search_for.size() > str.size()) + return false; + + BasicStringPiece<Str> source = str.substr(0, search_for.size()); + + switch (case_sensitivity) { + case CompareCase::SENSITIVE: + return source == search_for; + + case CompareCase::INSENSITIVE_ASCII: + return std::equal( + search_for.begin(), search_for.end(), source.begin(), + CaseInsensitiveCompareASCII<typename Str::value_type>()); + + default: + NOTREACHED(); + return false; + } +} + +template <typename Str> +bool EndsWithT(BasicStringPiece<Str> str, + BasicStringPiece<Str> search_for, + CompareCase case_sensitivity) { + if (search_for.size() > str.size()) + return false; + + BasicStringPiece<Str> source = + str.substr(str.size() - search_for.size(), search_for.size()); + + switch (case_sensitivity) { + case CompareCase::SENSITIVE: + return source == search_for; + + case CompareCase::INSENSITIVE_ASCII: + return std::equal( + source.begin(), source.end(), search_for.begin(), + CaseInsensitiveCompareASCII<typename Str::value_type>()); + + default: + NOTREACHED(); + return false; + } +} + +// A Matcher for DoReplaceMatchesAfterOffset() that matches substrings. 
+template <class StringType> +struct SubstringMatcher { + BasicStringPiece<StringType> find_this; + + size_t Find(const StringType& input, size_t pos) { + return input.find(find_this.data(), pos, find_this.length()); + } + size_t MatchSize() { return find_this.length(); } +}; + +// A Matcher for DoReplaceMatchesAfterOffset() that matches single characters. +template <class StringType> +struct CharacterMatcher { + BasicStringPiece<StringType> find_any_of_these; + + size_t Find(const StringType& input, size_t pos) { + return input.find_first_of(find_any_of_these.data(), pos, + find_any_of_these.length()); + } + constexpr size_t MatchSize() { return 1; } +}; + +enum class ReplaceType { REPLACE_ALL, REPLACE_FIRST }; + +// Runs in O(n) time in the length of |str|, and transforms the string without +// reallocating when possible. Returns |true| if any matches were found. +// +// This is parameterized on a |Matcher| traits type, so that it can be the +// implementation for both ReplaceChars() and ReplaceSubstringsAfterOffset(). +template <class StringType, class Matcher> +bool DoReplaceMatchesAfterOffset(StringType* str, + size_t initial_offset, + Matcher matcher, + BasicStringPiece<StringType> replace_with, + ReplaceType replace_type) { + using CharTraits = typename StringType::traits_type; + + const size_t find_length = matcher.MatchSize(); + if (!find_length) + return false; + + // If the find string doesn't appear, there's nothing to do. + size_t first_match = matcher.Find(*str, initial_offset); + if (first_match == StringType::npos) + return false; + + // If we're only replacing one instance, there's no need to do anything + // complicated. 
+ const size_t replace_length = replace_with.length(); + if (replace_type == ReplaceType::REPLACE_FIRST) { + str->replace(first_match, find_length, replace_with.data(), replace_length); + return true; + } + + // If the find and replace strings are the same length, we can simply use + // replace() on each instance, and finish the entire operation in O(n) time. + if (find_length == replace_length) { + auto* buffer = &((*str)[0]); + for (size_t offset = first_match; offset != StringType::npos; + offset = matcher.Find(*str, offset + replace_length)) { + CharTraits::copy(buffer + offset, replace_with.data(), replace_length); + } + return true; + } + + // Since the find and replace strings aren't the same length, a loop like the + // one above would be O(n^2) in the worst case, as replace() will shift the + // entire remaining string each time. We need to be more clever to keep things + // O(n). + // + // When the string is being shortened, it's possible to just shift the matches + // down in one pass while finding, and truncate the length at the end of the + // search. + // + // If the string is being lengthened, more work is required. The strategy used + // here is to make two find() passes through the string. The first pass counts + // the number of matches to determine the new size. The second pass will + // either construct the new string into a new buffer (if the existing buffer + // lacked capacity), or else -- if there is room -- create a region of scratch + // space after |first_match| by shifting the tail of the string to a higher + // index, and doing in-place moves from the tail to lower indices thereafter. + size_t str_length = str->length(); + size_t expansion = 0; + if (replace_length > find_length) { + // This operation lengthens the string; determine the new length by counting + // matches. 
+ const size_t expansion_per_match = (replace_length - find_length); + size_t num_matches = 0; + for (size_t match = first_match; match != StringType::npos; + match = matcher.Find(*str, match + find_length)) { + expansion += expansion_per_match; + ++num_matches; + } + const size_t final_length = str_length + expansion; + + if (str->capacity() < final_length) { + // If we'd have to allocate a new buffer to grow the string, build the + // result directly into the new allocation via append(). + StringType src(str->get_allocator()); + str->swap(src); + str->reserve(final_length); + + size_t pos = 0; + for (size_t match = first_match;; match = matcher.Find(src, pos)) { + str->append(src, pos, match - pos); + str->append(replace_with.data(), replace_length); + pos = match + find_length; + + // A mid-loop test/break enables skipping the final Find() call; the + // number of matches is known, so don't search past the last one. + if (!--num_matches) + break; + } + + // Handle substring after the final match. + str->append(src, pos, str_length - pos); + return true; + } + + // Prepare for the copy/move loop below -- expand the string to its final + // size by shifting the data after the first match to the end of the resized + // string. + size_t shift_src = first_match + find_length; + size_t shift_dst = shift_src + expansion; + + // Big |expansion| factors (relative to |str_length|) require padding up to + // |shift_dst|. + if (shift_dst > str_length) + str->resize(shift_dst); + + str->replace(shift_dst, str_length - shift_src, *str, shift_src, + str_length - shift_src); + str_length = final_length; + } + + // We can alternate replacement and move operations. This won't overwrite the + // unsearched region of the string so long as |write_offset| <= |read_offset|; + // that condition is always satisfied because: + // + // (a) If the string is being shortened, |expansion| is zero and + // |write_offset| grows slower than |read_offset|. 
+ // + // (b) If the string is being lengthened, |write_offset| grows faster than + // |read_offset|, but |expansion| is big enough so that |write_offset| + // will only catch up to |read_offset| at the point of the last match. + auto* buffer = &((*str)[0]); + size_t write_offset = first_match; + size_t read_offset = first_match + expansion; + do { + if (replace_length) { + CharTraits::copy(buffer + write_offset, replace_with.data(), + replace_length); + write_offset += replace_length; + } + read_offset += find_length; + + // min() clamps StringType::npos (the largest unsigned value) to str_length. + size_t match = std::min(matcher.Find(*str, read_offset), str_length); + + size_t length = match - read_offset; + if (length) { + CharTraits::move(buffer + write_offset, buffer + read_offset, length); + write_offset += length; + read_offset += length; + } + } while (read_offset < str_length); + + // If we're shortening the string, truncate it now. + str->resize(write_offset); + return true; +} + +template <class StringType> +bool ReplaceCharsT(BasicStringPiece<StringType> input, + BasicStringPiece<StringType> find_any_of_these, + BasicStringPiece<StringType> replace_with, + StringType* output) { + // Commonly, this is called with output and input being the same string; in + // that case, skip the copy. + if (input.data() != output->data() || input.size() != output->size()) + output->assign(input.data(), input.size()); + + return DoReplaceMatchesAfterOffset( + output, 0, CharacterMatcher<StringType>{find_any_of_these}, replace_with, + ReplaceType::REPLACE_ALL); +} + +template <class string_type> +inline typename string_type::value_type* WriteIntoT(string_type* str, + size_t length_with_null) { + DCHECK_GE(length_with_null, 1u); + str->reserve(length_with_null); + str->resize(length_with_null - 1); + return &((*str)[0]); +} + +// Generic version for all JoinString overloads. 
|list_type| must be a sequence +// (base::span or std::initializer_list) of strings/StringPieces (std::string, +// string16, StringPiece or StringPiece16). |string_type| is either std::string +// or string16. +template <typename list_type, typename string_type> +static string_type JoinStringT(list_type parts, + BasicStringPiece<string_type> sep) { + if (base::empty(parts)) + return string_type(); + + // Pre-allocate the eventual size of the string. Start with the size of all of + // the separators (note that this *assumes* parts.size() > 0). + size_t total_size = (parts.size() - 1) * sep.size(); + for (const auto& part : parts) + total_size += part.size(); + string_type result; + result.reserve(total_size); + + auto iter = parts.begin(); + DCHECK(iter != parts.end()); + result.append(iter->data(), iter->size()); + ++iter; + + for (; iter != parts.end(); ++iter) { + result.append(sep.data(), sep.size()); + result.append(iter->data(), iter->size()); + } + + // Sanity-check that we pre-allocated correctly. 
+ DCHECK_EQ(total_size, result.size()); + + return result; +} + +template <class StringType> +StringType DoReplaceStringPlaceholders( + BasicStringPiece<StringType> format_string, + const std::vector<StringType>& subst, + std::vector<size_t>* offsets) { + size_t substitutions = subst.size(); + DCHECK_LT(substitutions, 10U); + + size_t sub_length = 0; + for (const auto& cur : subst) + sub_length += cur.length(); + + StringType formatted; + formatted.reserve(format_string.length() + sub_length); + + std::vector<ReplacementOffset> r_offsets; + for (auto i = format_string.begin(); i != format_string.end(); ++i) { + if ('$' == *i) { + if (i + 1 != format_string.end()) { + ++i; + if ('$' == *i) { + while (i != format_string.end() && '$' == *i) { + formatted.push_back('$'); + ++i; + } + --i; + } else { + if (*i < '1' || *i > '9') { + DLOG(ERROR) << "Invalid placeholder: $" << *i; + continue; + } + uintptr_t index = *i - '1'; + if (offsets) { + ReplacementOffset r_offset(index, + static_cast<int>(formatted.size())); + r_offsets.insert( + std::upper_bound(r_offsets.begin(), r_offsets.end(), r_offset, + &CompareParameter), + r_offset); + } + if (index < substitutions) + formatted.append(subst.at(index)); + } + } + } else { + formatted.push_back(*i); + } + } + if (offsets) { + for (const auto& cur : r_offsets) + offsets->push_back(cur.offset); + } + return formatted; +} + +// The following code is compatible with the OpenBSD lcpy interface. See: +// http://www.gratisoft.us/todd/papers/strlcpy.html +// ftp://ftp.openbsd.org/pub/OpenBSD/src/lib/libc/string/{wcs,str}lcpy.c + +template <typename CHAR> +size_t lcpyT(CHAR* dst, const CHAR* src, size_t dst_size) { + for (size_t i = 0; i < dst_size; ++i) { + if ((dst[i] = src[i]) == 0) // We hit and copied the terminating NULL. + return i; + } + + // We were left off at dst_size. We over copied 1 byte. Null terminate. 
+ if (dst_size != 0) + dst[dst_size - 1] = 0; + + // Count the rest of the |src|, and return its length in characters. + while (src[dst_size]) + ++dst_size; + return dst_size; +} + +} // namespace internal + +} // namespace base + +#endif // BASE_STRINGS_STRING_UTIL_INTERNAL_H_ diff --git a/chromium/base/strings/string_util_posix.h b/chromium/base/strings/string_util_posix.h index 8299118e106..c869df78e58 100644 --- a/chromium/base/strings/string_util_posix.h +++ b/chromium/base/strings/string_util_posix.h @@ -11,7 +11,7 @@ #include <string.h> #include <wchar.h> -#include "base/logging.h" +#include "base/check.h" namespace base { diff --git a/chromium/base/strings/string_util_unittest.cc b/chromium/base/strings/string_util_unittest.cc index f1132b9be4e..b9be7fbfa84 100644 --- a/chromium/base/strings/string_util_unittest.cc +++ b/chromium/base/strings/string_util_unittest.cc @@ -12,6 +12,7 @@ #include <algorithm> #include <type_traits> +#include "base/bits.h" #include "base/stl_util.h" #include "base/strings/string16.h" #include "base/strings/utf_string_conversions.h" @@ -678,28 +679,28 @@ TEST(StringUtilTest, FormatBytesUnlocalized) { int64_t bytes; const char* expected; } cases[] = { - // Expected behavior: we show one post-decimal digit when we have - // under two pre-decimal digits, except in cases where it makes no - // sense (zero or bytes). - // Since we switch units once we cross the 1000 mark, this keeps - // the display of file sizes or bytes consistently around three - // digits. 
- {0, "0 B"}, - {512, "512 B"}, - {1024*1024, "1.0 MB"}, - {1024*1024*1024, "1.0 GB"}, - {10LL*1024*1024*1024, "10.0 GB"}, - {99LL*1024*1024*1024, "99.0 GB"}, - {105LL*1024*1024*1024, "105 GB"}, - {105LL*1024*1024*1024 + 500LL*1024*1024, "105 GB"}, - {~(1LL << 63), "8192 PB"}, - - {99*1024 + 103, "99.1 kB"}, - {1024*1024 + 103, "1.0 MB"}, - {1024*1024 + 205 * 1024, "1.2 MB"}, - {1024*1024*1024 + (927 * 1024*1024), "1.9 GB"}, - {10LL*1024*1024*1024, "10.0 GB"}, - {100LL*1024*1024*1024, "100 GB"}, + // Expected behavior: we show one post-decimal digit when we have + // under two pre-decimal digits, except in cases where it makes no + // sense (zero or bytes). + // Since we switch units once we cross the 1000 mark, this keeps + // the display of file sizes or bytes consistently around three + // digits. + {0, "0 B"}, + {512, "512 B"}, + {1024 * 1024, "1.0 MB"}, + {1024 * 1024 * 1024, "1.0 GB"}, + {10LL * 1024 * 1024 * 1024, "10.0 GB"}, + {99LL * 1024 * 1024 * 1024, "99.0 GB"}, + {105LL * 1024 * 1024 * 1024, "105 GB"}, + {105LL * 1024 * 1024 * 1024 + 500LL * 1024 * 1024, "105 GB"}, + {~(bits::LeftmostBit<int64_t>()), "8192 PB"}, + + {99 * 1024 + 103, "99.1 kB"}, + {1024 * 1024 + 103, "1.0 MB"}, + {1024 * 1024 + 205 * 1024, "1.2 MB"}, + {1024 * 1024 * 1024 + (927 * 1024 * 1024), "1.9 GB"}, + {10LL * 1024 * 1024 * 1024, "10.0 GB"}, + {100LL * 1024 * 1024 * 1024, "100 GB"}, }; for (const auto& i : cases) { diff --git a/chromium/base/strings/string_util_win.cc b/chromium/base/strings/string_util_win.cc new file mode 100644 index 00000000000..2c19729e0a5 --- /dev/null +++ b/chromium/base/strings/string_util_win.cc @@ -0,0 +1,145 @@ +// Copyright 2020 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "base/strings/string_util_win.h" + +#include "base/strings/string_util_internal.h" + +namespace base { + +#if defined(BASE_STRING16_IS_STD_U16STRING) +bool IsStringASCII(WStringPiece str) { + return internal::DoIsStringASCII(str.data(), str.length()); +} + +std::wstring ToLowerASCII(WStringPiece str) { + return internal::ToLowerASCIIImpl(str); +} + +std::wstring ToUpperASCII(WStringPiece str) { + return internal::ToUpperASCIIImpl(str); +} + +int CompareCaseInsensitiveASCII(WStringPiece a, WStringPiece b) { + return internal::CompareCaseInsensitiveASCIIT(a, b); +} + +bool EqualsCaseInsensitiveASCII(WStringPiece a, WStringPiece b) { + return a.size() == b.size() && + internal::CompareCaseInsensitiveASCIIT(a, b) == 0; +} + +bool RemoveChars(WStringPiece input, + WStringPiece remove_chars, + std::wstring* output) { + return internal::ReplaceCharsT(input, remove_chars, WStringPiece(), output); +} + +bool ReplaceChars(WStringPiece input, + WStringPiece replace_chars, + WStringPiece replace_with, + std::wstring* output) { + return internal::ReplaceCharsT(input, replace_chars, replace_with, output); +} + +bool TrimString(WStringPiece input, + WStringPiece trim_chars, + std::wstring* output) { + return internal::TrimStringT(input, trim_chars, TRIM_ALL, output) != + TRIM_NONE; +} + +WStringPiece TrimString(WStringPiece input, + WStringPiece trim_chars, + TrimPositions positions) { + return internal::TrimStringPieceT(input, trim_chars, positions); +} + +TrimPositions TrimWhitespace(WStringPiece input, + TrimPositions positions, + std::wstring* output) { + return internal::TrimStringT(input, WStringPiece(kWhitespaceWide), positions, + output); +} + +WStringPiece TrimWhitespace(WStringPiece input, TrimPositions positions) { + return internal::TrimStringPieceT(input, WStringPiece(kWhitespaceWide), + positions); +} + +std::wstring CollapseWhitespace(WStringPiece text, + bool trim_sequences_with_line_breaks) { + return internal::CollapseWhitespaceT(text, 
trim_sequences_with_line_breaks); +} + +bool ContainsOnlyChars(WStringPiece input, WStringPiece characters) { + return input.find_first_not_of(characters) == StringPiece::npos; +} + +bool LowerCaseEqualsASCII(WStringPiece str, StringPiece lowercase_ascii) { + return internal::DoLowerCaseEqualsASCII(str, lowercase_ascii); +} + +bool EqualsASCII(WStringPiece str, StringPiece ascii) { + return std::equal(ascii.begin(), ascii.end(), str.begin(), str.end()); +} + +bool StartsWith(WStringPiece str, + WStringPiece search_for, + CompareCase case_sensitivity) { + return internal::StartsWithT(str, search_for, case_sensitivity); +} + +bool EndsWith(WStringPiece str, + WStringPiece search_for, + CompareCase case_sensitivity) { + return internal::EndsWithT(str, search_for, case_sensitivity); +} + +void ReplaceFirstSubstringAfterOffset(std::wstring* str, + size_t start_offset, + WStringPiece find_this, + WStringPiece replace_with) { + internal::DoReplaceMatchesAfterOffset( + str, start_offset, internal::SubstringMatcher<std::wstring>{find_this}, + replace_with, internal::ReplaceType::REPLACE_FIRST); +} + +void ReplaceSubstringsAfterOffset(std::wstring* str, + size_t start_offset, + WStringPiece find_this, + WStringPiece replace_with) { + internal::DoReplaceMatchesAfterOffset( + str, start_offset, internal::SubstringMatcher<std::wstring>{find_this}, + replace_with, internal::ReplaceType::REPLACE_ALL); +} + +wchar_t* WriteInto(std::wstring* str, size_t length_with_null) { + return internal::WriteIntoT(str, length_with_null); +} + +std::wstring JoinString(span<const std::wstring> parts, + WStringPiece separator) { + return internal::JoinStringT(parts, separator); +} + +std::wstring JoinString(span<const WStringPiece> parts, + WStringPiece separator) { + return internal::JoinStringT(parts, separator); +} + +std::wstring JoinString(std::initializer_list<WStringPiece> parts, + WStringPiece separator) { + return internal::JoinStringT(parts, separator); +} + +std::wstring 
ReplaceStringPlaceholders(WStringPiece format_string, + const std::vector<std::wstring>& subst, + std::vector<size_t>* offsets) { + return internal::DoReplaceStringPlaceholders(format_string, subst, offsets); +} + +#endif + +} // namespace base diff --git a/chromium/base/strings/string_util_win.h b/chromium/base/strings/string_util_win.h index 7f260bfc8b4..c39133d923c 100644 --- a/chromium/base/strings/string_util_win.h +++ b/chromium/base/strings/string_util_win.h @@ -11,7 +11,14 @@ #include <string.h> #include <wchar.h> -#include "base/logging.h" +#include <string> +#include <vector> + +#include "base/check.h" +#include "base/containers/span.h" +#include "base/strings/string16.h" +#include "base/strings/string_piece.h" +#include "base/strings/string_util.h" namespace base { @@ -39,6 +46,154 @@ inline int vswprintf(wchar_t* buffer, size_t size, return length; } +// Utility functions to access the underlying string buffer as a wide char +// pointer. +// +// Note: These functions violate strict aliasing when char16 and wchar_t are +// unrelated types. We thus pass -fno-strict-aliasing to the compiler on +// non-Windows platforms [1], and rely on it being off in Clang's CL mode [2]. +// +// [1] https://crrev.com/b9a0976622/build/config/compiler/BUILD.gn#244 +// [2] +// https://github.com/llvm/llvm-project/blob/1e28a66/clang/lib/Driver/ToolChains/Clang.cpp#L3949 +inline wchar_t* as_writable_wcstr(char16* str) { + return reinterpret_cast<wchar_t*>(str); +} + +inline wchar_t* as_writable_wcstr(string16& str) { + return reinterpret_cast<wchar_t*>(data(str)); +} + +inline const wchar_t* as_wcstr(const char16* str) { + return reinterpret_cast<const wchar_t*>(str); +} + +inline const wchar_t* as_wcstr(StringPiece16 str) { + return reinterpret_cast<const wchar_t*>(str.data()); +} + +// Utility functions to access the underlying string buffer as a char16 pointer. 
+// Reinterprets a mutable wchar_t buffer as a char16 buffer.
+inline char16* as_writable_u16cstr(wchar_t* str) {
+  return reinterpret_cast<char16*>(str);
+}
+
+// Same as above, applied to the buffer obtained from data() on |str|.
+inline char16* as_writable_u16cstr(std::wstring& str) {
+  return as_writable_u16cstr(data(str));
+}
+
+// Reinterprets a read-only wchar_t buffer as a char16 buffer.
+inline const char16* as_u16cstr(const wchar_t* str) {
+  return reinterpret_cast<const char16*>(str);
+}
+
+// Same as above, applied to the characters referenced by |str|.
+inline const char16* as_u16cstr(WStringPiece str) {
+  return as_u16cstr(str.data());
+}
+
+// Converters between base::WStringPiece / std::wstring and
+// base::StringPiece16 / base::string16. Each one reinterprets the source
+// characters through the helpers above and keeps the source length.
+inline WStringPiece AsWStringPiece(StringPiece16 str) {
+  return WStringPiece(as_wcstr(str), str.size());
+}
+
+inline StringPiece16 AsStringPiece16(WStringPiece str) {
+  return StringPiece16(as_u16cstr(str), str.size());
+}
+
+inline std::wstring AsWString(StringPiece16 str) {
+  return std::wstring(as_wcstr(str), str.size());
+}
+
+inline string16 AsString16(WStringPiece str) {
+  return string16(as_u16cstr(str), str.size());
+}
+
+// What follows are std::wstring / base::WStringPiece overloads of the
+// cross-platform APIs. They are only enabled if std::wstring and
+// base::string16 are distinct types, as otherwise this would result in an
+// ODR violation.
+// TODO(crbug.com/911896): Remove those guards once base::string16 is
+// std::u16string.
+#if defined(BASE_STRING16_IS_STD_U16STRING)
+// std::wstring / WStringPiece overloads of the cross-platform string APIs.
+BASE_EXPORT bool IsStringASCII(WStringPiece str);
+
+BASE_EXPORT std::wstring ToLowerASCII(WStringPiece str);
+
+BASE_EXPORT std::wstring ToUpperASCII(WStringPiece str);
+
+BASE_EXPORT int CompareCaseInsensitiveASCII(WStringPiece a, WStringPiece b);
+
+BASE_EXPORT bool EqualsCaseInsensitiveASCII(WStringPiece a, WStringPiece b);
+
+BASE_EXPORT bool RemoveChars(WStringPiece input,
+                             WStringPiece remove_chars,
+                             std::wstring* output);
+
+BASE_EXPORT bool ReplaceChars(WStringPiece input,
+                              WStringPiece replace_chars,
+                              WStringPiece replace_with,
+                              std::wstring* output);
+
+// |output| is a std::wstring*, like every other wide overload in this section
+// that writes its result through an out-parameter.
+BASE_EXPORT bool TrimString(WStringPiece input,
+                            WStringPiece trim_chars,
+                            std::wstring* output);
+
+BASE_EXPORT WStringPiece TrimString(WStringPiece input,
+                                    WStringPiece trim_chars,
+                                    TrimPositions positions);
+
+BASE_EXPORT TrimPositions TrimWhitespace(WStringPiece input,
+                                         TrimPositions positions,
+                                         std::wstring* output);
+
+BASE_EXPORT WStringPiece TrimWhitespace(WStringPiece input,
+                                        TrimPositions positions);
+
+BASE_EXPORT std::wstring CollapseWhitespace(
+    WStringPiece text,
+    bool trim_sequences_with_line_breaks);
+
+BASE_EXPORT bool ContainsOnlyChars(WStringPiece input, WStringPiece characters);
+
+BASE_EXPORT bool LowerCaseEqualsASCII(WStringPiece str,
+                                      StringPiece lowercase_ascii);
+
+// Declared with WStringPiece so that it matches the wide definition in
+// string_util_win.cc instead of re-declaring the StringPiece16 flavour.
+BASE_EXPORT bool EqualsASCII(WStringPiece str, StringPiece ascii);
+
+BASE_EXPORT bool StartsWith(WStringPiece str,
+                            WStringPiece search_for,
+                            CompareCase case_sensitivity);
+
+BASE_EXPORT bool EndsWith(WStringPiece str,
+                          WStringPiece search_for,
+                          CompareCase case_sensitivity);
+
+BASE_EXPORT void ReplaceFirstSubstringAfterOffset(std::wstring* str,
+                                                  size_t start_offset,
+                                                  WStringPiece find_this,
+                                                  WStringPiece replace_with);
+
+BASE_EXPORT void ReplaceSubstringsAfterOffset(std::wstring* str,
+                                              size_t start_offset,
+                                              WStringPiece find_this,
+                                              WStringPiece replace_with);
+
+BASE_EXPORT wchar_t* WriteInto(std::wstring* str, size_t length_with_null);
+
+// JoinString() overloads for the supported containers of wide parts.
+BASE_EXPORT std::wstring JoinString(span<const std::wstring> parts,
+                                    WStringPiece separator);
+
+BASE_EXPORT std::wstring JoinString(span<const WStringPiece> parts,
+                                    WStringPiece separator);
+
+BASE_EXPORT std::wstring JoinString(std::initializer_list<WStringPiece> parts,
+                                    WStringPiece separator);
+
+// |subst| holds std::wstring values, matching the definition of this overload
+// in string_util_win.cc.
+BASE_EXPORT std::wstring ReplaceStringPlaceholders(
+    WStringPiece format_string,
+    const std::vector<std::wstring>& subst,
+    std::vector<size_t>* offsets);
+#endif
+
 }  // namespace base
 
 #endif  // BASE_STRINGS_STRING_UTIL_WIN_H_
diff --git a/chromium/base/strings/utf_string_conversions.cc b/chromium/base/strings/utf_string_conversions.cc
index 9a79889159e..0b55cd9e59d 100644
--- a/chromium/base/strings/utf_string_conversions.cc
+++ b/chromium/base/strings/utf_string_conversions.cc
@@ -339,4 +339,16 @@ std::string UTF16ToASCII(StringPiece16 utf16) {
   return std::string(utf16.begin(), utf16.end());
 }
 
+#if defined(WCHAR_T_IS_UTF16)
+// Builds the wide string element by element from |ascii|; DCHECKs that the
+// input really is ASCII.
+std::wstring ASCIIToWide(StringPiece ascii) {
+  DCHECK(IsStringASCII(ascii)) << ascii;
+  return std::wstring(ascii.begin(), ascii.end());
+}
+
+// Builds the narrow string element by element from |wide|; DCHECKs that the
+// input really is ASCII.
+std::string WideToASCII(WStringPiece wide) {
+  DCHECK(IsStringASCII(wide)) << wide;
+  return std::string(wide.begin(), wide.end());
+}
+#endif  // defined(WCHAR_T_IS_UTF16)
+
 }  // namespace base
diff --git a/chromium/base/strings/utf_string_conversions.h b/chromium/base/strings/utf_string_conversions.h
index f780fb4f4f8..9ee91453a02 100644
--- a/chromium/base/strings/utf_string_conversions.h
+++ b/chromium/base/strings/utf_string_conversions.h
@@ -12,6 +12,7 @@
 #include "base/base_export.h"
 #include "base/strings/string16.h"
 #include "base/strings/string_piece.h"
+#include "build/build_config.h"
 
 namespace base {
 
@@ -49,6 +50,16 @@ BASE_EXPORT string16 ASCIIToUTF16(StringPiece ascii) WARN_UNUSED_RESULT;
 // beforehand.
 BASE_EXPORT std::string UTF16ToASCII(StringPiece16 utf16) WARN_UNUSED_RESULT;
 
+#if defined(WCHAR_T_IS_UTF16)
+// This converts an ASCII string, typically a hardcoded constant, to a wide
+// string.
+BASE_EXPORT std::wstring ASCIIToWide(StringPiece ascii) WARN_UNUSED_RESULT;
+
+// Converts to 7-bit ASCII by truncating. The result must be known to be ASCII
+// beforehand.
+BASE_EXPORT std::string WideToASCII(WStringPiece wide) WARN_UNUSED_RESULT;
+#endif  // defined(WCHAR_T_IS_UTF16)
+
 }  // namespace base
 
 #endif  // BASE_STRINGS_UTF_STRING_CONVERSIONS_H_ |