summary refs log tree commit diff
path: root/chromium/base/strings
diff options
context:
space:
mode:
author Allan Sandfeld Jensen <allan.jensen@qt.io> 2020-10-12 14:27:29 +0200
committer Allan Sandfeld Jensen <allan.jensen@qt.io> 2020-10-13 09:35:20 +0000
commit c30a6232df03e1efbd9f3b226777b07e087a1122 (patch)
tree e992f45784689f373bcc38d1b79a239ebe17ee23 /chromium/base/strings
parent 7b5b123ac58f58ffde0f4f6e488bcd09aa4decd3 (diff)
download qtwebengine-chromium-c30a6232df03e1efbd9f3b226777b07e087a1122.tar.gz
BASELINE: Update Chromium to 85.0.4183.140 85-based
Change-Id: Iaa42f4680837c57725b1344f108c0196741f6057
Reviewed-by: Allan Sandfeld Jensen <allan.jensen@qt.io>
Diffstat (limited to 'chromium/base/strings')
-rw-r--r-- chromium/base/strings/no_trigraphs_unittest.cc 10
-rw-r--r-- chromium/base/strings/strcat.cc 62
-rw-r--r-- chromium/base/strings/strcat.h 14
-rw-r--r-- chromium/base/strings/strcat_internal.h 60
-rw-r--r-- chromium/base/strings/strcat_win.cc 35
-rw-r--r-- chromium/base/strings/strcat_win.h 45
-rw-r--r-- chromium/base/strings/string_number_conversions.cc 461
-rw-r--r-- chromium/base/strings/string_number_conversions.h 8
-rw-r--r-- chromium/base/strings/string_number_conversions_internal.h 303
-rw-r--r-- chromium/base/strings/string_number_conversions_win.cc 79
-rw-r--r-- chromium/base/strings/string_number_conversions_win.h 40
-rw-r--r-- chromium/base/strings/string_piece.h 6
-rw-r--r-- chromium/base/strings/string_split.cc 145
-rw-r--r-- chromium/base/strings/string_split.h 28
-rw-r--r-- chromium/base/strings/string_split_internal.h 100
-rw-r--r-- chromium/base/strings/string_split_win.cc 59
-rw-r--r-- chromium/base/strings/string_split_win.h 53
-rw-r--r-- chromium/base/strings/string_util.cc 834
-rw-r--r-- chromium/base/strings/string_util.h 134
-rw-r--r-- chromium/base/strings/string_util_internal.h 625
-rw-r--r-- chromium/base/strings/string_util_posix.h 2
-rw-r--r-- chromium/base/strings/string_util_unittest.cc 45
-rw-r--r-- chromium/base/strings/string_util_win.cc 145
-rw-r--r-- chromium/base/strings/string_util_win.h 157
-rw-r--r-- chromium/base/strings/utf_string_conversions.cc 12
-rw-r--r-- chromium/base/strings/utf_string_conversions.h 11
26 files changed, 1958 insertions, 1515 deletions
diff --git a/chromium/base/strings/no_trigraphs_unittest.cc b/chromium/base/strings/no_trigraphs_unittest.cc
new file mode 100644
index 00000000000..736679b9055
--- /dev/null
+++ b/chromium/base/strings/no_trigraphs_unittest.cc
@@ -0,0 +1,10 @@
+// Copyright 2020 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "base/strings/strcat.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+TEST(NoTrigraphs, Basic) {
+ EXPECT_EQ("??=", base::StrCat({"?", "?", "="}));
+}
diff --git a/chromium/base/strings/strcat.cc b/chromium/base/strings/strcat.cc
index 35231ef691d..d94c2ea9148 100644
--- a/chromium/base/strings/strcat.cc
+++ b/chromium/base/strings/strcat.cc
@@ -4,82 +4,42 @@
#include "base/strings/strcat.h"
-namespace base {
-
-namespace {
-
-// Reserves an additional amount of capacity in the given string, growing by at
-// least 2x if necessary. Used by StrAppendT().
-//
-// The "at least 2x" growing rule duplicates the exponential growth of
-// std::string. The problem is that most implementations of reserve() will grow
-// exactly to the requested amount instead of exponentially growing like would
-// happen when appending normally. If we didn't do this, an append after the
-// call to StrAppend() would definitely cause a reallocation, and loops with
-// StrAppend() calls would have O(n^2) complexity to execute. Instead, we want
-// StrAppend() to have the same semantics as std::string::append().
-template <typename String>
-void ReserveAdditionalIfNeeded(String* str,
- typename String::size_type additional) {
- const size_t required = str->size() + additional;
- // Check whether we need to reserve additional capacity at all.
- if (required <= str->capacity())
- return;
-
- str->reserve(std::max(required, str->capacity() * 2));
-}
-
-template <typename DestString, typename InputString>
-void StrAppendT(DestString* dest, span<const InputString> pieces) {
- size_t additional_size = 0;
- for (const auto& cur : pieces)
- additional_size += cur.size();
- ReserveAdditionalIfNeeded(dest, additional_size);
+#include <string>
- for (const auto& cur : pieces)
- dest->append(cur.data(), cur.size());
-}
+#include "base/strings/strcat_internal.h"
-} // namespace
+namespace base {
std::string StrCat(span<const StringPiece> pieces) {
- std::string result;
- StrAppendT(&result, pieces);
- return result;
+ return internal::StrCatT(pieces);
}
string16 StrCat(span<const StringPiece16> pieces) {
- string16 result;
- StrAppendT(&result, pieces);
- return result;
+ return internal::StrCatT(pieces);
}
std::string StrCat(span<const std::string> pieces) {
- std::string result;
- StrAppendT(&result, pieces);
- return result;
+ return internal::StrCatT(pieces);
}
string16 StrCat(span<const string16> pieces) {
- string16 result;
- StrAppendT(&result, pieces);
- return result;
+ return internal::StrCatT(pieces);
}
void StrAppend(std::string* dest, span<const StringPiece> pieces) {
- StrAppendT(dest, pieces);
+ internal::StrAppendT(dest, pieces);
}
void StrAppend(string16* dest, span<const StringPiece16> pieces) {
- StrAppendT(dest, pieces);
+ internal::StrAppendT(dest, pieces);
}
void StrAppend(std::string* dest, span<const std::string> pieces) {
- StrAppendT(dest, pieces);
+ internal::StrAppendT(dest, pieces);
}
void StrAppend(string16* dest, span<const string16> pieces) {
- StrAppendT(dest, pieces);
+ internal::StrAppendT(dest, pieces);
}
} // namespace base
diff --git a/chromium/base/strings/strcat.h b/chromium/base/strings/strcat.h
index b7c76215ab2..5d9c38a075c 100644
--- a/chromium/base/strings/strcat.h
+++ b/chromium/base/strings/strcat.h
@@ -69,10 +69,11 @@ BASE_EXPORT string16 StrCat(span<const string16> pieces) WARN_UNUSED_RESULT;
// Initializer list forwards to the array version.
inline std::string StrCat(std::initializer_list<StringPiece> pieces) {
- return StrCat(make_span(pieces.begin(), pieces.size()));
+ return StrCat(make_span(pieces));
}
+
inline string16 StrCat(std::initializer_list<StringPiece16> pieces) {
- return StrCat(make_span(pieces.begin(), pieces.size()));
+ return StrCat(make_span(pieces));
}
// StrAppend -------------------------------------------------------------------
@@ -91,13 +92,18 @@ BASE_EXPORT void StrAppend(string16* dest, span<const string16> pieces);
// Initializer list forwards to the array version.
inline void StrAppend(std::string* dest,
std::initializer_list<StringPiece> pieces) {
- return StrAppend(dest, make_span(pieces.begin(), pieces.size()));
+ StrAppend(dest, make_span(pieces));
}
+
inline void StrAppend(string16* dest,
std::initializer_list<StringPiece16> pieces) {
- return StrAppend(dest, make_span(pieces.begin(), pieces.size()));
+ StrAppend(dest, make_span(pieces));
}
} // namespace base
+#if defined(OS_WIN)
+#include "base/strings/strcat_win.h"
+#endif
+
#endif // BASE_STRINGS_STRCAT_H_
diff --git a/chromium/base/strings/strcat_internal.h b/chromium/base/strings/strcat_internal.h
new file mode 100644
index 00000000000..f5e52f08667
--- /dev/null
+++ b/chromium/base/strings/strcat_internal.h
@@ -0,0 +1,60 @@
+// Copyright 2020 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef BASE_STRINGS_STRCAT_INTERNAL_H_
+#define BASE_STRINGS_STRCAT_INTERNAL_H_
+
+#include <string>
+
+#include "base/containers/span.h"
+
+namespace base {
+
+namespace internal {
+
+// Reserves an additional amount of capacity in the given string, growing by at
+// least 2x if necessary. Used by StrAppendT().
+//
+// The "at least 2x" growing rule duplicates the exponential growth of
+// std::string. The problem is that most implementations of reserve() will grow
+// exactly to the requested amount instead of exponentially growing like would
+// happen when appending normally. If we didn't do this, an append after the
+// call to StrAppend() would definitely cause a reallocation, and loops with
+// StrAppend() calls would have O(n^2) complexity to execute. Instead, we want
+// StrAppend() to have the same semantics as std::string::append().
+template <typename String>
+void ReserveAdditionalIfNeeded(String* str,
+ typename String::size_type additional) {
+ const size_t required = str->size() + additional;
+ // Check whether we need to reserve additional capacity at all.
+ if (required <= str->capacity())
+ return;
+
+ str->reserve(std::max(required, str->capacity() * 2));
+}
+
+template <typename DestString, typename InputString>
+void StrAppendT(DestString* dest, span<const InputString> pieces) {
+ size_t additional_size = 0;
+ for (const auto& cur : pieces)
+ additional_size += cur.size();
+ ReserveAdditionalIfNeeded(dest, additional_size);
+
+ for (const auto& cur : pieces)
+ dest->append(cur.data(), cur.size());
+}
+
+template <typename StringT>
+auto StrCatT(span<const StringT> pieces) {
+ std::basic_string<typename StringT::value_type, typename StringT::traits_type>
+ result;
+ StrAppendT(&result, pieces);
+ return result;
+}
+
+} // namespace internal
+
+} // namespace base
+
+#endif // BASE_STRINGS_STRCAT_INTERNAL_H_
diff --git a/chromium/base/strings/strcat_win.cc b/chromium/base/strings/strcat_win.cc
new file mode 100644
index 00000000000..ad2f2e16ab1
--- /dev/null
+++ b/chromium/base/strings/strcat_win.cc
@@ -0,0 +1,35 @@
+// Copyright 2020 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "base/strings/strcat_win.h"
+
+#include <string>
+
+#include "base/containers/span.h"
+#include "base/strings/strcat_internal.h"
+#include "base/strings/string_piece.h"
+
+namespace base {
+
+#if defined(BASE_STRING16_IS_STD_U16STRING)
+
+std::wstring StrCat(span<const WStringPiece> pieces) {
+ return internal::StrCatT(pieces);
+}
+
+std::wstring StrCat(span<const std::wstring> pieces) {
+ return internal::StrCatT(pieces);
+}
+
+void StrAppend(std::wstring* dest, span<const WStringPiece> pieces) {
+ internal::StrAppendT(dest, pieces);
+}
+
+void StrAppend(std::wstring* dest, span<const std::wstring> pieces) {
+ internal::StrAppendT(dest, pieces);
+}
+
+#endif
+
+} // namespace base
diff --git a/chromium/base/strings/strcat_win.h b/chromium/base/strings/strcat_win.h
new file mode 100644
index 00000000000..4b8f0290e51
--- /dev/null
+++ b/chromium/base/strings/strcat_win.h
@@ -0,0 +1,45 @@
+// Copyright 2020 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef BASE_STRINGS_STRCAT_WIN_H_
+#define BASE_STRINGS_STRCAT_WIN_H_
+
+#include <initializer_list>
+#include <string>
+
+#include "base/base_export.h"
+#include "base/compiler_specific.h"
+#include "base/containers/span.h"
+#include "base/strings/string_piece.h"
+
+namespace base {
+
+// The following section contains overloads of the cross-platform APIs for
+// std::wstring and base::WStringPiece. These are only enabled if std::wstring
+// and base::string16 are distinct types, as otherwise this would result in an
+// ODR violation.
+// TODO(crbug.com/911896): Remove those guards once base::string16 is
+// std::u16string.
+#if defined(BASE_STRING16_IS_STD_U16STRING)
+BASE_EXPORT void StrAppend(std::wstring* dest, span<const WStringPiece> pieces);
+BASE_EXPORT void StrAppend(std::wstring* dest, span<const std::wstring> pieces);
+
+inline void StrAppend(std::wstring* dest,
+ std::initializer_list<WStringPiece> pieces) {
+ StrAppend(dest, make_span(pieces));
+}
+
+BASE_EXPORT std::wstring StrCat(span<const WStringPiece> pieces)
+ WARN_UNUSED_RESULT;
+BASE_EXPORT std::wstring StrCat(span<const std::wstring> pieces)
+ WARN_UNUSED_RESULT;
+
+inline std::wstring StrCat(std::initializer_list<WStringPiece> pieces) {
+ return StrCat(make_span(pieces));
+}
+#endif // defined(BASE_STRING16_IS_STD_U16STRING)
+
+} // namespace base
+
+#endif // BASE_STRINGS_STRCAT_WIN_H_
diff --git a/chromium/base/strings/string_number_conversions.cc b/chromium/base/strings/string_number_conversions.cc
index bd0a7e435f6..e2ef8acf66f 100644
--- a/chromium/base/strings/string_number_conversions.cc
+++ b/chromium/base/strings/string_number_conversions.cc
@@ -4,458 +4,120 @@
#include "base/strings/string_number_conversions.h"
-#include <ctype.h>
-#include <errno.h>
-#include <stdlib.h>
-#include <wctype.h>
+#include <iterator>
+#include <string>
-#include <limits>
-#include <type_traits>
-
-#include "base/check_op.h"
-#include "base/no_destructor.h"
-#include "base/numerics/safe_math.h"
-#include "base/strings/string_util.h"
-#include "base/strings/utf_string_conversions.h"
-#include "base/third_party/double_conversion/double-conversion/double-conversion.h"
+#include "base/containers/span.h"
+#include "base/logging.h"
+#include "base/strings/string16.h"
+#include "base/strings/string_number_conversions_internal.h"
+#include "base/strings/string_piece.h"
namespace base {
-namespace {
-
-template <typename STR, typename INT>
-struct IntToStringT {
- static STR IntToString(INT value) {
- // log10(2) ~= 0.3 bytes needed per bit or per byte log10(2**8) ~= 2.4.
- // So round up to allocate 3 output characters per byte, plus 1 for '-'.
- const size_t kOutputBufSize =
- 3 * sizeof(INT) + std::numeric_limits<INT>::is_signed;
-
- // Create the string in a temporary buffer, write it back to front, and
- // then return the substr of what we ended up using.
- using CHR = typename STR::value_type;
- CHR outbuf[kOutputBufSize];
-
- // The ValueOrDie call below can never fail, because UnsignedAbs is valid
- // for all valid inputs.
- typename std::make_unsigned<INT>::type res =
- CheckedNumeric<INT>(value).UnsignedAbs().ValueOrDie();
-
- CHR* end = outbuf + kOutputBufSize;
- CHR* i = end;
- do {
- --i;
- DCHECK(i != outbuf);
- *i = static_cast<CHR>((res % 10) + '0');
- res /= 10;
- } while (res != 0);
- if (IsValueNegative(value)) {
- --i;
- DCHECK(i != outbuf);
- *i = static_cast<CHR>('-');
- }
- return STR(i, end);
- }
-};
-
-// Utility to convert a character to a digit in a given base
-template<typename CHAR, int BASE, bool BASE_LTE_10> class BaseCharToDigit {
-};
-
-// Faster specialization for bases <= 10
-template<typename CHAR, int BASE> class BaseCharToDigit<CHAR, BASE, true> {
- public:
- static bool Convert(CHAR c, uint8_t* digit) {
- if (c >= '0' && c < '0' + BASE) {
- *digit = static_cast<uint8_t>(c - '0');
- return true;
- }
- return false;
- }
-};
-
-// Specialization for bases where 10 < base <= 36
-template<typename CHAR, int BASE> class BaseCharToDigit<CHAR, BASE, false> {
- public:
- static bool Convert(CHAR c, uint8_t* digit) {
- if (c >= '0' && c <= '9') {
- *digit = c - '0';
- } else if (c >= 'a' && c < 'a' + BASE - 10) {
- *digit = c - 'a' + 10;
- } else if (c >= 'A' && c < 'A' + BASE - 10) {
- *digit = c - 'A' + 10;
- } else {
- return false;
- }
- return true;
- }
-};
-
-template <int BASE, typename CHAR>
-bool CharToDigit(CHAR c, uint8_t* digit) {
- return BaseCharToDigit<CHAR, BASE, BASE <= 10>::Convert(c, digit);
-}
-
-// There is an IsUnicodeWhitespace for wchars defined in string_util.h, but it
-// is locale independent, whereas the functions we are replacing were
-// locale-dependent. TBD what is desired, but for the moment let's not
-// introduce a change in behaviour.
-template<typename CHAR> class WhitespaceHelper {
-};
-
-template<> class WhitespaceHelper<char> {
- public:
- static bool Invoke(char c) {
- return 0 != isspace(static_cast<unsigned char>(c));
- }
-};
-
-template<> class WhitespaceHelper<char16> {
- public:
- static bool Invoke(char16 c) {
- return 0 != iswspace(c);
- }
-};
-
-template<typename CHAR> bool LocalIsWhitespace(CHAR c) {
- return WhitespaceHelper<CHAR>::Invoke(c);
-}
-
-// IteratorRangeToNumberTraits should provide:
-// - a typedef for iterator_type, the iterator type used as input.
-// - a typedef for value_type, the target numeric type.
-// - static functions min, max (returning the minimum and maximum permitted
-// values)
-// - constant kBase, the base in which to interpret the input
-template<typename IteratorRangeToNumberTraits>
-class IteratorRangeToNumber {
- public:
- typedef IteratorRangeToNumberTraits traits;
- typedef typename traits::iterator_type const_iterator;
- typedef typename traits::value_type value_type;
-
- // Generalized iterator-range-to-number conversion.
- //
- static bool Invoke(const_iterator begin,
- const_iterator end,
- value_type* output) {
- bool valid = true;
-
- while (begin != end && LocalIsWhitespace(*begin)) {
- valid = false;
- ++begin;
- }
-
- if (begin != end && *begin == '-') {
- if (!std::numeric_limits<value_type>::is_signed) {
- *output = 0;
- valid = false;
- } else if (!Negative::Invoke(begin + 1, end, output)) {
- valid = false;
- }
- } else {
- if (begin != end && *begin == '+') {
- ++begin;
- }
- if (!Positive::Invoke(begin, end, output)) {
- valid = false;
- }
- }
-
- return valid;
- }
-
- private:
- // Sign provides:
- // - a static function, CheckBounds, that determines whether the next digit
- // causes an overflow/underflow
- // - a static function, Increment, that appends the next digit appropriately
- // according to the sign of the number being parsed.
- template<typename Sign>
- class Base {
- public:
- static bool Invoke(const_iterator begin, const_iterator end,
- typename traits::value_type* output) {
- *output = 0;
-
- if (begin == end) {
- return false;
- }
-
- // Note: no performance difference was found when using template
- // specialization to remove this check in bases other than 16
- if (traits::kBase == 16 && end - begin > 2 && *begin == '0' &&
- (*(begin + 1) == 'x' || *(begin + 1) == 'X')) {
- begin += 2;
- }
-
- for (const_iterator current = begin; current != end; ++current) {
- uint8_t new_digit = 0;
-
- if (!CharToDigit<traits::kBase>(*current, &new_digit)) {
- return false;
- }
-
- if (current != begin) {
- if (!Sign::CheckBounds(output, new_digit)) {
- return false;
- }
- *output *= traits::kBase;
- }
-
- Sign::Increment(new_digit, output);
- }
- return true;
- }
- };
-
- class Positive : public Base<Positive> {
- public:
- static bool CheckBounds(value_type* output, uint8_t new_digit) {
- if (*output > static_cast<value_type>(traits::max() / traits::kBase) ||
- (*output == static_cast<value_type>(traits::max() / traits::kBase) &&
- new_digit > traits::max() % traits::kBase)) {
- *output = traits::max();
- return false;
- }
- return true;
- }
- static void Increment(uint8_t increment, value_type* output) {
- *output += increment;
- }
- };
-
- class Negative : public Base<Negative> {
- public:
- static bool CheckBounds(value_type* output, uint8_t new_digit) {
- if (*output < traits::min() / traits::kBase ||
- (*output == traits::min() / traits::kBase &&
- new_digit > 0 - traits::min() % traits::kBase)) {
- *output = traits::min();
- return false;
- }
- return true;
- }
- static void Increment(uint8_t increment, value_type* output) {
- *output -= increment;
- }
- };
-};
-
-template<typename ITERATOR, typename VALUE, int BASE>
-class BaseIteratorRangeToNumberTraits {
- public:
- typedef ITERATOR iterator_type;
- typedef VALUE value_type;
- static value_type min() {
- return std::numeric_limits<value_type>::min();
- }
- static value_type max() {
- return std::numeric_limits<value_type>::max();
- }
- static const int kBase = BASE;
-};
-
-template<typename ITERATOR>
-class BaseHexIteratorRangeToIntTraits
- : public BaseIteratorRangeToNumberTraits<ITERATOR, int, 16> {
-};
-
-template <typename ITERATOR>
-class BaseHexIteratorRangeToUIntTraits
- : public BaseIteratorRangeToNumberTraits<ITERATOR, uint32_t, 16> {};
-
-template <typename ITERATOR>
-class BaseHexIteratorRangeToInt64Traits
- : public BaseIteratorRangeToNumberTraits<ITERATOR, int64_t, 16> {};
-
-template <typename ITERATOR>
-class BaseHexIteratorRangeToUInt64Traits
- : public BaseIteratorRangeToNumberTraits<ITERATOR, uint64_t, 16> {};
-
-typedef BaseHexIteratorRangeToIntTraits<StringPiece::const_iterator>
- HexIteratorRangeToIntTraits;
-
-typedef BaseHexIteratorRangeToUIntTraits<StringPiece::const_iterator>
- HexIteratorRangeToUIntTraits;
-
-typedef BaseHexIteratorRangeToInt64Traits<StringPiece::const_iterator>
- HexIteratorRangeToInt64Traits;
-
-typedef BaseHexIteratorRangeToUInt64Traits<StringPiece::const_iterator>
- HexIteratorRangeToUInt64Traits;
-
-template <typename VALUE, int BASE>
-class StringPieceToNumberTraits
- : public BaseIteratorRangeToNumberTraits<StringPiece::const_iterator,
- VALUE,
- BASE> {
-};
-
-template <typename VALUE>
-bool StringToIntImpl(StringPiece input, VALUE* output) {
- return IteratorRangeToNumber<StringPieceToNumberTraits<VALUE, 10> >::Invoke(
- input.begin(), input.end(), output);
-}
-
-template <typename VALUE, int BASE>
-class StringPiece16ToNumberTraits
- : public BaseIteratorRangeToNumberTraits<StringPiece16::const_iterator,
- VALUE,
- BASE> {
-};
-
-template <typename VALUE>
-bool String16ToIntImpl(StringPiece16 input, VALUE* output) {
- return IteratorRangeToNumber<StringPiece16ToNumberTraits<VALUE, 10> >::Invoke(
- input.begin(), input.end(), output);
-}
-
-} // namespace
-
std::string NumberToString(int value) {
- return IntToStringT<std::string, int>::IntToString(value);
+ return internal::IntToStringT<std::string>(value);
}
string16 NumberToString16(int value) {
- return IntToStringT<string16, int>::IntToString(value);
+ return internal::IntToStringT<string16>(value);
}
std::string NumberToString(unsigned value) {
- return IntToStringT<std::string, unsigned>::IntToString(value);
+ return internal::IntToStringT<std::string>(value);
}
string16 NumberToString16(unsigned value) {
- return IntToStringT<string16, unsigned>::IntToString(value);
+ return internal::IntToStringT<string16>(value);
}
std::string NumberToString(long value) {
- return IntToStringT<std::string, long>::IntToString(value);
+ return internal::IntToStringT<std::string>(value);
}
string16 NumberToString16(long value) {
- return IntToStringT<string16, long>::IntToString(value);
+ return internal::IntToStringT<string16>(value);
}
std::string NumberToString(unsigned long value) {
- return IntToStringT<std::string, unsigned long>::IntToString(value);
+ return internal::IntToStringT<std::string>(value);
}
string16 NumberToString16(unsigned long value) {
- return IntToStringT<string16, unsigned long>::IntToString(value);
+ return internal::IntToStringT<string16>(value);
}
std::string NumberToString(long long value) {
- return IntToStringT<std::string, long long>::IntToString(value);
+ return internal::IntToStringT<std::string>(value);
}
string16 NumberToString16(long long value) {
- return IntToStringT<string16, long long>::IntToString(value);
+ return internal::IntToStringT<string16>(value);
}
std::string NumberToString(unsigned long long value) {
- return IntToStringT<std::string, unsigned long long>::IntToString(value);
+ return internal::IntToStringT<std::string>(value);
}
string16 NumberToString16(unsigned long long value) {
- return IntToStringT<string16, unsigned long long>::IntToString(value);
-}
-
-static const double_conversion::DoubleToStringConverter*
-GetDoubleToStringConverter() {
- static NoDestructor<double_conversion::DoubleToStringConverter> converter(
- double_conversion::DoubleToStringConverter::EMIT_POSITIVE_EXPONENT_SIGN,
- nullptr, nullptr, 'e', -6, 12, 0, 0);
- return converter.get();
+ return internal::IntToStringT<string16>(value);
}
std::string NumberToString(double value) {
- char buffer[32];
- double_conversion::StringBuilder builder(buffer, sizeof(buffer));
- GetDoubleToStringConverter()->ToShortest(value, &builder);
- return std::string(buffer, builder.position());
+ return internal::DoubleToStringT<std::string>(value);
}
-base::string16 NumberToString16(double value) {
- char buffer[32];
- double_conversion::StringBuilder builder(buffer, sizeof(buffer));
- GetDoubleToStringConverter()->ToShortest(value, &builder);
-
- // The number will be ASCII. This creates the string using the "input
- // iterator" variant which promotes from 8-bit to 16-bit via "=".
- return base::string16(&buffer[0], &buffer[builder.position()]);
+string16 NumberToString16(double value) {
+ return internal::DoubleToStringT<string16>(value);
}
bool StringToInt(StringPiece input, int* output) {
- return StringToIntImpl(input, output);
+ return internal::StringToIntImpl(input, *output);
}
bool StringToInt(StringPiece16 input, int* output) {
- return String16ToIntImpl(input, output);
+ return internal::StringToIntImpl(input, *output);
}
bool StringToUint(StringPiece input, unsigned* output) {
- return StringToIntImpl(input, output);
+ return internal::StringToIntImpl(input, *output);
}
bool StringToUint(StringPiece16 input, unsigned* output) {
- return String16ToIntImpl(input, output);
+ return internal::StringToIntImpl(input, *output);
}
bool StringToInt64(StringPiece input, int64_t* output) {
- return StringToIntImpl(input, output);
+ return internal::StringToIntImpl(input, *output);
}
bool StringToInt64(StringPiece16 input, int64_t* output) {
- return String16ToIntImpl(input, output);
+ return internal::StringToIntImpl(input, *output);
}
bool StringToUint64(StringPiece input, uint64_t* output) {
- return StringToIntImpl(input, output);
+ return internal::StringToIntImpl(input, *output);
}
bool StringToUint64(StringPiece16 input, uint64_t* output) {
- return String16ToIntImpl(input, output);
+ return internal::StringToIntImpl(input, *output);
}
bool StringToSizeT(StringPiece input, size_t* output) {
- return StringToIntImpl(input, output);
+ return internal::StringToIntImpl(input, *output);
}
bool StringToSizeT(StringPiece16 input, size_t* output) {
- return String16ToIntImpl(input, output);
-}
-
-template <typename STRING, typename CHAR>
-bool StringToDoubleImpl(STRING input, const CHAR* data, double* output) {
- static NoDestructor<double_conversion::StringToDoubleConverter> converter(
- double_conversion::StringToDoubleConverter::ALLOW_LEADING_SPACES |
- double_conversion::StringToDoubleConverter::ALLOW_TRAILING_JUNK,
- 0.0, 0, nullptr, nullptr);
-
- int processed_characters_count;
- *output = converter->StringToDouble(data, input.size(),
- &processed_characters_count);
-
- // Cases to return false:
- // - If the input string is empty, there was nothing to parse.
- // - If the value saturated to HUGE_VAL.
- // - If the entire string was not processed, there are either characters
- // remaining in the string after a parsed number, or the string does not
- // begin with a parseable number.
- // - If the first character is a space, there was leading whitespace
- return !input.empty() && *output != HUGE_VAL && *output != -HUGE_VAL &&
- static_cast<size_t>(processed_characters_count) == input.size() &&
- !IsUnicodeWhitespace(input[0]);
+ return internal::StringToIntImpl(input, *output);
}
bool StringToDouble(StringPiece input, double* output) {
- return StringToDoubleImpl(input, input.data(), output);
+ return internal::StringToDoubleImpl(input, input.data(), *output);
}
bool StringToDouble(StringPiece16 input, double* output) {
- return StringToDoubleImpl(
- input, reinterpret_cast<const uint16_t*>(input.data()), output);
+ return internal::StringToDoubleImpl(
+ input, reinterpret_cast<const uint16_t*>(input.data()), *output);
}
std::string HexEncode(const void* bytes, size_t size) {
@@ -477,69 +139,36 @@ std::string HexEncode(base::span<const uint8_t> bytes) {
}
bool HexStringToInt(StringPiece input, int* output) {
- return IteratorRangeToNumber<HexIteratorRangeToIntTraits>::Invoke(
- input.begin(), input.end(), output);
+ return internal::HexStringToIntImpl(input, *output);
}
bool HexStringToUInt(StringPiece input, uint32_t* output) {
- return IteratorRangeToNumber<HexIteratorRangeToUIntTraits>::Invoke(
- input.begin(), input.end(), output);
+ return internal::HexStringToIntImpl(input, *output);
}
bool HexStringToInt64(StringPiece input, int64_t* output) {
- return IteratorRangeToNumber<HexIteratorRangeToInt64Traits>::Invoke(
- input.begin(), input.end(), output);
+ return internal::HexStringToIntImpl(input, *output);
}
bool HexStringToUInt64(StringPiece input, uint64_t* output) {
- return IteratorRangeToNumber<HexIteratorRangeToUInt64Traits>::Invoke(
- input.begin(), input.end(), output);
-}
-
-template <typename Container>
-static bool HexStringToByteContainer(StringPiece input, Container* output) {
- DCHECK_EQ(output->size(), 0u);
- size_t count = input.size();
- if (count == 0 || (count % 2) != 0)
- return false;
- for (uintptr_t i = 0; i < count / 2; ++i) {
- uint8_t msb = 0; // most significant 4 bits
- uint8_t lsb = 0; // least significant 4 bits
- if (!CharToDigit<16>(input[i * 2], &msb) ||
- !CharToDigit<16>(input[i * 2 + 1], &lsb)) {
- return false;
- }
- output->push_back((msb << 4) | lsb);
- }
- return true;
+ return internal::HexStringToIntImpl(input, *output);
}
bool HexStringToBytes(StringPiece input, std::vector<uint8_t>* output) {
- return HexStringToByteContainer(input, output);
+ DCHECK(output->empty());
+ return internal::HexStringToByteContainer(input, std::back_inserter(*output));
}
bool HexStringToString(StringPiece input, std::string* output) {
- return HexStringToByteContainer(input, output);
+ DCHECK(output->empty());
+ return internal::HexStringToByteContainer(input, std::back_inserter(*output));
}
bool HexStringToSpan(StringPiece input, base::span<uint8_t> output) {
- size_t count = input.size();
- if (count == 0 || (count % 2) != 0)
+ if (input.size() / 2 != output.size())
return false;
- if (count / 2 != output.size())
- return false;
-
- for (uintptr_t i = 0; i < count / 2; ++i) {
- uint8_t msb = 0; // most significant 4 bits
- uint8_t lsb = 0; // least significant 4 bits
- if (!CharToDigit<16>(input[i * 2], &msb) ||
- !CharToDigit<16>(input[i * 2 + 1], &lsb)) {
- return false;
- }
- output[i] = (msb << 4) | lsb;
- }
- return true;
+ return internal::HexStringToByteContainer(input, output.begin());
}
} // namespace base
diff --git a/chromium/base/strings/string_number_conversions.h b/chromium/base/strings/string_number_conversions.h
index 87df24e21c9..f001641d186 100644
--- a/chromium/base/strings/string_number_conversions.h
+++ b/chromium/base/strings/string_number_conversions.h
@@ -20,10 +20,6 @@
// ----------------------------------------------------------------------------
// IMPORTANT MESSAGE FROM YOUR SPONSOR
//
-// This file contains no "wstring" variants. New code should use string16. If
-// you need to make old code work, use the UTF8 version and convert. Please do
-// not add wstring variants.
-//
// Please do not add "convenience" functions for converting strings to integers
// that return the value and ignore success/failure. That encourages people to
// write code that doesn't properly handle the error conditions.
@@ -154,4 +150,8 @@ BASE_EXPORT bool HexStringToSpan(StringPiece input, base::span<uint8_t> output);
} // namespace base
+#if defined(OS_WIN)
+#include "base/strings/string_number_conversions_win.h"
+#endif
+
#endif // BASE_STRINGS_STRING_NUMBER_CONVERSIONS_H_
diff --git a/chromium/base/strings/string_number_conversions_internal.h b/chromium/base/strings/string_number_conversions_internal.h
new file mode 100644
index 00000000000..1aa0c74c0b5
--- /dev/null
+++ b/chromium/base/strings/string_number_conversions_internal.h
@@ -0,0 +1,303 @@
+// Copyright 2020 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef BASE_STRINGS_STRING_NUMBER_CONVERSIONS_INTERNAL_H_
+#define BASE_STRINGS_STRING_NUMBER_CONVERSIONS_INTERNAL_H_
+
+#include <ctype.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <wctype.h>
+
+#include <limits>
+
+#include "base/check_op.h"
+#include "base/logging.h"
+#include "base/no_destructor.h"
+#include "base/numerics/safe_math.h"
+#include "base/strings/string_util.h"
+#include "base/third_party/double_conversion/double-conversion/double-conversion.h"
+
+namespace base {
+
+namespace internal {
+
+template <typename STR, typename INT>
+static STR IntToStringT(INT value) {
+  using CHR = typename STR::value_type;
+
+  // Upper bound on the rendered length: one byte contributes
+  // log10(2**8) ~= 2.4 decimal digits, rounded up to 3 characters per byte,
+  // plus one slot for '-' when INT is signed.
+  const size_t kOutputBufSize =
+      3 * sizeof(INT) + std::numeric_limits<INT>::is_signed;
+  CHR outbuf[kOutputBufSize];
+
+  // UnsignedAbs is valid for every valid input, so ValueOrDie cannot fail.
+  std::make_unsigned_t<INT> magnitude =
+      CheckedNumeric<INT>(value).UnsignedAbs().ValueOrDie();
+
+  // Digits come out least-significant first, so the buffer is filled from its
+  // end towards its start and the used tail is returned.
+  CHR* buf_end = outbuf + kOutputBufSize;
+  CHR* cursor = buf_end;
+  do {
+    --cursor;
+    DCHECK(cursor != outbuf);
+    *cursor = static_cast<CHR>((magnitude % 10) + '0');
+    magnitude /= 10;
+  } while (magnitude != 0);
+
+  if (IsValueNegative(value)) {
+    --cursor;
+    DCHECK(cursor != outbuf);
+    *cursor = static_cast<CHR>('-');
+  }
+
+  return STR(cursor, buf_end);
+}
+
+// Maps the character |c| to its value as a digit in base |BASE|. Decimal
+// digits and letters of either case are accepted ('a' and 'A' both mean 10).
+// Yields base::nullopt when |c| is not a digit of that base.
+template <int BASE, typename CHAR>
+Optional<uint8_t> CharToDigit(CHAR c) {
+  static_assert(1 <= BASE && BASE <= 36, "BASE needs to be in [1, 36]");
+
+  // How many digit values are spelled with '0'..'9' and with letters. The
+  // letter count is not positive for BASE <= 10, which makes both letter
+  // ranges below empty.
+  constexpr int kNumDecimalDigits = BASE < 10 ? BASE : 10;
+  constexpr int kNumLetterDigits = BASE - 10;
+
+  if (c >= '0' && c < '0' + kNumDecimalDigits)
+    return c - '0';
+
+  if (c >= 'a' && c < 'a' + kNumLetterDigits)
+    return c - 'a' + 10;
+
+  if (c >= 'A' && c < 'A' + kNumLetterDigits)
+    return c - 'A' + 10;
+
+  return base::nullopt;
+}
+
+// Whitespace test used while parsing numbers. string_util.h offers a
+// locale-independent IsUnicodeWhitespace for wchars, but the functions this
+// code replaces were locale-dependent. Until it is decided which behaviour is
+// wanted, keep deferring to the C library classifiers so nothing changes.
+template <typename CHAR>
+class WhitespaceHelper {};
+
+// Narrow characters are classified with isspace().
+template <>
+class WhitespaceHelper<char> {
+ public:
+  static bool Invoke(char c) {
+    const int classified = isspace(static_cast<unsigned char>(c));
+    return classified != 0;
+  }
+};
+
+// char16 characters are classified with iswspace().
+template <>
+class WhitespaceHelper<char16> {
+ public:
+  static bool Invoke(char16 c) {
+    return iswspace(c) != 0;
+  }
+};
+
+// Entry point: dispatches to the helper specialization for |c|'s type.
+template <typename CHAR>
+bool LocalIsWhitespace(CHAR c) {
+  using Helper = WhitespaceHelper<CHAR>;
+  return Helper::Invoke(c);
+}
+
+template <typename Number, int kBase>
+class StringToNumberParser {  // Parses base-|kBase| digit runs into |Number|.
+ public:
+ struct Result {
+ Number value = 0;  // Value accumulated so far; kMax/kMin if a bound was hit.
+ bool valid = false;  // True only for a non-empty, fully accepted digit run.
+ };
+ // Limits of |Number|; the CheckBounds functions below compare against them.
+ static constexpr Number kMin = std::numeric_limits<Number>::min();
+ static constexpr Number kMax = std::numeric_limits<Number>::max();
+ // Base holds the digit loop shared by both signs (CRTP on the sign policy).
+ // Sign provides:
+ // - a static function, CheckBounds, that determines whether the next digit
+ // causes an overflow/underflow
+ // - a static function, Increment, that appends the next digit appropriately
+ // according to the sign of the number being parsed.
+ template <typename Sign>
+ class Base {
+ public:
+ template <typename Iter>
+ static Result Invoke(Iter begin, Iter end) {
+ Number value = 0;
+ // An empty range is not a number.
+ if (begin == end) {
+ return {value, false};
+ }
+ // Base 16 only: skip a leading "0x"/"0X" when at least one char follows it.
+ // Note: no performance difference was found when using template
+ // specialization to remove this check in bases other than 16
+ if (kBase == 16 && end - begin > 2 && *begin == '0' &&
+ (*(begin + 1) == 'x' || *(begin + 1) == 'X')) {
+ begin += 2;
+ }
+
+ for (Iter current = begin; current != end; ++current) {
+ Optional<uint8_t> new_digit = CharToDigit<kBase>(*current);
+ // A non-digit stops parsing; the partial value is returned as invalid.
+ if (!new_digit) {
+ return {value, false};
+ }
+ // The first digit needs no shift or bounds check because value is still 0.
+ if (current != begin) {
+ Result result = Sign::CheckBounds(value, *new_digit);
+ if (!result.valid)
+ return result;
+
+ value *= kBase;
+ }
+
+ value = Sign::Increment(value, *new_digit);
+ }
+ return {value, true};
+ }
+ };
+ // Policy for non-negative input: digits are added, overflow yields kMax.
+ class Positive : public Base<Positive> {
+ public:
+ static Result CheckBounds(Number value, uint8_t new_digit) {
+ if (value > static_cast<Number>(kMax / kBase) ||
+ (value == static_cast<Number>(kMax / kBase) &&
+ new_digit > kMax % kBase)) {
+ return {kMax, false};
+ }
+ return {value, true};
+ }
+ static Number Increment(Number lhs, uint8_t rhs) { return lhs + rhs; }
+ };
+ // Policy for negative input: digits are subtracted, underflow yields kMin.
+ class Negative : public Base<Negative> {
+ public:
+ static Result CheckBounds(Number value, uint8_t new_digit) {
+ if (value < kMin / kBase ||
+ (value == kMin / kBase && new_digit > 0 - kMin % kBase)) {
+ return {kMin, false};
+ }
+ return {value, true};
+ }
+ static Number Increment(Number lhs, uint8_t rhs) { return lhs - rhs; }
+ };
+};
+
+// Parses |input| as a |Number| in base |kBase| and returns the parser's
+// Result. Leading whitespace is skipped so that a value can still be
+// produced, but it always marks the result invalid. A leading '-' selects the
+// negative parser and is rejected outright for unsigned types; a single
+// leading '+' is accepted.
+template <typename Number, int kBase, typename Str>
+auto StringToNumber(BasicStringPiece<Str> input) {
+  using Parser = StringToNumberParser<Number, kBase>;
+  using Result = typename Parser::Result;
+
+  auto cursor = input.begin();
+  auto last = input.end();
+
+  while (cursor != last && LocalIsWhitespace(*cursor))
+    ++cursor;
+  const bool skipped_whitespace = cursor != input.begin();
+
+  const bool is_negative = cursor != last && *cursor == '-';
+  if (is_negative && !std::numeric_limits<Number>::is_signed)
+    return Result{0, false};
+
+  Result parsed;
+  if (is_negative) {
+    // Hand over everything after the minus sign.
+    parsed = Parser::Negative::Invoke(cursor + 1, last);
+  } else {
+    if (cursor != last && *cursor == '+')
+      ++cursor;
+    parsed = Parser::Positive::Invoke(cursor, last);
+  }
+
+  parsed.valid &= !skipped_whitespace;
+  return parsed;
+}
+
+// Base-10 front end for StringToNumber. |output| always receives the parser's
+// value, even when the parse is invalid; the return value reports validity.
+template <typename STR, typename VALUE>
+bool StringToIntImpl(BasicStringPiece<STR> input, VALUE& output) {
+  const auto parsed = StringToNumber<VALUE, 10>(input);
+  output = parsed.value;
+  return parsed.valid;
+}
+
+// Base-16 front end for StringToNumber. |output| always receives the parser's
+// value, even when the parse is invalid; the return value reports validity.
+template <typename STR, typename VALUE>
+bool HexStringToIntImpl(BasicStringPiece<STR> input, VALUE& output) {
+  const auto parsed = StringToNumber<VALUE, 16>(input);
+  output = parsed.value;
+  return parsed.valid;
+}
+
+// Returns the lazily created, never destroyed converter used to format
+// doubles. It emits an explicit '+' on positive exponents, uses 'e' as the
+// exponent character and passes no infinity/NaN symbols.
+// NOTE(review): -6 and 12 look like the exponent range in which the shortest
+// form stays in plain decimal notation; confirm against double-conversion.
+//
+// Declared inline rather than static: this is a header, and a static
+// function would give every including translation unit its own copy of the
+// function and of the converter.
+inline const double_conversion::DoubleToStringConverter*
+GetDoubleToStringConverter() {
+  static NoDestructor<double_conversion::DoubleToStringConverter> converter(
+      double_conversion::DoubleToStringConverter::EMIT_POSITIVE_EXPONENT_SIGN,
+      nullptr, nullptr, 'e', -6, 12, 0, 0);
+  return converter.get();
+}
+
+// Converts a given (data, size) pair to a desired string type. For
+// performance reasons, this dispatches to a different constructor if the
+// passed-in data matches the string's value_type.
+template <typename StringT>
+StringT ToString(const typename StringT::value_type* data, size_t size) {
+ return StringT(data, size);  // Same character type: pointer + length.
+}
+// Any other character type: build from the range [data, data + size).
+template <typename StringT, typename CharT>
+StringT ToString(const CharT* data, size_t size) {
+ return StringT(data, data + size);
+}
+
+// Formats |value| with the shared converter's ToShortest() into a fixed
+// stack buffer, then copies the produced characters into a StringT.
+template <typename StringT>
+StringT DoubleToStringT(double value) {
+  char digits[32];
+  double_conversion::StringBuilder sink(digits, sizeof(digits));
+
+  const double_conversion::DoubleToStringConverter* converter =
+      GetDoubleToStringConverter();
+  converter->ToShortest(value, &sink);
+
+  // position() is the number of characters written so far.
+  return ToString<StringT>(digits, sink.position());
+}
+
+// Parses the whole of |input| as a double into |output|. |data| points at
+// the same characters as |input|, in the character type the converter takes.
+// Returns true only if the entire string was consumed as a finite number
+// with no leading whitespace. |output| is written in every case.
+template <typename STRING, typename CHAR>
+bool StringToDoubleImpl(STRING input, const CHAR* data, double& output) {
+  static NoDestructor<double_conversion::StringToDoubleConverter> converter(
+      double_conversion::StringToDoubleConverter::ALLOW_LEADING_SPACES |
+          double_conversion::StringToDoubleConverter::ALLOW_TRAILING_JUNK,
+      0.0, 0, nullptr, nullptr);
+
+  // The converter takes the length as an int. A longer input would be
+  // silently truncated (or turn negative) by the implicit conversion, so
+  // reject it up front instead of parsing a bogus length.
+  if (input.size() > static_cast<size_t>(std::numeric_limits<int>::max())) {
+    output = 0.0;
+    return false;
+  }
+
+  int processed_characters_count = 0;
+  output = converter->StringToDouble(data, static_cast<int>(input.size()),
+                                     &processed_characters_count);
+
+  // Cases to return false:
+  //  - If the input string is empty, there was nothing to parse.
+  //  - If the value saturated to HUGE_VAL.
+  //  - If the entire string was not processed, there are either characters
+  //    remaining in the string after a parsed number, or the string does not
+  //    begin with a parseable number.
+  //  - If the first character is a space, there was leading whitespace
+  return !input.empty() && output != HUGE_VAL && output != -HUGE_VAL &&
+         static_cast<size_t>(processed_characters_count) == input.size() &&
+         !IsUnicodeWhitespace(input[0]);
+}
+
+// Decodes the hex digits of |input| two at a time and writes each resulting
+// byte through |output|. Returns false for empty or odd-length input, and at
+// the first pair containing a non-hex character; bytes decoded before such a
+// pair have already been written.
+template <typename OutIter>
+static bool HexStringToByteContainer(StringPiece input, OutIter output) {
+  const size_t length = input.size();
+  const bool has_whole_bytes = length != 0 && (length % 2) == 0;
+  if (!has_whole_bytes)
+    return false;
+
+  for (size_t pos = 0; pos < length; pos += 2) {
+    // High nibble first, then low nibble.
+    const Optional<uint8_t> high_nibble = CharToDigit<16>(input[pos]);
+    const Optional<uint8_t> low_nibble = CharToDigit<16>(input[pos + 1]);
+    if (!(high_nibble && low_nibble))
+      return false;
+
+    *(output++) = (*high_nibble << 4) | *low_nibble;
+  }
+  return true;
+}
+
+} // namespace internal
+
+} // namespace base
+
+#endif // BASE_STRINGS_STRING_NUMBER_CONVERSIONS_INTERNAL_H_
diff --git a/chromium/base/strings/string_number_conversions_win.cc b/chromium/base/strings/string_number_conversions_win.cc
new file mode 100644
index 00000000000..8a1a3bea917
--- /dev/null
+++ b/chromium/base/strings/string_number_conversions_win.cc
@@ -0,0 +1,79 @@
+// Copyright 2020 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "base/strings/string_number_conversions_win.h"
+
+#include <string>
+
+#include "base/strings/string_number_conversions_internal.h"
+#include "base/strings/string_piece.h"
+
+namespace base {
+// Formats |value| as a std::wstring; one overload per supported number type.
+std::wstring NumberToWString(int value) {
+ return internal::IntToStringT<std::wstring>(value);
+}
+
+std::wstring NumberToWString(unsigned value) {
+ return internal::IntToStringT<std::wstring>(value);
+}
+
+std::wstring NumberToWString(long value) {
+ return internal::IntToStringT<std::wstring>(value);
+}
+
+std::wstring NumberToWString(unsigned long value) {
+ return internal::IntToStringT<std::wstring>(value);
+}
+
+std::wstring NumberToWString(long long value) {
+ return internal::IntToStringT<std::wstring>(value);
+}
+
+std::wstring NumberToWString(unsigned long long value) {
+ return internal::IntToStringT<std::wstring>(value);
+}
+// The double overload goes through the shared double formatting template.
+std::wstring NumberToWString(double value) {
+ return internal::DoubleToStringT<std::wstring>(value);
+}
+
+#if defined(BASE_STRING16_IS_STD_U16STRING)
+namespace internal {
+// wchar_t whitespace test for the wide overloads below; defers to iswspace().
+template <>
+class WhitespaceHelper<wchar_t> {
+ public:
+ static bool Invoke(wchar_t c) { return 0 != iswspace(c); }
+};
+
+} // namespace internal
+// WStringPiece overloads; each forwards to the shared internal template.
+bool StringToInt(WStringPiece input, int* output) {
+ return internal::StringToIntImpl(input, *output);
+}
+
+bool StringToUint(WStringPiece input, unsigned* output) {
+ return internal::StringToIntImpl(input, *output);
+}
+
+bool StringToInt64(WStringPiece input, int64_t* output) {
+ return internal::StringToIntImpl(input, *output);
+}
+
+bool StringToUint64(WStringPiece input, uint64_t* output) {
+ return internal::StringToIntImpl(input, *output);
+}
+
+bool StringToSizeT(WStringPiece input, size_t* output) {
+ return internal::StringToIntImpl(input, *output);
+}
+// NOTE(review): the cast reads the wchar_t data as 16-bit units - confirm.
+bool StringToDouble(WStringPiece input, double* output) {
+ return internal::StringToDoubleImpl(
+ input, reinterpret_cast<const uint16_t*>(input.data()), *output);
+}
+#endif // defined(BASE_STRING16_IS_STD_U16STRING)
+
+} // namespace base
diff --git a/chromium/base/strings/string_number_conversions_win.h b/chromium/base/strings/string_number_conversions_win.h
new file mode 100644
index 00000000000..5abcc291130
--- /dev/null
+++ b/chromium/base/strings/string_number_conversions_win.h
@@ -0,0 +1,40 @@
+// Copyright 2020 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef BASE_STRINGS_STRING_NUMBER_CONVERSIONS_WIN_H_
+#define BASE_STRINGS_STRING_NUMBER_CONVERSIONS_WIN_H_
+
+#include <string>
+
+#include "base/base_export.h"
+#include "base/strings/string_piece.h"
+
+namespace base {
+// Number -> std::wstring conversions, one overload per supported type.
+BASE_EXPORT std::wstring NumberToWString(int value);
+BASE_EXPORT std::wstring NumberToWString(unsigned int value);
+BASE_EXPORT std::wstring NumberToWString(long value);
+BASE_EXPORT std::wstring NumberToWString(unsigned long value);
+BASE_EXPORT std::wstring NumberToWString(long long value);
+BASE_EXPORT std::wstring NumberToWString(unsigned long long value);
+BASE_EXPORT std::wstring NumberToWString(double value);
+
+// The following section contains overloads of the cross-platform APIs for
+// std::wstring and base::WStringPiece. These are only enabled if std::wstring
+// and base::string16 are distinct types, as otherwise this would result in an
+// ODR violation.
+// TODO(crbug.com/911896): Remove those guards once base::string16 is
+// std::u16string.
+#if defined(BASE_STRING16_IS_STD_U16STRING)
+BASE_EXPORT bool StringToInt(WStringPiece input, int* output);
+BASE_EXPORT bool StringToUint(WStringPiece input, unsigned* output);
+BASE_EXPORT bool StringToInt64(WStringPiece input, int64_t* output);
+BASE_EXPORT bool StringToUint64(WStringPiece input, uint64_t* output);
+BASE_EXPORT bool StringToSizeT(WStringPiece input, size_t* output);
+BASE_EXPORT bool StringToDouble(WStringPiece input, double* output);
+#endif // defined(BASE_STRING16_IS_STD_U16STRING)
+
+} // namespace base
+
+#endif // BASE_STRINGS_STRING_NUMBER_CONVERSIONS_WIN_H_
diff --git a/chromium/base/strings/string_piece.h b/chromium/base/strings/string_piece.h
index bc24b4d1e90..f60af47177b 100644
--- a/chromium/base/strings/string_piece.h
+++ b/chromium/base/strings/string_piece.h
@@ -25,11 +25,12 @@
#include <stddef.h>
#include <iosfwd>
+#include <ostream>
#include <string>
#include <type_traits>
#include "base/base_export.h"
-#include "base/logging.h"
+#include "base/check_op.h"
#include "base/strings/char_traits.h"
#include "base/strings/string16.h"
#include "base/strings/string_piece_forward.h"
@@ -148,6 +149,7 @@ template <typename STRING_TYPE> class BasicStringPiece {
public:
// Standard STL container boilerplate.
typedef size_t size_type;
+ typedef typename STRING_TYPE::traits_type traits_type;
typedef typename STRING_TYPE::value_type value_type;
typedef const value_type* pointer;
typedef const value_type& reference;
@@ -162,7 +164,7 @@ template <typename STRING_TYPE> class BasicStringPiece {
// We provide non-explicit singleton constructors so users can pass
// in a "const char*" or a "string" wherever a "StringPiece" is
// expected (likewise for char16, string16, StringPiece16).
- constexpr BasicStringPiece() : ptr_(NULL), length_(0) {}
+ constexpr BasicStringPiece() : ptr_(nullptr), length_(0) {}
// TODO(crbug.com/1049498): Construction from nullptr is not allowed for
// std::basic_string_view, so remove the special handling for it.
// Note: This doesn't just use STRING_TYPE::traits_type::length(), since that
diff --git a/chromium/base/strings/string_split.cc b/chromium/base/strings/string_split.cc
index a968e802e8c..4ba0412cc2c 100644
--- a/chromium/base/strings/string_split.cc
+++ b/chromium/base/strings/string_split.cc
@@ -7,6 +7,7 @@
#include <stddef.h>
#include "base/logging.h"
+#include "base/strings/string_split_internal.h"
#include "base/strings/string_util.h"
#include "base/third_party/icu/icu_utf.h"
@@ -14,56 +15,6 @@ namespace base {
namespace {
-// Returns either the ASCII or UTF-16 whitespace.
-template<typename Str> BasicStringPiece<Str> WhitespaceForType();
-#if defined(OS_WIN) && defined(BASE_STRING16_IS_STD_U16STRING)
-template <>
-WStringPiece WhitespaceForType<std::wstring>() {
- return kWhitespaceWide;
-}
-#endif
-
-template<> StringPiece16 WhitespaceForType<string16>() {
- return kWhitespaceUTF16;
-}
-template<> StringPiece WhitespaceForType<std::string>() {
- return kWhitespaceASCII;
-}
-
-// General string splitter template. Can take 8- or 16-bit input, can produce
-// the corresponding string or StringPiece output.
-template <typename OutputStringType, typename Str>
-static std::vector<OutputStringType> SplitStringT(
- BasicStringPiece<Str> str,
- BasicStringPiece<Str> delimiter,
- WhitespaceHandling whitespace,
- SplitResult result_type) {
- std::vector<OutputStringType> result;
- if (str.empty())
- return result;
-
- size_t start = 0;
- while (start != Str::npos) {
- size_t end = str.find_first_of(delimiter, start);
-
- BasicStringPiece<Str> piece;
- if (end == Str::npos) {
- piece = str.substr(start);
- start = Str::npos;
- } else {
- piece = str.substr(start, end - start);
- start = end + 1;
- }
-
- if (whitespace == TRIM_WHITESPACE)
- piece = TrimString(piece, WhitespaceForType<Str>(), TRIM_ALL);
-
- if (result_type == SPLIT_WANT_ALL || !piece.empty())
- result.emplace_back(piece);
- }
- return result;
-}
-
bool AppendStringKeyValue(StringPiece input,
char delimiter,
StringPairs* result) {
@@ -94,67 +45,38 @@ bool AppendStringKeyValue(StringPiece input,
return true;
}
-template <typename OutputStringType, typename Str>
-std::vector<OutputStringType> SplitStringUsingSubstrT(
- BasicStringPiece<Str> input,
- BasicStringPiece<Str> delimiter,
- WhitespaceHandling whitespace,
- SplitResult result_type) {
- using Piece = BasicStringPiece<Str>;
- using size_type = typename Piece::size_type;
-
- std::vector<OutputStringType> result;
- if (delimiter.size() == 0) {
- result.emplace_back(input);
- return result;
- }
-
- for (size_type begin_index = 0, end_index = 0; end_index != Piece::npos;
- begin_index = end_index + delimiter.size()) {
- end_index = input.find(delimiter, begin_index);
- Piece term = end_index == Piece::npos
- ? input.substr(begin_index)
- : input.substr(begin_index, end_index - begin_index);
-
- if (whitespace == TRIM_WHITESPACE)
- term = TrimString(term, WhitespaceForType<Str>(), TRIM_ALL);
-
- if (result_type == SPLIT_WANT_ALL || !term.empty())
- result.emplace_back(term);
- }
-
- return result;
-}
-
} // namespace
std::vector<std::string> SplitString(StringPiece input,
StringPiece separators,
WhitespaceHandling whitespace,
SplitResult result_type) {
- return SplitStringT<std::string>(input, separators, whitespace, result_type);
+ return internal::SplitStringT<std::string>(input, separators, whitespace,
+ result_type);
}
std::vector<string16> SplitString(StringPiece16 input,
StringPiece16 separators,
WhitespaceHandling whitespace,
SplitResult result_type) {
- return SplitStringT<string16>(input, separators, whitespace, result_type);
+ return internal::SplitStringT<string16>(input, separators, whitespace,
+ result_type);
}
std::vector<StringPiece> SplitStringPiece(StringPiece input,
StringPiece separators,
WhitespaceHandling whitespace,
SplitResult result_type) {
- return SplitStringT<StringPiece>(input, separators, whitespace, result_type);
+ return internal::SplitStringT<StringPiece>(input, separators, whitespace,
+ result_type);
}
std::vector<StringPiece16> SplitStringPiece(StringPiece16 input,
StringPiece16 separators,
WhitespaceHandling whitespace,
SplitResult result_type) {
- return SplitStringT<StringPiece16>(input, separators, whitespace,
- result_type);
+ return internal::SplitStringT<StringPiece16>(input, separators, whitespace,
+ result_type);
}
bool SplitStringIntoKeyValuePairs(StringPiece input,
@@ -192,16 +114,16 @@ std::vector<string16> SplitStringUsingSubstr(StringPiece16 input,
StringPiece16 delimiter,
WhitespaceHandling whitespace,
SplitResult result_type) {
- return SplitStringUsingSubstrT<string16>(input, delimiter, whitespace,
- result_type);
+ return internal::SplitStringUsingSubstrT<string16>(input, delimiter,
+ whitespace, result_type);
}
std::vector<std::string> SplitStringUsingSubstr(StringPiece input,
StringPiece delimiter,
WhitespaceHandling whitespace,
SplitResult result_type) {
- return SplitStringUsingSubstrT<std::string>(input, delimiter, whitespace,
- result_type);
+ return internal::SplitStringUsingSubstrT<std::string>(
+ input, delimiter, whitespace, result_type);
}
std::vector<StringPiece16> SplitStringPieceUsingSubstr(
@@ -210,8 +132,8 @@ std::vector<StringPiece16> SplitStringPieceUsingSubstr(
WhitespaceHandling whitespace,
SplitResult result_type) {
std::vector<StringPiece16> result;
- return SplitStringUsingSubstrT<StringPiece16>(input, delimiter, whitespace,
- result_type);
+ return internal::SplitStringUsingSubstrT<StringPiece16>(
+ input, delimiter, whitespace, result_type);
}
std::vector<StringPiece> SplitStringPieceUsingSubstr(
@@ -219,41 +141,8 @@ std::vector<StringPiece> SplitStringPieceUsingSubstr(
StringPiece delimiter,
WhitespaceHandling whitespace,
SplitResult result_type) {
- return SplitStringUsingSubstrT<StringPiece>(input, delimiter, whitespace,
- result_type);
-}
-
-#if defined(OS_WIN) && defined(BASE_STRING16_IS_STD_U16STRING)
-std::vector<std::wstring> SplitString(WStringPiece input,
- WStringPiece separators,
- WhitespaceHandling whitespace,
- SplitResult result_type) {
- return SplitStringT<std::wstring>(input, separators, whitespace, result_type);
-}
-
-std::vector<WStringPiece> SplitStringPiece(WStringPiece input,
- WStringPiece separators,
- WhitespaceHandling whitespace,
- SplitResult result_type) {
- return SplitStringT<WStringPiece>(input, separators, whitespace, result_type);
-}
-
-std::vector<std::wstring> SplitStringUsingSubstr(WStringPiece input,
- WStringPiece delimiter,
- WhitespaceHandling whitespace,
- SplitResult result_type) {
- return SplitStringUsingSubstrT<std::wstring>(input, delimiter, whitespace,
- result_type);
-}
-
-std::vector<WStringPiece> SplitStringPieceUsingSubstr(
- WStringPiece input,
- WStringPiece delimiter,
- WhitespaceHandling whitespace,
- SplitResult result_type) {
- return SplitStringUsingSubstrT<WStringPiece>(input, delimiter, whitespace,
- result_type);
+ return internal::SplitStringUsingSubstrT<StringPiece>(
+ input, delimiter, whitespace, result_type);
}
-#endif
} // namespace base
diff --git a/chromium/base/strings/string_split.h b/chromium/base/strings/string_split.h
index efa8b199fe0..73c15d79f1b 100644
--- a/chromium/base/strings/string_split.h
+++ b/chromium/base/strings/string_split.h
@@ -138,32 +138,10 @@ BASE_EXPORT std::vector<StringPiece> SplitStringPieceUsingSubstr(
WhitespaceHandling whitespace,
SplitResult result_type) WARN_UNUSED_RESULT;
-#if defined(OS_WIN) && defined(BASE_STRING16_IS_STD_U16STRING)
-BASE_EXPORT std::vector<std::wstring> SplitString(WStringPiece input,
- WStringPiece separators,
- WhitespaceHandling whitespace,
- SplitResult result_type)
- WARN_UNUSED_RESULT;
-
-BASE_EXPORT std::vector<WStringPiece> SplitStringPiece(
- WStringPiece input,
- WStringPiece separators,
- WhitespaceHandling whitespace,
- SplitResult result_type) WARN_UNUSED_RESULT;
-
-BASE_EXPORT std::vector<std::wstring> SplitStringUsingSubstr(
- WStringPiece input,
- WStringPiece delimiter,
- WhitespaceHandling whitespace,
- SplitResult result_type) WARN_UNUSED_RESULT;
+} // namespace base
-BASE_EXPORT std::vector<WStringPiece> SplitStringPieceUsingSubstr(
- WStringPiece input,
- WStringPiece delimiter,
- WhitespaceHandling whitespace,
- SplitResult result_type) WARN_UNUSED_RESULT;
+#if defined(OS_WIN)
+#include "base/strings/string_split_win.h"
#endif
-} // namespace base
-
#endif // BASE_STRINGS_STRING_SPLIT_H_
diff --git a/chromium/base/strings/string_split_internal.h b/chromium/base/strings/string_split_internal.h
new file mode 100644
index 00000000000..71d8030b3d2
--- /dev/null
+++ b/chromium/base/strings/string_split_internal.h
@@ -0,0 +1,100 @@
+// Copyright 2020 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef BASE_STRINGS_STRING_SPLIT_INTERNAL_H_
+#define BASE_STRINGS_STRING_SPLIT_INTERNAL_H_
+
+#include <vector>
+
+#include "base/strings/string_piece.h"
+#include "base/strings/string_util.h"
+
+namespace base {
+
+namespace internal {
+
+// Returns the whitespace set (ASCII or UTF-16) for the string type |Str|.
+template <typename Str>
+BasicStringPiece<Str> WhitespaceForType();
+// Only the explicit specializations below provide a definition.
+template <>
+inline StringPiece16 WhitespaceForType<string16>() {
+ return kWhitespaceUTF16;
+}
+template <>
+inline StringPiece WhitespaceForType<std::string>() {
+ return kWhitespaceASCII;
+}
+
+// Splits |str| at every character contained in |delimiter|. With
+// TRIM_WHITESPACE each piece is trimmed on both ends first; pieces that end
+// up empty are kept only for SPLIT_WANT_ALL. An empty |str| yields no pieces
+// at all. Can take 8- or 16-bit input and can produce the corresponding
+// string or StringPiece output.
+template <typename OutputStringType, typename Str>
+static std::vector<OutputStringType> SplitStringT(
+    BasicStringPiece<Str> str,
+    BasicStringPiece<Str> delimiter,
+    WhitespaceHandling whitespace,
+    SplitResult result_type) {
+  std::vector<OutputStringType> pieces;
+  if (str.empty())
+    return pieces;
+
+  size_t token_begin = 0;
+  for (;;) {
+    const size_t token_end = str.find_first_of(delimiter, token_begin);
+    const bool is_last_token = token_end == Str::npos;
+
+    // The final token runs to the end of the input.
+    BasicStringPiece<Str> token =
+        is_last_token ? str.substr(token_begin)
+                      : str.substr(token_begin, token_end - token_begin);
+
+    if (whitespace == TRIM_WHITESPACE)
+      token = TrimString(token, WhitespaceForType<Str>(), TRIM_ALL);
+
+    if (result_type == SPLIT_WANT_ALL || !token.empty())
+      pieces.emplace_back(token);
+
+    if (is_last_token)
+      break;
+    token_begin = token_end + 1;  // Resume just past the separator.
+  }
+  return pieces;
+}
+
+// Splits |input| at every occurrence of the whole |delimiter| string. An
+// empty |delimiter| returns |input| as the single, untrimmed piece.
+// Otherwise each term is optionally trimmed on both ends, and empty terms
+// are kept only for SPLIT_WANT_ALL.
+template <typename OutputStringType, typename Str>
+std::vector<OutputStringType> SplitStringUsingSubstrT(
+    BasicStringPiece<Str> input,
+    BasicStringPiece<Str> delimiter,
+    WhitespaceHandling whitespace,
+    SplitResult result_type) {
+  using Piece = BasicStringPiece<Str>;
+  using size_type = typename Piece::size_type;
+
+  std::vector<OutputStringType> terms;
+  if (delimiter.size() == 0) {
+    terms.emplace_back(input);
+    return terms;
+  }
+
+  size_type term_begin = 0;
+  while (true) {
+    const size_type match = input.find(delimiter, term_begin);
+    const bool no_more_matches = match == Piece::npos;
+
+    Piece term = no_more_matches
+                     ? input.substr(term_begin)
+                     : input.substr(term_begin, match - term_begin);
+
+    if (whitespace == TRIM_WHITESPACE)
+      term = TrimString(term, WhitespaceForType<Str>(), TRIM_ALL);
+
+    if (result_type == SPLIT_WANT_ALL || !term.empty())
+      terms.emplace_back(term);
+
+    if (no_more_matches)
+      break;
+    term_begin = match + delimiter.size();  // Skip the matched delimiter.
+  }
+
+  return terms;
+}
+
+} // namespace internal
+
+} // namespace base
+
+#endif // BASE_STRINGS_STRING_SPLIT_INTERNAL_H_
diff --git a/chromium/base/strings/string_split_win.cc b/chromium/base/strings/string_split_win.cc
new file mode 100644
index 00000000000..91184bd058e
--- /dev/null
+++ b/chromium/base/strings/string_split_win.cc
@@ -0,0 +1,59 @@
+// Copyright 2020 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "base/strings/string_split_win.h"
+
+#include <string>
+#include <vector>
+
+#include "base/strings/string_piece.h"
+#include "base/strings/string_split_internal.h"
+
+namespace base {
+
+#if defined(BASE_STRING16_IS_STD_U16STRING)
+namespace internal {
+// Wide-string whitespace set for the std::wstring splitters below.
+template <>
+inline WStringPiece WhitespaceForType<std::wstring>() {
+ return kWhitespaceWide;
+}
+
+} // namespace internal
+// std::wstring / WStringPiece overloads; each forwards to a shared template.
+std::vector<std::wstring> SplitString(WStringPiece input,
+ WStringPiece separators,
+ WhitespaceHandling whitespace,
+ SplitResult result_type) {
+ return internal::SplitStringT<std::wstring>(input, separators, whitespace,
+ result_type);
+}
+
+std::vector<WStringPiece> SplitStringPiece(WStringPiece input,
+ WStringPiece separators,
+ WhitespaceHandling whitespace,
+ SplitResult result_type) {
+ return internal::SplitStringT<WStringPiece>(input, separators, whitespace,
+ result_type);
+}
+// The *UsingSubstr variants forward to SplitStringUsingSubstrT instead.
+std::vector<std::wstring> SplitStringUsingSubstr(WStringPiece input,
+ WStringPiece delimiter,
+ WhitespaceHandling whitespace,
+ SplitResult result_type) {
+ return internal::SplitStringUsingSubstrT<std::wstring>(
+ input, delimiter, whitespace, result_type);
+}
+
+std::vector<WStringPiece> SplitStringPieceUsingSubstr(
+ WStringPiece input,
+ WStringPiece delimiter,
+ WhitespaceHandling whitespace,
+ SplitResult result_type) {
+ return internal::SplitStringUsingSubstrT<WStringPiece>(
+ input, delimiter, whitespace, result_type);
+}
+#endif
+
+} // namespace base
diff --git a/chromium/base/strings/string_split_win.h b/chromium/base/strings/string_split_win.h
new file mode 100644
index 00000000000..51627d9eeb8
--- /dev/null
+++ b/chromium/base/strings/string_split_win.h
@@ -0,0 +1,53 @@
+// Copyright 2020 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef BASE_STRINGS_STRING_SPLIT_WIN_H_
+#define BASE_STRINGS_STRING_SPLIT_WIN_H_
+
+#include <string>
+#include <vector>
+
+#include "base/base_export.h"
+#include "base/compiler_specific.h"
+#include "base/strings/string16.h"
+#include "base/strings/string_piece.h"
+#include "base/strings/string_split.h"
+
+namespace base {
+
+// The following section contains overloads of the cross-platform APIs for
+// std::wstring and base::WStringPiece. These are only enabled if std::wstring
+// and base::string16 are distinct types, as otherwise this would result in an
+// ODR violation.
+// TODO(crbug.com/911896): Remove those guards once base::string16 is
+// std::u16string.
+#if defined(BASE_STRING16_IS_STD_U16STRING)
+BASE_EXPORT std::vector<std::wstring> SplitString(WStringPiece input,
+ WStringPiece separators,
+ WhitespaceHandling whitespace,
+ SplitResult result_type)
+ WARN_UNUSED_RESULT;
+
+BASE_EXPORT std::vector<WStringPiece> SplitStringPiece(
+ WStringPiece input,
+ WStringPiece separators,
+ WhitespaceHandling whitespace,
+ SplitResult result_type) WARN_UNUSED_RESULT;
+
+BASE_EXPORT std::vector<std::wstring> SplitStringUsingSubstr(
+ WStringPiece input,
+ WStringPiece delimiter,
+ WhitespaceHandling whitespace,
+ SplitResult result_type) WARN_UNUSED_RESULT;
+
+BASE_EXPORT std::vector<WStringPiece> SplitStringPieceUsingSubstr(
+ WStringPiece input,
+ WStringPiece delimiter,
+ WhitespaceHandling whitespace,
+ SplitResult result_type) WARN_UNUSED_RESULT;
+#endif
+
+} // namespace base
+
+#endif // BASE_STRINGS_STRING_SPLIT_WIN_H_
diff --git a/chromium/base/strings/string_util.cc b/chromium/base/strings/string_util.cc
index 924455491a6..a883c97eca4 100644
--- a/chromium/base/strings/string_util.cc
+++ b/chromium/base/strings/string_util.cc
@@ -18,11 +18,13 @@
#include <algorithm>
#include <limits>
+#include <type_traits>
#include <vector>
-#include "base/logging.h"
+#include "base/check_op.h"
#include "base/no_destructor.h"
#include "base/stl_util.h"
+#include "base/strings/string_util_internal.h"
#include "base/strings/utf_string_conversion_utils.h"
#include "base/strings/utf_string_conversions.h"
#include "base/third_party/icu/icu_utf.h"
@@ -30,60 +32,6 @@
namespace base {
-namespace {
-
-// Used by ReplaceStringPlaceholders to track the position in the string of
-// replaced parameters.
-struct ReplacementOffset {
- ReplacementOffset(uintptr_t parameter, size_t offset)
- : parameter(parameter),
- offset(offset) {}
-
- // Index of the parameter.
- uintptr_t parameter;
-
- // Starting position in the string.
- size_t offset;
-};
-
-static bool CompareParameter(const ReplacementOffset& elem1,
- const ReplacementOffset& elem2) {
- return elem1.parameter < elem2.parameter;
-}
-
-// Assuming that a pointer is the size of a "machine word", then
-// uintptr_t is an integer type that is also a machine word.
-using MachineWord = uintptr_t;
-
-inline bool IsMachineWordAligned(const void* pointer) {
- return !(reinterpret_cast<MachineWord>(pointer) & (sizeof(MachineWord) - 1));
-}
-
-template <typename CharacterType>
-struct NonASCIIMask;
-template <>
-struct NonASCIIMask<char> {
- static constexpr MachineWord value() {
- return static_cast<MachineWord>(0x8080808080808080ULL);
- }
-};
-template <>
-struct NonASCIIMask<char16> {
- static constexpr MachineWord value() {
- return static_cast<MachineWord>(0xFF80FF80FF80FF80ULL);
- }
-};
-#if defined(WCHAR_T_IS_UTF32)
-template <>
-struct NonASCIIMask<wchar_t> {
- static constexpr MachineWord value() {
- return static_cast<MachineWord>(0xFFFFFF80FFFFFF80ULL);
- }
-};
-#endif // WCHAR_T_IS_UTF32
-
-} // namespace
-
bool IsWprintfFormatPortable(const wchar_t* format) {
for (const wchar_t* position = format; *position != '\0'; ++position) {
if (*position == '%') {
@@ -119,89 +67,38 @@ bool IsWprintfFormatPortable(const wchar_t* format) {
return true;
}
-namespace {
-
-template<typename StringType>
-StringType ToLowerASCIIImpl(BasicStringPiece<StringType> str) {
- StringType ret;
- ret.reserve(str.size());
- for (size_t i = 0; i < str.size(); i++)
- ret.push_back(ToLowerASCII(str[i]));
- return ret;
-}
-
-template<typename StringType>
-StringType ToUpperASCIIImpl(BasicStringPiece<StringType> str) {
- StringType ret;
- ret.reserve(str.size());
- for (size_t i = 0; i < str.size(); i++)
- ret.push_back(ToUpperASCII(str[i]));
- return ret;
-}
-
-} // namespace
-
std::string ToLowerASCII(StringPiece str) {
- return ToLowerASCIIImpl<std::string>(str);
+ return internal::ToLowerASCIIImpl(str);
}
string16 ToLowerASCII(StringPiece16 str) {
- return ToLowerASCIIImpl<string16>(str);
+ return internal::ToLowerASCIIImpl(str);
}
std::string ToUpperASCII(StringPiece str) {
- return ToUpperASCIIImpl<std::string>(str);
+ return internal::ToUpperASCIIImpl(str);
}
string16 ToUpperASCII(StringPiece16 str) {
- return ToUpperASCIIImpl<string16>(str);
-}
-
-template<class StringType>
-int CompareCaseInsensitiveASCIIT(BasicStringPiece<StringType> a,
- BasicStringPiece<StringType> b) {
- // Find the first characters that aren't equal and compare them. If the end
- // of one of the strings is found before a nonequal character, the lengths
- // of the strings are compared.
- size_t i = 0;
- while (i < a.length() && i < b.length()) {
- typename StringType::value_type lower_a = ToLowerASCII(a[i]);
- typename StringType::value_type lower_b = ToLowerASCII(b[i]);
- if (lower_a < lower_b)
- return -1;
- if (lower_a > lower_b)
- return 1;
- i++;
- }
-
- // End of one string hit before finding a different character. Expect the
- // common case to be "strings equal" at this point so check that first.
- if (a.length() == b.length())
- return 0;
-
- if (a.length() < b.length())
- return -1;
- return 1;
+ return internal::ToUpperASCIIImpl(str);
}
int CompareCaseInsensitiveASCII(StringPiece a, StringPiece b) {
- return CompareCaseInsensitiveASCIIT<std::string>(a, b);
+ return internal::CompareCaseInsensitiveASCIIT(a, b);
}
int CompareCaseInsensitiveASCII(StringPiece16 a, StringPiece16 b) {
- return CompareCaseInsensitiveASCIIT<string16>(a, b);
+ return internal::CompareCaseInsensitiveASCIIT(a, b);
}
bool EqualsCaseInsensitiveASCII(StringPiece a, StringPiece b) {
- if (a.length() != b.length())
- return false;
- return CompareCaseInsensitiveASCIIT<std::string>(a, b) == 0;
+ return a.size() == b.size() &&
+ internal::CompareCaseInsensitiveASCIIT(a, b) == 0;
}
bool EqualsCaseInsensitiveASCII(StringPiece16 a, StringPiece16 b) {
- if (a.length() != b.length())
- return false;
- return CompareCaseInsensitiveASCIIT<string16>(a, b) == 0;
+ return a.size() == b.size() &&
+ internal::CompareCaseInsensitiveASCIIT(a, b) == 0;
}
const std::string& EmptyString() {
@@ -214,107 +111,56 @@ const string16& EmptyString16() {
return *s16;
}
-template <class StringType>
-bool ReplaceCharsT(const StringType& input,
- BasicStringPiece<StringType> find_any_of_these,
- BasicStringPiece<StringType> replace_with,
- StringType* output);
-
-bool ReplaceChars(const string16& input,
+bool ReplaceChars(StringPiece16 input,
StringPiece16 replace_chars,
StringPiece16 replace_with,
string16* output) {
- return ReplaceCharsT(input, replace_chars, replace_with, output);
+ return internal::ReplaceCharsT(input, replace_chars, replace_with, output);
}
-bool ReplaceChars(const std::string& input,
+bool ReplaceChars(StringPiece input,
StringPiece replace_chars,
StringPiece replace_with,
std::string* output) {
- return ReplaceCharsT(input, replace_chars, replace_with, output);
+ return internal::ReplaceCharsT(input, replace_chars, replace_with, output);
}
-bool RemoveChars(const string16& input,
+bool RemoveChars(StringPiece16 input,
StringPiece16 remove_chars,
string16* output) {
- return ReplaceCharsT(input, remove_chars, StringPiece16(), output);
+ return internal::ReplaceCharsT(input, remove_chars, StringPiece16(), output);
}
-bool RemoveChars(const std::string& input,
+bool RemoveChars(StringPiece input,
StringPiece remove_chars,
std::string* output) {
- return ReplaceCharsT(input, remove_chars, StringPiece(), output);
-}
-
-template <typename Str>
-TrimPositions TrimStringT(BasicStringPiece<Str> input,
- BasicStringPiece<Str> trim_chars,
- TrimPositions positions,
- Str* output) {
- // Find the edges of leading/trailing whitespace as desired. Need to use
- // a StringPiece version of input to be able to call find* on it with the
- // StringPiece version of trim_chars (normally the trim_chars will be a
- // constant so avoid making a copy).
- const size_t last_char = input.length() - 1;
- const size_t first_good_char =
- (positions & TRIM_LEADING) ? input.find_first_not_of(trim_chars) : 0;
- const size_t last_good_char = (positions & TRIM_TRAILING)
- ? input.find_last_not_of(trim_chars)
- : last_char;
-
- // When the string was all trimmed, report that we stripped off characters
- // from whichever position the caller was interested in. For empty input, we
- // stripped no characters, but we still need to clear |output|.
- if (input.empty() || first_good_char == Str::npos ||
- last_good_char == Str::npos) {
- bool input_was_empty = input.empty(); // in case output == &input
- output->clear();
- return input_was_empty ? TRIM_NONE : positions;
- }
-
- // Trim.
- output->assign(input.data() + first_good_char,
- last_good_char - first_good_char + 1);
-
- // Return where we trimmed from.
- return static_cast<TrimPositions>(
- (first_good_char == 0 ? TRIM_NONE : TRIM_LEADING) |
- (last_good_char == last_char ? TRIM_NONE : TRIM_TRAILING));
+ return internal::ReplaceCharsT(input, remove_chars, StringPiece(), output);
}
bool TrimString(StringPiece16 input,
StringPiece16 trim_chars,
string16* output) {
- return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE;
+ return internal::TrimStringT(input, trim_chars, TRIM_ALL, output) !=
+ TRIM_NONE;
}
bool TrimString(StringPiece input,
StringPiece trim_chars,
std::string* output) {
- return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE;
-}
-
-template<typename Str>
-BasicStringPiece<Str> TrimStringPieceT(BasicStringPiece<Str> input,
- BasicStringPiece<Str> trim_chars,
- TrimPositions positions) {
- size_t begin = (positions & TRIM_LEADING) ?
- input.find_first_not_of(trim_chars) : 0;
- size_t end = (positions & TRIM_TRAILING) ?
- input.find_last_not_of(trim_chars) + 1 : input.size();
- return input.substr(begin, end - begin);
+ return internal::TrimStringT(input, trim_chars, TRIM_ALL, output) !=
+ TRIM_NONE;
}
StringPiece16 TrimString(StringPiece16 input,
StringPiece16 trim_chars,
TrimPositions positions) {
- return TrimStringPieceT(input, trim_chars, positions);
+ return internal::TrimStringPieceT(input, trim_chars, positions);
}
StringPiece TrimString(StringPiece input,
StringPiece trim_chars,
TrimPositions positions) {
- return TrimStringPieceT(input, trim_chars, positions);
+ return internal::TrimStringPieceT(input, trim_chars, positions);
}
void TruncateUTF8ToByteSize(const std::string& input,
@@ -357,74 +203,36 @@ void TruncateUTF8ToByteSize(const std::string& input,
TrimPositions TrimWhitespace(StringPiece16 input,
TrimPositions positions,
string16* output) {
- return TrimStringT(input, StringPiece16(kWhitespaceUTF16), positions, output);
+ return internal::TrimStringT(input, StringPiece16(kWhitespaceUTF16),
+ positions, output);
}
StringPiece16 TrimWhitespace(StringPiece16 input,
TrimPositions positions) {
- return TrimStringPieceT(input, StringPiece16(kWhitespaceUTF16), positions);
+ return internal::TrimStringPieceT(input, StringPiece16(kWhitespaceUTF16),
+ positions);
}
TrimPositions TrimWhitespaceASCII(StringPiece input,
TrimPositions positions,
std::string* output) {
- return TrimStringT(input, StringPiece(kWhitespaceASCII), positions, output);
+ return internal::TrimStringT(input, StringPiece(kWhitespaceASCII), positions,
+ output);
}
StringPiece TrimWhitespaceASCII(StringPiece input, TrimPositions positions) {
- return TrimStringPieceT(input, StringPiece(kWhitespaceASCII), positions);
+ return internal::TrimStringPieceT(input, StringPiece(kWhitespaceASCII),
+ positions);
}
-template<typename STR>
-STR CollapseWhitespaceT(const STR& text,
- bool trim_sequences_with_line_breaks) {
- STR result;
- result.resize(text.size());
-
- // Set flags to pretend we're already in a trimmed whitespace sequence, so we
- // will trim any leading whitespace.
- bool in_whitespace = true;
- bool already_trimmed = true;
-
- int chars_written = 0;
- for (typename STR::const_iterator i(text.begin()); i != text.end(); ++i) {
- if (IsUnicodeWhitespace(*i)) {
- if (!in_whitespace) {
- // Reduce all whitespace sequences to a single space.
- in_whitespace = true;
- result[chars_written++] = L' ';
- }
- if (trim_sequences_with_line_breaks && !already_trimmed &&
- ((*i == '\n') || (*i == '\r'))) {
- // Whitespace sequences containing CR or LF are eliminated entirely.
- already_trimmed = true;
- --chars_written;
- }
- } else {
- // Non-whitespace chracters are copied straight across.
- in_whitespace = false;
- already_trimmed = false;
- result[chars_written++] = *i;
- }
- }
-
- if (in_whitespace && !already_trimmed) {
- // Any trailing whitespace is eliminated.
- --chars_written;
- }
-
- result.resize(chars_written);
- return result;
-}
-
-string16 CollapseWhitespace(const string16& text,
+string16 CollapseWhitespace(StringPiece16 text,
bool trim_sequences_with_line_breaks) {
- return CollapseWhitespaceT(text, trim_sequences_with_line_breaks);
+ return internal::CollapseWhitespaceT(text, trim_sequences_with_line_breaks);
}
-std::string CollapseWhitespaceASCII(const std::string& text,
+std::string CollapseWhitespaceASCII(StringPiece text,
bool trim_sequences_with_line_breaks) {
- return CollapseWhitespaceT(text, trim_sequences_with_line_breaks);
+ return internal::CollapseWhitespaceT(text, trim_sequences_with_line_breaks);
}
bool ContainsOnlyChars(StringPiece input, StringPiece characters) {
@@ -435,198 +243,63 @@ bool ContainsOnlyChars(StringPiece16 input, StringPiece16 characters) {
return input.find_first_not_of(characters) == StringPiece16::npos;
}
-template <class Char>
-inline bool DoIsStringASCII(const Char* characters, size_t length) {
- if (!length)
- return true;
- constexpr MachineWord non_ascii_bit_mask = NonASCIIMask<Char>::value();
- MachineWord all_char_bits = 0;
- const Char* end = characters + length;
-
- // Prologue: align the input.
- while (!IsMachineWordAligned(characters) && characters < end)
- all_char_bits |= *characters++;
- if (all_char_bits & non_ascii_bit_mask)
- return false;
-
- // Compare the values of CPU word size.
- constexpr size_t chars_per_word = sizeof(MachineWord) / sizeof(Char);
- constexpr int batch_count = 16;
- while (characters <= end - batch_count * chars_per_word) {
- all_char_bits = 0;
- for (int i = 0; i < batch_count; ++i) {
- all_char_bits |= *(reinterpret_cast<const MachineWord*>(characters));
- characters += chars_per_word;
- }
- if (all_char_bits & non_ascii_bit_mask)
- return false;
- }
-
- // Process the remaining words.
- all_char_bits = 0;
- while (characters <= end - chars_per_word) {
- all_char_bits |= *(reinterpret_cast<const MachineWord*>(characters));
- characters += chars_per_word;
- }
-
- // Process the remaining bytes.
- while (characters < end)
- all_char_bits |= *characters++;
-
- return !(all_char_bits & non_ascii_bit_mask);
-}
bool IsStringASCII(StringPiece str) {
- return DoIsStringASCII(str.data(), str.length());
+ return internal::DoIsStringASCII(str.data(), str.length());
}
bool IsStringASCII(StringPiece16 str) {
- return DoIsStringASCII(str.data(), str.length());
+ return internal::DoIsStringASCII(str.data(), str.length());
}
#if defined(WCHAR_T_IS_UTF32)
bool IsStringASCII(WStringPiece str) {
- return DoIsStringASCII(str.data(), str.length());
+ return internal::DoIsStringASCII(str.data(), str.length());
}
#endif
-template <bool (*Validator)(uint32_t)>
-inline static bool DoIsStringUTF8(StringPiece str) {
- const char* src = str.data();
- int32_t src_len = static_cast<int32_t>(str.length());
- int32_t char_index = 0;
-
- while (char_index < src_len) {
- int32_t code_point;
- CBU8_NEXT(src, char_index, src_len, code_point);
- if (!Validator(code_point))
- return false;
- }
- return true;
-}
-
bool IsStringUTF8(StringPiece str) {
- return DoIsStringUTF8<IsValidCharacter>(str);
+ return internal::DoIsStringUTF8<IsValidCharacter>(str);
}
bool IsStringUTF8AllowingNoncharacters(StringPiece str) {
- return DoIsStringUTF8<IsValidCodepoint>(str);
-}
-
-// Implementation note: Normally this function will be called with a hardcoded
-// constant for the lowercase_ascii parameter. Constructing a StringPiece from
-// a C constant requires running strlen, so the result will be two passes
-// through the buffers, one to file the length of lowercase_ascii, and one to
-// compare each letter.
-//
-// This function could have taken a const char* to avoid this and only do one
-// pass through the string. But the strlen is faster than the case-insensitive
-// compares and lets us early-exit in the case that the strings are different
-// lengths (will often be the case for non-matches). So whether one approach or
-// the other will be faster depends on the case.
-//
-// The hardcoded strings are typically very short so it doesn't matter, and the
-// string piece gives additional flexibility for the caller (doesn't have to be
-// null terminated) so we choose the StringPiece route.
-template<typename Str>
-static inline bool DoLowerCaseEqualsASCII(BasicStringPiece<Str> str,
- StringPiece lowercase_ascii) {
- if (str.size() != lowercase_ascii.size())
- return false;
- for (size_t i = 0; i < str.size(); i++) {
- if (ToLowerASCII(str[i]) != lowercase_ascii[i])
- return false;
- }
- return true;
+ return internal::DoIsStringUTF8<IsValidCodepoint>(str);
}
bool LowerCaseEqualsASCII(StringPiece str, StringPiece lowercase_ascii) {
- return DoLowerCaseEqualsASCII<std::string>(str, lowercase_ascii);
+ return internal::DoLowerCaseEqualsASCII(str, lowercase_ascii);
}
bool LowerCaseEqualsASCII(StringPiece16 str, StringPiece lowercase_ascii) {
- return DoLowerCaseEqualsASCII<string16>(str, lowercase_ascii);
+ return internal::DoLowerCaseEqualsASCII(str, lowercase_ascii);
}
bool EqualsASCII(StringPiece16 str, StringPiece ascii) {
- if (str.length() != ascii.length())
- return false;
- return std::equal(ascii.begin(), ascii.end(), str.begin());
-}
-
-template<typename Str>
-bool StartsWithT(BasicStringPiece<Str> str,
- BasicStringPiece<Str> search_for,
- CompareCase case_sensitivity) {
- if (search_for.size() > str.size())
- return false;
-
- BasicStringPiece<Str> source = str.substr(0, search_for.size());
-
- switch (case_sensitivity) {
- case CompareCase::SENSITIVE:
- return source == search_for;
-
- case CompareCase::INSENSITIVE_ASCII:
- return std::equal(
- search_for.begin(), search_for.end(),
- source.begin(),
- CaseInsensitiveCompareASCII<typename Str::value_type>());
-
- default:
- NOTREACHED();
- return false;
- }
+ return std::equal(ascii.begin(), ascii.end(), str.begin(), str.end());
}
bool StartsWith(StringPiece str,
StringPiece search_for,
CompareCase case_sensitivity) {
- return StartsWithT<std::string>(str, search_for, case_sensitivity);
+ return internal::StartsWithT(str, search_for, case_sensitivity);
}
bool StartsWith(StringPiece16 str,
StringPiece16 search_for,
CompareCase case_sensitivity) {
- return StartsWithT<string16>(str, search_for, case_sensitivity);
-}
-
-template <typename Str>
-bool EndsWithT(BasicStringPiece<Str> str,
- BasicStringPiece<Str> search_for,
- CompareCase case_sensitivity) {
- if (search_for.size() > str.size())
- return false;
-
- BasicStringPiece<Str> source = str.substr(str.size() - search_for.size(),
- search_for.size());
-
- switch (case_sensitivity) {
- case CompareCase::SENSITIVE:
- return source == search_for;
-
- case CompareCase::INSENSITIVE_ASCII:
- return std::equal(
- source.begin(), source.end(),
- search_for.begin(),
- CaseInsensitiveCompareASCII<typename Str::value_type>());
-
- default:
- NOTREACHED();
- return false;
- }
+ return internal::StartsWithT(str, search_for, case_sensitivity);
}
bool EndsWith(StringPiece str,
StringPiece search_for,
CompareCase case_sensitivity) {
- return EndsWithT<std::string>(str, search_for, case_sensitivity);
+ return internal::EndsWithT(str, search_for, case_sensitivity);
}
bool EndsWith(StringPiece16 str,
StringPiece16 search_for,
CompareCase case_sensitivity) {
- return EndsWithT<string16>(str, search_for, case_sensitivity);
+ return internal::EndsWithT(str, search_for, case_sensitivity);
}
char HexDigitToInt(wchar_t c) {
@@ -680,384 +353,93 @@ string16 FormatBytesUnlocalized(int64_t bytes) {
return ASCIIToUTF16(buf);
}
-// A Matcher for DoReplaceMatchesAfterOffset() that matches substrings.
-template <class StringType>
-struct SubstringMatcher {
- BasicStringPiece<StringType> find_this;
-
- size_t Find(const StringType& input, size_t pos) {
- return input.find(find_this.data(), pos, find_this.length());
- }
- size_t MatchSize() { return find_this.length(); }
-};
-
-// A Matcher for DoReplaceMatchesAfterOffset() that matches single characters.
-template <class StringType>
-struct CharacterMatcher {
- BasicStringPiece<StringType> find_any_of_these;
-
- size_t Find(const StringType& input, size_t pos) {
- return input.find_first_of(find_any_of_these.data(), pos,
- find_any_of_these.length());
- }
- constexpr size_t MatchSize() { return 1; }
-};
-
-enum class ReplaceType { REPLACE_ALL, REPLACE_FIRST };
-
-// Runs in O(n) time in the length of |str|, and transforms the string without
-// reallocating when possible. Returns |true| if any matches were found.
-//
-// This is parameterized on a |Matcher| traits type, so that it can be the
-// implementation for both ReplaceChars() and ReplaceSubstringsAfterOffset().
-template <class StringType, class Matcher>
-bool DoReplaceMatchesAfterOffset(StringType* str,
- size_t initial_offset,
- Matcher matcher,
- BasicStringPiece<StringType> replace_with,
- ReplaceType replace_type) {
- using CharTraits = typename StringType::traits_type;
-
- const size_t find_length = matcher.MatchSize();
- if (!find_length)
- return false;
-
- // If the find string doesn't appear, there's nothing to do.
- size_t first_match = matcher.Find(*str, initial_offset);
- if (first_match == StringType::npos)
- return false;
-
- // If we're only replacing one instance, there's no need to do anything
- // complicated.
- const size_t replace_length = replace_with.length();
- if (replace_type == ReplaceType::REPLACE_FIRST) {
- str->replace(first_match, find_length, replace_with.data(), replace_length);
- return true;
- }
-
- // If the find and replace strings are the same length, we can simply use
- // replace() on each instance, and finish the entire operation in O(n) time.
- if (find_length == replace_length) {
- auto* buffer = &((*str)[0]);
- for (size_t offset = first_match; offset != StringType::npos;
- offset = matcher.Find(*str, offset + replace_length)) {
- CharTraits::copy(buffer + offset, replace_with.data(), replace_length);
- }
- return true;
- }
-
- // Since the find and replace strings aren't the same length, a loop like the
- // one above would be O(n^2) in the worst case, as replace() will shift the
- // entire remaining string each time. We need to be more clever to keep things
- // O(n).
- //
- // When the string is being shortened, it's possible to just shift the matches
- // down in one pass while finding, and truncate the length at the end of the
- // search.
- //
- // If the string is being lengthened, more work is required. The strategy used
- // here is to make two find() passes through the string. The first pass counts
- // the number of matches to determine the new size. The second pass will
- // either construct the new string into a new buffer (if the existing buffer
- // lacked capacity), or else -- if there is room -- create a region of scratch
- // space after |first_match| by shifting the tail of the string to a higher
- // index, and doing in-place moves from the tail to lower indices thereafter.
- size_t str_length = str->length();
- size_t expansion = 0;
- if (replace_length > find_length) {
- // This operation lengthens the string; determine the new length by counting
- // matches.
- const size_t expansion_per_match = (replace_length - find_length);
- size_t num_matches = 0;
- for (size_t match = first_match; match != StringType::npos;
- match = matcher.Find(*str, match + find_length)) {
- expansion += expansion_per_match;
- ++num_matches;
- }
- const size_t final_length = str_length + expansion;
-
- if (str->capacity() < final_length) {
- // If we'd have to allocate a new buffer to grow the string, build the
- // result directly into the new allocation via append().
- StringType src(str->get_allocator());
- str->swap(src);
- str->reserve(final_length);
-
- size_t pos = 0;
- for (size_t match = first_match;; match = matcher.Find(src, pos)) {
- str->append(src, pos, match - pos);
- str->append(replace_with.data(), replace_length);
- pos = match + find_length;
-
- // A mid-loop test/break enables skipping the final Find() call; the
- // number of matches is known, so don't search past the last one.
- if (!--num_matches)
- break;
- }
-
- // Handle substring after the final match.
- str->append(src, pos, str_length - pos);
- return true;
- }
-
- // Prepare for the copy/move loop below -- expand the string to its final
- // size by shifting the data after the first match to the end of the resized
- // string.
- size_t shift_src = first_match + find_length;
- size_t shift_dst = shift_src + expansion;
-
- // Big |expansion| factors (relative to |str_length|) require padding up to
- // |shift_dst|.
- if (shift_dst > str_length)
- str->resize(shift_dst);
-
- str->replace(shift_dst, str_length - shift_src, *str, shift_src,
- str_length - shift_src);
- str_length = final_length;
- }
-
- // We can alternate replacement and move operations. This won't overwrite the
- // unsearched region of the string so long as |write_offset| <= |read_offset|;
- // that condition is always satisfied because:
- //
- // (a) If the string is being shortened, |expansion| is zero and
- // |write_offset| grows slower than |read_offset|.
- //
- // (b) If the string is being lengthened, |write_offset| grows faster than
- // |read_offset|, but |expansion| is big enough so that |write_offset|
- // will only catch up to |read_offset| at the point of the last match.
- auto* buffer = &((*str)[0]);
- size_t write_offset = first_match;
- size_t read_offset = first_match + expansion;
- do {
- if (replace_length) {
- CharTraits::copy(buffer + write_offset, replace_with.data(),
- replace_length);
- write_offset += replace_length;
- }
- read_offset += find_length;
-
- // min() clamps StringType::npos (the largest unsigned value) to str_length.
- size_t match = std::min(matcher.Find(*str, read_offset), str_length);
-
- size_t length = match - read_offset;
- if (length) {
- CharTraits::move(buffer + write_offset, buffer + read_offset, length);
- write_offset += length;
- read_offset += length;
- }
- } while (read_offset < str_length);
-
- // If we're shortening the string, truncate it now.
- str->resize(write_offset);
- return true;
-}
-
-template <class StringType>
-bool ReplaceCharsT(const StringType& input,
- BasicStringPiece<StringType> find_any_of_these,
- BasicStringPiece<StringType> replace_with,
- StringType* output) {
- // Commonly, this is called with output and input being the same string; in
- // that case, this assignment is inexpensive.
- *output = input;
-
- return DoReplaceMatchesAfterOffset(
- output, 0, CharacterMatcher<StringType>{find_any_of_these}, replace_with,
- ReplaceType::REPLACE_ALL);
-}
-
void ReplaceFirstSubstringAfterOffset(string16* str,
size_t start_offset,
StringPiece16 find_this,
StringPiece16 replace_with) {
- DoReplaceMatchesAfterOffset(str, start_offset,
- SubstringMatcher<string16>{find_this},
- replace_with, ReplaceType::REPLACE_FIRST);
+ internal::DoReplaceMatchesAfterOffset(
+ str, start_offset, internal::SubstringMatcher<string16>{find_this},
+ replace_with, internal::ReplaceType::REPLACE_FIRST);
}
void ReplaceFirstSubstringAfterOffset(std::string* str,
size_t start_offset,
StringPiece find_this,
StringPiece replace_with) {
- DoReplaceMatchesAfterOffset(str, start_offset,
- SubstringMatcher<std::string>{find_this},
- replace_with, ReplaceType::REPLACE_FIRST);
+ internal::DoReplaceMatchesAfterOffset(
+ str, start_offset, internal::SubstringMatcher<std::string>{find_this},
+ replace_with, internal::ReplaceType::REPLACE_FIRST);
}
void ReplaceSubstringsAfterOffset(string16* str,
size_t start_offset,
StringPiece16 find_this,
StringPiece16 replace_with) {
- DoReplaceMatchesAfterOffset(str, start_offset,
- SubstringMatcher<string16>{find_this},
- replace_with, ReplaceType::REPLACE_ALL);
+ internal::DoReplaceMatchesAfterOffset(
+ str, start_offset, internal::SubstringMatcher<string16>{find_this},
+ replace_with, internal::ReplaceType::REPLACE_ALL);
}
void ReplaceSubstringsAfterOffset(std::string* str,
size_t start_offset,
StringPiece find_this,
StringPiece replace_with) {
- DoReplaceMatchesAfterOffset(str, start_offset,
- SubstringMatcher<std::string>{find_this},
- replace_with, ReplaceType::REPLACE_ALL);
-}
-
-template <class string_type>
-inline typename string_type::value_type* WriteIntoT(string_type* str,
- size_t length_with_null) {
- DCHECK_GE(length_with_null, 1u);
- str->reserve(length_with_null);
- str->resize(length_with_null - 1);
- return &((*str)[0]);
+ internal::DoReplaceMatchesAfterOffset(
+ str, start_offset, internal::SubstringMatcher<std::string>{find_this},
+ replace_with, internal::ReplaceType::REPLACE_ALL);
}
char* WriteInto(std::string* str, size_t length_with_null) {
- return WriteIntoT(str, length_with_null);
+ return internal::WriteIntoT(str, length_with_null);
}
char16* WriteInto(string16* str, size_t length_with_null) {
- return WriteIntoT(str, length_with_null);
+ return internal::WriteIntoT(str, length_with_null);
}
-// Generic version for all JoinString overloads. |list_type| must be a sequence
-// (std::vector or std::initializer_list) of strings/StringPieces (std::string,
-// string16, StringPiece or StringPiece16). |string_type| is either std::string
-// or string16.
-template <typename list_type, typename string_type>
-static string_type JoinStringT(const list_type& parts,
- BasicStringPiece<string_type> sep) {
- if (base::empty(parts))
- return string_type();
-
- // Pre-allocate the eventual size of the string. Start with the size of all of
- // the separators (note that this *assumes* parts.size() > 0).
- size_t total_size = (parts.size() - 1) * sep.size();
- for (const auto& part : parts)
- total_size += part.size();
- string_type result;
- result.reserve(total_size);
-
- auto iter = parts.begin();
- DCHECK(iter != parts.end());
- result.append(iter->data(), iter->size());
- ++iter;
-
- for (; iter != parts.end(); ++iter) {
- result.append(sep.data(), sep.size());
- result.append(iter->data(), iter->size());
- }
-
- // Sanity-check that we pre-allocated correctly.
- DCHECK_EQ(total_size, result.size());
-
- return result;
+std::string JoinString(span<const std::string> parts, StringPiece separator) {
+ return internal::JoinStringT(parts, separator);
}
-std::string JoinString(const std::vector<std::string>& parts,
- StringPiece separator) {
- return JoinStringT(parts, separator);
+string16 JoinString(span<const string16> parts, StringPiece16 separator) {
+ return internal::JoinStringT(parts, separator);
}
-string16 JoinString(const std::vector<string16>& parts,
- StringPiece16 separator) {
- return JoinStringT(parts, separator);
-}
-
-std::string JoinString(const std::vector<StringPiece>& parts,
- StringPiece separator) {
- return JoinStringT(parts, separator);
+std::string JoinString(span<const StringPiece> parts, StringPiece separator) {
+ return internal::JoinStringT(parts, separator);
}
-string16 JoinString(const std::vector<StringPiece16>& parts,
- StringPiece16 separator) {
- return JoinStringT(parts, separator);
+string16 JoinString(span<const StringPiece16> parts, StringPiece16 separator) {
+ return internal::JoinStringT(parts, separator);
}
std::string JoinString(std::initializer_list<StringPiece> parts,
StringPiece separator) {
- return JoinStringT(parts, separator);
+ return internal::JoinStringT(parts, separator);
}
string16 JoinString(std::initializer_list<StringPiece16> parts,
StringPiece16 separator) {
- return JoinStringT(parts, separator);
+ return internal::JoinStringT(parts, separator);
}
-template<class FormatStringType, class OutStringType>
-OutStringType DoReplaceStringPlaceholders(
- const FormatStringType& format_string,
- const std::vector<OutStringType>& subst,
- std::vector<size_t>* offsets) {
- size_t substitutions = subst.size();
- DCHECK_LT(substitutions, 10U);
-
- size_t sub_length = 0;
- for (const auto& cur : subst)
- sub_length += cur.length();
-
- OutStringType formatted;
- formatted.reserve(format_string.length() + sub_length);
-
- std::vector<ReplacementOffset> r_offsets;
- for (auto i = format_string.begin(); i != format_string.end(); ++i) {
- if ('$' == *i) {
- if (i + 1 != format_string.end()) {
- ++i;
- if ('$' == *i) {
- while (i != format_string.end() && '$' == *i) {
- formatted.push_back('$');
- ++i;
- }
- --i;
- } else {
- if (*i < '1' || *i > '9') {
- DLOG(ERROR) << "Invalid placeholder: $" << *i;
- continue;
- }
- uintptr_t index = *i - '1';
- if (offsets) {
- ReplacementOffset r_offset(index,
- static_cast<int>(formatted.size()));
- r_offsets.insert(
- std::upper_bound(r_offsets.begin(), r_offsets.end(), r_offset,
- &CompareParameter),
- r_offset);
- }
- if (index < substitutions)
- formatted.append(subst.at(index));
- }
- }
- } else {
- formatted.push_back(*i);
- }
- }
- if (offsets) {
- for (const auto& cur : r_offsets)
- offsets->push_back(cur.offset);
- }
- return formatted;
-}
-
-string16 ReplaceStringPlaceholders(const string16& format_string,
+string16 ReplaceStringPlaceholders(StringPiece16 format_string,
const std::vector<string16>& subst,
std::vector<size_t>* offsets) {
- return DoReplaceStringPlaceholders(format_string, subst, offsets);
+ return internal::DoReplaceStringPlaceholders(format_string, subst, offsets);
}
std::string ReplaceStringPlaceholders(StringPiece format_string,
const std::vector<std::string>& subst,
std::vector<size_t>* offsets) {
- return DoReplaceStringPlaceholders(format_string, subst, offsets);
+ return internal::DoReplaceStringPlaceholders(format_string, subst, offsets);
}
string16 ReplaceStringPlaceholders(const string16& format_string,
const string16& a,
size_t* offset) {
std::vector<size_t> offsets;
- std::vector<string16> subst;
- subst.push_back(a);
- string16 result = ReplaceStringPlaceholders(format_string, subst, &offsets);
+ string16 result = ReplaceStringPlaceholders(format_string, {a}, &offsets);
DCHECK_EQ(1U, offsets.size());
if (offset)
@@ -1065,65 +447,11 @@ string16 ReplaceStringPlaceholders(const string16& format_string,
return result;
}
-#if defined(OS_WIN) && defined(BASE_STRING16_IS_STD_U16STRING)
-
-TrimPositions TrimWhitespace(WStringPiece input,
- TrimPositions positions,
- std::wstring* output) {
- return TrimStringT(input, WStringPiece(kWhitespaceWide), positions, output);
-}
-
-WStringPiece TrimWhitespace(WStringPiece input, TrimPositions positions) {
- return TrimStringPieceT(input, WStringPiece(kWhitespaceWide), positions);
-}
-
-bool TrimString(WStringPiece input,
- WStringPiece trim_chars,
- std::wstring* output) {
- return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE;
-}
-
-WStringPiece TrimString(WStringPiece input,
- WStringPiece trim_chars,
- TrimPositions positions) {
- return TrimStringPieceT(input, trim_chars, positions);
-}
-
-wchar_t* WriteInto(std::wstring* str, size_t length_with_null) {
- return WriteIntoT(str, length_with_null);
-}
-
-#endif
-
-// The following code is compatible with the OpenBSD lcpy interface. See:
-// http://www.gratisoft.us/todd/papers/strlcpy.html
-// ftp://ftp.openbsd.org/pub/OpenBSD/src/lib/libc/string/{wcs,str}lcpy.c
-
-namespace {
-
-template <typename CHAR>
-size_t lcpyT(CHAR* dst, const CHAR* src, size_t dst_size) {
- for (size_t i = 0; i < dst_size; ++i) {
- if ((dst[i] = src[i]) == 0) // We hit and copied the terminating NULL.
- return i;
- }
-
- // We were left off at dst_size. We over copied 1 byte. Null terminate.
- if (dst_size != 0)
- dst[dst_size - 1] = 0;
-
- // Count the rest of the |src|, and return it's length in characters.
- while (src[dst_size]) ++dst_size;
- return dst_size;
-}
-
-} // namespace
-
size_t strlcpy(char* dst, const char* src, size_t dst_size) {
- return lcpyT<char>(dst, src, dst_size);
+ return internal::lcpyT(dst, src, dst_size);
}
size_t wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size) {
- return lcpyT<wchar_t>(dst, src, dst_size);
+ return internal::lcpyT(dst, src, dst_size);
}
} // namespace base
diff --git a/chromium/base/strings/string_util.h b/chromium/base/strings/string_util.h
index f9f5e10ade9..e5a0487cff1 100644
--- a/chromium/base/strings/string_util.h
+++ b/chromium/base/strings/string_util.h
@@ -14,10 +14,12 @@
#include <initializer_list>
#include <string>
+#include <type_traits>
#include <vector>
#include "base/base_export.h"
#include "base/compiler_specific.h"
+#include "base/containers/span.h"
#include "base/stl_util.h"
#include "base/strings/string16.h"
#include "base/strings/string_piece.h" // For implicit conversions.
@@ -84,19 +86,17 @@ BASE_EXPORT bool IsWprintfFormatPortable(const wchar_t* format);
// ASCII-specific tolower. The standard library's tolower is locale sensitive,
// so we don't want to use it here.
-inline char ToLowerASCII(char c) {
- return (c >= 'A' && c <= 'Z') ? (c + ('a' - 'A')) : c;
-}
-inline char16 ToLowerASCII(char16 c) {
+template <typename CharT,
+ typename = std::enable_if_t<std::is_integral<CharT>::value>>
+CharT ToLowerASCII(CharT c) {
return (c >= 'A' && c <= 'Z') ? (c + ('a' - 'A')) : c;
}
// ASCII-specific toupper. The standard library's toupper is locale sensitive,
// so we don't want to use it here.
-inline char ToUpperASCII(char c) {
- return (c >= 'a' && c <= 'z') ? (c + ('A' - 'a')) : c;
-}
-inline char16 ToUpperASCII(char16 c) {
+template <typename CharT,
+ typename = std::enable_if_t<std::is_integral<CharT>::value>>
+CharT ToUpperASCII(CharT c) {
return (c >= 'a' && c <= 'z') ? (c + ('A' - 'a')) : c;
}
@@ -170,10 +170,10 @@ BASE_EXPORT extern const char kUtf8ByteOrderMark[];
// Removes characters in |remove_chars| from anywhere in |input|. Returns true
// if any characters were removed. |remove_chars| must be null-terminated.
// NOTE: Safe to use the same variable for both |input| and |output|.
-BASE_EXPORT bool RemoveChars(const string16& input,
+BASE_EXPORT bool RemoveChars(StringPiece16 input,
StringPiece16 remove_chars,
string16* output);
-BASE_EXPORT bool RemoveChars(const std::string& input,
+BASE_EXPORT bool RemoveChars(StringPiece input,
StringPiece remove_chars,
std::string* output);
@@ -182,11 +182,11 @@ BASE_EXPORT bool RemoveChars(const std::string& input,
// the |replace_with| string. Returns true if any characters were replaced.
// |replace_chars| must be null-terminated.
// NOTE: Safe to use the same variable for both |input| and |output|.
-BASE_EXPORT bool ReplaceChars(const string16& input,
+BASE_EXPORT bool ReplaceChars(StringPiece16 input,
StringPiece16 replace_chars,
StringPiece16 replace_with,
string16* output);
-BASE_EXPORT bool ReplaceChars(const std::string& input,
+BASE_EXPORT bool ReplaceChars(StringPiece input,
StringPiece replace_chars,
StringPiece replace_with,
std::string* output);
@@ -226,69 +226,6 @@ BASE_EXPORT void TruncateUTF8ToByteSize(const std::string& input,
const size_t byte_size,
std::string* output);
-#if defined(WCHAR_T_IS_UTF16)
-// Utility functions to access the underlying string buffer as a wide char
-// pointer.
-//
-// Note: These functions violate strict aliasing when char16 and wchar_t are
-// unrelated types. We thus pass -fno-strict-aliasing to the compiler on
-// non-Windows platforms [1], and rely on it being off in Clang's CL mode [2].
-//
-// [1] https://crrev.com/b9a0976622/build/config/compiler/BUILD.gn#244
-// [2]
-// https://github.com/llvm/llvm-project/blob/1e28a66/clang/lib/Driver/ToolChains/Clang.cpp#L3949
-inline wchar_t* as_writable_wcstr(char16* str) {
- return reinterpret_cast<wchar_t*>(str);
-}
-
-inline wchar_t* as_writable_wcstr(string16& str) {
- return reinterpret_cast<wchar_t*>(data(str));
-}
-
-inline const wchar_t* as_wcstr(const char16* str) {
- return reinterpret_cast<const wchar_t*>(str);
-}
-
-inline const wchar_t* as_wcstr(StringPiece16 str) {
- return reinterpret_cast<const wchar_t*>(str.data());
-}
-
-// Utility functions to access the underlying string buffer as a char16 pointer.
-inline char16* as_writable_u16cstr(wchar_t* str) {
- return reinterpret_cast<char16*>(str);
-}
-
-inline char16* as_writable_u16cstr(std::wstring& str) {
- return reinterpret_cast<char16*>(data(str));
-}
-
-inline const char16* as_u16cstr(const wchar_t* str) {
- return reinterpret_cast<const char16*>(str);
-}
-
-inline const char16* as_u16cstr(WStringPiece str) {
- return reinterpret_cast<const char16*>(str.data());
-}
-
-// Utility functions to convert between base::WStringPiece and
-// base::StringPiece16.
-inline WStringPiece AsWStringPiece(StringPiece16 str) {
- return WStringPiece(as_wcstr(str.data()), str.size());
-}
-
-inline StringPiece16 AsStringPiece16(WStringPiece str) {
- return StringPiece16(as_u16cstr(str.data()), str.size());
-}
-
-inline std::wstring AsWString(StringPiece16 str) {
- return std::wstring(as_wcstr(str.data()), str.size());
-}
-
-inline string16 AsString16(WStringPiece str) {
- return string16(as_u16cstr(str.data()), str.size());
-}
-#endif // defined(WCHAR_T_IS_UTF16)
-
// Trims any whitespace from either end of the input string.
//
// The StringPiece versions return a substring referencing the input buffer.
@@ -315,11 +252,10 @@ BASE_EXPORT StringPiece TrimWhitespaceASCII(StringPiece input,
// (2) If |trim_sequences_with_line_breaks| is true, any other whitespace
// sequences containing a CR or LF are trimmed.
// (3) All other whitespace sequences are converted to single spaces.
-BASE_EXPORT string16 CollapseWhitespace(
- const string16& text,
- bool trim_sequences_with_line_breaks);
+BASE_EXPORT string16 CollapseWhitespace(StringPiece16 text,
+ bool trim_sequences_with_line_breaks);
BASE_EXPORT std::string CollapseWhitespaceASCII(
- const std::string& text,
+ StringPiece text,
bool trim_sequences_with_line_breaks);
// Returns true if |input| is empty or contains only characters found in
@@ -347,6 +283,7 @@ BASE_EXPORT bool IsStringUTF8AllowingNoncharacters(StringPiece str);
// does not leave early if it is not the case.
BASE_EXPORT bool IsStringASCII(StringPiece str);
BASE_EXPORT bool IsStringASCII(StringPiece16 str);
+
#if defined(WCHAR_T_IS_UTF32)
BASE_EXPORT bool IsStringASCII(WStringPiece str);
#endif
@@ -488,8 +425,8 @@ BASE_EXPORT void ReplaceSubstringsAfterOffset(
BASE_EXPORT char* WriteInto(std::string* str, size_t length_with_null);
BASE_EXPORT char16* WriteInto(string16* str, size_t length_with_null);
-// Joins a vector or list of strings into a single string, inserting |separator|
-// (which may be empty) in between all elements.
+// Joins a list of strings into a single string, inserting |separator| (which
+// may be empty) in between all elements.
//
// Note this is inverse of SplitString()/SplitStringPiece() defined in
// string_split.h.
@@ -501,13 +438,13 @@ BASE_EXPORT char16* WriteInto(string16* str, size_t length_with_null);
// copies of those strings are created until the final join operation.
//
// Use StrCat (in base/strings/strcat.h) if you don't need a separator.
-BASE_EXPORT std::string JoinString(const std::vector<std::string>& parts,
+BASE_EXPORT std::string JoinString(span<const std::string> parts,
StringPiece separator);
-BASE_EXPORT string16 JoinString(const std::vector<string16>& parts,
+BASE_EXPORT string16 JoinString(span<const string16> parts,
StringPiece16 separator);
-BASE_EXPORT std::string JoinString(const std::vector<StringPiece>& parts,
+BASE_EXPORT std::string JoinString(span<const StringPiece> parts,
StringPiece separator);
-BASE_EXPORT string16 JoinString(const std::vector<StringPiece16>& parts,
+BASE_EXPORT string16 JoinString(span<const StringPiece16> parts,
StringPiece16 separator);
// Explicit initializer_list overloads are required to break ambiguity when used
// with a literal initializer list (otherwise the compiler would not be able to
@@ -521,10 +458,10 @@ BASE_EXPORT string16 JoinString(std::initializer_list<StringPiece16> parts,
// Additionally, any number of consecutive '$' characters is replaced by that
// number less one. Eg $$->$, $$$->$$, etc. The offsets parameter here can be
// NULL. This only allows you to use up to nine replacements.
-BASE_EXPORT string16 ReplaceStringPlaceholders(
- const string16& format_string,
- const std::vector<string16>& subst,
- std::vector<size_t>* offsets);
+BASE_EXPORT string16
+ReplaceStringPlaceholders(StringPiece16 format_string,
+ const std::vector<string16>& subst,
+ std::vector<size_t>* offsets);
BASE_EXPORT std::string ReplaceStringPlaceholders(
StringPiece format_string,
@@ -536,25 +473,6 @@ BASE_EXPORT string16 ReplaceStringPlaceholders(const string16& format_string,
const string16& a,
size_t* offset);
-#if defined(OS_WIN) && defined(BASE_STRING16_IS_STD_U16STRING)
-BASE_EXPORT TrimPositions TrimWhitespace(WStringPiece input,
- TrimPositions positions,
- std::wstring* output);
-
-BASE_EXPORT WStringPiece TrimWhitespace(WStringPiece input,
- TrimPositions positions);
-
-BASE_EXPORT bool TrimString(WStringPiece input,
- WStringPiece trim_chars,
- std::wstring* output);
-
-BASE_EXPORT WStringPiece TrimString(WStringPiece input,
- WStringPiece trim_chars,
- TrimPositions positions);
-
-BASE_EXPORT wchar_t* WriteInto(std::wstring* str, size_t length_with_null);
-#endif
-
} // namespace base
#if defined(OS_WIN)
diff --git a/chromium/base/strings/string_util_internal.h b/chromium/base/strings/string_util_internal.h
new file mode 100644
index 00000000000..da3fb07cc30
--- /dev/null
+++ b/chromium/base/strings/string_util_internal.h
@@ -0,0 +1,625 @@
+// Copyright 2020 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef BASE_STRINGS_STRING_UTIL_INTERNAL_H_
+#define BASE_STRINGS_STRING_UTIL_INTERNAL_H_
+
+#include "base/logging.h"
+#include "base/notreached.h"
+#include "base/strings/string_piece.h"
+#include "base/third_party/icu/icu_utf.h"
+
+namespace base {
+
+namespace internal {
+
+// Bookkeeping record for ReplaceStringPlaceholders: remembers which parameter
+// was substituted and where in the string the substitution begins.
+struct ReplacementOffset {
+  ReplacementOffset(uintptr_t parameter_index, size_t start_offset)
+      : parameter(parameter_index), offset(start_offset) {}
+
+  // Index of the substituted parameter.
+  uintptr_t parameter;
+
+  // Position in the string at which the substitution starts.
+  size_t offset;
+};
+
+// Ordering predicate for ReplacementOffset that looks only at the parameter
+// index: true when |elem1|'s parameter is smaller than |elem2|'s.
+static bool CompareParameter(const ReplacementOffset& elem1,
+                             const ReplacementOffset& elem2) {
+  return elem2.parameter > elem1.parameter;
+}
+
+// Assuming that a pointer is the size of a "machine word", then
+// uintptr_t is an integer type that is also a machine word.
+using MachineWord = uintptr_t;
+
+// Returns true when none of the address bits selected by
+// sizeof(MachineWord) - 1 are set in |pointer|.
+inline bool IsMachineWordAligned(const void* pointer) {
+  const MachineWord address = reinterpret_cast<MachineWord>(pointer);
+  const MachineWord low_bits_mask = sizeof(MachineWord) - 1;
+  return (address & low_bits_mask) == 0;
+}
+
+// Returns a copy of |str| in which every character has been passed through
+// ToLowerASCII().
+template <typename StringType>
+StringType ToLowerASCIIImpl(BasicStringPiece<StringType> str) {
+  StringType lowered;
+  lowered.reserve(str.size());
+  for (auto ch : str)
+    lowered.push_back(ToLowerASCII(ch));
+  return lowered;
+}
+
+// Returns a copy of |str| in which every character has been passed through
+// ToUpperASCII().
+template <typename StringType>
+StringType ToUpperASCIIImpl(BasicStringPiece<StringType> str) {
+  StringType uppered;
+  uppered.reserve(str.size());
+  for (auto ch : str)
+    uppered.push_back(ToUpperASCII(ch));
+  return uppered;
+}
+
+// Three-way comparison of |a| and |b| after lowercasing each character with
+// ToLowerASCII(). Returns -1 if |a| orders first, 1 if |b| orders first and 0
+// if they compare equal.
+template <class StringType>
+int CompareCaseInsensitiveASCIIT(BasicStringPiece<StringType> a,
+                                 BasicStringPiece<StringType> b) {
+  using CharType = typename StringType::value_type;
+
+  // Walk the part both strings have in common; the first position whose
+  // lowercased characters differ decides the result.
+  const size_t common_length =
+      a.length() < b.length() ? a.length() : b.length();
+  for (size_t i = 0; i < common_length; ++i) {
+    const CharType lower_a = ToLowerASCII(a[i]);
+    const CharType lower_b = ToLowerASCII(b[i]);
+    if (lower_a != lower_b)
+      return lower_a < lower_b ? -1 : 1;
+  }
+
+  // One string is a prefix of the other, so the lengths decide. Equal lengths
+  // are expected to be the common case and are checked first.
+  if (a.length() == b.length())
+    return 0;
+  return a.length() < b.length() ? -1 : 1;
+}
+
+// Copies |input| into |*output| with the characters in |trim_chars| removed
+// from the ends selected by |positions|. Returns the ends that were actually
+// trimmed. If nothing remains, |*output| is cleared and |positions| is
+// returned, or TRIM_NONE when |input| was empty to begin with.
+template <typename Str>
+TrimPositions TrimStringT(BasicStringPiece<Str> input,
+                          BasicStringPiece<Str> trim_chars,
+                          TrimPositions positions,
+                          Str* output) {
+  const bool trim_leading = (positions & TRIM_LEADING) != 0;
+  const bool trim_trailing = (positions & TRIM_TRAILING) != 0;
+
+  // Index of the last character of |input|. For empty input this wraps
+  // around, but the empty case is handled below before the value matters.
+  const size_t final_index = input.length() - 1;
+
+  // First and last characters that survive trimming on the requested ends.
+  const size_t keep_first =
+      trim_leading ? input.find_first_not_of(trim_chars) : 0;
+  const size_t keep_last =
+      trim_trailing ? input.find_last_not_of(trim_chars) : final_index;
+
+  // Nothing survives: report the ends the caller asked about, unless there
+  // was nothing to strip in the first place. |output| is cleared either way.
+  const bool nothing_left = input.empty() || keep_first == Str::npos ||
+                            keep_last == Str::npos;
+  if (nothing_left) {
+    // Read emptiness before clearing, in case |input| refers to |*output|.
+    const bool had_no_input = input.empty();
+    output->clear();
+    return had_no_input ? TRIM_NONE : positions;
+  }
+
+  // Keep the surviving range.
+  output->assign(input.data() + keep_first, keep_last - keep_first + 1);
+
+  // Report which ends lost characters.
+  int trimmed = TRIM_NONE;
+  if (keep_first != 0)
+    trimmed |= TRIM_LEADING;
+  if (keep_last != final_index)
+    trimmed |= TRIM_TRAILING;
+  return static_cast<TrimPositions>(trimmed);
+}
+
+// Returns the sub-piece of |input| that remains after removing the characters
+// in |trim_chars| from the ends selected by |positions|. The result refers to
+// |input|'s buffer and is empty when every character is trimmed.
+template <typename Str>
+BasicStringPiece<Str> TrimStringPieceT(BasicStringPiece<Str> input,
+                                       BasicStringPiece<Str> trim_chars,
+                                       TrimPositions positions) {
+  size_t begin =
+      (positions & TRIM_LEADING) ? input.find_first_not_of(trim_chars) : 0;
+  // find_first_not_of() yields npos when nothing survives. Clamp it so that
+  // substr() is never handed a start position past the end of |input|.
+  if (begin > input.size())
+    begin = input.size();
+
+  // find_last_not_of() yields npos when nothing survives; npos + 1 wraps to 0,
+  // which denotes an empty range.
+  const size_t end = (positions & TRIM_TRAILING)
+                         ? input.find_last_not_of(trim_chars) + 1
+                         : input.size();
+
+  // |end| can be smaller than |begin| when everything was trimmed; avoid the
+  // unsigned wrap-around in the subtraction.
+  const size_t count = end > begin ? end - begin : 0;
+  return input.substr(begin, count);
+}
+
+// Returns a copy of |text| in which leading and trailing whitespace is removed
+// and every interior whitespace sequence is reduced to a single space. When
+// |trim_sequences_with_line_breaks| is true, an interior whitespace sequence
+// that contains a CR or LF is removed entirely instead.
+template <typename STR>
+STR CollapseWhitespaceT(BasicStringPiece<STR> text,
+                        bool trim_sequences_with_line_breaks) {
+  // The output can never be longer than the input; size it up front and
+  // shrink it to the number of characters written at the end.
+  STR result;
+  result.resize(text.size());
+
+  // Set flags to pretend we're already in a trimmed whitespace sequence, so we
+  // will trim any leading whitespace.
+  bool in_whitespace = true;
+  bool already_trimmed = true;
+
+  // Index of the next character to write. This is a size_t because it indexes
+  // and resizes |result|. It is only decremented right after a space has been
+  // written, so it cannot underflow.
+  size_t chars_written = 0;
+  for (auto c : text) {
+    if (IsUnicodeWhitespace(c)) {
+      if (!in_whitespace) {
+        // Reduce all whitespace sequences to a single space.
+        in_whitespace = true;
+        result[chars_written++] = L' ';
+      }
+      if (trim_sequences_with_line_breaks && !already_trimmed &&
+          ((c == '\n') || (c == '\r'))) {
+        // Whitespace sequences containing CR or LF are eliminated entirely.
+        already_trimmed = true;
+        --chars_written;
+      }
+    } else {
+      // Non-whitespace characters are copied straight across.
+      in_whitespace = false;
+      already_trimmed = false;
+      result[chars_written++] = c;
+    }
+  }
+
+  if (in_whitespace && !already_trimmed) {
+    // Any trailing whitespace is eliminated.
+    --chars_written;
+  }
+
+  result.resize(chars_written);
+  return result;
+}
+
+// Returns true if none of the |length| characters starting at |characters|
+// has a bit set outside the 7-bit ASCII range. An empty range counts as ASCII.
+//
+// Instead of testing characters one by one, their bits are OR-ed together (a
+// machine word at a time once the pointer is word-aligned) and the accumulated
+// bits are tested against a mask of the non-ASCII bit positions.
+template <class Char>
+bool DoIsStringASCII(const Char* characters, size_t length) {
+  // Bitmasks to detect non ASCII characters for character sizes of 8, 16 and 32
+  // bits, indexed by sizeof(Char). The entries for other sizes are 0 and are
+  // rejected by the static_assert below.
+  constexpr MachineWord NonASCIIMasks[] = {
+      0, MachineWord(0x8080808080808080ULL), MachineWord(0xFF80FF80FF80FF80ULL),
+      0, MachineWord(0xFFFFFF80FFFFFF80ULL),
+  };
+
+  if (!length)
+    return true;
+  constexpr MachineWord non_ascii_bit_mask = NonASCIIMasks[sizeof(Char)];
+  static_assert(non_ascii_bit_mask, "Error: Invalid Mask");
+  MachineWord all_char_bits = 0;
+  const Char* end = characters + length;
+
+  // Prologue: align the input.
+  while (!IsMachineWordAligned(characters) && characters < end)
+    all_char_bits |= *characters++;
+  if (all_char_bits & non_ascii_bit_mask)
+    return false;
+
+  // Compare the values of CPU word size, testing the mask once per batch of
+  // words. The loop bounds compare the number of characters left rather than
+  // computing |end - N|, which would form a pointer before the buffer for
+  // short inputs.
+  constexpr size_t chars_per_word = sizeof(MachineWord) / sizeof(Char);
+  constexpr int batch_count = 16;
+  constexpr size_t chars_per_batch = batch_count * chars_per_word;
+  while (static_cast<size_t>(end - characters) >= chars_per_batch) {
+    all_char_bits = 0;
+    for (int i = 0; i < batch_count; ++i) {
+      all_char_bits |= *(reinterpret_cast<const MachineWord*>(characters));
+      characters += chars_per_word;
+    }
+    if (all_char_bits & non_ascii_bit_mask)
+      return false;
+  }
+
+  // Process the remaining words.
+  all_char_bits = 0;
+  while (static_cast<size_t>(end - characters) >= chars_per_word) {
+    all_char_bits |= *(reinterpret_cast<const MachineWord*>(characters));
+    characters += chars_per_word;
+  }
+
+  // Process the remaining bytes.
+  while (characters < end)
+    all_char_bits |= *characters++;
+
+  return !(all_char_bits & non_ascii_bit_mask);
+}
+
+// Returns true if every code point that CBU8_NEXT reads from |str| is accepted
+// by |Validator|; returns false as soon as one is rejected. An empty |str|
+// yields true.
+// NOTE(review): CBU8_NEXT comes from icu_utf.h; it presumably advances
+// |char_index| and reports malformed sequences through |code_point| -- confirm
+// there.
+template <bool (*Validator)(uint32_t)>
+inline static bool DoIsStringUTF8(StringPiece str) {
+ const char* src = str.data();
+ // The length is narrowed to int32_t for use with CBU8_NEXT.
+ int32_t src_len = static_cast<int32_t>(str.length());
+ int32_t char_index = 0;
+
+ while (char_index < src_len) {
+ int32_t code_point;
+ CBU8_NEXT(src, char_index, src_len, code_point);
+ // |code_point| is converted to uint32_t when passed to |Validator|.
+ if (!Validator(code_point))
+ return false;
+ }
+ return true;
+}
+
+// Implementation note: Normally this function will be called with a hardcoded
+// constant for the lowercase_ascii parameter. Constructing a StringPiece from
+// a C constant requires running strlen, so the result will be two passes
+// through the buffers, one to find the length of lowercase_ascii, and one to
+// compare each letter.
+//
+// This function could have taken a const char* to avoid this and only do one
+// pass through the string. But the strlen is faster than the case-insensitive
+// compares and lets us early-exit in the case that the strings are different
+// lengths (will often be the case for non-matches). So whether one approach or
+// the other will be faster depends on the case.
+//
+// The hardcoded strings are typically very short so it doesn't matter, and the
+// string piece gives additional flexibility for the caller (doesn't have to be
+// null terminated) so we choose the StringPiece route.
+template <typename Str>
+static inline bool DoLowerCaseEqualsASCII(BasicStringPiece<Str> str,
+                                          StringPiece lowercase_ascii) {
+  // Pieces of different lengths can never match.
+  if (str.size() != lowercase_ascii.size())
+    return false;
+
+  // Lowercase each character of |str| and compare it with the character at
+  // the same position in |lowercase_ascii|.
+  auto expected = lowercase_ascii.begin();
+  for (auto it = str.begin(); it != str.end(); ++it, ++expected) {
+    if (!(ToLowerASCII(*it) == *expected))
+      return false;
+  }
+  return true;
+}
+
+// Returns true if |str| begins with |search_for|, compared according to
+// |case_sensitivity|.
+template <typename Str>
+bool StartsWithT(BasicStringPiece<Str> str,
+                 BasicStringPiece<Str> search_for,
+                 CompareCase case_sensitivity) {
+  // A prefix cannot be longer than the string itself.
+  if (search_for.size() > str.size())
+    return false;
+
+  // The leading part of |str| that has the same length as |search_for|.
+  const BasicStringPiece<Str> prefix = str.substr(0, search_for.size());
+
+  if (case_sensitivity == CompareCase::SENSITIVE)
+    return prefix == search_for;
+
+  if (case_sensitivity == CompareCase::INSENSITIVE_ASCII) {
+    return std::equal(
+        search_for.begin(), search_for.end(), prefix.begin(),
+        CaseInsensitiveCompareASCII<typename Str::value_type>());
+  }
+
+  NOTREACHED();
+  return false;
+}
+
+// Returns true if |str| ends with |search_for|, compared according to
+// |case_sensitivity|.
+template <typename Str>
+bool EndsWithT(BasicStringPiece<Str> str,
+               BasicStringPiece<Str> search_for,
+               CompareCase case_sensitivity) {
+  // A suffix cannot be longer than the string itself.
+  if (search_for.size() > str.size())
+    return false;
+
+  // The trailing part of |str| that has the same length as |search_for|.
+  const size_t suffix_start = str.size() - search_for.size();
+  const BasicStringPiece<Str> suffix =
+      str.substr(suffix_start, search_for.size());
+
+  if (case_sensitivity == CompareCase::SENSITIVE)
+    return suffix == search_for;
+
+  if (case_sensitivity == CompareCase::INSENSITIVE_ASCII) {
+    return std::equal(
+        suffix.begin(), suffix.end(), search_for.begin(),
+        CaseInsensitiveCompareASCII<typename Str::value_type>());
+  }
+
+  NOTREACHED();
+  return false;
+}
+
+// Matcher for DoReplaceMatchesAfterOffset() whose matches are whole
+// occurrences of |find_this|.
+template <class StringType>
+struct SubstringMatcher {
+  BasicStringPiece<StringType> find_this;
+
+  // Forwards to input.find() to look for |find_this| starting at |pos|.
+  size_t Find(const StringType& input, size_t pos) {
+    const auto* needle = find_this.data();
+    const size_t needle_length = find_this.length();
+    return input.find(needle, pos, needle_length);
+  }
+
+  // A match spans as many characters as |find_this| has.
+  size_t MatchSize() { return find_this.length(); }
+};
+
+// Matcher for DoReplaceMatchesAfterOffset() whose matches are single
+// characters taken from |find_any_of_these|.
+template <class StringType>
+struct CharacterMatcher {
+  BasicStringPiece<StringType> find_any_of_these;
+
+  // Forwards to input.find_first_of() to look for any character of
+  // |find_any_of_these| starting at |pos|.
+  size_t Find(const StringType& input, size_t pos) {
+    const auto* candidates = find_any_of_these.data();
+    const size_t candidate_count = find_any_of_these.length();
+    return input.find_first_of(candidates, pos, candidate_count);
+  }
+
+  // A match is always exactly one character.
+  constexpr size_t MatchSize() { return 1; }
+};
+
+// Selects whether DoReplaceMatchesAfterOffset() replaces every match or only
+// the first one it finds.
+enum class ReplaceType { REPLACE_ALL, REPLACE_FIRST };
+
+// Runs in O(n) time in the length of |str|, and transforms the string without
+// reallocating when possible. Returns |true| if any matches were found.
+//
+// This is parameterized on a |Matcher| traits type, so that it can be the
+// implementation for both ReplaceChars() and ReplaceSubstringsAfterOffset().
+//
+// |str| is edited in place. The search for the first match starts at
+// |initial_offset|. |matcher| must provide Find(string, pos) and MatchSize();
+// a MatchSize() of zero makes this a no-op that returns false. Each match is
+// replaced by |replace_with|, and |replace_type| selects whether only the
+// first match or every match is replaced.
+template <class StringType, class Matcher>
+bool DoReplaceMatchesAfterOffset(StringType* str,
+ size_t initial_offset,
+ Matcher matcher,
+ BasicStringPiece<StringType> replace_with,
+ ReplaceType replace_type) {
+ using CharTraits = typename StringType::traits_type;
+
+ const size_t find_length = matcher.MatchSize();
+ if (!find_length)
+ return false;
+
+ // If the find string doesn't appear, there's nothing to do.
+ size_t first_match = matcher.Find(*str, initial_offset);
+ if (first_match == StringType::npos)
+ return false;
+
+ // If we're only replacing one instance, there's no need to do anything
+ // complicated.
+ const size_t replace_length = replace_with.length();
+ if (replace_type == ReplaceType::REPLACE_FIRST) {
+ str->replace(first_match, find_length, replace_with.data(), replace_length);
+ return true;
+ }
+
+ // If the find and replace strings are the same length, we can simply
+ // overwrite each instance in place with CharTraits::copy(), and finish the
+ // entire operation in O(n) time.
+ if (find_length == replace_length) {
+ auto* buffer = &((*str)[0]);
+ for (size_t offset = first_match; offset != StringType::npos;
+ offset = matcher.Find(*str, offset + replace_length)) {
+ CharTraits::copy(buffer + offset, replace_with.data(), replace_length);
+ }
+ return true;
+ }
+
+ // Since the find and replace strings aren't the same length, a loop that
+ // called replace() on each instance would be O(n^2) in the worst case, as
+ // replace() will shift the entire remaining string each time. We need to be
+ // more clever to keep things O(n).
+ //
+ // When the string is being shortened, it's possible to just shift the matches
+ // down in one pass while finding, and truncate the length at the end of the
+ // search.
+ //
+ // If the string is being lengthened, more work is required. The strategy used
+ // here is to make two find() passes through the string. The first pass counts
+ // the number of matches to determine the new size. The second pass will
+ // either construct the new string into a new buffer (if the existing buffer
+ // lacked capacity), or else -- if there is room -- create a region of scratch
+ // space after |first_match| by shifting the tail of the string to a higher
+ // index, and doing in-place moves from the tail to lower indices thereafter.
+ size_t str_length = str->length();
+ size_t expansion = 0;
+ if (replace_length > find_length) {
+ // This operation lengthens the string; determine the new length by counting
+ // matches.
+ const size_t expansion_per_match = (replace_length - find_length);
+ size_t num_matches = 0;
+ for (size_t match = first_match; match != StringType::npos;
+ match = matcher.Find(*str, match + find_length)) {
+ expansion += expansion_per_match;
+ ++num_matches;
+ }
+ const size_t final_length = str_length + expansion;
+
+ if (str->capacity() < final_length) {
+ // If we'd have to allocate a new buffer to grow the string, build the
+ // result directly into the new allocation via append().
+ // After the swap, |src| holds the original contents and |*str| is empty.
+ StringType src(str->get_allocator());
+ str->swap(src);
+ str->reserve(final_length);
+
+ size_t pos = 0;
+ for (size_t match = first_match;; match = matcher.Find(src, pos)) {
+ str->append(src, pos, match - pos);
+ str->append(replace_with.data(), replace_length);
+ pos = match + find_length;
+
+ // A mid-loop test/break enables skipping the final Find() call; the
+ // number of matches is known, so don't search past the last one.
+ if (!--num_matches)
+ break;
+ }
+
+ // Handle substring after the final match.
+ str->append(src, pos, str_length - pos);
+ return true;
+ }
+
+ // Prepare for the copy/move loop below -- expand the string to its final
+ // size by shifting the data after the first match to the end of the resized
+ // string.
+ size_t shift_src = first_match + find_length;
+ size_t shift_dst = shift_src + expansion;
+
+ // Big |expansion| factors (relative to |str_length|) require padding up to
+ // |shift_dst|.
+ if (shift_dst > str_length)
+ str->resize(shift_dst);
+
+ str->replace(shift_dst, str_length - shift_src, *str, shift_src,
+ str_length - shift_src);
+ str_length = final_length;
+ }
+
+ // We can alternate replacement and move operations. This won't overwrite the
+ // unsearched region of the string so long as |write_offset| <= |read_offset|;
+ // that condition is always satisfied because:
+ //
+ // (a) If the string is being shortened, |expansion| is zero and
+ // |write_offset| grows slower than |read_offset|.
+ //
+ // (b) If the string is being lengthened, |write_offset| grows faster than
+ // |read_offset|, but |expansion| is big enough so that |write_offset|
+ // will only catch up to |read_offset| at the point of the last match.
+ auto* buffer = &((*str)[0]);
+ size_t write_offset = first_match;
+ size_t read_offset = first_match + expansion;
+ do {
+ // Emit the replacement text; skipped when |replace_with| is empty.
+ if (replace_length) {
+ CharTraits::copy(buffer + write_offset, replace_with.data(),
+ replace_length);
+ write_offset += replace_length;
+ }
+ // Step over the match that was just replaced.
+ read_offset += find_length;
+
+ // min() clamps StringType::npos (the largest unsigned value) to str_length.
+ size_t match = std::min(matcher.Find(*str, read_offset), str_length);
+
+ // Move the text between this match and the next one (or the end of the
+ // string) to its final position.
+ size_t length = match - read_offset;
+ if (length) {
+ CharTraits::move(buffer + write_offset, buffer + read_offset, length);
+ write_offset += length;
+ read_offset += length;
+ }
+ } while (read_offset < str_length);
+
+ // If we're shortening the string, truncate it now.
+ str->resize(write_offset);
+ return true;
+}
+
+// Writes |input| to |*output| with every character found in
+// |find_any_of_these| replaced by |replace_with|. Returns true if at least one
+// character was replaced.
+template <class StringType>
+bool ReplaceCharsT(BasicStringPiece<StringType> input,
+                   BasicStringPiece<StringType> find_any_of_these,
+                   BasicStringPiece<StringType> replace_with,
+                   StringType* output) {
+  // Callers commonly pass the same string as |input| and |output|; the copy
+  // is only needed when |input| is not already |*output|'s contents.
+  const bool input_is_output =
+      input.data() == output->data() && input.size() == output->size();
+  if (!input_is_output)
+    output->assign(input.data(), input.size());
+
+  CharacterMatcher<StringType> matcher{find_any_of_these};
+  return DoReplaceMatchesAfterOffset(output, 0, matcher, replace_with,
+                                     ReplaceType::REPLACE_ALL);
+}
+
+// Resizes |*str| to hold |length_with_null| - 1 characters and returns a
+// pointer to its first character. |length_with_null| must be at least 1.
+template <class string_type>
+inline typename string_type::value_type* WriteIntoT(string_type* str,
+                                                    size_t length_with_null) {
+  DCHECK_GE(length_with_null, 1u);
+  const size_t length_without_null = length_with_null - 1;
+  str->reserve(length_with_null);
+  str->resize(length_without_null);
+  return &((*str)[0]);
+}
+
+// Shared implementation of all JoinString overloads. |list_type| must be a
+// sequence (base::span or std::initializer_list) of strings/StringPieces
+// (std::string, string16, StringPiece or StringPiece16). |string_type| is
+// either std::string or string16.
+template <typename list_type, typename string_type>
+static string_type JoinStringT(list_type parts,
+                               BasicStringPiece<string_type> sep) {
+  if (base::empty(parts))
+    return string_type();
+
+  // Work out the final size up front: all parts plus one separator between
+  // each pair of neighbours (|parts| is known to be non-empty here).
+  size_t total_size = 0;
+  for (const auto& part : parts)
+    total_size += part.size();
+  total_size += (parts.size() - 1) * sep.size();
+
+  string_type result;
+  result.reserve(total_size);
+
+  // Append each part, with a separator before every part but the first.
+  auto iter = parts.begin();
+  DCHECK(iter != parts.end());
+  while (true) {
+    result.append(iter->data(), iter->size());
+    ++iter;
+    if (iter == parts.end())
+      break;
+    result.append(sep.data(), sep.size());
+  }
+
+  // Sanity-check that we pre-allocated correctly.
+  DCHECK_EQ(total_size, result.size());
+
+  return result;
+}
+
+// Expands the $1..$9 placeholders in |format_string| with the matching entries
+// of |subst| and returns the result.
+//
+// - "$N" (N in 1..9) is replaced by subst[N - 1]; if |subst| has no such
+//   entry the placeholder expands to nothing.
+// - A run of two or more '$' is emitted with one '$' fewer.
+// - '$' followed by any other character is reported via DLOG(ERROR) and both
+//   characters are dropped. A '$' at the very end of |format_string| is
+//   dropped as well.
+//
+// If |offsets| is non-null, the start offset of every placeholder in the
+// result is appended to it, ordered by placeholder number. |subst| is expected
+// to hold fewer than 10 entries (DCHECKed).
+template <class StringType>
+StringType DoReplaceStringPlaceholders(
+    BasicStringPiece<StringType> format_string,
+    const std::vector<StringType>& subst,
+    std::vector<size_t>* offsets) {
+  const size_t substitutions = subst.size();
+  DCHECK_LT(substitutions, 10U);
+
+  // Reserve enough room for the format string plus every substitution.
+  size_t sub_length = 0;
+  for (const auto& cur : subst)
+    sub_length += cur.length();
+
+  StringType formatted;
+  formatted.reserve(format_string.length() + sub_length);
+
+  // Placeholder offsets, kept sorted by parameter index.
+  std::vector<ReplacementOffset> r_offsets;
+  for (auto i = format_string.begin(); i != format_string.end(); ++i) {
+    if ('$' == *i) {
+      if (i + 1 != format_string.end()) {
+        ++i;
+        if ('$' == *i) {
+          // Emit one '$' for every '$' after the first one in the run.
+          while (i != format_string.end() && '$' == *i) {
+            formatted.push_back('$');
+            ++i;
+          }
+          // Step back so the loop increment lands on the character that
+          // follows the run.
+          --i;
+        } else {
+          if (*i < '1' || *i > '9') {
+            DLOG(ERROR) << "Invalid placeholder: $" << *i;
+            continue;
+          }
+          uintptr_t index = *i - '1';
+          if (offsets) {
+            // Store the offset as size_t; narrowing it to int first would
+            // corrupt offsets in very large strings.
+            ReplacementOffset r_offset(index, formatted.size());
+            r_offsets.insert(
+                std::upper_bound(r_offsets.begin(), r_offsets.end(), r_offset,
+                                 &CompareParameter),
+                r_offset);
+          }
+          if (index < substitutions)
+            formatted.append(subst.at(index));
+        }
+      }
+    } else {
+      formatted.push_back(*i);
+    }
+  }
+  if (offsets) {
+    for (const auto& cur : r_offsets)
+      offsets->push_back(cur.offset);
+  }
+  return formatted;
+}
+
+// The following code is compatible with the OpenBSD lcpy interface. See:
+// http://www.gratisoft.us/todd/papers/strlcpy.html
+// ftp://ftp.openbsd.org/pub/OpenBSD/src/lib/libc/string/{wcs,str}lcpy.c
+
+// Copies |src| into |dst|, writing at most |dst_size| characters including the
+// terminator, and returns the length of |src|. A return value that is not
+// smaller than |dst_size| therefore means the copy was truncated.
+template <typename CHAR>
+size_t lcpyT(CHAR* dst, const CHAR* src, size_t dst_size) {
+  // Copy character by character and stop as soon as the terminator has been
+  // copied.
+  size_t copied = 0;
+  while (copied < dst_size) {
+    dst[copied] = src[copied];
+    if (dst[copied] == 0)
+      return copied;
+    ++copied;
+  }
+
+  // |src| did not fit. Overwrite the last copied character with a terminator,
+  // unless |dst| has no room at all.
+  if (dst_size != 0)
+    dst[dst_size - 1] = 0;
+
+  // Count the rest of |src| and return its length in characters.
+  size_t src_length = dst_size;
+  while (src[src_length] != 0)
+    ++src_length;
+  return src_length;
+}
+
+} // namespace internal
+
+} // namespace base
+
+#endif // BASE_STRINGS_STRING_UTIL_INTERNAL_H_
diff --git a/chromium/base/strings/string_util_posix.h b/chromium/base/strings/string_util_posix.h
index 8299118e106..c869df78e58 100644
--- a/chromium/base/strings/string_util_posix.h
+++ b/chromium/base/strings/string_util_posix.h
@@ -11,7 +11,7 @@
#include <string.h>
#include <wchar.h>
-#include "base/logging.h"
+#include "base/check.h"
namespace base {
diff --git a/chromium/base/strings/string_util_unittest.cc b/chromium/base/strings/string_util_unittest.cc
index f1132b9be4e..b9be7fbfa84 100644
--- a/chromium/base/strings/string_util_unittest.cc
+++ b/chromium/base/strings/string_util_unittest.cc
@@ -12,6 +12,7 @@
#include <algorithm>
#include <type_traits>
+#include "base/bits.h"
#include "base/stl_util.h"
#include "base/strings/string16.h"
#include "base/strings/utf_string_conversions.h"
@@ -678,28 +679,28 @@ TEST(StringUtilTest, FormatBytesUnlocalized) {
int64_t bytes;
const char* expected;
} cases[] = {
- // Expected behavior: we show one post-decimal digit when we have
- // under two pre-decimal digits, except in cases where it makes no
- // sense (zero or bytes).
- // Since we switch units once we cross the 1000 mark, this keeps
- // the display of file sizes or bytes consistently around three
- // digits.
- {0, "0 B"},
- {512, "512 B"},
- {1024*1024, "1.0 MB"},
- {1024*1024*1024, "1.0 GB"},
- {10LL*1024*1024*1024, "10.0 GB"},
- {99LL*1024*1024*1024, "99.0 GB"},
- {105LL*1024*1024*1024, "105 GB"},
- {105LL*1024*1024*1024 + 500LL*1024*1024, "105 GB"},
- {~(1LL << 63), "8192 PB"},
-
- {99*1024 + 103, "99.1 kB"},
- {1024*1024 + 103, "1.0 MB"},
- {1024*1024 + 205 * 1024, "1.2 MB"},
- {1024*1024*1024 + (927 * 1024*1024), "1.9 GB"},
- {10LL*1024*1024*1024, "10.0 GB"},
- {100LL*1024*1024*1024, "100 GB"},
+ // Expected behavior: we show one post-decimal digit when we have
+ // under two pre-decimal digits, except in cases where it makes no
+ // sense (zero or bytes).
+ // Since we switch units once we cross the 1000 mark, this keeps
+ // the display of file sizes or bytes consistently around three
+ // digits.
+ {0, "0 B"},
+ {512, "512 B"},
+ {1024 * 1024, "1.0 MB"},
+ {1024 * 1024 * 1024, "1.0 GB"},
+ {10LL * 1024 * 1024 * 1024, "10.0 GB"},
+ {99LL * 1024 * 1024 * 1024, "99.0 GB"},
+ {105LL * 1024 * 1024 * 1024, "105 GB"},
+ {105LL * 1024 * 1024 * 1024 + 500LL * 1024 * 1024, "105 GB"},
+ {~(bits::LeftmostBit<int64_t>()), "8192 PB"},
+
+ {99 * 1024 + 103, "99.1 kB"},
+ {1024 * 1024 + 103, "1.0 MB"},
+ {1024 * 1024 + 205 * 1024, "1.2 MB"},
+ {1024 * 1024 * 1024 + (927 * 1024 * 1024), "1.9 GB"},
+ {10LL * 1024 * 1024 * 1024, "10.0 GB"},
+ {100LL * 1024 * 1024 * 1024, "100 GB"},
};
for (const auto& i : cases) {
diff --git a/chromium/base/strings/string_util_win.cc b/chromium/base/strings/string_util_win.cc
new file mode 100644
index 00000000000..2c19729e0a5
--- /dev/null
+++ b/chromium/base/strings/string_util_win.cc
@@ -0,0 +1,145 @@
+// Copyright 2020 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "base/strings/string_util_win.h"
+
+#include "base/strings/string_util_internal.h"
+
+namespace base {
+
+#if defined(BASE_STRING16_IS_STD_U16STRING)
+ // Wide-string overload: hands the piece's buffer and length to
+ // internal::DoIsStringASCII and returns its verdict.
+ bool IsStringASCII(WStringPiece str) {
+   const auto* characters = str.data();
+   const auto length = str.length();
+   return internal::DoIsStringASCII(characters, length);
+ }
+
+ // Wide-string overload; the conversion itself is done by
+ // internal::ToLowerASCIIImpl.
+ std::wstring ToLowerASCII(WStringPiece str) {
+   std::wstring lowered = internal::ToLowerASCIIImpl(str);
+   return lowered;
+ }
+
+ // Wide-string overload; the conversion itself is done by
+ // internal::ToUpperASCIIImpl.
+ std::wstring ToUpperASCII(WStringPiece str) {
+   std::wstring uppered = internal::ToUpperASCIIImpl(str);
+   return uppered;
+ }
+
+ // Wide-string overload; returns whatever
+ // internal::CompareCaseInsensitiveASCIIT reports for |a| and |b|.
+ int CompareCaseInsensitiveASCII(WStringPiece a, WStringPiece b) {
+   const int ordering = internal::CompareCaseInsensitiveASCIIT(a, b);
+   return ordering;
+ }
+
+ // Wide-string overload. Pieces of different sizes are never equal; otherwise
+ // the answer comes from internal::CompareCaseInsensitiveASCIIT.
+ bool EqualsCaseInsensitiveASCII(WStringPiece a, WStringPiece b) {
+   if (a.size() != b.size())
+     return false;
+   return internal::CompareCaseInsensitiveASCIIT(a, b) == 0;
+ }
+
+ // Wide-string overload: calls internal::ReplaceCharsT with an empty
+ // replacement piece.
+ bool RemoveChars(WStringPiece input,
+                  WStringPiece remove_chars,
+                  std::wstring* output) {
+   const WStringPiece no_replacement;
+   return internal::ReplaceCharsT(input, remove_chars, no_replacement, output);
+ }
+
+ // Wide-string overload; forwards all arguments to internal::ReplaceCharsT.
+ bool ReplaceChars(WStringPiece input,
+                   WStringPiece replace_chars,
+                   WStringPiece replace_with,
+                   std::wstring* output) {
+   const bool result =
+       internal::ReplaceCharsT(input, replace_chars, replace_with, output);
+   return result;
+ }
+
+ // Wide-string overload: runs internal::TrimStringT with TRIM_ALL and returns
+ // true unless it reports TRIM_NONE.
+ bool TrimString(WStringPiece input,
+                 WStringPiece trim_chars,
+                 std::wstring* output) {
+   const auto trimmed_positions =
+       internal::TrimStringT(input, trim_chars, TRIM_ALL, output);
+   return trimmed_positions != TRIM_NONE;
+ }
+
+ // Wide-string overload returning a piece; forwards to
+ // internal::TrimStringPieceT.
+ WStringPiece TrimString(WStringPiece input,
+                         WStringPiece trim_chars,
+                         TrimPositions positions) {
+   const WStringPiece trimmed =
+       internal::TrimStringPieceT(input, trim_chars, positions);
+   return trimmed;
+ }
+
+ // Wide-string overload: trims the characters in kWhitespaceWide from |input|
+ // via internal::TrimStringT, writing the result to |output|.
+ TrimPositions TrimWhitespace(WStringPiece input,
+                              TrimPositions positions,
+                              std::wstring* output) {
+   const WStringPiece whitespace(kWhitespaceWide);
+   return internal::TrimStringT(input, whitespace, positions, output);
+ }
+
+ // Wide-string overload returning a piece: trims the characters in
+ // kWhitespaceWide via internal::TrimStringPieceT.
+ WStringPiece TrimWhitespace(WStringPiece input, TrimPositions positions) {
+   const WStringPiece whitespace(kWhitespaceWide);
+   return internal::TrimStringPieceT(input, whitespace, positions);
+ }
+
+ // Wide-string overload; forwards to internal::CollapseWhitespaceT.
+ std::wstring CollapseWhitespace(WStringPiece text,
+                                 bool trim_sequences_with_line_breaks) {
+   std::wstring collapsed =
+       internal::CollapseWhitespaceT(text, trim_sequences_with_line_breaks);
+   return collapsed;
+ }
+
+ // Wide-string overload. Returns true when |input| contains no character
+ // outside |characters|.
+ bool ContainsOnlyChars(WStringPiece input, WStringPiece characters) {
+   // Compare against the npos of the piece type that is actually searched.
+   return input.find_first_not_of(characters) == WStringPiece::npos;
+ }
+
+ // Wide-string overload; forwards to internal::DoLowerCaseEqualsASCII.
+ bool LowerCaseEqualsASCII(WStringPiece str, StringPiece lowercase_ascii) {
+   const bool matches = internal::DoLowerCaseEqualsASCII(str, lowercase_ascii);
+   return matches;
+ }
+
+ // Wide-string overload: element-wise comparison of |ascii| against |str|
+ // over both full ranges.
+ bool EqualsASCII(WStringPiece str, StringPiece ascii) {
+   const bool same =
+       std::equal(ascii.begin(), ascii.end(), str.begin(), str.end());
+   return same;
+ }
+
+ // Wide-string overload; forwards to internal::StartsWithT.
+ bool StartsWith(WStringPiece str,
+                 WStringPiece search_for,
+                 CompareCase case_sensitivity) {
+   const bool has_prefix =
+       internal::StartsWithT(str, search_for, case_sensitivity);
+   return has_prefix;
+ }
+
+ // Wide-string overload; forwards to internal::EndsWithT.
+ bool EndsWith(WStringPiece str,
+               WStringPiece search_for,
+               CompareCase case_sensitivity) {
+   const bool has_suffix =
+       internal::EndsWithT(str, search_for, case_sensitivity);
+   return has_suffix;
+ }
+
+ // Wide-string overload: calls internal::DoReplaceMatchesAfterOffset with a
+ // substring matcher for |find_this| and ReplaceType::REPLACE_FIRST.
+ void ReplaceFirstSubstringAfterOffset(std::wstring* str,
+                                       size_t start_offset,
+                                       WStringPiece find_this,
+                                       WStringPiece replace_with) {
+   internal::SubstringMatcher<std::wstring> matcher{find_this};
+   const auto mode = internal::ReplaceType::REPLACE_FIRST;
+   internal::DoReplaceMatchesAfterOffset(str, start_offset, matcher,
+                                         replace_with, mode);
+ }
+
+ // Wide-string overload: calls internal::DoReplaceMatchesAfterOffset with a
+ // substring matcher for |find_this| and ReplaceType::REPLACE_ALL.
+ void ReplaceSubstringsAfterOffset(std::wstring* str,
+                                   size_t start_offset,
+                                   WStringPiece find_this,
+                                   WStringPiece replace_with) {
+   internal::SubstringMatcher<std::wstring> matcher{find_this};
+   const auto mode = internal::ReplaceType::REPLACE_ALL;
+   internal::DoReplaceMatchesAfterOffset(str, start_offset, matcher,
+                                         replace_with, mode);
+ }
+
+ // Wide-string overload; forwards to internal::WriteIntoT.
+ wchar_t* WriteInto(std::wstring* str, size_t length_with_null) {
+   wchar_t* const buffer = internal::WriteIntoT(str, length_with_null);
+   return buffer;
+ }
+
+ // Wide-string overload for a span of std::wstring; forwards to
+ // internal::JoinStringT.
+ std::wstring JoinString(span<const std::wstring> parts,
+                         WStringPiece separator) {
+   std::wstring joined = internal::JoinStringT(parts, separator);
+   return joined;
+ }
+
+ // Wide-string overload for a span of WStringPiece; forwards to
+ // internal::JoinStringT.
+ std::wstring JoinString(span<const WStringPiece> parts,
+                         WStringPiece separator) {
+   std::wstring joined = internal::JoinStringT(parts, separator);
+   return joined;
+ }
+
+ // Wide-string overload for an initializer list of WStringPiece; forwards to
+ // internal::JoinStringT.
+ std::wstring JoinString(std::initializer_list<WStringPiece> parts,
+                         WStringPiece separator) {
+   std::wstring joined = internal::JoinStringT(parts, separator);
+   return joined;
+ }
+
+ // Wide-string overload; forwards to internal::DoReplaceStringPlaceholders.
+ std::wstring ReplaceStringPlaceholders(WStringPiece format_string,
+                                        const std::vector<std::wstring>& subst,
+                                        std::vector<size_t>* offsets) {
+   std::wstring formatted =
+       internal::DoReplaceStringPlaceholders(format_string, subst, offsets);
+   return formatted;
+ }
+
+#endif
+
+} // namespace base
diff --git a/chromium/base/strings/string_util_win.h b/chromium/base/strings/string_util_win.h
index 7f260bfc8b4..c39133d923c 100644
--- a/chromium/base/strings/string_util_win.h
+++ b/chromium/base/strings/string_util_win.h
@@ -11,7 +11,14 @@
#include <string.h>
#include <wchar.h>
-#include "base/logging.h"
+#include <string>
+#include <vector>
+
+#include "base/check.h"
+#include "base/containers/span.h"
+#include "base/strings/string16.h"
+#include "base/strings/string_piece.h"
+#include "base/strings/string_util.h"
namespace base {
@@ -39,6 +46,154 @@ inline int vswprintf(wchar_t* buffer, size_t size,
return length;
}
+// Utility functions to access the underlying string buffer as a wide char
+// pointer.
+//
+// Note: These functions violate strict aliasing when char16 and wchar_t are
+// unrelated types. We thus pass -fno-strict-aliasing to the compiler on
+// non-Windows platforms [1], and rely on it being off in Clang's CL mode [2].
+//
+// [1] https://crrev.com/b9a0976622/build/config/compiler/BUILD.gn#244
+// [2]
+// https://github.com/llvm/llvm-project/blob/1e28a66/clang/lib/Driver/ToolChains/Clang.cpp#L3949
+ inline wchar_t* as_writable_wcstr(char16* str) {
+   // View the char16 buffer through a wchar_t pointer; nothing is copied.
+   auto* const wide = reinterpret_cast<wchar_t*>(str);
+   return wide;
+ }
+
+ // Same as above for a string16: reinterprets the pointer returned by
+ // data(str).
+ inline wchar_t* as_writable_wcstr(string16& str) {
+   auto* const buffer = data(str);
+   return reinterpret_cast<wchar_t*>(buffer);
+ }
+
+ // Read-only variant: views a const char16 buffer through a const wchar_t
+ // pointer.
+ inline const wchar_t* as_wcstr(const char16* str) {
+   const auto* const wide = reinterpret_cast<const wchar_t*>(str);
+   return wide;
+ }
+
+ // Read-only variant for a StringPiece16: reinterprets the piece's data()
+ // pointer.
+ inline const wchar_t* as_wcstr(StringPiece16 str) {
+   const auto* const chars = str.data();
+   return reinterpret_cast<const wchar_t*>(chars);
+ }
+
+// Utility functions to access the underlying string buffer as a char16 pointer.
+ inline char16* as_writable_u16cstr(wchar_t* str) {
+   // View the wchar_t buffer through a char16 pointer; nothing is copied.
+   auto* const narrow16 = reinterpret_cast<char16*>(str);
+   return narrow16;
+ }
+
+ // Same as above for a std::wstring: reinterprets the pointer returned by
+ // data(str).
+ inline char16* as_writable_u16cstr(std::wstring& str) {
+   auto* const buffer = data(str);
+   return reinterpret_cast<char16*>(buffer);
+ }
+
+ // Read-only variant: views a const wchar_t buffer through a const char16
+ // pointer.
+ inline const char16* as_u16cstr(const wchar_t* str) {
+   const auto* const narrow16 = reinterpret_cast<const char16*>(str);
+   return narrow16;
+ }
+
+ // Read-only variant for a WStringPiece: reinterprets the piece's data()
+ // pointer.
+ inline const char16* as_u16cstr(WStringPiece str) {
+   const auto* const chars = str.data();
+   return reinterpret_cast<const char16*>(chars);
+ }
+
+// Utility functions to convert between base::WStringPiece and
+// base::StringPiece16.
+ inline WStringPiece AsWStringPiece(StringPiece16 str) {
+   // Builds a piece over the same buffer, with the same size, as |str|.
+   const wchar_t* const begin = as_wcstr(str.data());
+   return WStringPiece(begin, str.size());
+ }
+
+ // Builds a StringPiece16 over the same buffer, with the same size, as |str|.
+ inline StringPiece16 AsStringPiece16(WStringPiece str) {
+   const char16* const begin = as_u16cstr(str.data());
+   return StringPiece16(begin, str.size());
+ }
+
+ // Constructs a std::wstring from the buffer and size of |str|.
+ inline std::wstring AsWString(StringPiece16 str) {
+   const wchar_t* const begin = as_wcstr(str.data());
+   return std::wstring(begin, str.size());
+ }
+
+ // Constructs a string16 from the buffer and size of |str|.
+ inline string16 AsString16(WStringPiece str) {
+   const char16* const begin = as_u16cstr(str.data());
+   return string16(begin, str.size());
+ }
+
+// The following section contains overloads of the cross-platform APIs for
+// std::wstring and base::WStringPiece. These are only enabled if std::wstring
+// and base::string16 are distinct types, as otherwise this would result in an
+// ODR violation.
+// TODO(crbug.com/911896): Remove those guards once base::string16 is
+// std::u16string.
+#if defined(BASE_STRING16_IS_STD_U16STRING)
+BASE_EXPORT bool IsStringASCII(WStringPiece str);
+
+BASE_EXPORT std::wstring ToLowerASCII(WStringPiece str);
+
+BASE_EXPORT std::wstring ToUpperASCII(WStringPiece str);
+
+BASE_EXPORT int CompareCaseInsensitiveASCII(WStringPiece a, WStringPiece b);
+
+BASE_EXPORT bool EqualsCaseInsensitiveASCII(WStringPiece a, WStringPiece b);
+
+BASE_EXPORT bool RemoveChars(WStringPiece input,
+ WStringPiece remove_chars,
+ std::wstring* output);
+
+BASE_EXPORT bool ReplaceChars(WStringPiece input,
+ WStringPiece replace_chars,
+ WStringPiece replace_with,
+ std::wstring* output);
+
+ // Wide-string overload of TrimString that writes its result to |output|.
+ // |output| is a std::wstring so that this declaration matches the definition
+ // in string_util_win.cc.
+ BASE_EXPORT bool TrimString(WStringPiece input,
+                             WStringPiece trim_chars,
+                             std::wstring* output);
+
+BASE_EXPORT WStringPiece TrimString(WStringPiece input,
+ WStringPiece trim_chars,
+ TrimPositions positions);
+
+BASE_EXPORT TrimPositions TrimWhitespace(WStringPiece input,
+ TrimPositions positions,
+ std::wstring* output);
+
+BASE_EXPORT WStringPiece TrimWhitespace(WStringPiece input,
+ TrimPositions positions);
+
+BASE_EXPORT std::wstring CollapseWhitespace(
+ WStringPiece text,
+ bool trim_sequences_with_line_breaks);
+
+BASE_EXPORT bool ContainsOnlyChars(WStringPiece input, WStringPiece characters);
+
+ // Wide-string overload of LowerCaseEqualsASCII. The parameter is spelled
+ // |lowercase_ascii|, as in the definition.
+ BASE_EXPORT bool LowerCaseEqualsASCII(WStringPiece str,
+                                       StringPiece lowercase_ascii);
+
+ // Wide-string overload of EqualsASCII; takes a WStringPiece so that it
+ // matches the definition in string_util_win.cc.
+ BASE_EXPORT bool EqualsASCII(WStringPiece str, StringPiece ascii);
+
+BASE_EXPORT bool StartsWith(WStringPiece str,
+ WStringPiece search_for,
+ CompareCase case_sensitivity);
+
+BASE_EXPORT bool EndsWith(WStringPiece str,
+ WStringPiece search_for,
+ CompareCase case_sensitivity);
+
+BASE_EXPORT void ReplaceFirstSubstringAfterOffset(std::wstring* str,
+ size_t start_offset,
+ WStringPiece find_this,
+ WStringPiece replace_with);
+
+BASE_EXPORT void ReplaceSubstringsAfterOffset(std::wstring* str,
+ size_t start_offset,
+ WStringPiece find_this,
+ WStringPiece replace_with);
+
+BASE_EXPORT wchar_t* WriteInto(std::wstring* str, size_t length_with_null);
+
+BASE_EXPORT std::wstring JoinString(span<const std::wstring> parts,
+ WStringPiece separator);
+
+BASE_EXPORT std::wstring JoinString(span<const WStringPiece> parts,
+ WStringPiece separator);
+
+BASE_EXPORT std::wstring JoinString(std::initializer_list<WStringPiece> parts,
+ WStringPiece separator);
+
+ // Wide-string overload of ReplaceStringPlaceholders. |subst| holds
+ // std::wstring values so that this declaration matches the definition in
+ // string_util_win.cc.
+ BASE_EXPORT std::wstring ReplaceStringPlaceholders(
+     WStringPiece format_string,
+     const std::vector<std::wstring>& subst,
+     std::vector<size_t>* offsets);
+#endif
+
} // namespace base
#endif // BASE_STRINGS_STRING_UTIL_WIN_H_
diff --git a/chromium/base/strings/utf_string_conversions.cc b/chromium/base/strings/utf_string_conversions.cc
index 9a79889159e..0b55cd9e59d 100644
--- a/chromium/base/strings/utf_string_conversions.cc
+++ b/chromium/base/strings/utf_string_conversions.cc
@@ -339,4 +339,16 @@ std::string UTF16ToASCII(StringPiece16 utf16) {
return std::string(utf16.begin(), utf16.end());
}
+#if defined(WCHAR_T_IS_UTF16)
+ // Builds a wide string from the characters of |ascii|. A DCHECK verifies
+ // that the input is ASCII.
+ std::wstring ASCIIToWide(StringPiece ascii) {
+   DCHECK(IsStringASCII(ascii)) << ascii;
+   std::wstring wide(ascii.begin(), ascii.end());
+   return wide;
+ }
+
+ // Builds a narrow string from the characters of |wide|. A DCHECK verifies
+ // that the input is ASCII.
+ std::string WideToASCII(WStringPiece wide) {
+   DCHECK(IsStringASCII(wide)) << wide;
+   std::string narrow(wide.begin(), wide.end());
+   return narrow;
+ }
+#endif // defined(WCHAR_T_IS_UTF16)
+
} // namespace base
diff --git a/chromium/base/strings/utf_string_conversions.h b/chromium/base/strings/utf_string_conversions.h
index f780fb4f4f8..9ee91453a02 100644
--- a/chromium/base/strings/utf_string_conversions.h
+++ b/chromium/base/strings/utf_string_conversions.h
@@ -12,6 +12,7 @@
#include "base/base_export.h"
#include "base/strings/string16.h"
#include "base/strings/string_piece.h"
+#include "build/build_config.h"
namespace base {
@@ -49,6 +50,16 @@ BASE_EXPORT string16 ASCIIToUTF16(StringPiece ascii) WARN_UNUSED_RESULT;
// beforehand.
BASE_EXPORT std::string UTF16ToASCII(StringPiece16 utf16) WARN_UNUSED_RESULT;
+#if defined(WCHAR_T_IS_UTF16)
+// This converts an ASCII string, typically a hardcoded constant, to a wide
+// string.
+BASE_EXPORT std::wstring ASCIIToWide(StringPiece ascii) WARN_UNUSED_RESULT;
+
+// Converts to 7-bit ASCII by truncating. The result must be known to be ASCII
+// beforehand.
+BASE_EXPORT std::string WideToASCII(WStringPiece wide) WARN_UNUSED_RESULT;
+#endif // defined(WCHAR_T_IS_UTF16)
+
} // namespace base
#endif // BASE_STRINGS_UTF_STRING_CONVERSIONS_H_