summaryrefslogtreecommitdiff
path: root/Source/WTF/wtf/text
diff options
context:
space:
mode:
Diffstat (limited to 'Source/WTF/wtf/text')
-rw-r--r--Source/WTF/wtf/text/ASCIIFastPath.h16
-rw-r--r--Source/WTF/wtf/text/AtomicString.cpp467
-rw-r--r--Source/WTF/wtf/text/AtomicString.h250
-rw-r--r--Source/WTF/wtf/text/AtomicStringHash.h21
-rw-r--r--Source/WTF/wtf/text/AtomicStringImpl.cpp540
-rw-r--r--Source/WTF/wtf/text/AtomicStringImpl.h91
-rw-r--r--Source/WTF/wtf/text/AtomicStringTable.cpp15
-rw-r--r--Source/WTF/wtf/text/AtomicStringTable.h1
-rw-r--r--Source/WTF/wtf/text/Base64.cpp76
-rw-r--r--Source/WTF/wtf/text/Base64.h134
-rw-r--r--Source/WTF/wtf/text/CString.cpp14
-rw-r--r--Source/WTF/wtf/text/CString.h8
-rw-r--r--Source/WTF/wtf/text/IntegerToStringConversion.h84
-rw-r--r--Source/WTF/wtf/text/LChar.h8
-rw-r--r--Source/WTF/wtf/text/LineBreakIteratorPoolICU.h132
-rw-r--r--Source/WTF/wtf/text/OrdinalNumber.h54
-rw-r--r--Source/WTF/wtf/text/StringBuffer.h2
-rw-r--r--Source/WTF/wtf/text/StringBuilder.cpp153
-rw-r--r--Source/WTF/wtf/text/StringBuilder.h55
-rw-r--r--Source/WTF/wtf/text/StringCommon.h656
-rw-r--r--Source/WTF/wtf/text/StringConcatenate.h893
-rw-r--r--Source/WTF/wtf/text/StringConcatenateNumbers.h175
-rw-r--r--Source/WTF/wtf/text/StringHash.h37
-rw-r--r--Source/WTF/wtf/text/StringImpl.cpp942
-rw-r--r--Source/WTF/wtf/text/StringImpl.h974
-rw-r--r--Source/WTF/wtf/text/StringOperators.h8
-rw-r--r--Source/WTF/wtf/text/StringStatics.cpp36
-rw-r--r--Source/WTF/wtf/text/StringView.cpp285
-rw-r--r--Source/WTF/wtf/text/StringView.h952
-rw-r--r--Source/WTF/wtf/text/SymbolImpl.cpp59
-rw-r--r--Source/WTF/wtf/text/SymbolImpl.h126
-rw-r--r--Source/WTF/wtf/text/SymbolRegistry.cpp63
-rw-r--r--Source/WTF/wtf/text/SymbolRegistry.h113
-rw-r--r--Source/WTF/wtf/text/TextBreakIterator.cpp448
-rw-r--r--Source/WTF/wtf/text/TextBreakIterator.h191
-rw-r--r--Source/WTF/wtf/text/TextBreakIteratorInternalICU.h37
-rw-r--r--Source/WTF/wtf/text/TextPosition.h37
-rw-r--r--Source/WTF/wtf/text/UniquedStringImpl.h65
-rw-r--r--Source/WTF/wtf/text/WTFString.cpp273
-rw-r--r--Source/WTF/wtf/text/WTFString.h352
-rw-r--r--Source/WTF/wtf/text/icu/UTextProvider.cpp72
-rw-r--r--Source/WTF/wtf/text/icu/UTextProvider.h111
-rw-r--r--Source/WTF/wtf/text/icu/UTextProviderLatin1.cpp394
-rw-r--r--Source/WTF/wtf/text/icu/UTextProviderLatin1.h46
-rw-r--r--Source/WTF/wtf/text/icu/UTextProviderUTF16.cpp184
-rw-r--r--Source/WTF/wtf/text/icu/UTextProviderUTF16.h37
-rw-r--r--Source/WTF/wtf/text/unix/TextBreakIteratorInternalICUUnix.cpp41
47 files changed, 6825 insertions, 2903 deletions
diff --git a/Source/WTF/wtf/text/ASCIIFastPath.h b/Source/WTF/wtf/text/ASCIIFastPath.h
index d057a6fa1..eb54828a2 100644
--- a/Source/WTF/wtf/text/ASCIIFastPath.h
+++ b/Source/WTF/wtf/text/ASCIIFastPath.h
@@ -22,12 +22,14 @@
#ifndef ASCIIFastPath_h
#define ASCIIFastPath_h
-#if OS(DARWIN) && (CPU(X86) || CPU(X86_64))
-#include <emmintrin.h>
-#endif
#include <stdint.h>
+#include <unicode/utypes.h>
#include <wtf/StdLibExtras.h>
-#include <wtf/unicode/Unicode.h>
+#include <wtf/text/LChar.h>
+
+#if CPU(X86_SSE2)
+#include <emmintrin.h>
+#endif
namespace WTF {
@@ -107,7 +109,7 @@ inline bool charactersAreAllASCII(const CharacterType* characters, size_t length
inline void copyLCharsFromUCharSource(LChar* destination, const UChar* source, size_t length)
{
-#if OS(DARWIN) && (CPU(X86) || CPU(X86_64))
+#if CPU(X86_SSE2)
const uintptr_t memoryAccessSize = 16; // Memory accesses on 16 byte (128 bit) alignment
const uintptr_t memoryAccessMask = memoryAccessSize - 1;
@@ -137,7 +139,7 @@ inline void copyLCharsFromUCharSource(LChar* destination, const UChar* source, s
ASSERT(!(source[i] & 0xff00));
destination[i] = static_cast<LChar>(source[i]);
}
-#elif COMPILER(GCC) && CPU(ARM64) && defined(NDEBUG)
+#elif COMPILER(GCC_OR_CLANG) && CPU(ARM64) && defined(NDEBUG)
const LChar* const end = destination + length;
const uintptr_t memoryAccessSize = 16;
@@ -158,7 +160,7 @@ inline void copyLCharsFromUCharSource(LChar* destination, const UChar* source, s
while (destination != end)
*destination++ = static_cast<LChar>(*source++);
-#elif COMPILER(GCC) && CPU(ARM_NEON) && !(PLATFORM(BIG_ENDIAN) || PLATFORM(MIDDLE_ENDIAN)) && defined(NDEBUG)
+#elif COMPILER(GCC_OR_CLANG) && CPU(ARM_NEON) && !(CPU(BIG_ENDIAN) || CPU(MIDDLE_ENDIAN)) && defined(NDEBUG)
const LChar* const end = destination + length;
const uintptr_t memoryAccessSize = 8;
diff --git a/Source/WTF/wtf/text/AtomicString.cpp b/Source/WTF/wtf/text/AtomicString.cpp
index 5803dd018..cd8ef8ffc 100644
--- a/Source/WTF/wtf/text/AtomicString.cpp
+++ b/Source/WTF/wtf/text/AtomicString.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2013 Apple Inc. All rights reserved.
+ * Copyright (C) 2004-2008, 2013-2014, 2016 Apple Inc. All rights reserved.
* Copyright (C) 2010 Patrick Gansterer <paroga@paroga.com>
* Copyright (C) 2012 Google Inc. All rights reserved.
*
@@ -23,452 +23,81 @@
#include "config.h"
#include "AtomicString.h"
-#include "AtomicStringTable.h"
-#include "HashSet.h"
#include "IntegerToStringConversion.h"
-#include "StringHash.h"
-#include "Threading.h"
-#include "WTFThreadData.h"
#include "dtoa.h"
-#include <wtf/unicode/UTF8.h>
#if USE(WEB_THREAD)
-#include "TCSpinLock.h"
+#include "Lock.h"
#endif
namespace WTF {
-using namespace Unicode;
-
-static_assert(sizeof(AtomicString) == sizeof(String), "AtomicString and String must be same size!");
-
-#if USE(WEB_THREAD)
-
-class AtomicStringTableLocker : public SpinLockHolder {
- WTF_MAKE_NONCOPYABLE(AtomicStringTableLocker);
-
- static SpinLock s_stringTableLock;
-public:
- AtomicStringTableLocker()
- : SpinLockHolder(&s_stringTableLock)
- {
- }
-};
-
-SpinLock AtomicStringTableLocker::s_stringTableLock = SPINLOCK_INITIALIZER;
-
-#else
-
-class AtomicStringTableLocker {
- WTF_MAKE_NONCOPYABLE(AtomicStringTableLocker);
-public:
- AtomicStringTableLocker() { }
-};
-
-#endif // USE(WEB_THREAD)
-
-static ALWAYS_INLINE HashSet<StringImpl*>& stringTable()
-{
- return wtfThreadData().atomicStringTable()->table();
-}
-
-template<typename T, typename HashTranslator>
-static inline PassRefPtr<StringImpl> addToStringTable(const T& value)
-{
- AtomicStringTableLocker locker;
-
- HashSet<StringImpl*>::AddResult addResult = stringTable().add<HashTranslator>(value);
-
- // If the string is newly-translated, then we need to adopt it.
- // The boolean in the pair tells us if that is so.
- return addResult.isNewEntry ? adoptRef(*addResult.iterator) : *addResult.iterator;
-}
-
-struct CStringTranslator {
- static unsigned hash(const LChar* c)
- {
- return StringHasher::computeHashAndMaskTop8Bits(c);
- }
-
- static inline bool equal(StringImpl* r, const LChar* s)
- {
- return WTF::equal(r, s);
- }
-
- static void translate(StringImpl*& location, const LChar* const& c, unsigned hash)
- {
- location = &StringImpl::create(c).leakRef();
- location->setHash(hash);
- location->setIsAtomic(true);
- }
-};
-
-PassRefPtr<StringImpl> AtomicString::add(const LChar* c)
+template<AtomicString::CaseConvertType type>
+ALWAYS_INLINE AtomicString AtomicString::convertASCIICase() const
{
- if (!c)
- return 0;
- if (!*c)
- return StringImpl::empty();
-
- return addToStringTable<const LChar*, CStringTranslator>(c);
-}
-
-template<typename CharacterType>
-struct HashTranslatorCharBuffer {
- const CharacterType* s;
- unsigned length;
-};
-
-typedef HashTranslatorCharBuffer<UChar> UCharBuffer;
-struct UCharBufferTranslator {
- static unsigned hash(const UCharBuffer& buf)
- {
- return StringHasher::computeHashAndMaskTop8Bits(buf.s, buf.length);
- }
-
- static bool equal(StringImpl* const& str, const UCharBuffer& buf)
- {
- return WTF::equal(str, buf.s, buf.length);
- }
-
- static void translate(StringImpl*& location, const UCharBuffer& buf, unsigned hash)
- {
- location = &StringImpl::create8BitIfPossible(buf.s, buf.length).leakRef();
- location->setHash(hash);
- location->setIsAtomic(true);
- }
-};
-
-template<typename CharacterType>
-struct HashAndCharacters {
- unsigned hash;
- const CharacterType* characters;
- unsigned length;
-};
-
-template<typename CharacterType>
-struct HashAndCharactersTranslator {
- static unsigned hash(const HashAndCharacters<CharacterType>& buffer)
- {
- ASSERT(buffer.hash == StringHasher::computeHashAndMaskTop8Bits(buffer.characters, buffer.length));
- return buffer.hash;
- }
-
- static bool equal(StringImpl* const& string, const HashAndCharacters<CharacterType>& buffer)
- {
- return WTF::equal(string, buffer.characters, buffer.length);
- }
-
- static void translate(StringImpl*& location, const HashAndCharacters<CharacterType>& buffer, unsigned hash)
- {
- location = &StringImpl::create(buffer.characters, buffer.length).leakRef();
- location->setHash(hash);
- location->setIsAtomic(true);
- }
-};
+ StringImpl* impl = this->impl();
+ if (UNLIKELY(!impl))
+ return nullAtom;
-struct HashAndUTF8Characters {
- unsigned hash;
- const char* characters;
+ // Convert short strings without allocating a new StringImpl, since
+ // there's a good chance these strings are already in the atomic
+ // string table and so no memory allocation will be required.
unsigned length;
- unsigned utf16Length;
-};
-
-struct HashAndUTF8CharactersTranslator {
- static unsigned hash(const HashAndUTF8Characters& buffer)
- {
- return buffer.hash;
- }
-
- static bool equal(StringImpl* const& string, const HashAndUTF8Characters& buffer)
- {
- if (buffer.utf16Length != string->length())
- return false;
-
- // If buffer contains only ASCII characters UTF-8 and UTF16 length are the same.
- if (buffer.utf16Length != buffer.length) {
- const UChar* stringCharacters = string->deprecatedCharacters();
-
- return equalUTF16WithUTF8(stringCharacters, stringCharacters + string->length(), buffer.characters, buffer.characters + buffer.length);
- }
-
- if (string->is8Bit()) {
- const LChar* stringCharacters = string->characters8();
-
- for (unsigned i = 0; i < buffer.length; ++i) {
- ASSERT(isASCII(buffer.characters[i]));
- if (stringCharacters[i] != buffer.characters[i])
- return false;
+ const unsigned localBufferSize = 100;
+ if (impl->is8Bit() && (length = impl->length()) <= localBufferSize) {
+ const LChar* characters = impl->characters8();
+ unsigned failingIndex;
+ for (unsigned i = 0; i < length; ++i) {
+ if (type == CaseConvertType::Lower ? UNLIKELY(isASCIIUpper(characters[i])) : LIKELY(isASCIILower(characters[i]))) {
+ failingIndex = i;
+ goto SlowPath;
}
-
- return true;
}
-
- const UChar* stringCharacters = string->characters16();
-
- for (unsigned i = 0; i < buffer.length; ++i) {
- ASSERT(isASCII(buffer.characters[i]));
- if (stringCharacters[i] != buffer.characters[i])
- return false;
- }
-
- return true;
- }
-
- static void translate(StringImpl*& location, const HashAndUTF8Characters& buffer, unsigned hash)
- {
- UChar* target;
- RefPtr<StringImpl> newString = StringImpl::createUninitialized(buffer.utf16Length, target);
-
- bool isAllASCII;
- const char* source = buffer.characters;
- if (convertUTF8ToUTF16(&source, source + buffer.length, &target, target + buffer.utf16Length, &isAllASCII) != conversionOK)
- ASSERT_NOT_REACHED();
-
- if (isAllASCII)
- newString = StringImpl::create(buffer.characters, buffer.length);
-
- location = newString.release().leakRef();
- location->setHash(hash);
- location->setIsAtomic(true);
- }
-};
-
-PassRefPtr<StringImpl> AtomicString::add(const UChar* s, unsigned length)
-{
- if (!s)
- return 0;
-
- if (!length)
- return StringImpl::empty();
-
- UCharBuffer buffer = { s, length };
- return addToStringTable<UCharBuffer, UCharBufferTranslator>(buffer);
-}
-
-PassRefPtr<StringImpl> AtomicString::add(const UChar* s, unsigned length, unsigned existingHash)
-{
- ASSERT(s);
- ASSERT(existingHash);
-
- if (!length)
- return StringImpl::empty();
-
- HashAndCharacters<UChar> buffer = { existingHash, s, length };
- return addToStringTable<HashAndCharacters<UChar>, HashAndCharactersTranslator<UChar>>(buffer);
-}
-
-PassRefPtr<StringImpl> AtomicString::add(const UChar* s)
-{
- if (!s)
- return 0;
-
- unsigned length = 0;
- while (s[length] != UChar(0))
- ++length;
-
- if (!length)
- return StringImpl::empty();
-
- UCharBuffer buffer = { s, length };
- return addToStringTable<UCharBuffer, UCharBufferTranslator>(buffer);
-}
-
-struct SubstringLocation {
- StringImpl* baseString;
- unsigned start;
- unsigned length;
-};
-
-struct SubstringTranslator {
- static unsigned hash(const SubstringLocation& buffer)
- {
- return StringHasher::computeHashAndMaskTop8Bits(buffer.baseString->deprecatedCharacters() + buffer.start, buffer.length);
- }
-
- static bool equal(StringImpl* const& string, const SubstringLocation& buffer)
- {
- return WTF::equal(string, buffer.baseString->deprecatedCharacters() + buffer.start, buffer.length);
+ return *this;
+SlowPath:
+ LChar localBuffer[localBufferSize];
+ for (unsigned i = 0; i < failingIndex; ++i)
+ localBuffer[i] = characters[i];
+ for (unsigned i = failingIndex; i < length; ++i)
+ localBuffer[i] = type == CaseConvertType::Lower ? toASCIILower(characters[i]) : toASCIIUpper(characters[i]);
+ return AtomicString(localBuffer, length);
}
- static void translate(StringImpl*& location, const SubstringLocation& buffer, unsigned hash)
- {
- location = &StringImpl::create(buffer.baseString, buffer.start, buffer.length).leakRef();
- location->setHash(hash);
- location->setIsAtomic(true);
- }
-};
-
-PassRefPtr<StringImpl> AtomicString::add(StringImpl* baseString, unsigned start, unsigned length)
-{
- if (!baseString)
- return 0;
-
- if (!length || start >= baseString->length())
- return StringImpl::empty();
+ Ref<StringImpl> convertedString = type == CaseConvertType::Lower ? impl->convertToASCIILowercase() : impl->convertToASCIIUppercase();
+ if (LIKELY(convertedString.ptr() == impl))
+ return *this;
- unsigned maxLength = baseString->length() - start;
- if (length >= maxLength) {
- if (!start)
- return add(baseString);
- length = maxLength;
- }
-
- SubstringLocation buffer = { baseString, start, length };
- return addToStringTable<SubstringLocation, SubstringTranslator>(buffer);
+ AtomicString result;
+ result.m_string = AtomicStringImpl::add(convertedString.ptr());
+ return result;
}
-
-typedef HashTranslatorCharBuffer<LChar> LCharBuffer;
-struct LCharBufferTranslator {
- static unsigned hash(const LCharBuffer& buf)
- {
- return StringHasher::computeHashAndMaskTop8Bits(buf.s, buf.length);
- }
-
- static bool equal(StringImpl* const& str, const LCharBuffer& buf)
- {
- return WTF::equal(str, buf.s, buf.length);
- }
-
- static void translate(StringImpl*& location, const LCharBuffer& buf, unsigned hash)
- {
- location = &StringImpl::create(buf.s, buf.length).leakRef();
- location->setHash(hash);
- location->setIsAtomic(true);
- }
-};
-
-typedef HashTranslatorCharBuffer<char> CharBuffer;
-struct CharBufferFromLiteralDataTranslator {
- static unsigned hash(const CharBuffer& buf)
- {
- return StringHasher::computeHashAndMaskTop8Bits(reinterpret_cast<const LChar*>(buf.s), buf.length);
- }
- static bool equal(StringImpl* const& str, const CharBuffer& buf)
- {
- return WTF::equal(str, buf.s, buf.length);
- }
-
- static void translate(StringImpl*& location, const CharBuffer& buf, unsigned hash)
- {
- location = &StringImpl::createFromLiteral(buf.s, buf.length).leakRef();
- location->setHash(hash);
- location->setIsAtomic(true);
- }
-};
-
-PassRefPtr<StringImpl> AtomicString::add(const LChar* s, unsigned length)
+AtomicString AtomicString::convertToASCIILowercase() const
{
- if (!s)
- return 0;
-
- if (!length)
- return StringImpl::empty();
-
- LCharBuffer buffer = { s, length };
- return addToStringTable<LCharBuffer, LCharBufferTranslator>(buffer);
+ return convertASCIICase<CaseConvertType::Lower>();
}
-PassRefPtr<StringImpl> AtomicString::addFromLiteralData(const char* characters, unsigned length)
+AtomicString AtomicString::convertToASCIIUppercase() const
{
- ASSERT(characters);
- ASSERT(length);
-
- CharBuffer buffer = { characters, length };
- return addToStringTable<CharBuffer, CharBufferFromLiteralDataTranslator>(buffer);
+ return convertASCIICase<CaseConvertType::Upper>();
}
-PassRefPtr<StringImpl> AtomicString::addSlowCase(StringImpl* string)
-{
- if (!string->length())
- return StringImpl::empty();
-
- ASSERT_WITH_MESSAGE(!string->isAtomic(), "AtomicString should not hit the slow case if the string is already atomic.");
-
- AtomicStringTableLocker locker;
- HashSet<StringImpl*>::AddResult addResult = stringTable().add(string);
-
- if (addResult.isNewEntry) {
- ASSERT(*addResult.iterator == string);
- string->setIsAtomic(true);
- }
-
- return *addResult.iterator;
-}
-
-template<typename CharacterType>
-static inline HashSet<StringImpl*>::iterator findString(const StringImpl* stringImpl)
-{
- HashAndCharacters<CharacterType> buffer = { stringImpl->existingHash(), stringImpl->getCharacters<CharacterType>(), stringImpl->length() };
- return stringTable().find<HashAndCharactersTranslator<CharacterType>>(buffer);
-}
-
-AtomicStringImpl* AtomicString::find(const StringImpl* stringImpl)
-{
- ASSERT(stringImpl);
- ASSERT(stringImpl->existingHash());
-
- if (!stringImpl->length())
- return static_cast<AtomicStringImpl*>(StringImpl::empty());
-
- AtomicStringTableLocker locker;
- HashSet<StringImpl*>::iterator iterator;
- if (stringImpl->is8Bit())
- iterator = findString<LChar>(stringImpl);
- else
- iterator = findString<UChar>(stringImpl);
- if (iterator == stringTable().end())
- return 0;
- return static_cast<AtomicStringImpl*>(*iterator);
-}
-
-void AtomicString::remove(StringImpl* string)
-{
- ASSERT(string->isAtomic());
- AtomicStringTableLocker locker;
- HashSet<StringImpl*>& atomicStringTable = stringTable();
- HashSet<StringImpl*>::iterator iterator = atomicStringTable.find(string);
- ASSERT_WITH_MESSAGE(iterator != atomicStringTable.end(), "The string being removed is atomic in the string table of an other thread!");
- atomicStringTable.remove(iterator);
-}
-
-AtomicString AtomicString::lower() const
+AtomicString AtomicString::number(int number)
{
- // Note: This is a hot function in the Dromaeo benchmark.
- StringImpl* impl = this->impl();
- if (UNLIKELY(!impl))
- return AtomicString();
-
- RefPtr<StringImpl> lowerImpl = impl->lower();
- AtomicString returnValue;
- if (LIKELY(lowerImpl == impl))
- returnValue.m_string = lowerImpl.release();
- else
- returnValue.m_string = addSlowCase(lowerImpl.get());
- return returnValue;
+ return numberToStringSigned<AtomicString>(number);
}
-AtomicString AtomicString::fromUTF8Internal(const char* charactersStart, const char* charactersEnd)
+AtomicString AtomicString::number(unsigned number)
{
- HashAndUTF8Characters buffer;
- buffer.characters = charactersStart;
- buffer.hash = calculateStringHashAndLengthFromUTF8MaskingTop8Bits(charactersStart, charactersEnd, buffer.length, buffer.utf16Length);
-
- if (!buffer.hash)
- return nullAtom;
-
- AtomicString atomicString;
- atomicString.m_string = addToStringTable<HashAndUTF8Characters, HashAndUTF8CharactersTranslator>(buffer);
- return atomicString;
+ return numberToStringUnsigned<AtomicString>(number);
}
-AtomicString AtomicString::number(int number)
+AtomicString AtomicString::number(unsigned long number)
{
- return numberToStringSigned<AtomicString>(number);
+ return numberToStringUnsigned<AtomicString>(number);
}
-AtomicString AtomicString::number(unsigned number)
+AtomicString AtomicString::number(unsigned long long number)
{
return numberToStringUnsigned<AtomicString>(number);
}
@@ -479,13 +108,13 @@ AtomicString AtomicString::number(double number)
return String(numberToFixedPrecisionString(number, 6, buffer, true));
}
-#if !ASSERT_DISABLED
-bool AtomicString::isInAtomicStringTable(StringImpl* string)
+AtomicString AtomicString::fromUTF8Internal(const char* charactersStart, const char* charactersEnd)
{
- AtomicStringTableLocker locker;
- return stringTable().contains(string);
+ auto impl = AtomicStringImpl::addUTF8(charactersStart, charactersEnd);
+ if (!impl)
+ return nullAtom;
+ return impl.get();
}
-#endif
#ifndef NDEBUG
void AtomicString::show() const
diff --git a/Source/WTF/wtf/text/AtomicString.h b/Source/WTF/wtf/text/AtomicString.h
index 4142de142..91bb20a8b 100644
--- a/Source/WTF/wtf/text/AtomicString.h
+++ b/Source/WTF/wtf/text/AtomicString.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2004, 2005, 2006, 2008 Apple Inc. All rights reserved.
+ * Copyright (C) 2004-2006, 2008, 2014-2016 Apple Inc. All rights reserved.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
@@ -23,6 +23,7 @@
#include <utility>
#include <wtf/text/AtomicStringImpl.h>
+#include <wtf/text/IntegerToStringConversion.h>
#include <wtf/text/WTFString.h>
// Define 'NO_IMPLICIT_ATOMICSTRING' before including this header,
@@ -41,34 +42,38 @@ class AtomicString {
public:
WTF_EXPORT_PRIVATE static void init();
- AtomicString() { }
- AtomicString(const LChar* s) : m_string(add(s)) { }
- AtomicString(const char* s) : m_string(add(s)) { }
- AtomicString(const LChar* s, unsigned length) : m_string(add(s, length)) { }
- AtomicString(const UChar* s, unsigned length) : m_string(add(s, length)) { }
- AtomicString(const UChar* s, unsigned length, unsigned existingHash) : m_string(add(s, length, existingHash)) { }
- AtomicString(const UChar* s) : m_string(add(s)) { }
+ AtomicString();
+ AtomicString(const LChar*);
+ AtomicString(const char*);
+ AtomicString(const LChar*, unsigned length);
+ AtomicString(const UChar*, unsigned length);
+ AtomicString(const UChar*, unsigned length, unsigned existingHash);
+ AtomicString(const UChar*);
template<size_t inlineCapacity>
explicit AtomicString(const Vector<UChar, inlineCapacity>& characters)
- : m_string(add(characters.data(), characters.size()))
+ : m_string(AtomicStringImpl::add(characters.data(), characters.size()))
{
}
- ATOMICSTRING_CONVERSION AtomicString(StringImpl* imp) : m_string(add(imp)) { }
- AtomicString(AtomicStringImpl* imp) : m_string(imp) { }
- ATOMICSTRING_CONVERSION AtomicString(const String& s) : m_string(add(s.impl())) { }
- AtomicString(StringImpl* baseString, unsigned start, unsigned length) : m_string(add(baseString, start, length)) { }
+ AtomicString(AtomicStringImpl*);
+ AtomicString(RefPtr<AtomicStringImpl>&&);
+ ATOMICSTRING_CONVERSION AtomicString(StringImpl*);
+ ATOMICSTRING_CONVERSION AtomicString(const String&);
+ AtomicString(StringImpl* baseString, unsigned start, unsigned length);
+
+ // FIXME: AtomicString doesn’t always have AtomicStringImpl, so one of those two names needs to change..
+ AtomicString(UniquedStringImpl* uid);
enum ConstructFromLiteralTag { ConstructFromLiteral };
AtomicString(const char* characters, unsigned length, ConstructFromLiteralTag)
- : m_string(addFromLiteralData(characters, length))
+ : m_string(AtomicStringImpl::addLiteral(characters, length))
{
}
template<unsigned charactersCount>
ALWAYS_INLINE AtomicString(const char (&characters)[charactersCount], ConstructFromLiteralTag)
- : m_string(addFromLiteralData(characters, charactersCount - 1))
+ : m_string(AtomicStringImpl::addLiteral(characters, charactersCount - 1))
{
COMPILE_ASSERT(charactersCount > 1, AtomicStringFromLiteralNotEmpty);
COMPILE_ASSERT((charactersCount - 1 <= ((unsigned(~0) - sizeof(StringImpl)) / sizeof(LChar))), AtomicStringFromLiteralCannotOverflow);
@@ -77,15 +82,15 @@ public:
// We have to declare the copy constructor and copy assignment operator as well, otherwise
// they'll be implicitly deleted by adding the move constructor and move assignment operator.
AtomicString(const AtomicString& other) : m_string(other.m_string) { }
- AtomicString(AtomicString&& other) : m_string(std::move(other.m_string)) { }
+ AtomicString(AtomicString&& other) : m_string(WTFMove(other.m_string)) { }
AtomicString& operator=(const AtomicString& other) { m_string = other.m_string; return *this; }
- AtomicString& operator=(AtomicString&& other) { m_string = std::move(other.m_string); return *this; }
+ AtomicString& operator=(AtomicString&& other) { m_string = WTFMove(other.m_string); return *this; }
// Hash table deleted values, which are only constructed and never copied or destroyed.
AtomicString(WTF::HashTableDeletedValueType) : m_string(WTF::HashTableDeletedValue) { }
bool isHashTableDeletedValue() const { return m_string.isHashTableDeletedValue(); }
- WTF_EXPORT_STRING_API static AtomicStringImpl* find(const StringImpl*);
+ unsigned existingHash() const { return isNull() ? 0 : impl()->existingHash(); }
operator const String&() const { return m_string; }
const String& string() const { return m_string; };
@@ -93,31 +98,46 @@ public:
AtomicStringImpl* impl() const { return static_cast<AtomicStringImpl *>(m_string.impl()); }
bool is8Bit() const { return m_string.is8Bit(); }
- const UChar* characters() const { return m_string.deprecatedCharacters(); } // FIXME: Delete this.
const LChar* characters8() const { return m_string.characters8(); }
const UChar* characters16() const { return m_string.characters16(); }
unsigned length() const { return m_string.length(); }
-
+
UChar operator[](unsigned int i) const { return m_string[i]; }
WTF_EXPORT_STRING_API static AtomicString number(int);
WTF_EXPORT_STRING_API static AtomicString number(unsigned);
+ WTF_EXPORT_STRING_API static AtomicString number(unsigned long);
+ WTF_EXPORT_STRING_API static AtomicString number(unsigned long long);
WTF_EXPORT_STRING_API static AtomicString number(double);
// If we need more overloads of the number function, we can add all the others that String has, but these seem to do for now.
bool contains(UChar c) const { return m_string.contains(c); }
bool contains(const LChar* s, bool caseSensitive = true) const
{ return m_string.contains(s, caseSensitive); }
- bool contains(const String& s, bool caseSensitive = true) const
+ bool contains(const String& s) const
+ { return m_string.contains(s); }
+ bool contains(const String& s, bool caseSensitive) const
{ return m_string.contains(s, caseSensitive); }
+ bool containsIgnoringASCIICase(const String& s) const
+ { return m_string.containsIgnoringASCIICase(s); }
size_t find(UChar c, unsigned start = 0) const { return m_string.find(c, start); }
size_t find(const LChar* s, unsigned start = 0, bool caseSentitive = true) const
{ return m_string.find(s, start, caseSentitive); }
size_t find(const String& s, unsigned start = 0, bool caseSentitive = true) const
{ return m_string.find(s, start, caseSentitive); }
-
- bool startsWith(const String& s, bool caseSensitive = true) const
+ size_t findIgnoringASCIICase(const String& s) const
+ { return m_string.findIgnoringASCIICase(s); }
+ size_t findIgnoringASCIICase(const String& s, unsigned startOffset) const
+ { return m_string.findIgnoringASCIICase(s, startOffset); }
+ size_t find(CharacterMatchFunctionPtr matchFunction, unsigned start = 0) const
+ { return m_string.find(matchFunction, start); }
+
+ bool startsWith(const String& s) const
+ { return m_string.startsWith(s); }
+ bool startsWithIgnoringASCIICase(const String& s) const
+ { return m_string.startsWithIgnoringASCIICase(s); }
+ bool startsWith(const String& s, bool caseSensitive) const
{ return m_string.startsWith(s, caseSensitive); }
bool startsWith(UChar character) const
{ return m_string.startsWith(character); }
@@ -125,17 +145,21 @@ public:
bool startsWith(const char (&prefix)[matchLength], bool caseSensitive = true) const
{ return m_string.startsWith<matchLength>(prefix, caseSensitive); }
- bool endsWith(const String& s, bool caseSensitive = true) const
+ bool endsWith(const String& s) const
+ { return m_string.endsWith(s); }
+ bool endsWithIgnoringASCIICase(const String& s) const
+ { return m_string.endsWithIgnoringASCIICase(s); }
+ bool endsWith(const String& s, bool caseSensitive) const
{ return m_string.endsWith(s, caseSensitive); }
bool endsWith(UChar character) const
{ return m_string.endsWith(character); }
template<unsigned matchLength>
bool endsWith(const char (&prefix)[matchLength], bool caseSensitive = true) const
{ return m_string.endsWith<matchLength>(prefix, caseSensitive); }
-
- WTF_EXPORT_STRING_API AtomicString lower() const;
- AtomicString upper() const { return AtomicString(impl()->upper()); }
-
+
+ WTF_EXPORT_STRING_API AtomicString convertToASCIILowercase() const;
+ WTF_EXPORT_STRING_API AtomicString convertToASCIIUppercase() const;
+
int toInt(bool* ok = 0) const { return m_string.toInt(ok); }
double toDouble(bool* ok = 0) const { return m_string.toDouble(ok); }
float toFloat(bool* ok = 0) const { return m_string.toFloat(ok); }
@@ -144,13 +168,11 @@ public:
bool isNull() const { return m_string.isNull(); }
bool isEmpty() const { return m_string.isEmpty(); }
- static void remove(StringImpl*);
-
#if USE(CF)
- AtomicString(CFStringRef s) : m_string(add(s)) { }
-#endif
+ AtomicString(CFStringRef);
+#endif
#ifdef __OBJC__
- AtomicString(NSString* s) : m_string(add((CFStringRef)s)) { }
+ AtomicString(NSString*);
operator NSString*() const { return m_string; }
#endif
@@ -167,37 +189,16 @@ private:
// The explicit constructors with AtomicString::ConstructFromLiteral must be used for literals.
AtomicString(ASCIILiteral);
- String m_string;
-
- WTF_EXPORT_STRING_API static PassRefPtr<StringImpl> add(const LChar*);
- ALWAYS_INLINE static PassRefPtr<StringImpl> add(const char* s) { return add(reinterpret_cast<const LChar*>(s)); };
- WTF_EXPORT_STRING_API static PassRefPtr<StringImpl> add(const LChar*, unsigned length);
- WTF_EXPORT_STRING_API static PassRefPtr<StringImpl> add(const UChar*, unsigned length);
- ALWAYS_INLINE static PassRefPtr<StringImpl> add(const char* s, unsigned length) { return add(reinterpret_cast<const LChar*>(s), length); };
- WTF_EXPORT_STRING_API static PassRefPtr<StringImpl> add(const UChar*, unsigned length, unsigned existingHash);
- WTF_EXPORT_STRING_API static PassRefPtr<StringImpl> add(const UChar*);
- WTF_EXPORT_STRING_API static PassRefPtr<StringImpl> add(StringImpl*, unsigned offset, unsigned length);
- ALWAYS_INLINE static PassRefPtr<StringImpl> add(StringImpl* string)
- {
- if (!string || string->isAtomic()) {
- ASSERT_WITH_MESSAGE(!string || isInAtomicStringTable(string), "The atomic string comes from an other thread!");
- return string;
- }
- return addSlowCase(string);
- }
- WTF_EXPORT_STRING_API static PassRefPtr<StringImpl> addFromLiteralData(const char* characters, unsigned length);
- WTF_EXPORT_STRING_API static PassRefPtr<StringImpl> addSlowCase(StringImpl*);
-#if USE(CF)
- WTF_EXPORT_STRING_API static PassRefPtr<StringImpl> add(CFStringRef);
-#endif
+ enum class CaseConvertType { Upper, Lower };
+ template<CaseConvertType> AtomicString convertASCIICase() const;
WTF_EXPORT_STRING_API static AtomicString fromUTF8Internal(const char*, const char*);
-#if !ASSERT_DISABLED
- WTF_EXPORT_STRING_API static bool isInAtomicStringTable(StringImpl*);
-#endif
+ String m_string;
};
+static_assert(sizeof(AtomicString) == sizeof(String), "AtomicString and String must be same size!");
+
inline bool operator==(const AtomicString& a, const AtomicString& b) { return a.impl() == b.impl(); }
bool operator==(const AtomicString&, const LChar*);
inline bool operator==(const AtomicString& a, const char* b) { return WTF::equal(a.impl(), reinterpret_cast<const LChar*>(b)); }
@@ -216,25 +217,99 @@ inline bool operator!=(const LChar* a, const AtomicString& b) { return !(b == a)
inline bool operator!=(const String& a, const AtomicString& b) { return !equal(a.impl(), b.impl()); }
inline bool operator!=(const Vector<UChar>& a, const AtomicString& b) { return !(a == b); }
-inline bool equalIgnoringCase(const AtomicString& a, const AtomicString& b) { return equalIgnoringCase(a.impl(), b.impl()); }
-inline bool equalIgnoringCase(const AtomicString& a, const LChar* b) { return equalIgnoringCase(a.impl(), b); }
-inline bool equalIgnoringCase(const AtomicString& a, const char* b) { return equalIgnoringCase(a.impl(), reinterpret_cast<const LChar*>(b)); }
-inline bool equalIgnoringCase(const AtomicString& a, const String& b) { return equalIgnoringCase(a.impl(), b.impl()); }
-inline bool equalIgnoringCase(const LChar* a, const AtomicString& b) { return equalIgnoringCase(a, b.impl()); }
-inline bool equalIgnoringCase(const char* a, const AtomicString& b) { return equalIgnoringCase(reinterpret_cast<const LChar*>(a), b.impl()); }
-inline bool equalIgnoringCase(const String& a, const AtomicString& b) { return equalIgnoringCase(a.impl(), b.impl()); }
+bool equalIgnoringASCIICase(const AtomicString&, const AtomicString&);
+bool equalIgnoringASCIICase(const AtomicString&, const String&);
+bool equalIgnoringASCIICase(const String&, const AtomicString&);
+bool equalIgnoringASCIICase(const AtomicString&, const char*);
+
+template<unsigned length> bool equalLettersIgnoringASCIICase(const AtomicString&, const char (&lowercaseLetters)[length]);
+
+inline AtomicString::AtomicString() // No member initializer: m_string is default-constructed.
+{
+}
+
+inline AtomicString::AtomicString(const LChar* s) // Stores the result of AtomicStringImpl::add(const LChar*).
+ : m_string(AtomicStringImpl::add(s))
+{
+}
+
+inline AtomicString::AtomicString(const char* s) // Same, via the const char* overload of add().
+ : m_string(AtomicStringImpl::add(s))
+{
+}
+
+inline AtomicString::AtomicString(const LChar* s, unsigned length) // Explicit-length LChar buffer.
+ : m_string(AtomicStringImpl::add(s, length))
+{
+}
+
+inline AtomicString::AtomicString(const UChar* s, unsigned length) // Explicit-length UChar buffer.
+ : m_string(AtomicStringImpl::add(s, length))
+{
+}
+
+inline AtomicString::AtomicString(const UChar* s, unsigned length, unsigned existingHash) // Passes the caller-supplied hash through to add().
+ : m_string(AtomicStringImpl::add(s, length, existingHash))
+{
+}
+
+inline AtomicString::AtomicString(const UChar* s) // UChar pointer with no length argument.
+ : m_string(AtomicStringImpl::add(s))
+{
+}
+
+inline AtomicString::AtomicString(AtomicStringImpl* imp) // Stored directly; AtomicStringImpl::add() is not called.
+ : m_string(imp)
+{
+}
+
+inline AtomicString::AtomicString(RefPtr<AtomicStringImpl>&& imp) // Moved into m_string; add() is not called.
+ : m_string(WTFMove(imp))
+{
+}
+
+inline AtomicString::AtomicString(StringImpl* imp) // Routed through AtomicStringImpl::add(StringImpl*).
+ : m_string(AtomicStringImpl::add(imp))
+{
+}
+
+inline AtomicString::AtomicString(const String& s) // Routed through add() using s.impl().
+ : m_string(AtomicStringImpl::add(s.impl()))
+{
+}
+
+inline AtomicString::AtomicString(StringImpl* baseString, unsigned start, unsigned length) // Forwards (baseString, start, length) to add().
+ : m_string(AtomicStringImpl::add(baseString, start, length))
+{
+}
+
+inline AtomicString::AtomicString(UniquedStringImpl* uid) // Stored directly; add() is not called.
+ : m_string(uid)
+{
+}
+
+#if USE(CF)
+inline AtomicString::AtomicString(CFStringRef s) // Forwards to the CFStringRef overload of add().
+ : m_string(AtomicStringImpl::add(s))
+{
+}
+#endif
+
+#ifdef __OBJC__
+inline AtomicString::AtomicString(NSString* s) // Bridge-casts to CFStringRef, then forwards to add().
+ : m_string(AtomicStringImpl::add((__bridge CFStringRef)s))
+{
+}
+#endif
// Define external global variables for the commonly used atomic strings.
// These are only usable from the main thread.
#ifndef ATOMICSTRING_HIDE_GLOBALS
extern const WTF_EXPORTDATA AtomicString nullAtom;
extern const WTF_EXPORTDATA AtomicString emptyAtom;
-extern const WTF_EXPORTDATA AtomicString textAtom;
-extern const WTF_EXPORTDATA AtomicString commentAtom;
extern const WTF_EXPORTDATA AtomicString starAtom;
extern const WTF_EXPORTDATA AtomicString xmlAtom;
extern const WTF_EXPORTDATA AtomicString xmlnsAtom;
-extern const WTF_EXPORTDATA AtomicString xlinkAtom;
inline AtomicString AtomicString::fromUTF8(const char* characters, size_t length)
{
@@ -251,7 +326,7 @@ inline AtomicString AtomicString::fromUTF8(const char* characters)
return nullAtom;
if (!*characters)
return emptyAtom;
- return fromUTF8Internal(characters, 0);
+ return fromUTF8Internal(characters, nullptr);
}
#endif
@@ -261,19 +336,48 @@ template<> struct DefaultHash<AtomicString> {
typedef AtomicStringHash Hash;
};
+template<unsigned length> inline bool equalLettersIgnoringASCIICase(const AtomicString& string, const char (&lowercaseLetters)[length]) // AtomicString adapter for the overload taking string.string().
+{
+ return equalLettersIgnoringASCIICase(string.string(), lowercaseLetters); // Forwards unchanged; the array reference keeps its compile-time length.
+}
+
+inline bool equalIgnoringASCIICase(const AtomicString& a, const AtomicString& b) // Both sides unwrapped with string() before comparing.
+{
+ return equalIgnoringASCIICase(a.string(), b.string());
+}
+
+inline bool equalIgnoringASCIICase(const AtomicString& a, const String& b) // Only the AtomicString side is unwrapped.
+{
+ return equalIgnoringASCIICase(a.string(), b);
+}
+
+inline bool equalIgnoringASCIICase(const String& a, const AtomicString& b) // Mirror image of the overload above.
+{
+ return equalIgnoringASCIICase(a, b.string());
+}
+
+inline bool equalIgnoringASCIICase(const AtomicString& a, const char* b) // The char pointer is passed through untouched.
+{
+ return equalIgnoringASCIICase(a.string(), b);
+}
+
+template<> struct IntegerToStringConversionTrait<AtomicString> { // Specialization selecting AtomicString as the conversion result type.
+ using ReturnType = AtomicString;
+ using AdditionalArgumentType = void;
+ static AtomicString flush(LChar* characters, unsigned length, void*) { return { characters, length }; } // Builds the result from (characters, length); the void* argument is unused.
+};
+
} // namespace WTF
#ifndef ATOMICSTRING_HIDE_GLOBALS
using WTF::AtomicString;
using WTF::nullAtom;
using WTF::emptyAtom;
-using WTF::textAtom;
-using WTF::commentAtom;
using WTF::starAtom;
using WTF::xmlAtom;
using WTF::xmlnsAtom;
-using WTF::xlinkAtom;
#endif
#include <wtf/text/StringConcatenate.h>
+
#endif // AtomicString_h
diff --git a/Source/WTF/wtf/text/AtomicStringHash.h b/Source/WTF/wtf/text/AtomicStringHash.h
index 6130d9493..417619350 100644
--- a/Source/WTF/wtf/text/AtomicStringHash.h
+++ b/Source/WTF/wtf/text/AtomicStringHash.h
@@ -10,7 +10,7 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- * 3. Neither the name of Apple Computer, Inc. ("Apple") nor the names of
+ * 3. Neither the name of Apple Inc. ("Apple") nor the names of
* its contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
@@ -48,11 +48,20 @@ namespace WTF {
static const bool safeToCompareToEmptyOrDeleted = false;
};
- // AtomicStringHash is the default hash for AtomicString
- template<> struct HashTraits<WTF::AtomicString> : GenericHashTraits<WTF::AtomicString> {
- static const bool emptyValueIsZero = true;
- static void constructDeletedValue(WTF::AtomicString& slot) { new (NotNull, &slot) WTF::AtomicString(HashTableDeletedValue); }
- static bool isDeletedValue(const WTF::AtomicString& slot) { return slot.isHashTableDeletedValue(); }
+ template<> struct HashTraits<WTF::AtomicString> : SimpleClassHashTraits<WTF::AtomicString> { // Hash traits for AtomicString keys, layered on SimpleClassHashTraits.
+ static const bool hasIsEmptyValueFunction = true;
+ static bool isEmptyValue(const AtomicString& value) // A null AtomicString is treated as the empty bucket value.
+ {
+ return value.isNull();
+ }
+
+ static void customDeleteBucket(AtomicString& value) // Order matters: the old value is moved out before the slot is rewritten.
+ {
+ // See unique_ptr's customDeleteBucket() for an explanation.
+ ASSERT(!isDeletedValue(value));
+ AtomicString valueToBeDestroyed = WTFMove(value); // The moved-out value is destroyed when this local goes out of scope.
+ constructDeletedValue(value); // Marks the slot as deleted while the old value is still held by the local.
+ }
+ };
}
diff --git a/Source/WTF/wtf/text/AtomicStringImpl.cpp b/Source/WTF/wtf/text/AtomicStringImpl.cpp
new file mode 100644
index 000000000..fb50b7fdd
--- /dev/null
+++ b/Source/WTF/wtf/text/AtomicStringImpl.cpp
@@ -0,0 +1,540 @@
+/*
+ * Copyright (C) 2004-2008, 2013-2014 Apple Inc. All rights reserved.
+ * Copyright (C) 2010 Patrick Gansterer <paroga@paroga.com>
+ * Copyright (C) 2012 Google Inc. All rights reserved.
+ * Copyright (C) 2015 Yusuke Suzuki<utatane.tea@gmail.com>. All rights reserved.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+#include "config.h"
+#include "AtomicStringImpl.h"
+
+#include "AtomicStringTable.h"
+#include "CommaPrinter.h"
+#include "DataLog.h"
+#include "HashSet.h"
+#include "IntegerToStringConversion.h"
+#include "StringHash.h"
+#include "StringPrintStream.h"
+#include "Threading.h"
+#include "WTFThreadData.h"
+#include <wtf/unicode/UTF8.h>
+
+#if USE(WEB_THREAD)
+#include "Lock.h"
+#endif
+
+namespace WTF {
+
+using namespace Unicode;
+
+#if USE(WEB_THREAD)
+
+class AtomicStringTableLocker : public LockHolder { // USE(WEB_THREAD) variant: a LockHolder bound to one static lock.
+ WTF_MAKE_NONCOPYABLE(AtomicStringTableLocker);
+
+ static StaticLock s_stringTableLock; // Single lock shared by every locker instance.
+public:
+ AtomicStringTableLocker()
+ : LockHolder(&s_stringTableLock) // NOTE(review): presumably held until the LockHolder base is destroyed; confirm in Lock.h.
+ {
+ }
+};
+
+StaticLock AtomicStringTableLocker::s_stringTableLock;
+
+#else
+
+class AtomicStringTableLocker { // Variant used when USE(WEB_THREAD) is off: same name, no lock member.
+ WTF_MAKE_NONCOPYABLE(AtomicStringTableLocker);
+public:
+ AtomicStringTableLocker() { } // Intentionally empty; lets call sites be written identically in both configurations.
+};
+
+#endif // USE(WEB_THREAD)
+
+using StringTableImpl = HashSet<StringImpl*>;
+
+// Returns the HashSet behind the atomic string table that wtfThreadData() reports for the caller.
+static ALWAYS_INLINE StringTableImpl& stringTable()
+{
+    AtomicStringTable* currentTable = wtfThreadData().atomicStringTable();
+    return currentTable->table();
+}
+
+// Inserts `value` into `atomicStringTable` using HashTranslator and returns the table's entry.
+// The locker parameter is unused here; taking it forces callers to have constructed one.
+template<typename T, typename HashTranslator>
+static inline Ref<AtomicStringImpl> addToStringTable(AtomicStringTableLocker&, StringTableImpl& atomicStringTable, const T& value)
+{
+    auto insertion = atomicStringTable.add<HashTranslator>(value);
+    auto* entry = static_cast<AtomicStringImpl*>(*insertion.iterator);
+
+    // An existing entry is simply referenced again.
+    if (!insertion.isNewEntry)
+        return *entry;
+
+    // A freshly translated string was stored via leakRef(), so adopt that
+    // reference instead of taking an additional one.
+    return adoptRef(*entry);
+}
+
+// Convenience overload: constructs the locker, then adds to the table returned by stringTable().
+template<typename T, typename HashTranslator>
+static inline Ref<AtomicStringImpl> addToStringTable(const T& value)
+{
+    AtomicStringTableLocker tableLock;
+    StringTableImpl& table = stringTable();
+    return addToStringTable<T, HashTranslator>(tableLock, table, value);
+}
+
+// Hash translator for keys given as an LChar pointer with no explicit length.
+struct CStringTranslator {
+    static unsigned hash(const LChar* characters)
+    {
+        return StringHasher::computeHashAndMaskTop8Bits(characters);
+    }
+
+    static inline bool equal(StringImpl* candidate, const LChar* characters)
+    {
+        return WTF::equal(candidate, characters);
+    }
+
+    // Creates the StringImpl for a new table slot. The reference is leaked into the
+    // raw-pointer slot; addToStringTable() adopts it for new entries.
+    static void translate(StringImpl*& location, const LChar* const& characters, unsigned hash)
+    {
+        StringImpl& created = StringImpl::create(characters).leakRef();
+        created.setHash(hash);
+        created.setIsAtomic(true);
+        location = &created;
+    }
+};
+
+// Interns the characters at `c`. A null pointer yields null; a pointer to a zero
+// character yields StringImpl::empty() without touching the table.
+RefPtr<AtomicStringImpl> AtomicStringImpl::add(const LChar* c)
+{
+    if (c && *c)
+        return addToStringTable<const LChar*, CStringTranslator>(c);
+
+    if (c)
+        return static_cast<AtomicStringImpl*>(StringImpl::empty());
+
+    return nullptr;
+}
+
+template<typename CharacterType>
+struct HashTranslatorCharBuffer { // Lookup key: a character pointer plus an explicit length.
+ const CharacterType* s; // First character; the struct only stores the pointer.
+ unsigned length; // Number of CharacterType elements at s.
+};
+
+typedef HashTranslatorCharBuffer<UChar> UCharBuffer;
+// Hash translator for UCharBuffer keys (UChar pointer + length).
+struct UCharBufferTranslator {
+    static unsigned hash(const UCharBuffer& key)
+    {
+        return StringHasher::computeHashAndMaskTop8Bits(key.s, key.length);
+    }
+
+    static bool equal(StringImpl* const& candidate, const UCharBuffer& key)
+    {
+        return WTF::equal(candidate, key.s, key.length);
+    }
+
+    // New entries are built with create8BitIfPossible(); the leaked reference is
+    // adopted by addToStringTable() for new entries.
+    static void translate(StringImpl*& location, const UCharBuffer& key, unsigned hash)
+    {
+        StringImpl& created = StringImpl::create8BitIfPossible(key.s, key.length).leakRef();
+        created.setHash(hash);
+        created.setIsAtomic(true);
+        location = &created;
+    }
+};
+
+template<typename CharacterType>
+struct HashAndCharacters { // Lookup key that carries a hash computed by the caller.
+ unsigned hash; // Returned as-is by HashAndCharactersTranslator::hash(), which only re-checks it in an ASSERT.
+ const CharacterType* characters;
+ unsigned length; // Number of elements at characters.
+};
+
+// Hash translator for keys whose hash was computed by the caller.
+template<typename CharacterType>
+struct HashAndCharactersTranslator {
+    using Key = HashAndCharacters<CharacterType>;
+
+    // Returns the supplied hash; assertion-enabled builds check it against a recomputation.
+    static unsigned hash(const Key& key)
+    {
+        ASSERT(key.hash == StringHasher::computeHashAndMaskTop8Bits(key.characters, key.length));
+        return key.hash;
+    }
+
+    static bool equal(StringImpl* const& candidate, const Key& key)
+    {
+        return WTF::equal(candidate, key.characters, key.length);
+    }
+
+    // The leaked reference is adopted by addToStringTable() for new entries.
+    static void translate(StringImpl*& location, const Key& key, unsigned hash)
+    {
+        StringImpl& created = StringImpl::create(key.characters, key.length).leakRef();
+        created.setHash(hash);
+        created.setIsAtomic(true);
+        location = &created;
+    }
+};
+
+struct HashAndUTF8Characters { // Lookup key for UTF-8 input; addUTF8() fills hash/length/utf16Length in one call.
+ unsigned hash;
+ const char* characters; // Start of the UTF-8 data.
+ unsigned length; // Used as the end offset of the UTF-8 data (characters + length).
+ unsigned utf16Length; // Compared against StringImpl::length() and used to size the UChar buffer.
+};
+
+struct HashAndUTF8CharactersTranslator { // Hash translator for HashAndUTF8Characters keys.
+ static unsigned hash(const HashAndUTF8Characters& buffer)
+ {
+ return buffer.hash; // Precomputed when the key was filled in; not recomputed here.
+ }
+
+ static bool equal(StringImpl* const& string, const HashAndUTF8Characters& buffer)
+ {
+ if (buffer.utf16Length != string->length()) // Cheap reject before any character comparison.
+ return false;
+
+ // If buffer contains only ASCII characters UTF-8 and UTF16 length are the same.
+ if (buffer.utf16Length != buffer.length) { // Lengths differ, so the input is not pure ASCII: use the decoding comparers.
+ if (string->is8Bit())
+ return equalLatin1WithUTF8(string->characters8(), buffer.characters, buffer.characters + buffer.length);
+
+ return equalUTF16WithUTF8(string->characters16(), buffer.characters, buffer.characters + buffer.length);
+ }
+
+ if (string->is8Bit()) { // Equal lengths: compare element by element against the 8-bit data.
+ const LChar* stringCharacters = string->characters8();
+
+ for (unsigned i = 0; i < buffer.length; ++i) {
+ ASSERT(isASCII(buffer.characters[i]));
+ if (stringCharacters[i] != buffer.characters[i])
+ return false;
+ }
+
+ return true;
+ }
+
+ const UChar* stringCharacters = string->characters16(); // Same comparison against the 16-bit data.
+
+ for (unsigned i = 0; i < buffer.length; ++i) {
+ ASSERT(isASCII(buffer.characters[i]));
+ if (stringCharacters[i] != buffer.characters[i])
+ return false;
+ }
+
+ return true;
+ }
+
+ static void translate(StringImpl*& location, const HashAndUTF8Characters& buffer, unsigned hash)
+ {
+ UChar* target;
+ auto newString = StringImpl::createUninitialized(buffer.utf16Length, target); // UChar storage sized from utf16Length; target points into it.
+
+ bool isAllASCII;
+ const char* source = buffer.characters;
+ if (convertUTF8ToUTF16(&source, source + buffer.length, &target, target + buffer.utf16Length, &isAllASCII) != conversionOK)
+ ASSERT_NOT_REACHED(); // NOTE(review): failure is only asserted; addUTF8() returns early on a zero hash, presumably filtering invalid input first.
+
+ if (isAllASCII)
+ newString = StringImpl::create(buffer.characters, buffer.length); // Replaces the UChar string with one built from the original char data.
+
+ location = &newString.leakRef(); // Leaked into the slot; addToStringTable() adopts it for new entries.
+ location->setHash(hash);
+ location->setIsAtomic(true);
+ }
+};
+
+// Interns `length` UChars starting at `s`. Null `s` yields null; zero length yields
+// StringImpl::empty() without a table lookup.
+RefPtr<AtomicStringImpl> AtomicStringImpl::add(const UChar* s, unsigned length)
+{
+    if (!s)
+        return nullptr;
+
+    if (length) {
+        UCharBuffer key = { s, length };
+        return addToStringTable<UCharBuffer, UCharBufferTranslator>(key);
+    }
+
+    return static_cast<AtomicStringImpl*>(StringImpl::empty());
+}
+
+// Interns `length` UChars using a hash the caller already computed. Both `s` and
+// `existingHash` are asserted to be non-zero.
+Ref<AtomicStringImpl> AtomicStringImpl::add(const UChar* s, unsigned length, unsigned existingHash)
+{
+    ASSERT(s);
+    ASSERT(existingHash);
+
+    if (length) {
+        HashAndCharacters<UChar> key = { existingHash, s, length };
+        return addToStringTable<HashAndCharacters<UChar>, HashAndCharactersTranslator<UChar>>(key);
+    }
+
+    return *static_cast<AtomicStringImpl*>(StringImpl::empty());
+}
+
+// Interns the UChar sequence at `s`, measured up to (not including) the first zero UChar.
+RefPtr<AtomicStringImpl> AtomicStringImpl::add(const UChar* s)
+{
+    if (!s)
+        return nullptr;
+
+    // Count elements before the terminating zero.
+    unsigned length = 0;
+    for (const UChar* cursor = s; *cursor != UChar(0); ++cursor)
+        ++length;
+
+    if (length) {
+        UCharBuffer key = { s, length };
+        return addToStringTable<UCharBuffer, UCharBufferTranslator>(key);
+    }
+
+    return static_cast<AtomicStringImpl*>(StringImpl::empty());
+}
+
+struct SubstringLocation { // Lookup key naming a range inside an existing StringImpl.
+ StringImpl* baseString; // Dereferenced by the substring translators; they do not check it for null.
+ unsigned start; // Offset added to the base string's character pointer.
+ unsigned length; // Number of characters in the range.
+};
+
+// Shared translate() for the 8-bit and 16-bit substring translators below.
+struct SubstringTranslator {
+    // The new entry shares the base string's storage (createSubstringSharingImpl);
+    // its leaked reference is adopted by addToStringTable() for new entries.
+    static void translate(StringImpl*& location, const SubstringLocation& range, unsigned hash)
+    {
+        StringImpl& created = StringImpl::createSubstringSharingImpl(*range.baseString, range.start, range.length).leakRef();
+        created.setHash(hash);
+        created.setIsAtomic(true);
+        location = &created;
+    }
+};
+
+// hash()/equal() for substring keys read through characters8().
+struct SubstringTranslator8 : SubstringTranslator {
+    static unsigned hash(const SubstringLocation& range)
+    {
+        const LChar* first = range.baseString->characters8() + range.start;
+        return StringHasher::computeHashAndMaskTop8Bits(first, range.length);
+    }
+
+    static bool equal(StringImpl* const& candidate, const SubstringLocation& range)
+    {
+        const LChar* first = range.baseString->characters8() + range.start;
+        return WTF::equal(candidate, first, range.length);
+    }
+};
+
+// hash()/equal() for substring keys read through characters16().
+struct SubstringTranslator16 : SubstringTranslator {
+    static unsigned hash(const SubstringLocation& range)
+    {
+        const UChar* first = range.baseString->characters16() + range.start;
+        return StringHasher::computeHashAndMaskTop8Bits(first, range.length);
+    }
+
+    static bool equal(StringImpl* const& candidate, const SubstringLocation& range)
+    {
+        const UChar* first = range.baseString->characters16() + range.start;
+        return WTF::equal(candidate, first, range.length);
+    }
+};
+
+// Interns the range [start, start + length) of `baseString`, clamping the range to
+// the end of the string. Null base yields null; an empty or out-of-range request
+// yields StringImpl::empty().
+RefPtr<AtomicStringImpl> AtomicStringImpl::add(StringImpl* baseString, unsigned start, unsigned length)
+{
+    if (!baseString)
+        return nullptr;
+
+    const unsigned baseLength = baseString->length();
+    if (!length || start >= baseLength)
+        return static_cast<AtomicStringImpl*>(StringImpl::empty());
+
+    const unsigned available = baseLength - start;
+    if (length >= available) {
+        // The request covers the whole string: intern the base string itself.
+        if (!start)
+            return add(baseString);
+        length = available;
+    }
+
+    SubstringLocation range = { baseString, start, length };
+    if (!baseString->is8Bit())
+        return addToStringTable<SubstringLocation, SubstringTranslator16>(range);
+    return addToStringTable<SubstringLocation, SubstringTranslator8>(range);
+}
+
+typedef HashTranslatorCharBuffer<LChar> LCharBuffer;
+// Hash translator for LCharBuffer keys (LChar pointer + length).
+struct LCharBufferTranslator {
+    static unsigned hash(const LCharBuffer& key)
+    {
+        return StringHasher::computeHashAndMaskTop8Bits(key.s, key.length);
+    }
+
+    static bool equal(StringImpl* const& candidate, const LCharBuffer& key)
+    {
+        return WTF::equal(candidate, key.s, key.length);
+    }
+
+    // The leaked reference is adopted by addToStringTable() for new entries.
+    static void translate(StringImpl*& location, const LCharBuffer& key, unsigned hash)
+    {
+        StringImpl& created = StringImpl::create(key.s, key.length).leakRef();
+        created.setHash(hash);
+        created.setIsAtomic(true);
+        location = &created;
+    }
+};
+
+typedef HashTranslatorCharBuffer<char> CharBuffer;
+// Hash translator for CharBuffer keys; new entries are made with createFromLiteral().
+struct CharBufferFromLiteralDataTranslator {
+    static unsigned hash(const CharBuffer& key)
+    {
+        const LChar* characters = reinterpret_cast<const LChar*>(key.s);
+        return StringHasher::computeHashAndMaskTop8Bits(characters, key.length);
+    }
+
+    static bool equal(StringImpl* const& candidate, const CharBuffer& key)
+    {
+        return WTF::equal(candidate, key.s, key.length);
+    }
+
+    // The leaked reference is adopted by addToStringTable() for new entries.
+    static void translate(StringImpl*& location, const CharBuffer& key, unsigned hash)
+    {
+        StringImpl& created = StringImpl::createFromLiteral(key.s, key.length).leakRef();
+        created.setHash(hash);
+        created.setIsAtomic(true);
+        location = &created;
+    }
+};
+
+// Interns `length` LChars starting at `s`. Null `s` yields null; zero length yields
+// StringImpl::empty() without a table lookup.
+RefPtr<AtomicStringImpl> AtomicStringImpl::add(const LChar* s, unsigned length)
+{
+    if (!s)
+        return nullptr;
+
+    if (length) {
+        LCharBuffer key = { s, length };
+        return addToStringTable<LCharBuffer, LCharBufferTranslator>(key);
+    }
+
+    return static_cast<AtomicStringImpl*>(StringImpl::empty());
+}
+
+// Interns literal character data. Both arguments are asserted non-zero; there is no
+// null or empty fallback path.
+Ref<AtomicStringImpl> AtomicStringImpl::addLiteral(const char* characters, unsigned length)
+{
+    ASSERT(characters);
+    ASSERT(length);
+
+    CharBuffer key = { characters, length };
+    return addToStringTable<CharBuffer, CharBufferFromLiteralDataTranslator>(key);
+}
+
+// Interns the full contents of `base` as a substring entry in `atomicStringTable`.
+// `base` is asserted to be non-empty and either a symbol or a static string.
+static inline Ref<AtomicStringImpl> addSubstring(AtomicStringTableLocker& locker, StringTableImpl& atomicStringTable, StringImpl& base)
+{
+    ASSERT(base.length());
+    ASSERT(base.isSymbol() || base.isStatic());
+
+    SubstringLocation wholeString = { &base, 0, base.length() };
+    if (!base.is8Bit())
+        return addToStringTable<SubstringLocation, SubstringTranslator16>(locker, atomicStringTable, wholeString);
+    return addToStringTable<SubstringLocation, SubstringTranslator8>(locker, atomicStringTable, wholeString);
+}
+
+// Convenience overload: constructs the locker and uses the table from stringTable().
+static inline Ref<AtomicStringImpl> addSubstring(StringImpl& base)
+{
+    AtomicStringTableLocker tableLock;
+    StringTableImpl& table = stringTable();
+    return addSubstring(tableLock, table, base);
+}
+
+// Slow path of add(StringImpl&) for the table returned by stringTable().
+// Empty strings map to StringImpl::empty(); symbols and static strings are interned
+// through addSubstring(); any other string is inserted into the table itself.
+Ref<AtomicStringImpl> AtomicStringImpl::addSlowCase(StringImpl& string)
+{
+    if (!string.length())
+        return *static_cast<AtomicStringImpl*>(StringImpl::empty());
+
+    if (string.isSymbol() || string.isStatic())
+        return addSubstring(string);
+
+    ASSERT_WITH_MESSAGE(!string.isAtomic(), "AtomicStringImpl should not hit the slow case if the string is already atomic.");
+
+    AtomicStringTableLocker locker;
+    auto insertion = stringTable().add(&string);
+    StringImpl* entry = *insertion.iterator;
+
+    // Only a string that was actually inserted becomes atomic; on a hit the
+    // existing table entry is returned instead.
+    if (insertion.isNewEntry) {
+        ASSERT(entry == &string);
+        string.setIsAtomic(true);
+    }
+
+    return *static_cast<AtomicStringImpl*>(entry);
+}
+
+// Slow path of add(AtomicStringTable&, StringImpl&): same steps as the single-argument
+// overload, but operating on the caller-supplied table.
+Ref<AtomicStringImpl> AtomicStringImpl::addSlowCase(AtomicStringTable& stringTable, StringImpl& string)
+{
+    if (!string.length())
+        return *static_cast<AtomicStringImpl*>(StringImpl::empty());
+
+    if (string.isSymbol() || string.isStatic()) {
+        AtomicStringTableLocker substringLock;
+        return addSubstring(substringLock, stringTable.table(), string);
+    }
+
+    ASSERT_WITH_MESSAGE(!string.isAtomic(), "AtomicStringImpl should not hit the slow case if the string is already atomic.");
+
+    AtomicStringTableLocker locker;
+    auto insertion = stringTable.table().add(&string);
+    StringImpl* entry = *insertion.iterator;
+
+    // Only a string that was actually inserted becomes atomic.
+    if (insertion.isNewEntry) {
+        ASSERT(entry == &string);
+        string.setIsAtomic(true);
+    }
+
+    return *static_cast<AtomicStringImpl*>(entry);
+}
+
+// Removes `string` from the table returned by stringTable(). Assertion-enabled builds
+// check that the string is atomic and that this table holds exactly this object.
+void AtomicStringImpl::remove(AtomicStringImpl* string)
+{
+    ASSERT(string->isAtomic());
+
+    AtomicStringTableLocker locker;
+    StringTableImpl& table = stringTable();
+    auto entry = table.find(string);
+
+    ASSERT_WITH_MESSAGE(entry != table.end(), "The string being removed is atomic in the string table of an other thread!");
+    ASSERT(string == *entry);
+
+    table.remove(entry);
+}
+
+// Finds the table entry equal to `string` without inserting anything.
+// Returns StringImpl::empty() for empty input and null when there is no entry.
+RefPtr<AtomicStringImpl> AtomicStringImpl::lookUpSlowCase(StringImpl& string)
+{
+    ASSERT_WITH_MESSAGE(!string.isAtomic(), "AtomicStringImpls should return from the fast case.");
+
+    if (!string.length())
+        return static_cast<AtomicStringImpl*>(StringImpl::empty());
+
+    AtomicStringTableLocker locker;
+    StringTableImpl& table = stringTable();
+    auto match = table.find(&string);
+    if (match == table.end())
+        return nullptr;
+    return static_cast<AtomicStringImpl*>(*match);
+}
+
+// Interns the UTF-8 data in [charactersStart, charactersEnd). The helper computes
+// the hash and both lengths in one call; a zero hash makes this function return null.
+RefPtr<AtomicStringImpl> AtomicStringImpl::addUTF8(const char* charactersStart, const char* charactersEnd)
+{
+    HashAndUTF8Characters key;
+    key.characters = charactersStart;
+    key.hash = calculateStringHashAndLengthFromUTF8MaskingTop8Bits(charactersStart, charactersEnd, key.length, key.utf16Length);
+
+    if (key.hash)
+        return addToStringTable<HashAndUTF8Characters, HashAndUTF8CharactersTranslator>(key);
+
+    return nullptr;
+}
+
+// Finds the table entry matching the LChar buffer without inserting; null when absent.
+RefPtr<AtomicStringImpl> AtomicStringImpl::lookUpInternal(const LChar* characters, unsigned length)
+{
+    AtomicStringTableLocker locker;
+    StringTableImpl& table = stringTable();
+
+    LCharBuffer key = { characters, length };
+    auto match = table.find<LCharBufferTranslator>(key);
+    if (match == table.end())
+        return nullptr;
+    return static_cast<AtomicStringImpl*>(*match);
+}
+
+// Finds the table entry matching the UChar buffer without inserting; null when absent.
+RefPtr<AtomicStringImpl> AtomicStringImpl::lookUpInternal(const UChar* characters, unsigned length)
+{
+    AtomicStringTableLocker locker;
+    StringTableImpl& table = stringTable();
+
+    UCharBuffer key = { characters, length };
+    auto match = table.find<UCharBufferTranslator>(key);
+    if (match == table.end())
+        return nullptr;
+    return static_cast<AtomicStringImpl*>(*match);
+}
+
+#if !ASSERT_DISABLED
+// Assertion helper (compiled only when assertions are enabled): reports whether the
+// table returned by stringTable() contains `string`.
+bool AtomicStringImpl::isInAtomicStringTable(StringImpl* string)
+{
+    AtomicStringTableLocker locker;
+    StringTableImpl& table = stringTable();
+    return table.contains(string);
+}
+#endif
+
+} // namespace WTF
diff --git a/Source/WTF/wtf/text/AtomicStringImpl.h b/Source/WTF/wtf/text/AtomicStringImpl.h
index 45114aca5..1cde4b0ed 100644
--- a/Source/WTF/wtf/text/AtomicStringImpl.h
+++ b/Source/WTF/wtf/text/AtomicStringImpl.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2006 Apple Computer, Inc.
+ * Copyright (C) 2006 Apple Inc.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
@@ -21,18 +21,97 @@
#ifndef AtomicStringImpl_h
#define AtomicStringImpl_h
-#include <wtf/text/StringImpl.h>
+#include <wtf/text/UniquedStringImpl.h>
namespace WTF {
-class AtomicStringImpl : public StringImpl
-{
+class AtomicStringTable;
+
+class AtomicStringImpl : public UniquedStringImpl {
public:
- AtomicStringImpl() : StringImpl(0) {}
+    // Returns the already-interned string equal to the given Latin-1 buffer, or null
+    // when there is none. Nothing is inserted. The buffer is only read, so it is
+    // taken as a pointer to const; callers holding non-const buffers still convert.
+    static RefPtr<AtomicStringImpl> lookUp(const LChar* characters, unsigned length)
+    {
+        return lookUpInternal(characters, length);
+    }
+    // UChar counterpart of the overload above, with the same const-correct signature.
+    static RefPtr<AtomicStringImpl> lookUp(const UChar* characters, unsigned length)
+    {
+        return lookUpInternal(characters, length);
+    }
+    // Fast path: null or already-atomic input is returned as-is (cast only);
+    // anything else goes through lookUpSlowCase().
+    static RefPtr<AtomicStringImpl> lookUp(StringImpl* string)
+    {
+        const bool needsTableLookup = string && !string->isAtomic();
+        if (needsTableLookup)
+            return lookUpSlowCase(*string);
+        return static_cast<AtomicStringImpl*>(string);
+    }
+
+ static void remove(AtomicStringImpl*);
+
+ WTF_EXPORT_STRING_API static RefPtr<AtomicStringImpl> add(const LChar*);
+    // char* convenience overload: reinterprets the bytes as LChar and forwards.
+    // (The stray ';' that followed the function body has been dropped.)
+    ALWAYS_INLINE static RefPtr<AtomicStringImpl> add(const char* s) { return add(reinterpret_cast<const LChar*>(s)); }
+ WTF_EXPORT_STRING_API static RefPtr<AtomicStringImpl> add(const LChar*, unsigned length);
+ WTF_EXPORT_STRING_API static RefPtr<AtomicStringImpl> add(const UChar*, unsigned length);
+    // char* + length convenience overload: reinterprets the bytes as LChar and forwards.
+    // (The stray ';' that followed the function body has been dropped.)
+    ALWAYS_INLINE static RefPtr<AtomicStringImpl> add(const char* s, unsigned length) { return add(reinterpret_cast<const LChar*>(s), length); }
+ WTF_EXPORT_STRING_API static Ref<AtomicStringImpl> add(const UChar*, unsigned length, unsigned existingHash);
+ WTF_EXPORT_STRING_API static RefPtr<AtomicStringImpl> add(const UChar*);
+ WTF_EXPORT_STRING_API static RefPtr<AtomicStringImpl> add(StringImpl*, unsigned offset, unsigned length);
+    // Pointer entry point: null stays null, everything else is handled by the
+    // reference overload add(StringImpl&).
+    ALWAYS_INLINE static RefPtr<AtomicStringImpl> add(StringImpl* string)
+    {
+        if (string)
+            return add(*string);
+        return nullptr;
+    }
+ WTF_EXPORT_STRING_API static Ref<AtomicStringImpl> addLiteral(const char* characters, unsigned length);
+
+ // Returns null if the input data contains an invalid UTF-8 sequence.
+ WTF_EXPORT_STRING_API static RefPtr<AtomicStringImpl> addUTF8(const char* start, const char* end);
+#if USE(CF)
+ WTF_EXPORT_STRING_API static RefPtr<AtomicStringImpl> add(CFStringRef);
+#endif
+
+    // Interns `string` into the table obtained from stringTableProvider.atomicStringTable().
+    // Null input yields null.
+    template<typename StringTableProvider>
+    ALWAYS_INLINE static RefPtr<AtomicStringImpl> addWithStringTableProvider(StringTableProvider& stringTableProvider, StringImpl* string)
+    {
+        if (string)
+            return add(*stringTableProvider.atomicStringTable(), *string);
+        return nullptr;
+    }
+
+#if !ASSERT_DISABLED
+ WTF_EXPORT_STRING_API static bool isInAtomicStringTable(StringImpl*);
+#endif
+
+private:
+ AtomicStringImpl() = delete;
+
+    // Fast path for add(): a string already flagged atomic is returned directly;
+    // everything else takes addSlowCase().
+    ALWAYS_INLINE static Ref<AtomicStringImpl> add(StringImpl& string)
+    {
+        if (!string.isAtomic())
+            return addSlowCase(string);
+
+        ASSERT_WITH_MESSAGE(!string.length() || isInAtomicStringTable(&string), "The atomic string comes from an other thread!");
+        return static_cast<AtomicStringImpl&>(string);
+    }
+
+    // Fast path for add() against an explicit table: a string already flagged atomic
+    // is returned directly; everything else takes addSlowCase(stringTable, string).
+    ALWAYS_INLINE static Ref<AtomicStringImpl> add(AtomicStringTable& stringTable, StringImpl& string)
+    {
+        if (!string.isAtomic())
+            return addSlowCase(stringTable, string);
+
+        ASSERT_WITH_MESSAGE(!string.length() || isInAtomicStringTable(&string), "The atomic string comes from an other thread!");
+        return static_cast<AtomicStringImpl&>(string);
+    }
+
+ WTF_EXPORT_STRING_API static Ref<AtomicStringImpl> addSlowCase(StringImpl&);
+ WTF_EXPORT_STRING_API static Ref<AtomicStringImpl> addSlowCase(AtomicStringTable&, StringImpl&);
+
+ WTF_EXPORT_STRING_API static RefPtr<AtomicStringImpl> lookUpSlowCase(StringImpl&);
+
+ WTF_EXPORT_STRING_API static RefPtr<AtomicStringImpl> lookUpInternal(const LChar*, unsigned length);
+ WTF_EXPORT_STRING_API static RefPtr<AtomicStringImpl> lookUpInternal(const UChar*, unsigned length);
};
#if !ASSERT_DISABLED
-// AtomicStringImpls created from StaticASCIILiteral will ASSERT
+// AtomicStringImpls created from StaticStringImpl will ASSERT
// in the generic ValueCheck<T>::checkConsistency
// as they are not allocated by fastMalloc.
// We don't currently have any way to detect that case
diff --git a/Source/WTF/wtf/text/AtomicStringTable.cpp b/Source/WTF/wtf/text/AtomicStringTable.cpp
index d961b17e2..fe8a4884d 100644
--- a/Source/WTF/wtf/text/AtomicStringTable.cpp
+++ b/Source/WTF/wtf/text/AtomicStringTable.cpp
@@ -37,25 +37,28 @@ void AtomicStringTable::create(WTFThreadData& data)
bool currentThreadIsWebThread = isWebThread();
if (currentThreadIsWebThread || isUIThread())
- data.m_atomicStringTable = sharedStringTable;
+ data.m_defaultAtomicStringTable = sharedStringTable;
else
- data.m_atomicStringTable = new AtomicStringTable;
+ data.m_defaultAtomicStringTable = new AtomicStringTable;
// We do the following so that its destruction happens only
// once - on the main UI thread.
if (!currentThreadIsWebThread)
data.m_atomicStringTableDestructor = AtomicStringTable::destroy;
#else
- data.m_atomicStringTable = new AtomicStringTable;
+ data.m_defaultAtomicStringTable = new AtomicStringTable;
data.m_atomicStringTableDestructor = AtomicStringTable::destroy;
#endif // USE(WEB_THREAD)
}
+// Clears the atomic flag on every string still in the table before the table goes away.
+AtomicStringTable::~AtomicStringTable()
+{
+    for (auto entry = m_table.begin(), last = m_table.end(); entry != last; ++entry)
+        (*entry)->setIsAtomic(false);
+}
+
void AtomicStringTable::destroy(AtomicStringTable* table)
{
- HashSet<StringImpl*>::iterator end = table->m_table.end();
- for (HashSet<StringImpl*>::iterator iter = table->m_table.begin(); iter != end; ++iter)
- (*iter)->setIsAtomic(false);
delete table;
}
diff --git a/Source/WTF/wtf/text/AtomicStringTable.h b/Source/WTF/wtf/text/AtomicStringTable.h
index 57826cb71..71d956d27 100644
--- a/Source/WTF/wtf/text/AtomicStringTable.h
+++ b/Source/WTF/wtf/text/AtomicStringTable.h
@@ -33,6 +33,7 @@ class StringImpl;
class AtomicStringTable {
WTF_MAKE_FAST_ALLOCATED;
public:
+ WTF_EXPORT_PRIVATE ~AtomicStringTable();
static void create(WTFThreadData&);
HashSet<StringImpl*>& table() { return m_table; }
diff --git a/Source/WTF/wtf/text/Base64.cpp b/Source/WTF/wtf/text/Base64.cpp
index 2323f3fa3..714a7ead4 100644
--- a/Source/WTF/wtf/text/Base64.cpp
+++ b/Source/WTF/wtf/text/Base64.cpp
@@ -1,7 +1,7 @@
/*
Copyright (C) 2000-2001 Dawit Alemayehu <adawit@kde.org>
Copyright (C) 2006 Alexey Proskuryakov <ap@webkit.org>
- Copyright (C) 2007, 2008, 2013 Apple Inc. All rights reserved.
+ Copyright (C) 2007, 2008, 2013, 2016 Apple Inc. All rights reserved.
Copyright (C) 2010 Patrick Gansterer <paroga@paroga.com>
This program is free software; you can redistribute it and/or modify
@@ -92,7 +92,7 @@ static const char base64URLDecMap[128] = {
0x31, 0x32, 0x33, nonAlphabet, nonAlphabet, nonAlphabet, nonAlphabet, nonAlphabet
};
-static inline void base64EncodeInternal(const char* data, unsigned len, Vector<char>& out, Base64EncodePolicy policy, const char (&encodeMap)[64])
+static inline void base64EncodeInternal(const unsigned char* data, unsigned len, Vector<char>& out, Base64EncodePolicy policy, const char (&encodeMap)[64])
{
out.clear();
if (!len)
@@ -160,29 +160,29 @@ static inline void base64EncodeInternal(const char* data, unsigned len, Vector<c
String base64Encode(const void* data, unsigned length, Base64EncodePolicy policy)
{
Vector<char> result;
- base64EncodeInternal(static_cast<const char*>(data), length, result, policy, base64EncMap);
+ base64EncodeInternal(static_cast<const unsigned char*>(data), length, result, policy, base64EncMap);
return String(result.data(), result.size());
}
void base64Encode(const void* data, unsigned len, Vector<char>& out, Base64EncodePolicy policy)
{
- base64EncodeInternal(static_cast<const char*>(data), len, out, policy, base64EncMap);
+ base64EncodeInternal(static_cast<const unsigned char*>(data), len, out, policy, base64EncMap);
}
String base64URLEncode(const void* data, unsigned length)
{
Vector<char> result;
- base64EncodeInternal(static_cast<const char*>(data), length, result, Base64URLPolicy, base64URLEncMap);
+ base64EncodeInternal(static_cast<const unsigned char*>(data), length, result, Base64URLPolicy, base64URLEncMap);
return String(result.data(), result.size());
}
void base64URLEncode(const void* data, unsigned len, Vector<char>& out)
{
- base64EncodeInternal(static_cast<const char*>(data), len, out, Base64URLPolicy, base64URLEncMap);
+ base64EncodeInternal(static_cast<const unsigned char*>(data), len, out, Base64URLPolicy, base64URLEncMap);
}
template<typename T>
-static inline bool base64DecodeInternal(const T* data, unsigned length, Vector<char>& out, Base64DecodePolicy policy, const char (&decodeMap)[128])
+static inline bool base64DecodeInternal(const T* data, unsigned length, SignedOrUnsignedCharVectorAdapter& out, unsigned options, const char (&decodeMap)[128])
{
out.clear();
if (!length)
@@ -192,29 +192,47 @@ static inline bool base64DecodeInternal(const T* data, unsigned length, Vector<c
unsigned equalsSignCount = 0;
unsigned outLength = 0;
+ bool hadError = false;
for (unsigned idx = 0; idx < length; ++idx) {
unsigned ch = data[idx];
if (ch == '=') {
++equalsSignCount;
- // There should be no padding if length is a multiple of 4, and there
- // should never be more than 2 padding characters.
- if (policy == Base64FailOnInvalidCharacterOrExcessPadding && (length % 4 || equalsSignCount > 2))
- return false;
+ // There should never be more than 2 padding characters.
+ if (options & Base64ValidatePadding && equalsSignCount > 2) {
+ hadError = true;
+ break;
+ }
} else {
char decodedCharacter = ch < WTF_ARRAY_LENGTH(decodeMap) ? decodeMap[ch] : nonAlphabet;
if (decodedCharacter != nonAlphabet) {
- if (equalsSignCount)
- return false;
- out[outLength] = decodedCharacter;
- ++outLength;
- } else if (policy == Base64FailOnInvalidCharacterOrExcessPadding || policy == Base64FailOnInvalidCharacter || (policy == Base64IgnoreWhitespace && !isSpaceOrNewline(ch)))
- return false;
+ if (equalsSignCount) {
+ hadError = true;
+ break;
+ }
+ out[outLength++] = decodedCharacter;
+ } else if (!(options & Base64IgnoreSpacesAndNewLines) || !isSpaceOrNewline(ch)) {
+ hadError = true;
+ break;
+ }
}
}
+ // Make sure we shrink back the Vector before returning. outLength may be shorter than expected
+ // in case of error or in case of ignored spaces.
+ if (outLength < out.size())
+ out.shrink(outLength);
+
+ if (hadError)
+ return false;
+
if (!outLength)
return !equalsSignCount;
+ // When padding is present, the padded length must be a multiple of 4.
+ // We use (outLength + equalsSignCount) instead of length because we don't want to account for ignored characters (i.e. spaces).
+ if (options & Base64ValidatePadding && equalsSignCount && (outLength + equalsSignCount) % 4)
+ return false;
+
// Valid data is (n * 4 + [0,2,3]) characters long.
if ((outLength % 4) == 1)
return false;
@@ -248,12 +266,15 @@ static inline bool base64DecodeInternal(const T* data, unsigned length, Vector<c
return true;
}
-bool base64Decode(const String& in, SignedOrUnsignedCharVectorAdapter out, Base64DecodePolicy policy)
+bool base64Decode(const String& in, SignedOrUnsignedCharVectorAdapter out, unsigned options)
{
- return base64DecodeInternal<UChar>(in.deprecatedCharacters(), in.length(), out, policy, base64DecMap);
+ unsigned length = in.length();
+ if (!length || in.is8Bit())
+ return base64DecodeInternal(in.characters8(), length, out, options, base64DecMap);
+ return base64DecodeInternal(in.characters16(), length, out, options, base64DecMap);
}
-bool base64Decode(const Vector<char>& in, SignedOrUnsignedCharVectorAdapter out, Base64DecodePolicy policy)
+bool base64Decode(const Vector<char>& in, SignedOrUnsignedCharVectorAdapter out, unsigned options)
{
out.clear();
@@ -261,17 +282,20 @@ bool base64Decode(const Vector<char>& in, SignedOrUnsignedCharVectorAdapter out,
if (in.size() > UINT_MAX)
return false;
- return base64DecodeInternal<char>(in.data(), in.size(), out, policy, base64DecMap);
+ return base64DecodeInternal(reinterpret_cast<const LChar*>(in.data()), in.size(), out, options, base64DecMap);
}
-bool base64Decode(const char* data, unsigned len, SignedOrUnsignedCharVectorAdapter out, Base64DecodePolicy policy)
+bool base64Decode(const char* data, unsigned len, SignedOrUnsignedCharVectorAdapter out, unsigned options)
{
- return base64DecodeInternal<char>(data, len, out, policy, base64DecMap);
+ return base64DecodeInternal(reinterpret_cast<const LChar*>(data), len, out, options, base64DecMap);
}
bool base64URLDecode(const String& in, SignedOrUnsignedCharVectorAdapter out)
{
- return base64DecodeInternal<UChar>(in.deprecatedCharacters(), in.length(), out, Base64FailOnInvalidCharacter, base64URLDecMap);
+ unsigned length = in.length();
+ if (!length || in.is8Bit())
+ return base64DecodeInternal(in.characters8(), length, out, Base64Default, base64URLDecMap);
+ return base64DecodeInternal(in.characters16(), length, out, Base64Default, base64URLDecMap);
}
bool base64URLDecode(const Vector<char>& in, SignedOrUnsignedCharVectorAdapter out)
@@ -282,12 +306,12 @@ bool base64URLDecode(const Vector<char>& in, SignedOrUnsignedCharVectorAdapter o
if (in.size() > UINT_MAX)
return false;
- return base64DecodeInternal<char>(in.data(), in.size(), out, Base64FailOnInvalidCharacter, base64URLDecMap);
+ return base64DecodeInternal(reinterpret_cast<const LChar*>(in.data()), in.size(), out, Base64Default, base64URLDecMap);
}
bool base64URLDecode(const char* data, unsigned len, SignedOrUnsignedCharVectorAdapter out)
{
- return base64DecodeInternal<char>(data, len, out, Base64FailOnInvalidCharacter, base64URLDecMap);
+ return base64DecodeInternal(reinterpret_cast<const LChar*>(data), len, out, Base64Default, base64URLDecMap);
}
} // namespace WTF
diff --git a/Source/WTF/wtf/text/Base64.h b/Source/WTF/wtf/text/Base64.h
index 1dfcf2698..820557558 100644
--- a/Source/WTF/wtf/text/Base64.h
+++ b/Source/WTF/wtf/text/Base64.h
@@ -1,7 +1,7 @@
/*
* Copyright (C) 2006 Alexey Proskuryakov <ap@webkit.org>
* Copyright (C) 2010 Patrick Gansterer <paroga@paroga.com>
- * Copyright (C) 2013 Apple Inc. All rights reserved.
+ * Copyright (C) 2013, 2016 Apple Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -12,10 +12,10 @@
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
- * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
@@ -40,22 +40,70 @@ enum Base64EncodePolicy {
Base64URLPolicy // No padding, no LFs.
};
-enum Base64DecodePolicy {
- Base64FailOnInvalidCharacterOrExcessPadding,
- Base64FailOnInvalidCharacter,
- Base64IgnoreWhitespace,
- Base64IgnoreInvalidCharacters
+enum Base64DecodeOptions {
+ Base64Default = 0,
+ Base64ValidatePadding = 1 << 0,
+ Base64IgnoreSpacesAndNewLines = 1 << 1,
};
class SignedOrUnsignedCharVectorAdapter {
public:
- SignedOrUnsignedCharVectorAdapter(Vector<char>& vector) { m_vector.c = &vector; }
- SignedOrUnsignedCharVectorAdapter(Vector<uint8_t>& vector) { m_vector.u = &vector; }
-
- operator Vector<char>&() { return *m_vector.c; }
- void clear() { m_vector.c->clear(); }
+ // Wraps a Vector<char>: stores its address in the `c` member of the pointer
+ // union and sets m_isSigned so the accessors read that same member.
+ SignedOrUnsignedCharVectorAdapter(Vector<char>& vector)
+ : m_isSigned(true)
+ {
+ m_vector.c = &vector;
+ }
+ // Wraps a Vector<uint8_t>: stores its address in the `u` member of the pointer
+ // union and clears m_isSigned so the accessors read that same member.
+ SignedOrUnsignedCharVectorAdapter(Vector<uint8_t>& vector)
+ : m_isSigned(false)
+ {
+ m_vector.u = &vector;
+ }
+
+ // Returns the wrapped vector's storage as uint8_t*, reinterpreting the
+ // pointer when the wrapped vector holds char elements.
+ uint8_t* data()
+ {
+     return m_isSigned ? reinterpret_cast<uint8_t*>(m_vector.c->data()) : m_vector.u->data();
+ }
+
+ // Returns the element count of whichever vector is wrapped.
+ size_t size() const
+ {
+     return m_isSigned ? m_vector.c->size() : m_vector.u->size();
+ }
+
+ // Forwards clear() to whichever vector is wrapped.
+ void clear()
+ {
+     if (!m_isSigned)
+         m_vector.u->clear();
+     else
+         m_vector.c->clear();
+ }
+
+ // Forwards grow(size) to whichever vector is wrapped.
+ void grow(size_t size)
+ {
+     if (!m_isSigned)
+         m_vector.u->grow(size);
+     else
+         m_vector.c->grow(size);
+ }
+
+ // Forwards shrink(size) to whichever vector is wrapped.
+ void shrink(size_t size)
+ {
+     if (!m_isSigned)
+         m_vector.u->shrink(size);
+     else
+         m_vector.c->shrink(size);
+ }
+
+ // Indexes straight into data(); this accessor itself performs no bounds check.
+ uint8_t& operator[](size_t position) { return data()[position]; }
private:
+ bool m_isSigned;
union {
Vector<char>* c;
Vector<uint8_t>* u;
@@ -64,14 +112,32 @@ private:
class ConstSignedOrUnsignedCharVectorAdapter {
public:
- ConstSignedOrUnsignedCharVectorAdapter(const Vector<char>& vector) { m_vector.c = &vector; }
- ConstSignedOrUnsignedCharVectorAdapter(const Vector<uint8_t>& vector) { m_vector.u = &vector; }
-
- operator const Vector<char>&() { return *m_vector.c; }
- const char* data() const { return m_vector.c->data(); }
- size_t size() const { return m_vector.c->size(); }
+ // Wraps a const Vector<char>. m_isSigned must be true here because data() and
+ // size() read m_vector.c only when the flag is set.
+ ConstSignedOrUnsignedCharVectorAdapter(const Vector<char>& vector)
+     : m_isSigned(true)
+ {
+     m_vector.c = &vector;
+ }
+ // Wraps a const Vector<uint8_t>. m_isSigned must be false here because data()
+ // and size() read m_vector.u only when the flag is clear.
+ ConstSignedOrUnsignedCharVectorAdapter(const Vector<uint8_t>& vector)
+     : m_isSigned(false)
+ {
+     m_vector.u = &vector;
+ }
+
+ // Returns the wrapped vector's storage as const uint8_t*, choosing the union
+ // member according to m_isSigned.
+ const uint8_t* data() const
+ {
+     return m_isSigned ? reinterpret_cast<const uint8_t*>(m_vector.c->data()) : m_vector.u->data();
+ }
+ // Returns the element count, choosing the union member according to m_isSigned.
+ size_t size() const
+ {
+     return m_isSigned ? m_vector.c->size() : m_vector.u->size();
+ }
private:
+ bool m_isSigned;
union {
const Vector<char>* c;
const Vector<uint8_t>* u;
@@ -79,15 +145,15 @@ private:
};
WTF_EXPORT_PRIVATE void base64Encode(const void*, unsigned, Vector<char>&, Base64EncodePolicy = Base64DoNotInsertLFs);
-WTF_EXPORT_PRIVATE void base64Encode(ConstSignedOrUnsignedCharVectorAdapter, Vector<char>&, Base64EncodePolicy = Base64DoNotInsertLFs);
-WTF_EXPORT_PRIVATE void base64Encode(const CString&, Vector<char>&, Base64EncodePolicy = Base64DoNotInsertLFs);
+void base64Encode(ConstSignedOrUnsignedCharVectorAdapter, Vector<char>&, Base64EncodePolicy = Base64DoNotInsertLFs);
+void base64Encode(const CString&, Vector<char>&, Base64EncodePolicy = Base64DoNotInsertLFs);
WTF_EXPORT_PRIVATE String base64Encode(const void*, unsigned, Base64EncodePolicy = Base64DoNotInsertLFs);
-WTF_EXPORT_PRIVATE String base64Encode(ConstSignedOrUnsignedCharVectorAdapter, Base64EncodePolicy = Base64DoNotInsertLFs);
-WTF_EXPORT_PRIVATE String base64Encode(const CString&, Base64EncodePolicy = Base64DoNotInsertLFs);
+String base64Encode(ConstSignedOrUnsignedCharVectorAdapter, Base64EncodePolicy = Base64DoNotInsertLFs);
+String base64Encode(const CString&, Base64EncodePolicy = Base64DoNotInsertLFs);
-WTF_EXPORT_PRIVATE bool base64Decode(const String&, SignedOrUnsignedCharVectorAdapter, Base64DecodePolicy = Base64FailOnInvalidCharacter);
-WTF_EXPORT_PRIVATE bool base64Decode(const Vector<char>&, SignedOrUnsignedCharVectorAdapter, Base64DecodePolicy = Base64FailOnInvalidCharacter);
-WTF_EXPORT_PRIVATE bool base64Decode(const char*, unsigned, SignedOrUnsignedCharVectorAdapter, Base64DecodePolicy = Base64FailOnInvalidCharacter);
+WTF_EXPORT_PRIVATE bool base64Decode(const String&, SignedOrUnsignedCharVectorAdapter, unsigned options = Base64Default);
+WTF_EXPORT_PRIVATE bool base64Decode(const Vector<char>&, SignedOrUnsignedCharVectorAdapter, unsigned options = Base64Default);
+WTF_EXPORT_PRIVATE bool base64Decode(const char*, unsigned, SignedOrUnsignedCharVectorAdapter, unsigned options = Base64Default);
inline void base64Encode(ConstSignedOrUnsignedCharVectorAdapter in, Vector<char>& out, Base64EncodePolicy policy)
{
@@ -115,11 +181,12 @@ inline String base64Encode(const CString& in, Base64EncodePolicy policy)
// ======================================================================================
WTF_EXPORT_PRIVATE void base64URLEncode(const void*, unsigned, Vector<char>&);
-WTF_EXPORT_PRIVATE void base64URLEncode(ConstSignedOrUnsignedCharVectorAdapter, Vector<char>&);
-WTF_EXPORT_PRIVATE void base64URLEncode(const CString&, Vector<char>&);
+void base64URLEncode(ConstSignedOrUnsignedCharVectorAdapter, Vector<char>&);
+void base64URLEncode(const CString&, Vector<char>&);
+
WTF_EXPORT_PRIVATE String base64URLEncode(const void*, unsigned);
-WTF_EXPORT_PRIVATE String base64URLEncode(ConstSignedOrUnsignedCharVectorAdapter);
-WTF_EXPORT_PRIVATE String base64URLEncode(const CString&);
+String base64URLEncode(ConstSignedOrUnsignedCharVectorAdapter);
+String base64URLEncode(const CString&);
WTF_EXPORT_PRIVATE bool base64URLDecode(const String&, SignedOrUnsignedCharVectorAdapter);
WTF_EXPORT_PRIVATE bool base64URLDecode(const Vector<char>&, SignedOrUnsignedCharVectorAdapter);
@@ -150,11 +217,8 @@ inline String base64URLEncode(const CString& in)
using WTF::Base64EncodePolicy;
using WTF::Base64DoNotInsertLFs;
using WTF::Base64InsertLFs;
-using WTF::Base64DecodePolicy;
-using WTF::Base64FailOnInvalidCharacterOrExcessPadding;
-using WTF::Base64FailOnInvalidCharacter;
-using WTF::Base64IgnoreWhitespace;
-using WTF::Base64IgnoreInvalidCharacters;
+using WTF::Base64ValidatePadding;
+using WTF::Base64IgnoreSpacesAndNewLines;
using WTF::base64Encode;
using WTF::base64Decode;
using WTF::base64URLDecode;
diff --git a/Source/WTF/wtf/text/CString.cpp b/Source/WTF/wtf/text/CString.cpp
index e44a96e80..21b37eba8 100644
--- a/Source/WTF/wtf/text/CString.cpp
+++ b/Source/WTF/wtf/text/CString.cpp
@@ -10,10 +10,10 @@
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
- * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
@@ -28,18 +28,18 @@
#include "CString.h"
#include <string.h>
-#include <wtf/StringHasher.h>
+#include <wtf/Hasher.h>
namespace WTF {
-PassRefPtr<CStringBuffer> CStringBuffer::createUninitialized(size_t length)
+Ref<CStringBuffer> CStringBuffer::createUninitialized(size_t length)
{
RELEASE_ASSERT(length < (std::numeric_limits<unsigned>::max() - sizeof(CStringBuffer)));
// The +1 is for the terminating null character.
size_t size = sizeof(CStringBuffer) + length + 1;
CStringBuffer* stringBuffer = static_cast<CStringBuffer*>(fastMalloc(size));
- return adoptRef(new (NotNull, stringBuffer) CStringBuffer(length));
+ return adoptRef(*new (NotNull, stringBuffer) CStringBuffer(length));
}
CString::CString(const char* str)
@@ -76,7 +76,7 @@ char* CString::mutableData()
return 0;
return m_buffer->mutableData();
}
-
+
CString CString::newUninitialized(size_t length, char*& characterBuffer)
{
CString result;
@@ -92,7 +92,7 @@ void CString::copyBufferIfNeeded()
if (!m_buffer || m_buffer->hasOneRef())
return;
- RefPtr<CStringBuffer> buffer = m_buffer.release();
+ RefPtr<CStringBuffer> buffer = WTFMove(m_buffer);
size_t length = buffer->length();
m_buffer = CStringBuffer::createUninitialized(length);
memcpy(m_buffer->mutableData(), buffer->data(), length + 1);
diff --git a/Source/WTF/wtf/text/CString.h b/Source/WTF/wtf/text/CString.h
index 1941a2dbe..4d8d80399 100644
--- a/Source/WTF/wtf/text/CString.h
+++ b/Source/WTF/wtf/text/CString.h
@@ -10,10 +10,10 @@
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
- * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
@@ -28,7 +28,7 @@
#include <wtf/HashFunctions.h>
#include <wtf/HashTraits.h>
-#include <wtf/PassRefPtr.h>
+#include <wtf/Ref.h>
#include <wtf/RefCounted.h>
namespace WTF {
@@ -43,7 +43,7 @@ public:
private:
friend class CString;
- static PassRefPtr<CStringBuffer> createUninitialized(size_t length);
+ static Ref<CStringBuffer> createUninitialized(size_t length);
CStringBuffer(size_t length) : m_length(length) { }
char* mutableData() { return reinterpret_cast_ptr<char*>(this + 1); }
diff --git a/Source/WTF/wtf/text/IntegerToStringConversion.h b/Source/WTF/wtf/text/IntegerToStringConversion.h
index 649fb05ef..563614d4f 100644
--- a/Source/WTF/wtf/text/IntegerToStringConversion.h
+++ b/Source/WTF/wtf/text/IntegerToStringConversion.h
@@ -22,8 +22,6 @@
#ifndef IntegerToStringConversion_h
#define IntegerToStringConversion_h
-#include "StringBuilder.h"
-
namespace WTF {
enum PositiveOrNegativeNumber {
@@ -33,22 +31,6 @@ enum PositiveOrNegativeNumber {
template<typename T> struct IntegerToStringConversionTrait;
-template<> struct IntegerToStringConversionTrait<AtomicString> {
- typedef AtomicString ReturnType;
- typedef void AdditionalArgumentType;
- static ReturnType flush(LChar* characters, unsigned length, void*) { return AtomicString(characters, length); }
-};
-template<> struct IntegerToStringConversionTrait<String> {
- typedef String ReturnType;
- typedef void AdditionalArgumentType;
- static ReturnType flush(LChar* characters, unsigned length, void*) { return String(characters, length); }
-};
-template<> struct IntegerToStringConversionTrait<StringBuilder> {
- typedef void ReturnType;
- typedef StringBuilder AdditionalArgumentType;
- static ReturnType flush(LChar* characters, unsigned length, StringBuilder* stringBuilder) { stringBuilder->append(characters, length); }
-};
-
template<typename T, typename UnsignedIntegerType, PositiveOrNegativeNumber NumberType, typename AdditionalArgumentType>
static typename IntegerToStringConversionTrait<T>::ReturnType numberToStringImpl(UnsignedIntegerType number, AdditionalArgumentType additionalArgument)
{
@@ -81,6 +63,72 @@ inline typename IntegerToStringConversionTrait<T>::ReturnType numberToStringUnsi
return numberToStringImpl<T, UnsignedIntegerType, PositiveNumber>(number, additionalArgument);
}
+
+// Writes the decimal digits of number to destination, preceded by '-' when
+// NumberType is NegativeNumber. No terminator is written, and nothing here
+// checks how much room destination has.
+template<typename CharacterType, typename UnsignedIntegerType, PositiveOrNegativeNumber NumberType>
+static void writeNumberToBufferImpl(UnsignedIntegerType number, CharacterType* destination)
+{
+    // Digits come out least-significant first, so the scratch buffer is filled
+    // from its end towards its start and then copied out in order.
+    LChar scratch[sizeof(UnsignedIntegerType) * 3 + 1];
+    const size_t capacity = WTF_ARRAY_LENGTH(scratch);
+    size_t start = capacity;
+
+    for (;;) {
+        scratch[--start] = static_cast<LChar>((number % 10) + '0');
+        number /= 10;
+        if (!number)
+            break;
+    }
+
+    if (NumberType == NegativeNumber)
+        scratch[--start] = '-';
+
+    for (size_t index = start; index < capacity; ++index)
+        *destination++ = static_cast<CharacterType>(scratch[index]);
+}
+
+// Writes the decimal representation of a signed integer to destination, with a
+// leading '-' for negative values. Nothing here checks how much room
+// destination has.
+template<typename CharacterType, typename SignedIntegerType>
+inline void writeNumberToBufferSigned(SignedIntegerType number, CharacterType* destination)
+{
+    typedef typename std::make_unsigned<SignedIntegerType>::type UnsignedIntegerType;
+    if (number < 0) {
+        // Negate in the unsigned domain: "-number" is signed overflow (undefined
+        // behavior) when number is the most negative representable value.
+        UnsignedIntegerType magnitude = static_cast<UnsignedIntegerType>(0 - static_cast<UnsignedIntegerType>(number));
+        return writeNumberToBufferImpl<CharacterType, UnsignedIntegerType, NegativeNumber>(magnitude, destination);
+    }
+    return writeNumberToBufferImpl<CharacterType, UnsignedIntegerType, PositiveNumber>(number, destination);
+}
+
+// Writes the decimal representation of an unsigned integer to destination by
+// forwarding to writeNumberToBufferImpl with PositiveNumber.
+template<typename CharacterType, typename UnsignedIntegerType>
+inline void writeNumberToBufferUnsigned(UnsignedIntegerType number, CharacterType* destination)
+{
+    writeNumberToBufferImpl<CharacterType, UnsignedIntegerType, PositiveNumber>(number, destination);
+}
+
+
+// Counts the characters needed to print number in decimal: one per digit (zero
+// counts as one digit), plus one for the sign when NumberType is NegativeNumber.
+template<typename UnsignedIntegerType, PositiveOrNegativeNumber NumberType>
+static unsigned lengthOfNumberAsStringImpl(UnsignedIntegerType number)
+{
+    unsigned digitCount = 1;
+    for (number /= 10; number; number /= 10)
+        ++digitCount;
+
+    return NumberType == NegativeNumber ? digitCount + 1 : digitCount;
+}
+
+// Returns the number of characters in the decimal representation of a signed
+// integer, including the leading '-' for negative values.
+template<typename SignedIntegerType>
+inline unsigned lengthOfNumberAsStringSigned(SignedIntegerType number)
+{
+    typedef typename std::make_unsigned<SignedIntegerType>::type UnsignedIntegerType;
+    if (number < 0) {
+        // Negate in the unsigned domain: "-number" is signed overflow (undefined
+        // behavior) when number is the most negative representable value.
+        UnsignedIntegerType magnitude = static_cast<UnsignedIntegerType>(0 - static_cast<UnsignedIntegerType>(number));
+        return lengthOfNumberAsStringImpl<UnsignedIntegerType, NegativeNumber>(magnitude);
+    }
+    return lengthOfNumberAsStringImpl<UnsignedIntegerType, PositiveNumber>(number);
+}
+
+// Returns the number of characters in the decimal representation of an
+// unsigned integer by forwarding to lengthOfNumberAsStringImpl.
+template<typename UnsignedIntegerType>
+inline unsigned lengthOfNumberAsStringUnsigned(UnsignedIntegerType number)
+{
+    const unsigned length = lengthOfNumberAsStringImpl<UnsignedIntegerType, PositiveNumber>(number);
+    return length;
+}
+
+
} // namespace WTF
#endif // IntegerToStringConversion_h
diff --git a/Source/WTF/wtf/text/LChar.h b/Source/WTF/wtf/text/LChar.h
index b7bb89794..4d31dafb9 100644
--- a/Source/WTF/wtf/text/LChar.h
+++ b/Source/WTF/wtf/text/LChar.h
@@ -10,17 +10,17 @@
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
- * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
@@ -29,7 +29,7 @@
// A type to hold a single Latin-1 character.
// This type complements the UChar type that we get from the ICU library.
-// To parallel that type, we put it outside any namespace.
+// To parallel that type, we put this one in the global namespace.
typedef unsigned char LChar;
#endif
diff --git a/Source/WTF/wtf/text/LineBreakIteratorPoolICU.h b/Source/WTF/wtf/text/LineBreakIteratorPoolICU.h
new file mode 100644
index 000000000..0cbae4030
--- /dev/null
+++ b/Source/WTF/wtf/text/LineBreakIteratorPoolICU.h
@@ -0,0 +1,132 @@
+/*
+ * Copyright (C) 2011 Apple Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#pragma once
+
+#include "TextBreakIterator.h"
+#include <unicode/uloc.h>
+#include <wtf/HashMap.h>
+#include <wtf/NeverDestroyed.h>
+#include <wtf/ThreadSpecific.h>
+#include <wtf/text/AtomicString.h>
+
+namespace WTF {
+
+// A small pool of ICU line-break iterators keyed by locale string (including an
+// optional "lb" keyword). take() hands an iterator out and put() returns it;
+// at most `capacity` idle iterators are kept.
+class LineBreakIteratorPool {
+ WTF_MAKE_NONCOPYABLE(LineBreakIteratorPool);
+public:
+ LineBreakIteratorPool() = default;
+
+ // Returns the pool held in a function-local static ThreadSpecific wrapper.
+ // NOTE(review): presumably this yields one pool per thread — confirm against ThreadSpecific.
+ static LineBreakIteratorPool& sharedPool()
+ {
+ static NeverDestroyed<WTF::ThreadSpecific<LineBreakIteratorPool>> pool;
+ return *pool.get();
+ }
+
+ // Builds the locale identifier used as the pool key by setting (or, for
+ // LineBreakIteratorMode::Default, removing) the "lb" keyword on `locale`.
+ // Returns `locale` unchanged when it is empty or when ICU reports a failure.
+ static AtomicString makeLocaleWithBreakKeyword(const AtomicString& locale, LineBreakIteratorMode mode)
+ {
+ // The uloc functions model locales as char*, so we have to downconvert our AtomicString.
+ auto utf8Locale = locale.string().utf8();
+ if (!utf8Locale.length())
+ return locale;
+ // Zero-filled scratch buffer holding a copy of the locale plus spare room.
+ // NOTE(review): the 11 extra bytes presumably cover an appended "@lb=<value>" and terminator — confirm.
+ Vector<char> scratchBuffer(utf8Locale.length() + 11, 0);
+ memcpy(scratchBuffer.data(), utf8Locale.data(), utf8Locale.length());
+
+ const char* keywordValue = nullptr;
+ switch (mode) {
+ case LineBreakIteratorMode::Default:
+ // nullptr will cause any existing values to be removed.
+ break;
+ case LineBreakIteratorMode::Loose:
+ keywordValue = "loose";
+ break;
+ case LineBreakIteratorMode::Normal:
+ keywordValue = "normal";
+ break;
+ case LineBreakIteratorMode::Strict:
+ keywordValue = "strict";
+ break;
+ }
+
+ UErrorCode status = U_ZERO_ERROR;
+ int32_t lengthNeeded = uloc_setKeywordValue("lb", keywordValue, scratchBuffer.data(), scratchBuffer.size(), &status);
+ if (U_SUCCESS(status))
+ return AtomicString::fromUTF8(scratchBuffer.data(), lengthNeeded);
+ // On overflow, enlarge the buffer to the reported length, re-zero everything
+ // after the original locale text, and retry once.
+ if (status == U_BUFFER_OVERFLOW_ERROR) {
+ scratchBuffer.grow(lengthNeeded + 1);
+ memset(scratchBuffer.data() + utf8Locale.length(), 0, scratchBuffer.size() - utf8Locale.length());
+ status = U_ZERO_ERROR;
+ int32_t lengthNeeded2 = uloc_setKeywordValue("lb", keywordValue, scratchBuffer.data(), scratchBuffer.size(), &status);
+ // Give up if the retry fails or reports a different length than the first call.
+ if (!U_SUCCESS(status) || lengthNeeded != lengthNeeded2)
+ return locale;
+ return AtomicString::fromUTF8(scratchBuffer.data(), lengthNeeded);
+ }
+ return locale;
+ }
+
+ // Hands out an iterator for the given locale and mode: reuses an idle pooled
+ // one whose key matches, otherwise opens a new one. Returns nullptr when a
+ // new iterator cannot be opened. The iterator is recorded in
+ // m_vendedIterators together with its key.
+ UBreakIterator* take(const AtomicString& locale, LineBreakIteratorMode mode)
+ {
+ auto localeWithOptionalBreakKeyword = makeLocaleWithBreakKeyword(locale, mode);
+
+ UBreakIterator* iterator = nullptr;
+ for (size_t i = 0; i < m_pool.size(); ++i) {
+ if (m_pool[i].first == localeWithOptionalBreakKeyword) {
+ iterator = m_pool[i].second;
+ m_pool.remove(i);
+ break;
+ }
+ }
+
+ if (!iterator) {
+ iterator = openLineBreakIterator(localeWithOptionalBreakKeyword);
+ if (!iterator)
+ return nullptr;
+ }
+
+ ASSERT(!m_vendedIterators.contains(iterator));
+ m_vendedIterators.add(iterator, localeWithOptionalBreakKeyword);
+ return iterator;
+ }
+
+ // Returns a previously taken iterator to the pool. When the pool already
+ // holds `capacity` entries, the entry at index 0 is closed and removed first.
+ void put(UBreakIterator* iterator)
+ {
+ ASSERT(m_vendedIterators.contains(iterator));
+ if (m_pool.size() == capacity) {
+ closeLineBreakIterator(m_pool[0].second);
+ m_pool.remove(0);
+ }
+ // The size check above keeps the vector within its inline capacity before this append.
+ m_pool.uncheckedAppend({ m_vendedIterators.take(iterator), iterator });
+ }
+
+private:
+ static constexpr size_t capacity = 4;
+
+ // Idle iterators paired with the locale key they were opened for.
+ Vector<std::pair<AtomicString, UBreakIterator*>, capacity> m_pool;
+ // Iterators currently handed out by take(), mapped to their locale key.
+ HashMap<UBreakIterator*, AtomicString> m_vendedIterators;
+
+ friend WTF::ThreadSpecific<LineBreakIteratorPool>::operator LineBreakIteratorPool*();
+};
+
+}
diff --git a/Source/WTF/wtf/text/OrdinalNumber.h b/Source/WTF/wtf/text/OrdinalNumber.h
new file mode 100644
index 000000000..bb5d62d66
--- /dev/null
+++ b/Source/WTF/wtf/text/OrdinalNumber.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (C) 2010, Google Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#pragma once
+
+namespace WTF {
+
+// An abstract position of an element in a sequence. The sequence has a first element.
+// This type should be used instead of a plain integer because two conflicting traditions
+// number the first element '0' or '1', which makes a bare integer ambiguous.
+class OrdinalNumber {
+public:
+    // Named constructors. beforeFirst() is the position with zero-based value -1.
+    static OrdinalNumber beforeFirst() { return OrdinalNumber(-1); }
+    static OrdinalNumber fromZeroBasedInt(int zeroBasedInt) { return OrdinalNumber(zeroBasedInt); }
+    static OrdinalNumber fromOneBasedInt(int oneBasedInt) { return OrdinalNumber(oneBasedInt - 1); }
+
+    // Default-constructs the first element (zero-based 0, one-based 1).
+    OrdinalNumber() : m_zeroBasedValue(0) { }
+
+    int zeroBasedInt() const { return m_zeroBasedValue; }
+    int oneBasedInt() const { return m_zeroBasedValue + 1; }
+
+    // Comparisons are const so they can be used on const objects and temporaries.
+    bool operator==(OrdinalNumber other) const { return m_zeroBasedValue == other.m_zeroBasedValue; }
+    bool operator!=(OrdinalNumber other) const { return !((*this) == other); }
+    bool operator>(OrdinalNumber other) const { return m_zeroBasedValue > other.m_zeroBasedValue; }
+
+private:
+    // Private so callers must state the numbering convention via the named constructors.
+    explicit OrdinalNumber(int zeroBasedInt) : m_zeroBasedValue(zeroBasedInt) { }
+    int m_zeroBasedValue;
+};
+
+}
+
+using WTF::OrdinalNumber;
diff --git a/Source/WTF/wtf/text/StringBuffer.h b/Source/WTF/wtf/text/StringBuffer.h
index 22e161101..f293d333d 100644
--- a/Source/WTF/wtf/text/StringBuffer.h
+++ b/Source/WTF/wtf/text/StringBuffer.h
@@ -30,8 +30,8 @@
#define StringBuffer_h
#include <wtf/Assertions.h>
-#include <wtf/unicode/Unicode.h>
#include <limits>
+#include <unicode/utypes.h>
namespace WTF {
diff --git a/Source/WTF/wtf/text/StringBuilder.cpp b/Source/WTF/wtf/text/StringBuilder.cpp
index c483ba146..436015a43 100644
--- a/Source/WTF/wtf/text/StringBuilder.cpp
+++ b/Source/WTF/wtf/text/StringBuilder.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2010, 2013 Apple Inc. All rights reserved.
+ * Copyright (C) 2010, 2013, 2016 Apple Inc. All rights reserved.
* Copyright (C) 2012 Google Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -28,6 +28,7 @@
#include "StringBuilder.h"
#include "IntegerToStringConversion.h"
+#include "MathExtras.h"
#include "WTFString.h"
#include <wtf/dtoa.h>
@@ -58,12 +59,7 @@ void StringBuilder::reifyString() const
if (m_length == m_buffer->length())
m_string = m_buffer.get();
else
- m_string = StringImpl::create(m_buffer, 0, m_length);
-
- if (m_buffer->has16BitShadow() && m_valid16BitShadowLength < m_length)
- m_buffer->upconvertCharacters(m_valid16BitShadowLength, m_length);
-
- m_valid16BitShadowLength = m_length;
+ m_string = StringImpl::createSubstringSharingImpl(*m_buffer, 0, m_length);
}
void StringBuilder::resize(unsigned newSize)
@@ -84,6 +80,7 @@ void StringBuilder::resize(unsigned newSize)
allocateBuffer(m_buffer->characters16(), m_buffer->length());
}
m_length = newSize;
+ ASSERT(m_buffer->length() >= m_length);
return;
}
@@ -92,7 +89,7 @@ void StringBuilder::resize(unsigned newSize)
ASSERT(m_length == m_string.length());
ASSERT(newSize < m_string.length());
m_length = newSize;
- m_string = StringImpl::create(m_string.impl(), 0, newSize);
+ m_string = StringImpl::createSubstringSharingImpl(*m_string.impl(), 0, newSize);
}
// Allocate a new 8 bit buffer, copying in currentCharacters (these may come from either m_string
@@ -101,12 +98,13 @@ void StringBuilder::allocateBuffer(const LChar* currentCharacters, unsigned requ
{
ASSERT(m_is8Bit);
// Copy the existing data into a new buffer, set result to point to the end of the existing data.
- RefPtr<StringImpl> buffer = StringImpl::createUninitialized(requiredLength, m_bufferCharacters8);
+ auto buffer = StringImpl::createUninitialized(requiredLength, m_bufferCharacters8);
memcpy(m_bufferCharacters8, currentCharacters, static_cast<size_t>(m_length) * sizeof(LChar)); // This can't overflow.
// Update the builder state.
- m_buffer = buffer.release();
+ m_buffer = WTFMove(buffer);
m_string = String();
+ ASSERT(m_buffer->length() == requiredLength);
}
// Allocate a new 16 bit buffer, copying in currentCharacters (these may come from either m_string
@@ -115,12 +113,13 @@ void StringBuilder::allocateBuffer(const UChar* currentCharacters, unsigned requ
{
ASSERT(!m_is8Bit);
// Copy the existing data into a new buffer, set result to point to the end of the existing data.
- RefPtr<StringImpl> buffer = StringImpl::createUninitialized(requiredLength, m_bufferCharacters16);
+ auto buffer = StringImpl::createUninitialized(requiredLength, m_bufferCharacters16);
memcpy(m_bufferCharacters16, currentCharacters, static_cast<size_t>(m_length) * sizeof(UChar)); // This can't overflow.
// Update the builder state.
- m_buffer = buffer.release();
+ m_buffer = WTFMove(buffer);
m_string = String();
+ ASSERT(m_buffer->length() == requiredLength);
}
// Allocate a new 16 bit buffer, copying in currentCharacters (which is 8 bit and may come
@@ -128,16 +127,18 @@ void StringBuilder::allocateBuffer(const UChar* currentCharacters, unsigned requ
void StringBuilder::allocateBufferUpConvert(const LChar* currentCharacters, unsigned requiredLength)
{
ASSERT(m_is8Bit);
+ ASSERT(requiredLength >= m_length);
// Copy the existing data into a new buffer, set result to point to the end of the existing data.
- RefPtr<StringImpl> buffer = StringImpl::createUninitialized(requiredLength, m_bufferCharacters16);
+ auto buffer = StringImpl::createUninitialized(requiredLength, m_bufferCharacters16);
for (unsigned i = 0; i < m_length; ++i)
m_bufferCharacters16[i] = currentCharacters[i];
m_is8Bit = false;
// Update the builder state.
- m_buffer = buffer.release();
+ m_buffer = WTFMove(buffer);
m_string = String();
+ ASSERT(m_buffer->length() == requiredLength);
}
template <>
@@ -151,9 +152,10 @@ void StringBuilder::reallocateBuffer<LChar>(unsigned requiredLength)
ASSERT(m_buffer->is8Bit());
if (m_buffer->hasOneRef())
- m_buffer = StringImpl::reallocate(m_buffer.release(), requiredLength, m_bufferCharacters8);
+ m_buffer = StringImpl::reallocate(m_buffer.releaseNonNull(), requiredLength, m_bufferCharacters8);
else
allocateBuffer(m_buffer->characters8(), requiredLength);
+ ASSERT(m_buffer->length() == requiredLength);
}
template <>
@@ -166,9 +168,10 @@ void StringBuilder::reallocateBuffer<UChar>(unsigned requiredLength)
if (m_buffer->is8Bit())
allocateBufferUpConvert(m_buffer->characters8(), requiredLength);
else if (m_buffer->hasOneRef())
- m_buffer = StringImpl::reallocate(m_buffer.release(), requiredLength, m_bufferCharacters16);
+ m_buffer = StringImpl::reallocate(m_buffer.releaseNonNull(), requiredLength, m_bufferCharacters16);
else
allocateBuffer(m_buffer->characters16(), requiredLength);
+ ASSERT(m_buffer->length() == requiredLength);
}
void StringBuilder::reserveCapacity(unsigned newCapacity)
@@ -193,6 +196,7 @@ void StringBuilder::reserveCapacity(unsigned newCapacity)
allocateBuffer(m_string.characters16(), newCapacity);
}
}
+ ASSERT(!newCapacity || m_buffer->length() >= newCapacity);
}
// Make 'length' additional capacity be available in m_buffer, update m_string & m_length,
@@ -233,11 +237,12 @@ CharType* StringBuilder::appendUninitializedSlow(unsigned requiredLength)
reallocateBuffer<CharType>(expandedCapacity(capacity(), requiredLength));
} else {
ASSERT(m_string.length() == m_length);
- allocateBuffer(m_length ? m_string.getCharacters<CharType>() : 0, expandedCapacity(capacity(), requiredLength));
+ allocateBuffer(m_length ? m_string.characters<CharType>() : 0, expandedCapacity(capacity(), requiredLength));
}
CharType* result = getBufferCharacters<CharType>() + m_length;
m_length = requiredLength;
+ ASSERT(m_buffer->length() >= m_length);
return result;
}
@@ -271,10 +276,11 @@ void StringBuilder::append(const UChar* characters, unsigned length)
allocateBufferUpConvert(m_string.isNull() ? 0 : m_string.characters8(), expandedCapacity(capacity(), requiredLength));
}
- memcpy(m_bufferCharacters16 + m_length, characters, static_cast<size_t>(length) * sizeof(UChar));
+ memcpy(m_bufferCharacters16 + m_length, characters, static_cast<size_t>(length) * sizeof(UChar));
m_length = requiredLength;
} else
memcpy(appendUninitialized<UChar>(length), characters, static_cast<size_t>(length) * sizeof(UChar));
+ ASSERT(m_buffer->length() >= m_length);
}
void StringBuilder::append(const LChar* characters, unsigned length)
@@ -300,6 +306,20 @@ void StringBuilder::append(const LChar* characters, unsigned length)
}
}
+#if USE(CF)
+
+void StringBuilder::append(CFStringRef string)
+{
+    // Ask CoreFoundation for a direct Latin-1 view of the string; a null result means none is available.
+    const char* latin1Characters = CFStringGetCStringPtr(string, kCFStringEncodingISOLatin1);
+    if (!latin1Characters) {
+        // Slow path: go through a temporary String.
+        append(String(string));
+        return;
+    }
+
+    // Fast path: the characters can be appended as-is, without constructing a temporary String.
+    append(reinterpret_cast<const LChar*>(latin1Characters), CFStringGetLength(string));
+}
+
+#endif
+
void StringBuilder::appendNumber(int number)
{
numberToStringSigned<StringBuilder>(number, this);
@@ -361,8 +381,103 @@ void StringBuilder::shrinkToFit()
reallocateBuffer<LChar>(m_length);
else
reallocateBuffer<UChar>(m_length);
- m_string = m_buffer.release();
+ m_string = WTFMove(m_buffer);
+ }
+}
+
+// Writes the JSON escape sequence for a single character that cannot be emitted verbatim.
+// Tab, carriage return, newline, form feed and backspace use their two-character escapes;
+// anything else is written as \u00XX, which is why the character must fit in eight bits.
+// output is advanced past the characters written.
+template <typename OutputCharacterType, typename InputCharacterType>
+static void appendQuotedJSONStringInternalSlow(OutputCharacterType*& output, const InputCharacterType character)
+{
+    // Letter of the two-character escape, or 0 when the character has none.
+    char shortEscape = 0;
+    switch (character) {
+    case '\t':
+        shortEscape = 't';
+        break;
+    case '\r':
+        shortEscape = 'r';
+        break;
+    case '\n':
+        shortEscape = 'n';
+        break;
+    case '\f':
+        shortEscape = 'f';
+        break;
+    case '\b':
+        shortEscape = 'b';
+        break;
+    default:
+        break;
+    }
+
+    if (shortEscape) {
+        *output++ = '\\';
+        *output++ = shortEscape;
+        return;
+    }
+
+    ASSERT(!(character & 0xFF00));
+    *output++ = '\\';
+    *output++ = 'u';
+    *output++ = '0';
+    *output++ = '0';
+    *output++ = upperNibbleToLowercaseASCIIHexDigit(character);
+    *output++ = lowerNibbleToLowercaseASCIIHexDigit(character);
+}
+
+// Copies length characters from input to output, escaping them as required inside a JSON string:
+// '"' and '\\' get a backslash prefix, characters up to 0x1F go through the slow escape path,
+// and everything else is copied unchanged. output is advanced past the characters written.
+template <typename OutputCharacterType, typename InputCharacterType>
+static void appendQuotedJSONStringInternal(OutputCharacterType*& output, const InputCharacterType* input, unsigned length)
+{
+    for (unsigned i = 0; i < length; ++i) {
+        const InputCharacterType character = input[i];
+        const bool needsBackslash = character == '"' || character == '\\';
+
+        if (LIKELY(!needsBackslash && character > 0x1F)) {
+            *output++ = character;
+            continue;
+        }
+
+        if (!needsBackslash) {
+            appendQuotedJSONStringInternalSlow(output, character);
+            continue;
+        }
+
+        *output++ = '\\';
+        *output++ = character;
+    }
+}
+
+// Appends string surrounded by double quotes, with its contents escaped for use as a JSON
+// string literal. The worst-case amount of space is reserved up front so that the escaping
+// loops can write through a raw pointer without any further capacity checks.
+void StringBuilder::appendQuotedJSONString(const String& string)
+{
+    // Make sure we have enough buffer space to append this string without having
+    // to worry about reallocating in the middle.
+    // The 2 is for the '"' quotes on each end.
+    // The 6 is for characters that need to be \uNNNN encoded.
+    Checked<unsigned> stringLength = string.length();
+    Checked<unsigned> maximumCapacityRequired = length();
+    maximumCapacityRequired += 2 + stringLength * 6;
+    unsigned allocationSize = maximumCapacityRequired.unsafeGet();
+    // This max() lets us allocate sizes in the range [2^31, 2^32 - 2]: roundUpToPowerOfTwo() of
+    // (1 << 31) plus some positive value smaller than (1 << 31) yields 0.
+    allocationSize = std::max(allocationSize, roundUpToPowerOfTwo(allocationSize));
+
+    if (is8Bit() && !string.is8Bit()) {
+        // The builder is 8-bit but the incoming string is not, so the existing characters must
+        // be widened. Once a buffer has been allocated they are reachable through
+        // m_bufferCharacters8, but a builder that so far only holds its characters in m_string
+        // has no buffer and that pointer may still be null. In that case read from m_string
+        // instead, mirroring what append(const UChar*, unsigned) does.
+        const LChar* currentCharacters = m_bufferCharacters8;
+        if (!m_buffer)
+            currentCharacters = m_string.isNull() ? nullptr : m_string.characters8();
+        allocateBufferUpConvert(currentCharacters, allocationSize);
+    } else
+        reserveCapacity(allocationSize);
+    ASSERT(m_buffer->length() >= allocationSize);
+
+    if (is8Bit()) {
+        ASSERT(string.is8Bit());
+        LChar* output = m_bufferCharacters8 + m_length;
+        *output++ = '"';
+        appendQuotedJSONStringInternal(output, string.characters8(), string.length());
+        *output++ = '"';
+        m_length = output - m_bufferCharacters8;
+    } else {
+        UChar* output = m_bufferCharacters16 + m_length;
+        *output++ = '"';
+        if (string.is8Bit())
+            appendQuotedJSONStringInternal(output, string.characters8(), string.length());
+        else
+            appendQuotedJSONStringInternal(output, string.characters16(), string.length());
+        *output++ = '"';
+        m_length = output - m_bufferCharacters16;
     }
+    ASSERT(m_buffer->length() >= m_length);
 }
} // namespace WTF
diff --git a/Source/WTF/wtf/text/StringBuilder.h b/Source/WTF/wtf/text/StringBuilder.h
index 26be90633..d02737a02 100644
--- a/Source/WTF/wtf/text/StringBuilder.h
+++ b/Source/WTF/wtf/text/StringBuilder.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2009, 2010, 2012, 2013 Apple Inc. All rights reserved.
+ * Copyright (C) 2009-2010, 2012-2013, 2016 Apple Inc. All rights reserved.
* Copyright (C) 2012 Google Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -28,6 +28,8 @@
#define StringBuilder_h
#include <wtf/text/AtomicString.h>
+#include <wtf/text/IntegerToStringConversion.h>
+#include <wtf/text/StringView.h>
#include <wtf/text/WTFString.h>
namespace WTF {
@@ -40,7 +42,6 @@ public:
StringBuilder()
: m_length(0)
, m_is8Bit(true)
- , m_valid16BitShadowLength(0)
, m_bufferCharacters8(0)
{
}
@@ -50,6 +51,11 @@ public:
ALWAYS_INLINE void append(const char* characters, unsigned length) { append(reinterpret_cast<const LChar*>(characters), length); }
+ // Appends the contents of an AtomicString by passing atomicString.string() on to
+ // another append() overload.
+ void append(const AtomicString& atomicString)
+ {
+ append(atomicString.string());
+ }
+
void append(const String& string)
{
if (!string.length())
@@ -89,6 +95,21 @@ public:
append(other.characters16(), other.m_length);
}
+    // Appends the characters referenced by stringView, dispatching on its character width.
+    void append(StringView stringView)
+    {
+        if (!stringView.is8Bit()) {
+            append(stringView.characters16(), stringView.length());
+            return;
+        }
+        append(stringView.characters8(), stringView.length());
+    }
+
+#if USE(CF)
+ WTF_EXPORT_PRIVATE void append(CFStringRef);
+#endif
+#if USE(CF) && defined(__OBJC__)
+ void append(NSString *string) { append((__bridge CFStringRef)string); }
+#endif
+
void append(const String& string, unsigned offset, unsigned length)
{
if (!string.length())
@@ -151,6 +172,8 @@ public:
append(U16_TRAIL(c));
}
+ WTF_EXPORT_PRIVATE void appendQuotedJSONString(const String&);
+
template<unsigned charactersCount>
ALWAYS_INLINE void appendLiteral(const char (&characters)[charactersCount]) { append(characters, charactersCount - 1); }
@@ -248,32 +271,15 @@ public:
return m_buffer->characters16();
}
- const UChar* characters() const { return deprecatedCharacters(); } // FIXME: Delete this.
- const UChar* deprecatedCharacters() const
- {
- if (!m_length)
- return 0;
- if (!m_string.isNull())
- return m_string.deprecatedCharacters();
- ASSERT(m_buffer);
- if (m_buffer->has16BitShadow() && m_valid16BitShadowLength < m_length)
- m_buffer->upconvertCharacters(m_valid16BitShadowLength, m_length);
-
- m_valid16BitShadowLength = m_length;
-
- return m_buffer->deprecatedCharacters();
- }
-
bool is8Bit() const { return m_is8Bit; }
void clear()
{
m_length = 0;
m_string = String();
- m_buffer = 0;
+ m_buffer = nullptr;
m_bufferCharacters8 = 0;
m_is8Bit = true;
- m_valid16BitShadowLength = 0;
}
void swap(StringBuilder& stringBuilder)
@@ -282,8 +288,8 @@ public:
m_string.swap(stringBuilder.m_string);
m_buffer.swap(stringBuilder.m_buffer);
std::swap(m_is8Bit, stringBuilder.m_is8Bit);
- std::swap(m_valid16BitShadowLength, stringBuilder.m_valid16BitShadowLength);
std::swap(m_bufferCharacters8, stringBuilder.m_bufferCharacters8);
+ ASSERT(!m_buffer || m_buffer->length() >= m_length);
}
private:
@@ -304,7 +310,6 @@ private:
mutable String m_string;
RefPtr<StringImpl> m_buffer;
bool m_is8Bit;
- mutable unsigned m_valid16BitShadowLength;
union {
LChar* m_bufferCharacters8;
UChar* m_bufferCharacters16;
@@ -364,6 +369,12 @@ inline bool operator!=(const StringBuilder& a, const String& b) { return !equal(
inline bool operator==(const String& a, const StringBuilder& b) { return equal(b, a); }
inline bool operator!=(const String& a, const StringBuilder& b) { return !equal(b, a); }
+// IntegerToStringConversionTrait specialization for StringBuilder: the conversion returns
+// nothing (ReturnType is void), takes a StringBuilder as its additional argument, and
+// flush() appends the produced characters to that builder.
+template<> struct IntegerToStringConversionTrait<StringBuilder> {
+ using ReturnType = void;
+ using AdditionalArgumentType = StringBuilder;
+ // Appends `length` characters starting at `characters` to *stringBuilder.
+ static void flush(LChar* characters, unsigned length, StringBuilder* stringBuilder) { stringBuilder->append(characters, length); }
+};
+
} // namespace WTF
using WTF::StringBuilder;
diff --git a/Source/WTF/wtf/text/StringCommon.h b/Source/WTF/wtf/text/StringCommon.h
new file mode 100644
index 000000000..d35d8905d
--- /dev/null
+++ b/Source/WTF/wtf/text/StringCommon.h
@@ -0,0 +1,656 @@
+/*
+ * Copyright (C) 2015-2016 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef StringCommon_h
+#define StringCommon_h
+
+#include <unicode/uchar.h>
+#include <wtf/ASCIICType.h>
+
+namespace WTF {
+
+template<typename CharacterTypeA, typename CharacterTypeB> bool equalIgnoringASCIICase(const CharacterTypeA*, const CharacterTypeB*, unsigned length);
+template<typename CharacterTypeA, typename CharacterTypeB> bool equalIgnoringASCIICase(const CharacterTypeA*, unsigned lengthA, const CharacterTypeB*, unsigned lengthB);
+
+template<typename StringClassA, typename StringClassB> bool equalIgnoringASCIICaseCommon(const StringClassA&, const StringClassB&);
+
+template<typename CharacterType> bool equalLettersIgnoringASCIICase(const CharacterType*, const char* lowercaseLetters, unsigned length);
+template<typename CharacterType, unsigned lowercaseLettersLength> bool equalLettersIgnoringASCIICase(const CharacterType*, unsigned charactersLength, const char (&lowercaseLetters)[lowercaseLettersLength]);
+
+template<typename StringClass, unsigned length> bool equalLettersIgnoringASCIICaseCommon(const StringClass&, const char (&lowercaseLetters)[length]);
+
+// Reads a T from an address that need not be suitably aligned for T.
+// memcpy is used on every compiler: dereferencing a misaligned pointer of a different type
+// through reinterpret_cast is undefined behavior, whereas a fixed-size memcpy is well defined
+// and optimizing compilers typically turn it into a single load.
+template<typename T>
+inline T loadUnaligned(const char* s)
+{
+    T value;
+    memcpy(&value, s, sizeof(T));
+    return value;
+}
+
+// Do comparisons 8 or 4 bytes-at-a-time on architectures where it's safe.
+#if (CPU(X86_64) || CPU(ARM64)) && !ASAN_ENABLED
+// Compares two 8-bit character buffers of the same length, eight characters at a time,
+// then finishes with a 4-, 2- and 1-character step as dictated by the low bits of length.
+ALWAYS_INLINE bool equal(const LChar* aLChar, const LChar* bLChar, unsigned length)
+{
+    // Walk both buffers as raw bytes so chunks of any width can be loaded.
+    const char* a = reinterpret_cast<const char*>(aLChar);
+    const char* b = reinterpret_cast<const char*>(bLChar);
+
+    for (unsigned remainingChunks = length >> 3; remainingChunks; --remainingChunks) {
+        if (loadUnaligned<uint64_t>(a) != loadUnaligned<uint64_t>(b))
+            return false;
+        a += sizeof(uint64_t);
+        b += sizeof(uint64_t);
+    }
+
+    if (length & 4) {
+        if (loadUnaligned<uint32_t>(a) != loadUnaligned<uint32_t>(b))
+            return false;
+        a += sizeof(uint32_t);
+        b += sizeof(uint32_t);
+    }
+
+    if (length & 2) {
+        if (loadUnaligned<uint16_t>(a) != loadUnaligned<uint16_t>(b))
+            return false;
+        a += sizeof(uint16_t);
+        b += sizeof(uint16_t);
+    }
+
+    if (!(length & 1))
+        return true;
+    return *reinterpret_cast<const LChar*>(a) == *reinterpret_cast<const LChar*>(b);
+}
+
+// Compares two 16-bit character buffers of the same length, four characters (eight bytes)
+// at a time, then finishes with a 2- and 1-character step as dictated by the low bits of length.
+ALWAYS_INLINE bool equal(const UChar* aUChar, const UChar* bUChar, unsigned length)
+{
+    // Walk both buffers as raw bytes so chunks of any width can be loaded.
+    const char* a = reinterpret_cast<const char*>(aUChar);
+    const char* b = reinterpret_cast<const char*>(bUChar);
+
+    for (unsigned remainingChunks = length >> 2; remainingChunks; --remainingChunks) {
+        if (loadUnaligned<uint64_t>(a) != loadUnaligned<uint64_t>(b))
+            return false;
+        a += sizeof(uint64_t);
+        b += sizeof(uint64_t);
+    }
+
+    if (length & 2) {
+        if (loadUnaligned<uint32_t>(a) != loadUnaligned<uint32_t>(b))
+            return false;
+        a += sizeof(uint32_t);
+        b += sizeof(uint32_t);
+    }
+
+    if (!(length & 1))
+        return true;
+    return *reinterpret_cast<const UChar*>(a) == *reinterpret_cast<const UChar*>(b);
+}
+#elif CPU(X86) && !ASAN_ENABLED
+// Compares two 8-bit character buffers of the same length, four characters at a time,
+// then checks the up to three leftover characters one by one.
+ALWAYS_INLINE bool equal(const LChar* aLChar, const LChar* bLChar, unsigned length)
+{
+    const char* a = reinterpret_cast<const char*>(aLChar);
+    const char* b = reinterpret_cast<const char*>(bLChar);
+
+    for (unsigned remainingWords = length >> 2; remainingWords; --remainingWords) {
+        if (loadUnaligned<uint32_t>(a) != loadUnaligned<uint32_t>(b))
+            return false;
+        a += sizeof(uint32_t);
+        b += sizeof(uint32_t);
+    }
+
+    const LChar* aTail = reinterpret_cast<const LChar*>(a);
+    const LChar* bTail = reinterpret_cast<const LChar*>(b);
+    for (unsigned tailLength = length & 3; tailLength; --tailLength) {
+        if (*aTail++ != *bTail++)
+            return false;
+    }
+
+    return true;
+}
+
+// Compares two 16-bit character buffers of the same length, two characters (four bytes)
+// at a time, then checks the odd trailing character if there is one.
+ALWAYS_INLINE bool equal(const UChar* aUChar, const UChar* bUChar, unsigned length)
+{
+    const char* a = reinterpret_cast<const char*>(aUChar);
+    const char* b = reinterpret_cast<const char*>(bUChar);
+
+    for (unsigned remainingWords = length >> 1; remainingWords; --remainingWords) {
+        if (loadUnaligned<uint32_t>(a) != loadUnaligned<uint32_t>(b))
+            return false;
+        a += sizeof(uint32_t);
+        b += sizeof(uint32_t);
+    }
+
+    if (!(length & 1))
+        return true;
+    return *reinterpret_cast<const UChar*>(a) == *reinterpret_cast<const UChar*>(b);
+}
+#elif PLATFORM(IOS) && WTF_ARM_ARCH_AT_LEAST(7) && !ASAN_ENABLED
+// Hand-written ARM comparison of two 8-bit character buffers of the same length: four
+// characters per loop iteration, then a pair and a single character as needed.
+// The assembly modifies the condition flags and reads memory through a and b, so both are
+// declared as clobbers; aValue and bValue are pure scratch registers and are therefore
+// early-clobber outputs rather than read-write operands fed with uninitialized values.
+ALWAYS_INLINE bool equal(const LChar* a, const LChar* b, unsigned length)
+{
+    bool isEqual = false;
+    uint32_t aValue;
+    uint32_t bValue;
+    asm("subs %[length], #4\n"
+        "blo 2f\n"
+
+        "0:\n" // Label 0 = Start of loop over 32 bits.
+        "ldr %[aValue], [%[a]], #4\n"
+        "ldr %[bValue], [%[b]], #4\n"
+        "cmp %[aValue], %[bValue]\n"
+        "bne 66f\n"
+        "subs %[length], #4\n"
+        "bhs 0b\n"
+
+        // At this point, length can be:
+        // -0: 00000000000000000000000000000000 (0 bytes left)
+        // -1: 11111111111111111111111111111111 (3 bytes left)
+        // -2: 11111111111111111111111111111110 (2 bytes left)
+        // -3: 11111111111111111111111111111101 (1 byte left)
+        // -4: 11111111111111111111111111111100 (length was 0)
+        // The pointers are at the correct position.
+        "2:\n" // Label 2 = End of loop over 32 bits, check for pair of characters.
+        "tst %[length], #2\n"
+        "beq 1f\n"
+        "ldrh %[aValue], [%[a]], #2\n"
+        "ldrh %[bValue], [%[b]], #2\n"
+        "cmp %[aValue], %[bValue]\n"
+        "bne 66f\n"
+
+        "1:\n" // Label 1 = Check for a single character left.
+        "tst %[length], #1\n"
+        "beq 42f\n"
+        "ldrb %[aValue], [%[a]]\n"
+        "ldrb %[bValue], [%[b]]\n"
+        "cmp %[aValue], %[bValue]\n"
+        "bne 66f\n"
+
+        "42:\n" // Label 42 = Success.
+        "mov %[isEqual], #1\n"
+        "66:\n" // Label 66 = End without changing isEqual to 1.
+        : [length]"+r"(length), [isEqual]"+r"(isEqual), [a]"+r"(a), [b]"+r"(b), [aValue]"=&r"(aValue), [bValue]"=&r"(bValue)
+        :
+        : "cc", "memory"
+    );
+    return isEqual;
+}
+
+// Hand-written ARM comparison of two 16-bit character buffers of the same length: two
+// characters (32 bits) per loop iteration, then a single trailing character if needed.
+// The assembly modifies the condition flags and reads memory through a and b, so both are
+// declared as clobbers; aValue and bValue are pure scratch registers and are therefore
+// early-clobber outputs rather than read-write operands fed with uninitialized values.
+ALWAYS_INLINE bool equal(const UChar* a, const UChar* b, unsigned length)
+{
+    bool isEqual = false;
+    uint32_t aValue;
+    uint32_t bValue;
+    asm("subs %[length], #2\n"
+        "blo 1f\n"
+
+        "0:\n" // Label 0 = Start of loop over 32 bits.
+        "ldr %[aValue], [%[a]], #4\n"
+        "ldr %[bValue], [%[b]], #4\n"
+        "cmp %[aValue], %[bValue]\n"
+        "bne 66f\n"
+        "subs %[length], #2\n"
+        "bhs 0b\n"
+
+        // At this point, length can be:
+        // -0: 00000000000000000000000000000000 (0 bytes left)
+        // -1: 11111111111111111111111111111111 (1 character left, 2 bytes)
+        // -2: 11111111111111111111111111111110 (length was zero)
+        // The pointers are at the correct position.
+        "1:\n" // Label 1 = Check for a single character left.
+        "tst %[length], #1\n"
+        "beq 42f\n"
+        "ldrh %[aValue], [%[a]]\n"
+        "ldrh %[bValue], [%[b]]\n"
+        "cmp %[aValue], %[bValue]\n"
+        "bne 66f\n"
+
+        "42:\n" // Label 42 = Success.
+        "mov %[isEqual], #1\n"
+        "66:\n" // Label 66 = End without changing isEqual to 1.
+        : [length]"+r"(length), [isEqual]"+r"(isEqual), [a]"+r"(a), [b]"+r"(b), [aValue]"=&r"(aValue), [bValue]"=&r"(bValue)
+        :
+        : "cc", "memory"
+    );
+    return isEqual;
+}
+#elif !ASAN_ENABLED
+// Generic fallback: compares length 8-bit characters with memcmp.
+ALWAYS_INLINE bool equal(const LChar* a, const LChar* b, unsigned length)
+{
+    return memcmp(a, b, length) == 0;
+}
+// Generic fallback: compares length 16-bit characters with memcmp over their bytes.
+ALWAYS_INLINE bool equal(const UChar* a, const UChar* b, unsigned length)
+{
+    return memcmp(a, b, length * sizeof(UChar)) == 0;
+}
+#else
+// ASAN build: compares two 8-bit buffers strictly one character at a time.
+ALWAYS_INLINE bool equal(const LChar* a, const LChar* b, unsigned length)
+{
+    unsigned position = 0;
+    while (position < length) {
+        if (a[position] != b[position])
+            return false;
+        ++position;
+    }
+    return true;
+}
+// ASAN build: compares two 16-bit buffers strictly one character at a time.
+ALWAYS_INLINE bool equal(const UChar* a, const UChar* b, unsigned length)
+{
+    unsigned position = 0;
+    while (position < length) {
+        if (a[position] != b[position])
+            return false;
+        ++position;
+    }
+    return true;
+}
+#endif
+
+// Mixed-width comparison: checks an 8-bit buffer against a 16-bit buffer character by character.
+ALWAYS_INLINE bool equal(const LChar* a, const UChar* b, unsigned length)
+{
+    unsigned position = 0;
+    while (position < length) {
+        if (a[position] != b[position])
+            return false;
+        ++position;
+    }
+    return true;
+}
+
+// Mixed-width comparison with the arguments the other way round; defers to the (LChar, UChar) overload.
+ALWAYS_INLINE bool equal(const UChar* a, const LChar* b, unsigned length)
+{
+    return equal(b, a, length);
+}
+
+// Compares two string-like objects for equality, dispatching to the equal() overload that
+// matches the character width of each side. Strings of different length are never equal.
+template<typename StringClassA, typename StringClassB>
+ALWAYS_INLINE bool equalCommon(const StringClassA& a, const StringClassB& b)
+{
+    const unsigned length = a.length();
+    if (b.length() != length)
+        return false;
+
+    const bool aIs8Bit = a.is8Bit();
+    const bool bIs8Bit = b.is8Bit();
+
+    if (aIs8Bit && bIs8Bit)
+        return equal(a.characters8(), b.characters8(), length);
+    if (aIs8Bit)
+        return equal(a.characters8(), b.characters16(), length);
+    if (bIs8Bit)
+        return equal(a.characters16(), b.characters8(), length);
+    return equal(a.characters16(), b.characters16(), length);
+}
+
+// Pointer flavour of equalCommon: identical pointers (including two nulls) are equal,
+// a null and a non-null pointer are not, and otherwise the pointees are compared with equal().
+template<typename StringClassA, typename StringClassB>
+ALWAYS_INLINE bool equalCommon(const StringClassA* a, const StringClassB* b)
+{
+    const bool samePointer = a == b;
+    if (samePointer)
+        return true;
+
+    const bool bothPresent = a && b;
+    return bothPresent && equal(*a, *b);
+}
+
+// Compares a string-like object against a fixed-size array of 16-bit code units.
+// Every element of the array takes part in the comparison.
+template<typename StringClass, unsigned length> bool equal(const StringClass& a, const UChar (&codeUnits)[length])
+{
+    if (a.length() != length)
+        return false;
+
+    return a.is8Bit()
+        ? equal(a.characters8(), codeUnits, length)
+        : equal(a.characters16(), codeUnits, length);
+}
+
+// Compares length characters from each buffer after passing both sides through toASCIILower().
+template<typename CharacterTypeA, typename CharacterTypeB>
+inline bool equalIgnoringASCIICase(const CharacterTypeA* a, const CharacterTypeB* b, unsigned length)
+{
+    unsigned position = 0;
+    while (position < length) {
+        if (toASCIILower(a[position]) != toASCIILower(b[position]))
+            return false;
+        ++position;
+    }
+    return true;
+}
+
+// Overload taking a separate length per buffer: buffers of different length are never equal.
+template<typename CharacterTypeA, typename CharacterTypeB> inline bool equalIgnoringASCIICase(const CharacterTypeA* a, unsigned lengthA, const CharacterTypeB* b, unsigned lengthB)
+{
+    if (lengthA != lengthB)
+        return false;
+    return equalIgnoringASCIICase(a, b, lengthA);
+}
+
+// ASCII-case-insensitive equality of two string-like objects, dispatching on the character
+// width of each side. Strings of different length are never equal.
+template<typename StringClassA, typename StringClassB>
+bool equalIgnoringASCIICaseCommon(const StringClassA& a, const StringClassB& b)
+{
+    const unsigned length = a.length();
+    if (b.length() != length)
+        return false;
+
+    const bool aIs8Bit = a.is8Bit();
+    const bool bIs8Bit = b.is8Bit();
+
+    if (aIs8Bit && bIs8Bit)
+        return equalIgnoringASCIICase(a.characters8(), b.characters8(), length);
+    if (aIs8Bit)
+        return equalIgnoringASCIICase(a.characters8(), b.characters16(), length);
+    if (bIs8Bit)
+        return equalIgnoringASCIICase(a.characters16(), b.characters8(), length);
+    return equalIgnoringASCIICase(a.characters16(), b.characters16(), length);
+}
+
+// ASCII-case-insensitive equality of a string-like object and a null-terminated C string.
+template<typename StringClassA> bool equalIgnoringASCIICaseCommon(const StringClassA& a, const char* b)
+{
+    const unsigned length = a.length();
+    if (length != strlen(b))
+        return false;
+
+    return a.is8Bit()
+        ? equalIgnoringASCIICase(a.characters8(), b, length)
+        : equalIgnoringASCIICase(a.characters16(), b, length);
+}
+
+// Returns whether reference begins with prefix; a prefix longer than reference never matches.
+template<typename StringClassA, typename StringClassB>
+bool startsWith(const StringClassA& reference, const StringClassB& prefix)
+{
+    const unsigned prefixLength = prefix.length();
+    if (reference.length() < prefixLength)
+        return false;
+
+    const bool referenceIs8Bit = reference.is8Bit();
+    const bool prefixIs8Bit = prefix.is8Bit();
+
+    if (referenceIs8Bit && prefixIs8Bit)
+        return equal(reference.characters8(), prefix.characters8(), prefixLength);
+    if (referenceIs8Bit)
+        return equal(reference.characters8(), prefix.characters16(), prefixLength);
+    if (prefixIs8Bit)
+        return equal(reference.characters16(), prefix.characters8(), prefixLength);
+    return equal(reference.characters16(), prefix.characters16(), prefixLength);
+}
+
+// Returns whether reference begins with prefix when ASCII letter case is ignored;
+// a prefix longer than reference never matches.
+template<typename StringClassA, typename StringClassB>
+bool startsWithIgnoringASCIICase(const StringClassA& reference, const StringClassB& prefix)
+{
+    const unsigned prefixLength = prefix.length();
+    if (reference.length() < prefixLength)
+        return false;
+
+    const bool referenceIs8Bit = reference.is8Bit();
+    const bool prefixIs8Bit = prefix.is8Bit();
+
+    if (referenceIs8Bit && prefixIs8Bit)
+        return equalIgnoringASCIICase(reference.characters8(), prefix.characters8(), prefixLength);
+    if (referenceIs8Bit)
+        return equalIgnoringASCIICase(reference.characters8(), prefix.characters16(), prefixLength);
+    if (prefixIs8Bit)
+        return equalIgnoringASCIICase(reference.characters16(), prefix.characters8(), prefixLength);
+    return equalIgnoringASCIICase(reference.characters16(), prefix.characters16(), prefixLength);
+}
+
+// Returns whether reference ends with suffix; a suffix longer than reference never matches.
+template<typename StringClassA, typename StringClassB>
+bool endsWith(const StringClassA& reference, const StringClassB& suffix)
+{
+    const unsigned suffixLength = suffix.length();
+    const unsigned referenceLength = reference.length();
+    if (referenceLength < suffixLength)
+        return false;
+
+    // Index in reference at which the candidate suffix begins.
+    const unsigned tailStart = referenceLength - suffixLength;
+    const bool referenceIs8Bit = reference.is8Bit();
+    const bool suffixIs8Bit = suffix.is8Bit();
+
+    if (referenceIs8Bit && suffixIs8Bit)
+        return equal(reference.characters8() + tailStart, suffix.characters8(), suffixLength);
+    if (referenceIs8Bit)
+        return equal(reference.characters8() + tailStart, suffix.characters16(), suffixLength);
+    if (suffixIs8Bit)
+        return equal(reference.characters16() + tailStart, suffix.characters8(), suffixLength);
+    return equal(reference.characters16() + tailStart, suffix.characters16(), suffixLength);
+}
+
+// Returns whether reference ends with suffix when ASCII letter case is ignored;
+// a suffix longer than reference never matches.
+template<typename StringClassA, typename StringClassB>
+bool endsWithIgnoringASCIICase(const StringClassA& reference, const StringClassB& suffix)
+{
+    const unsigned suffixLength = suffix.length();
+    const unsigned referenceLength = reference.length();
+    if (referenceLength < suffixLength)
+        return false;
+
+    // Index in reference at which the candidate suffix begins.
+    const unsigned tailStart = referenceLength - suffixLength;
+    const bool referenceIs8Bit = reference.is8Bit();
+    const bool suffixIs8Bit = suffix.is8Bit();
+
+    if (referenceIs8Bit && suffixIs8Bit)
+        return equalIgnoringASCIICase(reference.characters8() + tailStart, suffix.characters8(), suffixLength);
+    if (referenceIs8Bit)
+        return equalIgnoringASCIICase(reference.characters8() + tailStart, suffix.characters16(), suffixLength);
+    if (suffixIs8Bit)
+        return equalIgnoringASCIICase(reference.characters16() + tailStart, suffix.characters8(), suffixLength);
+    return equalIgnoringASCIICase(reference.characters16() + tailStart, suffix.characters16(), suffixLength);
+}
+
+// Searches the searchLength characters starting at source + startOffset for the first
+// ASCII-case-insensitive occurrence of matchCharacters. Returns the match position relative
+// to source, or notFound. searchLength must be at least matchLength.
+template <typename SearchCharacterType, typename MatchCharacterType>
+size_t findIgnoringASCIICase(const SearchCharacterType* source, const MatchCharacterType* matchCharacters, unsigned startOffset, unsigned searchLength, unsigned matchLength)
+{
+    ASSERT(searchLength >= matchLength);
+
+    const SearchCharacterType* searchStart = source + startOffset;
+
+    // Last candidate position; the match is tried at every position from 0 through this one.
+    const unsigned lastCandidate = searchLength - matchLength;
+
+    unsigned candidate = 0;
+    while (candidate <= lastCandidate) {
+        if (equalIgnoringASCIICase(searchStart + candidate, matchCharacters, matchLength))
+            return startOffset + candidate;
+        ++candidate;
+    }
+    return notFound;
+}
+
+// Finds the first ASCII-case-insensitive occurrence of stringToFind in source at or after
+// startOffset. An empty stringToFind matches at startOffset, clamped to the source length.
+// Returns notFound when there is no match or startOffset lies beyond the end of source.
+template<typename StringClassA, typename StringClassB>
+size_t findIgnoringASCIICase(const StringClassA& source, const StringClassB& stringToFind, unsigned startOffset)
+{
+    const unsigned sourceStringLength = source.length();
+    const unsigned matchLength = stringToFind.length();
+    if (!matchLength)
+        return std::min(startOffset, sourceStringLength);
+
+    // Reject a start offset past the end, and a needle that cannot fit in what remains.
+    if (startOffset > sourceStringLength)
+        return notFound;
+    const unsigned searchLength = sourceStringLength - startOffset;
+    if (searchLength < matchLength)
+        return notFound;
+
+    const bool sourceIs8Bit = source.is8Bit();
+    const bool matchIs8Bit = stringToFind.is8Bit();
+
+    if (sourceIs8Bit && matchIs8Bit)
+        return findIgnoringASCIICase(source.characters8(), stringToFind.characters8(), startOffset, searchLength, matchLength);
+    if (sourceIs8Bit)
+        return findIgnoringASCIICase(source.characters8(), stringToFind.characters16(), startOffset, searchLength, matchLength);
+    if (matchIs8Bit)
+        return findIgnoringASCIICase(source.characters16(), stringToFind.characters8(), startOffset, searchLength, matchLength);
+    return findIgnoringASCIICase(source.characters16(), stringToFind.characters16(), startOffset, searchLength, matchLength);
+}
+
+// Substring search over searchLength characters using a running additive hash: the sum of the
+// characters in the current window is compared with the sum of the needle's characters, and
+// equal() is only called when the two sums agree. Returns index plus the window position of
+// the first match, or notFound.
+template <typename SearchCharacterType, typename MatchCharacterType>
+ALWAYS_INLINE static size_t findInner(const SearchCharacterType* searchCharacters, const MatchCharacterType* matchCharacters, unsigned index, unsigned searchLength, unsigned matchLength)
+{
+    // Position of the last window that can still hold the whole needle.
+    const unsigned lastWindow = searchLength - matchLength;
+
+    unsigned windowSum = 0;
+    unsigned needleSum = 0;
+    for (unsigned k = 0; k < matchLength; ++k) {
+        windowSum += searchCharacters[k];
+        needleSum += matchCharacters[k];
+    }
+
+    for (unsigned window = 0; ; ++window) {
+        if (windowSum == needleSum && equal(searchCharacters + window, matchCharacters, matchLength))
+            return index + window;
+        if (window == lastWindow)
+            return notFound;
+        // Slide the window one character to the right.
+        windowSum += searchCharacters[window + matchLength];
+        windowSum -= searchCharacters[window];
+    }
+}
+
+// Returns the position of the first occurrence of matchCharacter in characters at or after
+// index, or notFound when there is none.
+template<typename CharacterType>
+inline size_t find(const CharacterType* characters, unsigned length, CharacterType matchCharacter, unsigned index = 0)
+{
+    for (; index < length; ++index) {
+        if (characters[index] == matchCharacter)
+            return index;
+    }
+    return notFound;
+}
+
+// Searches a 16-bit buffer for an 8-bit character by widening the character first.
+ALWAYS_INLINE size_t find(const UChar* characters, unsigned length, LChar matchCharacter, unsigned index = 0)
+{
+    const UChar widenedCharacter = static_cast<UChar>(matchCharacter);
+    return find(characters, length, widenedCharacter, index);
+}
+
+// Searches an 8-bit buffer for a 16-bit character. A character with any bit set above the
+// low eight cannot occur in such a buffer, so the search is skipped for it.
+inline size_t find(const LChar* characters, unsigned length, UChar matchCharacter, unsigned index = 0)
+{
+    const bool fitsIn8Bits = !(matchCharacter & ~0xFF);
+    if (!fitsIn8Bits)
+        return notFound;
+    return find(characters, length, static_cast<LChar>(matchCharacter), index);
+}
+
+// Finds the first occurrence of needle in haystack at or after start.
+// A single-character needle uses the character search; an empty needle matches at start,
+// clamped to the haystack length; everything else goes through findInner().
+// Returns notFound when there is no match.
+template<typename StringClass>
+size_t findCommon(const StringClass& haystack, const StringClass& needle, unsigned start)
+{
+    const unsigned needleLength = needle.length();
+    const unsigned haystackLength = haystack.length();
+
+    if (needleLength == 1) {
+        return haystack.is8Bit()
+            ? WTF::find(haystack.characters8(), haystackLength, needle[0], start)
+            : WTF::find(haystack.characters16(), haystackLength, needle[0], start);
+    }
+
+    if (!needleLength)
+        return std::min(start, haystackLength);
+
+    if (start > haystackLength)
+        return notFound;
+    const unsigned searchLength = haystackLength - start;
+    if (searchLength < needleLength)
+        return notFound;
+
+    const bool haystackIs8Bit = haystack.is8Bit();
+    const bool needleIs8Bit = needle.is8Bit();
+
+    if (haystackIs8Bit && needleIs8Bit)
+        return findInner(haystack.characters8() + start, needle.characters8(), start, searchLength, needleLength);
+    if (haystackIs8Bit)
+        return findInner(haystack.characters8() + start, needle.characters16(), start, searchLength, needleLength);
+    if (needleIs8Bit)
+        return findInner(haystack.characters16() + start, needle.characters8(), start, searchLength, needleLength);
+    return findInner(haystack.characters16() + start, needle.characters16(), start, searchLength, needleLength);
+}
+
+// Marked inline because it is mostly called from the non-inline functions written for each string type.
+// Direct callers are probably fine with it being inline as well; the loop may even get unrolled.
+// Returns true when each of the first length characters matches the corresponding entry of
+// lowercaseLetters according to isASCIIAlphaCaselessEqual.
+template<typename CharacterType> inline bool equalLettersIgnoringASCIICase(const CharacterType* characters, const char* lowercaseLetters, unsigned length)
+{
+    for (unsigned offset = 0; offset != length; ++offset) {
+        bool sameLetter = isASCIIAlphaCaselessEqual(characters[offset], lowercaseLetters[offset]);
+        if (!sameLetter)
+            return false;
+    }
+    return true;
+}
+
+// Array overload: the literal's length comes from the array bound (minus the terminator),
+// so a length mismatch is rejected before any character is compared.
+template<typename CharacterType, unsigned lowercaseLettersLength> inline bool equalLettersIgnoringASCIICase(const CharacterType* characters, unsigned charactersLength, const char (&lowercaseLetters)[lowercaseLettersLength])
+{
+    ASSERT(strlen(lowercaseLetters) == lowercaseLettersLength - 1);
+    unsigned literalLength = lowercaseLettersLength - 1;
+    if (charactersLength != literalLength)
+        return false;
+    return equalLettersIgnoringASCIICase(characters, lowercaseLetters, literalLength);
+}
+
+// Compares the first length characters of string against lowercaseLetters, ignoring ASCII case.
+// Preconditions (checked only when assertions are enabled): lowercaseLetters is non-empty,
+// every character in it is already lowercase, and string holds at least length characters.
+template<typename StringClass> bool inline hasPrefixWithLettersIgnoringASCIICaseCommon(const StringClass& string, const char* lowercaseLetters, unsigned length)
+{
+#if !ASSERT_DISABLED
+    ASSERT(*lowercaseLetters);
+    for (unsigned position = 0; lowercaseLetters[position]; ++position)
+        ASSERT(toASCIILowerUnchecked(lowercaseLetters[position]) == lowercaseLetters[position]);
+#endif
+    ASSERT(string.length() >= length);
+
+    // Pick the comparison that matches the string's character width.
+    return string.is8Bit()
+        ? equalLettersIgnoringASCIICase(string.characters8(), lowercaseLetters, length)
+        : equalLettersIgnoringASCIICase(string.characters16(), lowercaseLetters, length);
+}
+
+// Deliberately not marked inline: it is used often and is not speed-critical enough to want it inlined everywhere.
+// Returns true when string has exactly strlen(lowercaseLetters) characters and they match
+// lowercaseLetters, ignoring ASCII case.
+template<typename StringClass> bool equalLettersIgnoringASCIICaseCommonWithoutLength(const StringClass& string, const char* lowercaseLetters)
+{
+    unsigned stringLength = string.length();
+    return stringLength == strlen(lowercaseLetters)
+        && hasPrefixWithLettersIgnoringASCIICaseCommon(string, lowercaseLetters, stringLength);
+}
+
+// Returns true when string begins with lowercaseLetters, ignoring ASCII case.
+// An empty lowercaseLetters matches every string.
+template<typename StringClass> bool startsWithLettersIgnoringASCIICaseCommonWithoutLength(const StringClass& string, const char* lowercaseLetters)
+{
+    size_t lettersLength = strlen(lowercaseLetters);
+    // Handle the empty prefix here: the prefix helper asserts a non-empty literal.
+    if (!lettersLength)
+        return true;
+    bool isLongEnough = string.length() >= lettersLength;
+    return isLongEnough && hasPrefixWithLettersIgnoringASCIICaseCommon(string, lowercaseLetters, lettersLength);
+}
+
+// Array-literal entry point for whole-string comparison ignoring ASCII case.
+// The array bound is only used for the assertion; the literal is passed on as a plain
+// pointer so that one out-of-line instantiation serves every literal length
+// (code size is chosen over speed).
+template<typename StringClass, unsigned length> inline bool equalLettersIgnoringASCIICaseCommon(const StringClass& string, const char (&lowercaseLetters)[length])
+{
+    ASSERT(strlen(lowercaseLetters) == length - 1);
+    return equalLettersIgnoringASCIICaseCommonWithoutLength(string, static_cast<const char*>(lowercaseLetters));
+}
+
+// Array-literal entry point for prefix comparison ignoring ASCII case.
+// The literal is passed on as a plain pointer; its array bound is not used.
+template<typename StringClass, unsigned length> inline bool startsWithLettersIgnoringASCIICaseCommon(const StringClass& string, const char (&lowercaseLetters)[length])
+{
+    return startsWithLettersIgnoringASCIICaseCommonWithoutLength(string, static_cast<const char*>(lowercaseLetters));
+}
+
+}
+
+using WTF::equalIgnoringASCIICase;
+using WTF::equalLettersIgnoringASCIICase;
+
+#endif // StringCommon_h
diff --git a/Source/WTF/wtf/text/StringConcatenate.h b/Source/WTF/wtf/text/StringConcatenate.h
index baeccc1d2..affb7e195 100644
--- a/Source/WTF/wtf/text/StringConcatenate.h
+++ b/Source/WTF/wtf/text/StringConcatenate.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2010 Apple Inc. All rights reserved.
+ * Copyright (C) 2010-2016 Apple Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -28,10 +28,14 @@
#include <string.h>
-#ifndef WTFString_h
+#ifndef AtomicString_h
#include <wtf/text/AtomicString.h>
#endif
+#ifndef StringView_h
+#include <wtf/text/StringView.h>
+#endif
+
// This macro is helpful for testing how many intermediate Strings are created while evaluating an
// expression containing operator+.
#ifndef WTF_STRINGTYPEADAPTER_COPIED_WTF_STRING
@@ -41,382 +45,221 @@
namespace WTF {
template<typename StringType>
-class StringTypeAdapter {
-};
+class StringTypeAdapter;
template<>
class StringTypeAdapter<char> {
public:
- StringTypeAdapter<char>(char buffer)
- : m_buffer(buffer)
+ StringTypeAdapter<char>(char character)
+ : m_character(character)
{
}
unsigned length() { return 1; }
-
bool is8Bit() { return true; }
- void writeTo(LChar* destination)
+ void writeTo(LChar* destination) const
{
- *destination = m_buffer;
+ *destination = m_character;
}
- void writeTo(UChar* destination) { *destination = m_buffer; }
-
-private:
- unsigned char m_buffer;
-};
-
-template<>
-class StringTypeAdapter<LChar> {
-public:
- StringTypeAdapter<LChar>(LChar buffer)
- : m_buffer(buffer)
- {
- }
-
- unsigned length() { return 1; }
-
- bool is8Bit() { return true; }
-
- void writeTo(LChar* destination)
+ void writeTo(UChar* destination) const
{
- *destination = m_buffer;
+ *destination = m_character;
}
- void writeTo(UChar* destination) { *destination = m_buffer; }
+ String toString() const { return String(&m_character, 1); }
private:
- LChar m_buffer;
+ char m_character;
};
template<>
class StringTypeAdapter<UChar> {
public:
- StringTypeAdapter<UChar>(UChar buffer)
- : m_buffer(buffer)
+ StringTypeAdapter<UChar>(UChar character)
+ : m_character(character)
{
}
- unsigned length() { return 1; }
-
- bool is8Bit() { return m_buffer <= 0xff; }
+ unsigned length() const { return 1; }
+ bool is8Bit() const { return m_character <= 0xff; }
- void writeTo(LChar* destination)
+ void writeTo(LChar* destination) const
{
ASSERT(is8Bit());
- *destination = static_cast<LChar>(m_buffer);
+ *destination = static_cast<LChar>(m_character);
}
- void writeTo(UChar* destination) { *destination = m_buffer; }
-
-private:
- UChar m_buffer;
-};
-
-template<>
-class StringTypeAdapter<char*> {
-public:
- StringTypeAdapter<char*>(char* buffer)
- : m_buffer(buffer)
- , m_length(strlen(buffer))
- {
- }
-
- unsigned length() { return m_length; }
-
- bool is8Bit() { return true; }
-
- void writeTo(LChar* destination)
+ void writeTo(UChar* destination) const
{
- for (unsigned i = 0; i < m_length; ++i)
- destination[i] = static_cast<LChar>(m_buffer[i]);
+ *destination = m_character;
}
- void writeTo(UChar* destination)
- {
- for (unsigned i = 0; i < m_length; ++i) {
- unsigned char c = m_buffer[i];
- destination[i] = c;
- }
- }
+ String toString() const { return String(&m_character, 1); }
private:
- const char* m_buffer;
- unsigned m_length;
+ UChar m_character;
};
template<>
-class StringTypeAdapter<LChar*> {
+class StringTypeAdapter<const LChar*> {
public:
- StringTypeAdapter<LChar*>(LChar* buffer)
- : m_buffer(buffer)
- , m_length(strlen(reinterpret_cast<char*>(buffer)))
+ StringTypeAdapter(const LChar* characters)
+ : m_characters(characters)
+ , m_length(strlen(reinterpret_cast<const char*>(characters)))
{
}
- unsigned length() { return m_length; }
-
- bool is8Bit() { return true; }
+ unsigned length() const { return m_length; }
+ bool is8Bit() const { return true; }
- void writeTo(LChar* destination)
+ void writeTo(LChar* destination) const
{
- memcpy(destination, m_buffer, m_length * sizeof(LChar));
+ StringView(m_characters, m_length).getCharactersWithUpconvert(destination);
}
- void writeTo(UChar* destination)
+ void writeTo(UChar* destination) const
{
- StringImpl::copyChars(destination, m_buffer, m_length);
+ StringView(m_characters, m_length).getCharactersWithUpconvert(destination);
}
+ String toString() const { return String(m_characters, m_length); }
+
private:
- const LChar* m_buffer;
+ const LChar* m_characters;
unsigned m_length;
};
+// Adapter for a null-terminated UChar string. The length is measured once at construction,
+// the content is never reported as 8-bit, and writing into an LChar buffer is a hard CRASH().
template<>
class StringTypeAdapter<const UChar*> {
public:
- StringTypeAdapter<const UChar*>(const UChar* buffer)
- : m_buffer(buffer)
+ StringTypeAdapter(const UChar* characters)
+ : m_characters(characters)
{
- size_t len = 0;
- while (m_buffer[len] != UChar(0))
- ++len;
+ // Count in size_t: an unsigned counter can never exceed numeric_limits<unsigned>::max(),
+ // which would make the check below dead code and let an over-long count wrap silently.
+ size_t length = 0;
+ while (m_characters[length])
+ ++length;
- if (len > std::numeric_limits<unsigned>::max())
+ if (length > std::numeric_limits<unsigned>::max()) // FIXME this is silly https://bugs.webkit.org/show_bug.cgi?id=165790
CRASH();
- m_length = len;
+ m_length = static_cast<unsigned>(length);
}
- unsigned length() { return m_length; }
+ unsigned length() const { return m_length; }
+ bool is8Bit() const { return false; }
- bool is8Bit() { return false; }
-
- NO_RETURN_DUE_TO_CRASH void writeTo(LChar*)
+ NO_RETURN_DUE_TO_CRASH void writeTo(LChar*) const
{
- CRASH();
+ CRASH(); // FIXME make this a compile-time failure https://bugs.webkit.org/show_bug.cgi?id=165791
}
- void writeTo(UChar* destination)
+ void writeTo(UChar* destination) const
{
- memcpy(destination, m_buffer, m_length * sizeof(UChar));
+ memcpy(destination, m_characters, m_length * sizeof(UChar));
}
+ String toString() const { return String(m_characters, m_length); }
+
private:
- const UChar* m_buffer;
+ const UChar* m_characters;
unsigned m_length;
};
template<>
-class StringTypeAdapter<const char*> {
+class StringTypeAdapter<const char*> : public StringTypeAdapter<const LChar*> {
public:
- StringTypeAdapter<const char*>(const char* buffer)
- : m_buffer(buffer)
- , m_length(strlen(buffer))
- {
- }
-
- unsigned length() { return m_length; }
-
- bool is8Bit() { return true; }
-
- void writeTo(LChar* destination)
- {
- memcpy(destination, m_buffer, static_cast<size_t>(m_length) * sizeof(LChar));
- }
-
- void writeTo(UChar* destination)
+ StringTypeAdapter(const char* characters)
+ : StringTypeAdapter<const LChar*>(reinterpret_cast<const LChar*>(characters))
{
- for (unsigned i = 0; i < m_length; ++i) {
- unsigned char c = m_buffer[i];
- destination[i] = c;
- }
}
-
-private:
- const char* m_buffer;
- unsigned m_length;
};
template<>
-class StringTypeAdapter<const LChar*> {
+class StringTypeAdapter<char*> : public StringTypeAdapter<const char*> {
public:
- StringTypeAdapter<const LChar*>(const LChar* buffer)
- : m_buffer(buffer)
- , m_length(strlen(reinterpret_cast<const char*>(buffer)))
- {
- }
-
- unsigned length() { return m_length; }
-
- bool is8Bit() { return true; }
-
- void writeTo(LChar* destination)
+ StringTypeAdapter(const char* characters)
+ : StringTypeAdapter<const char*>(characters)
{
- memcpy(destination, m_buffer, static_cast<size_t>(m_length) * sizeof(LChar));
}
-
- void writeTo(UChar* destination)
- {
- StringImpl::copyChars(destination, m_buffer, m_length);
- }
-
-private:
- const LChar* m_buffer;
- unsigned m_length;
};
template<>
-class StringTypeAdapter<ASCIILiteral> {
+class StringTypeAdapter<ASCIILiteral> : public StringTypeAdapter<const char*> {
public:
- StringTypeAdapter<ASCIILiteral>(ASCIILiteral buffer)
- : m_buffer(reinterpret_cast<const LChar*>(static_cast<const char*>(buffer)))
- , m_length(strlen(buffer))
- {
- }
-
- size_t length() { return m_length; }
-
- bool is8Bit() { return true; }
-
- void writeTo(LChar* destination)
- {
- memcpy(destination, m_buffer, static_cast<size_t>(m_length));
- }
-
- void writeTo(UChar* destination)
+ StringTypeAdapter(ASCIILiteral characters)
+ : StringTypeAdapter<const char*>(characters)
{
- StringImpl::copyChars(destination, m_buffer, m_length);
}
-
-private:
- const LChar* m_buffer;
- unsigned m_length;
};
template<>
class StringTypeAdapter<Vector<char>> {
public:
- StringTypeAdapter<Vector<char>>(const Vector<char>& buffer)
- : m_buffer(buffer)
+ StringTypeAdapter(const Vector<char>& vector)
+ : m_vector(vector)
{
}
- size_t length() { return m_buffer.size(); }
-
- bool is8Bit() { return true; }
-
- void writeTo(LChar* destination)
- {
- for (size_t i = 0; i < m_buffer.size(); ++i)
- destination[i] = static_cast<unsigned char>(m_buffer[i]);
- }
+ size_t length() const { return m_vector.size(); }
+ bool is8Bit() const { return true; }
- void writeTo(UChar* destination)
- {
- for (size_t i = 0; i < m_buffer.size(); ++i)
- destination[i] = static_cast<unsigned char>(m_buffer[i]);
- }
-
-private:
- const Vector<char>& m_buffer;
-};
-
-template<>
-class StringTypeAdapter<Vector<LChar>> {
-public:
- StringTypeAdapter<Vector<LChar>>(const Vector<LChar>& buffer)
- : m_buffer(buffer)
+ void writeTo(LChar* destination) const
{
+ StringView(reinterpret_cast<const LChar*>(m_vector.data()), m_vector.size()).getCharactersWithUpconvert(destination);
}
- size_t length() { return m_buffer.size(); }
-
- bool is8Bit() { return true; }
-
- void writeTo(LChar* destination)
+ void writeTo(UChar* destination) const
{
- for (size_t i = 0; i < m_buffer.size(); ++i)
- destination[i] = m_buffer[i];
+ StringView(reinterpret_cast<const LChar*>(m_vector.data()), m_vector.size()).getCharactersWithUpconvert(destination);
}
- void writeTo(UChar* destination)
- {
- for (size_t i = 0; i < m_buffer.size(); ++i)
- destination[i] = m_buffer[i];
- }
+ String toString() const { return String(m_vector.data(), m_vector.size()); }
private:
- const Vector<LChar>& m_buffer;
+ const Vector<char>& m_vector;
};
template<>
class StringTypeAdapter<String> {
public:
StringTypeAdapter<String>(const String& string)
- : m_buffer(string)
+ : m_string(string)
{
}
- unsigned length() { return m_buffer.length(); }
+ unsigned length() const { return m_string.length(); }
+ bool is8Bit() const { return m_string.isNull() || m_string.is8Bit(); }
- bool is8Bit() { return m_buffer.isNull() || m_buffer.is8Bit(); }
-
- void writeTo(LChar* destination)
+ void writeTo(LChar* destination) const
{
- unsigned length = m_buffer.length();
-
- ASSERT(is8Bit());
- const LChar* data = m_buffer.characters8();
- for (unsigned i = 0; i < length; ++i)
- destination[i] = data[i];
-
+ StringView(m_string).getCharactersWithUpconvert(destination);
WTF_STRINGTYPEADAPTER_COPIED_WTF_STRING();
}
- void writeTo(UChar* destination)
+ void writeTo(UChar* destination) const
{
- unsigned length = m_buffer.length();
-
- if (is8Bit()) {
- const LChar* data = m_buffer.characters8();
- for (unsigned i = 0; i < length; ++i)
- destination[i] = data[i];
- } else {
- const UChar* data = m_buffer.characters16();
- for (unsigned i = 0; i < length; ++i)
- destination[i] = data[i];
- }
-
+ StringView(m_string).getCharactersWithUpconvert(destination);
WTF_STRINGTYPEADAPTER_COPIED_WTF_STRING();
}
+ String toString() const { return m_string; }
+
private:
- const String& m_buffer;
+ const String& m_string;
};
template<>
-class StringTypeAdapter<AtomicString> {
+class StringTypeAdapter<AtomicString> : public StringTypeAdapter<String> {
public:
- StringTypeAdapter<AtomicString>(const AtomicString& string)
- : m_adapter(string.string())
+ StringTypeAdapter(const AtomicString& string)
+ : StringTypeAdapter<String>(string.string())
{
}
-
- unsigned length() { return m_adapter.length(); }
-
- bool is8Bit() { return m_adapter.is8Bit(); }
-
- void writeTo(LChar* destination) { m_adapter.writeTo(destination); }
- void writeTo(UChar* destination) { m_adapter.writeTo(destination); }
-
-private:
- StringTypeAdapter<String> m_adapter;
};
-inline void sumWithOverflow(unsigned& total, unsigned addend, bool& overflow)
+inline void sumWithOverflow(bool& overflow, unsigned& total, unsigned addend)
{
unsigned oldTotal = total;
total = oldTotal + addend;
@@ -424,569 +267,97 @@ inline void sumWithOverflow(unsigned& total, unsigned addend, bool& overflow)
overflow = true;
}
-template<typename StringType1, typename StringType2>
-PassRefPtr<StringImpl> tryMakeString(StringType1 string1, StringType2 string2)
-{
- StringTypeAdapter<StringType1> adapter1(string1);
- StringTypeAdapter<StringType2> adapter2(string2);
-
- bool overflow = false;
- unsigned length = adapter1.length();
- sumWithOverflow(length, adapter2.length(), overflow);
- if (overflow)
- return 0;
-
- if (adapter1.is8Bit() && adapter2.is8Bit()) {
- LChar* buffer;
- RefPtr<StringImpl> resultImpl = StringImpl::tryCreateUninitialized(length, buffer);
- if (!resultImpl)
- return 0;
-
- LChar* result = buffer;
- adapter1.writeTo(result);
- result += adapter1.length();
- adapter2.writeTo(result);
-
- return resultImpl.release();
- }
-
- UChar* buffer;
- RefPtr<StringImpl> resultImpl = StringImpl::tryCreateUninitialized(length, buffer);
- if (!resultImpl)
- return 0;
-
- UChar* result = buffer;
- adapter1.writeTo(result);
- result += adapter1.length();
- adapter2.writeTo(result);
-
- return resultImpl.release();
-}
-
-template<typename StringType1, typename StringType2, typename StringType3>
-PassRefPtr<StringImpl> tryMakeString(StringType1 string1, StringType2 string2, StringType3 string3)
+template<typename... Unsigned>
+inline void sumWithOverflow(bool& overflow, unsigned& total, unsigned addend, Unsigned ...addends)
{
- StringTypeAdapter<StringType1> adapter1(string1);
- StringTypeAdapter<StringType2> adapter2(string2);
- StringTypeAdapter<StringType3> adapter3(string3);
-
- bool overflow = false;
- unsigned length = adapter1.length();
- sumWithOverflow(length, adapter2.length(), overflow);
- sumWithOverflow(length, adapter3.length(), overflow);
- if (overflow)
- return 0;
-
- if (adapter1.is8Bit() && adapter2.is8Bit() && adapter3.is8Bit()) {
- LChar* buffer;
- RefPtr<StringImpl> resultImpl = StringImpl::tryCreateUninitialized(length, buffer);
- if (!resultImpl)
- return 0;
-
- LChar* result = buffer;
- adapter1.writeTo(result);
- result += adapter1.length();
- adapter2.writeTo(result);
- result += adapter2.length();
- adapter3.writeTo(result);
-
- return resultImpl.release();
- }
-
- UChar* buffer = 0;
- RefPtr<StringImpl> resultImpl = StringImpl::tryCreateUninitialized(length, buffer);
- if (!resultImpl)
- return 0;
-
- UChar* result = buffer;
- adapter1.writeTo(result);
- result += adapter1.length();
- adapter2.writeTo(result);
- result += adapter2.length();
- adapter3.writeTo(result);
-
- return resultImpl.release();
+ unsigned oldTotal = total;
+ total = oldTotal + addend;
+ if (total < oldTotal)
+ overflow = true;
+ sumWithOverflow(overflow, total, addends...);
}
-template<typename StringType1, typename StringType2, typename StringType3, typename StringType4>
-PassRefPtr<StringImpl> tryMakeString(StringType1 string1, StringType2 string2, StringType3 string3, StringType4 string4)
+template<typename Adapter>
+inline bool are8Bit(Adapter adapter)
{
- StringTypeAdapter<StringType1> adapter1(string1);
- StringTypeAdapter<StringType2> adapter2(string2);
- StringTypeAdapter<StringType3> adapter3(string3);
- StringTypeAdapter<StringType4> adapter4(string4);
-
- bool overflow = false;
- unsigned length = adapter1.length();
- sumWithOverflow(length, adapter2.length(), overflow);
- sumWithOverflow(length, adapter3.length(), overflow);
- sumWithOverflow(length, adapter4.length(), overflow);
- if (overflow)
- return 0;
-
- if (adapter1.is8Bit() && adapter2.is8Bit() && adapter3.is8Bit() && adapter4.is8Bit()) {
- LChar* buffer;
- RefPtr<StringImpl> resultImpl = StringImpl::tryCreateUninitialized(length, buffer);
- if (!resultImpl)
- return 0;
-
- LChar* result = buffer;
- adapter1.writeTo(result);
- result += adapter1.length();
- adapter2.writeTo(result);
- result += adapter2.length();
- adapter3.writeTo(result);
- result += adapter3.length();
- adapter4.writeTo(result);
-
- return resultImpl.release();
- }
-
- UChar* buffer;
- RefPtr<StringImpl> resultImpl = StringImpl::tryCreateUninitialized(length, buffer);
- if (!resultImpl)
- return 0;
-
- UChar* result = buffer;
- adapter1.writeTo(result);
- result += adapter1.length();
- adapter2.writeTo(result);
- result += adapter2.length();
- adapter3.writeTo(result);
- result += adapter3.length();
- adapter4.writeTo(result);
-
- return resultImpl.release();
+ return adapter.is8Bit();
}
-template<typename StringType1, typename StringType2, typename StringType3, typename StringType4, typename StringType5>
-PassRefPtr<StringImpl> tryMakeString(StringType1 string1, StringType2 string2, StringType3 string3, StringType4 string4, StringType5 string5)
+template<typename Adapter, typename... Adapters>
+inline bool are8Bit(Adapter adapter, Adapters ...adapters)
{
- StringTypeAdapter<StringType1> adapter1(string1);
- StringTypeAdapter<StringType2> adapter2(string2);
- StringTypeAdapter<StringType3> adapter3(string3);
- StringTypeAdapter<StringType4> adapter4(string4);
- StringTypeAdapter<StringType5> adapter5(string5);
-
- bool overflow = false;
- unsigned length = adapter1.length();
- sumWithOverflow(length, adapter2.length(), overflow);
- sumWithOverflow(length, adapter3.length(), overflow);
- sumWithOverflow(length, adapter4.length(), overflow);
- sumWithOverflow(length, adapter5.length(), overflow);
- if (overflow)
- return 0;
-
- if (adapter1.is8Bit() && adapter2.is8Bit() && adapter3.is8Bit() && adapter4.is8Bit() && adapter5.is8Bit()) {
- LChar* buffer;
- RefPtr<StringImpl> resultImpl = StringImpl::tryCreateUninitialized(length, buffer);
- if (!resultImpl)
- return 0;
-
- LChar* result = buffer;
- adapter1.writeTo(result);
- result += adapter1.length();
- adapter2.writeTo(result);
- result += adapter2.length();
- adapter3.writeTo(result);
- result += adapter3.length();
- adapter4.writeTo(result);
- result += adapter4.length();
- adapter5.writeTo(result);
-
- return resultImpl.release();
- }
-
- UChar* buffer;
- RefPtr<StringImpl> resultImpl = StringImpl::tryCreateUninitialized(length, buffer);
- if (!resultImpl)
- return 0;
-
- UChar* result = buffer;
- adapter1.writeTo(result);
- result += adapter1.length();
- adapter2.writeTo(result);
- result += adapter2.length();
- adapter3.writeTo(result);
- result += adapter3.length();
- adapter4.writeTo(result);
- result += adapter4.length();
- adapter5.writeTo(result);
-
- return resultImpl.release();
+ return adapter.is8Bit() && are8Bit(adapters...);
}
-template<typename StringType1, typename StringType2, typename StringType3, typename StringType4, typename StringType5, typename StringType6>
-PassRefPtr<StringImpl> tryMakeString(StringType1 string1, StringType2 string2, StringType3 string3, StringType4 string4, StringType5 string5, StringType6 string6)
+template<typename ResultType, typename Adapter>
+inline void makeStringAccumulator(ResultType* result, Adapter adapter)
{
- StringTypeAdapter<StringType1> adapter1(string1);
- StringTypeAdapter<StringType2> adapter2(string2);
- StringTypeAdapter<StringType3> adapter3(string3);
- StringTypeAdapter<StringType4> adapter4(string4);
- StringTypeAdapter<StringType5> adapter5(string5);
- StringTypeAdapter<StringType6> adapter6(string6);
-
- bool overflow = false;
- unsigned length = adapter1.length();
- sumWithOverflow(length, adapter2.length(), overflow);
- sumWithOverflow(length, adapter3.length(), overflow);
- sumWithOverflow(length, adapter4.length(), overflow);
- sumWithOverflow(length, adapter5.length(), overflow);
- sumWithOverflow(length, adapter6.length(), overflow);
- if (overflow)
- return 0;
-
- if (adapter1.is8Bit() && adapter2.is8Bit() && adapter3.is8Bit() && adapter4.is8Bit() && adapter5.is8Bit() && adapter6.is8Bit()) {
- LChar* buffer;
- RefPtr<StringImpl> resultImpl = StringImpl::tryCreateUninitialized(length, buffer);
- if (!resultImpl)
- return 0;
-
- LChar* result = buffer;
- adapter1.writeTo(result);
- result += adapter1.length();
- adapter2.writeTo(result);
- result += adapter2.length();
- adapter3.writeTo(result);
- result += adapter3.length();
- adapter4.writeTo(result);
- result += adapter4.length();
- adapter5.writeTo(result);
- result += adapter5.length();
- adapter6.writeTo(result);
-
- return resultImpl.release();
- }
-
- UChar* buffer;
- RefPtr<StringImpl> resultImpl = StringImpl::tryCreateUninitialized(length, buffer);
- if (!resultImpl)
- return 0;
-
- UChar* result = buffer;
- adapter1.writeTo(result);
- result += adapter1.length();
- adapter2.writeTo(result);
- result += adapter2.length();
- adapter3.writeTo(result);
- result += adapter3.length();
- adapter4.writeTo(result);
- result += adapter4.length();
- adapter5.writeTo(result);
- result += adapter5.length();
- adapter6.writeTo(result);
-
- return resultImpl.release();
+ adapter.writeTo(result);
}
-template<typename StringType1, typename StringType2, typename StringType3, typename StringType4, typename StringType5, typename StringType6, typename StringType7>
-PassRefPtr<StringImpl> tryMakeString(StringType1 string1, StringType2 string2, StringType3 string3, StringType4 string4, StringType5 string5, StringType6 string6, StringType7 string7)
+template<typename ResultType, typename Adapter, typename... Adapters>
+inline void makeStringAccumulator(ResultType* result, Adapter adapter, Adapters ...adapters)
{
- StringTypeAdapter<StringType1> adapter1(string1);
- StringTypeAdapter<StringType2> adapter2(string2);
- StringTypeAdapter<StringType3> adapter3(string3);
- StringTypeAdapter<StringType4> adapter4(string4);
- StringTypeAdapter<StringType5> adapter5(string5);
- StringTypeAdapter<StringType6> adapter6(string6);
- StringTypeAdapter<StringType7> adapter7(string7);
-
- bool overflow = false;
- unsigned length = adapter1.length();
- sumWithOverflow(length, adapter2.length(), overflow);
- sumWithOverflow(length, adapter3.length(), overflow);
- sumWithOverflow(length, adapter4.length(), overflow);
- sumWithOverflow(length, adapter5.length(), overflow);
- sumWithOverflow(length, adapter6.length(), overflow);
- sumWithOverflow(length, adapter7.length(), overflow);
- if (overflow)
- return 0;
-
- if (adapter1.is8Bit() && adapter2.is8Bit() && adapter3.is8Bit() && adapter4.is8Bit() && adapter5.is8Bit() && adapter6.is8Bit() && adapter7.is8Bit()) {
- LChar* buffer;
- RefPtr<StringImpl> resultImpl = StringImpl::tryCreateUninitialized(length, buffer);
- if (!resultImpl)
- return 0;
-
- LChar* result = buffer;
- adapter1.writeTo(result);
- result += adapter1.length();
- adapter2.writeTo(result);
- result += adapter2.length();
- adapter3.writeTo(result);
- result += adapter3.length();
- adapter4.writeTo(result);
- result += adapter4.length();
- adapter5.writeTo(result);
- result += adapter5.length();
- adapter6.writeTo(result);
- result += adapter6.length();
- adapter7.writeTo(result);
-
- return resultImpl.release();
- }
-
- UChar* buffer;
- RefPtr<StringImpl> resultImpl = StringImpl::tryCreateUninitialized(length, buffer);
- if (!resultImpl)
- return 0;
-
- UChar* result = buffer;
- adapter1.writeTo(result);
- result += adapter1.length();
- adapter2.writeTo(result);
- result += adapter2.length();
- adapter3.writeTo(result);
- result += adapter3.length();
- adapter4.writeTo(result);
- result += adapter4.length();
- adapter5.writeTo(result);
- result += adapter5.length();
- adapter6.writeTo(result);
- result += adapter6.length();
- adapter7.writeTo(result);
-
- return resultImpl.release();
+ adapter.writeTo(result);
+ makeStringAccumulator(result + adapter.length(), adapters...);
}
-template<typename StringType1, typename StringType2, typename StringType3, typename StringType4, typename StringType5, typename StringType6, typename StringType7, typename StringType8>
-PassRefPtr<StringImpl> tryMakeString(StringType1 string1, StringType2 string2, StringType3 string3, StringType4 string4, StringType5 string5, StringType6 string6, StringType7 string7, StringType8 string8)
+template<typename StringTypeAdapter, typename... StringTypeAdapters>
+String tryMakeStringFromAdapters(StringTypeAdapter adapter, StringTypeAdapters ...adapters)
{
- StringTypeAdapter<StringType1> adapter1(string1);
- StringTypeAdapter<StringType2> adapter2(string2);
- StringTypeAdapter<StringType3> adapter3(string3);
- StringTypeAdapter<StringType4> adapter4(string4);
- StringTypeAdapter<StringType5> adapter5(string5);
- StringTypeAdapter<StringType6> adapter6(string6);
- StringTypeAdapter<StringType7> adapter7(string7);
- StringTypeAdapter<StringType8> adapter8(string8);
-
bool overflow = false;
- unsigned length = adapter1.length();
- sumWithOverflow(length, adapter2.length(), overflow);
- sumWithOverflow(length, adapter3.length(), overflow);
- sumWithOverflow(length, adapter4.length(), overflow);
- sumWithOverflow(length, adapter5.length(), overflow);
- sumWithOverflow(length, adapter6.length(), overflow);
- sumWithOverflow(length, adapter7.length(), overflow);
- sumWithOverflow(length, adapter8.length(), overflow);
+ unsigned length = adapter.length();
+ sumWithOverflow(overflow, length, adapters.length()...);
if (overflow)
- return 0;
+ return String();
- if (adapter1.is8Bit() && adapter2.is8Bit() && adapter3.is8Bit() && adapter4.is8Bit() && adapter5.is8Bit() && adapter6.is8Bit() && adapter7.is8Bit() && adapter8.is8Bit()) {
+ if (are8Bit(adapter, adapters...)) {
LChar* buffer;
RefPtr<StringImpl> resultImpl = StringImpl::tryCreateUninitialized(length, buffer);
if (!resultImpl)
- return 0;
-
- LChar* result = buffer;
- adapter1.writeTo(result);
- result += adapter1.length();
- adapter2.writeTo(result);
- result += adapter2.length();
- adapter3.writeTo(result);
- result += adapter3.length();
- adapter4.writeTo(result);
- result += adapter4.length();
- adapter5.writeTo(result);
- result += adapter5.length();
- adapter6.writeTo(result);
- result += adapter6.length();
- adapter7.writeTo(result);
- result += adapter7.length();
- adapter8.writeTo(result);
-
- return resultImpl.release();
- }
-
- UChar* buffer;
- RefPtr<StringImpl> resultImpl = StringImpl::tryCreateUninitialized(length, buffer);
- if (!resultImpl)
- return 0;
-
- UChar* result = buffer;
- adapter1.writeTo(result);
- result += adapter1.length();
- adapter2.writeTo(result);
- result += adapter2.length();
- adapter3.writeTo(result);
- result += adapter3.length();
- adapter4.writeTo(result);
- result += adapter4.length();
- adapter5.writeTo(result);
- result += adapter5.length();
- adapter6.writeTo(result);
- result += adapter6.length();
- adapter7.writeTo(result);
- result += adapter7.length();
- adapter8.writeTo(result);
-
- return resultImpl.release();
-}
-
-template<typename StringType1, typename StringType2, typename StringType3, typename StringType4, typename StringType5, typename StringType6, typename StringType7, typename StringType8, typename StringType9>
-PassRefPtr<StringImpl> tryMakeString(StringType1 string1, StringType2 string2, StringType3 string3, StringType4 string4, StringType5 string5, StringType6 string6, StringType7 string7, StringType8 string8, StringType9 string9)
-{
- StringTypeAdapter<StringType1> adapter1(string1);
- StringTypeAdapter<StringType2> adapter2(string2);
- StringTypeAdapter<StringType3> adapter3(string3);
- StringTypeAdapter<StringType4> adapter4(string4);
- StringTypeAdapter<StringType5> adapter5(string5);
- StringTypeAdapter<StringType6> adapter6(string6);
- StringTypeAdapter<StringType7> adapter7(string7);
- StringTypeAdapter<StringType8> adapter8(string8);
- StringTypeAdapter<StringType9> adapter9(string9);
+ return String();
- bool overflow = false;
- unsigned length = adapter1.length();
- sumWithOverflow(length, adapter2.length(), overflow);
- sumWithOverflow(length, adapter3.length(), overflow);
- sumWithOverflow(length, adapter4.length(), overflow);
- sumWithOverflow(length, adapter5.length(), overflow);
- sumWithOverflow(length, adapter6.length(), overflow);
- sumWithOverflow(length, adapter7.length(), overflow);
- sumWithOverflow(length, adapter8.length(), overflow);
- sumWithOverflow(length, adapter9.length(), overflow);
- if (overflow)
- return 0;
+ makeStringAccumulator(buffer, adapter, adapters...);
- if (adapter1.is8Bit() && adapter2.is8Bit() && adapter3.is8Bit() && adapter4.is8Bit() && adapter5.is8Bit() && adapter6.is8Bit() && adapter7.is8Bit() && adapter8.is8Bit() && adapter9.is8Bit()) {
- LChar* buffer;
- RefPtr<StringImpl> resultImpl = StringImpl::tryCreateUninitialized(length, buffer);
- if (!resultImpl)
- return 0;
-
- LChar* result = buffer;
- adapter1.writeTo(result);
- result += adapter1.length();
- adapter2.writeTo(result);
- result += adapter2.length();
- adapter3.writeTo(result);
- result += adapter3.length();
- adapter4.writeTo(result);
- result += adapter4.length();
- adapter5.writeTo(result);
- result += adapter5.length();
- adapter6.writeTo(result);
- result += adapter6.length();
- adapter7.writeTo(result);
- result += adapter7.length();
- adapter8.writeTo(result);
- result += adapter8.length();
- adapter9.writeTo(result);
-
- return resultImpl.release();
+ return WTFMove(resultImpl);
}
UChar* buffer;
RefPtr<StringImpl> resultImpl = StringImpl::tryCreateUninitialized(length, buffer);
if (!resultImpl)
- return 0;
-
- UChar* result = buffer;
- adapter1.writeTo(result);
- result += adapter1.length();
- adapter2.writeTo(result);
- result += adapter2.length();
- adapter3.writeTo(result);
- result += adapter3.length();
- adapter4.writeTo(result);
- result += adapter4.length();
- adapter5.writeTo(result);
- result += adapter5.length();
- adapter6.writeTo(result);
- result += adapter6.length();
- adapter7.writeTo(result);
- result += adapter7.length();
- adapter8.writeTo(result);
- result += adapter8.length();
- adapter9.writeTo(result);
-
- return resultImpl.release();
-}
-
-
-// Convenience only.
-template<typename StringType1>
-String makeString(StringType1 string1)
-{
- return String(string1);
-}
-
-template<typename StringType1, typename StringType2>
-String makeString(StringType1 string1, StringType2 string2)
-{
- RefPtr<StringImpl> resultImpl = tryMakeString(string1, string2);
- if (!resultImpl)
- CRASH();
- return resultImpl.release();
-}
-
-template<typename StringType1, typename StringType2, typename StringType3>
-String makeString(StringType1 string1, StringType2 string2, StringType3 string3)
-{
- RefPtr<StringImpl> resultImpl = tryMakeString(string1, string2, string3);
- if (!resultImpl)
- CRASH();
- return resultImpl.release();
-}
-
-template<typename StringType1, typename StringType2, typename StringType3, typename StringType4>
-String makeString(StringType1 string1, StringType2 string2, StringType3 string3, StringType4 string4)
-{
- RefPtr<StringImpl> resultImpl = tryMakeString(string1, string2, string3, string4);
- if (!resultImpl)
- CRASH();
- return resultImpl.release();
-}
+ return String();
-template<typename StringType1, typename StringType2, typename StringType3, typename StringType4, typename StringType5>
-String makeString(StringType1 string1, StringType2 string2, StringType3 string3, StringType4 string4, StringType5 string5)
-{
- RefPtr<StringImpl> resultImpl = tryMakeString(string1, string2, string3, string4, string5);
- if (!resultImpl)
- CRASH();
- return resultImpl.release();
-}
+ makeStringAccumulator(buffer, adapter, adapters...);
-template<typename StringType1, typename StringType2, typename StringType3, typename StringType4, typename StringType5, typename StringType6>
-String makeString(StringType1 string1, StringType2 string2, StringType3 string3, StringType4 string4, StringType5 string5, StringType6 string6)
-{
- RefPtr<StringImpl> resultImpl = tryMakeString(string1, string2, string3, string4, string5, string6);
- if (!resultImpl)
- CRASH();
- return resultImpl.release();
+ return WTFMove(resultImpl);
}
-template<typename StringType1, typename StringType2, typename StringType3, typename StringType4, typename StringType5, typename StringType6, typename StringType7>
-String makeString(StringType1 string1, StringType2 string2, StringType3 string3, StringType4 string4, StringType5 string5, StringType6 string6, StringType7 string7)
+template<typename... StringTypes>
+String tryMakeString(StringTypes ...strings)
{
- RefPtr<StringImpl> resultImpl = tryMakeString(string1, string2, string3, string4, string5, string6, string7);
- if (!resultImpl)
- CRASH();
- return resultImpl.release();
+ return tryMakeStringFromAdapters(StringTypeAdapter<StringTypes>(strings)...);
}
-template<typename StringType1, typename StringType2, typename StringType3, typename StringType4, typename StringType5, typename StringType6, typename StringType7, typename StringType8>
-String makeString(StringType1 string1, StringType2 string2, StringType3 string3, StringType4 string4, StringType5 string5, StringType6 string6, StringType7 string7, StringType8 string8)
+// Convenience only.
+template<typename StringType>
+String makeString(StringType string)
{
- RefPtr<StringImpl> resultImpl = tryMakeString(string1, string2, string3, string4, string5, string6, string7, string8);
- if (!resultImpl)
- CRASH();
- return resultImpl.release();
+ return String(string);
}
-template<typename StringType1, typename StringType2, typename StringType3, typename StringType4, typename StringType5, typename StringType6, typename StringType7, typename StringType8, typename StringType9>
-String makeString(StringType1 string1, StringType2 string2, StringType3 string3, StringType4 string4, StringType5 string5, StringType6 string6, StringType7 string7, StringType8 string8, StringType9 string9)
+template<typename... StringTypes>
+String makeString(StringTypes... strings)
{
- RefPtr<StringImpl> resultImpl = tryMakeString(string1, string2, string3, string4, string5, string6, string7, string8, string9);
- if (!resultImpl)
+ String result = tryMakeString(strings...);
+ if (!result)
CRASH();
- return resultImpl.release();
+ return result;
}
} // namespace WTF
using WTF::makeString;
+using WTF::tryMakeString;
#include <wtf/text/StringOperators.h>
#endif
diff --git a/Source/WTF/wtf/text/StringConcatenateNumbers.h b/Source/WTF/wtf/text/StringConcatenateNumbers.h
new file mode 100644
index 000000000..293e74504
--- /dev/null
+++ b/Source/WTF/wtf/text/StringConcatenateNumbers.h
@@ -0,0 +1,175 @@
+/*
+ * Copyright (C) 2017 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#pragma once
+
+#include <wtf/dtoa.h>
+#include <wtf/text/IntegerToStringConversion.h>
+#include <wtf/text/StringConcatenate.h>
+
+namespace WTF {
+
+template<>
+class StringTypeAdapter<int> { // Specialization that presents a signed int as a string piece (length / is8Bit / writeTo / toString).
+public:
+ StringTypeAdapter<int>(int number)
+ : m_number(number) // Only the value is stored here; formatting is deferred to length()/writeTo()/toString().
+ {
+ }
+
+ unsigned length() const { return lengthOfNumberAsStringSigned(m_number); } // Character count as reported by lengthOfNumberAsStringSigned().
+ bool is8Bit() const { return true; } // Always reports 8-bit, so this piece never forces the 16-bit path.
+
+ void writeTo(LChar* destination) const { writeNumberToBufferSigned(m_number, destination); } // 8-bit destination; presumably needs room for length() characters - confirm against the helper.
+ void writeTo(UChar* destination) const { writeNumberToBufferSigned(m_number, destination); } // 16-bit destination; same helper, same value.
+
+ String toString() const { return String::number(m_number); } // Standalone conversion via String::number().
+
+private:
+ int m_number;
+};
+
+template<>
+class StringTypeAdapter<unsigned> { // Specialization that presents an unsigned int as a string piece; mirrors the int adapter using the *Unsigned helpers.
+public:
+ StringTypeAdapter<unsigned>(unsigned number)
+ : m_number(number) // Only the value is stored here; formatting is deferred to length()/writeTo()/toString().
+ {
+ }
+
+ unsigned length() const { return lengthOfNumberAsStringUnsigned(m_number); } // Character count as reported by lengthOfNumberAsStringUnsigned().
+ bool is8Bit() const { return true; } // Always reports 8-bit.
+
+ void writeTo(LChar* destination) const { writeNumberToBufferUnsigned(m_number, destination); } // 8-bit destination; presumably needs room for length() characters - confirm against the helper.
+ void writeTo(UChar* destination) const { writeNumberToBufferUnsigned(m_number, destination); } // 16-bit destination; same helper, same value.
+
+ String toString() const { return String::number(m_number); } // Standalone conversion via String::number().
+
+private:
+ unsigned m_number;
+};
+
+template<>
+class StringTypeAdapter<double> { // Specialization that presents a double as a string piece; the text is produced once, in the constructor.
+public:
+ StringTypeAdapter<double>(double number)
+ {
+ numberToString(number, m_buffer); // Format eagerly into the member buffer.
+ m_length = strlen(m_buffer); // Cache the length; strlen() requires numberToString() to have NUL-terminated m_buffer.
+ }
+
+ unsigned length() const { return m_length; } // Length cached by the constructor.
+ bool is8Bit() const { return true; } // Always reports 8-bit.
+
+ void writeTo(LChar* destination) const // Copies the m_length formatted characters into an 8-bit destination.
+ {
+ for (unsigned i = 0; i < m_length; ++i)
+ destination[i] = m_buffer[i];
+ }
+
+ void writeTo(UChar* destination) const // Same copy into a 16-bit destination; each character is converted by the assignment.
+ {
+ for (unsigned i = 0; i < m_length; ++i)
+ destination[i] = m_buffer[i];
+ }
+
+ String toString() const { return { m_buffer, m_length }; } // Builds a String from the buffer pointer and the cached length.
+
+private:
+ NumberToStringBuffer m_buffer; // Filled by numberToString() in the constructor.
+ unsigned m_length; // strlen(m_buffer), computed once in the constructor.
+};
+
+template<>
+class StringTypeAdapter<float> : public StringTypeAdapter<double> { // float reuses the double adapter's formatting and accessors unchanged.
+public:
+ StringTypeAdapter<float>(float number)
+ : StringTypeAdapter<double>(number) // The float is converted to double and formatted by the base-class constructor.
+ {
+ }
+};
+
+class FormattedNumber { // Holds the text of a double formatted by one of the two static factories below.
+public:
+ static FormattedNumber fixedPrecision(double number, unsigned significantFigures = 6, bool truncateTrailingZeros = false) // Formats via numberToFixedPrecisionString(); the arguments are forwarded as given.
+ {
+ FormattedNumber numberFormatter;
+ numberToFixedPrecisionString(number, significantFigures, numberFormatter.m_buffer, truncateTrailingZeros);
+ numberFormatter.m_length = strlen(numberFormatter.m_buffer); // strlen() requires the helper to have NUL-terminated the buffer.
+ return numberFormatter;
+ }
+
+ static FormattedNumber fixedWidth(double number, unsigned decimalPlaces) // Formats via numberToFixedWidthString() with the given number of decimal places.
+ {
+ FormattedNumber numberFormatter;
+ numberToFixedWidthString(number, decimalPlaces, numberFormatter.m_buffer);
+ numberFormatter.m_length = strlen(numberFormatter.m_buffer); // strlen() requires the helper to have NUL-terminated the buffer.
+ return numberFormatter;
+ }
+
+ unsigned length() const { return m_length; } // Length cached by the factory that built this object.
+ const LChar* buffer() const { return reinterpret_cast<const LChar*>(m_buffer); } // Exposes the formatted characters as LChar; valid only while this object is alive.
+
+private:
+ NumberToStringBuffer m_buffer; // Written by the factory functions.
+ unsigned m_length; // NOTE(review): no initializer and no user-declared constructor, so a default-constructed FormattedNumber has an indeterminate length - confirm nothing constructs one outside the factories.
+};
+
+template<>
+class StringTypeAdapter<FormattedNumber> { // Specialization that presents an already-formatted FormattedNumber as a string piece.
+public:
+ StringTypeAdapter<FormattedNumber>(const FormattedNumber& numberFormatter)
+ : m_numberFormatter(numberFormatter) // Binds a reference; nothing is copied.
+ {
+ }
+
+ unsigned length() const { return m_numberFormatter.length(); } // Forwards to the referenced FormattedNumber.
+ bool is8Bit() const { return true; } // Always reports 8-bit.
+
+ void writeTo(LChar* destination) const // Copies length() characters from the referenced buffer into an 8-bit destination.
+ {
+ auto buffer = m_numberFormatter.buffer();
+ auto length = m_numberFormatter.length();
+ for (unsigned i = 0; i < length; ++i)
+ destination[i] = buffer[i];
+ }
+
+ void writeTo(UChar* destination) const // Same copy into a 16-bit destination; each LChar is widened by the assignment.
+ {
+ auto buffer = m_numberFormatter.buffer();
+ auto length = m_numberFormatter.length();
+ for (unsigned i = 0; i < length; ++i)
+ destination[i] = buffer[i];
+ }
+
+ String toString() const { return { m_numberFormatter.buffer(), m_numberFormatter.length() }; } // Builds a String from the referenced buffer and its length.
+
+private:
+ const FormattedNumber& m_numberFormatter; // Held by reference: the FormattedNumber must outlive this adapter.
+};
+
+}
+
+using WTF::FormattedNumber;
diff --git a/Source/WTF/wtf/text/StringHash.h b/Source/WTF/wtf/text/StringHash.h
index 88bdd9369..139b5169e 100644
--- a/Source/WTF/wtf/text/StringHash.h
+++ b/Source/WTF/wtf/text/StringHash.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2006, 2007, 2008, 2012, 2013 Apple Inc. All rights reserved
+ * Copyright (C) 2006-2008, 2012-2013, 2016 Apple Inc. All rights reserved
* Copyright (C) Research In Motion Limited 2009. All rights reserved.
*
* This library is free software; you can redistribute it and/or
@@ -24,7 +24,7 @@
#include <wtf/text/AtomicString.h>
#include <wtf/HashTraits.h>
-#include <wtf/StringHasher.h>
+#include <wtf/Hasher.h>
namespace WTF {
@@ -33,7 +33,15 @@ namespace WTF {
return value.isNull();
}
- // The hash() functions on StringHash and CaseFoldingHash do not support
+ inline void HashTraits<String>::customDeleteBucket(String& value) // Replaces the String in a bucket with the deleted-value marker.
+ {
+ // See unique_ptr's customDeleteBucket() for an explanation.
+ ASSERT(!isDeletedValue(value)); // The bucket must not already hold the deleted marker.
+ String valueToBeDestroyed = WTFMove(value); // Move the old contents into a local, which is destroyed when this function returns.
+ constructDeletedValue(value); // Mark the bucket as deleted before the local above goes out of scope.
+ }
+
+ // The hash() functions on StringHash and ASCIICaseInsensitiveHash do not support
// null strings. get(), contains(), and add() on HashMap<String,..., StringHash>
// cause a null-pointer dereference when passed null strings.
@@ -45,7 +53,7 @@ namespace WTF {
static unsigned hash(StringImpl* key) { return key->hash(); }
static inline bool equal(const StringImpl* a, const StringImpl* b)
{
- return equalNonNull(a, b);
+ return WTF::equal(*a, *b);
}
static unsigned hash(const RefPtr<StringImpl>& key) { return key->hash(); }
@@ -71,14 +79,11 @@ namespace WTF {
static const bool safeToCompareToEmptyOrDeleted = false;
};
- class CaseFoldingHash {
+ class ASCIICaseInsensitiveHash {
public:
template<typename T> static inline UChar foldCase(T character)
{
- if (std::is_same<T, LChar>::value)
- return StringImpl::latin1CaseFoldTable[character];
-
- return u_foldCase(character, U_FOLD_CASE_DEFAULT);
+ return toASCIILower(character);
}
static unsigned hash(const UChar* data, unsigned length)
@@ -105,17 +110,23 @@ namespace WTF {
static inline unsigned hash(const char* data, unsigned length)
{
- return CaseFoldingHash::hash(reinterpret_cast<const LChar*>(data), length);
+ return hash(reinterpret_cast<const LChar*>(data), length);
}
+ static inline bool equal(const StringImpl& a, const StringImpl& b)
+ {
+ return equalIgnoringASCIICase(a, b);
+ }
static inline bool equal(const StringImpl* a, const StringImpl* b)
{
- return equalIgnoringCaseNonNull(a, b);
+ ASSERT(a);
+ ASSERT(b);
+ return equal(*a, *b);
}
static unsigned hash(const RefPtr<StringImpl>& key)
{
- return hash(*key);
+ return hash(key.get());
}
static bool equal(const RefPtr<StringImpl>& a, const RefPtr<StringImpl>& b)
@@ -167,8 +178,8 @@ namespace WTF {
}
+using WTF::ASCIICaseInsensitiveHash;
using WTF::AlreadyHashed;
-using WTF::CaseFoldingHash;
using WTF::StringHash;
#endif
diff --git a/Source/WTF/wtf/text/StringImpl.cpp b/Source/WTF/wtf/text/StringImpl.cpp
index 34794258c..ee66daf25 100644
--- a/Source/WTF/wtf/text/StringImpl.cpp
+++ b/Source/WTF/wtf/text/StringImpl.cpp
@@ -2,7 +2,7 @@
* Copyright (C) 1999 Lars Knoll (knoll@kde.org)
* (C) 1999 Antti Koivisto (koivisto@kde.org)
* (C) 2001 Dirk Mueller ( mueller@kde.org )
- * Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2013 Apple Inc. All rights reserved.
+ * Copyright (C) 2003-2009, 2013-2016 Apple Inc. All rights reserved.
* Copyright (C) 2006 Andrew Wellington (proton@wiretapped.net)
*
* This library is free software; you can redistribute it and/or
@@ -30,12 +30,14 @@
#include "StringHash.h"
#include <wtf/ProcessID.h>
#include <wtf/StdLibExtras.h>
-#include <wtf/WTFThreadData.h>
#include <wtf/text/CString.h>
+#include <wtf/text/StringView.h>
+#include <wtf/text/SymbolImpl.h>
+#include <wtf/text/SymbolRegistry.h>
#include <wtf/unicode/CharacterNames.h>
#include <wtf/unicode/UTF8.h>
-#ifdef STRING_STATS
+#if STRING_STATS
#include <unistd.h>
#include <wtf/DataLog.h>
#endif
@@ -44,27 +46,21 @@ namespace WTF {
using namespace Unicode;
-COMPILE_ASSERT(sizeof(StringImpl) == 2 * sizeof(int) + 3 * sizeof(void*), StringImpl_should_stay_small);
+static_assert(sizeof(StringImpl) == 2 * sizeof(int) + 2 * sizeof(void*), "StringImpl should stay small");
-#ifdef STRING_STATS
+#if STRING_STATS
StringStats StringImpl::m_stringStats;
-unsigned StringStats::s_stringRemovesTillPrintStats = StringStats::s_printStringStatsFrequency;
+std::atomic<unsigned> StringStats::s_stringRemovesTillPrintStats(s_printStringStatsFrequency);
-void StringStats::removeString(StringImpl* string)
+void StringStats::removeString(StringImpl& string)
{
- unsigned length = string->length();
- bool isSubString = string->isSubString();
+ unsigned length = string.length();
+ bool isSubString = string.isSubString();
--m_totalNumberStrings;
- if (string->has16BitShadow()) {
- --m_numberUpconvertedStrings;
- if (!isSubString)
- m_totalUpconvertedData -= length;
- }
-
- if (string->is8Bit()) {
+ if (string.is8Bit()) {
--m_number8BitStrings;
if (!isSubString)
m_total8BitData -= length;
@@ -87,46 +83,46 @@ void StringStats::printStats()
unsigned long long totalNumberCharacters = m_total8BitData + m_total16BitData;
double percent8Bit = m_totalNumberStrings ? ((double)m_number8BitStrings * 100) / (double)m_totalNumberStrings : 0.0;
double average8bitLength = m_number8BitStrings ? (double)m_total8BitData / (double)m_number8BitStrings : 0.0;
- dataLogF("%8u (%5.2f%%) 8 bit %12llu chars %12llu bytes avg length %6.1f\n", m_number8BitStrings, percent8Bit, m_total8BitData, m_total8BitData, average8bitLength);
+ dataLogF("%8u (%5.2f%%) 8 bit %12llu chars %12llu bytes avg length %6.1f\n", m_number8BitStrings.load(), percent8Bit, m_total8BitData.load(), m_total8BitData.load(), average8bitLength);
double percent16Bit = m_totalNumberStrings ? ((double)m_number16BitStrings * 100) / (double)m_totalNumberStrings : 0.0;
double average16bitLength = m_number16BitStrings ? (double)m_total16BitData / (double)m_number16BitStrings : 0.0;
- dataLogF("%8u (%5.2f%%) 16 bit %12llu chars %12llu bytes avg length %6.1f\n", m_number16BitStrings, percent16Bit, m_total16BitData, m_total16BitData * 2, average16bitLength);
-
- double percentUpconverted = m_totalNumberStrings ? ((double)m_numberUpconvertedStrings * 100) / (double)m_number8BitStrings : 0.0;
- double averageUpconvertedLength = m_numberUpconvertedStrings ? (double)m_totalUpconvertedData / (double)m_numberUpconvertedStrings : 0.0;
- dataLogF("%8u (%5.2f%%) upconverted %12llu chars %12llu bytes avg length %6.1f\n", m_numberUpconvertedStrings, percentUpconverted, m_totalUpconvertedData, m_totalUpconvertedData * 2, averageUpconvertedLength);
+ dataLogF("%8u (%5.2f%%) 16 bit %12llu chars %12llu bytes avg length %6.1f\n", m_number16BitStrings.load(), percent16Bit, m_total16BitData.load(), m_total16BitData * 2, average16bitLength);
double averageLength = m_totalNumberStrings ? (double)totalNumberCharacters / (double)m_totalNumberStrings : 0.0;
- unsigned long long totalDataBytes = m_total8BitData + (m_total16BitData + m_totalUpconvertedData) * 2;
- dataLogF("%8u Total %12llu chars %12llu bytes avg length %6.1f\n", m_totalNumberStrings, totalNumberCharacters, totalDataBytes, averageLength);
- unsigned long long totalSavedBytes = m_total8BitData - m_totalUpconvertedData;
+ unsigned long long totalDataBytes = m_total8BitData + m_total16BitData * 2;
+ dataLogF("%8u Total %12llu chars %12llu bytes avg length %6.1f\n", m_totalNumberStrings.load(), totalNumberCharacters, totalDataBytes, averageLength);
+ unsigned long long totalSavedBytes = m_total8BitData;
double percentSavings = totalSavedBytes ? ((double)totalSavedBytes * 100) / (double)(totalDataBytes + totalSavedBytes) : 0.0;
dataLogF(" Total savings %12llu bytes (%5.2f%%)\n", totalSavedBytes, percentSavings);
+
+ dataLogF("%8u StringImpl::ref calls\n", m_refCalls.load());
+ dataLogF("%8u StringImpl::deref calls\n", m_derefCalls.load());
}
#endif
+StringImpl::StaticStringImpl StringImpl::s_atomicEmptyString("", StringImpl::StringAtomic);
StringImpl::~StringImpl()
{
ASSERT(!isStatic());
- STRING_STATS_REMOVE_STRING(this);
+ StringView::invalidate(*this);
- if (isAtomic())
- AtomicString::remove(this);
- if (isIdentifier()) {
- if (!wtfThreadData().currentIdentifierTable()->remove(this))
- CRASH();
- }
+ STRING_STATS_REMOVE_STRING(*this);
- BufferOwnership ownership = bufferOwnership();
+ if (isAtomic() && length() && !isSymbol())
+ AtomicStringImpl::remove(static_cast<AtomicStringImpl*>(this));
- if (has16BitShadow()) {
- ASSERT(m_copyData16);
- fastFree(m_copyData16);
+ if (isSymbol()) {
+ auto& symbol = static_cast<SymbolImpl&>(*this);
+ auto* symbolRegistry = symbol.symbolRegistry();
+ if (symbolRegistry)
+ symbolRegistry->remove(symbol);
}
+ BufferOwnership ownership = bufferOwnership();
+
if (ownership == BufferInternal)
return;
if (ownership == BufferOwned) {
@@ -137,8 +133,8 @@ StringImpl::~StringImpl()
}
ASSERT(ownership == BufferSubstring);
- ASSERT(m_substringBuffer);
- m_substringBuffer->deref();
+ ASSERT(substringBuffer());
+ substringBuffer()->deref();
}
void StringImpl::destroy(StringImpl* stringImpl)
@@ -147,19 +143,19 @@ void StringImpl::destroy(StringImpl* stringImpl)
fastFree(stringImpl);
}
-PassRef<StringImpl> StringImpl::createFromLiteral(const char* characters, unsigned length)
+Ref<StringImpl> StringImpl::createFromLiteral(const char* characters, unsigned length)
{
ASSERT_WITH_MESSAGE(length, "Use StringImpl::empty() to create an empty string");
ASSERT(charactersAreAllASCII<LChar>(reinterpret_cast<const LChar*>(characters), length));
return adoptRef(*new StringImpl(reinterpret_cast<const LChar*>(characters), length, ConstructWithoutCopying));
}
-PassRef<StringImpl> StringImpl::createFromLiteral(const char* characters)
+Ref<StringImpl> StringImpl::createFromLiteral(const char* characters)
{
return createFromLiteral(characters, strlen(characters));
}
-PassRef<StringImpl> StringImpl::createWithoutCopying(const UChar* characters, unsigned length)
+Ref<StringImpl> StringImpl::createWithoutCopying(const UChar* characters, unsigned length)
{
if (!length)
return *empty();
@@ -167,7 +163,7 @@ PassRef<StringImpl> StringImpl::createWithoutCopying(const UChar* characters, un
return adoptRef(*new StringImpl(characters, length, ConstructWithoutCopying));
}
-PassRef<StringImpl> StringImpl::createWithoutCopying(const LChar* characters, unsigned length)
+Ref<StringImpl> StringImpl::createWithoutCopying(const LChar* characters, unsigned length)
{
if (!length)
return *empty();
@@ -176,7 +172,7 @@ PassRef<StringImpl> StringImpl::createWithoutCopying(const LChar* characters, un
}
template <typename CharType>
-inline PassRef<StringImpl> StringImpl::createUninitializedInternal(unsigned length, CharType*& data)
+inline Ref<StringImpl> StringImpl::createUninitializedInternal(unsigned length, CharType*& data)
{
if (!length) {
data = 0;
@@ -186,7 +182,7 @@ inline PassRef<StringImpl> StringImpl::createUninitializedInternal(unsigned leng
}
template <typename CharType>
-inline PassRef<StringImpl> StringImpl::createUninitializedInternalNonEmpty(unsigned length, CharType*& data)
+inline Ref<StringImpl> StringImpl::createUninitializedInternalNonEmpty(unsigned length, CharType*& data)
{
ASSERT(length);
@@ -195,26 +191,25 @@ inline PassRef<StringImpl> StringImpl::createUninitializedInternalNonEmpty(unsig
// heap allocation from this call.
if (length > ((std::numeric_limits<unsigned>::max() - sizeof(StringImpl)) / sizeof(CharType)))
CRASH();
- size_t size = sizeof(StringImpl) + length * sizeof(CharType);
- StringImpl* string = static_cast<StringImpl*>(fastMalloc(size));
+ StringImpl* string = static_cast<StringImpl*>(fastMalloc(allocationSize<CharType>(length)));
- data = reinterpret_cast<CharType*>(string + 1);
+ data = string->tailPointer<CharType>();
return constructInternal<CharType>(string, length);
}
-PassRef<StringImpl> StringImpl::createUninitialized(unsigned length, LChar*& data)
+Ref<StringImpl> StringImpl::createUninitialized(unsigned length, LChar*& data)
{
return createUninitializedInternal(length, data);
}
-PassRef<StringImpl> StringImpl::createUninitialized(unsigned length, UChar*& data)
+Ref<StringImpl> StringImpl::createUninitialized(unsigned length, UChar*& data)
{
return createUninitializedInternal(length, data);
}
template <typename CharType>
-inline PassRef<StringImpl> StringImpl::reallocateInternal(PassRefPtr<StringImpl> originalString, unsigned length, CharType*& data)
-{
+inline Ref<StringImpl> StringImpl::reallocateInternal(Ref<StringImpl>&& originalString, unsigned length, CharType*& data)
+{
ASSERT(originalString->hasOneRef());
ASSERT(originalString->bufferOwnership() == BufferInternal);
@@ -226,28 +221,28 @@ inline PassRef<StringImpl> StringImpl::reallocateInternal(PassRefPtr<StringImpl>
// Same as createUninitialized() except here we use fastRealloc.
if (length > ((std::numeric_limits<unsigned>::max() - sizeof(StringImpl)) / sizeof(CharType)))
CRASH();
- size_t size = sizeof(StringImpl) + length * sizeof(CharType);
+
originalString->~StringImpl();
- StringImpl* string = static_cast<StringImpl*>(fastRealloc(originalString.leakRef(), size));
+ auto* string = static_cast<StringImpl*>(fastRealloc(&originalString.leakRef(), allocationSize<CharType>(length)));
- data = reinterpret_cast<CharType*>(string + 1);
+ data = string->tailPointer<CharType>();
return constructInternal<CharType>(string, length);
}
-PassRef<StringImpl> StringImpl::reallocate(PassRefPtr<StringImpl> originalString, unsigned length, LChar*& data)
+Ref<StringImpl> StringImpl::reallocate(Ref<StringImpl>&& originalString, unsigned length, LChar*& data)
{
ASSERT(originalString->is8Bit());
- return reallocateInternal(originalString, length, data);
+ return reallocateInternal(WTFMove(originalString), length, data);
}
-PassRef<StringImpl> StringImpl::reallocate(PassRefPtr<StringImpl> originalString, unsigned length, UChar*& data)
+Ref<StringImpl> StringImpl::reallocate(Ref<StringImpl>&& originalString, unsigned length, UChar*& data)
{
ASSERT(!originalString->is8Bit());
- return reallocateInternal(originalString, length, data);
+ return reallocateInternal(WTFMove(originalString), length, data);
}
template <typename CharType>
-inline PassRef<StringImpl> StringImpl::createInternal(const CharType* characters, unsigned length)
+inline Ref<StringImpl> StringImpl::createInternal(const CharType* characters, unsigned length)
{
if (!characters || !length)
return *empty();
@@ -258,23 +253,23 @@ inline PassRef<StringImpl> StringImpl::createInternal(const CharType* characters
return string;
}
-PassRef<StringImpl> StringImpl::create(const UChar* characters, unsigned length)
+Ref<StringImpl> StringImpl::create(const UChar* characters, unsigned length)
{
return createInternal(characters, length);
}
-PassRef<StringImpl> StringImpl::create(const LChar* characters, unsigned length)
+Ref<StringImpl> StringImpl::create(const LChar* characters, unsigned length)
{
return createInternal(characters, length);
}
-PassRef<StringImpl> StringImpl::create8BitIfPossible(const UChar* characters, unsigned length)
+Ref<StringImpl> StringImpl::create8BitIfPossible(const UChar* characters, unsigned length)
{
if (!characters || !length)
return *empty();
LChar* data;
- RefPtr<StringImpl> string = createUninitializedInternalNonEmpty(length, data);
+ auto string = createUninitializedInternalNonEmpty(length, data);
for (size_t i = 0; i < length; ++i) {
if (characters[i] & 0xff00)
@@ -282,15 +277,15 @@ PassRef<StringImpl> StringImpl::create8BitIfPossible(const UChar* characters, un
data[i] = static_cast<LChar>(characters[i]);
}
- return string.releaseNonNull();
+ return string;
}
-PassRef<StringImpl> StringImpl::create8BitIfPossible(const UChar* string)
+Ref<StringImpl> StringImpl::create8BitIfPossible(const UChar* string)
{
return StringImpl::create8BitIfPossible(string, lengthOfNullTerminatedString(string));
}
-PassRef<StringImpl> StringImpl::create(const LChar* string)
+Ref<StringImpl> StringImpl::create(const LChar* string)
{
if (!string)
return *empty();
@@ -300,41 +295,6 @@ PassRef<StringImpl> StringImpl::create(const LChar* string)
return create(string, length);
}
-const UChar* StringImpl::getData16SlowCase() const
-{
- if (has16BitShadow())
- return m_copyData16;
-
- if (bufferOwnership() == BufferSubstring) {
- // If this is a substring, return a pointer into the parent string.
- // TODO: Consider severing this string from the parent string
- unsigned offset = m_data8 - m_substringBuffer->characters8();
- return m_substringBuffer->deprecatedCharacters() + offset;
- }
-
- STRING_STATS_ADD_UPCONVERTED_STRING(m_length);
-
- unsigned len = length();
-
- m_copyData16 = static_cast<UChar*>(fastMalloc(len * sizeof(UChar)));
-
- m_hashAndFlags |= s_hashFlagHas16BitShadow;
-
- upconvertCharacters(0, len);
-
- return m_copyData16;
-}
-
-void StringImpl::upconvertCharacters(unsigned start, unsigned end) const
-{
- ASSERT(is8Bit());
- ASSERT(has16BitShadow());
-
- for (size_t i = start; i < end; ++i)
- m_copyData16[i] = m_data8[i];
-}
-
-
bool StringImpl::containsOnlyWhitespace()
{
// FIXME: The definition of whitespace here includes a number of characters
@@ -358,7 +318,7 @@ bool StringImpl::containsOnlyWhitespace()
return true;
}
-PassRef<StringImpl> StringImpl::substring(unsigned start, unsigned length)
+Ref<StringImpl> StringImpl::substring(unsigned start, unsigned length)
{
if (start >= m_length)
return *empty();
@@ -385,42 +345,23 @@ UChar32 StringImpl::characterStartingAt(unsigned i)
return 0;
}
-PassRef<StringImpl> StringImpl::lower()
+Ref<StringImpl> StringImpl::convertToLowercaseWithoutLocale()
{
- // Note: This is a hot function in the Dromaeo benchmark, specifically the
- // no-op code path up through the first 'return' statement.
+ // Note: At one time this was a hot function in the Dromaeo benchmark, specifically the
+ // no-op code path that may return ourself if we find no upper case letters and no invalid
+ // ASCII letters.
// First scan the string for uppercase and non-ASCII characters:
if (is8Bit()) {
- unsigned failingIndex;
for (unsigned i = 0; i < m_length; ++i) {
LChar character = m_data8[i];
- if (UNLIKELY((character & ~0x7F) || isASCIIUpper(character))) {
- failingIndex = i;
- goto SlowPath8bitLower;
- }
- }
- return *this;
-
-SlowPath8bitLower:
- LChar* data8;
- auto newImpl = createUninitializedInternalNonEmpty(m_length, data8);
-
- for (unsigned i = 0; i < failingIndex; ++i)
- data8[i] = m_data8[i];
-
- for (unsigned i = failingIndex; i < m_length; ++i) {
- LChar character = m_data8[i];
- if (!(character & ~0x7F))
- data8[i] = toASCIILower(character);
- else {
- ASSERT(u_tolower(character) <= 0xFF);
- data8[i] = static_cast<LChar>(u_tolower(character));
- }
+ if (UNLIKELY((character & ~0x7F) || isASCIIUpper(character)))
+ return convertToLowercaseWithoutLocaleStartingAtFailingIndex8Bit(i);
}
- return newImpl;
+ return *this;
}
+
bool noUpper = true;
unsigned ored = 0;
@@ -451,26 +392,51 @@ SlowPath8bitLower:
// Do a slower implementation for cases that include non-ASCII characters.
UChar* data16;
- RefPtr<StringImpl> newImpl = createUninitializedInternalNonEmpty(m_length, data16);
+ auto newImpl = createUninitializedInternalNonEmpty(m_length, data16);
UErrorCode status = U_ZERO_ERROR;
int32_t realLength = u_strToLower(data16, length, m_data16, m_length, "", &status);
if (U_SUCCESS(status) && realLength == length)
- return newImpl.releaseNonNull();
+ return newImpl;
newImpl = createUninitialized(realLength, data16);
status = U_ZERO_ERROR;
u_strToLower(data16, realLength, m_data16, m_length, "", &status);
if (U_FAILURE(status))
return *this;
- return newImpl.releaseNonNull();
+ return newImpl;
+}
+
+Ref<StringImpl> StringImpl::convertToLowercaseWithoutLocaleStartingAtFailingIndex8Bit(unsigned failingIndex) // Builds a new 8-bit string: characters before failingIndex are copied as-is, the rest are lowercased.
+{
+ ASSERT(is8Bit());
+ LChar* data8;
+ auto newImpl = createUninitializedInternalNonEmpty(m_length, data8); // data8 is the output buffer written by the two loops below.
+
+ for (unsigned i = 0; i < failingIndex; ++i) { // Prefix: nothing to convert, plain copy.
+ ASSERT(!(m_data8[i] & ~0x7F) && !isASCIIUpper(m_data8[i])); // Every character before failingIndex must be ASCII and not uppercase.
+ data8[i] = m_data8[i];
+ }
+
+ for (unsigned i = failingIndex; i < m_length; ++i) { // Remainder: convert character by character.
+ LChar character = m_data8[i];
+ if (!(character & ~0x7F)) // ASCII range (high bit clear).
+ data8[i] = toASCIILower(character);
+ else {
+ ASSERT(u_tolower(character) <= 0xFF); // The result is stored in an LChar, so it has to fit in 8 bits.
+ data8[i] = static_cast<LChar>(u_tolower(character));
+ }
+ }
+
+ return newImpl;
}
-PassRef<StringImpl> StringImpl::upper()
+Ref<StringImpl> StringImpl::convertToUppercaseWithoutLocale()
{
- // This function could be optimized for no-op cases the way lower() is,
- // but in empirical testing, few actual calls to upper() are no-ops, so
- // it wouldn't be worth the extra time for pre-scanning.
+ // This function could be optimized for no-op cases the way
+ // convertToLowercaseWithoutLocale() is, but in empirical testing,
+ // few actual calls to upper() are no-ops, so it wouldn't be worth
+ // the extra time for pre-scanning.
if (m_length > static_cast<unsigned>(std::numeric_limits<int32_t>::max()))
CRASH();
@@ -478,30 +444,23 @@ PassRef<StringImpl> StringImpl::upper()
if (is8Bit()) {
LChar* data8;
- RefPtr<StringImpl> newImpl = createUninitialized(m_length, data8);
+ auto newImpl = createUninitialized(m_length, data8);
// Do a faster loop for the case where all the characters are ASCII.
unsigned ored = 0;
for (int i = 0; i < length; ++i) {
LChar c = m_data8[i];
ored |= c;
-#if CPU(X86) && defined(_MSC_VER) && _MSC_VER >=1700
- // Workaround for an MSVC 2012 x86 optimizer bug. Remove once the bug is fixed.
- // See https://connect.microsoft.com/VisualStudio/feedback/details/780362/optimization-bug-of-range-comparison
- // for more details.
- data8[i] = c >= 'a' && c <= 'z' ? c & ~0x20 : c;
-#else
data8[i] = toASCIIUpper(c);
-#endif
}
if (!(ored & ~0x7F))
- return newImpl.releaseNonNull();
+ return newImpl;
// Do a slower implementation for cases that include non-ASCII Latin-1 characters.
int numberSharpSCharacters = 0;
// There are two special cases.
- // 1. latin-1 characters when converted to upper case are 16 bit characters.
+ // 1. Some Latin-1 characters when converted to upper case are 16 bit characters.
// 2. Lower case sharp-S converts to "SS" (two characters)
for (int32_t i = 0; i < length; ++i) {
LChar c = m_data8[i];
@@ -509,7 +468,7 @@ PassRef<StringImpl> StringImpl::upper()
++numberSharpSCharacters;
ASSERT(u_toupper(c) <= 0xFFFF);
UChar upper = u_toupper(c);
- if (UNLIKELY(upper > 0xff)) {
+ if (UNLIKELY(upper > 0xFF)) {
// Since this upper-cased character does not fit in an 8-bit string, we need to take the 16-bit path.
goto upconvert;
}
@@ -517,7 +476,7 @@ PassRef<StringImpl> StringImpl::upper()
}
if (!numberSharpSCharacters)
- return newImpl.releaseNonNull();
+ return newImpl;
// We have numberSSCharacters sharp-s characters, but none of the other special characters.
newImpl = createUninitialized(m_length + numberSharpSCharacters, data8);
@@ -535,14 +494,15 @@ PassRef<StringImpl> StringImpl::upper()
}
}
- return newImpl.releaseNonNull();
+ return newImpl;
}
upconvert:
- const UChar* source16 = deprecatedCharacters();
+ auto upconvertedCharacters = StringView(*this).upconvertedCharacters();
+ const UChar* source16 = upconvertedCharacters;
UChar* data16;
- RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16);
+ auto newImpl = createUninitialized(m_length, data16);
// Do a faster loop for the case where all the characters are ASCII.
unsigned ored = 0;
@@ -552,19 +512,19 @@ upconvert:
data16[i] = toASCIIUpper(c);
}
if (!(ored & ~0x7F))
- return newImpl.releaseNonNull();
+ return newImpl;
// Do a slower implementation for cases that include non-ASCII characters.
UErrorCode status = U_ZERO_ERROR;
int32_t realLength = u_strToUpper(data16, length, source16, m_length, "", &status);
if (U_SUCCESS(status) && realLength == length)
- return newImpl.releaseNonNull();
+ return newImpl;
newImpl = createUninitialized(realLength, data16);
status = U_ZERO_ERROR;
u_strToUpper(data16, realLength, source16, m_length, "", &status);
if (U_FAILURE(status))
return *this;
- return newImpl.releaseNonNull();
+ return newImpl;
}
static inline bool needsTurkishCasingRules(const AtomicString& localeIdentifier)
@@ -577,14 +537,14 @@ static inline bool needsTurkishCasingRules(const AtomicString& localeIdentifier)
&& (localeIdentifier.length() == 2 || localeIdentifier[2] == '-');
}
-PassRef<StringImpl> StringImpl::lower(const AtomicString& localeIdentifier)
+Ref<StringImpl> StringImpl::convertToLowercaseWithLocale(const AtomicString& localeIdentifier)
{
// Use the more-optimized code path most of the time.
// Assuming here that the only locale-specific lowercasing is the Turkish casing rules.
// FIXME: Could possibly optimize further by looking for the specific sequences
// that have locale-specific lowercasing. There are only three of them.
if (!needsTurkishCasingRules(localeIdentifier))
- return lower();
+ return convertToLowercaseWithoutLocale();
// FIXME: Could share more code with the main StringImpl::lower by factoring out
// this last part into a shared function that takes a locale string, since this is
@@ -597,28 +557,29 @@ PassRef<StringImpl> StringImpl::lower(const AtomicString& localeIdentifier)
// Below, we pass in the hardcoded locale "tr". Passing that is more efficient than
// allocating memory just to turn localeIdentifier into a C string, and we assume
// there is no difference between the uppercasing for "tr" and "az" locales.
- const UChar* source16 = deprecatedCharacters();
+ auto upconvertedCharacters = StringView(*this).upconvertedCharacters();
+ const UChar* source16 = upconvertedCharacters;
UChar* data16;
- RefPtr<StringImpl> newString = createUninitialized(length, data16);
+ auto newString = createUninitialized(length, data16);
UErrorCode status = U_ZERO_ERROR;
int realLength = u_strToLower(data16, length, source16, length, "tr", &status);
if (U_SUCCESS(status) && realLength == length)
- return newString.releaseNonNull();
+ return newString;
newString = createUninitialized(realLength, data16);
status = U_ZERO_ERROR;
u_strToLower(data16, realLength, source16, length, "tr", &status);
if (U_FAILURE(status))
return *this;
- return newString.releaseNonNull();
+ return newString;
}
-PassRef<StringImpl> StringImpl::upper(const AtomicString& localeIdentifier)
+Ref<StringImpl> StringImpl::convertToUppercaseWithLocale(const AtomicString& localeIdentifier)
{
// Use the more-optimized code path most of the time.
// Assuming here that the only locale-specific lowercasing is the Turkish casing rules,
// and that the only affected character is lowercase "i".
if (!needsTurkishCasingRules(localeIdentifier) || find('i') == notFound)
- return upper();
+ return convertToUppercaseWithoutLocale();
if (m_length > static_cast<unsigned>(std::numeric_limits<int32_t>::max()))
CRASH();
@@ -627,95 +588,145 @@ PassRef<StringImpl> StringImpl::upper(const AtomicString& localeIdentifier)
// Below, we pass in the hardcoded locale "tr". Passing that is more efficient than
// allocating memory just to turn localeIdentifier into a C string, and we assume
// there is no difference between the uppercasing for "tr" and "az" locales.
- const UChar* source16 = deprecatedCharacters();
+ auto upconvertedCharacters = StringView(*this).upconvertedCharacters();
+ const UChar* source16 = upconvertedCharacters;
UChar* data16;
- RefPtr<StringImpl> newString = createUninitialized(length, data16);
+ auto newString = createUninitialized(length, data16);
UErrorCode status = U_ZERO_ERROR;
int realLength = u_strToUpper(data16, length, source16, length, "tr", &status);
if (U_SUCCESS(status) && realLength == length)
- return newString.releaseNonNull();
+ return newString;
newString = createUninitialized(realLength, data16);
status = U_ZERO_ERROR;
u_strToUpper(data16, realLength, source16, length, "tr", &status);
if (U_FAILURE(status))
return *this;
- return newString.releaseNonNull();
-}
-
-PassRef<StringImpl> StringImpl::fill(UChar character)
-{
- if (!(character & ~0x7F)) {
- LChar* data;
- auto newImpl = createUninitialized(m_length, data);
- for (unsigned i = 0; i < m_length; ++i)
- data[i] = character;
- return newImpl;
- }
- UChar* data;
- auto newImpl = createUninitialized(m_length, data);
- for (unsigned i = 0; i < m_length; ++i)
- data[i] = character;
- return newImpl;
+ return newString;
}
-PassRef<StringImpl> StringImpl::foldCase()
+Ref<StringImpl> StringImpl::foldCase() // Returns a case-folded string; returns *this when the string is all ASCII with no uppercase, or when the final u_strFoldCase call fails.
{
- if (m_length > static_cast<unsigned>(std::numeric_limits<int32_t>::max()))
- CRASH();
- int32_t length = m_length;
-
if (is8Bit()) {
- // Do a faster loop for the case where all the characters are ASCII.
- LChar* data;
- auto newImpl = createUninitialized(m_length, data);
- LChar ored = 0;
-
- for (int32_t i = 0; i < length; ++i) {
- LChar c = m_data8[i];
- data[i] = toASCIILower(c);
- ored |= c;
+ unsigned failingIndex; // Index of the first character that needs work; only read after the goto below sets it.
+ for (unsigned i = 0; i < m_length; ++i) {
+ auto character = m_data8[i];
+ if (UNLIKELY(!isASCII(character) || isASCIIUpper(character))) { // First non-ASCII or ASCII-uppercase character ends the no-op scan.
+ failingIndex = i;
+ goto SlowPath;
+ }
}
+ // String was all ASCII and no uppercase, so just return as-is.
+ return *this;
- if (!(ored & ~0x7F))
- return newImpl;
-
- // Do a slower implementation for cases that include non-ASCII Latin-1 characters.
- // FIXME: Shouldn't this use u_foldCase instead of u_tolower?
- for (int32_t i = 0; i < length; ++i) {
- ASSERT(u_tolower(m_data8[i]) <= 0xFF);
- data[i] = static_cast<LChar>(u_tolower(m_data8[i]));
+SlowPath:
+ bool need16BitCharacters = false;
+ for (unsigned i = failingIndex; i < m_length; ++i) {
+ auto character = m_data8[i];
+ if (character == 0xB5 || character == 0xDF) { // These two Latin-1 characters are not folded into the 8-bit buffer.
+ need16BitCharacters = true; // Skips the 8-bit block below and falls through to the u_strFoldCase path.
+ break;
+ }
}
- return newImpl;
+ if (!need16BitCharacters) { // Result is produced directly into a new 8-bit buffer.
+ LChar* data8;
+ auto folded = createUninitializedInternalNonEmpty(m_length, data8);
+ for (unsigned i = 0; i < failingIndex; ++i) // Prefix was already scanned as ASCII without uppercase: plain copy.
+ data8[i] = m_data8[i];
+ for (unsigned i = failingIndex; i < m_length; ++i) {
+ auto character = m_data8[i];
+ if (isASCII(character))
+ data8[i] = toASCIILower(character);
+ else {
+ ASSERT(u_foldCase(character, U_FOLD_CASE_DEFAULT) <= 0xFF); // The result is stored in an LChar, so it has to fit in 8 bits.
+ data8[i] = static_cast<LChar>(u_foldCase(character, U_FOLD_CASE_DEFAULT));
+ }
+ }
+ return folded;
+ }
+ } else {
+ // FIXME: Unclear why we use goto in the 8-bit case, and a different approach in the 16-bit case.
+ bool noUpper = true;
+ unsigned ored = 0; // OR of all characters; any bit above 0x7F means a non-ASCII character is present.
+ for (unsigned i = 0; i < m_length; ++i) {
+ UChar character = m_data16[i];
+ if (UNLIKELY(isASCIIUpper(character)))
+ noUpper = false;
+ ored |= character;
+ }
+ if (!(ored & ~0x7F)) { // All characters are ASCII.
+ if (noUpper) {
+ // String was all ASCII and no uppercase, so just return as-is.
+ return *this;
+ }
+ UChar* data16;
+ auto folded = createUninitializedInternalNonEmpty(m_length, data16);
+ for (unsigned i = 0; i < m_length; ++i)
+ data16[i] = toASCIILower(m_data16[i]);
+ return folded;
+ }
}
- // Do a faster loop for the case where all the characters are ASCII.
- UChar* data;
- RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
- UChar ored = 0;
- for (int32_t i = 0; i < length; ++i) {
- UChar c = m_data16[i];
- ored |= c;
- data[i] = toASCIILower(c);
- }
- if (!(ored & ~0x7F))
- return newImpl.releaseNonNull();
+ if (m_length > static_cast<unsigned>(std::numeric_limits<int32_t>::max())) // m_length is converted to int32_t below for u_strFoldCase.
+ CRASH();
- // Do a slower implementation for cases that include non-ASCII characters.
+ auto upconvertedCharacters = StringView(*this).upconvertedCharacters(); // Source passed to u_strFoldCase; this point is reached by both 8-bit and 16-bit strings.
+
+ UChar* data;
+ auto folded = createUninitializedInternalNonEmpty(m_length, data);
+ int32_t length = m_length;
UErrorCode status = U_ZERO_ERROR;
- int32_t realLength = u_strFoldCase(data, length, m_data16, m_length, U_FOLD_CASE_DEFAULT, &status);
+ int32_t realLength = u_strFoldCase(data, length, upconvertedCharacters, length, U_FOLD_CASE_DEFAULT, &status); // First attempt: destination capacity equals the source length.
if (U_SUCCESS(status) && realLength == length)
- return newImpl.releaseNonNull();
- newImpl = createUninitialized(realLength, data);
+ return folded;
+ ASSERT(realLength > length);
+ folded = createUninitializedInternalNonEmpty(realLength, data); // Retry with a buffer of the length reported by the first call.
status = U_ZERO_ERROR;
- u_strFoldCase(data, realLength, m_data16, m_length, U_FOLD_CASE_DEFAULT, &status);
+ u_strFoldCase(data, realLength, upconvertedCharacters, length, U_FOLD_CASE_DEFAULT, &status);
if (U_FAILURE(status))
return *this;
- return newImpl.releaseNonNull();
+ return folded;
+}
+
+template<StringImpl::CaseConvertType type, typename CharacterType>
+ALWAYS_INLINE Ref<StringImpl> StringImpl::convertASCIICase(StringImpl& impl, const CharacterType* data, unsigned length) // Returns impl itself when no character needs converting; otherwise a new string with ASCII letters converted per `type`.
+{
+ unsigned failingIndex; // Index of the first character that needs converting; only read after the goto below sets it.
+ for (unsigned i = 0; i < length; ++i) {
+ CharacterType character = data[i];
+ if (type == CaseConvertType::Lower ? UNLIKELY(isASCIIUpper(character)) : LIKELY(isASCIILower(character))) { // Lower: look for an ASCII uppercase letter; otherwise look for an ASCII lowercase letter.
+ failingIndex = i;
+ goto SlowPath;
+ }
+ }
+ return impl; // Scan found nothing to convert.
+
+SlowPath:
+ CharacterType* newData;
+ auto newImpl = createUninitializedInternalNonEmpty(length, newData);
+ for (unsigned i = 0; i < failingIndex; ++i) // Characters before failingIndex were already checked: plain copy.
+ newData[i] = data[i];
+ for (unsigned i = failingIndex; i < length; ++i)
+ newData[i] = type == CaseConvertType::Lower ? toASCIILower(data[i]) : toASCIIUpper(data[i]);
+ return newImpl;
+}
+
+Ref<StringImpl> StringImpl::convertToASCIILowercase() // Dispatches on character width to convertASCIICase<CaseConvertType::Lower>.
+{
+ if (is8Bit())
+ return convertASCIICase<CaseConvertType::Lower>(*this, m_data8, m_length);
+ return convertASCIICase<CaseConvertType::Lower>(*this, m_data16, m_length);
+}
+
+Ref<StringImpl> StringImpl::convertToASCIIUppercase() // Dispatches on character width to convertASCIICase<CaseConvertType::Upper>.
+{
+ if (is8Bit())
+ return convertASCIICase<CaseConvertType::Upper>(*this, m_data8, m_length);
+ return convertASCIICase<CaseConvertType::Upper>(*this, m_data16, m_length);
}
template <class UCharPredicate>
-inline PassRef<StringImpl> StringImpl::stripMatchedCharacters(UCharPredicate predicate)
+inline Ref<StringImpl> StringImpl::stripMatchedCharacters(UCharPredicate predicate)
{
if (!m_length)
return *this;
@@ -763,18 +774,18 @@ public:
}
};
-PassRef<StringImpl> StringImpl::stripWhiteSpace()
+Ref<StringImpl> StringImpl::stripWhiteSpace() // Delegates to stripMatchedCharacters() with a SpaceOrNewlinePredicate.
{
return stripMatchedCharacters(SpaceOrNewlinePredicate());
}
-PassRef<StringImpl> StringImpl::stripWhiteSpace(IsWhiteSpaceFunctionPtr isWhiteSpace)
+Ref<StringImpl> StringImpl::stripWhiteSpace(IsWhiteSpaceFunctionPtr isWhiteSpace) // Delegates to stripMatchedCharacters() with the caller's function wrapped in a UCharPredicate.
{
return stripMatchedCharacters(UCharPredicate(isWhiteSpace));
}
template <typename CharType>
-ALWAYS_INLINE PassRef<StringImpl> StringImpl::removeCharacters(const CharType* characters, CharacterMatchFunctionPtr findMatch)
+ALWAYS_INLINE Ref<StringImpl> StringImpl::removeCharacters(const CharType* characters, CharacterMatchFunctionPtr findMatch)
{
const CharType* from = characters;
const CharType* fromend = from + m_length;
@@ -803,10 +814,10 @@ ALWAYS_INLINE PassRef<StringImpl> StringImpl::removeCharacters(const CharType* c
data.shrink(outc);
- return adopt(data);
+ return adopt(WTFMove(data));
}
-PassRef<StringImpl> StringImpl::removeCharacters(CharacterMatchFunctionPtr findMatch)
+Ref<StringImpl> StringImpl::removeCharacters(CharacterMatchFunctionPtr findMatch)
{
if (is8Bit())
return removeCharacters(characters8(), findMatch);
@@ -814,11 +825,11 @@ PassRef<StringImpl> StringImpl::removeCharacters(CharacterMatchFunctionPtr findM
}
template <typename CharType, class UCharPredicate>
-inline PassRef<StringImpl> StringImpl::simplifyMatchedCharactersToSpace(UCharPredicate predicate)
+inline Ref<StringImpl> StringImpl::simplifyMatchedCharactersToSpace(UCharPredicate predicate)
{
StringBuffer<CharType> data(m_length);
- const CharType* from = getCharacters<CharType>();
+ const CharType* from = characters<CharType>();
const CharType* fromend = from + m_length;
int outc = 0;
bool changedToSpace = false;
@@ -847,17 +858,17 @@ inline PassRef<StringImpl> StringImpl::simplifyMatchedCharactersToSpace(UCharPre
data.shrink(outc);
- return adopt(data);
+ return adopt(WTFMove(data));
}
-PassRef<StringImpl> StringImpl::simplifyWhiteSpace()
+Ref<StringImpl> StringImpl::simplifyWhiteSpace() // Dispatches on character width to simplifyMatchedCharactersToSpace() with a SpaceOrNewlinePredicate.
{
if (is8Bit())
return StringImpl::simplifyMatchedCharactersToSpace<LChar>(SpaceOrNewlinePredicate());
return StringImpl::simplifyMatchedCharactersToSpace<UChar>(SpaceOrNewlinePredicate());
}
-PassRef<StringImpl> StringImpl::simplifyWhiteSpace(IsWhiteSpaceFunctionPtr isWhiteSpace)
+Ref<StringImpl> StringImpl::simplifyWhiteSpace(IsWhiteSpaceFunctionPtr isWhiteSpace)
{
if (is8Bit())
return StringImpl::simplifyMatchedCharactersToSpace<LChar>(UCharPredicate(isWhiteSpace));
@@ -948,24 +959,54 @@ float StringImpl::toFloat(bool* ok)
return charactersToFloat(characters16(), m_length, ok);
}
-bool equalIgnoringCase(const LChar* a, const LChar* b, unsigned length)
+// Table is based on ftp://ftp.unicode.org/Public/UNIDATA/CaseFolding.txt
+static const UChar latin1CaseFoldTable[256] = { // Indexed by an 8-bit character value. 0x41-0x5A map to 0x61-0x7A, 0xC0-0xDE (except 0xD7) map to 0xE0-0xFE, 0xB5 maps to 0x03BC; every other entry maps to itself.
+ 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f,
+ 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, 0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f,
+ 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f,
+ 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f,
+ 0x0040, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f,
+ 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 0x0078, 0x0079, 0x007a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f,
+ 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f,
+ 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f,
+ 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
+ 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
+ 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
+ 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x03bc, 0x00b6, 0x00b7, 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
+ 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
+ 0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00d7, 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00df,
+ 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
+ 0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7, 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,
+};
+
+static inline bool equalCompatibilityCaseless(const LChar* a, const LChar* b, unsigned length) // True when the first `length` characters of a and b match after each is mapped through latin1CaseFoldTable.
{
while (length--) {
- if (StringImpl::latin1CaseFoldTable[*a++] != StringImpl::latin1CaseFoldTable[*b++])
+ if (latin1CaseFoldTable[*a++] != latin1CaseFoldTable[*b++]) // Compare folded values, not raw characters.
return false;
}
return true;
}
-bool equalIgnoringCase(const UChar* a, const LChar* b, unsigned length)
+static inline bool equalCompatibilityCaseless(const UChar* a, const LChar* b, unsigned length) // Mixed-width overload: compares the first `length` characters of a and b after folding.
{
while (length--) {
- if (u_foldCase(*a++, U_FOLD_CASE_DEFAULT) != StringImpl::latin1CaseFoldTable[*b++])
+ if (u_foldCase(*a++, U_FOLD_CASE_DEFAULT) != latin1CaseFoldTable[*b++]) // 16-bit side is folded with u_foldCase, 8-bit side with the table.
return false;
}
return true;
}
+static inline bool equalCompatibilityCaseless(const LChar* a, const UChar* b, unsigned length) // Argument-swapped overload; forwards to the (const UChar*, const LChar*) version.
+{
+ return equalCompatibilityCaseless(b, a, length);
+}
+
+static inline bool equalCompatibilityCaseless(const UChar* a, const UChar* b, unsigned length) // Both sides 16-bit: the comparison is delegated to u_memcasecmp.
+{
+ return !u_memcasecmp(a, b, length, U_FOLD_CASE_DEFAULT); // A result of 0 from u_memcasecmp is reported as equal.
+}
+
size_t StringImpl::find(CharacterMatchFunctionPtr matchFunction, unsigned start)
{
if (is8Bit())
@@ -986,8 +1027,11 @@ size_t StringImpl::find(const LChar* matchString, unsigned index)
return std::min(index, length());
// Optimization 1: fast case for strings of length 1.
- if (matchLength == 1)
+ if (matchLength == 1) {
+ if (is8Bit())
+ return WTF::find(characters8(), length(), matchString[0], index);
return WTF::find(characters16(), length(), *matchString, index);
+ }
// Check index & matchLength are in range.
if (index > length())
@@ -998,10 +1042,32 @@ size_t StringImpl::find(const LChar* matchString, unsigned index)
// delta is the number of additional times to test; delta == 0 means test only once.
unsigned delta = searchLength - matchLength;
- const UChar* searchCharacters = deprecatedCharacters() + index;
-
// Optimization 2: keep a running hash of the strings,
// only call equal if the hashes match.
+
+ if (is8Bit()) {
+ const LChar* searchCharacters = characters8() + index;
+
+ unsigned searchHash = 0;
+ unsigned matchHash = 0;
+ for (unsigned i = 0; i < matchLength; ++i) {
+ searchHash += searchCharacters[i];
+ matchHash += matchString[i];
+ }
+
+ unsigned i = 0;
+ while (searchHash != matchHash || !equal(searchCharacters + i, matchString, matchLength)) {
+ if (i == delta)
+ return notFound;
+ searchHash += searchCharacters[i + matchLength];
+ searchHash -= searchCharacters[i];
+ ++i;
+ }
+ return index + i;
+ }
+
+ const UChar* searchCharacters = characters16() + index;
+
unsigned searchHash = 0;
unsigned matchHash = 0;
for (unsigned i = 0; i < matchLength; ++i) {
@@ -1010,7 +1076,6 @@ size_t StringImpl::find(const LChar* matchString, unsigned index)
}
unsigned i = 0;
- // keep looping until we match
while (searchHash != matchHash || !equal(searchCharacters + i, matchString, matchLength)) {
if (i == delta)
return notFound;
@@ -1042,45 +1107,27 @@ size_t StringImpl::findIgnoringCase(const LChar* matchString, unsigned index)
// delta is the number of additional times to test; delta == 0 means test only once.
unsigned delta = searchLength - matchLength;
- const UChar* searchCharacters = deprecatedCharacters() + index;
+ if (is8Bit()) {
+ const LChar* searchCharacters = characters8() + index;
- unsigned i = 0;
- // keep looping until we match
- while (!equalIgnoringCase(searchCharacters + i, matchString, matchLength)) {
- if (i == delta)
- return notFound;
- ++i;
+ unsigned i = 0;
+ while (!equalCompatibilityCaseless(searchCharacters + i, matchString, matchLength)) {
+ if (i == delta)
+ return notFound;
+ ++i;
+ }
+ return index + i;
}
- return index + i;
-}
-
-template <typename SearchCharacterType, typename MatchCharacterType>
-ALWAYS_INLINE static size_t findInner(const SearchCharacterType* searchCharacters, const MatchCharacterType* matchCharacters, unsigned index, unsigned searchLength, unsigned matchLength)
-{
- // Optimization: keep a running hash of the strings,
- // only call equal() if the hashes match.
- // delta is the number of additional times to test; delta == 0 means test only once.
- unsigned delta = searchLength - matchLength;
-
- unsigned searchHash = 0;
- unsigned matchHash = 0;
-
- for (unsigned i = 0; i < matchLength; ++i) {
- searchHash += searchCharacters[i];
- matchHash += matchCharacters[i];
- }
+ const UChar* searchCharacters = characters16() + index;
unsigned i = 0;
- // keep looping until we match
- while (searchHash != matchHash || !equal(searchCharacters + i, matchCharacters, matchLength)) {
+ while (!equalCompatibilityCaseless(searchCharacters + i, matchString, matchLength)) {
if (i == delta)
return notFound;
- searchHash += searchCharacters[i + matchLength];
- searchHash -= searchCharacters[i];
++i;
}
- return index + i;
+ return index + i;
}
size_t StringImpl::find(StringImpl* matchString)
@@ -1128,35 +1175,7 @@ size_t StringImpl::find(StringImpl* matchString, unsigned index)
if (UNLIKELY(!matchString))
return notFound;
- unsigned matchLength = matchString->length();
-
- // Optimization 1: fast case for strings of length 1.
- if (matchLength == 1) {
- if (is8Bit())
- return WTF::find(characters8(), length(), (*matchString)[0], index);
- return WTF::find(characters16(), length(), (*matchString)[0], index);
- }
-
- if (UNLIKELY(!matchLength))
- return std::min(index, length());
-
- // Check index & matchLength are in range.
- if (index > length())
- return notFound;
- unsigned searchLength = length() - index;
- if (matchLength > searchLength)
- return notFound;
-
- if (is8Bit()) {
- if (matchString->is8Bit())
- return findInner(characters8() + index, matchString->characters8(), index, searchLength, matchLength);
- return findInner(characters8() + index, matchString->characters16(), index, searchLength, matchLength);
- }
-
- if (matchString->is8Bit())
- return findInner(characters16() + index, matchString->characters8(), index, searchLength, matchLength);
-
- return findInner(characters16() + index, matchString->characters16(), index, searchLength, matchLength);
+ return findCommon(*this, *matchString, index);
}
template <typename SearchCharacterType, typename MatchCharacterType>
@@ -1167,7 +1186,7 @@ ALWAYS_INLINE static size_t findIgnoringCaseInner(const SearchCharacterType* sea
unsigned i = 0;
// keep looping until we match
- while (!equalIgnoringCase(searchCharacters + i, matchCharacters, matchLength)) {
+ while (!equalCompatibilityCaseless(searchCharacters + i, matchCharacters, matchLength)) {
if (i == delta)
return notFound;
++i;
@@ -1203,11 +1222,28 @@ size_t StringImpl::findIgnoringCase(StringImpl* matchString, unsigned index)
return findIgnoringCaseInner(characters16() + index, matchString->characters16(), index, searchLength, matchLength);
}
-size_t StringImpl::findNextLineStart(unsigned index)
+size_t StringImpl::findIgnoringASCIICase(const StringImpl& matchString) const // Forwards to ::WTF::findIgnoringASCIICase with a start offset of 0.
{
- if (is8Bit())
- return WTF::findNextLineStart(characters8(), m_length, index);
- return WTF::findNextLineStart(characters16(), m_length, index);
+ return ::WTF::findIgnoringASCIICase(*this, matchString, 0);
+}
+
+size_t StringImpl::findIgnoringASCIICase(const StringImpl& matchString, unsigned startOffset) const // Forwards to ::WTF::findIgnoringASCIICase, passing startOffset through.
+{
+ return ::WTF::findIgnoringASCIICase(*this, matchString, startOffset);
+}
+
+size_t StringImpl::findIgnoringASCIICase(const StringImpl* matchString) const // Pointer overload: a null matchString yields notFound; otherwise searches from offset 0.
+{
+ if (!matchString)
+ return notFound;
+ return ::WTF::findIgnoringASCIICase(*this, *matchString, 0);
+}
+
+size_t StringImpl::findIgnoringASCIICase(const StringImpl* matchString, unsigned startOffset) const // Pointer overload: a null matchString yields notFound; otherwise searches from startOffset.
+{
+ if (!matchString)
+ return notFound;
+ return ::WTF::findIgnoringASCIICase(*this, *matchString, startOffset);
}
size_t StringImpl::reverseFind(UChar c, unsigned index)
@@ -1284,7 +1320,7 @@ ALWAYS_INLINE static size_t reverseFindIgnoringCaseInner(const SearchCharacterTy
unsigned delta = std::min(index, length - matchLength);
// keep looping until we match
- while (!equalIgnoringCase(searchCharacters + delta, matchCharacters, matchLength)) {
+ while (!equalCompatibilityCaseless(searchCharacters + delta, matchCharacters, matchLength)) {
if (!delta)
return notFound;
--delta;
@@ -1330,26 +1366,52 @@ ALWAYS_INLINE static bool equalInner(const StringImpl* stringImpl, unsigned star
return equal(stringImpl->characters16() + startOffset, reinterpret_cast<const LChar*>(matchString), matchLength);
}
if (stringImpl->is8Bit())
- return equalIgnoringCase(stringImpl->characters8() + startOffset, reinterpret_cast<const LChar*>(matchString), matchLength);
- return equalIgnoringCase(stringImpl->characters16() + startOffset, reinterpret_cast<const LChar*>(matchString), matchLength);
+ return equalCompatibilityCaseless(stringImpl->characters8() + startOffset, reinterpret_cast<const LChar*>(matchString), matchLength);
+ return equalCompatibilityCaseless(stringImpl->characters16() + startOffset, reinterpret_cast<const LChar*>(matchString), matchLength);
+}
+
+ALWAYS_INLINE static bool equalInner(const StringImpl& stringImpl, unsigned startOffset, const StringImpl& matchString) // True when the characters of stringImpl starting at startOffset compare equal() to all of matchString.
+{
+ if (startOffset > stringImpl.length()) // Offset past the end: cannot match.
+ return false;
+ if (matchString.length() > stringImpl.length()) // Match string longer than the whole string: cannot match.
+ return false;
+ if (matchString.length() + startOffset > stringImpl.length()) // Compared range would run past the end.
+ return false;
+
+ if (stringImpl.is8Bit()) { // Select the equal() overload that matches each string's character width.
+ if (matchString.is8Bit())
+ return equal(stringImpl.characters8() + startOffset, matchString.characters8(), matchString.length());
+ return equal(stringImpl.characters8() + startOffset, matchString.characters16(), matchString.length());
+ }
+ if (matchString.is8Bit())
+ return equal(stringImpl.characters16() + startOffset, matchString.characters8(), matchString.length());
+ return equal(stringImpl.characters16() + startOffset, matchString.characters16(), matchString.length());
}
bool StringImpl::startsWith(const StringImpl* str) const
{
if (!str)
return false;
+ return ::WTF::startsWith(*this, *str); // Null was rejected above; the comparison itself is delegated to ::WTF::startsWith.
+}
- if (str->length() > length())
+bool StringImpl::startsWith(const StringImpl& str) const // Reference overload; forwards to ::WTF::startsWith.
+{
+ return ::WTF::startsWith(*this, str);
+}
+
+bool StringImpl::startsWithIgnoringASCIICase(const StringImpl* prefix) const // Pointer overload: a null prefix yields false.
+{
+ if (!prefix)
return false;
- if (is8Bit()) {
- if (str->is8Bit())
- return equal(characters8(), str->characters8(), str->length());
- return equal(characters8(), str->characters16(), str->length());
- }
- if (str->is8Bit())
- return equal(characters16(), str->characters8(), str->length());
- return equal(characters16(), str->characters16(), str->length());
+ return ::WTF::startsWithIgnoringASCIICase(*this, *prefix); // Comparison is delegated to ::WTF::startsWithIgnoringASCIICase.
+}
+
+bool StringImpl::startsWithIgnoringASCIICase(const StringImpl& prefix) const // Reference overload; forwards to ::WTF::startsWithIgnoringASCIICase.
+{
+ return ::WTF::startsWithIgnoringASCIICase(*this, prefix);
}
bool StringImpl::startsWith(UChar character) const
@@ -1365,6 +1427,24 @@ bool StringImpl::startsWith(const char* matchString, unsigned matchLength, bool
return equalInner(this, 0, matchString, matchLength, caseSensitive);
}
+bool StringImpl::hasInfixStartingAt(const StringImpl& matchString, unsigned startOffset) const // Tests matchString against this string's characters beginning at startOffset.
+{
+ return equalInner(*this, startOffset, matchString); // equalInner performs the bounds checks.
+}
+
+bool StringImpl::endsWith(StringImpl* suffix) // Pointer overload; a null suffix yields false.
+{
+ if (!suffix)
+ return false;
+
+ return ::WTF::endsWith(*this, *suffix); // suffix is non-null here; forward to the free function WTF::endsWith.
+}
+
+bool StringImpl::endsWith(StringImpl& suffix) // Reference overload: forwards straight to the free function WTF::endsWith.
+{
+ return ::WTF::endsWith(*this, suffix);
+}
+
bool StringImpl::endsWith(StringImpl* matchString, bool caseSensitive)
{
ASSERT(matchString);
@@ -1375,6 +1455,19 @@ bool StringImpl::endsWith(StringImpl* matchString, bool caseSensitive)
return false;
}
+bool StringImpl::endsWithIgnoringASCIICase(const StringImpl* suffix) const // Pointer overload; a null suffix yields false.
+{
+ if (!suffix)
+ return false;
+
+ return ::WTF::endsWithIgnoringASCIICase(*this, *suffix); // suffix is non-null here; forward to the free function.
+}
+
+bool StringImpl::endsWithIgnoringASCIICase(const StringImpl& suffix) const // Reference overload: forwards straight to WTF::endsWithIgnoringASCIICase.
+{
+ return ::WTF::endsWithIgnoringASCIICase(*this, suffix);
+}
+
bool StringImpl::endsWith(UChar character) const
{
return m_length && (*this)[m_length - 1] == character;
@@ -1389,7 +1482,14 @@ bool StringImpl::endsWith(const char* matchString, unsigned matchLength, bool ca
return equalInner(this, startOffset, matchString, matchLength, caseSensitive);
}
-PassRef<StringImpl> StringImpl::replace(UChar oldC, UChar newC)
+bool StringImpl::hasInfixEndingAt(const StringImpl& matchString, unsigned endOffset) const // Tests matchString against the characters of this string that end at endOffset.
+{
+ if (endOffset < matchString.length()) // Keeps the unsigned subtraction below from wrapping around.
+ return false;
+ return equalInner(*this, endOffset - matchString.length(), matchString); // Start offset = end offset minus the match length.
+}
+
+Ref<StringImpl> StringImpl::replace(UChar oldC, UChar newC)
{
if (oldC == newC)
return *this;
@@ -1450,7 +1550,7 @@ PassRef<StringImpl> StringImpl::replace(UChar oldC, UChar newC)
return newImpl;
}
-PassRef<StringImpl> StringImpl::replace(unsigned position, unsigned lengthToReplace, StringImpl* str)
+Ref<StringImpl> StringImpl::replace(unsigned position, unsigned lengthToReplace, StringImpl* str)
{
position = std::min(position, length());
lengthToReplace = std::min(lengthToReplace, length() - position);
@@ -1495,7 +1595,7 @@ PassRef<StringImpl> StringImpl::replace(unsigned position, unsigned lengthToRepl
return newImpl;
}
-PassRef<StringImpl> StringImpl::replace(UChar pattern, StringImpl* replacement)
+Ref<StringImpl> StringImpl::replace(UChar pattern, StringImpl* replacement)
{
if (!replacement)
return *this;
@@ -1506,7 +1606,7 @@ PassRef<StringImpl> StringImpl::replace(UChar pattern, StringImpl* replacement)
return replace(pattern, replacement->m_data16, replacement->length());
}
-PassRef<StringImpl> StringImpl::replace(UChar pattern, const LChar* replacement, unsigned repStrLength)
+Ref<StringImpl> StringImpl::replace(UChar pattern, const LChar* replacement, unsigned repStrLength)
{
ASSERT(replacement);
@@ -1583,7 +1683,7 @@ PassRef<StringImpl> StringImpl::replace(UChar pattern, const LChar* replacement,
return newImpl;
}
-PassRef<StringImpl> StringImpl::replace(UChar pattern, const UChar* replacement, unsigned repStrLength)
+Ref<StringImpl> StringImpl::replace(UChar pattern, const UChar* replacement, unsigned repStrLength)
{
ASSERT(replacement);
@@ -1663,7 +1763,7 @@ PassRef<StringImpl> StringImpl::replace(UChar pattern, const UChar* replacement,
return newImpl;
}
-PassRef<StringImpl> StringImpl::replace(StringImpl* pattern, StringImpl* replacement)
+Ref<StringImpl> StringImpl::replace(StringImpl* pattern, StringImpl* replacement)
{
if (!pattern || !replacement)
return *this;
@@ -1770,34 +1870,9 @@ PassRef<StringImpl> StringImpl::replace(StringImpl* pattern, StringImpl* replace
return newImpl;
}
-static inline bool stringImplContentEqual(const StringImpl* a, const StringImpl* b)
-{
- unsigned aLength = a->length();
- unsigned bLength = b->length();
- if (aLength != bLength)
- return false;
-
- if (a->is8Bit()) {
- if (b->is8Bit())
- return equal(a->characters8(), b->characters8(), aLength);
-
- return equal(a->characters8(), b->characters16(), aLength);
- }
-
- if (b->is8Bit())
- return equal(a->characters16(), b->characters8(), aLength);
-
- return equal(a->characters16(), b->characters16(), aLength);
-}
-
bool equal(const StringImpl* a, const StringImpl* b)
{
- if (a == b)
- return true;
- if (!a || !b)
- return false;
-
- return stringImplContentEqual(a, b);
+ return equalCommon(a, b); // Pointer comparison now delegates to equalCommon, which is not defined in the visible hunks (presumably StringCommon.h - verify).
}
template <typename CharType>
@@ -1860,109 +1935,34 @@ bool equal(const StringImpl* a, const LChar* b)
return !b[length];
}
-bool equalNonNull(const StringImpl* a, const StringImpl* b)
+bool equal(const StringImpl& a, const StringImpl& b) // Reference overload; it takes the place of the removed equalNonNull(a, b) in this hunk.
{
- ASSERT(a && b);
- if (a == b)
- return true;
-
- return stringImplContentEqual(a, b);
+ return equalCommon(a, b); // Delegates to the reference form of equalCommon (defined outside the visible hunks).
}
-bool equalIgnoringCase(const StringImpl* a, const StringImpl* b)
+bool equalIgnoringNullity(StringImpl* a, StringImpl* b) // Like equal(a, b), except that a null pointer and an empty string also compare equal.
{
- if (a == b)
+ if (!a && b && !b->length()) // a is null and b is an empty string.
return true;
- if (!a || !b)
- return false;
-
- return CaseFoldingHash::equal(a, b);
-}
-
-bool equalIgnoringCase(const StringImpl* a, const LChar* b)
-{
- if (!a)
- return !b;
- if (!b)
- return !a;
-
- unsigned length = a->length();
-
- // Do a faster loop for the case where all the characters are ASCII.
- UChar ored = 0;
- bool equal = true;
- if (a->is8Bit()) {
- const LChar* as = a->characters8();
- for (unsigned i = 0; i != length; ++i) {
- LChar bc = b[i];
- if (!bc)
- return false;
- UChar ac = as[i];
- ored |= ac;
- equal = equal && (toASCIILower(ac) == toASCIILower(bc));
- }
-
- // Do a slower implementation for cases that include non-ASCII characters.
- if (ored & ~0x7F) {
- equal = true;
- for (unsigned i = 0; i != length; ++i)
- equal = equal && u_foldCase(as[i], U_FOLD_CASE_DEFAULT) == u_foldCase(b[i], U_FOLD_CASE_DEFAULT);
- }
-
- return equal && !b[length];
- }
-
- const UChar* as = a->characters16();
- for (unsigned i = 0; i != length; ++i) {
- LChar bc = b[i];
- if (!bc)
- return false;
- UChar ac = as[i];
- ored |= ac;
- equal = equal && (toASCIILower(ac) == toASCIILower(bc));
- }
-
- // Do a slower implementation for cases that include non-ASCII characters.
- if (ored & ~0x7F) {
- equal = true;
- for (unsigned i = 0; i != length; ++i) {
- equal = equal && u_foldCase(as[i], U_FOLD_CASE_DEFAULT) == u_foldCase(b[i], U_FOLD_CASE_DEFAULT);
- }
- }
-
- return equal && !b[length];
+ if (!b && a && !a->length()) // b is null and a is an empty string.
+ return true;
+ return equal(a, b); // Every other combination uses the ordinary pointer comparison.
}
-bool equalIgnoringCaseNonNull(const StringImpl* a, const StringImpl* b)
+bool equalIgnoringASCIICase(const StringImpl* a, const StringImpl* b) // Pointer overload: handles identical and null pointers before delegating.
{
- ASSERT(a && b);
if (a == b)
return true;
-
- unsigned length = a->length();
- if (length != b->length())
+ if (!a || !b) // Exactly one side is null here, because a == b was handled just before.
return false;
-
- if (a->is8Bit()) {
- if (b->is8Bit())
- return equalIgnoringCase(a->characters8(), b->characters8(), length);
-
- return equalIgnoringCase(b->characters16(), a->characters8(), length);
- }
-
- if (b->is8Bit())
- return equalIgnoringCase(a->characters16(), b->characters8(), length);
-
- return equalIgnoringCase(a->characters16(), b->characters16(), length);
+ return equalIgnoringASCIICaseCommon(*a, *b); // Both pointers are non-null at this point.
}
-bool equalIgnoringNullity(StringImpl* a, StringImpl* b)
+bool equalIgnoringASCIICaseNonNull(const StringImpl* a, const StringImpl* b) // Variant for callers that guarantee both pointers are non-null.
{
- if (!a && b && !b->length())
- return true;
- if (!b && a && !a->length())
- return true;
- return equal(a, b);
+ ASSERT(a); // Null arguments are a caller error; only checked via ASSERT.
+ ASSERT(b);
+ return equalIgnoringASCIICase(*a, *b); // Dereference and use the reference overload.
}
UCharDirection StringImpl::defaultWritingDirection(bool* hasStrongDirectionality)
@@ -1985,7 +1985,7 @@ UCharDirection StringImpl::defaultWritingDirection(bool* hasStrongDirectionality
return U_LEFT_TO_RIGHT;
}
-PassRef<StringImpl> StringImpl::adopt(StringBuffer<LChar>& buffer)
+Ref<StringImpl> StringImpl::adopt(StringBuffer<LChar>&& buffer)
{
unsigned length = buffer.length();
if (!length)
@@ -1993,7 +1993,7 @@ PassRef<StringImpl> StringImpl::adopt(StringBuffer<LChar>& buffer)
return adoptRef(*new StringImpl(buffer.release(), length));
}
-PassRef<StringImpl> StringImpl::adopt(StringBuffer<UChar>& buffer)
+Ref<StringImpl> StringImpl::adopt(StringBuffer<UChar>&& buffer)
{
unsigned length = buffer.length();
if (!length)
@@ -2005,11 +2005,7 @@ size_t StringImpl::sizeInBytes() const
{
// FIXME: support substrings
size_t size = length();
- if (is8Bit()) {
- if (has16BitShadow()) {
- size += 2 * size;
- }
- } else
+ if (!is8Bit())
size *= 2;
return size + sizeof(*this);
}
@@ -2023,8 +2019,7 @@ static inline void putUTF8Triple(char*& buffer, UChar ch)
*buffer++ = static_cast<char>((ch & 0x3F) | 0x80);
}
-bool StringImpl::utf8Impl(
- const UChar* characters, unsigned length, char*& buffer, size_t bufferSize, ConversionMode mode)
+bool StringImpl::utf8Impl(const UChar* characters, unsigned length, char*& buffer, size_t bufferSize, ConversionMode mode)
{
if (mode == StrictConversionReplacingUnpairedSurrogatesWithFFFD) {
const UChar* charactersEnd = characters + length;
@@ -2075,8 +2070,21 @@ bool StringImpl::utf8Impl(
return true;
}
-CString StringImpl::utf8ForCharacters(
- const UChar* characters, unsigned length, ConversionMode mode)
+CString StringImpl::utf8ForCharacters(const LChar* characters, unsigned length) // Converts length Latin-1 characters into a UTF-8 CString.
+{
+ if (!length) // Empty input: return a CString built from "" with length 0.
+ return CString("", 0);
+ if (length > std::numeric_limits<unsigned>::max() / 3) // length * 3 below would overflow unsigned; return a default-constructed CString.
+ return CString();
+ Vector<char, 1024> bufferVector(length * 3); // Output buffer sized at three bytes per input character.
+ char* buffer = bufferVector.data();
+ const LChar* source = characters;
+ ConversionResult result = convertLatin1ToUTF8(&source, source + length, &buffer, buffer + bufferVector.size()); // buffer is passed by address; presumably advanced past the bytes written - verify.
+ ASSERT_UNUSED(result, result != targetExhausted); // (length * 3) should be sufficient for any conversion
+ return CString(bufferVector.data(), buffer - bufferVector.data()); // Byte count = distance from the buffer start to the final write position.
+}
+
+CString StringImpl::utf8ForCharacters(const UChar* characters, unsigned length, ConversionMode mode)
{
if (!length)
return CString("", 0);
@@ -2131,25 +2139,21 @@ CString StringImpl::utf8(ConversionMode mode) const
return utf8ForRange(0, length(), mode);
}
-// Table is based on ftp://ftp.unicode.org/Public/UNIDATA/CaseFolding.txt
-const UChar StringImpl::latin1CaseFoldTable[256] = {
- 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f,
- 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, 0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f,
- 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f,
- 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f,
- 0x0040, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f,
- 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 0x0078, 0x0079, 0x007a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f,
- 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f,
- 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f,
- 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
- 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
- 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
- 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x03bc, 0x00b6, 0x00b7, 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
- 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
- 0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00d7, 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00df,
- 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
- 0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7, 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,
-};
-
+bool equalIgnoringNullity(const UChar* a, size_t aLength, StringImpl* b) // Compares a UChar buffer of aLength characters with b; a null b matches only a zero-length buffer.
+{
+ if (!b) // Null string: equal exactly when the buffer is empty.
+ return !aLength;
+ if (aLength != b->length()) // Different lengths can never match.
+ return false;
+ if (b->is8Bit()) { // b stores LChar data, so compare element by element against the UChar buffer.
+ const LChar* bCharacters = b->characters8();
+ for (unsigned i = 0; i < aLength; ++i) {
+ if (a[i] != bCharacters[i])
+ return false;
+ }
+ return true;
+ }
+ return !memcmp(a, b->characters16(), b->length() * sizeof(UChar)); // Both sides are UChar data: compare the raw bytes.
+}
} // namespace WTF
diff --git a/Source/WTF/wtf/text/StringImpl.h b/Source/WTF/wtf/text/StringImpl.h
index 770acf000..b2c45e8fa 100644
--- a/Source/WTF/wtf/text/StringImpl.h
+++ b/Source/WTF/wtf/text/StringImpl.h
@@ -1,6 +1,6 @@
/*
* Copyright (C) 1999 Lars Knoll (knoll@kde.org)
- * Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2013 Apple Inc. All rights reserved.
+ * Copyright (C) 2005-2010, 2013-2016 Apple Inc. All rights reserved.
* Copyright (C) 2009 Google Inc. All rights reserved.
*
* This library is free software; you can redistribute it and/or
@@ -24,16 +24,16 @@
#define StringImpl_h
#include <limits.h>
+#include <unicode/uchar.h>
+#include <unicode/ustring.h>
#include <wtf/ASCIICType.h>
-#include <wtf/CompilationThread.h>
-#include <wtf/CryptographicallyRandomNumber.h>
#include <wtf/Forward.h>
+#include <wtf/Hasher.h>
#include <wtf/MathExtras.h>
#include <wtf/StdLibExtras.h>
-#include <wtf/StringHasher.h>
#include <wtf/Vector.h>
#include <wtf/text/ConversionMode.h>
-#include <wtf/unicode/Unicode.h>
+#include <wtf/text/StringCommon.h>
#if USE(CF)
typedef const struct __CFString * CFStringRef;
@@ -43,27 +43,28 @@ typedef const struct __CFString * CFStringRef;
@class NSString;
#endif
-// FIXME: This is a temporary layering violation while we move string code to WTF.
-// Landing the file moves in one patch, will follow on with patches to change the namespaces.
namespace JSC {
-struct IdentifierASCIIStringTranslator;
namespace LLInt { class Data; }
class LLIntOffsetsExtractor;
-template <typename T> struct IdentifierCharBufferTranslator;
-struct IdentifierLCharFromUCharTranslator;
}
namespace WTF {
+class SymbolImpl;
+class SymbolRegistry;
+
struct CStringTranslator;
-template<typename CharacterType> struct HashAndCharactersTranslator;
+struct CharBufferFromLiteralDataTranslator;
struct HashAndUTF8CharactersTranslator;
struct LCharBufferTranslator;
-struct CharBufferFromLiteralDataTranslator;
+struct StringHash;
struct SubstringTranslator;
struct UCharBufferTranslator;
+
template<typename> class RetainPtr;
+template<typename> struct HashAndCharactersTranslator;
+
enum TextCaseSensitivity {
TextCaseSensitive,
TextCaseInsensitive
@@ -72,10 +73,10 @@ enum TextCaseSensitivity {
typedef bool (*CharacterMatchFunctionPtr)(UChar);
typedef bool (*IsWhiteSpaceFunctionPtr)(UChar);
-// Define STRING_STATS to turn on run time statistics of string sizes and memory usage
-#undef STRING_STATS
+// Define STRING_STATS to 1 to turn on run time statistics of string sizes and memory usage
+#define STRING_STATS 0
-#ifdef STRING_STATS
+#if STRING_STATS
struct StringStats {
inline void add8BitString(unsigned length, bool isSubString = false)
{
@@ -93,33 +94,29 @@ struct StringStats {
m_total16BitData += length;
}
- inline void addUpconvertedString(unsigned length)
- {
- ++m_numberUpconvertedStrings;
- m_totalUpconvertedData += length;
- }
-
- void removeString(StringImpl*);
+ void removeString(StringImpl&);
void printStats();
static const unsigned s_printStringStatsFrequency = 5000;
- static unsigned s_stringRemovesTillPrintStats;
-
- unsigned m_totalNumberStrings;
- unsigned m_number8BitStrings;
- unsigned m_number16BitStrings;
- unsigned m_numberUpconvertedStrings;
- unsigned long long m_total8BitData;
- unsigned long long m_total16BitData;
- unsigned long long m_totalUpconvertedData;
+ static std::atomic<unsigned> s_stringRemovesTillPrintStats;
+
+ std::atomic<unsigned> m_refCalls;
+ std::atomic<unsigned> m_derefCalls;
+
+ std::atomic<unsigned> m_totalNumberStrings;
+ std::atomic<unsigned> m_number8BitStrings;
+ std::atomic<unsigned> m_number16BitStrings;
+ std::atomic<unsigned long long> m_total8BitData;
+ std::atomic<unsigned long long> m_total16BitData;
};
#define STRING_STATS_ADD_8BIT_STRING(length) StringImpl::stringStats().add8BitString(length)
#define STRING_STATS_ADD_8BIT_STRING2(length, isSubString) StringImpl::stringStats().add8BitString(length, isSubString)
#define STRING_STATS_ADD_16BIT_STRING(length) StringImpl::stringStats().add16BitString(length)
#define STRING_STATS_ADD_16BIT_STRING2(length, isSubString) StringImpl::stringStats().add16BitString(length, isSubString)
-#define STRING_STATS_ADD_UPCONVERTED_STRING(length) StringImpl::stringStats().addUpconvertedString(length)
#define STRING_STATS_REMOVE_STRING(string) StringImpl::stringStats().removeString(string)
+#define STRING_STATS_REF_STRING(string) ++StringImpl::stringStats().m_refCalls /* Counts one ref() call. No trailing ';': expands to an expression like the ((void)0) stub, so the call site supplies the semicolon. */
+#define STRING_STATS_DEREF_STRING(string) ++StringImpl::stringStats().m_derefCalls /* Counts one deref() call. No trailing ';', for the same reason as STRING_STATS_REF_STRING. */
#else
#define STRING_STATS_ADD_8BIT_STRING(length) ((void)0)
#define STRING_STATS_ADD_8BIT_STRING2(length, isSubString) ((void)0)
@@ -127,14 +124,12 @@ struct StringStats {
#define STRING_STATS_ADD_16BIT_STRING2(length, isSubString) ((void)0)
#define STRING_STATS_ADD_UPCONVERTED_STRING(length) ((void)0)
#define STRING_STATS_REMOVE_STRING(string) ((void)0)
+#define STRING_STATS_REF_STRING(string) ((void)0)
+#define STRING_STATS_DEREF_STRING(string) ((void)0)
#endif
class StringImpl {
WTF_MAKE_NONCOPYABLE(StringImpl); WTF_MAKE_FAST_ALLOCATED;
- friend struct JSC::IdentifierASCIIStringTranslator;
- friend struct JSC::IdentifierCharBufferTranslator<LChar>;
- friend struct JSC::IdentifierCharBufferTranslator<UChar>;
- friend struct JSC::IdentifierLCharFromUCharTranslator;
friend struct WTF::CStringTranslator;
template<typename CharacterType> friend struct WTF::HashAndCharactersTranslator;
friend struct WTF::HashAndUTF8CharactersTranslator;
@@ -142,9 +137,9 @@ class StringImpl {
friend struct WTF::LCharBufferTranslator;
friend struct WTF::SubstringTranslator;
friend struct WTF::UCharBufferTranslator;
- friend class AtomicStringImpl;
friend class JSC::LLInt::Data;
friend class JSC::LLIntOffsetsExtractor;
+ friend class SymbolImpl;
private:
enum BufferOwnership {
@@ -153,42 +148,26 @@ private:
BufferSubstring,
};
- // Used to construct static strings, which have an special refCount that can never hit zero.
- // This means that the static string will never be destroyed, which is important because
- // static strings will be shared across threads & ref-counted in a non-threadsafe manner.
- enum ConstructStaticStringTag { ConstructStaticString };
- StringImpl(const UChar* characters, unsigned length, ConstructStaticStringTag)
- : m_refCount(s_refCountFlagIsStaticString)
- , m_length(length)
- , m_data16(characters)
- , m_buffer(0)
- , m_hashAndFlags(s_hashFlagIsIdentifier | BufferOwned)
- {
- // Ensure that the hash is computed so that AtomicStringHash can call existingHash()
- // with impunity. The empty string is special because it is never entered into
- // AtomicString's HashKey, but still needs to compare correctly.
- STRING_STATS_ADD_16BIT_STRING(m_length);
-
- hash();
- }
-
- // Used to construct static strings, which have an special refCount that can never hit zero.
- // This means that the static string will never be destroyed, which is important because
- // static strings will be shared across threads & ref-counted in a non-threadsafe manner.
- StringImpl(const LChar* characters, unsigned length, ConstructStaticStringTag)
- : m_refCount(s_refCountFlagIsStaticString)
- , m_length(length)
- , m_data8(characters)
- , m_buffer(0)
- , m_hashAndFlags(s_hashFlag8BitBuffer | s_hashFlagIsIdentifier | BufferOwned)
- {
- // Ensure that the hash is computed so that AtomicStringHash can call existingHash()
- // with impunity. The empty string is special because it is never entered into
- // AtomicString's HashKey, but still needs to compare correctly.
- STRING_STATS_ADD_8BIT_STRING(m_length);
-
- hash();
- }
+ // The bottom 6 bits in the hash are flags.
+public:
+ static constexpr const unsigned s_flagCount = 6; // Number of low bits of m_hashAndFlags used as flags.
+private:
+ static constexpr const unsigned s_flagMask = (1u << s_flagCount) - 1; // 0x3F: selects all six flag bits.
+ static_assert(s_flagCount <= StringHasher::flagCount, "StringHasher reserves enough bits for StringImpl flags");
+ static constexpr const unsigned s_flagStringKindCount = 4; // Shift base for the string-kind bits: four flag bits sit below them.
+
+ static constexpr const unsigned s_hashFlagStringKindIsAtomic = 1u << (s_flagStringKindCount); // Bit 4.
+ static constexpr const unsigned s_hashFlagStringKindIsSymbol = 1u << (s_flagStringKindCount + 1); // Bit 5.
+ static constexpr const unsigned s_hashMaskStringKind = s_hashFlagStringKindIsAtomic | s_hashFlagStringKindIsSymbol; // Bits 4-5.
+ static constexpr const unsigned s_hashFlag8BitBuffer = 1u << 3; // Bit 3.
+ static constexpr const unsigned s_hashFlagDidReportCost = 1u << 2; // Bit 2.
+ static constexpr const unsigned s_hashMaskBufferOwnership = (1u << 0) | (1u << 1); // Bits 0-1.
+
+ enum StringKind { // Values are the kind bits themselves, so they can be OR-ed directly into m_hashAndFlags.
+ StringNormal = 0u, // non-symbol, non-atomic
+ StringAtomic = s_hashFlagStringKindIsAtomic, // non-symbol, atomic
+ StringSymbol = s_hashFlagStringKindIsSymbol, // symbol, non-atomic
+ };
// FIXME: there has to be a less hacky way to do this.
enum Force8Bit { Force8BitConstructor };
@@ -196,9 +175,8 @@ private:
StringImpl(unsigned length, Force8Bit)
: m_refCount(s_refCountIncrement)
, m_length(length)
- , m_data8(reinterpret_cast<const LChar*>(this + 1))
- , m_buffer(0)
- , m_hashAndFlags(s_hashFlag8BitBuffer | BufferInternal)
+ , m_data8(tailPointer<LChar>())
+ , m_hashAndFlags(s_hashFlag8BitBuffer | StringNormal | BufferInternal)
{
ASSERT(m_data8);
ASSERT(m_length);
@@ -210,9 +188,8 @@ private:
StringImpl(unsigned length)
: m_refCount(s_refCountIncrement)
, m_length(length)
- , m_data16(reinterpret_cast<const UChar*>(this + 1))
- , m_buffer(0)
- , m_hashAndFlags(BufferInternal)
+ , m_data16(tailPointer<UChar>())
+ , m_hashAndFlags(StringNormal | BufferInternal)
{
ASSERT(m_data16);
ASSERT(m_length);
@@ -225,8 +202,7 @@ private:
: m_refCount(s_refCountIncrement)
, m_length(length)
, m_data8(characters.leakPtr())
- , m_buffer(0)
- , m_hashAndFlags(s_hashFlag8BitBuffer | BufferOwned)
+ , m_hashAndFlags(s_hashFlag8BitBuffer | StringNormal | BufferOwned)
{
ASSERT(m_data8);
ASSERT(m_length);
@@ -239,26 +215,24 @@ private:
: m_refCount(s_refCountIncrement)
, m_length(length)
, m_data16(characters)
- , m_buffer(0)
- , m_hashAndFlags(BufferInternal)
+ , m_hashAndFlags(StringNormal | BufferInternal)
{
ASSERT(m_data16);
ASSERT(m_length);
- STRING_STATS_ADD_16BIT_STRING(0);
+ STRING_STATS_ADD_16BIT_STRING(m_length);
}
StringImpl(const LChar* characters, unsigned length, ConstructWithoutCopyingTag)
: m_refCount(s_refCountIncrement)
, m_length(length)
, m_data8(characters)
- , m_buffer(0)
- , m_hashAndFlags(s_hashFlag8BitBuffer | BufferInternal)
+ , m_hashAndFlags(s_hashFlag8BitBuffer | StringNormal | BufferInternal)
{
ASSERT(m_data8);
ASSERT(m_length);
- STRING_STATS_ADD_8BIT_STRING(0);
+ STRING_STATS_ADD_8BIT_STRING(m_length);
}
// Create a StringImpl adopting ownership of the provided buffer (BufferOwned)
@@ -266,8 +240,7 @@ private:
: m_refCount(s_refCountIncrement)
, m_length(length)
, m_data16(characters.leakPtr())
- , m_buffer(0)
- , m_hashAndFlags(BufferOwned)
+ , m_hashAndFlags(StringNormal | BufferOwned)
{
ASSERT(m_data16);
ASSERT(m_length);
@@ -276,109 +249,74 @@ private:
}
// Used to create new strings that are a substring of an existing 8-bit StringImpl (BufferSubstring)
- StringImpl(const LChar* characters, unsigned length, PassRefPtr<StringImpl> base)
+ StringImpl(const LChar* characters, unsigned length, Ref<StringImpl>&& base) // 8-bit substring constructor: characters point into the data kept alive by base.
: m_refCount(s_refCountIncrement)
, m_length(length)
, m_data8(characters)
- , m_substringBuffer(base.leakRef())
- , m_hashAndFlags(s_hashFlag8BitBuffer | BufferSubstring)
+ , m_hashAndFlags(s_hashFlag8BitBuffer | StringNormal | BufferSubstring) // The string kind is now recorded explicitly in the flags.
{
ASSERT(is8Bit());
ASSERT(m_data8);
ASSERT(m_length);
- ASSERT(m_substringBuffer->bufferOwnership() != BufferSubstring);
+ ASSERT(base->bufferOwnership() != BufferSubstring); // The owner must not itself be a substring.
+
+ substringBuffer() = &base.leakRef(); // Takes over the caller's reference; the owner pointer is stored through substringBuffer() instead of a member.
STRING_STATS_ADD_8BIT_STRING2(m_length, true);
}
// Used to create new strings that are a substring of an existing 16-bit StringImpl (BufferSubstring)
- StringImpl(const UChar* characters, unsigned length, PassRefPtr<StringImpl> base)
+ StringImpl(const UChar* characters, unsigned length, Ref<StringImpl>&& base) // 16-bit substring constructor: characters point into the data kept alive by base.
: m_refCount(s_refCountIncrement)
, m_length(length)
, m_data16(characters)
- , m_substringBuffer(base.leakRef())
- , m_hashAndFlags(BufferSubstring)
+ , m_hashAndFlags(StringNormal | BufferSubstring) // The string kind is now recorded explicitly in the flags.
{
ASSERT(!is8Bit());
ASSERT(m_data16);
ASSERT(m_length);
- ASSERT(m_substringBuffer->bufferOwnership() != BufferSubstring);
+ ASSERT(base->bufferOwnership() != BufferSubstring); // The owner must not itself be a substring.
- STRING_STATS_ADD_16BIT_STRING2(m_length, true);
- }
+ substringBuffer() = &base.leakRef(); // Takes over the caller's reference; the owner pointer is stored through substringBuffer() instead of a member.
- enum CreateEmptyUnique_T { CreateEmptyUnique };
- StringImpl(CreateEmptyUnique_T)
- : m_refCount(s_refCountIncrement)
- , m_length(0)
- // We expect m_buffer to be initialized to 0 as we use it
- // to represent a null terminated buffer.
- , m_data16(reinterpret_cast<const UChar*>(&m_buffer))
- , m_buffer(0)
- {
- ASSERT(m_data16);
- // Set the hash early, so that all empty unique StringImpls have a hash,
- // and don't use the normal hashing algorithm - the unique nature of these
- // keys means that we don't need them to match any other string (in fact,
- // that's exactly the oposite of what we want!), and teh normal hash would
- // lead to lots of conflicts.
- unsigned hash = cryptographicallyRandomNumber() | 1;
- hash <<= s_flagCount;
- if (!hash)
- hash = 1 << s_flagCount;
- m_hashAndFlags = hash | BufferInternal;
-
- STRING_STATS_ADD_16BIT_STRING(m_length);
+ STRING_STATS_ADD_16BIT_STRING2(m_length, true); // Recorded as a substring (second argument true).
}
- ~StringImpl();
-
public:
WTF_EXPORT_STRING_API static void destroy(StringImpl*);
- WTF_EXPORT_STRING_API static PassRef<StringImpl> create(const UChar*, unsigned length);
- WTF_EXPORT_STRING_API static PassRef<StringImpl> create(const LChar*, unsigned length);
- WTF_EXPORT_STRING_API static PassRef<StringImpl> create8BitIfPossible(const UChar*, unsigned length);
+ WTF_EXPORT_STRING_API static Ref<StringImpl> create(const UChar*, unsigned length);
+ WTF_EXPORT_STRING_API static Ref<StringImpl> create(const LChar*, unsigned length);
+ WTF_EXPORT_STRING_API static Ref<StringImpl> create8BitIfPossible(const UChar*, unsigned length);
template<size_t inlineCapacity>
- static PassRef<StringImpl> create8BitIfPossible(const Vector<UChar, inlineCapacity>& vector)
+ static Ref<StringImpl> create8BitIfPossible(const Vector<UChar, inlineCapacity>& vector)
{
return create8BitIfPossible(vector.data(), vector.size());
}
- WTF_EXPORT_STRING_API static PassRef<StringImpl> create8BitIfPossible(const UChar*);
+ WTF_EXPORT_STRING_API static Ref<StringImpl> create8BitIfPossible(const UChar*);
- ALWAYS_INLINE static PassRef<StringImpl> create(const char* s, unsigned length) { return create(reinterpret_cast<const LChar*>(s), length); }
- WTF_EXPORT_STRING_API static PassRef<StringImpl> create(const LChar*);
- ALWAYS_INLINE static PassRef<StringImpl> create(const char* s) { return create(reinterpret_cast<const LChar*>(s)); }
+ ALWAYS_INLINE static Ref<StringImpl> create(const char* s, unsigned length) { return create(reinterpret_cast<const LChar*>(s), length); }
+ WTF_EXPORT_STRING_API static Ref<StringImpl> create(const LChar*);
+ ALWAYS_INLINE static Ref<StringImpl> create(const char* s) { return create(reinterpret_cast<const LChar*>(s)); }
- static ALWAYS_INLINE PassRef<StringImpl> create8(PassRefPtr<StringImpl> rep, unsigned offset, unsigned length)
+ static ALWAYS_INLINE Ref<StringImpl> createSubstringSharingImpl(StringImpl& rep, unsigned offset, unsigned length) // Creates a string of length characters that shares rep's character data starting at offset.
{
- ASSERT(rep);
- ASSERT(length <= rep->length());
+ ASSERT(length <= rep.length()); // Only length is asserted here; offset is not range-checked.
if (!length)
return *empty();
- ASSERT(rep->is8Bit());
- StringImpl* ownerRep = (rep->bufferOwnership() == BufferSubstring) ? rep->m_substringBuffer : rep.get();
- return adoptRef(*new StringImpl(rep->m_data8 + offset, length, ownerRep));
- }
-
- static ALWAYS_INLINE PassRef<StringImpl> create(PassRefPtr<StringImpl> rep, unsigned offset, unsigned length)
- {
- ASSERT(rep);
- ASSERT(length <= rep->length());
-
- if (!length)
- return *empty();
+ auto* ownerRep = ((rep.bufferOwnership() == BufferSubstring) ? rep.substringBuffer() : &rep); // If rep is itself a substring, share its owner so substrings never chain.
- StringImpl* ownerRep = (rep->bufferOwnership() == BufferSubstring) ? rep->m_substringBuffer : rep.get();
- if (rep->is8Bit())
- return adoptRef(*new StringImpl(rep->m_data8 + offset, length, ownerRep));
- return adoptRef(*new StringImpl(rep->m_data16 + offset, length, ownerRep));
+ // We allocate a buffer that contains both the StringImpl struct as well as the pointer to the owner string.
+ auto* stringImpl = static_cast<StringImpl*>(fastMalloc(allocationSize<StringImpl*>(1)));
+ if (rep.is8Bit()) // Construct in place with the constructor matching rep's character width.
+ return adoptRef(*new (NotNull, stringImpl) StringImpl(rep.m_data8 + offset, length, *ownerRep));
+ return adoptRef(*new (NotNull, stringImpl) StringImpl(rep.m_data16 + offset, length, *ownerRep));
}
template<unsigned charactersCount>
- ALWAYS_INLINE static PassRef<StringImpl> createFromLiteral(const char (&characters)[charactersCount])
+ ALWAYS_INLINE static Ref<StringImpl> createFromLiteral(const char (&characters)[charactersCount])
{
COMPILE_ASSERT(charactersCount > 1, StringImplFromLiteralNotEmpty);
COMPILE_ASSERT((charactersCount - 1 <= ((unsigned(~0) - sizeof(StringImpl)) / sizeof(LChar))), StringImplFromLiteralCannotOverflow);
@@ -387,53 +325,50 @@ public:
}
// FIXME: Transition off of these functions to createWithoutCopying instead.
- WTF_EXPORT_STRING_API static PassRef<StringImpl> createFromLiteral(const char* characters, unsigned length);
- WTF_EXPORT_STRING_API static PassRef<StringImpl> createFromLiteral(const char* characters);
+ WTF_EXPORT_STRING_API static Ref<StringImpl> createFromLiteral(const char* characters, unsigned length);
+ WTF_EXPORT_STRING_API static Ref<StringImpl> createFromLiteral(const char* characters);
- WTF_EXPORT_STRING_API static PassRef<StringImpl> createWithoutCopying(const UChar* characters, unsigned length);
- WTF_EXPORT_STRING_API static PassRef<StringImpl> createWithoutCopying(const LChar* characters, unsigned length);
+ WTF_EXPORT_STRING_API static Ref<StringImpl> createWithoutCopying(const UChar* characters, unsigned length);
+ WTF_EXPORT_STRING_API static Ref<StringImpl> createWithoutCopying(const LChar* characters, unsigned length);
- WTF_EXPORT_STRING_API static PassRef<StringImpl> createUninitialized(unsigned length, LChar*& data);
- WTF_EXPORT_STRING_API static PassRef<StringImpl> createUninitialized(unsigned length, UChar*& data);
- template <typename T> static ALWAYS_INLINE PassRefPtr<StringImpl> tryCreateUninitialized(unsigned length, T*& output)
+ WTF_EXPORT_STRING_API static Ref<StringImpl> createUninitialized(unsigned length, LChar*& data);
+ WTF_EXPORT_STRING_API static Ref<StringImpl> createUninitialized(unsigned length, UChar*& data);
+ template <typename T> static ALWAYS_INLINE RefPtr<StringImpl> tryCreateUninitialized(unsigned length, T*& output) // Allocates a string of length characters of type T without initializing them; returns null on failure.
{
if (!length) {
- output = 0;
+ output = nullptr; // Zero length: no character buffer is handed out; the shared empty() string is returned.
return empty();
}
if (length > ((std::numeric_limits<unsigned>::max() - sizeof(StringImpl)) / sizeof(T))) {
- output = 0;
- return 0;
+ output = nullptr; // The requested length fails the size-limit check above.
+ return nullptr;
}
StringImpl* resultImpl;
- if (!tryFastMalloc(sizeof(T) * length + sizeof(StringImpl)).getValue(resultImpl)) {
- output = 0;
- return 0;
+ if (!tryFastMalloc(allocationSize<T>(length)).getValue(resultImpl)) { // Allocation size now comes from allocationSize<T>() instead of inline arithmetic.
+ output = nullptr; // Allocation failed.
+ return nullptr;
}
- output = reinterpret_cast<T*>(resultImpl + 1);
+ output = resultImpl->tailPointer<T>(); // Caller-writable character storage, obtained via tailPointer<T>() rather than resultImpl + 1.
return constructInternal<T>(resultImpl, length);
}
- static PassRef<StringImpl> createEmptyUnique()
- {
- return adoptRef(*new StringImpl(CreateEmptyUnique));
- }
-
- // Reallocate the StringImpl. The originalString must be only owned by the PassRefPtr,
+ // Reallocate the StringImpl. The originalString must be only owned by the Ref,
// and the buffer ownership must be BufferInternal. Just like the input pointer of realloc(),
// the originalString can't be used after this function.
- static PassRef<StringImpl> reallocate(PassRefPtr<StringImpl> originalString, unsigned length, LChar*& data);
- static PassRef<StringImpl> reallocate(PassRefPtr<StringImpl> originalString, unsigned length, UChar*& data);
+ static Ref<StringImpl> reallocate(Ref<StringImpl>&& originalString, unsigned length, LChar*& data);
+ static Ref<StringImpl> reallocate(Ref<StringImpl>&& originalString, unsigned length, UChar*& data);
static unsigned flagsOffset() { return OBJECT_OFFSETOF(StringImpl, m_hashAndFlags); }
static unsigned flagIs8Bit() { return s_hashFlag8BitBuffer; }
- static unsigned flagIsIdentifier() { return s_hashFlagIsIdentifier; }
+ static unsigned flagIsAtomic() { return s_hashFlagStringKindIsAtomic; }
+ static unsigned flagIsSymbol() { return s_hashFlagStringKindIsSymbol; }
+ static unsigned maskStringKind() { return s_hashMaskStringKind; }
static unsigned dataOffset() { return OBJECT_OFFSETOF(StringImpl, m_data8); }
template<typename CharType, size_t inlineCapacity, typename OverflowHandler>
- static PassRef<StringImpl> adopt(Vector<CharType, inlineCapacity, OverflowHandler>& vector)
+ static Ref<StringImpl> adopt(Vector<CharType, inlineCapacity, OverflowHandler>&& vector)
{
if (size_t size = vector.size()) {
ASSERT(vector.data());
@@ -444,31 +379,24 @@ public:
return *empty();
}
- WTF_EXPORT_STRING_API static PassRef<StringImpl> adopt(StringBuffer<UChar>&);
- WTF_EXPORT_STRING_API static PassRef<StringImpl> adopt(StringBuffer<LChar>&);
+ WTF_EXPORT_STRING_API static Ref<StringImpl> adopt(StringBuffer<UChar>&&);
+ WTF_EXPORT_STRING_API static Ref<StringImpl> adopt(StringBuffer<LChar>&&);
unsigned length() const { return m_length; }
+ static ptrdiff_t lengthMemoryOffset() { return OBJECT_OFFSETOF(StringImpl, m_length); }
bool is8Bit() const { return m_hashAndFlags & s_hashFlag8BitBuffer; }
ALWAYS_INLINE const LChar* characters8() const { ASSERT(is8Bit()); return m_data8; }
ALWAYS_INLINE const UChar* characters16() const { ASSERT(!is8Bit()); return m_data16; }
- const UChar* characters() const { return deprecatedCharacters(); } // FIXME: Delete this.
- ALWAYS_INLINE const UChar* deprecatedCharacters() const
- {
- if (!is8Bit())
- return m_data16;
-
- return getData16SlowCase();
- }
template <typename CharType>
- ALWAYS_INLINE const CharType * getCharacters() const;
+ ALWAYS_INLINE const CharType *characters() const;
size_t cost() const
{
// For substrings, return the cost of the base string.
if (bufferOwnership() == BufferSubstring)
- return m_substringBuffer->cost();
+ return substringBuffer()->cost();
if (m_hashAndFlags & s_hashFlagDidReportCost)
return 0;
@@ -486,7 +414,7 @@ public:
return 0;
if (bufferOwnership() == BufferSubstring)
- return divideRoundedUp(m_substringBuffer->costDuringGC(), refCount());
+ return divideRoundedUp(substringBuffer()->costDuringGC(), refCount());
size_t result = m_length;
if (!is8Bit())
@@ -496,40 +424,28 @@ public:
WTF_EXPORT_STRING_API size_t sizeInBytes() const;
- bool has16BitShadow() const { return m_hashAndFlags & s_hashFlagHas16BitShadow; }
- WTF_EXPORT_STRING_API void upconvertCharacters(unsigned, unsigned) const;
- bool isIdentifier() const { return m_hashAndFlags & s_hashFlagIsIdentifier; }
- bool isIdentifierOrUnique() const { return isIdentifier() || isEmptyUnique(); }
- void setIsIdentifier(bool isIdentifier)
- {
- ASSERT(!isStatic());
- ASSERT(!isEmptyUnique());
- if (isIdentifier)
- m_hashAndFlags |= s_hashFlagIsIdentifier;
- else
- m_hashAndFlags &= ~s_hashFlagIsIdentifier;
- }
-
- bool isEmptyUnique() const
- {
- return !length() && !isStatic();
- }
+ StringKind stringKind() const { return static_cast<StringKind>(m_hashAndFlags & s_hashMaskStringKind); }
+ bool isSymbol() const { return m_hashAndFlags & s_hashFlagStringKindIsSymbol; }
+ bool isAtomic() const { return m_hashAndFlags & s_hashFlagStringKindIsAtomic; }
- bool isAtomic() const { return m_hashAndFlags & s_hashFlagIsAtomic; }
void setIsAtomic(bool isAtomic) // Sets or clears the atomic string-kind bit in m_hashAndFlags.
{
ASSERT(!isStatic());
- ASSERT(!isEmptyUnique());
- if (isAtomic)
- m_hashAndFlags |= s_hashFlagIsAtomic;
- else
- m_hashAndFlags &= ~s_hashFlagIsAtomic;
+ ASSERT(!isSymbol()); // Debug check: this must not be called on a symbol string.
+ if (isAtomic) {
+ m_hashAndFlags |= s_hashFlagStringKindIsAtomic;
+ ASSERT(stringKind() == StringAtomic); // With the symbol bit clear, the kind must now read as StringAtomic.
+ } else {
+ m_hashAndFlags &= ~s_hashFlagStringKindIsAtomic;
+ ASSERT(stringKind() == StringNormal); // With both kind bits clear, the kind must read as StringNormal.
+ }
}
-#ifdef STRING_STATS
- bool isSubString() const { return bufferOwnership() == BufferSubstring; }
+#if STRING_STATS
+ bool isSubString() const { return bufferOwnership() == BufferSubstring; }
#endif
+ static WTF_EXPORT_STRING_API CString utf8ForCharacters(const LChar* characters, unsigned length);
static WTF_EXPORT_STRING_API CString utf8ForCharacters(const UChar* characters, unsigned length, ConversionMode = LenientConversion);
WTF_EXPORT_STRING_API CString utf8ForRange(unsigned offset, unsigned length, ConversionMode = LenientConversion) const;
WTF_EXPORT_STRING_API CString utf8(ConversionMode = LenientConversion) const;
@@ -577,7 +493,12 @@ public:
return existingHash();
return hashSlowCase();
}
-
+
+ WTF_EXPORT_PRIVATE unsigned concurrentHash() const;
+
+ unsigned symbolAwareHash() const;
+ unsigned existingSymbolAwareHash() const;
+
bool isStatic() const { return m_refCount & s_refCountFlagIsStaticString; }
inline size_t refCount() const
@@ -598,13 +519,15 @@ public:
inline void ref()
{
- ASSERT(!isCompilationThread());
+ STRING_STATS_REF_STRING(*this);
+
m_refCount += s_refCountIncrement; // Steps by s_refCountIncrement (0x2) so the static-string flag in the bottom bit is left untouched.
}
inline void deref()
{
- ASSERT(!isCompilationThread());
+ STRING_STATS_DEREF_STRING(*this);
+
unsigned tempRefCount = m_refCount - s_refCountIncrement;
if (!tempRefCount) {
StringImpl::destroy(this);
@@ -613,7 +536,47 @@ public:
m_refCount = tempRefCount;
}
- WTF_EXPORT_PRIVATE static StringImpl* empty();
+ class StaticStringImpl {
+ WTF_MAKE_NONCOPYABLE(StaticStringImpl);
+ public:
+ // Used to construct static strings, which have a special refCount (the static-string flag bit is set) that can never hit zero.
+ // This means that the static string will never be destroyed, which is important because
+ // static strings will be shared across threads and ref-counted in a non-thread-safe manner.
+ template<unsigned charactersCount>
+ constexpr StaticStringImpl(const char (&characters)[charactersCount], StringKind stringKind = StringNormal)
+ : m_refCount(s_refCountFlagIsStaticString) // Only the flag bit is set; ref()/deref() step by s_refCountIncrement and never clear it.
+ , m_length(charactersCount - 1) // charactersCount is the array extent; presumably the final element is the literal's null terminator.
+ , m_data8(characters)
+ , m_hashAndFlags(s_hashFlag8BitBuffer | stringKind | BufferInternal | (StringHasher::computeLiteralHashAndMaskTop8Bits(characters) << s_flagCount)) // The literal's hash is shifted left by s_flagCount and OR-ed with the flag bits.
+ {
+ }
+
+ template<unsigned charactersCount>
+ constexpr StaticStringImpl(const char16_t (&characters)[charactersCount], StringKind stringKind = StringNormal)
+ : m_refCount(s_refCountFlagIsStaticString) // Same static-flag-only count as the 8-bit constructor.
+ , m_length(charactersCount - 1)
+ , m_data16(characters)
+ , m_hashAndFlags(stringKind | BufferInternal | (StringHasher::computeLiteralHashAndMaskTop8Bits(characters) << s_flagCount)) // No s_hashFlag8BitBuffer here: the buffer is 16-bit.
+ {
+ }
+
+ operator StringImpl&()
+ {
+ return *reinterpret_cast<StringImpl*>(this); // Reinterprets this object as a StringImpl; relies on the matching member layout noted below.
+ }
+
+ // These member variables must match the layout of StringImpl, because this object is reinterpreted as one.
+ unsigned m_refCount;
+ unsigned m_length;
+ union {
+ const char* m_data8;
+ const char16_t* m_data16;
+ };
+ unsigned m_hashAndFlags;
+ };
+
+ WTF_EXPORTDATA static StaticStringImpl s_atomicEmptyString;
+ ALWAYS_INLINE static StringImpl* empty() { return reinterpret_cast<StringImpl*>(&s_atomicEmptyString); }
// FIXME: Does this really belong in StringImpl?
template <typename T> static void copyChars(T* destination, const T* source, unsigned numCharacters)
@@ -652,9 +615,9 @@ public:
// Some string features, like refcounting and the atomicity flag, are not
// thread-safe. We achieve thread safety by isolation, giving each thread
// its own copy of the string.
- PassRef<StringImpl> isolatedCopy() const;
+ Ref<StringImpl> isolatedCopy() const;
- WTF_EXPORT_STRING_API PassRef<StringImpl> substring(unsigned pos, unsigned len = UINT_MAX);
+ WTF_EXPORT_STRING_API Ref<StringImpl> substring(unsigned pos, unsigned len = UINT_MAX);
UChar at(unsigned i) const
{
@@ -686,23 +649,24 @@ public:
double toDouble(bool* ok = 0);
float toFloat(bool* ok = 0);
- WTF_EXPORT_STRING_API PassRef<StringImpl> lower();
- WTF_EXPORT_STRING_API PassRef<StringImpl> upper();
- WTF_EXPORT_STRING_API PassRef<StringImpl> lower(const AtomicString& localeIdentifier);
- WTF_EXPORT_STRING_API PassRef<StringImpl> upper(const AtomicString& localeIdentifier);
+ WTF_EXPORT_STRING_API Ref<StringImpl> convertToASCIILowercase();
+ WTF_EXPORT_STRING_API Ref<StringImpl> convertToASCIIUppercase();
+ WTF_EXPORT_STRING_API Ref<StringImpl> convertToLowercaseWithoutLocale();
+ WTF_EXPORT_STRING_API Ref<StringImpl> convertToLowercaseWithoutLocaleStartingAtFailingIndex8Bit(unsigned);
+ WTF_EXPORT_STRING_API Ref<StringImpl> convertToUppercaseWithoutLocale();
+ WTF_EXPORT_STRING_API Ref<StringImpl> convertToLowercaseWithLocale(const AtomicString& localeIdentifier);
+ WTF_EXPORT_STRING_API Ref<StringImpl> convertToUppercaseWithLocale(const AtomicString& localeIdentifier);
- WTF_EXPORT_STRING_API PassRef<StringImpl> fill(UChar);
- // FIXME: Do we need fill(char) or can we just do the right thing if UChar is ASCII?
- PassRef<StringImpl> foldCase();
+ Ref<StringImpl> foldCase();
- PassRef<StringImpl> stripWhiteSpace();
- PassRef<StringImpl> stripWhiteSpace(IsWhiteSpaceFunctionPtr);
- WTF_EXPORT_STRING_API PassRef<StringImpl> simplifyWhiteSpace();
- PassRef<StringImpl> simplifyWhiteSpace(IsWhiteSpaceFunctionPtr);
+ Ref<StringImpl> stripWhiteSpace();
+ Ref<StringImpl> stripWhiteSpace(IsWhiteSpaceFunctionPtr);
+ WTF_EXPORT_STRING_API Ref<StringImpl> simplifyWhiteSpace();
+ Ref<StringImpl> simplifyWhiteSpace(IsWhiteSpaceFunctionPtr);
- PassRef<StringImpl> removeCharacters(CharacterMatchFunctionPtr);
+ Ref<StringImpl> removeCharacters(CharacterMatchFunctionPtr);
template <typename CharType>
- ALWAYS_INLINE PassRef<StringImpl> removeCharacters(const CharType* characters, CharacterMatchFunctionPtr);
+ ALWAYS_INLINE Ref<StringImpl> removeCharacters(const CharType* characters, CharacterMatchFunctionPtr);
size_t find(LChar character, unsigned start = 0);
size_t find(char character, unsigned start = 0);
@@ -715,33 +679,44 @@ public:
size_t findIgnoringCase(const LChar*, unsigned index = 0);
ALWAYS_INLINE size_t findIgnoringCase(const char* s, unsigned index = 0) { return findIgnoringCase(reinterpret_cast<const LChar*>(s), index); }
WTF_EXPORT_STRING_API size_t findIgnoringCase(StringImpl*, unsigned index = 0);
-
- WTF_EXPORT_STRING_API size_t findNextLineStart(unsigned index = UINT_MAX);
+ WTF_EXPORT_STRING_API size_t findIgnoringASCIICase(const StringImpl&) const;
+ WTF_EXPORT_STRING_API size_t findIgnoringASCIICase(const StringImpl&, unsigned startOffset) const;
+ WTF_EXPORT_STRING_API size_t findIgnoringASCIICase(const StringImpl*) const;
+ WTF_EXPORT_STRING_API size_t findIgnoringASCIICase(const StringImpl*, unsigned startOffset) const;
WTF_EXPORT_STRING_API size_t reverseFind(UChar, unsigned index = UINT_MAX);
WTF_EXPORT_STRING_API size_t reverseFind(StringImpl*, unsigned index = UINT_MAX);
WTF_EXPORT_STRING_API size_t reverseFindIgnoringCase(StringImpl*, unsigned index = UINT_MAX);
WTF_EXPORT_STRING_API bool startsWith(const StringImpl*) const;
+ WTF_EXPORT_STRING_API bool startsWith(const StringImpl&) const;
+ WTF_EXPORT_STRING_API bool startsWithIgnoringASCIICase(const StringImpl*) const;
+ WTF_EXPORT_STRING_API bool startsWithIgnoringASCIICase(const StringImpl&) const;
bool startsWith(StringImpl* str, bool caseSensitive) { return caseSensitive ? startsWith(str) : (reverseFindIgnoringCase(str, 0) == 0); }
WTF_EXPORT_STRING_API bool startsWith(UChar) const;
WTF_EXPORT_STRING_API bool startsWith(const char*, unsigned matchLength, bool caseSensitive) const;
template<unsigned matchLength>
bool startsWith(const char (&prefix)[matchLength], bool caseSensitive = true) const { return startsWith(prefix, matchLength - 1, caseSensitive); }
+ WTF_EXPORT_STRING_API bool hasInfixStartingAt(const StringImpl&, unsigned startOffset) const;
- WTF_EXPORT_STRING_API bool endsWith(StringImpl*, bool caseSensitive = true);
+ WTF_EXPORT_STRING_API bool endsWith(StringImpl*);
+ WTF_EXPORT_STRING_API bool endsWith(StringImpl&);
+ WTF_EXPORT_STRING_API bool endsWithIgnoringASCIICase(const StringImpl*) const;
+ WTF_EXPORT_STRING_API bool endsWithIgnoringASCIICase(const StringImpl&) const;
+ WTF_EXPORT_STRING_API bool endsWith(StringImpl*, bool caseSensitive);
WTF_EXPORT_STRING_API bool endsWith(UChar) const;
WTF_EXPORT_STRING_API bool endsWith(const char*, unsigned matchLength, bool caseSensitive) const;
template<unsigned matchLength>
bool endsWith(const char (&prefix)[matchLength], bool caseSensitive = true) const { return endsWith(prefix, matchLength - 1, caseSensitive); }
+ WTF_EXPORT_STRING_API bool hasInfixEndingAt(const StringImpl&, unsigned endOffset) const;
- WTF_EXPORT_STRING_API PassRef<StringImpl> replace(UChar, UChar);
- WTF_EXPORT_STRING_API PassRef<StringImpl> replace(UChar, StringImpl*);
- ALWAYS_INLINE PassRef<StringImpl> replace(UChar pattern, const char* replacement, unsigned replacementLength) { return replace(pattern, reinterpret_cast<const LChar*>(replacement), replacementLength); }
- WTF_EXPORT_STRING_API PassRef<StringImpl> replace(UChar, const LChar*, unsigned replacementLength);
- PassRef<StringImpl> replace(UChar, const UChar*, unsigned replacementLength);
- WTF_EXPORT_STRING_API PassRef<StringImpl> replace(StringImpl*, StringImpl*);
- WTF_EXPORT_STRING_API PassRef<StringImpl> replace(unsigned index, unsigned len, StringImpl*);
+ WTF_EXPORT_STRING_API Ref<StringImpl> replace(UChar, UChar);
+ WTF_EXPORT_STRING_API Ref<StringImpl> replace(UChar, StringImpl*);
+ ALWAYS_INLINE Ref<StringImpl> replace(UChar pattern, const char* replacement, unsigned replacementLength) { return replace(pattern, reinterpret_cast<const LChar*>(replacement), replacementLength); }
+ WTF_EXPORT_STRING_API Ref<StringImpl> replace(UChar, const LChar*, unsigned replacementLength);
+ Ref<StringImpl> replace(UChar, const UChar*, unsigned replacementLength);
+ WTF_EXPORT_STRING_API Ref<StringImpl> replace(StringImpl*, StringImpl*);
+ WTF_EXPORT_STRING_API Ref<StringImpl> replace(unsigned index, unsigned len, StringImpl*);
WTF_EXPORT_STRING_API UCharDirection defaultWritingDirection(bool* hasStrongDirectionality = nullptr);
@@ -749,14 +724,70 @@ public:
RetainPtr<CFStringRef> createCFString();
#endif
#ifdef __OBJC__
- WTF_EXPORT_STRING_API operator NSString*();
+ WTF_EXPORT_STRING_API operator NSString *();
#endif
-#ifdef STRING_STATS
+#if STRING_STATS
ALWAYS_INLINE static StringStats& stringStats() { return m_stringStats; }
#endif
- WTF_EXPORT_STRING_API static const UChar latin1CaseFoldTable[256];
+protected:
+ ~StringImpl();
+
+ enum CreateSymbolTag { CreateSymbol };
+
+ // Used to create a new symbol string that holds an existing 8-bit [[Description]] string as a substring buffer (BufferSubstring).
+ StringImpl(CreateSymbolTag, const LChar* characters, unsigned length)
+ : m_refCount(s_refCountIncrement) // One reference; the static-string flag bit is clear.
+ , m_length(length)
+ , m_data8(characters) // Stores the caller's pointer; this constructor copies no characters.
+ , m_hashAndFlags(s_hashFlag8BitBuffer | StringSymbol | BufferSubstring) // No hash bits are set here.
+ {
+ ASSERT(is8Bit());
+ ASSERT(m_data8);
+ STRING_STATS_ADD_8BIT_STRING2(m_length, true);
+ }
+
+ // Used to create a new symbol string that holds an existing 16-bit [[Description]] string as a substring buffer (BufferSubstring).
+ StringImpl(CreateSymbolTag, const UChar* characters, unsigned length)
+ : m_refCount(s_refCountIncrement) // One reference; the static-string flag bit is clear.
+ , m_length(length)
+ , m_data16(characters) // Stores the caller's pointer; this constructor copies no characters.
+ , m_hashAndFlags(StringSymbol | BufferSubstring) // s_hashFlag8BitBuffer is omitted, so is8Bit() is false.
+ {
+ ASSERT(!is8Bit());
+ ASSERT(m_data16);
+ STRING_STATS_ADD_16BIT_STRING2(m_length, true);
+ }
+
+ // Used to create the null symbol: zero length, 8-bit, with its data pointer borrowed from the empty string.
+ StringImpl(CreateSymbolTag)
+ : m_refCount(s_refCountIncrement) // One reference; the static-string flag bit is clear.
+ , m_length(0)
+ , m_data8(empty()->characters8()) // Reuses the empty string's character pointer.
+ , m_hashAndFlags(s_hashFlag8BitBuffer | StringSymbol | BufferSubstring)
+ {
+ ASSERT(is8Bit());
+ ASSERT(m_data8);
+ STRING_STATS_ADD_8BIT_STRING2(m_length, true);
+ }
+
+ template<typename T>
+ static size_t allocationSize(unsigned tailElementCount) // Byte count for the object up to its tail plus tailElementCount elements of T.
+ {
+ return tailOffset<T>() + tailElementCount * sizeof(T);
+ }
+
+ template<typename T>
+ static ptrdiff_t tailOffset() // Byte offset from the start of a StringImpl to where its tail of T elements begins.
+ {
+#if COMPILER(MSVC)
+ // MSVC doesn't support alignof yet, so round sizeof(StringImpl) up to a multiple of sizeof(T) instead.
+ return roundUpToMultipleOf<sizeof(T)>(sizeof(StringImpl));
+#else
+ return roundUpToMultipleOf<alignof(T)>(offsetof(StringImpl, m_hashAndFlags) + sizeof(StringImpl::m_hashAndFlags)); // End of m_hashAndFlags, rounded up to T's alignment.
+#endif
+ }
private:
bool requiresCopy() const
@@ -765,59 +796,61 @@ private:
return true;
if (is8Bit())
- return reinterpret_cast<const void*>(m_data8) == reinterpret_cast<const void*>(this + 1);
- return reinterpret_cast<const void*>(m_data16) == reinterpret_cast<const void*>(this + 1);
+ return m_data8 == tailPointer<LChar>();
+ return m_data16 == tailPointer<UChar>();
+ }
+
+ template<typename T>
+ const T* tailPointer() const // Read-only pointer to the storage that starts tailOffset<T>() bytes into this object.
+ {
+ return reinterpret_cast_ptr<const T*>(reinterpret_cast<const uint8_t*>(this) + tailOffset<T>()); // Byte-wise pointer arithmetic, then cast to T.
+ }
+
+ template<typename T>
+ T* tailPointer() // Mutable pointer to the storage that starts tailOffset<T>() bytes into this object.
+ {
+ return reinterpret_cast_ptr<T*>(reinterpret_cast<uint8_t*>(this) + tailOffset<T>()); // Byte-wise pointer arithmetic, then cast to T.
+ }
+
+ StringImpl* const& substringBuffer() const // Read-only access to the StringImpl* stored in this object's tail.
+ {
+ ASSERT(bufferOwnership() == BufferSubstring); // Only valid for strings whose ownership is BufferSubstring.
+
+ return *tailPointer<StringImpl*>(); // The tail slot is read as a single StringImpl*.
+ }
+
+ StringImpl*& substringBuffer() // Mutable access to the StringImpl* stored in this object's tail.
+ {
+ ASSERT(bufferOwnership() == BufferSubstring); // Only valid for strings whose ownership is BufferSubstring.
+
+ return *tailPointer<StringImpl*>(); // The tail slot is read as a single StringImpl*.
}
// This number must be at least 2 to avoid sharing empty, null as well as 1 character strings from SmallStrings.
static const unsigned s_copyCharsInlineCutOff = 20;
+ enum class CaseConvertType { Upper, Lower };
+ template<CaseConvertType type, typename CharacterType> static Ref<StringImpl> convertASCIICase(StringImpl&, const CharacterType*, unsigned);
+
BufferOwnership bufferOwnership() const { return static_cast<BufferOwnership>(m_hashAndFlags & s_hashMaskBufferOwnership); }
- template <class UCharPredicate> PassRef<StringImpl> stripMatchedCharacters(UCharPredicate);
- template <typename CharType, class UCharPredicate> PassRef<StringImpl> simplifyMatchedCharactersToSpace(UCharPredicate);
- template <typename CharType> static PassRef<StringImpl> constructInternal(StringImpl*, unsigned);
- template <typename CharType> static PassRef<StringImpl> createUninitializedInternal(unsigned, CharType*&);
- template <typename CharType> static PassRef<StringImpl> createUninitializedInternalNonEmpty(unsigned, CharType*&);
- template <typename CharType> static PassRef<StringImpl> reallocateInternal(PassRefPtr<StringImpl>, unsigned, CharType*&);
- template <typename CharType> static PassRef<StringImpl> createInternal(const CharType*, unsigned);
- WTF_EXPORT_STRING_API NEVER_INLINE const UChar* getData16SlowCase() const;
+ template <class UCharPredicate> Ref<StringImpl> stripMatchedCharacters(UCharPredicate);
+ template <typename CharType, class UCharPredicate> Ref<StringImpl> simplifyMatchedCharactersToSpace(UCharPredicate);
+ template <typename CharType> static Ref<StringImpl> constructInternal(StringImpl*, unsigned);
+ template <typename CharType> static Ref<StringImpl> createUninitializedInternal(unsigned, CharType*&);
+ template <typename CharType> static Ref<StringImpl> createUninitializedInternalNonEmpty(unsigned, CharType*&);
+ template <typename CharType> static Ref<StringImpl> reallocateInternal(Ref<StringImpl>&&, unsigned, CharType*&);
+ template <typename CharType> static Ref<StringImpl> createInternal(const CharType*, unsigned);
WTF_EXPORT_PRIVATE NEVER_INLINE unsigned hashSlowCase() const;
// The bottom bit in the ref count indicates a static (immortal) string.
static const unsigned s_refCountFlagIsStaticString = 0x1;
static const unsigned s_refCountIncrement = 0x2; // This allows us to ref / deref without disturbing the static string flag.
- // The bottom 7 bits in the hash are flags.
- static const unsigned s_flagCount = 7;
- static const unsigned s_flagMask = (1u << s_flagCount) - 1;
- COMPILE_ASSERT(s_flagCount <= StringHasher::flagCount, StringHasher_reserves_enough_bits_for_StringImpl_flags);
-
- static const unsigned s_hashFlagHas16BitShadow = 1u << 6;
- static const unsigned s_hashFlag8BitBuffer = 1u << 5;
- static const unsigned s_hashFlagIsAtomic = 1u << 4;
- static const unsigned s_hashFlagDidReportCost = 1u << 3;
- static const unsigned s_hashFlagIsIdentifier = 1u << 2;
- static const unsigned s_hashMaskBufferOwnership = 1u | (1u << 1);
-
-#ifdef STRING_STATS
+#if STRING_STATS
WTF_EXPORTDATA static StringStats m_stringStats;
#endif
public:
- struct StaticASCIILiteral {
- // These member variables must match the layout of StringImpl.
- unsigned m_refCount;
- unsigned m_length;
- const LChar* m_data8;
- void* m_buffer;
- unsigned m_hashAndFlags;
-
- // These values mimic ConstructFromLiteral.
- static const unsigned s_initialRefCount = s_refCountIncrement;
- static const unsigned s_initialFlags = s_hashFlag8BitBuffer | BufferInternal;
- static const unsigned s_hashShift = s_flagCount;
- };
-
#ifndef NDEBUG
void assertHashIsCorrect()
{
@@ -827,25 +860,20 @@ public:
#endif
private:
- // These member variables must match the layout of StaticASCIILiteral.
+ // These member variables must match the layout of StaticStringImpl.
unsigned m_refCount;
unsigned m_length;
union {
const LChar* m_data8;
const UChar* m_data16;
};
- union {
- void* m_buffer;
- StringImpl* m_substringBuffer;
- mutable UChar* m_copyData16;
- };
mutable unsigned m_hashAndFlags;
};
-COMPILE_ASSERT(sizeof(StringImpl) == sizeof(StringImpl::StaticASCIILiteral), StringImpl_should_match_its_StaticASCIILiteral);
+static_assert(sizeof(StringImpl) == sizeof(StringImpl::StaticStringImpl), "");
#if !ASSERT_DISABLED
-// StringImpls created from StaticASCIILiteral will ASSERT
+// StringImpls created from StaticStringImpl will ASSERT
// in the generic ValueCheck<T>::checkConsistency
// as they are not allocated by fastMalloc.
// We don't currently have any way to detect that case
@@ -857,15 +885,15 @@ ValueCheck<StringImpl*> {
#endif
template <>
-ALWAYS_INLINE PassRef<StringImpl> StringImpl::constructInternal<LChar>(StringImpl* impl, unsigned length) { return adoptRef(*new (NotNull, impl) StringImpl(length, Force8BitConstructor)); }
+ALWAYS_INLINE Ref<StringImpl> StringImpl::constructInternal<LChar>(StringImpl* impl, unsigned length) { return adoptRef(*new (NotNull, impl) StringImpl(length, Force8BitConstructor)); }
template <>
-ALWAYS_INLINE PassRef<StringImpl> StringImpl::constructInternal<UChar>(StringImpl* impl, unsigned length) { return adoptRef(*new (NotNull, impl) StringImpl(length)); }
+ALWAYS_INLINE Ref<StringImpl> StringImpl::constructInternal<UChar>(StringImpl* impl, unsigned length) { return adoptRef(*new (NotNull, impl) StringImpl(length)); }
template <>
-ALWAYS_INLINE const LChar* StringImpl::getCharacters<LChar>() const { return characters8(); }
+ALWAYS_INLINE const LChar* StringImpl::characters<LChar>() const { return characters8(); }
template <>
-ALWAYS_INLINE const UChar* StringImpl::getCharacters<UChar>() const { return deprecatedCharacters(); }
+ALWAYS_INLINE const UChar* StringImpl::characters<UChar>() const { return characters16(); }
WTF_EXPORT_STRING_API bool equal(const StringImpl*, const StringImpl*);
WTF_EXPORT_STRING_API bool equal(const StringImpl*, const LChar*);
@@ -875,266 +903,20 @@ WTF_EXPORT_STRING_API bool equal(const StringImpl*, const UChar*, unsigned);
inline bool equal(const StringImpl* a, const char* b, unsigned length) { return equal(a, reinterpret_cast<const LChar*>(b), length); }
inline bool equal(const LChar* a, StringImpl* b) { return equal(b, a); }
inline bool equal(const char* a, StringImpl* b) { return equal(b, reinterpret_cast<const LChar*>(a)); }
-WTF_EXPORT_STRING_API bool equalNonNull(const StringImpl* a, const StringImpl* b);
-
-// Do comparisons 8 or 4 bytes-at-a-time on architectures where it's safe.
-#if CPU(X86_64) || CPU(ARM64)
-ALWAYS_INLINE bool equal(const LChar* a, const LChar* b, unsigned length)
-{
- unsigned dwordLength = length >> 3;
-
- if (dwordLength) {
- const uint64_t* aDWordCharacters = reinterpret_cast<const uint64_t*>(a);
- const uint64_t* bDWordCharacters = reinterpret_cast<const uint64_t*>(b);
-
- for (unsigned i = 0; i != dwordLength; ++i) {
- if (*aDWordCharacters++ != *bDWordCharacters++)
- return false;
- }
-
- a = reinterpret_cast<const LChar*>(aDWordCharacters);
- b = reinterpret_cast<const LChar*>(bDWordCharacters);
- }
-
- if (length & 4) {
- if (*reinterpret_cast<const uint32_t*>(a) != *reinterpret_cast<const uint32_t*>(b))
- return false;
-
- a += 4;
- b += 4;
- }
-
- if (length & 2) {
- if (*reinterpret_cast<const uint16_t*>(a) != *reinterpret_cast<const uint16_t*>(b))
- return false;
-
- a += 2;
- b += 2;
- }
-
- if (length & 1 && (*a != *b))
- return false;
-
- return true;
-}
-
-ALWAYS_INLINE bool equal(const UChar* a, const UChar* b, unsigned length)
-{
- unsigned dwordLength = length >> 2;
-
- if (dwordLength) {
- const uint64_t* aDWordCharacters = reinterpret_cast<const uint64_t*>(a);
- const uint64_t* bDWordCharacters = reinterpret_cast<const uint64_t*>(b);
-
- for (unsigned i = 0; i != dwordLength; ++i) {
- if (*aDWordCharacters++ != *bDWordCharacters++)
- return false;
- }
-
- a = reinterpret_cast<const UChar*>(aDWordCharacters);
- b = reinterpret_cast<const UChar*>(bDWordCharacters);
- }
-
- if (length & 2) {
- if (*reinterpret_cast<const uint32_t*>(a) != *reinterpret_cast<const uint32_t*>(b))
- return false;
-
- a += 2;
- b += 2;
- }
-
- if (length & 1 && (*a != *b))
- return false;
-
- return true;
-}
-#elif CPU(X86)
-ALWAYS_INLINE bool equal(const LChar* a, const LChar* b, unsigned length)
-{
- const uint32_t* aCharacters = reinterpret_cast<const uint32_t*>(a);
- const uint32_t* bCharacters = reinterpret_cast<const uint32_t*>(b);
-
- unsigned wordLength = length >> 2;
- for (unsigned i = 0; i != wordLength; ++i) {
- if (*aCharacters++ != *bCharacters++)
- return false;
- }
-
- length &= 3;
-
- if (length) {
- const LChar* aRemainder = reinterpret_cast<const LChar*>(aCharacters);
- const LChar* bRemainder = reinterpret_cast<const LChar*>(bCharacters);
-
- for (unsigned i = 0; i < length; ++i) {
- if (aRemainder[i] != bRemainder[i])
- return false;
- }
- }
-
- return true;
-}
-
-ALWAYS_INLINE bool equal(const UChar* a, const UChar* b, unsigned length)
-{
- const uint32_t* aCharacters = reinterpret_cast<const uint32_t*>(a);
- const uint32_t* bCharacters = reinterpret_cast<const uint32_t*>(b);
-
- unsigned wordLength = length >> 1;
- for (unsigned i = 0; i != wordLength; ++i) {
- if (*aCharacters++ != *bCharacters++)
- return false;
- }
-
- if (length & 1 && *reinterpret_cast<const UChar*>(aCharacters) != *reinterpret_cast<const UChar*>(bCharacters))
- return false;
-
- return true;
-}
-#elif PLATFORM(IOS) && WTF_ARM_ARCH_AT_LEAST(7)
-ALWAYS_INLINE bool equal(const LChar* a, const LChar* b, unsigned length)
-{
- bool isEqual = false;
- uint32_t aValue;
- uint32_t bValue;
- asm("subs %[length], #4\n"
- "blo 2f\n"
-
- "0:\n" // Label 0 = Start of loop over 32 bits.
- "ldr %[aValue], [%[a]], #4\n"
- "ldr %[bValue], [%[b]], #4\n"
- "cmp %[aValue], %[bValue]\n"
- "bne 66f\n"
- "subs %[length], #4\n"
- "bhs 0b\n"
-
- // At this point, length can be:
- // -0: 00000000000000000000000000000000 (0 bytes left)
- // -1: 11111111111111111111111111111111 (3 bytes left)
- // -2: 11111111111111111111111111111110 (2 bytes left)
- // -3: 11111111111111111111111111111101 (1 byte left)
- // -4: 11111111111111111111111111111100 (length was 0)
- // The pointers are at the correct position.
- "2:\n" // Label 2 = End of loop over 32 bits, check for pair of characters.
- "tst %[length], #2\n"
- "beq 1f\n"
- "ldrh %[aValue], [%[a]], #2\n"
- "ldrh %[bValue], [%[b]], #2\n"
- "cmp %[aValue], %[bValue]\n"
- "bne 66f\n"
-
- "1:\n" // Label 1 = Check for a single character left.
- "tst %[length], #1\n"
- "beq 42f\n"
- "ldrb %[aValue], [%[a]]\n"
- "ldrb %[bValue], [%[b]]\n"
- "cmp %[aValue], %[bValue]\n"
- "bne 66f\n"
-
- "42:\n" // Label 42 = Success.
- "mov %[isEqual], #1\n"
- "66:\n" // Label 66 = End without changing isEqual to 1.
- : [length]"+r"(length), [isEqual]"+r"(isEqual), [a]"+r"(a), [b]"+r"(b), [aValue]"+r"(aValue), [bValue]"+r"(bValue)
- :
- :
- );
- return isEqual;
-}
-
-ALWAYS_INLINE bool equal(const UChar* a, const UChar* b, unsigned length)
-{
- bool isEqual = false;
- uint32_t aValue;
- uint32_t bValue;
- asm("subs %[length], #2\n"
- "blo 1f\n"
-
- "0:\n" // Label 0 = Start of loop over 32 bits.
- "ldr %[aValue], [%[a]], #4\n"
- "ldr %[bValue], [%[b]], #4\n"
- "cmp %[aValue], %[bValue]\n"
- "bne 66f\n"
- "subs %[length], #2\n"
- "bhs 0b\n"
-
- // At this point, length can be:
- // -0: 00000000000000000000000000000000 (0 bytes left)
- // -1: 11111111111111111111111111111111 (1 character left, 2 bytes)
- // -2: 11111111111111111111111111111110 (length was zero)
- // The pointers are at the correct position.
- "1:\n" // Label 1 = Check for a single character left.
- "tst %[length], #1\n"
- "beq 42f\n"
- "ldrh %[aValue], [%[a]]\n"
- "ldrh %[bValue], [%[b]]\n"
- "cmp %[aValue], %[bValue]\n"
- "bne 66f\n"
-
- "42:\n" // Label 42 = Success.
- "mov %[isEqual], #1\n"
- "66:\n" // Label 66 = End without changing isEqual to 1.
- : [length]"+r"(length), [isEqual]"+r"(isEqual), [a]"+r"(a), [b]"+r"(b), [aValue]"+r"(aValue), [bValue]"+r"(bValue)
- :
- :
- );
- return isEqual;
-}
-#else
-ALWAYS_INLINE bool equal(const LChar* a, const LChar* b, unsigned length) { return !memcmp(a, b, length); }
-ALWAYS_INLINE bool equal(const UChar* a, const UChar* b, unsigned length) { return !memcmp(a, b, length * sizeof(UChar)); }
-#endif
-
-ALWAYS_INLINE bool equal(const LChar* a, const UChar* b, unsigned length)
-{
- for (unsigned i = 0; i < length; ++i) {
- if (a[i] != b[i])
- return false;
- }
- return true;
-}
-
-ALWAYS_INLINE bool equal(const UChar* a, const LChar* b, unsigned length) { return equal(b, a, length); }
-
-WTF_EXPORT_STRING_API bool equalIgnoringCase(const StringImpl*, const StringImpl*);
-WTF_EXPORT_STRING_API bool equalIgnoringCase(const StringImpl*, const LChar*);
-inline bool equalIgnoringCase(const LChar* a, const StringImpl* b) { return equalIgnoringCase(b, a); }
-WTF_EXPORT_STRING_API bool equalIgnoringCase(const LChar*, const LChar*, unsigned);
-WTF_EXPORT_STRING_API bool equalIgnoringCase(const UChar*, const LChar*, unsigned);
-inline bool equalIgnoringCase(const UChar* a, const char* b, unsigned length) { return equalIgnoringCase(a, reinterpret_cast<const LChar*>(b), length); }
-inline bool equalIgnoringCase(const LChar* a, const UChar* b, unsigned length) { return equalIgnoringCase(b, a, length); }
-inline bool equalIgnoringCase(const char* a, const UChar* b, unsigned length) { return equalIgnoringCase(b, reinterpret_cast<const LChar*>(a), length); }
-inline bool equalIgnoringCase(const char* a, const LChar* b, unsigned length) { return equalIgnoringCase(b, reinterpret_cast<const LChar*>(a), length); }
-inline bool equalIgnoringCase(const UChar* a, const UChar* b, int length)
-{
- ASSERT(length >= 0);
- return !u_memcasecmp(a, b, length, U_FOLD_CASE_DEFAULT);
-}
-WTF_EXPORT_STRING_API bool equalIgnoringCaseNonNull(const StringImpl*, const StringImpl*);
+WTF_EXPORT_STRING_API bool equal(const StringImpl& a, const StringImpl& b);
WTF_EXPORT_STRING_API bool equalIgnoringNullity(StringImpl*, StringImpl*);
+WTF_EXPORT_STRING_API bool equalIgnoringNullity(const UChar*, size_t length, StringImpl*);
-template<typename CharacterType>
-inline size_t find(const CharacterType* characters, unsigned length, CharacterType matchCharacter, unsigned index = 0)
-{
- while (index < length) {
- if (characters[index] == matchCharacter)
- return index;
- ++index;
- }
- return notFound;
-}
+bool equalIgnoringASCIICase(const StringImpl&, const StringImpl&);
+WTF_EXPORT_STRING_API bool equalIgnoringASCIICase(const StringImpl*, const StringImpl*);
+bool equalIgnoringASCIICase(const StringImpl&, const char*);
+bool equalIgnoringASCIICase(const StringImpl*, const char*);
-ALWAYS_INLINE size_t find(const UChar* characters, unsigned length, LChar matchCharacter, unsigned index = 0)
-{
- return find(characters, length, static_cast<UChar>(matchCharacter), index);
-}
+WTF_EXPORT_STRING_API bool equalIgnoringASCIICaseNonNull(const StringImpl*, const StringImpl*);
-inline size_t find(const LChar* characters, unsigned length, UChar matchCharacter, unsigned index = 0)
-{
- if (matchCharacter & ~0xFF)
- return notFound;
- return find(characters, length, static_cast<LChar>(matchCharacter), index);
-}
+template<unsigned length> bool equalLettersIgnoringASCIICase(const StringImpl&, const char (&lowercaseLetters)[length]);
+template<unsigned length> bool equalLettersIgnoringASCIICase(const StringImpl*, const char (&lowercaseLetters)[length]);
inline size_t find(const LChar* characters, unsigned length, CharacterMatchFunctionPtr matchFunction, unsigned index = 0)
{
@@ -1157,37 +939,6 @@ inline size_t find(const UChar* characters, unsigned length, CharacterMatchFunct
}
template<typename CharacterType>
-inline size_t findNextLineStart(const CharacterType* characters, unsigned length, unsigned index = 0)
-{
- while (index < length) {
- CharacterType c = characters[index++];
- if ((c != '\n') && (c != '\r'))
- continue;
-
- // There can only be a start of a new line if there are more characters
- // beyond the current character.
- if (index < length) {
- // The 3 common types of line terminators are 1. \r\n (Windows),
- // 2. \r (old MacOS) and 3. \n (Unix'es).
-
- if (c == '\n')
- return index; // Case 3: just \n.
-
- CharacterType c2 = characters[index];
- if (c2 != '\n')
- return index; // Case 2: just \r.
-
- // Case 1: \r\n.
- // But, there's only a start of a new line if there are more
- // characters beyond the \r\n.
- if (++index < length)
- return index;
- }
- }
- return notFound;
-}
-
-template<typename CharacterType>
inline size_t reverseFindLineTerminator(const CharacterType* characters, unsigned length, unsigned index = UINT_MAX)
{
if (!length)
@@ -1248,18 +999,13 @@ inline size_t StringImpl::find(UChar character, unsigned start)
return WTF::find(characters16(), m_length, character, start);
}
-template<size_t inlineCapacity>
-bool equalIgnoringNullity(const Vector<UChar, inlineCapacity>& a, StringImpl* b)
+template<size_t inlineCapacity> inline bool equalIgnoringNullity(const Vector<UChar, inlineCapacity>& a, StringImpl* b)
{
- if (!b)
- return !a.size();
- if (a.size() != b->length())
- return false;
- return !memcmp(a.data(), b->deprecatedCharacters(), b->length() * sizeof(UChar));
+ return equalIgnoringNullity(a.data(), a.size(), b);
}
template<typename CharacterType1, typename CharacterType2>
-static inline int codePointCompare(unsigned l1, unsigned l2, const CharacterType1* c1, const CharacterType2* c2)
+inline int codePointCompare(unsigned l1, unsigned l2, const CharacterType1* c1, const CharacterType2* c2)
{
const unsigned lmin = l1 < l2 ? l1 : l2;
unsigned pos = 0;
@@ -1278,22 +1024,22 @@ static inline int codePointCompare(unsigned l1, unsigned l2, const CharacterType
return (l1 > l2) ? 1 : -1;
}
-static inline int codePointCompare8(const StringImpl* string1, const StringImpl* string2)
+inline int codePointCompare8(const StringImpl* string1, const StringImpl* string2)
{
return codePointCompare(string1->length(), string2->length(), string1->characters8(), string2->characters8());
}
-static inline int codePointCompare16(const StringImpl* string1, const StringImpl* string2)
+inline int codePointCompare16(const StringImpl* string1, const StringImpl* string2)
{
return codePointCompare(string1->length(), string2->length(), string1->characters16(), string2->characters16());
}
-static inline int codePointCompare8To16(const StringImpl* string1, const StringImpl* string2)
+inline int codePointCompare8To16(const StringImpl* string1, const StringImpl* string2)
{
return codePointCompare(string1->length(), string2->length(), string1->characters8(), string2->characters16());
}
-static inline int codePointCompare(const StringImpl* string1, const StringImpl* string2)
+inline int codePointCompare(const StringImpl* string1, const StringImpl* string2)
{
if (!string1)
return (string2 && string2->length()) ? -1 : 0;
@@ -1313,7 +1059,7 @@ static inline int codePointCompare(const StringImpl* string1, const StringImpl*
return codePointCompare16(string1, string2);
}
-static inline bool isSpaceOrNewline(UChar c)
+inline bool isSpaceOrNewline(UChar c)
{
// Use isASCIISpace() for basic Latin-1.
// This will include newlines, which aren't included in Unicode DirWS.
@@ -1332,7 +1078,7 @@ inline unsigned lengthOfNullTerminatedString(const CharacterType* string)
return static_cast<unsigned>(length);
}
-inline PassRef<StringImpl> StringImpl::isolatedCopy() const
+inline Ref<StringImpl> StringImpl::isolatedCopy() const
{
if (!requiresCopy()) {
if (is8Bit())
@@ -1345,8 +1091,6 @@ inline PassRef<StringImpl> StringImpl::isolatedCopy() const
return create(m_data16, m_length);
}
-struct StringHash;
-
// StringHash is the default hash for StringImpl* and RefPtr<StringImpl>
template<typename T> struct DefaultHash;
template<> struct DefaultHash<StringImpl*> {
@@ -1356,11 +1100,45 @@ template<> struct DefaultHash<RefPtr<StringImpl>> {
typedef StringHash Hash;
};
+// Reference/reference overload: hands both strings to equalIgnoringASCIICaseCommon
+// and returns its result unchanged.
+inline bool equalIgnoringASCIICase(const StringImpl& a, const StringImpl& b)
+{
+ return equalIgnoringASCIICaseCommon(a, b);
+}
+
+// Reference/C-string overload: hands the string and the char pointer to
+// equalIgnoringASCIICaseCommon and returns its result unchanged.
+inline bool equalIgnoringASCIICase(const StringImpl& a, const char* b)
+{
+ return equalIgnoringASCIICaseCommon(a, b);
+}
+
+// Pointer overload: a null StringImpl never compares equal; otherwise the
+// comparison is delegated to the reference overload.
+inline bool equalIgnoringASCIICase(const StringImpl* a, const char* b)
+{
+    if (!a)
+        return false;
+    return equalIgnoringASCIICase(*a, b);
+}
+
+// Reference overload: forwards the string and the literal array to
+// startsWithLettersIgnoringASCIICaseCommon and returns its result.
+template<unsigned length> inline bool startsWithLettersIgnoringASCIICase(const StringImpl& string, const char (&lowercaseLetters)[length])
+{
+ return startsWithLettersIgnoringASCIICaseCommon(string, lowercaseLetters);
+}
+
+// Pointer overload: a null string has no prefix, so it yields false; any other
+// string is checked through the reference overload.
+template<unsigned length> inline bool startsWithLettersIgnoringASCIICase(const StringImpl* string, const char (&lowercaseLetters)[length])
+{
+    if (!string)
+        return false;
+    return startsWithLettersIgnoringASCIICase(*string, lowercaseLetters);
+}
+
+// Reference overload: forwards the string and the literal array to
+// equalLettersIgnoringASCIICaseCommon and returns its result.
+template<unsigned length> inline bool equalLettersIgnoringASCIICase(const StringImpl& string, const char (&lowercaseLetters)[length])
+{
+ return equalLettersIgnoringASCIICaseCommon(string, lowercaseLetters);
+}
+
+// Pointer overload: a null string never matches; any other string is compared
+// through the reference overload.
+template<unsigned length> inline bool equalLettersIgnoringASCIICase(const StringImpl* string, const char (&lowercaseLetters)[length])
+{
+    if (!string)
+        return false;
+    return equalLettersIgnoringASCIICase(*string, lowercaseLetters);
+}
+
} // namespace WTF
using WTF::StringImpl;
using WTF::equal;
-using WTF::equalNonNull;
using WTF::TextCaseSensitivity;
using WTF::TextCaseSensitive;
using WTF::TextCaseInsensitive;
diff --git a/Source/WTF/wtf/text/StringOperators.h b/Source/WTF/wtf/text/StringOperators.h
index cfd2f6d6c..5a2435658 100644
--- a/Source/WTF/wtf/text/StringOperators.h
+++ b/Source/WTF/wtf/text/StringOperators.h
@@ -35,10 +35,10 @@ public:
operator String() const
{
- RefPtr<StringImpl> resultImpl = tryMakeString(m_string1, m_string2);
- if (!resultImpl)
+ String result = tryMakeString(m_string1, m_string2);
+ if (!result)
CRASH();
- return resultImpl.release();
+ return result;
}
operator AtomicString() const
@@ -97,6 +97,8 @@ public:
void writeTo(LChar* destination) { m_buffer.writeTo(destination); }
void writeTo(UChar* destination) { m_buffer.writeTo(destination); }
+ String toString() const { return m_buffer; }
+
private:
StringAppend<StringType1, StringType2>& m_buffer;
};
diff --git a/Source/WTF/wtf/text/StringStatics.cpp b/Source/WTF/wtf/text/StringStatics.cpp
index 8f0c74cc0..0c2119c1d 100644
--- a/Source/WTF/wtf/text/StringStatics.cpp
+++ b/Source/WTF/wtf/text/StringStatics.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2010 Apple Inc. All Rights Reserved.
+ * Copyright (C) 2010, 2016 Apple Inc. All Rights Reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -30,8 +30,8 @@
#endif
#include "AtomicString.h"
-#include "DynamicAnnotations.h"
#include "MainThread.h"
+#include "NeverDestroyed.h"
#include "StaticConstructors.h"
#include "StringImpl.h"
@@ -41,29 +41,11 @@
namespace WTF {
-StringImpl* StringImpl::empty()
-{
- // FIXME: This works around a bug in our port of PCRE, that a regular expression
- // run on the empty string may still perform a read from the first element, and
- // as such we need this to be a valid pointer. No code should ever be reading
- // from a zero length string, so this should be able to be a non-null pointer
- // into the zero-page.
- // Replace this with 'reinterpret_cast<UChar*>(static_cast<intptr_t>(1))' once
- // PCRE goes away.
- static LChar emptyLCharData = 0;
- DEFINE_STATIC_LOCAL(StringImpl, emptyString, (&emptyLCharData, 0, ConstructStaticString));
- WTF_ANNOTATE_BENIGN_RACE(&emptyString, "Benign race on StringImpl::emptyString reference counter");
- return &emptyString;
-}
-
WTF_EXPORTDATA DEFINE_GLOBAL(AtomicString, nullAtom)
WTF_EXPORTDATA DEFINE_GLOBAL(AtomicString, emptyAtom)
-WTF_EXPORTDATA DEFINE_GLOBAL(AtomicString, textAtom)
-WTF_EXPORTDATA DEFINE_GLOBAL(AtomicString, commentAtom)
WTF_EXPORTDATA DEFINE_GLOBAL(AtomicString, starAtom)
WTF_EXPORTDATA DEFINE_GLOBAL(AtomicString, xmlAtom)
WTF_EXPORTDATA DEFINE_GLOBAL(AtomicString, xmlnsAtom)
-WTF_EXPORTDATA DEFINE_GLOBAL(AtomicString, xlinkAtom)
NEVER_INLINE unsigned StringImpl::hashSlowCase() const
{
@@ -74,6 +56,17 @@ NEVER_INLINE unsigned StringImpl::hashSlowCase() const
return existingHash();
}
+// Computes the hash from the character data on every call and returns it;
+// nothing is stored on the StringImpl by this function.
+unsigned StringImpl::concurrentHash() const
+{
+    const unsigned computedHash = is8Bit()
+        ? StringHasher::computeHashAndMaskTop8Bits(m_data8, m_length)
+        : StringHasher::computeHashAndMaskTop8Bits(m_data16, m_length);
+    // The value must survive a round trip through the flag-bit shift unchanged.
+    ASSERT(((computedHash << s_flagCount) >> s_flagCount) == computedHash);
+    return computedHash;
+}
+
void AtomicString::init()
{
static bool initialized;
@@ -84,12 +77,9 @@ void AtomicString::init()
// Use placement new to initialize the globals.
new (NotNull, (void*)&nullAtom) AtomicString;
new (NotNull, (void*)&emptyAtom) AtomicString("");
- new (NotNull, (void*)&textAtom) AtomicString("#text", AtomicString::ConstructFromLiteral);
- new (NotNull, (void*)&commentAtom) AtomicString("#comment", AtomicString::ConstructFromLiteral);
new (NotNull, (void*)&starAtom) AtomicString("*", AtomicString::ConstructFromLiteral);
new (NotNull, (void*)&xmlAtom) AtomicString("xml", AtomicString::ConstructFromLiteral);
new (NotNull, (void*)&xmlnsAtom) AtomicString("xmlns", AtomicString::ConstructFromLiteral);
- new (NotNull, (void*)&xlinkAtom) AtomicString("xlink", AtomicString::ConstructFromLiteral);
initialized = true;
}
diff --git a/Source/WTF/wtf/text/StringView.cpp b/Source/WTF/wtf/text/StringView.cpp
new file mode 100644
index 000000000..580799765
--- /dev/null
+++ b/Source/WTF/wtf/text/StringView.cpp
@@ -0,0 +1,285 @@
+/*
+
+Copyright (C) 2014-2017 Apple Inc. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+*/
+
+#include "config.h"
+#include "StringView.h"
+
+#include <mutex>
+#include <unicode/ubrk.h>
+#include <wtf/HashMap.h>
+#include <wtf/Lock.h>
+#include <wtf/NeverDestroyed.h>
+#include <wtf/Optional.h>
+#include <wtf/text/TextBreakIterator.h>
+#include <wtf/unicode/UTF8.h>
+
+namespace WTF {
+
+using namespace Unicode;
+
+// True when findIgnoringASCIICase(matchString) reports a position other than notFound.
+bool StringView::containsIgnoringASCIICase(const StringView& matchString) const
+{
+ return findIgnoringASCIICase(matchString) != notFound;
+}
+
+// True when findIgnoringASCIICase(matchString, startOffset) reports a position
+// other than notFound.
+bool StringView::containsIgnoringASCIICase(const StringView& matchString, unsigned startOffset) const
+{
+ return findIgnoringASCIICase(matchString, startOffset) != notFound;
+}
+
+// Forwards to the namespace-level WTF::findIgnoringASCIICase with a start offset of 0.
+size_t StringView::findIgnoringASCIICase(const StringView& matchString) const
+{
+ return ::WTF::findIgnoringASCIICase(*this, matchString, 0);
+}
+
+// Forwards to the namespace-level WTF::findIgnoringASCIICase, passing startOffset through.
+size_t StringView::findIgnoringASCIICase(const StringView& matchString, unsigned startOffset) const
+{
+ return ::WTF::findIgnoringASCIICase(*this, matchString, startOffset);
+}
+
+// Member wrapper around the namespace-level WTF::startsWith(*this, prefix).
+bool StringView::startsWith(const StringView& prefix) const
+{
+ return ::WTF::startsWith(*this, prefix);
+}
+
+// Member wrapper around the namespace-level WTF::startsWithIgnoringASCIICase(*this, prefix).
+bool StringView::startsWithIgnoringASCIICase(const StringView& prefix) const
+{
+ return ::WTF::startsWithIgnoringASCIICase(*this, prefix);
+}
+
+// Member wrapper around the namespace-level WTF::endsWith(*this, suffix).
+bool StringView::endsWith(const StringView& suffix) const
+{
+ return ::WTF::endsWith(*this, suffix);
+}
+
+// Member wrapper around the namespace-level WTF::endsWithIgnoringASCIICase(*this, suffix).
+bool StringView::endsWithIgnoringASCIICase(const StringView& suffix) const
+{
+ return ::WTF::endsWithIgnoringASCIICase(*this, suffix);
+}
+
+// Converts the viewed characters to a CString. A null view produces a
+// zero-length CString built from ""; the conversion mode is only passed along
+// on the 16-bit path.
+CString StringView::utf8(ConversionMode mode) const
+{
+    if (isNull())
+        return CString("", 0);
+
+    if (!is8Bit())
+        return StringImpl::utf8ForCharacters(characters16(), length(), mode);
+
+    return StringImpl::utf8ForCharacters(characters8(), length());
+}
+
+// Substring search: delegates to findCommon(*this, matchString, start) and
+// returns whatever it reports.
+size_t StringView::find(StringView matchString, unsigned start) const
+{
+ return findCommon(*this, matchString, start);
+}
+
+// Starting at m_position, locates the next non-empty piece between separators
+// and records its length in m_length. Separators sitting exactly at
+// m_position are stepped over one at a time. When no further separator
+// exists, the piece runs to the end of the string.
+void StringView::SplitResult::Iterator::findNextSubstring()
+{
+    size_t separatorPosition;
+    while ((separatorPosition = m_result.m_string.find(m_result.m_separator, m_position)) != notFound) {
+        if (separatorPosition > m_position) {
+            m_length = separatorPosition - m_position;
+            return;
+        }
+        // The separator is at m_position itself: skip it and look again.
+        ++m_position;
+    }
+    m_length = m_result.m_string.length() - m_position;
+}
+
+// Advances past the current piece. If that does not reach the end of the
+// string, also steps over the separator that follows it and locates the next
+// piece.
+auto StringView::SplitResult::Iterator::operator++() -> Iterator&
+{
+    ASSERT(m_position < m_result.m_string.length());
+    m_position += m_length;
+    if (m_position >= m_result.m_string.length())
+        return *this;
+
+    ++m_position;
+    findNextSubstring();
+    return *this;
+}
+
+// Private implementation of GraphemeClusters::Iterator. Tracks the half-open
+// code-unit range [m_index, m_indexEnd) of the current cluster, using an
+// optional NonSharedCharacterBreakIterator to find the end of each cluster.
+class StringView::GraphemeClusters::Iterator::Impl {
+public:
+ // m_indexEnd is initialized by calling computeIndexEnd(), which reads the
+ // other three members; they are declared before m_indexEnd below, so they
+ // are already initialized at that point.
+ Impl(const StringView& stringView, std::optional<NonSharedCharacterBreakIterator>&& iterator, unsigned index)
+ : m_stringView(stringView)
+ , m_iterator(WTFMove(iterator))
+ , m_index(index)
+ , m_indexEnd(computeIndexEnd())
+ {
+ }
+
+ // Moves to the next cluster: the old end becomes the new start.
+ void operator++()
+ {
+ ASSERT(m_indexEnd > m_index);
+ m_index = m_indexEnd;
+ m_indexEnd = computeIndexEnd();
+ }
+
+ // Returns a view over the current cluster, preserving the 8-bit/16-bit
+ // representation of the underlying view.
+ StringView operator*() const
+ {
+ if (m_stringView.is8Bit())
+ return StringView(m_stringView.characters8() + m_index, m_indexEnd - m_index);
+ return StringView(m_stringView.characters16() + m_index, m_indexEnd - m_index);
+ }
+
+ // Equality is decided by m_index alone; the assertions check that both
+ // sides refer to the same StringView object and agree on m_indexEnd.
+ bool operator==(const Impl& other) const
+ {
+ ASSERT(&m_stringView == &other.m_stringView);
+ auto result = m_index == other.m_index;
+ ASSERT(!result || m_indexEnd == other.m_indexEnd);
+ return result;
+ }
+
+ // Returns 0 when there is no break iterator, m_index when already at the
+ // end of the string, and otherwise the result of ubrk_following from
+ // m_index.
+ unsigned computeIndexEnd()
+ {
+ if (!m_iterator)
+ return 0;
+ if (m_index == m_stringView.length())
+ return m_index;
+ return ubrk_following(m_iterator.value(), m_index);
+ }
+
+private:
+ // Held by reference, not by value.
+ // NOTE(review): presumably the referenced StringView must outlive this object; confirm with callers.
+ const StringView& m_stringView;
+ std::optional<NonSharedCharacterBreakIterator> m_iterator;
+ unsigned m_index;
+ unsigned m_indexEnd;
+};
+
+// Builds the Impl for a cluster iterator positioned at index. A null view gets
+// no break iterator (std::nullopt); any other view gets its own
+// NonSharedCharacterBreakIterator.
+StringView::GraphemeClusters::Iterator::Iterator(const StringView& stringView, unsigned index)
+ : m_impl(std::make_unique<Impl>(stringView, stringView.isNull() ? std::nullopt : std::optional<NonSharedCharacterBreakIterator>(NonSharedCharacterBreakIterator(stringView)), index))
+{
+}
+
+// Defined in this file, where Impl is a complete type; the body is intentionally empty.
+StringView::GraphemeClusters::Iterator::~Iterator()
+{
+}
+
+// Move constructor: takes over the other iterator's Impl, leaving other.m_impl moved-from.
+StringView::GraphemeClusters::Iterator::Iterator(Iterator&& other)
+ : m_impl(WTFMove(other.m_impl))
+{
+}
+
+// Pre-increment: advances the Impl to the next cluster and returns this iterator.
+auto StringView::GraphemeClusters::Iterator::operator++() -> Iterator&
+{
+ ++(*m_impl);
+ return *this;
+}
+
+// Dereference: the first * unwraps m_impl, the second invokes Impl::operator*
+// to obtain the view of the current cluster.
+StringView StringView::GraphemeClusters::Iterator::operator*() const
+{
+ return **m_impl;
+}
+
+// Compares the two Impl objects (not the m_impl pointers) via Impl::operator==.
+bool StringView::GraphemeClusters::Iterator::operator==(const Iterator& other) const
+{
+ return *m_impl == *(other.m_impl);
+}
+
+// Defined as the negation of operator==.
+bool StringView::GraphemeClusters::Iterator::operator!=(const Iterator& other) const
+{
+ return !(*this == other);
+}
+
+#if CHECK_STRINGVIEW_LIFETIME
+
+// Manage reference count manually so UnderlyingString does not need to be defined in the header.
+
+// Bookkeeping record for one StringImpl that StringViews point into, used only
+// when CHECK_STRINGVIEW_LIFETIME is enabled.
+struct StringView::UnderlyingString {
+ // Number of holders of this record; starts at 1 for the creator.
+ std::atomic_uint refCount { 1u };
+ // Set to false by StringView::invalidate() for the tracked StringImpl.
+ bool isValid { true };
+ const StringImpl& string;
+ explicit UnderlyingString(const StringImpl&);
+};
+
+// Binds the record to the given StringImpl; refCount and isValid keep their
+// in-class defaults.
+StringView::UnderlyingString::UnderlyingString(const StringImpl& string)
+ : string(string)
+{
+}
+
+static StaticLock underlyingStringsMutex;
+
+// Returns the function-local map from StringImpl pointers to their
+// UnderlyingString records. The callers in this file access it while holding
+// underlyingStringsMutex.
+static HashMap<const StringImpl*, StringView::UnderlyingString*>& underlyingStrings()
+{
+ static NeverDestroyed<HashMap<const StringImpl*, StringView::UnderlyingString*>> map;
+ return map;
+}
+
+// Called for a StringImpl that is about to be destroyed: removes its record
+// from the tracking map and marks that record invalid, so any StringView still
+// holding it fails underlyingStringIsValid(). Does nothing when the string was
+// never tracked.
+void StringView::invalidate(const StringImpl& stringToBeDestroyed)
+{
+    std::lock_guard<StaticLock> lock(underlyingStringsMutex);
+
+    UnderlyingString* underlyingString = underlyingStrings().take(&stringToBeDestroyed);
+    if (!underlyingString)
+        return;
+
+    // Update the flag while the lock is still held. adoptUnderlyingString()
+    // reads isValid and may delete the record under this same lock, so writing
+    // after unlocking would race with that read and could touch a record that
+    // has already been freed.
+    ASSERT(underlyingString->isValid);
+    underlyingString->isValid = false;
+}
+
+// A view with no tracking record is always considered valid; otherwise the
+// record's isValid flag decides.
+bool StringView::underlyingStringIsValid() const
+{
+ return !m_underlyingString || m_underlyingString->isValid;
+}
+
+// Replaces this view's tracking record with underlyingString, which may be
+// null. The previously held record, if any, loses one reference under the
+// mutex. When that was the last reference the record is deleted, and if it was
+// still valid its map entry is removed first.
+void StringView::adoptUnderlyingString(UnderlyingString* underlyingString)
+{
+    if (UnderlyingString* previous = m_underlyingString) {
+        std::lock_guard<StaticLock> lock(underlyingStringsMutex);
+        const bool wasLastReference = !--previous->refCount;
+        if (wasLastReference) {
+            if (previous->isValid)
+                underlyingStrings().remove(&previous->string);
+            delete previous;
+        }
+    }
+    m_underlyingString = underlyingString;
+}
+
+// Points this view's tracking at the record for string, creating the record on
+// first use or taking another reference on an existing one. A null string
+// clears the tracking. The mutex is released before adoptUnderlyingString()
+// runs, because that function takes the same mutex itself.
+void StringView::setUnderlyingString(const StringImpl* string)
+{
+    UnderlyingString* underlyingString = nullptr;
+    if (string) {
+        std::lock_guard<StaticLock> lock(underlyingStringsMutex);
+        auto addResult = underlyingStrings().add(string, nullptr);
+        if (addResult.isNewEntry)
+            addResult.iterator->value = new UnderlyingString(*string);
+        else
+            ++addResult.iterator->value->refCount;
+        underlyingString = addResult.iterator->value;
+    }
+    adoptUnderlyingString(underlyingString);
+}
+
+// Shares otherString's tracking record with this view: takes one more
+// reference on it when present, then adopts it in place of whatever this view
+// held before.
+void StringView::setUnderlyingString(const StringView& otherString)
+{
+    UnderlyingString* sharedRecord = otherString.m_underlyingString;
+    if (sharedRecord)
+        ++sharedRecord->refCount;
+    adoptUnderlyingString(sharedRecord);
+}
+
+#endif // CHECK_STRINGVIEW_LIFETIME
+
+} // namespace WTF
diff --git a/Source/WTF/wtf/text/StringView.h b/Source/WTF/wtf/text/StringView.h
index 70f4eb0cb..ef209f947 100644
--- a/Source/WTF/wtf/text/StringView.h
+++ b/Source/WTF/wtf/text/StringView.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2014 Apple Inc. All rights reserved.
+ * Copyright (C) 2014-2017 Apple Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -26,132 +26,916 @@
#ifndef StringView_h
#define StringView_h
-#include <wtf/text/WTFString.h>
+#include <limits.h>
+#include <unicode/utypes.h>
+#include <wtf/Forward.h>
+#include <wtf/RetainPtr.h>
+#include <wtf/Vector.h>
+#include <wtf/text/CString.h>
+#include <wtf/text/ConversionMode.h>
+#include <wtf/text/LChar.h>
+#include <wtf/text/StringCommon.h>
+
+// FIXME: Enabling the StringView lifetime checking causes the MSVC build to fail. Figure out why.
+#if defined(NDEBUG) || COMPILER(MSVC)
+#define CHECK_STRINGVIEW_LIFETIME 0
+#else
+#define CHECK_STRINGVIEW_LIFETIME 1
+#endif
namespace WTF {
+using CharacterMatchFunction = bool (*)(UChar);
+
// StringView is a non-owning reference to a string, similar to the proposed std::string_view.
-// Whether the string is 8-bit or 16-bit is encoded in the upper bit of the length member.
-// This means that strings longer than 2 Gigabytes can not be represented. If that turns out to be
-// a problem we can investigate alternative solutions.
class StringView {
public:
- StringView()
- : m_characters(nullptr)
- , m_length(0)
- {
+ StringView();
+#if CHECK_STRINGVIEW_LIFETIME
+ ~StringView();
+ StringView(StringView&&);
+ StringView(const StringView&);
+ StringView& operator=(StringView&&);
+ StringView& operator=(const StringView&);
+#endif
+
+ StringView(const AtomicString&);
+ StringView(const String&);
+ StringView(const StringImpl&);
+ StringView(const StringImpl*);
+ StringView(const LChar*, unsigned length);
+ StringView(const UChar*, unsigned length);
+ StringView(const char*);
+
+ static StringView empty();
+
+ unsigned length() const;
+ bool isEmpty() const;
+
+ explicit operator bool() const;
+ bool isNull() const;
+
+ UChar operator[](unsigned index) const;
+
+ class CodeUnits;
+ CodeUnits codeUnits() const;
+
+ class CodePoints;
+ CodePoints codePoints() const;
+
+ class GraphemeClusters;
+ GraphemeClusters graphemeClusters() const;
+
+ bool is8Bit() const;
+ const LChar* characters8() const;
+ const UChar* characters16() const;
+
+ String toString() const;
+ String toStringWithoutCopying() const;
+ AtomicString toAtomicString() const;
+
+#if USE(CF)
+ // This function converts null strings to empty strings.
+ WTF_EXPORT_STRING_API RetainPtr<CFStringRef> createCFStringWithoutCopying() const;
+#endif
+
+#ifdef __OBJC__
+ // These functions convert null strings to empty strings.
+ WTF_EXPORT_STRING_API RetainPtr<NSString> createNSString() const;
+ WTF_EXPORT_STRING_API RetainPtr<NSString> createNSStringWithoutCopying() const;
+#endif
+
+ WTF_EXPORT_STRING_API CString utf8(ConversionMode = LenientConversion) const;
+
+ class UpconvertedCharacters;
+ UpconvertedCharacters upconvertedCharacters() const;
+
+ void getCharactersWithUpconvert(LChar*) const;
+ void getCharactersWithUpconvert(UChar*) const;
+
+ StringView substring(unsigned start, unsigned length = std::numeric_limits<unsigned>::max()) const;
+ StringView left(unsigned len) const { return substring(0, len); }
+ StringView right(unsigned len) const { return substring(length() - len, len); }
+
+ class SplitResult;
+ SplitResult split(UChar) const;
+
+ size_t find(UChar, unsigned start = 0) const;
+ size_t find(CharacterMatchFunction, unsigned start = 0) const;
+
+ WTF_EXPORT_STRING_API size_t find(StringView, unsigned start) const;
+
+ size_t reverseFind(UChar, unsigned index = UINT_MAX) const;
+
+ WTF_EXPORT_STRING_API size_t findIgnoringASCIICase(const StringView&) const;
+ WTF_EXPORT_STRING_API size_t findIgnoringASCIICase(const StringView&, unsigned startOffset) const;
+
+ bool contains(UChar) const;
+ WTF_EXPORT_STRING_API bool containsIgnoringASCIICase(const StringView&) const;
+ WTF_EXPORT_STRING_API bool containsIgnoringASCIICase(const StringView&, unsigned startOffset) const;
+
+ WTF_EXPORT_STRING_API bool startsWith(const StringView&) const;
+ WTF_EXPORT_STRING_API bool startsWithIgnoringASCIICase(const StringView&) const;
+
+ WTF_EXPORT_STRING_API bool endsWith(const StringView&) const;
+ WTF_EXPORT_STRING_API bool endsWithIgnoringASCIICase(const StringView&) const;
+
+ int toInt() const;
+ int toInt(bool& isValid) const;
+ int toIntStrict(bool& isValid) const;
+ float toFloat(bool& isValid) const;
+
+ static void invalidate(const StringImpl&);
+
+ struct UnderlyingString;
+
+private:
+ friend bool equal(StringView, StringView);
+
+ void initialize(const LChar*, unsigned length);
+ void initialize(const UChar*, unsigned length);
+
+#if CHECK_STRINGVIEW_LIFETIME
+ WTF_EXPORT_STRING_API bool underlyingStringIsValid() const;
+ WTF_EXPORT_STRING_API void setUnderlyingString(const StringImpl*);
+ WTF_EXPORT_STRING_API void setUnderlyingString(const StringView&);
+#else
+ bool underlyingStringIsValid() const { return true; }
+ void setUnderlyingString(const StringImpl*) { }
+ void setUnderlyingString(const StringView&) { }
+#endif
+ void clear();
+
+ const void* m_characters { nullptr };
+ unsigned m_length { 0 };
+ bool m_is8Bit { true };
+
+#if CHECK_STRINGVIEW_LIFETIME
+ void adoptUnderlyingString(UnderlyingString*);
+ UnderlyingString* m_underlyingString { nullptr };
+#endif
+};
+
+template<typename CharacterType, size_t inlineCapacity> void append(Vector<CharacterType, inlineCapacity>&, StringView);
+
+bool equal(StringView, StringView);
+bool equal(StringView, const LChar*);
+bool equal(StringView, const char*);
+
+bool equalIgnoringASCIICase(StringView, StringView);
+bool equalIgnoringASCIICase(StringView, const char*);
+
+template<unsigned length> bool equalLettersIgnoringASCIICase(StringView, const char (&lowercaseLetters)[length]);
+
+inline bool operator==(StringView a, StringView b) { return equal(a, b); }
+inline bool operator==(StringView a, const LChar* b) { return equal(a, b); }
+inline bool operator==(StringView a, const char* b) { return equal(a, b); }
+inline bool operator==(const LChar* a, StringView b) { return equal(b, a); }
+inline bool operator==(const char* a, StringView b) { return equal(b, a); }
+
+inline bool operator!=(StringView a, StringView b) { return !equal(a, b); }
+inline bool operator!=(StringView a, const LChar* b) { return !equal(a, b); }
+inline bool operator!=(StringView a, const char* b) { return !equal(a, b); }
+inline bool operator!=(const LChar* a, StringView b) { return !equal(b, a); }
+inline bool operator!=(const char* a, StringView b) { return !equal(b, a); }
+
+}
+
+#include <wtf/text/AtomicString.h>
+#include <wtf/text/WTFString.h>
+
+namespace WTF {
+
+inline StringView::StringView()
+{
+ // All members take their in-class defaults: null characters, zero length, and m_is8Bit == true, so a null view reports is8Bit().
+}
+
+#if CHECK_STRINGVIEW_LIFETIME
+// Lifetime-checking builds only: drops this view's reference on its tracking record.
+inline StringView::~StringView()
+{
+ setUnderlyingString(nullptr);
+}
+
+// Move constructor (lifetime-checking builds only). Copies the character
+// pointer, length and width, then empties the source.
+inline StringView::StringView(StringView&& other)
+ : m_characters(other.m_characters)
+ , m_length(other.m_length)
+ , m_is8Bit(other.m_is8Bit)
+{
+ ASSERT(other.underlyingStringIsValid());
+
+ // clear() resets only characters/length/width; other's tracking record is
+ // still in place for the setUnderlyingString(other) call below.
+ other.clear();
+
+ // Share other's record first, then release other's own reference to it.
+ setUnderlyingString(other);
+ other.setUnderlyingString(nullptr);
+}
+
+// Copy constructor (lifetime-checking builds only). Copies the character
+// pointer, length and width, and shares the source's tracking record.
+inline StringView::StringView(const StringView& other)
+ : m_characters(other.m_characters)
+ , m_length(other.m_length)
+ , m_is8Bit(other.m_is8Bit)
+{
+ ASSERT(other.underlyingStringIsValid());
+
+ setUnderlyingString(other);
+}
+
+// Move assignment (lifetime-checking builds only). Takes over the source's
+// characters and tracking record and leaves the source cleared.
+inline StringView& StringView::operator=(StringView&& other)
+{
+ ASSERT(other.underlyingStringIsValid());
+
+ m_characters = other.m_characters;
+ m_length = other.m_length;
+ m_is8Bit = other.m_is8Bit;
+
+ // clear() leaves other's tracking record untouched, so it can still be
+ // shared on the next line.
+ other.clear();
+
+ // Share other's record (releasing the one previously held here), then drop
+ // other's own reference.
+ setUnderlyingString(other);
+ other.setUnderlyingString(nullptr);
+
+ return *this;
+}
+
+// Copy assignment (lifetime-checking builds only). Copies the source's
+// characters, length and width, and switches this view's tracking to the
+// source's record.
+inline StringView& StringView::operator=(const StringView& other)
+{
+ ASSERT(other.underlyingStringIsValid());
+
+ m_characters = other.m_characters;
+ m_length = other.m_length;
+ m_is8Bit = other.m_is8Bit;
+
+ setUnderlyingString(other);
+
+ return *this;
+}
+#endif // CHECK_STRINGVIEW_LIFETIME
+
+// Points the view at 8-bit characters and marks it as 8-bit.
+inline void StringView::initialize(const LChar* characters, unsigned length)
+{
+ m_characters = characters;
+ m_length = length;
+ m_is8Bit = true;
+}
+
+// Points the view at 16-bit characters and marks it as not 8-bit.
+inline void StringView::initialize(const UChar* characters, unsigned length)
+{
+ m_characters = characters;
+ m_length = length;
+ m_is8Bit = false;
+}
+
+// Views a caller-provided 8-bit buffer; no tracking record is set up here.
+inline StringView::StringView(const LChar* characters, unsigned length)
+{
+ initialize(characters, length);
+}
+
+// Views a caller-provided 16-bit buffer; no tracking record is set up here.
+inline StringView::StringView(const UChar* characters, unsigned length)
+{
+ initialize(characters, length);
+}
+
+// Views a C string as 8-bit characters, with the length taken from strlen().
+// The pointer is passed to strlen() unchecked, and the size_t result is
+// narrowed to the unsigned length that initialize() takes.
+inline StringView::StringView(const char* characters)
+{
+ initialize(reinterpret_cast<const LChar*>(characters), strlen(characters));
+}
+
+// Views the characters of a StringImpl, registering it for lifetime tracking
+// first and keeping the string's own 8-bit/16-bit representation.
+inline StringView::StringView(const StringImpl& string)
+{
+    setUnderlyingString(&string);
+    if (!string.is8Bit()) {
+        initialize(string.characters16(), string.length());
+        return;
+    }
+    initialize(string.characters8(), string.length());
+}
+
+// Pointer form of the StringImpl constructor. A null pointer leaves the view
+// in its default (null) state without touching lifetime tracking.
+inline StringView::StringView(const StringImpl* string)
+{
+    if (string) {
+        setUnderlyingString(string);
+        if (string->is8Bit())
+            initialize(string->characters8(), string->length());
+        else
+            initialize(string->characters16(), string->length());
+    }
+}
+
+// Views the characters of a String. Lifetime tracking is pointed at the
+// String's impl (possibly null); a null String yields a cleared view, and
+// otherwise the view matches the String's 8-bit/16-bit representation.
+inline StringView::StringView(const String& string)
+{
+    setUnderlyingString(string.impl());
+    if (!string.impl())
+        clear();
+    else if (string.is8Bit())
+        initialize(string.characters8(), string.length());
+    else
+        initialize(string.characters16(), string.length());
+}
- StringView(const LChar* characters, unsigned length)
- {
- initialize(characters, length);
+inline StringView::StringView(const AtomicString& atomicString)
+ : StringView(atomicString.string())
+{
+}
+
+inline void StringView::clear()
+{
+ m_characters = nullptr;
+ m_length = 0;
+ m_is8Bit = true;
+}
+
+inline StringView StringView::empty()
+{
+ return StringView(reinterpret_cast<const LChar*>(""), 0);
+}
+
+inline const LChar* StringView::characters8() const
+{
+ ASSERT(is8Bit());
+ ASSERT(underlyingStringIsValid());
+ return static_cast<const LChar*>(m_characters);
+}
+
+inline const UChar* StringView::characters16() const
+{
+ ASSERT(!is8Bit());
+ ASSERT(underlyingStringIsValid());
+ return static_cast<const UChar*>(m_characters);
+}
+
+class StringView::UpconvertedCharacters {
+public:
+ explicit UpconvertedCharacters(const StringView&);
+ operator const UChar*() const { return m_characters; }
+ const UChar* get() const { return m_characters; }
+private:
+ Vector<UChar, 32> m_upconvertedCharacters;
+ const UChar* m_characters;
+};
+
+inline StringView::UpconvertedCharacters StringView::upconvertedCharacters() const
+{
+ return UpconvertedCharacters(*this);
+}
+
+inline bool StringView::isNull() const
+{
+ return !m_characters;
+}
+
+inline bool StringView::isEmpty() const
+{
+ return !length();
+}
+
+inline unsigned StringView::length() const
+{
+ return m_length;
+}
+
+inline StringView::operator bool() const
+{
+ return !isNull();
+}
+
+inline bool StringView::is8Bit() const
+{
+ return m_is8Bit;
+}
+
+inline StringView StringView::substring(unsigned start, unsigned length) const
+{
+ if (start >= this->length())
+ return empty();
+ unsigned maxLength = this->length() - start;
+
+ if (length >= maxLength) {
+ if (!start)
+ return *this;
+ length = maxLength;
}
- StringView(const UChar* characters, unsigned length)
- {
- initialize(characters, length);
+ if (is8Bit()) {
+ StringView result(characters8() + start, length);
+ result.setUnderlyingString(*this);
+ return result;
}
+ StringView result(characters16() + start, length);
+ result.setUnderlyingString(*this);
+ return result;
+}
- StringView(const String& string)
- : m_characters(nullptr)
- , m_length(0)
- {
- if (!string.impl())
- return;
-
- if (string.is8Bit())
- initialize(string.characters8(), string.length());
- else
- initialize(string.characters16(), string.length());
+inline UChar StringView::operator[](unsigned index) const
+{
+ ASSERT(index < length());
+ if (is8Bit())
+ return characters8()[index];
+ return characters16()[index];
+}
+
+inline bool StringView::contains(UChar character) const
+{
+ return find(character) != notFound;
+}
+
+inline void StringView::getCharactersWithUpconvert(LChar* destination) const
+{
+ ASSERT(is8Bit());
+ auto characters8 = this->characters8();
+ for (unsigned i = 0; i < m_length; ++i)
+ destination[i] = characters8[i];
+}
+
+inline void StringView::getCharactersWithUpconvert(UChar* destination) const
+{
+ if (is8Bit()) {
+ auto characters8 = this->characters8();
+ for (unsigned i = 0; i < m_length; ++i)
+ destination[i] = characters8[i];
+ return;
}
+ auto characters16 = this->characters16();
+ for (unsigned i = 0; i < m_length; ++i)
+ destination[i] = characters16[i];
+}
- static StringView empty()
- {
- return StringView(reinterpret_cast<const LChar*>(""), 0);
+inline StringView::UpconvertedCharacters::UpconvertedCharacters(const StringView& string)
+{
+ if (!string.is8Bit()) {
+ m_characters = string.characters16();
+ return;
}
+ const LChar* characters8 = string.characters8();
+ unsigned length = string.m_length;
+ m_upconvertedCharacters.reserveInitialCapacity(length);
+ for (unsigned i = 0; i < length; ++i)
+ m_upconvertedCharacters.uncheckedAppend(characters8[i]);
+ m_characters = m_upconvertedCharacters.data();
+}
- const LChar* characters8() const
- {
- ASSERT(is8Bit());
+// Builds a String from the view's characters and length.
+inline String StringView::toString() const
+{
+    return is8Bit() ? String(characters8(), m_length) : String(characters16(), m_length);
+}
- return static_cast<const LChar*>(m_characters);
- }
+// Builds an AtomicString from the view's characters and length.
+inline AtomicString StringView::toAtomicString() const
+{
+    if (!is8Bit())
+        return AtomicString(characters16(), m_length);
+    return AtomicString(characters8(), m_length);
+}
- const UChar* characters16() const
- {
- ASSERT(!is8Bit());
+// Parses the view with charactersToFloat(); |isValid| is passed through to it as the
+// validity out-parameter.
+inline float StringView::toFloat(bool& isValid) const
+{
+    return is8Bit()
+        ? charactersToFloat(characters8(), m_length, &isValid)
+        : charactersToFloat(characters16(), m_length, &isValid);
+}
- return static_cast<const UChar*>(m_characters);
- }
+// Convenience overload for callers that do not care whether parsing succeeded.
+inline int StringView::toInt() const
+{
+    bool ignoredValidity;
+    return toInt(ignoredValidity);
+}
- bool isNull() const { return !m_characters; }
- bool isEmpty() const { return !length(); }
- unsigned length() const { return m_length & ~is16BitStringFlag; }
+// Parses the view with charactersToInt(); |isValid| is passed through to it as the
+// validity out-parameter.
+inline int StringView::toInt(bool& isValid) const
+{
+    return is8Bit()
+        ? charactersToInt(characters8(), m_length, &isValid)
+        : charactersToInt(characters16(), m_length, &isValid);
+}
- explicit operator bool() const { return !isNull(); }
+// Parses the view with charactersToIntStrict(); |isValid| is passed through to it as
+// the validity out-parameter.
+inline int StringView::toIntStrict(bool& isValid) const
+{
+    if (!is8Bit())
+        return charactersToIntStrict(characters16(), m_length, &isValid);
+    return charactersToIntStrict(characters8(), m_length, &isValid);
+}
- bool is8Bit() const { return !(m_length & is16BitStringFlag); }
+// Wraps the view's characters via StringImpl::createWithoutCopying().
+inline String StringView::toStringWithoutCopying() const
+{
+    if (!is8Bit())
+        return StringImpl::createWithoutCopying(characters16(), m_length);
+    return StringImpl::createWithoutCopying(characters8(), m_length);
+}
- StringView substring(unsigned start, unsigned length = std::numeric_limits<unsigned>::max()) const
- {
- if (start >= this->length())
- return empty();
- unsigned maxLength = this->length() - start;
+// Forwards to WTF::find() over the view's characters, searching for |character| from |start|.
+inline size_t StringView::find(UChar character, unsigned start) const
+{
+    return is8Bit()
+        ? WTF::find(characters8(), m_length, character, start)
+        : WTF::find(characters16(), m_length, character, start);
+}
- if (length >= maxLength) {
- if (!start)
- return *this;
- length = maxLength;
- }
+// Forwards to WTF::find() over the view's characters, using |matchFunction| from |start|.
+inline size_t StringView::find(CharacterMatchFunction matchFunction, unsigned start) const
+{
+    if (!is8Bit())
+        return WTF::find(characters16(), m_length, matchFunction, start);
+    return WTF::find(characters8(), m_length, matchFunction, start);
+}
- if (is8Bit())
- return StringView(characters8() + start, length);
+// Forwards to WTF::reverseFind() over the view's characters with |character| and |index|.
+inline size_t StringView::reverseFind(UChar character, unsigned index) const
+{
+    return is8Bit()
+        ? WTF::reverseFind(characters8(), m_length, character, index)
+        : WTF::reverseFind(characters16(), m_length, character, index);
+}
- return StringView(characters16() + start, length);
- }
+#if !CHECK_STRINGVIEW_LIFETIME
+// With lifetime checking compiled out, invalidation is a no-op.
+inline void StringView::invalidate(const StringImpl&) { }
+#endif
- String toString() const
- {
- if (is8Bit())
- return String(characters8(), length());
+template<typename StringType> class StringTypeAdapter;
- return String(characters16(), length());
+// Adapter exposing a StringView's length, character width and characters through
+// the StringTypeAdapter interface (length / is8Bit / writeTo / toString).
+template<> class StringTypeAdapter<StringView> {
+public:
+    // Declared with the injected class name: naming a constructor with a template-id
+    // ("StringTypeAdapter<StringView>(...)") is rejected when compiling as C++20.
+    StringTypeAdapter(StringView string)
+        : m_string(string)
+    {
     }
- String toStringWithoutCopying() const
- {
- if (is8Bit())
- return StringImpl::createWithoutCopying(characters8(), length());
+    unsigned length() { return m_string.length(); }
+    bool is8Bit() { return m_string.is8Bit(); }
+    // Both overloads copy the view's characters into |destination|; the UChar one widens 8-bit views.
+    void writeTo(LChar* destination) { m_string.getCharactersWithUpconvert(destination); }
+    void writeTo(UChar* destination) { m_string.getCharactersWithUpconvert(destination); }
- return StringImpl::createWithoutCopying(characters16(), length());
- }
+    String toString() const { return m_string.toString(); }
 private:
- void initialize(const LChar* characters, unsigned length)
- {
- ASSERT(!(length & is16BitStringFlag));
-
- m_characters = characters;
- m_length = length;
- }
+    StringView m_string;
+};
- void initialize(const UChar* characters, unsigned length)
- {
- ASSERT(!(length & is16BitStringFlag));
-
- m_characters = characters;
- m_length = is16BitStringFlag | length;
+// Appends the characters of |string| to the end of |buffer|.
+template<typename CharacterType, size_t inlineCapacity> void append(Vector<CharacterType, inlineCapacity>& buffer, StringView string)
+{
+    // Remember where the new characters start before growing the buffer.
+    const unsigned writeOffset = buffer.size();
+    buffer.grow(writeOffset + string.length());
+    string.getCharactersWithUpconvert(buffer.data() + writeOffset);
+}
+
+// Compares two views for equality.
+inline bool equal(StringView a, StringView b)
+{
+    // Views starting at the same address only need their lengths compared.
+    const bool sharesStart = a.m_characters == b.m_characters;
+    if (sharesStart) {
+        ASSERT(a.is8Bit() == b.is8Bit());
+        return a.length() == b.length();
     }
+
+    return equalCommon(a, b);
+}
+
+// Compares a view against a null-terminated 8-bit string.
+// The two are equal only when they have the same length and the same characters.
+// A null |b| is treated as having no characters, so it matches only an empty view.
+inline bool equal(StringView a, const LChar* b)
+{
+    if (!b)
+        return a.isEmpty();
+    unsigned aLength = a.length();
+    // Stop at b's terminator so we never read past the end of a shorter string.
+    for (unsigned i = 0; i < aLength; ++i) {
+        if (!b[i] || a[i] != b[i])
+            return false;
+    }
+    // b must end exactly where the view ends; otherwise the view is only a prefix of b.
+    return !b[aLength];
+}
- static const unsigned is16BitStringFlag = 1u << 31;
+// char overload: reinterprets |b| as LChar and defers to the LChar comparison.
+inline bool equal(StringView a, const char* b)
+{
+    const LChar* bytes = reinterpret_cast<const LChar*>(b);
+    return equal(a, bytes);
+}
- const void* m_characters;
+// Thin wrapper: the comparison itself lives in equalIgnoringASCIICaseCommon().
+inline bool equalIgnoringASCIICase(StringView a, StringView b)
+{
+    const bool matches = equalIgnoringASCIICaseCommon(a, b);
+    return matches;
+}
+
+// Thin wrapper: the comparison itself lives in equalIgnoringASCIICaseCommon().
+inline bool equalIgnoringASCIICase(StringView a, const char* b)
+{
+    const bool matches = equalIgnoringASCIICaseCommon(a, b);
+    return matches;
+}
+
+// Range object returned by StringView::split(). It stores a copy of the view and the
+// separator; begin()/end() hand out the iterators that walk the pieces.
+class StringView::SplitResult {
+public:
+ explicit SplitResult(StringView, UChar separator);
+
+ class Iterator;
+ Iterator begin() const;
+ Iterator end() const;
+
+private:
+ // The view being split and the code unit it is split on.
+ StringView m_string;
+ UChar m_separator;
+};
+
+// Range object returned by StringView::graphemeClusters(). begin() and end() create
+// iterators positioned at index 0 and at the view's length respectively.
+class StringView::GraphemeClusters {
+public:
+ explicit GraphemeClusters(const StringView&);
+
+ class Iterator;
+ Iterator begin() const;
+ Iterator end() const;
+
+private:
+ // Copy of the view being iterated.
+ StringView m_stringView;
+};
+
+// Range object returned by StringView::codePoints(). begin() and end() create
+// iterators positioned at index 0 and at the view's length respectively.
+class StringView::CodePoints {
+public:
+ explicit CodePoints(const StringView&);
+
+ class Iterator;
+ Iterator begin() const;
+ Iterator end() const;
+
+private:
+ // Copy of the view being iterated.
+ StringView m_stringView;
+};
+
+// Range object returned by StringView::codeUnits(). begin() and end() create
+// iterators positioned at index 0 and at the view's length respectively.
+class StringView::CodeUnits {
+public:
+ explicit CodeUnits(const StringView&);
+
+ class Iterator;
+ Iterator begin() const;
+ Iterator end() const;
+
+private:
+ // Copy of the view being iterated.
+ StringView m_stringView;
+};
+
+// Iterator over the pieces of a SplitResult. Dereferencing yields the substring of
+// the split view described by m_position and m_length. Only SplitResult can create
+// one (the constructors are private and SplitResult is a friend).
+class StringView::SplitResult::Iterator {
+public:
+ StringView operator*() const;
+
+ WTF_EXPORT_PRIVATE Iterator& operator++();
+
+ // Iterators compare by position only.
+ bool operator==(const Iterator&) const;
+ bool operator!=(const Iterator&) const;
+
+private:
+ // Tag selecting the constructor that builds the end iterator.
+ enum PositionTag { AtEnd };
+ Iterator(const SplitResult&);
+ Iterator(const SplitResult&, PositionTag);
+
+ // Defined out of line; called by the begin-iterator constructor.
+ WTF_EXPORT_PRIVATE void findNextSubstring();
+
+ friend SplitResult;
+
+ // Held by reference: the SplitResult must outlive its iterators.
+ const SplitResult& m_result;
+ unsigned m_position { 0 };
unsigned m_length;
};
+// Iterator over the grapheme clusters of a view; dereferencing yields a StringView.
+// The state lives in an out-of-line Impl held by unique_ptr, so the iterator can be
+// move-constructed but not copied or assigned.
+class StringView::GraphemeClusters::Iterator {
+public:
+ WTF_EXPORT_PRIVATE Iterator() = delete;
+ WTF_EXPORT_PRIVATE Iterator(const StringView&, unsigned index);
+ WTF_EXPORT_PRIVATE ~Iterator();
+
+ Iterator(const Iterator&) = delete;
+ WTF_EXPORT_PRIVATE Iterator(Iterator&&);
+ Iterator& operator=(const Iterator&) = delete;
+ Iterator& operator=(Iterator&&) = delete;
+
+ WTF_EXPORT_PRIVATE StringView operator*() const;
+ WTF_EXPORT_PRIVATE Iterator& operator++();
+
+ WTF_EXPORT_PRIVATE bool operator==(const Iterator&) const;
+ WTF_EXPORT_PRIVATE bool operator!=(const Iterator&) const;
+
+private:
+ class Impl;
+
+ std::unique_ptr<Impl> m_impl;
+};
+
+// Iterator over the code points of a view. The constructor decodes the first code
+// point eagerly, so operator* just returns the cached m_codePoint.
+class StringView::CodePoints::Iterator {
+public:
+ Iterator(const StringView&, unsigned index);
+
+ UChar32 operator*() const;
+ Iterator& operator++();
+
+ // Iterators compare by m_nextCodePointOffset only.
+ bool operator==(const Iterator&) const;
+ bool operator!=(const Iterator&) const;
+ Iterator& operator=(const Iterator&);
+
+private:
+ // The view is referenced, not copied, so it must outlive the iterator.
+ std::reference_wrapper<const StringView> m_stringView;
+ // Offset of the code unit to decode next; reset to nullopt by operator++ once the
+ // offset has reached the view's length.
+ std::optional<unsigned> m_nextCodePointOffset;
+ // Most recently decoded code point.
+ UChar32 m_codePoint;
+};
+
+// Iterator over the code units of a view: an index into the referenced view.
+class StringView::CodeUnits::Iterator {
+public:
+ Iterator(const StringView&, unsigned index);
+
+ UChar operator*() const;
+ Iterator& operator++();
+
+ // Iterators compare by index only.
+ bool operator==(const Iterator&) const;
+ bool operator!=(const Iterator&) const;
+
+private:
+ // The view is referenced, not copied, so it must outlive the iterator.
+ const StringView& m_stringView;
+ unsigned m_index;
+};
+
+// Returns a range whose iterators walk this view's grapheme clusters.
+inline auto StringView::graphemeClusters() const -> GraphemeClusters
+{
+    return GraphemeClusters { *this };
+}
+
+// Returns a range whose iterators walk this view's code points.
+inline auto StringView::codePoints() const -> CodePoints
+{
+    return CodePoints { *this };
+}
+
+// Returns a range whose iterators walk this view's code units.
+inline auto StringView::codeUnits() const -> CodeUnits
+{
+    return CodeUnits { *this };
+}
+
+// Stores a copy of the view to iterate.
+inline StringView::GraphemeClusters::GraphemeClusters(const StringView& stringView)
+    : m_stringView { stringView }
+{
+}
+
+// Iterator positioned at the start of the view.
+inline auto StringView::GraphemeClusters::begin() const -> Iterator
+{
+    return Iterator { m_stringView, 0 };
+}
+
+// Iterator positioned one past the last code unit of the view.
+inline auto StringView::GraphemeClusters::end() const -> Iterator
+{
+    const unsigned endIndex = m_stringView.length();
+    return Iterator(m_stringView, endIndex);
+}
+
+// Stores a copy of the view to iterate.
+inline StringView::CodePoints::CodePoints(const StringView& stringView)
+    : m_stringView { stringView }
+{
+}
+
+// Positions the iterator at code unit |index| of |stringView| and decodes the code
+// point found there. When |index| equals the view's length, operator++ turns this
+// into the end iterator without decoding anything.
+inline StringView::CodePoints::Iterator::Iterator(const StringView& stringView, unsigned index)
+    : m_stringView(stringView)
+    , m_nextCodePointOffset(index)
+    // Give the cached code point a defined value: an end iterator never decodes one,
+    // yet the member is still read when the iterator is copied or assigned.
+    , m_codePoint(0)
+{
+    operator++();
+}
+
+// Decodes the code point at the current offset into m_codePoint and moves the offset
+// past it. Once the offset has reached the view's length the offset is cleared
+// instead, which is what marks the end iterator.
+inline auto StringView::CodePoints::Iterator::operator++() -> Iterator&
+{
+    ASSERT(m_nextCodePointOffset);
+    const StringView& view = m_stringView.get();
+    unsigned& offset = m_nextCodePointOffset.value();
+    const unsigned viewLength = view.length();
+    if (offset == viewLength) {
+        m_nextCodePointOffset = std::nullopt;
+        return *this;
+    }
+    if (view.is8Bit()) {
+        // Every 8-bit character is a complete code point.
+        m_codePoint = view.characters8()[offset];
+        ++offset;
+    } else {
+        // U16_NEXT advances |offset| by the number of code units it consumed.
+        U16_NEXT(view.characters16(), offset, viewLength, m_codePoint);
+    }
+    ASSERT(offset <= viewLength);
+    return *this;
+}
+
+// Member-wise copy: the view reference, the next offset and the cached code point.
+inline auto StringView::CodePoints::Iterator::operator=(const Iterator& other) -> Iterator&
+{
+    m_codePoint = other.m_codePoint;
+    m_nextCodePointOffset = other.m_nextCodePointOffset;
+    m_stringView = other.m_stringView;
+    return *this;
+}
+
+// Returns the code point decoded by the last advance; must not be called on the end iterator.
+inline UChar32 StringView::CodePoints::Iterator::operator*() const
+{
+    ASSERT(m_nextCodePointOffset);
+    const UChar32 current = m_codePoint;
+    return current;
+}
+
+// Two iterators over the same view are equal when their next offsets match.
+inline bool StringView::CodePoints::Iterator::operator==(const Iterator& other) const
+{
+    ASSERT(&m_stringView.get() == &other.m_stringView.get());
+    const bool sameOffset = m_nextCodePointOffset == other.m_nextCodePointOffset;
+    return sameOffset;
+}
+
+// Defined as the negation of operator==.
+inline bool StringView::CodePoints::Iterator::operator!=(const Iterator& other) const
+{
+    return !operator==(other);
+}
+
+// Iterator positioned at the start of the view.
+inline auto StringView::CodePoints::begin() const -> Iterator
+{
+    return Iterator { m_stringView, 0 };
+}
+
+// Iterator constructed at the view's length, i.e. the end iterator.
+inline auto StringView::CodePoints::end() const -> Iterator
+{
+    const unsigned endIndex = m_stringView.length();
+    return Iterator(m_stringView, endIndex);
+}
+
+// Stores a copy of the view to iterate.
+inline StringView::CodeUnits::CodeUnits(const StringView& stringView)
+    : m_stringView { stringView }
+{
+}
+
+// Binds the iterator to |stringView| (by reference) and positions it at |index|.
+inline StringView::CodeUnits::Iterator::Iterator(const StringView& stringView, unsigned index)
+ : m_stringView(stringView)
+ , m_index(index)
+{
+}
+
+// Moves on to the next code unit.
+inline auto StringView::CodeUnits::Iterator::operator++() -> Iterator&
+{
+    m_index += 1;
+    return *this;
+}
+
+// Returns the code unit at the current index of the referenced view.
+inline UChar StringView::CodeUnits::Iterator::operator*() const
+{
+    const UChar codeUnit = m_stringView[m_index];
+    return codeUnit;
+}
+
+// Two iterators over the same view are equal when their indices match.
+inline bool StringView::CodeUnits::Iterator::operator==(const Iterator& other) const
+{
+    ASSERT(&m_stringView == &other.m_stringView);
+    const bool sameIndex = m_index == other.m_index;
+    return sameIndex;
+}
+
+// Defined as the negation of operator==.
+inline bool StringView::CodeUnits::Iterator::operator!=(const Iterator& other) const
+{
+    return !operator==(other);
+}
+
+// Iterator positioned at the first code unit.
+inline auto StringView::CodeUnits::begin() const -> Iterator
+{
+    return Iterator { m_stringView, 0 };
+}
+
+// Iterator positioned one past the last code unit.
+inline auto StringView::CodeUnits::end() const -> Iterator
+{
+    const unsigned endIndex = m_stringView.length();
+    return Iterator(m_stringView, endIndex);
+}
+
+// Returns a range that splits this view on |separator|.
+inline auto StringView::split(UChar separator) const -> SplitResult
+{
+    return SplitResult(*this, separator);
+}
+
+// Stores the view to split together with the separator.
+inline StringView::SplitResult::SplitResult(StringView stringView, UChar separator)
+    : m_string(stringView)
+    , m_separator(separator)
+{
+}
+
+// Iterator built by the single-argument constructor, which looks for the first piece.
+inline auto StringView::SplitResult::begin() const -> Iterator
+{
+    return Iterator(*this);
+}
+
+// Iterator built with the AtEnd tag, i.e. positioned at the length of the split view.
+inline auto StringView::SplitResult::end() const -> Iterator
+{
+    return Iterator(*this, Iterator::AtEnd);
+}
+
+// Begin iterator: starts at position 0 and immediately looks for the first piece.
+inline StringView::SplitResult::Iterator::Iterator(const SplitResult& result)
+    : m_result { result }
+{
+    this->findNextSubstring();
+}
+
+// End iterator: positioned at the length of the split view.
+inline StringView::SplitResult::Iterator::Iterator(const SplitResult& result, PositionTag)
+    : m_result { result }
+    , m_position { result.m_string.length() }
+    // m_length has no default initializer and nothing else sets it on this path;
+    // give it a defined value so copying an end iterator never reads garbage.
+    , m_length { 0 }
+{
+}
+
+// Returns the current piece; must not be called on the end iterator.
+inline StringView StringView::SplitResult::Iterator::operator*() const
+{
+    const StringView& source = m_result.m_string;
+    ASSERT(m_position < source.length());
+    return source.substring(m_position, m_length);
+}
+
+// Two iterators over the same SplitResult are equal when their positions match.
+inline bool StringView::SplitResult::Iterator::operator==(const Iterator& other) const
+{
+    ASSERT(&m_result == &other.m_result);
+    const bool samePosition = m_position == other.m_position;
+    return samePosition;
+}
+
+// Defined as the negation of operator==.
+inline bool StringView::SplitResult::Iterator::operator!=(const Iterator& other) const
+{
+    return !operator==(other);
+}
+
+// Thin wrapper: the comparison itself lives in equalLettersIgnoringASCIICaseCommon().
+template<unsigned length> inline bool equalLettersIgnoringASCIICase(StringView string, const char (&lowercaseLetters)[length])
+{
+    const bool matches = equalLettersIgnoringASCIICaseCommon(string, lowercaseLetters);
+    return matches;
+}
+
} // namespace WTF
+using WTF::append;
+using WTF::equal;
using WTF::StringView;
#endif // StringView_h
diff --git a/Source/WTF/wtf/text/SymbolImpl.cpp b/Source/WTF/wtf/text/SymbolImpl.cpp
new file mode 100644
index 000000000..18ebea9e0
--- /dev/null
+++ b/Source/WTF/wtf/text/SymbolImpl.cpp
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2016 Yusuke Suzuki <utatane.tea@gmail.com>.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "SymbolImpl.h"
+
+namespace WTF {
+
+// In addition to the normal hash value, store specialized hash value for
+// symbolized StringImpl*. And don't use the normal hash value for symbolized
+// StringImpl* when they are treated as Identifiers. Unique nature of these
+// symbolized StringImpl* keys means that we don't need them to match any other
+// string (in fact, that's exactly the opposite of what we want!), and the
+// normal hash would lead to lots of conflicts.
+unsigned SymbolImpl::nextHashForSymbol()
+{
+    // Function-local counter shared by every symbol created in this process.
+    static unsigned s_nextHashForSymbol = 0;
+    // Step in units of 1 << s_flagCount so the low s_flagCount bits stay zero.
+    s_nextHashForSymbol += 1u << s_flagCount;
+    // Always set the top bit. The literal is unsigned because shifting a signed 1
+    // into bit 31 overflows int.
+    s_nextHashForSymbol |= 1u << 31;
+    return s_nextHashForSymbol;
+}
+
+// Creates a symbol that shares |rep|'s characters and holds a reference to the
+// string that owns them.
+Ref<SymbolImpl> SymbolImpl::create(StringImpl& rep)
+{
+    // For a substring, the owner is the string whose buffer the substring points into.
+    StringImpl* ownerRep = &rep;
+    if (rep.bufferOwnership() == BufferSubstring)
+        ownerRep = rep.substringBuffer();
+    ASSERT(ownerRep->bufferOwnership() != BufferSubstring);
+    if (!rep.is8Bit())
+        return adoptRef(*new SymbolImpl(rep.m_data16, rep.length(), *ownerRep));
+    return adoptRef(*new SymbolImpl(rep.m_data8, rep.length(), *ownerRep));
+}
+
+// Creates a symbol via the default constructor, which sets the null-symbol flag.
+Ref<SymbolImpl> SymbolImpl::createNullSymbol()
+{
+    return adoptRef(*new SymbolImpl());
+}
+
+} // namespace WTF
diff --git a/Source/WTF/wtf/text/SymbolImpl.h b/Source/WTF/wtf/text/SymbolImpl.h
new file mode 100644
index 000000000..293da0a59
--- /dev/null
+++ b/Source/WTF/wtf/text/SymbolImpl.h
@@ -0,0 +1,126 @@
+/*
+ * Copyright (C) 2015-2016 Yusuke Suzuki <utatane.tea@gmail.com>.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#pragma once
+
+#include <wtf/text/UniquedStringImpl.h>
+
+namespace WTF {
+
+// SymbolImpl is the string impl used to represent a symbol.
+// It is a uniqued string impl, but it is not registered in the AtomicString tables, so it is not atomic.
+class SymbolImpl : public UniquedStringImpl {
+private:
+ // Bit in m_flags marking a symbol made by createNullSymbol().
+ static constexpr const unsigned s_flagIsNullSymbol = 1u;
+
+public:
+ // Hash assigned at construction from nextHashForSymbol(); independent of the string contents.
+ unsigned hashForSymbol() const { return m_hashForSymbol; }
+ // Registry this symbol belongs to, or nullptr. The non-const overload returns a
+ // reference so the registry can assign and clear the pointer.
+ SymbolRegistry* const& symbolRegistry() const { return m_symbolRegistry; }
+ SymbolRegistry*& symbolRegistry() { return m_symbolRegistry; }
+ bool isNullSymbol() const { return m_flags & s_flagIsNullSymbol; }
+
+ WTF_EXPORT_STRING_API static Ref<SymbolImpl> createNullSymbol();
+ WTF_EXPORT_STRING_API static Ref<SymbolImpl> create(StringImpl& rep);
+
+ // Returns a substring impl covering this symbol's whole length.
+ Ref<StringImpl> extractFoldedString()
+ {
+ ASSERT(substringBuffer());
+ ASSERT(substringBuffer() == m_owner);
+ ASSERT(!substringBuffer()->isSymbol());
+ return createSubstringSharingImpl(*this, 0, length());
+ }
+
+private:
+ WTF_EXPORT_PRIVATE static unsigned nextHashForSymbol();
+
+ friend class StringImpl;
+
+ // The character-taking constructors adopt the reference in |base| (leakRef) and
+ // keep the owner as a raw pointer in m_owner.
+ SymbolImpl(const LChar* characters, unsigned length, Ref<StringImpl>&& base)
+ : UniquedStringImpl(CreateSymbol, characters, length)
+ , m_owner(&base.leakRef())
+ , m_hashForSymbol(nextHashForSymbol())
+ {
+ ASSERT(StringImpl::tailOffset<StringImpl*>() == OBJECT_OFFSETOF(SymbolImpl, m_owner));
+ }
+
+ SymbolImpl(const UChar* characters, unsigned length, Ref<StringImpl>&& base)
+ : UniquedStringImpl(CreateSymbol, characters, length)
+ , m_owner(&base.leakRef())
+ , m_hashForSymbol(nextHashForSymbol())
+ {
+ ASSERT(StringImpl::tailOffset<StringImpl*>() == OBJECT_OFFSETOF(SymbolImpl, m_owner));
+ }
+
+ // Null symbol: owned by the shared empty string and flagged with s_flagIsNullSymbol.
+ SymbolImpl()
+ : UniquedStringImpl(CreateSymbol)
+ , m_owner(StringImpl::empty())
+ , m_hashForSymbol(nextHashForSymbol())
+ , m_flags(s_flagIsNullSymbol)
+ {
+ ASSERT(StringImpl::tailOffset<StringImpl*>() == OBJECT_OFFSETOF(SymbolImpl, m_owner));
+ }
+
+ // The pointer to the owner string must immediately follow the StringImpl layout,
+ // because the layout of SymbolImpl has to match that of a BufferSubstring StringImpl.
+ StringImpl* m_owner;
+ SymbolRegistry* m_symbolRegistry { nullptr };
+ unsigned m_hashForSymbol;
+ unsigned m_flags { 0 };
+};
+
+// Returns the symbol-specific hash for symbols and hash() for every other string.
+inline unsigned StringImpl::symbolAwareHash() const
+{
+    return isSymbol() ? static_cast<const SymbolImpl*>(this)->hashForSymbol() : hash();
+}
+
+// Returns the symbol-specific hash for symbols and existingHash() for every other string.
+inline unsigned StringImpl::existingSymbolAwareHash() const
+{
+    return isSymbol() ? static_cast<const SymbolImpl*>(this)->hashForSymbol() : existingHash();
+}
+
+#if !ASSERT_DISABLED
+// The generic ValueCheck<T>::checkConsistency would ASSERT on SymbolImpls created
+// from a StaticStringImpl, because those are not allocated by fastMalloc. There is
+// currently no way to detect that case, so the consistency check is skipped for
+// every SymbolImpl pointer.
+template<> struct ValueCheck<SymbolImpl*> {
+    static void checkConsistency(const SymbolImpl*) { }
+};
+
+template<> struct ValueCheck<const SymbolImpl*> {
+    static void checkConsistency(const SymbolImpl*) { }
+};
+#endif
+
+} // namespace WTF
+
+using WTF::SymbolImpl;
diff --git a/Source/WTF/wtf/text/SymbolRegistry.cpp b/Source/WTF/wtf/text/SymbolRegistry.cpp
new file mode 100644
index 000000000..264bc5ca4
--- /dev/null
+++ b/Source/WTF/wtf/text/SymbolRegistry.cpp
@@ -0,0 +1,63 @@
+/*
+ * Copyright (C) 2015 Yusuke Suzuki <utatane.tea@gmail.com>.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "SymbolRegistry.h"
+
+namespace WTF {
+
+// Clears the registry pointer of every symbol still in the table, so none of them
+// keeps pointing at this registry after it is destroyed.
+SymbolRegistry::~SymbolRegistry()
+{
+    for (auto& entry : m_table) {
+        auto* symbol = static_cast<SymbolImpl*>(entry.impl());
+        symbol->symbolRegistry() = nullptr;
+    }
+}
+
+// Returns the symbol registered under the contents of |rep|, creating and
+// registering a new one when the table has no entry for it yet.
+Ref<SymbolImpl> SymbolRegistry::symbolForKey(const String& rep)
+{
+    auto addResult = m_table.add(SymbolRegistryKey(rep.impl()));
+    if (addResult.isNewEntry) {
+        auto symbol = SymbolImpl::create(*rep.impl());
+        symbol->symbolRegistry() = this;
+        // The entry was added keyed on |rep|; repoint it at the symbol itself.
+        *addResult.iterator = SymbolRegistryKey(&symbol.get());
+        return symbol;
+    }
+
+    return *static_cast<SymbolImpl*>(addResult.iterator->impl());
+}
+
+// Returns the key string for |uid|, which must be registered in this registry.
+String SymbolRegistry::keyForSymbol(SymbolImpl& uid)
+{
+ ASSERT(uid.symbolRegistry() == this);
+ return uid.extractFoldedString();
+}
+
+// Removes |uid| from the table; |uid| must be registered in this registry.
+void SymbolRegistry::remove(SymbolImpl& uid)
+{
+    ASSERT(uid.symbolRegistry() == this);
+    auto position = m_table.find(SymbolRegistryKey(&uid));
+    ASSERT_WITH_MESSAGE(position != m_table.end(), "The string being removed is registered in the string table of an other thread!");
+    m_table.remove(position);
+}
+
+}
diff --git a/Source/WTF/wtf/text/SymbolRegistry.h b/Source/WTF/wtf/text/SymbolRegistry.h
new file mode 100644
index 000000000..06d276834
--- /dev/null
+++ b/Source/WTF/wtf/text/SymbolRegistry.h
@@ -0,0 +1,113 @@
+/*
+ * Copyright (C) 2015 Yusuke Suzuki <utatane.tea@gmail.com>.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef WTF_SymbolRegistry_h
+#define WTF_SymbolRegistry_h
+
+#include <wtf/HashSet.h>
+#include <wtf/text/StringHash.h>
+#include <wtf/text/SymbolImpl.h>
+#include <wtf/text/WTFString.h>
+
+namespace WTF {
+
+// A StringImpl* used as a Symbol uid does not have a hash value that reflects its string content.
+// SymbolRegistryKey exists so that such a uid can be compared by content with an external string:
+// alongside the StringImpl*, it holds a hash value computed from the string content.
+class SymbolRegistryKey {
+public:
+ SymbolRegistryKey() = default;
+ explicit SymbolRegistryKey(StringImpl* uid);
+ SymbolRegistryKey(WTF::HashTableDeletedValueType);
+
+ unsigned hash() const { return m_hash; }
+ StringImpl* impl() const { return m_impl; }
+
+ bool isHashTableDeletedValue() const { return m_impl == hashTableDeletedValue(); }
+
+private:
+ // Sentinel pointer value (-1) marking a deleted hash table slot.
+ static StringImpl* hashTableDeletedValue() { return reinterpret_cast<StringImpl*>(-1); }
+
+ // A default-constructed key has a null impl, which HashTraits treats as the empty value.
+ StringImpl* m_impl { nullptr };
+ unsigned m_hash { 0 };
+};
+
+template<typename T> struct DefaultHash;
+// Hash functions for SymbolRegistryKey: hashing uses the hash stored in the key,
+// and equality defers to StringHash::equal on the underlying impls.
+template<> struct DefaultHash<SymbolRegistryKey> {
+ struct Hash : StringHash {
+ static unsigned hash(const SymbolRegistryKey& key)
+ {
+ return key.hash();
+ }
+ static bool equal(const SymbolRegistryKey& a, const SymbolRegistryKey& b)
+ {
+ return StringHash::equal(a.impl(), b.impl());
+ }
+ };
+};
+
+// Hash traits for SymbolRegistryKey: a key with a null impl is the empty value.
+template<> struct HashTraits<SymbolRegistryKey> : SimpleClassHashTraits<SymbolRegistryKey> {
+    static const bool hasIsEmptyValueFunction = true;
+    static bool isEmptyValue(const SymbolRegistryKey& key)
+    {
+        return !key.impl();
+    }
+};
+
+// Table of symbols keyed by string content. symbolForKey() returns the same symbol
+// for equal keys; the destructor clears the registry pointer of every symbol still
+// in the table.
+class SymbolRegistry {
+ WTF_MAKE_NONCOPYABLE(SymbolRegistry);
+public:
+ SymbolRegistry() = default;
+ WTF_EXPORT_PRIVATE ~SymbolRegistry();
+
+ WTF_EXPORT_PRIVATE Ref<SymbolImpl> symbolForKey(const String&);
+ WTF_EXPORT_PRIVATE String keyForSymbol(SymbolImpl&);
+
+ // Removes a symbol that belongs to this registry from the table.
+ void remove(SymbolImpl&);
+
+private:
+ HashSet<SymbolRegistryKey> m_table;
+};
+
+// Builds a key for |uid| (must not be null) and records a content-based hash for it.
+inline SymbolRegistryKey::SymbolRegistryKey(StringImpl* uid)
+    : m_impl(uid)
+{
+    if (!uid->isSymbol()) {
+        m_hash = uid->hash();
+        return;
+    }
+
+    // For a symbol, compute the hash directly from its characters.
+    m_hash = uid->is8Bit()
+        ? StringHasher::computeHashAndMaskTop8Bits(uid->characters8(), uid->length())
+        : StringHasher::computeHashAndMaskTop8Bits(uid->characters16(), uid->length());
+}
+
+// Builds the key that marks a deleted hash table slot.
+inline SymbolRegistryKey::SymbolRegistryKey(WTF::HashTableDeletedValueType)
+    : m_impl { hashTableDeletedValue() }
+{
+}
+
+}
+
+#endif
diff --git a/Source/WTF/wtf/text/TextBreakIterator.cpp b/Source/WTF/wtf/text/TextBreakIterator.cpp
new file mode 100644
index 000000000..1edc32a5d
--- /dev/null
+++ b/Source/WTF/wtf/text/TextBreakIterator.cpp
@@ -0,0 +1,448 @@
+/*
+ * (C) 1999 Lars Knoll (knoll@kde.org)
+ * Copyright (C) 2004-2016 Apple Inc. All rights reserved.
+ * Copyright (C) 2007-2009 Torch Mobile, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#include "config.h"
+#include "TextBreakIterator.h"
+
+#include "LineBreakIteratorPoolICU.h"
+#include "TextBreakIteratorInternalICU.h"
+#include "UTextProviderLatin1.h"
+#include "UTextProviderUTF16.h"
+#include <atomic>
+#include <mutex>
+#include <unicode/ubrk.h>
+#include <wtf/text/StringBuilder.h>
+
+// FIXME: This needs a better name
+#define ADDITIONAL_EMOJI_SUPPORT (PLATFORM(IOS) || (PLATFORM(MAC) && __MAC_OS_X_VERSION_MIN_REQUIRED >= 101100))
+
+namespace WTF {
+
+// Iterator initialization
+
+ // Opens an ICU break iterator of the requested type for |locale| (the current
+ // text-break locale by default) with no text attached yet. Failure to open is
+ // only asserted; the value ICU returned is passed through to the caller.
+ static UBreakIterator* initializeIterator(UBreakIteratorType type, const char* locale = currentTextBreakLocaleID())
+ {
+     UErrorCode status = U_ZERO_ERROR;
+     UBreakIterator* opened = ubrk_open(type, locale, nullptr, 0, &status);
+     ASSERT_WITH_MESSAGE(U_SUCCESS(status), "ICU could not open a break iterator: %s (%d)", u_errorName(status), status);
+     return opened;
+ }
+
+#if !PLATFORM(IOS)
+
+ // Opens a rule-based ICU break iterator from a NUL-terminated rule string.
+ // The rules are treated as Latin-1 and upconverted to UTF-16 before being
+ // handed to ICU. Failure to open is only asserted.
+ static UBreakIterator* initializeIteratorWithRules(const char* breakRules)
+ {
+     unsigned rulesLength = strlen(breakRules);
+     auto utf16Rules = StringView(reinterpret_cast<const LChar*>(breakRules), rulesLength).upconvertedCharacters();
+
+     UParseError parseError;
+     UErrorCode status = U_ZERO_ERROR;
+     UBreakIterator* opened = ubrk_openRules(utf16Rules, rulesLength, nullptr, 0, &parseError, &status);
+     ASSERT_WITH_MESSAGE(U_SUCCESS(status), "ICU could not open a break iterator: %s (%d)", u_errorName(status), status);
+     return opened;
+ }
+
+#endif
+
+
+// Iterator text setting
+
+ // Points |iterator| at |string| and returns &iterator, or nullptr when ICU
+ // refuses the text. 8-bit strings go through the Latin-1 UText provider;
+ // 16-bit strings are handed to ICU directly.
+ static UBreakIterator* setTextForIterator(UBreakIterator& iterator, StringView string)
+ {
+     if (string.is8Bit()) {
+         // The UText and its scratch buffer live in this local for the duration of the call.
+         UTextWithBuffer textLocal;
+         textLocal.text = UTEXT_INITIALIZER;
+         textLocal.text.extraSize = sizeof(textLocal.buffer);
+         textLocal.text.pExtra = textLocal.buffer;
+
+         UErrorCode openStatus = U_ZERO_ERROR;
+         UText* text = openLatin1UTextProvider(&textLocal, string.characters8(), string.length(), &openStatus);
+         if (U_FAILURE(openStatus)) {
+             LOG_ERROR("uTextOpenLatin1 failed with status %d", openStatus);
+             return nullptr;
+         }
+
+         UErrorCode setTextStatus = U_ZERO_ERROR;
+         ubrk_setUText(&iterator, text, &setTextStatus);
+         // Close our UText on every path. Previously it was left open when
+         // ubrk_setUText failed. The ICU documentation states that ubrk_setUText
+         // works on its own shallow clone, so closing ours here is safe.
+         utext_close(text);
+         if (U_FAILURE(setTextStatus)) {
+             LOG_ERROR("ubrk_setUText failed with status %d", setTextStatus);
+             return nullptr;
+         }
+     } else {
+         UErrorCode setTextStatus = U_ZERO_ERROR;
+         ubrk_setText(&iterator, string.characters16(), string.length(), &setTextStatus);
+         if (U_FAILURE(setTextStatus))
+             return nullptr;
+     }
+
+     return &iterator;
+ }
+
+ // Points |iterator| at |string|, with |priorContext| (|priorContextLength|
+ // UTF-16 code units) passed to the context-aware UText provider alongside it.
+ // Returns &iterator on success and nullptr when ICU reports a failure.
+ static UBreakIterator* setContextAwareTextForIterator(UBreakIterator& iterator, StringView string, const UChar* priorContext, unsigned priorContextLength)
+ {
+     if (string.is8Bit()) {
+         // The UText and its scratch buffer live in this local for the duration of the call.
+         UTextWithBuffer textLocal;
+         textLocal.text = UTEXT_INITIALIZER;
+         textLocal.text.extraSize = sizeof(textLocal.buffer);
+         textLocal.text.pExtra = textLocal.buffer;
+
+         UErrorCode openStatus = U_ZERO_ERROR;
+         UText* text = openLatin1ContextAwareUTextProvider(&textLocal, string.characters8(), string.length(), priorContext, priorContextLength, &openStatus);
+         if (U_FAILURE(openStatus)) {
+             LOG_ERROR("openLatin1ContextAwareUTextProvider failed with status %d", openStatus);
+             return nullptr;
+         }
+
+         UErrorCode setTextStatus = U_ZERO_ERROR;
+         ubrk_setUText(&iterator, text, &setTextStatus);
+         // Close our UText on every path. Previously it was left open when
+         // ubrk_setUText failed. The ICU documentation states that ubrk_setUText
+         // works on its own shallow clone, so closing ours here is safe.
+         utext_close(text);
+         if (U_FAILURE(setTextStatus)) {
+             LOG_ERROR("ubrk_setUText failed with status %d", setTextStatus);
+             return nullptr;
+         }
+     } else {
+         UText textLocal = UTEXT_INITIALIZER;
+
+         UErrorCode openStatus = U_ZERO_ERROR;
+         UText* text = openUTF16ContextAwareUTextProvider(&textLocal, string.characters16(), string.length(), priorContext, priorContextLength, &openStatus);
+         if (U_FAILURE(openStatus)) {
+             LOG_ERROR("openUTF16ContextAwareUTextProvider failed with status %d", openStatus);
+             return nullptr;
+         }
+
+         UErrorCode setTextStatus = U_ZERO_ERROR;
+         ubrk_setUText(&iterator, text, &setTextStatus);
+         // Same as the 8-bit branch: release our UText whether or not the call succeeded.
+         utext_close(text);
+         if (U_FAILURE(setTextStatus)) {
+             LOG_ERROR("ubrk_setUText failed with status %d", setTextStatus);
+             return nullptr;
+         }
+     }
+
+     return &iterator;
+ }
+
+
+// Static iterators
+
+ // Returns the function-static word break iterator, re-targeted at |string|.
+ // Yields nullptr if the iterator could not be opened or the text could not be set.
+ UBreakIterator* wordBreakIterator(StringView string)
+ {
+     static UBreakIterator* staticWordBreakIterator = initializeIterator(UBRK_WORD);
+     return staticWordBreakIterator ? setTextForIterator(*staticWordBreakIterator, string) : nullptr;
+ }
+
+ // Returns the function-static sentence break iterator, re-targeted at |string|.
+ // Yields nullptr if the iterator could not be opened or the text could not be set.
+ UBreakIterator* sentenceBreakIterator(StringView string)
+ {
+     static UBreakIterator* staticSentenceBreakIterator = initializeIterator(UBRK_SENTENCE);
+     return staticSentenceBreakIterator ? setTextForIterator(*staticSentenceBreakIterator, string) : nullptr;
+ }
+
+ // Returns the function-static cursor-movement break iterator, re-targeted at |string|,
+ // or nullptr if the iterator could not be created or the text could not be set.
+ // Outside iOS the iterator is built from the custom rule set below; on iOS it is
+ // ICU's character iterator opened with the "th" locale.
+ UBreakIterator* cursorMovementIterator(StringView string)
+ {
+ #if !PLATFORM(IOS)
+ // This rule set is based on character-break iterator rules of ICU 57
+ // <http://source.icu-project.org/repos/icu/icu/tags/release-57-1/source/data/brkitr/>.
+ // The major differences from the original ones are listed below:
+ // * Replaced '[\p{Grapheme_Cluster_Break = SpacingMark}]' with '[\p{General_Category = Spacing Mark} - $Extend]' for ICU 3.8 or earlier;
+ // * Removed rules that prevent a cursor from moving after prepend characters (Bug 24342);
+ // * Added rules that prevent a cursor from moving after virama signs of Indic languages except Tamil (Bug 15790), and;
+ // * Added rules that prevent a cursor from moving before Japanese half-width katakana voiced marks.
+ // * Added rules for regional indicator symbols.
+ static const char* kRules =
+ "$CR = [\\p{Grapheme_Cluster_Break = CR}];"
+ "$LF = [\\p{Grapheme_Cluster_Break = LF}];"
+ "$Control = [\\p{Grapheme_Cluster_Break = Control}];"
+ "$VoiceMarks = [\\uFF9E\\uFF9F];" // Japanese half-width katakana voiced marks
+ "$Extend = [\\p{Grapheme_Cluster_Break = Extend} $VoiceMarks - [\\u0E30 \\u0E32 \\u0E45 \\u0EB0 \\u0EB2]];"
+ "$SpacingMark = [[\\p{General_Category = Spacing Mark}] - $Extend];"
+ "$L = [\\p{Grapheme_Cluster_Break = L}];"
+ "$V = [\\p{Grapheme_Cluster_Break = V}];"
+ "$T = [\\p{Grapheme_Cluster_Break = T}];"
+ "$LV = [\\p{Grapheme_Cluster_Break = LV}];"
+ "$LVT = [\\p{Grapheme_Cluster_Break = LVT}];"
+ "$Hin0 = [\\u0905-\\u0939];" // Devanagari Letter A,...,Ha
+ "$HinV = \\u094D;" // Devanagari Sign Virama
+ "$Hin1 = [\\u0915-\\u0939];" // Devanagari Letter Ka,...,Ha
+ "$Ben0 = [\\u0985-\\u09B9];" // Bengali Letter A,...,Ha
+ "$BenV = \\u09CD;" // Bengali Sign Virama
+ "$Ben1 = [\\u0995-\\u09B9];" // Bengali Letter Ka,...,Ha
+ "$Pan0 = [\\u0A05-\\u0A39];" // Gurmukhi Letter A,...,Ha
+ "$PanV = \\u0A4D;" // Gurmukhi Sign Virama
+ "$Pan1 = [\\u0A15-\\u0A39];" // Gurmukhi Letter Ka,...,Ha
+ "$Guj0 = [\\u0A85-\\u0AB9];" // Gujarati Letter A,...,Ha
+ "$GujV = \\u0ACD;" // Gujarati Sign Virama
+ "$Guj1 = [\\u0A95-\\u0AB9];" // Gujarati Letter Ka,...,Ha
+ "$Ori0 = [\\u0B05-\\u0B39];" // Oriya Letter A,...,Ha
+ "$OriV = \\u0B4D;" // Oriya Sign Virama
+ "$Ori1 = [\\u0B15-\\u0B39];" // Oriya Letter Ka,...,Ha
+ "$Tel0 = [\\u0C05-\\u0C39];" // Telugu Letter A,...,Ha
+ "$TelV = \\u0C4D;" // Telugu Sign Virama
+ "$Tel1 = [\\u0C14-\\u0C39];" // Telugu Letter Ka,...,Ha. NOTE(review): the range starts at U+0C14 (Au); Ka is U+0C15 - confirm whether this is intended.
+ "$Kan0 = [\\u0C85-\\u0CB9];" // Kannada Letter A,...,Ha
+ "$KanV = \\u0CCD;" // Kannada Sign Virama
+ "$Kan1 = [\\u0C95-\\u0CB9];" // Kannada Letter Ka,...,Ha
+ "$Mal0 = [\\u0D05-\\u0D39];" // Malayalam Letter A,...,Ha
+ "$MalV = \\u0D4D;" // Malayalam Sign Virama
+ "$Mal1 = [\\u0D15-\\u0D39];" // Malayalam Letter Ka,...,Ha
+ "$RI = [\\U0001F1E6-\\U0001F1FF];" // Emoji regional indicators
+ "$ZWJ = \\u200D;" // Zero width joiner
+ "$EmojiVar = [\\uFE0F];" // Emoji-style variation selector
+ #if ADDITIONAL_EMOJI_SUPPORT
+ "$EmojiForSeqs = [\\u2640 \\u2642 \\u26F9 \\u2764 \\U0001F308 \\U0001F3C3-\\U0001F3C4 \\U0001F3CA-\\U0001F3CC \\U0001F3F3 \\U0001F441 \\U0001F466-\\U0001F469 \\U0001F46E-\\U0001F46F \\U0001F471 \\U0001F473 \\U0001F477 \\U0001F481-\\U0001F482 \\U0001F486-\\U0001F487 \\U0001F48B \\U0001F575 \\U0001F5E8 \\U0001F645-\\U0001F647 \\U0001F64B \\U0001F64D-\\U0001F64E \\U0001F6A3 \\U0001F6B4-\\U0001F6B6 \\u2695-\\u2696 \\u2708 \\U0001F33E \\U0001F373 \\U0001F393 \\U0001F3A4 \\U0001F3A8 \\U0001F3EB \\U0001F3ED \\U0001F4BB-\\U0001F4BC \\U0001F527 \\U0001F52C \\U0001F680 \\U0001F692 \\U0001F926 \\U0001F937-\\U0001F939 \\U0001F93C-\\U0001F93E];" // Emoji that participate in ZWJ sequences
+ "$EmojiForMods = [\\u261D \\u26F9 \\u270A-\\u270D \\U0001F385 \\U0001F3C3-\\U0001F3C4 \\U0001F3CA \\U0001F3CB \\U0001F442-\\U0001F443 \\U0001F446-\\U0001F450 \\U0001F466-\\U0001F478 \\U0001F47C \\U0001F481-\\U0001F483 \\U0001F485-\\U0001F487 \\U0001F4AA \\U0001F575 \\U0001F590 \\U0001F595 \\U0001F596 \\U0001F645-\\U0001F647 \\U0001F64B-\\U0001F64F \\U0001F6A3 \\U0001F6B4-\\U0001F6B6 \\U0001F6C0 \\U0001F918 \\U0001F3C2 \\U0001F3C7 \\U0001F3CC \\U0001F574 \\U0001F57A \\U0001F6CC \\U0001F919-\\U0001F91E \\U0001F926 \\U0001F930 \\U0001F933-\\U0001F939 \\U0001F93C-\\U0001F93E] ;" // Emoji that take Fitzpatrick modifiers
+ #else
+ "$EmojiForSeqs = [\\u2764 \\U0001F466-\\U0001F469 \\U0001F48B];" // Emoji that participate in ZWJ sequences
+ "$EmojiForMods = [\\u261D \\u270A-\\u270C \\U0001F385 \\U0001F3C3-\\U0001F3C4 \\U0001F3C7 \\U0001F3CA \\U0001F442-\\U0001F443 \\U0001F446-\\U0001F450 \\U0001F466-\\U0001F469 \\U0001F46E-\\U0001F478 \\U0001F47C \\U0001F481-\\U0001F483 \\U0001F485-\\U0001F487 \\U0001F4AA \\U0001F596 \\U0001F645-\\U0001F647 \\U0001F64B-\\U0001F64F \\U0001F6A3 \\U0001F6B4-\\U0001F6B6 \\U0001F6C0] ;" // Emoji that take Fitzpatrick modifiers
+ #endif
+ "$EmojiMods = [\\U0001F3FB-\\U0001F3FF];" // Fitzpatrick modifiers
+ "!!chain;"
+ #if ADDITIONAL_EMOJI_SUPPORT
+ "!!RINoChain;"
+ #endif
+ "!!forward;"
+ "$CR $LF;"
+ "$L ($L | $V | $LV | $LVT);"
+ "($LV | $V) ($V | $T);"
+ "($LVT | $T) $T;"
+ #if ADDITIONAL_EMOJI_SUPPORT
+ "$RI $RI $Extend* / $RI;"
+ "$RI $RI $Extend*;"
+ "[^$Control $CR $LF] $Extend;"
+ "[^$Control $CR $LF] $SpacingMark;"
+ #else
+ "[^$Control $CR $LF] $Extend;"
+ "[^$Control $CR $LF] $SpacingMark;"
+ "$RI $RI / $RI;"
+ "$RI $RI;"
+ #endif
+ "$Hin0 $HinV $Hin1;" // Devanagari Virama (forward)
+ "$Ben0 $BenV $Ben1;" // Bengali Virama (forward)
+ "$Pan0 $PanV $Pan1;" // Gurmukhi Virama (forward)
+ "$Guj0 $GujV $Guj1;" // Gujarati Virama (forward)
+ "$Ori0 $OriV $Ori1;" // Oriya Virama (forward)
+ "$Tel0 $TelV $Tel1;" // Telugu Virama (forward)
+ "$Kan0 $KanV $Kan1;" // Kannada Virama (forward)
+ "$Mal0 $MalV $Mal1;" // Malayalam Virama (forward)
+ "$ZWJ $EmojiForSeqs;" // Don't break in emoji ZWJ sequences
+ "$EmojiForMods $EmojiVar? $EmojiMods;" // Don't break between relevant emoji (possibly with variation selector) and Fitzpatrick modifier
+ "!!reverse;"
+ "$LF $CR;"
+ "($L | $V | $LV | $LVT) $L;"
+ "($V | $T) ($LV | $V);"
+ "$T ($LVT | $T);"
+ #if ADDITIONAL_EMOJI_SUPPORT
+ "$Extend* $RI $RI / $Extend* $RI $RI;"
+ "$Extend* $RI $RI;"
+ "$Extend [^$Control $CR $LF];"
+ "$SpacingMark [^$Control $CR $LF];"
+ #else
+ "$Extend [^$Control $CR $LF];"
+ "$SpacingMark [^$Control $CR $LF];"
+ "$RI $RI / $RI $RI;"
+ "$RI $RI;"
+ #endif
+ "$Hin1 $HinV $Hin0;" // Devanagari Virama (backward)
+ "$Ben1 $BenV $Ben0;" // Bengali Virama (backward)
+ "$Pan1 $PanV $Pan0;" // Gurmukhi Virama (backward)
+ "$Guj1 $GujV $Guj0;" // Gujarati Virama (backward)
+ "$Ori1 $OriV $Ori0;" // Oriya Virama (backward)
+ "$Tel1 $TelV $Tel0;" // Telugu Virama (backward)
+ "$Kan1 $KanV $Kan0;" // Kannada Virama (backward)
+ "$Mal1 $MalV $Mal0;" // Malayalam Virama (backward)
+ "$EmojiForSeqs $ZWJ;" // Don't break in emoji ZWJ sequences
+ "$EmojiMods $EmojiVar? $EmojiForMods;" // Don't break between relevant emoji (possibly with variation selector) and Fitzpatrick modifier
+ #if ADDITIONAL_EMOJI_SUPPORT
+ "!!safe_reverse;"
+ "$RI $RI+;"
+ "[$EmojiVar $EmojiMods]+ $EmojiForMods;"
+ "!!safe_forward;"
+ "$RI $RI+;"
+ "$EmojiForMods [$EmojiVar $EmojiMods]+;";
+ #else
+ "[$EmojiVar $EmojiMods]+ $EmojiForMods;"
+ "$EmojiForMods [$EmojiVar $EmojiMods]+;"
+ "!!safe_reverse;"
+ "!!safe_forward;";
+ #endif
+ // Built once, on the first call, from the rule string above.
+ static UBreakIterator* staticCursorMovementIterator = initializeIteratorWithRules(kRules);
+ #else // PLATFORM(IOS)
+ // Use the special Thai character break iterator for all locales
+ static UBreakIterator* staticCursorMovementIterator = initializeIterator(UBRK_CHARACTER, "th");
+ #endif // !PLATFORM(IOS)
+
+ if (!staticCursorMovementIterator)
+ return nullptr;
+
+ return setTextForIterator(*staticCursorMovementIterator, string);
+ }
+
+ // Takes a line break iterator for |locale| and |mode| from the shared pool and
+ // points it at |string| with the given prior context. Returns nullptr when the
+ // pool has no iterator to give or when the text cannot be set. A non-null
+ // result must be handed back with releaseLineBreakIterator().
+ UBreakIterator* acquireLineBreakIterator(StringView string, const AtomicString& locale, const UChar* priorContext, unsigned priorContextLength, LineBreakIteratorMode mode)
+ {
+     UBreakIterator* iterator = LineBreakIteratorPool::sharedPool().take(locale, mode);
+     if (!iterator)
+         return nullptr;
+
+     if (!setContextAwareTextForIterator(*iterator, string, priorContext, priorContextLength)) {
+         // The caller only sees nullptr and so can never release this iterator;
+         // return it to the pool here instead of losing it.
+         LineBreakIteratorPool::sharedPool().put(iterator);
+         return nullptr;
+     }
+
+     return iterator;
+ }
+
+ // Hands |iterator| back to the shared line break iterator pool.
+ // |iterator| must be non-null (asserted).
+ void releaseLineBreakIterator(UBreakIterator* iterator)
+ {
+ ASSERT_ARG(iterator, iterator);
+
+ LineBreakIteratorPool::sharedPool().put(iterator);
+ }
+
+ // Opens a fresh ICU line break iterator for |locale|. An empty locale means
+ // the current text-break locale. Returns nullptr (after logging) if ICU cannot
+ // open an iterator at all.
+ UBreakIterator* openLineBreakIterator(const AtomicString& locale)
+ {
+     const bool useDefaultLocale = locale.isEmpty();
+     UErrorCode status = U_ZERO_ERROR;
+     UBreakIterator* lineIterator = useDefaultLocale
+         ? ubrk_open(UBRK_LINE, currentTextBreakLocaleID(), nullptr, 0, &status)
+         : ubrk_open(UBRK_LINE, locale.string().utf8().data(), nullptr, 0, &status);
+
+     // The locale is supplied by web content and may be invalid; when ICU
+     // rejects it, retry with the default locale.
+     if (U_FAILURE(status) && !useDefaultLocale) {
+         status = U_ZERO_ERROR;
+         lineIterator = ubrk_open(UBRK_LINE, currentTextBreakLocaleID(), nullptr, 0, &status);
+     }
+
+     if (U_SUCCESS(status))
+         return lineIterator;
+
+     LOG_ERROR("ubrk_open failed with status %d", status);
+     return nullptr;
+ }
+
+ // Closes the ICU iterator and nulls out the caller's pointer.
+ // |iterator| must be non-null on entry (asserted).
+ void closeLineBreakIterator(UBreakIterator*& iterator)
+ {
+     ASSERT(iterator);
+     ubrk_close(iterator);
+     iterator = nullptr;
+ }
+
+ // Single-slot cache holding at most one idle character break iterator.
+ static std::atomic<UBreakIterator*> nonSharedCharacterBreakIterator = ATOMIC_VAR_INIT(nullptr);
+
+ // Claims the cached iterator if the slot holds one (leaving the slot empty);
+ // otherwise opens a brand new character break iterator.
+ static inline UBreakIterator* getNonSharedCharacterBreakIterator()
+ {
+     UBreakIterator* cached = nonSharedCharacterBreakIterator.exchange(nullptr, std::memory_order_acquire);
+     return cached ? cached : initializeIterator(UBRK_CHARACTER);
+ }
+
+ // Stores |cacheMe| in the single-slot cache and closes whatever iterator was
+ // occupying the slot before.
+ static inline void cacheNonSharedCharacterBreakIterator(UBreakIterator* cacheMe)
+ {
+     UBreakIterator* evicted = nonSharedCharacterBreakIterator.exchange(cacheMe, std::memory_order_release);
+     if (evicted)
+         ubrk_close(evicted);
+ }
+
+ // Obtains a character break iterator (cached or freshly opened) and points it
+ // at |string|. The object converts to nullptr if no iterator could be obtained
+ // or the text could not be set.
+ NonSharedCharacterBreakIterator::NonSharedCharacterBreakIterator(StringView string)
+     : m_iterator(getNonSharedCharacterBreakIterator())
+ {
+     if (!m_iterator)
+         return;
+
+     if (!setTextForIterator(*m_iterator, string)) {
+         // Previously the iterator pointer was simply overwritten with nullptr
+         // here, so the iterator was neither closed nor cached. Give it back to
+         // the cache before reporting failure.
+         cacheNonSharedCharacterBreakIterator(m_iterator);
+         m_iterator = nullptr;
+     }
+ }
+
+ // Offers the held iterator, if any, back to the single-slot cache.
+ NonSharedCharacterBreakIterator::~NonSharedCharacterBreakIterator()
+ {
+     if (!m_iterator)
+         return;
+
+     cacheNonSharedCharacterBreakIterator(m_iterator);
+ }
+
+ // Move constructor: takes over |other|'s iterator and leaves |other| empty.
+ NonSharedCharacterBreakIterator::NonSharedCharacterBreakIterator(NonSharedCharacterBreakIterator&& other)
+     : m_iterator(other.m_iterator)
+ {
+     other.m_iterator = nullptr;
+ }
+
+ // Iterator implementation.
+
+ // True when the rule status ICU reports for |iterator| is anything other than UBRK_WORD_NONE.
+ bool isWordTextBreak(UBreakIterator* iterator)
+ {
+     return ubrk_getRuleStatus(iterator) != UBRK_WORD_NONE;
+ }
+
+ // Counts the grapheme clusters in |string|. If no break iterator can be
+ // obtained for a 16-bit string, the string length is returned instead.
+ unsigned numGraphemeClusters(StringView string)
+ {
+     const unsigned stringLength = string.length();
+     if (!stringLength)
+         return 0;
+
+     if (string.is8Bit()) {
+         // In Latin-1 the only multi-character extended grapheme cluster is
+         // CRLF, so start from the length and take one off per CRLF pair.
+         auto* characters = string.characters8();
+         unsigned clusterCount = stringLength;
+         for (unsigned index = 1; index < stringLength; ++index) {
+             if (characters[index - 1] == '\r' && characters[index] == '\n')
+                 --clusterCount;
+         }
+         return clusterCount;
+     }
+
+     NonSharedCharacterBreakIterator iterator { string };
+     if (!iterator) {
+         ASSERT_NOT_REACHED();
+         return stringLength;
+     }
+
+     // Every successful step of the iterator crosses exactly one cluster.
+     unsigned clusterCount = 0;
+     for (; ubrk_next(iterator) != UBRK_DONE; ++clusterCount) { }
+     return clusterCount;
+ }
+
+ // Returns how many characters of |string| are covered by its first
+ // |numGraphemeClusters| grapheme clusters (the whole length if the string has
+ // no more clusters than that, or if no break iterator can be obtained).
+ unsigned numCharactersInGraphemeClusters(StringView string, unsigned numGraphemeClusters)
+ {
+     const unsigned stringLength = string.length();
+
+     // A cluster holds at least one character, so a short string is covered entirely.
+     if (stringLength <= numGraphemeClusters)
+         return stringLength;
+
+     if (string.is8Bit()) {
+         // In Latin-1 the only multi-character extended grapheme cluster is CRLF.
+         auto* characters = string.characters8();
+         unsigned clustersSeen = 0;
+         unsigned offset = 0;
+         while (clustersSeen < numGraphemeClusters && offset + 1 < stringLength) {
+             if (characters[offset] == '\r' && characters[offset + 1] == '\n')
+                 ++offset;
+             ++clustersSeen;
+             ++offset;
+         }
+         // The loop stops before a trailing lone character; count it if a cluster is still owed.
+         return clustersSeen < numGraphemeClusters ? offset + 1 : offset;
+     }
+
+     NonSharedCharacterBreakIterator iterator { string };
+     if (!iterator) {
+         ASSERT_NOT_REACHED();
+         return stringLength;
+     }
+
+     unsigned clustersRemaining = numGraphemeClusters;
+     while (clustersRemaining) {
+         if (ubrk_next(iterator) == UBRK_DONE)
+             return stringLength;
+         --clustersRemaining;
+     }
+     return ubrk_current(iterator);
+ }
+
+} // namespace WTF
diff --git a/Source/WTF/wtf/text/TextBreakIterator.h b/Source/WTF/wtf/text/TextBreakIterator.h
new file mode 100644
index 000000000..2bb5f9ca4
--- /dev/null
+++ b/Source/WTF/wtf/text/TextBreakIterator.h
@@ -0,0 +1,191 @@
+/*
+ * Copyright (C) 2006 Lars Knoll <lars@trolltech.com>
+ * Copyright (C) 2007-2016 Apple Inc. All rights reserved.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+#pragma once
+
+#include <wtf/text/StringView.h>
+
+namespace WTF {
+
+// Note: The returned iterator is good only until you get another iterator, with the exception of acquireLineBreakIterator.
+
+enum class LineBreakIteratorMode { Default, Loose, Normal, Strict };
+
+// This is similar to character break iterator in most cases, but is subject to
+// platform UI conventions. One notable example where this can be different
+// from character break iterator is Thai prepend characters, see bug 24342.
+// Use this for insertion point and selection manipulations.
+WTF_EXPORT_PRIVATE UBreakIterator* cursorMovementIterator(StringView);
+
+WTF_EXPORT_PRIVATE UBreakIterator* wordBreakIterator(StringView);
+WTF_EXPORT_PRIVATE UBreakIterator* sentenceBreakIterator(StringView);
+
+WTF_EXPORT_PRIVATE UBreakIterator* acquireLineBreakIterator(StringView, const AtomicString& locale, const UChar* priorContext, unsigned priorContextLength, LineBreakIteratorMode);
+WTF_EXPORT_PRIVATE void releaseLineBreakIterator(UBreakIterator*);
+UBreakIterator* openLineBreakIterator(const AtomicString& locale);
+void closeLineBreakIterator(UBreakIterator*&);
+
+WTF_EXPORT_PRIVATE bool isWordTextBreak(UBreakIterator*);
+
+ // Holds a string, locale and mode, plus up to two UTF-16 code units of "prior
+ // context", and acquires a pooled line break iterator for them only when get()
+ // is first called. The iterator is released on destruction or reset.
+ class LazyLineBreakIterator {
+ public:
+     LazyLineBreakIterator()
+     {
+         resetPriorContext();
+     }
+
+     explicit LazyLineBreakIterator(StringView stringView, const AtomicString& locale = AtomicString(), LineBreakIteratorMode mode = LineBreakIteratorMode::Default)
+         : m_stringView(stringView)
+         , m_locale(locale)
+         , m_mode(mode)
+     {
+         resetPriorContext();
+     }
+
+     ~LazyLineBreakIterator()
+     {
+         if (!m_iterator)
+             return;
+         releaseLineBreakIterator(m_iterator);
+     }
+
+     StringView stringView() const { return m_stringView; }
+     LineBreakIteratorMode mode() const { return m_mode; }
+
+     // Slot 1 of the prior context holds the most recent character.
+     UChar lastCharacter() const
+     {
+         static_assert(WTF_ARRAY_LENGTH(m_priorContext) == 2, "UBreakIterator unexpected prior context length");
+         return m_priorContext[1];
+     }
+
+     // Slot 0 of the prior context holds the character before the most recent one.
+     UChar secondToLastCharacter() const
+     {
+         static_assert(WTF_ARRAY_LENGTH(m_priorContext) == 2, "UBreakIterator unexpected prior context length");
+         return m_priorContext[0];
+     }
+
+     // Overwrites both prior-context slots.
+     void setPriorContext(UChar last, UChar secondToLast)
+     {
+         static_assert(WTF_ARRAY_LENGTH(m_priorContext) == 2, "UBreakIterator unexpected prior context length");
+         m_priorContext[1] = last;
+         m_priorContext[0] = secondToLast;
+     }
+
+     // Shifts the current last character into slot 0 and stores |last| in slot 1.
+     void updatePriorContext(UChar last)
+     {
+         static_assert(WTF_ARRAY_LENGTH(m_priorContext) == 2, "UBreakIterator unexpected prior context length");
+         m_priorContext[0] = m_priorContext[1];
+         m_priorContext[1] = last;
+     }
+
+     // Clears both prior-context slots to 0.
+     void resetPriorContext()
+     {
+         static_assert(WTF_ARRAY_LENGTH(m_priorContext) == 2, "UBreakIterator unexpected prior context length");
+         m_priorContext[0] = 0;
+         m_priorContext[1] = 0;
+     }
+
+     // Number of meaningful prior-context characters: 0 when the last slot is
+     // empty, otherwise 1, or 2 when the second-to-last slot is set as well.
+     unsigned priorContextLength() const
+     {
+         static_assert(WTF_ARRAY_LENGTH(m_priorContext) == 2, "UBreakIterator unexpected prior context length");
+         if (!m_priorContext[1])
+             return 0;
+         return m_priorContext[0] ? 2 : 1;
+     }
+
+     // Returns the line break iterator for the stored string, acquiring it on
+     // first use. |priorContextLength| selects how many trailing prior-context
+     // characters to use. An iterator that was acquired for a different prior
+     // context is released and replaced.
+     UBreakIterator* get(unsigned priorContextLength)
+     {
+         ASSERT(priorContextLength <= priorContextCapacity);
+         const UChar* priorContext = nullptr;
+         if (priorContextLength)
+             priorContext = &m_priorContext[priorContextCapacity - priorContextLength];
+
+         // Drop a cached iterator that was set up with another prior context.
+         if (m_iterator && (priorContext != m_cachedPriorContext || priorContextLength != m_cachedPriorContextLength))
+             resetStringAndReleaseIterator(m_stringView, m_locale, m_mode);
+
+         if (!m_iterator) {
+             m_iterator = acquireLineBreakIterator(m_stringView, m_locale, priorContext, priorContextLength, m_mode);
+             m_cachedPriorContext = priorContext;
+             m_cachedPriorContextLength = priorContextLength;
+         }
+         return m_iterator;
+     }
+
+     // Releases any held iterator and re-targets this object at a new string,
+     // locale and mode. The prior-context characters themselves are not touched.
+     void resetStringAndReleaseIterator(StringView stringView, const AtomicString& locale, LineBreakIteratorMode mode)
+     {
+         if (m_iterator)
+             releaseLineBreakIterator(m_iterator);
+         m_iterator = nullptr;
+         m_cachedPriorContext = nullptr;
+         m_cachedPriorContextLength = 0;
+
+         m_stringView = stringView;
+         m_locale = locale;
+         m_mode = mode;
+     }
+
+ private:
+     static constexpr unsigned priorContextCapacity = 2;
+     StringView m_stringView;
+     AtomicString m_locale;
+     UBreakIterator* m_iterator { nullptr };
+     const UChar* m_cachedPriorContext { nullptr };
+     LineBreakIteratorMode m_mode { LineBreakIteratorMode::Default };
+     unsigned m_cachedPriorContextLength { 0 };
+     UChar m_priorContext[priorContextCapacity];
+ };
+
+// Iterates over "extended grapheme clusters", as defined in UAX #29.
+// Note that platform implementations may be less sophisticated - e.g. ICU prior to
+// version 4.0 only supports "legacy grapheme clusters".
+// Use this for general text processing, e.g. string truncation.
+
+ // Move-only handle around a UBreakIterator* that is set up for a given string.
+ class NonSharedCharacterBreakIterator {
+ WTF_MAKE_NONCOPYABLE(NonSharedCharacterBreakIterator);
+ public:
+ // Sets up an iterator over the given string.
+ WTF_EXPORT_PRIVATE NonSharedCharacterBreakIterator(StringView);
+ WTF_EXPORT_PRIVATE ~NonSharedCharacterBreakIterator();
+
+ NonSharedCharacterBreakIterator(NonSharedCharacterBreakIterator&&);
+
+ // Implicit conversion so the object can be passed straight to ICU ubrk_* functions.
+ operator UBreakIterator*() const { return m_iterator; }
+
+ private:
+ // No in-class initializer: each constructor is responsible for setting this.
+ UBreakIterator* m_iterator;
+ };
+
+// Counts the number of grapheme clusters. A surrogate pair or a sequence
+// of a non-combining character and following combining characters is
+// counted as 1 grapheme cluster.
+WTF_EXPORT_PRIVATE unsigned numGraphemeClusters(StringView);
+
+// Returns the number of characters which will be less than or equal to
+// the specified grapheme cluster length.
+WTF_EXPORT_PRIVATE unsigned numCharactersInGraphemeClusters(StringView, unsigned);
+
+}
+
+using WTF::LazyLineBreakIterator;
+using WTF::LineBreakIteratorMode;
+using WTF::NonSharedCharacterBreakIterator;
+using WTF::isWordTextBreak;
diff --git a/Source/WTF/wtf/text/TextBreakIteratorInternalICU.h b/Source/WTF/wtf/text/TextBreakIteratorInternalICU.h
new file mode 100644
index 000000000..70a301c88
--- /dev/null
+++ b/Source/WTF/wtf/text/TextBreakIteratorInternalICU.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2007 Apple Inc. All rights reserved.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+#ifndef TextBreakIteratorInternalICU_h
+#define TextBreakIteratorInternalICU_h
+
+// FIXME: Now that this handles locales for ICU, not just for text breaking,
+// this file and the various implementation files should be renamed.
+
+namespace WTF {
+
+WTF_EXPORT_PRIVATE const char* currentSearchLocaleID();
+WTF_EXPORT_PRIVATE const char* currentTextBreakLocaleID();
+
+}
+
+using WTF::currentSearchLocaleID;
+using WTF::currentTextBreakLocaleID;
+
+#endif
diff --git a/Source/WTF/wtf/text/TextPosition.h b/Source/WTF/wtf/text/TextPosition.h
index be49c157a..2f108b038 100644
--- a/Source/WTF/wtf/text/TextPosition.h
+++ b/Source/WTF/wtf/text/TextPosition.h
@@ -22,37 +22,12 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-#ifndef TextPosition_h
-#define TextPosition_h
+#pragma once
-#include <wtf/Assertions.h>
+#include "OrdinalNumber.h"
namespace WTF {
-// An abstract number of element in a sequence. The sequence has a first element.
-// This type should be used instead of integer because 2 contradicting traditions can
-// call a first element '0' or '1' which makes integer type ambiguous.
-class OrdinalNumber {
-public:
- static OrdinalNumber fromZeroBasedInt(int zeroBasedInt) { return OrdinalNumber(zeroBasedInt); }
- static OrdinalNumber fromOneBasedInt(int oneBasedInt) { return OrdinalNumber(oneBasedInt - 1); }
- OrdinalNumber() : m_zeroBasedValue(0) { }
-
- int zeroBasedInt() const { return m_zeroBasedValue; }
- int oneBasedInt() const { return m_zeroBasedValue + 1; }
-
- bool operator==(OrdinalNumber other) { return m_zeroBasedValue == other.m_zeroBasedValue; }
- bool operator!=(OrdinalNumber other) { return !((*this) == other); }
-
- static OrdinalNumber first() { return OrdinalNumber(0); }
- static OrdinalNumber beforeFirst() { return OrdinalNumber(-1); }
-
-private:
- OrdinalNumber(int zeroBasedInt) : m_zeroBasedValue(zeroBasedInt) { }
- int m_zeroBasedValue;
-};
-
-
// TextPosition structure specifies coordinates within an text resource. It is used mostly
// for saving script source position.
class TextPosition {
@@ -62,13 +37,11 @@ public:
, m_column(column)
{
}
+
TextPosition() { }
bool operator==(const TextPosition& other) { return m_line == other.m_line && m_column == other.m_column; }
bool operator!=(const TextPosition& other) { return !((*this) == other); }
- // A 'minimum' value of position, used as a default value.
- static TextPosition minimumPosition() { return TextPosition(OrdinalNumber::first(), OrdinalNumber::first()); }
-
// A value with line value less than a minimum; used as an impossible position.
static TextPosition belowRangePosition() { return TextPosition(OrdinalNumber::beforeFirst(), OrdinalNumber::beforeFirst()); }
@@ -78,8 +51,4 @@ public:
}
-using WTF::OrdinalNumber;
-
using WTF::TextPosition;
-
-#endif // TextPosition_h
diff --git a/Source/WTF/wtf/text/UniquedStringImpl.h b/Source/WTF/wtf/text/UniquedStringImpl.h
new file mode 100644
index 000000000..09aba85cf
--- /dev/null
+++ b/Source/WTF/wtf/text/UniquedStringImpl.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright (C) 2015 Yusuke Suzuki <utatane.tea@gmail.com>.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef UniquedStringImpl_h
+#define UniquedStringImpl_h
+
+#include <wtf/text/StringImpl.h>
+
+namespace WTF {
+
+ // Marks a string impl as being uniqued in some way.
+ // When two string impls are both uniqued string impls, they can be compared by pointer alone.
+ class UniquedStringImpl : public StringImpl {
+ private:
+ UniquedStringImpl() = delete;
+ protected:
+ // Only subclasses can construct one; every constructor forwards to StringImpl's CreateSymbol constructors.
+ UniquedStringImpl(CreateSymbolTag, const LChar* characters, unsigned length) : StringImpl(CreateSymbol, characters, length) { }
+ UniquedStringImpl(CreateSymbolTag, const UChar* characters, unsigned length) : StringImpl(CreateSymbol, characters, length) { }
+ UniquedStringImpl(CreateSymbolTag) : StringImpl(CreateSymbol) { }
+ };
+
+ #if !ASSERT_DISABLED
+ // The generic ValueCheck<T>::checkConsistency would ASSERT on a
+ // UniquedStringImpl created from a StaticStringImpl, because such an object is
+ // not allocated by fastMalloc. There is currently no way to detect that case,
+ // so the consistency check is a no-op for every UniquedStringImpl pointer.
+ template<> struct ValueCheck<UniquedStringImpl*> {
+     static void checkConsistency(const UniquedStringImpl*) { }
+ };
+
+ template<> struct ValueCheck<const UniquedStringImpl*> {
+     static void checkConsistency(const UniquedStringImpl*) { }
+ };
+ #endif
+
+} // namespace WTF
+
+using WTF::UniquedStringImpl;
+
+#endif // UniquedStringImpl_h
diff --git a/Source/WTF/wtf/text/WTFString.cpp b/Source/WTF/wtf/text/WTFString.cpp
index 45ba8af52..4f49ebca1 100644
--- a/Source/WTF/wtf/text/WTFString.cpp
+++ b/Source/WTF/wtf/text/WTFString.cpp
@@ -35,7 +35,6 @@
#include <wtf/dtoa.h>
#include <wtf/unicode/CharacterNames.h>
#include <wtf/unicode/UTF8.h>
-#include <wtf/unicode/Unicode.h>
namespace WTF {
@@ -90,64 +89,75 @@ String::String(ASCIILiteral characters)
void String::append(const String& str)
{
+ // FIXME: This is extremely inefficient. So much so that we might want to take this out of String's API.
+
if (str.isEmpty())
return;
- // FIXME: This is extremely inefficient. So much so that we might want to take this
- // out of String's API. We can make it better by optimizing the case where exactly
- // one String is pointing at this StringImpl, but even then it's going to require a
- // call to fastMalloc every single time.
if (str.m_impl) {
if (m_impl) {
if (m_impl->is8Bit() && str.m_impl->is8Bit()) {
LChar* data;
if (str.length() > std::numeric_limits<unsigned>::max() - m_impl->length())
CRASH();
- RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(m_impl->length() + str.length(), data);
+ auto newImpl = StringImpl::createUninitialized(m_impl->length() + str.length(), data);
memcpy(data, m_impl->characters8(), m_impl->length() * sizeof(LChar));
memcpy(data + m_impl->length(), str.characters8(), str.length() * sizeof(LChar));
- m_impl = newImpl.release();
+ m_impl = WTFMove(newImpl);
return;
}
UChar* data;
if (str.length() > std::numeric_limits<unsigned>::max() - m_impl->length())
CRASH();
- RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(m_impl->length() + str.length(), data);
- memcpy(data, m_impl->deprecatedCharacters(), m_impl->length() * sizeof(UChar));
- memcpy(data + m_impl->length(), str.deprecatedCharacters(), str.length() * sizeof(UChar));
- m_impl = newImpl.release();
+ auto newImpl = StringImpl::createUninitialized(m_impl->length() + str.length(), data);
+ StringView(*m_impl).getCharactersWithUpconvert(data);
+ StringView(str).getCharactersWithUpconvert(data + m_impl->length());
+ m_impl = WTFMove(newImpl);
} else
m_impl = str.m_impl;
}
}
-template <typename CharacterType>
-inline void String::appendInternal(CharacterType c)
+void String::append(LChar character)
{
- // FIXME: This is extremely inefficient. So much so that we might want to take this
- // out of String's API. We can make it better by optimizing the case where exactly
- // one String is pointing at this StringImpl, but even then it's going to require a
- // call to fastMalloc every single time.
- if (m_impl) {
- UChar* data;
- if (m_impl->length() >= std::numeric_limits<unsigned>::max())
- CRASH();
- RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(m_impl->length() + 1, data);
- memcpy(data, m_impl->deprecatedCharacters(), m_impl->length() * sizeof(UChar));
- data[m_impl->length()] = c;
- m_impl = newImpl.release();
- } else
- m_impl = StringImpl::create(&c, 1);
-}
+ // FIXME: This is extremely inefficient. So much so that we might want to take this out of String's API.
-void String::append(LChar c)
-{
- appendInternal(c);
+ if (!m_impl) {
+ m_impl = StringImpl::create(&character, 1);
+ return;
+ }
+ if (!is8Bit()) {
+ append(static_cast<UChar>(character));
+ return;
+ }
+ if (m_impl->length() >= std::numeric_limits<unsigned>::max())
+ CRASH();
+ LChar* data;
+ auto newImpl = StringImpl::createUninitialized(m_impl->length() + 1, data);
+ memcpy(data, m_impl->characters8(), m_impl->length());
+ data[m_impl->length()] = character;
+ m_impl = WTFMove(newImpl);
}
-void String::append(UChar c)
+void String::append(UChar character)
{
- appendInternal(c);
+ // FIXME: This is extremely inefficient. So much so that we might want to take this out of String's API.
+
+ if (!m_impl) {
+ m_impl = StringImpl::create(&character, 1);
+ return;
+ }
+ if (character <= 0xFF && is8Bit()) {
+ append(static_cast<LChar>(character));
+ return;
+ }
+ if (m_impl->length() >= std::numeric_limits<unsigned>::max())
+ CRASH();
+ UChar* data;
+ auto newImpl = StringImpl::createUninitialized(m_impl->length() + 1, data);
+ StringView(*m_impl).getCharactersWithUpconvert(data);
+ data[m_impl->length()] = character;
+ m_impl = WTFMove(newImpl);
}
int codePointCompare(const String& a, const String& b)
@@ -155,20 +165,49 @@ int codePointCompare(const String& a, const String& b)
return codePointCompare(a.impl(), b.impl());
}
-void String::insert(const String& str, unsigned pos)
+void String::insert(const String& string, unsigned position)
{
- if (str.isEmpty()) {
- if (str.isNull())
+ // FIXME: This is extremely inefficient. So much so that we might want to take this out of String's API.
+
+ unsigned lengthToInsert = string.length();
+
+ if (!lengthToInsert) {
+ if (string.isNull())
return;
if (isNull())
- m_impl = str.impl();
+ m_impl = string.impl();
return;
}
- insert(str.deprecatedCharacters(), str.length(), pos);
+
+ if (position >= length()) {
+ append(string);
+ return;
+ }
+
+ if (lengthToInsert > std::numeric_limits<unsigned>::max() - length())
+ CRASH();
+
+ if (is8Bit() && string.is8Bit()) {
+ LChar* data;
+ auto newString = StringImpl::createUninitialized(length() + lengthToInsert, data);
+ StringView(*m_impl).substring(0, position).getCharactersWithUpconvert(data);
+ StringView(string).getCharactersWithUpconvert(data + position);
+ StringView(*m_impl).substring(position).getCharactersWithUpconvert(data + position + lengthToInsert);
+ m_impl = WTFMove(newString);
+ } else {
+ UChar* data;
+ auto newString = StringImpl::createUninitialized(length() + lengthToInsert, data);
+ StringView(*m_impl).substring(0, position).getCharactersWithUpconvert(data);
+ StringView(string).getCharactersWithUpconvert(data + position);
+ StringView(*m_impl).substring(position).getCharactersWithUpconvert(data + position + lengthToInsert);
+ m_impl = WTFMove(newString);
+ }
}
void String::append(const LChar* charactersToAppend, unsigned lengthToAppend)
{
+ // FIXME: This is extremely inefficient. So much so that we might want to take this out of String's API.
+
if (!m_impl) {
if (!charactersToAppend)
return;
@@ -187,24 +226,26 @@ void String::append(const LChar* charactersToAppend, unsigned lengthToAppend)
if (lengthToAppend > std::numeric_limits<unsigned>::max() - strLength)
CRASH();
LChar* data;
- RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(strLength + lengthToAppend, data);
+ auto newImpl = StringImpl::createUninitialized(strLength + lengthToAppend, data);
StringImpl::copyChars(data, m_impl->characters8(), strLength);
StringImpl::copyChars(data + strLength, charactersToAppend, lengthToAppend);
- m_impl = newImpl.release();
+ m_impl = WTFMove(newImpl);
return;
}
if (lengthToAppend > std::numeric_limits<unsigned>::max() - strLength)
CRASH();
UChar* data;
- RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(length() + lengthToAppend, data);
+ auto newImpl = StringImpl::createUninitialized(length() + lengthToAppend, data);
StringImpl::copyChars(data, m_impl->characters16(), strLength);
StringImpl::copyChars(data + strLength, charactersToAppend, lengthToAppend);
- m_impl = newImpl.release();
+ m_impl = WTFMove(newImpl);
}
void String::append(const UChar* charactersToAppend, unsigned lengthToAppend)
{
+ // FIXME: This is extremely inefficient. So much so that we might want to take this out of String's API.
+
if (!m_impl) {
if (!charactersToAppend)
return;
@@ -221,39 +262,16 @@ void String::append(const UChar* charactersToAppend, unsigned lengthToAppend)
if (lengthToAppend > std::numeric_limits<unsigned>::max() - strLength)
CRASH();
UChar* data;
- RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(strLength + lengthToAppend, data);
+ auto newImpl = StringImpl::createUninitialized(strLength + lengthToAppend, data);
if (m_impl->is8Bit())
StringImpl::copyChars(data, characters8(), strLength);
else
StringImpl::copyChars(data, characters16(), strLength);
StringImpl::copyChars(data + strLength, charactersToAppend, lengthToAppend);
- m_impl = newImpl.release();
+ m_impl = WTFMove(newImpl);
}
-void String::insert(const UChar* charactersToInsert, unsigned lengthToInsert, unsigned position)
-{
- if (position >= length()) {
- append(charactersToInsert, lengthToInsert);
- return;
- }
-
- ASSERT(m_impl);
-
- if (!lengthToInsert)
- return;
-
- ASSERT(charactersToInsert);
- UChar* data;
- if (lengthToInsert > std::numeric_limits<unsigned>::max() - length())
- CRASH();
- RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(length() + lengthToInsert, data);
- memcpy(data, deprecatedCharacters(), position * sizeof(UChar));
- memcpy(data + position, charactersToInsert, lengthToInsert * sizeof(UChar));
- memcpy(data + position + lengthToInsert, deprecatedCharacters() + position, (length() - position) * sizeof(UChar));
- m_impl = newImpl.release();
-}
-
UChar32 String::characterStartingAt(unsigned i) const
{
if (!m_impl || i >= m_impl->length())
@@ -263,24 +281,20 @@ UChar32 String::characterStartingAt(unsigned i) const
void String::truncate(unsigned position)
{
- if (position >= length())
- return;
- UChar* data;
- RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(position, data);
- memcpy(data, deprecatedCharacters(), position * sizeof(UChar));
- m_impl = newImpl.release();
+ if (m_impl)
+ m_impl = m_impl->substring(0, position);
}
template <typename CharacterType>
inline void String::removeInternal(const CharacterType* characters, unsigned position, int lengthToRemove)
{
CharacterType* data;
- RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(length() - lengthToRemove, data);
+ auto newImpl = StringImpl::createUninitialized(length() - lengthToRemove, data);
memcpy(data, characters, position * sizeof(CharacterType));
memcpy(data + position, characters + position + lengthToRemove,
(length() - lengthToRemove - position) * sizeof(CharacterType));
- m_impl = newImpl.release();
+ m_impl = WTFMove(newImpl);
}
void String::remove(unsigned position, int lengthToRemove)
@@ -318,35 +332,58 @@ String String::substringSharingImpl(unsigned offset, unsigned length) const
if (!offset && length == stringLength)
return *this;
- return String(StringImpl::create(m_impl, offset, length));
+ return String(StringImpl::createSubstringSharingImpl(*m_impl, offset, length));
+}
+
+String String::convertToASCIILowercase() const
+{
+ // FIXME: Should this function, and the many others like it, be inlined?
+ if (!m_impl)
+ return String();
+ return m_impl->convertToASCIILowercase();
+}
+
+String String::convertToASCIIUppercase() const
+{
+ // FIXME: Should this function, and the many others like it, be inlined?
+ if (!m_impl)
+ return String();
+ return m_impl->convertToASCIIUppercase();
+}
+
+String String::convertToLowercaseWithoutLocale() const
+{
+ if (!m_impl)
+ return String();
+ return m_impl->convertToLowercaseWithoutLocale();
}
-String String::lower() const
+String String::convertToLowercaseWithoutLocaleStartingAtFailingIndex8Bit(unsigned failingIndex) const
{
if (!m_impl)
return String();
- return m_impl->lower();
+ return m_impl->convertToLowercaseWithoutLocaleStartingAtFailingIndex8Bit(failingIndex);
}
-String String::upper() const
+String String::convertToUppercaseWithoutLocale() const
{
if (!m_impl)
return String();
- return m_impl->upper();
+ return m_impl->convertToUppercaseWithoutLocale();
}
-String String::lower(const AtomicString& localeIdentifier) const
+String String::convertToLowercaseWithLocale(const AtomicString& localeIdentifier) const
{
if (!m_impl)
return String();
- return m_impl->lower(localeIdentifier);
+ return m_impl->convertToLowercaseWithLocale(localeIdentifier);
}
-String String::upper(const AtomicString& localeIdentifier) const
+String String::convertToUppercaseWithLocale(const AtomicString& localeIdentifier) const
{
if (!m_impl)
return String();
- return m_impl->upper(localeIdentifier);
+ return m_impl->convertToUppercaseWithLocale(localeIdentifier);
}
String String::stripWhiteSpace() const
@@ -399,7 +436,10 @@ bool String::percentage(int& result) const
if ((*m_impl)[m_impl->length() - 1] != '%')
return false;
- result = charactersToIntStrict(m_impl->deprecatedCharacters(), m_impl->length() - 1);
+ if (m_impl->is8Bit())
+ result = charactersToIntStrict(m_impl->characters8(), m_impl->length() - 1);
+ else
+ result = charactersToIntStrict(m_impl->characters16(), m_impl->length() - 1);
return true;
}
@@ -427,33 +467,26 @@ Vector<UChar> String::charactersWithNullTermination() const
String String::format(const char *format, ...)
{
-#if OS(WINCE)
va_list args;
va_start(args, format);
- Vector<char, 256> buffer;
+#if USE(CF) && !OS(WINDOWS)
+ if (strstr(format, "%@")) {
+ RetainPtr<CFStringRef> cfFormat = adoptCF(CFStringCreateWithCString(kCFAllocatorDefault, format, kCFStringEncodingUTF8));
- int bufferSize = 256;
- buffer.resize(bufferSize);
- for (;;) {
- int written = vsnprintf(buffer.data(), bufferSize, format, args);
- va_end(args);
+#if COMPILER(CLANG)
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wformat-nonliteral"
+#endif
+ RetainPtr<CFStringRef> result = adoptCF(CFStringCreateWithFormatAndArguments(kCFAllocatorDefault, nullptr, cfFormat.get(), args));
+#if COMPILER(CLANG)
+#pragma clang diagnostic pop
+#endif
- if (written == 0)
- return String("");
- if (written > 0)
- return StringImpl::create(reinterpret_cast<const LChar*>(buffer.data()), written);
-
- bufferSize <<= 1;
- buffer.resize(bufferSize);
- va_start(args, format);
+ va_end(args);
+ return result.get();
}
-
-#else
- va_list args;
- va_start(args, format);
-
- Vector<char, 256> buffer;
+#endif // USE(CF) && !OS(WINDOWS)
// Do the format once to get the length.
#if COMPILER(MSVC)
@@ -461,30 +494,25 @@ String String::format(const char *format, ...)
#else
char ch;
int result = vsnprintf(&ch, 1, format, args);
- // We need to call va_end() and then va_start() again here, as the
- // contents of args is undefined after the call to vsnprintf
- // according to http://man.cx/snprintf(3)
- //
- // Not calling va_end/va_start here happens to work on lots of
- // systems, but fails e.g. on 64bit Linux.
- va_end(args);
- va_start(args, format);
#endif
+ va_end(args);
if (result == 0)
return String("");
if (result < 0)
return String();
+
+ Vector<char, 256> buffer;
unsigned len = result;
buffer.grow(len + 1);
+ va_start(args, format);
// Now do the formatting again, guaranteed to fit.
vsnprintf(buffer.data(), buffer.size(), format, args);
va_end(args);
return StringImpl::create(reinterpret_cast<const LChar*>(buffer.data()), len);
-#endif
}
String String::number(int number)
@@ -663,12 +691,12 @@ String String::isolatedCopy() const &
return m_impl->isolatedCopy();
}
-String String::isolatedCopy() const &&
+String String::isolatedCopy() &&
{
if (isSafeToSendToAnotherThread()) {
// Since we know that our string is a temporary that will be destroyed
// we can just steal the m_impl from it, thus avoiding a copy.
- return String(std::move(*this));
+ return String(WTFMove(*this));
}
if (!m_impl)
@@ -689,14 +717,14 @@ bool String::isSafeToSendToAnotherThread() const
{
if (!impl())
return true;
+ if (isEmpty())
+ return true;
// AtomicStrings are not safe to send between threads as ~StringImpl()
// will try to remove them from the wrong AtomicStringTable.
if (impl()->isAtomic())
return false;
if (impl()->hasOneRef())
return true;
- if (isEmpty())
- return true;
return false;
}
@@ -802,6 +830,11 @@ CString String::utf8(ConversionMode mode) const
return m_impl->utf8(mode);
}
+CString String::utf8() const
+{
+ return utf8(LenientConversion);
+}
+
String String::make8BitFrom16BitSource(const UChar* source, size_t length)
{
if (!length)
@@ -1181,7 +1214,7 @@ String* string(const char* s)
Vector<char> asciiDebug(StringImpl* impl)
{
if (!impl)
- return asciiDebug(String("[null]").impl());
+ return asciiDebug(String(ASCIILiteral("[null]")).impl());
Vector<char> buffer;
for (unsigned i = 0; i < impl->length(); ++i) {
diff --git a/Source/WTF/wtf/text/WTFString.h b/Source/WTF/wtf/text/WTFString.h
index 5c8a0af95..cb4232d58 100644
--- a/Source/WTF/wtf/text/WTFString.h
+++ b/Source/WTF/wtf/text/WTFString.h
@@ -1,6 +1,6 @@
/*
* (C) 1999 Lars Knoll (knoll@kde.org)
- * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2012, 2013 Apple Inc. All rights reserved.
+ * Copyright (C) 2004-2016 Apple Inc. All rights reserved.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
@@ -26,6 +26,7 @@
// on systems without case-sensitive file systems.
#include <wtf/text/ASCIIFastPath.h>
+#include <wtf/text/IntegerToStringConversion.h>
#include <wtf/text/StringImpl.h>
#ifdef __OBJC__
@@ -34,52 +35,45 @@
namespace WTF {
-class CString;
-struct StringHash;
+class ASCIILiteral;
// Declarations of string operations
-WTF_EXPORT_STRING_API int charactersToIntStrict(const LChar*, size_t, bool* ok = 0, int base = 10);
-WTF_EXPORT_STRING_API int charactersToIntStrict(const UChar*, size_t, bool* ok = 0, int base = 10);
-WTF_EXPORT_STRING_API unsigned charactersToUIntStrict(const LChar*, size_t, bool* ok = 0, int base = 10);
-WTF_EXPORT_STRING_API unsigned charactersToUIntStrict(const UChar*, size_t, bool* ok = 0, int base = 10);
-int64_t charactersToInt64Strict(const LChar*, size_t, bool* ok = 0, int base = 10);
-int64_t charactersToInt64Strict(const UChar*, size_t, bool* ok = 0, int base = 10);
-uint64_t charactersToUInt64Strict(const LChar*, size_t, bool* ok = 0, int base = 10);
-uint64_t charactersToUInt64Strict(const UChar*, size_t, bool* ok = 0, int base = 10);
-intptr_t charactersToIntPtrStrict(const LChar*, size_t, bool* ok = 0, int base = 10);
-intptr_t charactersToIntPtrStrict(const UChar*, size_t, bool* ok = 0, int base = 10);
-
-int charactersToInt(const LChar*, size_t, bool* ok = 0); // ignores trailing garbage
-WTF_EXPORT_STRING_API int charactersToInt(const UChar*, size_t, bool* ok = 0); // ignores trailing garbage
-unsigned charactersToUInt(const LChar*, size_t, bool* ok = 0); // ignores trailing garbage
-unsigned charactersToUInt(const UChar*, size_t, bool* ok = 0); // ignores trailing garbage
-int64_t charactersToInt64(const LChar*, size_t, bool* ok = 0); // ignores trailing garbage
-int64_t charactersToInt64(const UChar*, size_t, bool* ok = 0); // ignores trailing garbage
-uint64_t charactersToUInt64(const LChar*, size_t, bool* ok = 0); // ignores trailing garbage
-uint64_t charactersToUInt64(const UChar*, size_t, bool* ok = 0); // ignores trailing garbage
-intptr_t charactersToIntPtr(const LChar*, size_t, bool* ok = 0); // ignores trailing garbage
-intptr_t charactersToIntPtr(const UChar*, size_t, bool* ok = 0); // ignores trailing garbage
+WTF_EXPORT_STRING_API int charactersToIntStrict(const LChar*, size_t, bool* ok = nullptr, int base = 10);
+WTF_EXPORT_STRING_API int charactersToIntStrict(const UChar*, size_t, bool* ok = nullptr, int base = 10);
+WTF_EXPORT_STRING_API unsigned charactersToUIntStrict(const LChar*, size_t, bool* ok = nullptr, int base = 10);
+WTF_EXPORT_STRING_API unsigned charactersToUIntStrict(const UChar*, size_t, bool* ok = nullptr, int base = 10);
+int64_t charactersToInt64Strict(const LChar*, size_t, bool* ok = nullptr, int base = 10);
+int64_t charactersToInt64Strict(const UChar*, size_t, bool* ok = nullptr, int base = 10);
+uint64_t charactersToUInt64Strict(const LChar*, size_t, bool* ok = nullptr, int base = 10);
+uint64_t charactersToUInt64Strict(const UChar*, size_t, bool* ok = nullptr, int base = 10);
+intptr_t charactersToIntPtrStrict(const LChar*, size_t, bool* ok = nullptr, int base = 10);
+intptr_t charactersToIntPtrStrict(const UChar*, size_t, bool* ok = nullptr, int base = 10);
+
+WTF_EXPORT_STRING_API int charactersToInt(const LChar*, size_t, bool* ok = nullptr); // ignores trailing garbage
+WTF_EXPORT_STRING_API int charactersToInt(const UChar*, size_t, bool* ok = nullptr); // ignores trailing garbage
+unsigned charactersToUInt(const LChar*, size_t, bool* ok = nullptr); // ignores trailing garbage
+unsigned charactersToUInt(const UChar*, size_t, bool* ok = nullptr); // ignores trailing garbage
+int64_t charactersToInt64(const LChar*, size_t, bool* ok = nullptr); // ignores trailing garbage
+int64_t charactersToInt64(const UChar*, size_t, bool* ok = nullptr); // ignores trailing garbage
+uint64_t charactersToUInt64(const LChar*, size_t, bool* ok = nullptr); // ignores trailing garbage
+WTF_EXPORT_STRING_API uint64_t charactersToUInt64(const UChar*, size_t, bool* ok = nullptr); // ignores trailing garbage
+intptr_t charactersToIntPtr(const LChar*, size_t, bool* ok = nullptr); // ignores trailing garbage
+intptr_t charactersToIntPtr(const UChar*, size_t, bool* ok = nullptr); // ignores trailing garbage
// FIXME: Like the strict functions above, these give false for "ok" when there is trailing garbage.
// Like the non-strict functions above, these return the value when there is trailing garbage.
// It would be better if these were more consistent with the above functions instead.
-WTF_EXPORT_STRING_API double charactersToDouble(const LChar*, size_t, bool* ok = 0);
-WTF_EXPORT_STRING_API double charactersToDouble(const UChar*, size_t, bool* ok = 0);
-float charactersToFloat(const LChar*, size_t, bool* ok = 0);
-WTF_EXPORT_STRING_API float charactersToFloat(const UChar*, size_t, bool* ok = 0);
+WTF_EXPORT_STRING_API double charactersToDouble(const LChar*, size_t, bool* ok = nullptr);
+WTF_EXPORT_STRING_API double charactersToDouble(const UChar*, size_t, bool* ok = nullptr);
+WTF_EXPORT_STRING_API float charactersToFloat(const LChar*, size_t, bool* ok = nullptr);
+WTF_EXPORT_STRING_API float charactersToFloat(const UChar*, size_t, bool* ok = nullptr);
WTF_EXPORT_STRING_API float charactersToFloat(const LChar*, size_t, size_t& parsedLength);
WTF_EXPORT_STRING_API float charactersToFloat(const UChar*, size_t, size_t& parsedLength);
-class ASCIILiteral;
-
-enum TrailingZerosTruncatingPolicy {
- KeepTrailingZeros,
- TruncateTrailingZeros
-};
+template<bool isSpecialCharacter(UChar), typename CharacterType> bool isAllSpecialCharacters(const CharacterType*, size_t);
-template<bool isSpecialCharacter(UChar), typename CharacterType>
-bool isAllSpecialCharacters(const CharacterType*, size_t);
+enum TrailingZerosTruncatingPolicy { KeepTrailingZeros, TruncateTrailingZeros };
class String {
public:
@@ -112,10 +106,13 @@ public:
WTF_EXPORT_STRING_API String(const char* characters);
// Construct a string referencing an existing StringImpl.
- String(StringImpl* impl) : m_impl(impl) { }
- String(PassRefPtr<StringImpl> impl) : m_impl(impl) { }
- String(PassRef<StringImpl> impl) : m_impl(std::move(impl)) { }
- String(RefPtr<StringImpl>&& impl) : m_impl(impl) { }
+ String(StringImpl&);
+ String(StringImpl*);
+ String(Ref<StringImpl>&&);
+ String(RefPtr<StringImpl>&&);
+
+ String(Ref<AtomicStringImpl>&&);
+ String(RefPtr<AtomicStringImpl>&&);
// Construct a string from a constant string literal.
WTF_EXPORT_STRING_API String(ASCIILiteral characters);
@@ -128,26 +125,30 @@ public:
// We have to declare the copy constructor and copy assignment operator as well, otherwise
// they'll be implicitly deleted by adding the move constructor and move assignment operator.
- String(const String& other) : m_impl(other.m_impl) { }
- String(String&& other) : m_impl(other.m_impl.release()) { }
+ String(const String& other)
+ : m_impl(other.m_impl)
+ { }
+ String(String&& other)
+ : m_impl(WTFMove(other.m_impl))
+ { }
String& operator=(const String& other) { m_impl = other.m_impl; return *this; }
- String& operator=(String&& other) { m_impl = other.m_impl.release(); return *this; }
+ String& operator=(String&& other) { m_impl = WTFMove(other.m_impl); return *this; }
// Inline the destructor.
ALWAYS_INLINE ~String() { }
void swap(String& o) { m_impl.swap(o.m_impl); }
- static String adopt(StringBuffer<LChar>& buffer) { return StringImpl::adopt(buffer); }
- static String adopt(StringBuffer<UChar>& buffer) { return StringImpl::adopt(buffer); }
+ static String adopt(StringBuffer<LChar>&& buffer) { return StringImpl::adopt(WTFMove(buffer)); }
+ static String adopt(StringBuffer<UChar>&& buffer) { return StringImpl::adopt(WTFMove(buffer)); }
template<typename CharacterType, size_t inlineCapacity, typename OverflowHandler>
- static String adopt(Vector<CharacterType, inlineCapacity, OverflowHandler>& vector) { return StringImpl::adopt(vector); }
+ static String adopt(Vector<CharacterType, inlineCapacity, OverflowHandler>&& vector) { return StringImpl::adopt(WTFMove(vector)); }
bool isNull() const { return !m_impl; }
bool isEmpty() const { return !m_impl || !m_impl->length(); }
StringImpl* impl() const { return m_impl.get(); }
- PassRefPtr<StringImpl> releaseImpl() { return m_impl.release(); }
+ RefPtr<StringImpl> releaseImpl() { return WTFMove(m_impl); }
unsigned length() const
{
@@ -156,14 +157,6 @@ public:
return m_impl->length();
}
- const UChar* characters() const { return deprecatedCharacters(); } // FIXME: Delete this.
- const UChar* deprecatedCharacters() const
- {
- if (!m_impl)
- return 0;
- return m_impl->deprecatedCharacters();
- }
-
const LChar* characters8() const
{
if (!m_impl)
@@ -182,11 +175,7 @@ public:
// Return characters8() or characters16() depending on CharacterType.
template <typename CharacterType>
- inline const CharacterType* getCharacters() const;
-
- // Like getCharacters() and upconvert if CharacterType is UChar on a 8bit string.
- template <typename CharacterType>
- inline const CharacterType* getCharactersWithUpconvert() const;
+ inline const CharacterType* characters() const;
bool is8Bit() const { return m_impl->is8Bit(); }
@@ -200,7 +189,8 @@ public:
WTF_EXPORT_STRING_API CString ascii() const;
WTF_EXPORT_STRING_API CString latin1() const;
- WTF_EXPORT_STRING_API CString utf8(ConversionMode = LenientConversion) const;
+ WTF_EXPORT_STRING_API CString utf8(ConversionMode) const;
+ WTF_EXPORT_STRING_API CString utf8() const;
UChar at(unsigned index) const
{
@@ -231,15 +221,16 @@ public:
{ return m_impl ? m_impl->find(str.impl()) : notFound; }
size_t find(const String& str, unsigned start) const
{ return m_impl ? m_impl->find(str.impl(), start) : notFound; }
+ size_t findIgnoringASCIICase(const String& str) const
+ { return m_impl ? m_impl->findIgnoringASCIICase(str.impl()) : notFound; }
+ size_t findIgnoringASCIICase(const String& str, unsigned startOffset) const
+ { return m_impl ? m_impl->findIgnoringASCIICase(str.impl(), startOffset) : notFound; }
size_t find(CharacterMatchFunctionPtr matchFunction, unsigned start = 0) const
{ return m_impl ? m_impl->find(matchFunction, start) : notFound; }
size_t find(const LChar* str, unsigned start = 0) const
{ return m_impl ? m_impl->find(str, start) : notFound; }
- size_t findNextLineStart(unsigned start = 0) const
- { return m_impl ? m_impl->findNextLineStart(start) : notFound; }
-
// Find the last instance of a single character or string.
size_t reverseFind(UChar c, unsigned start = UINT_MAX) const
{ return m_impl ? m_impl->reverseFind(c, start) : notFound; }
@@ -267,11 +258,21 @@ public:
WTF_EXPORT_STRING_API UChar32 characterStartingAt(unsigned) const; // Ditto.
bool contains(UChar c) const { return find(c) != notFound; }
- bool contains(const LChar* str, bool caseSensitive = true) const { return find(str, 0, caseSensitive) != notFound; }
- bool contains(const String& str, bool caseSensitive = true) const { return find(str, 0, caseSensitive) != notFound; }
+ bool contains(const LChar* str, bool caseSensitive = true, unsigned startOffset = 0) const
+ { return find(str, startOffset, caseSensitive) != notFound; }
+ bool contains(const String& str) const
+ { return find(str) != notFound; }
+ bool contains(const String& str, bool caseSensitive, unsigned startOffset = 0) const
+ { return find(str, startOffset, caseSensitive) != notFound; }
+ bool containsIgnoringASCIICase(const String& str) const
+ { return findIgnoringASCIICase(str) != notFound; }
+ bool containsIgnoringASCIICase(const String& str, unsigned startOffset) const
+ { return findIgnoringASCIICase(str, startOffset) != notFound; }
bool startsWith(const String& s) const
{ return m_impl ? m_impl->startsWith(s.impl()) : s.isEmpty(); }
+ bool startsWithIgnoringASCIICase(const String& s) const
+ { return m_impl ? m_impl->startsWithIgnoringASCIICase(s.impl()) : s.isEmpty(); }
bool startsWith(const String& s, bool caseSensitive) const
{ return m_impl ? m_impl->startsWith(s.impl(), caseSensitive) : s.isEmpty(); }
bool startsWith(UChar character) const
@@ -279,14 +280,23 @@ public:
template<unsigned matchLength>
bool startsWith(const char (&prefix)[matchLength], bool caseSensitive = true) const
{ return m_impl ? m_impl->startsWith<matchLength>(prefix, caseSensitive) : !matchLength; }
-
- bool endsWith(const String& s, bool caseSensitive = true) const
+ bool hasInfixStartingAt(const String& prefix, unsigned startOffset) const
+ { return m_impl && prefix.impl() ? m_impl->hasInfixStartingAt(*prefix.impl(), startOffset) : false; }
+
+ bool endsWith(const String& s) const
+ { return m_impl ? m_impl->endsWith(s.impl()) : s.isEmpty(); }
+ bool endsWithIgnoringASCIICase(const String& s) const
+ { return m_impl ? m_impl->endsWithIgnoringASCIICase(s.impl()) : s.isEmpty(); }
+ bool endsWith(const String& s, bool caseSensitive) const
{ return m_impl ? m_impl->endsWith(s.impl(), caseSensitive) : s.isEmpty(); }
bool endsWith(UChar character) const
{ return m_impl ? m_impl->endsWith(character) : false; }
+ bool endsWith(char character) const { return endsWith(static_cast<UChar>(character)); }
template<unsigned matchLength>
bool endsWith(const char (&prefix)[matchLength], bool caseSensitive = true) const
{ return m_impl ? m_impl->endsWith<matchLength>(prefix, caseSensitive) : !matchLength; }
+ bool hasInfixEndingAt(const String& suffix, unsigned endOffset) const
+ { return m_impl && suffix.impl() ? m_impl->hasInfixEndingAt(*suffix.impl(), endOffset) : false; }
WTF_EXPORT_STRING_API void append(const String&);
WTF_EXPORT_STRING_API void append(LChar);
@@ -295,7 +305,6 @@ public:
WTF_EXPORT_STRING_API void append(const LChar*, unsigned length);
WTF_EXPORT_STRING_API void append(const UChar*, unsigned length);
WTF_EXPORT_STRING_API void insert(const String&, unsigned pos);
- void insert(const UChar*, unsigned length, unsigned pos);
String& replace(UChar a, UChar b) { if (m_impl) m_impl = m_impl->replace(a, b); return *this; }
String& replace(UChar a, const String& b) { if (m_impl) m_impl = m_impl->replace(a, b.impl()); return *this; }
@@ -311,8 +320,6 @@ public:
return *this;
}
- void fill(UChar c) { if (m_impl) m_impl = m_impl->fill(c); }
-
WTF_EXPORT_STRING_API void truncate(unsigned len);
WTF_EXPORT_STRING_API void remove(unsigned pos, int len = 1);
@@ -321,12 +328,13 @@ public:
String left(unsigned len) const { return substring(0, len); }
String right(unsigned len) const { return substring(length() - len, len); }
- // Returns a lowercase/uppercase version of the string
- WTF_EXPORT_STRING_API String lower() const;
- WTF_EXPORT_STRING_API String upper() const;
-
- WTF_EXPORT_STRING_API String lower(const AtomicString& localeIdentifier) const;
- WTF_EXPORT_STRING_API String upper(const AtomicString& localeIdentifier) const;
+ WTF_EXPORT_STRING_API String convertToASCIILowercase() const;
+ WTF_EXPORT_STRING_API String convertToASCIIUppercase() const;
+ WTF_EXPORT_STRING_API String convertToLowercaseWithoutLocale() const;
+ WTF_EXPORT_STRING_API String convertToLowercaseWithoutLocaleStartingAtFailingIndex8Bit(unsigned) const;
+ WTF_EXPORT_STRING_API String convertToUppercaseWithoutLocale() const;
+ WTF_EXPORT_STRING_API String convertToLowercaseWithLocale(const AtomicString& localeIdentifier) const;
+ WTF_EXPORT_STRING_API String convertToUppercaseWithLocale(const AtomicString& localeIdentifier) const;
WTF_EXPORT_STRING_API String stripWhiteSpace() const;
WTF_EXPORT_STRING_API String stripWhiteSpace(IsWhiteSpaceFunctionPtr) const;
@@ -336,7 +344,8 @@ public:
WTF_EXPORT_STRING_API String removeCharacters(CharacterMatchFunctionPtr) const;
template<bool isSpecialCharacter(UChar)> bool isAllSpecialCharacters() const;
- // Return the string with case folded for case insensitive comparison.
+ // Returns the string with case folded for case insensitive comparison.
+ // Use convertToASCIILowercase instead if ASCII case insensitive comparison is desired.
WTF_EXPORT_STRING_API String foldCase() const;
WTF_EXPORT_STRING_API static String format(const char *, ...) WTF_ATTRIBUTE_PRINTF(1, 2);
@@ -358,29 +367,29 @@ public:
split(separator, false, result);
}
- WTF_EXPORT_STRING_API int toIntStrict(bool* ok = 0, int base = 10) const;
- WTF_EXPORT_STRING_API unsigned toUIntStrict(bool* ok = 0, int base = 10) const;
- WTF_EXPORT_STRING_API int64_t toInt64Strict(bool* ok = 0, int base = 10) const;
- WTF_EXPORT_STRING_API uint64_t toUInt64Strict(bool* ok = 0, int base = 10) const;
- intptr_t toIntPtrStrict(bool* ok = 0, int base = 10) const;
+ WTF_EXPORT_STRING_API int toIntStrict(bool* ok = nullptr, int base = 10) const;
+ WTF_EXPORT_STRING_API unsigned toUIntStrict(bool* ok = nullptr, int base = 10) const;
+ WTF_EXPORT_STRING_API int64_t toInt64Strict(bool* ok = nullptr, int base = 10) const;
+ WTF_EXPORT_STRING_API uint64_t toUInt64Strict(bool* ok = nullptr, int base = 10) const;
+ WTF_EXPORT_STRING_API intptr_t toIntPtrStrict(bool* ok = nullptr, int base = 10) const;
- WTF_EXPORT_STRING_API int toInt(bool* ok = 0) const;
- WTF_EXPORT_STRING_API unsigned toUInt(bool* ok = 0) const;
- WTF_EXPORT_STRING_API int64_t toInt64(bool* ok = 0) const;
- WTF_EXPORT_STRING_API uint64_t toUInt64(bool* ok = 0) const;
- WTF_EXPORT_STRING_API intptr_t toIntPtr(bool* ok = 0) const;
+ WTF_EXPORT_STRING_API int toInt(bool* ok = nullptr) const;
+ WTF_EXPORT_STRING_API unsigned toUInt(bool* ok = nullptr) const;
+ WTF_EXPORT_STRING_API int64_t toInt64(bool* ok = nullptr) const;
+ WTF_EXPORT_STRING_API uint64_t toUInt64(bool* ok = nullptr) const;
+ WTF_EXPORT_STRING_API intptr_t toIntPtr(bool* ok = nullptr) const;
// FIXME: Like the strict functions above, these give false for "ok" when there is trailing garbage.
// Like the non-strict functions above, these return the value when there is trailing garbage.
// It would be better if these were more consistent with the above functions instead.
- WTF_EXPORT_STRING_API double toDouble(bool* ok = 0) const;
- WTF_EXPORT_STRING_API float toFloat(bool* ok = 0) const;
+ WTF_EXPORT_STRING_API double toDouble(bool* ok = nullptr) const;
+ WTF_EXPORT_STRING_API float toFloat(bool* ok = nullptr) const;
bool percentage(int& percentage) const;
#if COMPILER_SUPPORTS(CXX_REFERENCE_QUALIFIED_FUNCTIONS)
WTF_EXPORT_STRING_API String isolatedCopy() const &;
- WTF_EXPORT_STRING_API String isolatedCopy() const &&;
+ WTF_EXPORT_STRING_API String isolatedCopy() &&;
#else
WTF_EXPORT_STRING_API String isolatedCopy() const;
#endif
@@ -388,7 +397,7 @@ public:
WTF_EXPORT_STRING_API bool isSafeToSendToAnotherThread() const;
// Prevent Strings from being implicitly convertable to bool as it will be ambiguous on any platform that
- // allows implicit conversion to another pointer type (e.g., Mac allows implicit conversion to NSString*).
+ // allows implicit conversion to another pointer type (e.g., Mac allows implicit conversion to NSString *).
typedef struct ImplicitConversionFromWTFStringToBoolDisallowedA* (String::*UnspecifiedBoolTypeA);
typedef struct ImplicitConversionFromWTFStringToBoolDisallowedB* (String::*UnspecifiedBoolTypeB);
operator UnspecifiedBoolTypeA() const;
@@ -400,11 +409,12 @@ public:
#endif
#ifdef __OBJC__
- WTF_EXPORT_STRING_API String(NSString*);
+ WTF_EXPORT_STRING_API String(NSString *);
- // This conversion maps NULL to "", which loses the meaning of NULL, but we
- // need this mapping because AppKit crashes when passed nil NSStrings.
- operator NSString*() const { if (!m_impl) return @""; return *m_impl; }
+ // This conversion converts the null string to an empty NSString rather than to nil.
+ // Given Cocoa idioms, this is a more useful default. Clients that need to preserve the
+ // null string can check isNull explicitly.
+ operator NSString *() const;
#endif
WTF_EXPORT_STRING_API static String make8BitFrom16BitSource(const UChar*, size_t);
@@ -446,6 +456,8 @@ public:
String(WTF::HashTableDeletedValueType) : m_impl(WTF::HashTableDeletedValue) { }
bool isHashTableDeletedValue() const { return m_impl.isHashTableDeletedValue(); }
+ unsigned existingHash() const { return isNull() ? 0 : impl()->existingHash(); }
+
#ifndef NDEBUG
WTF_EXPORT_STRING_API void show() const;
#endif
@@ -458,6 +470,14 @@ public:
return (*m_impl)[index];
}
+ // Turns this String empty if the StringImpl is not referenced by anyone else.
+ // This is useful for clearing String-based caches.
+ void clearImplIfNotShared()
+ {
+ if (m_impl && m_impl->hasOneRef()) // Only act when this String holds the sole reference to its StringImpl.
+ m_impl = nullptr; // Releases that reference; m_impl is null afterwards.
+ }
+
private:
template <typename CharacterType>
void removeInternal(const CharacterType*, unsigned, int);
@@ -473,10 +493,8 @@ inline bool operator==(const String& a, const LChar* b) { return equal(a.impl(),
inline bool operator==(const String& a, const char* b) { return equal(a.impl(), reinterpret_cast<const LChar*>(b)); }
inline bool operator==(const LChar* a, const String& b) { return equal(a, b.impl()); }
inline bool operator==(const char* a, const String& b) { return equal(reinterpret_cast<const LChar*>(a), b.impl()); }
-template<size_t inlineCapacity>
-inline bool operator==(const Vector<char, inlineCapacity>& a, const String& b) { return equal(b.impl(), a.data(), a.size()); }
-template<size_t inlineCapacity>
-inline bool operator==(const String& a, const Vector<char, inlineCapacity>& b) { return b == a; }
+template<size_t inlineCapacity> inline bool operator==(const Vector<char, inlineCapacity>& a, const String& b) { return equal(b.impl(), a.data(), a.size()); }
+template<size_t inlineCapacity> inline bool operator==(const String& a, const Vector<char, inlineCapacity>& b) { return b == a; }
inline bool operator!=(const String& a, const String& b) { return !equal(a.impl(), b.impl()); }
@@ -484,64 +502,79 @@ inline bool operator!=(const String& a, const LChar* b) { return !equal(a.impl()
inline bool operator!=(const String& a, const char* b) { return !equal(a.impl(), reinterpret_cast<const LChar*>(b)); }
inline bool operator!=(const LChar* a, const String& b) { return !equal(a, b.impl()); }
inline bool operator!=(const char* a, const String& b) { return !equal(reinterpret_cast<const LChar*>(a), b.impl()); }
-template<size_t inlineCapacity>
-inline bool operator!=(const Vector<char, inlineCapacity>& a, const String& b) { return !(a == b); }
-template<size_t inlineCapacity>
-inline bool operator!=(const String& a, const Vector<char, inlineCapacity>& b) { return b != a; }
-
-inline bool equalIgnoringCase(const String& a, const String& b) { return equalIgnoringCase(a.impl(), b.impl()); }
-inline bool equalIgnoringCase(const String& a, const LChar* b) { return equalIgnoringCase(a.impl(), b); }
-inline bool equalIgnoringCase(const String& a, const char* b) { return equalIgnoringCase(a.impl(), reinterpret_cast<const LChar*>(b)); }
-inline bool equalIgnoringCase(const LChar* a, const String& b) { return equalIgnoringCase(a, b.impl()); }
-inline bool equalIgnoringCase(const char* a, const String& b) { return equalIgnoringCase(reinterpret_cast<const LChar*>(a), b.impl()); }
-
-inline bool equalPossiblyIgnoringCase(const String& a, const String& b, bool ignoreCase)
-{
- return ignoreCase ? equalIgnoringCase(a, b) : (a == b);
-}
+template<size_t inlineCapacity> inline bool operator!=(const Vector<char, inlineCapacity>& a, const String& b) { return !(a == b); }
+template<size_t inlineCapacity> inline bool operator!=(const String& a, const Vector<char, inlineCapacity>& b) { return b != a; }
-inline bool equalIgnoringNullity(const String& a, const String& b) { return equalIgnoringNullity(a.impl(), b.impl()); }
+bool equalIgnoringASCIICase(const String&, const String&);
+bool equalIgnoringASCIICase(const String&, const char*);
-template<size_t inlineCapacity>
-inline bool equalIgnoringNullity(const Vector<UChar, inlineCapacity>& a, const String& b) { return equalIgnoringNullity(a, b.impl()); }
+template<unsigned length> bool equalLettersIgnoringASCIICase(const String&, const char (&lowercaseLetters)[length]);
+template<unsigned length> bool startsWithLettersIgnoringASCIICase(const String&, const char (&lowercaseLetters)[length]);
+
+inline bool equalIgnoringNullity(const String& a, const String& b) { return equalIgnoringNullity(a.impl(), b.impl()); }
+template<size_t inlineCapacity> inline bool equalIgnoringNullity(const Vector<UChar, inlineCapacity>& a, const String& b) { return equalIgnoringNullity(a, b.impl()); }
inline bool operator!(const String& str) { return str.isNull(); }
inline void swap(String& a, String& b) { a.swap(b); }
+#ifdef __OBJC__
+
+// Used in a small number of places where the long standing behavior has been "nil if empty".
+NSString * nsStringNilIfEmpty(const String&);
+
+#endif
+
// Definitions of string operations
-template<size_t inlineCapacity, typename OverflowHandler>
-String::String(const Vector<UChar, inlineCapacity, OverflowHandler>& vector)
- : m_impl(vector.size() ? StringImpl::create(vector.data(), vector.size()) : *StringImpl::empty())
+inline String::String(StringImpl& impl)
+ : m_impl(&impl)
{
}
-template<>
-inline const LChar* String::getCharacters<LChar>() const
+inline String::String(StringImpl* impl)
+ : m_impl(impl)
{
- ASSERT(is8Bit());
- return characters8();
}
-template<>
-inline const UChar* String::getCharacters<UChar>() const
+inline String::String(Ref<StringImpl>&& impl)
+ : m_impl(WTFMove(impl))
+{
+}
+
+inline String::String(RefPtr<StringImpl>&& impl)
+ : m_impl(WTFMove(impl))
+{
+}
+
+inline String::String(Ref<AtomicStringImpl>&& impl)
+ : m_impl(WTFMove(impl))
+{
+}
+
+inline String::String(RefPtr<AtomicStringImpl>&& impl)
+ : m_impl(WTFMove(impl))
+{
+}
+
+template<size_t inlineCapacity, typename OverflowHandler>
+String::String(const Vector<UChar, inlineCapacity, OverflowHandler>& vector)
+ : m_impl(vector.size() ? StringImpl::create(vector.data(), vector.size()) : Ref<StringImpl>(*StringImpl::empty()))
{
- ASSERT(!is8Bit());
- return characters16();
}
template<>
-inline const LChar* String::getCharactersWithUpconvert<LChar>() const
+inline const LChar* String::characters<LChar>() const
{
ASSERT(is8Bit());
return characters8();
}
template<>
-inline const UChar* String::getCharactersWithUpconvert<UChar>() const
+inline const UChar* String::characters<UChar>() const
{
- return deprecatedCharacters();
+ ASSERT(!is8Bit());
+ return characters16();
}
inline bool String::containsOnlyLatin1() const
@@ -559,12 +592,22 @@ inline bool String::containsOnlyLatin1() const
return !(ored & 0xFF00);
}
-
#ifdef __OBJC__
-// This is for situations in WebKit where the long standing behavior has been
-// "nil if empty", so we try to maintain longstanding behavior for the sake of
-// entrenched clients
-inline NSString* nsStringNilIfEmpty(const String& str) { return str.isEmpty() ? nil : (NSString*)str; }
+
+inline String::operator NSString *() const
+{
+ if (!m_impl)
+ return @""; // A null String converts to an empty NSString rather than nil.
+ return *m_impl; // Otherwise the conversion is delegated to the StringImpl.
+}
+
+inline NSString * nsStringNilIfEmpty(const String& string)
+{
+ if (string.isEmpty())
+ return nil; // Unlike operator NSString *, an empty string yields nil here.
+ return *string.impl(); // Not empty at this point, so impl() is dereferenced without a null check.
+}
+
#endif
inline bool String::containsOnlyASCII() const
@@ -585,12 +628,6 @@ inline bool codePointCompareLessThan(const String& a, const String& b)
return codePointCompare(a.impl(), b.impl()) < 0;
}
-template<size_t inlineCapacity>
-inline void append(Vector<UChar, inlineCapacity>& vector, const String& string)
-{
- vector.append(string.deprecatedCharacters(), string.length());
-}
-
template<typename CharacterType>
inline void appendNumber(Vector<CharacterType>& vector, unsigned char number)
{
@@ -657,13 +694,38 @@ private:
// Shared global empty string.
WTF_EXPORT_STRING_API const String& emptyString();
+template<unsigned length> inline bool equalLettersIgnoringASCIICase(const String& string, const char (&lowercaseLetters)[length])
+{
+ return equalLettersIgnoringASCIICase(string.impl(), lowercaseLetters); // Forwards to the overload taking the underlying StringImpl pointer.
+}
+
+inline bool equalIgnoringASCIICase(const String& a, const String& b)
+{
+ return equalIgnoringASCIICase(a.impl(), b.impl()); // Forwards to the overload taking the underlying StringImpl pointers.
+}
+
+inline bool equalIgnoringASCIICase(const String& a, const char* b)
+{
+ return equalIgnoringASCIICase(a.impl(), b); // Forwards to the overload taking the underlying StringImpl pointer.
+}
+
+template<unsigned length> inline bool startsWithLettersIgnoringASCIICase(const String& string, const char (&lowercaseLetters)[length])
+{
+ return startsWithLettersIgnoringASCIICase(string.impl(), lowercaseLetters); // Forwards to the overload taking the underlying StringImpl pointer.
+}
+
+template<> struct IntegerToStringConversionTrait<String> {
+ using ReturnType = String;
+ using AdditionalArgumentType = void;
+ static String flush(LChar* characters, unsigned length, void*) { return { characters, length }; } // Builds the String from the given characters; the void* argument is ignored.
+};
+
}
using WTF::CString;
using WTF::KeepTrailingZeros;
using WTF::String;
using WTF::emptyString;
-using WTF::append;
using WTF::appendNumber;
using WTF::charactersAreAllASCII;
using WTF::charactersToIntStrict;
@@ -679,7 +741,6 @@ using WTF::charactersToIntPtr;
using WTF::charactersToDouble;
using WTF::charactersToFloat;
using WTF::equal;
-using WTF::equalIgnoringCase;
using WTF::find;
using WTF::isAllSpecialCharacters;
using WTF::isSpaceOrNewline;
@@ -687,4 +748,5 @@ using WTF::reverseFind;
using WTF::ASCIILiteral;
#include <wtf/text/AtomicString.h>
+
#endif
diff --git a/Source/WTF/wtf/text/icu/UTextProvider.cpp b/Source/WTF/wtf/text/icu/UTextProvider.cpp
new file mode 100644
index 000000000..7388fdbf7
--- /dev/null
+++ b/Source/WTF/wtf/text/icu/UTextProvider.cpp
@@ -0,0 +1,72 @@
+/*
+ * Copyright (C) 2014 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "UTextProvider.h"
+
+#include <algorithm>
+#include <string.h>
+
+namespace WTF {
+
+// Relocate pointer from source into destination as required.
+static inline void fixPointer(const UText* source, UText* destination, const void*& pointer) // Rewrites `pointer` in place when it points into storage owned by `source`; any other pointer is left unchanged.
+{
+ if (pointer >= source->pExtra && pointer < static_cast<char*>(source->pExtra) + source->extraSize) {
+ // Pointer references source extra buffer.
+ pointer = static_cast<char*>(destination->pExtra) + (static_cast<const char*>(pointer) - static_cast<const char*>(source->pExtra)); // Same byte offset, rebased onto destination's extra buffer.
+ } else if (pointer >= source && pointer < reinterpret_cast<const char*>(source) + source->sizeOfStruct) {
+ // Pointer references source text structure, but not source extra buffer.
+ pointer = reinterpret_cast<char*>(destination) + (static_cast<const char*>(pointer) - reinterpret_cast<const char*>(source)); // Same byte offset, rebased onto the destination struct.
+ }
+}
+
+UText* uTextCloneImpl(UText* destination, const UText* source, UBool deep, UErrorCode* status) // Shallow-clones source into destination (set up via utext_setup); returns nullptr if *status already indicates failure on entry.
+{
+ ASSERT_UNUSED(deep, !deep); // Deep clones are not expected by this implementation.
+ if (U_FAILURE(*status))
+ return nullptr;
+ int32_t extraSize = source->extraSize;
+ destination = utext_setup(destination, extraSize, status);
+ if (U_FAILURE(*status))
+ return destination;
+ void* extraNew = destination->pExtra; // Saved because the struct-wide memcpy below overwrites pExtra...
+ int32_t flags = destination->flags; // ...and flags with the source's values.
+ int sizeToCopy = std::min(source->sizeOfStruct, destination->sizeOfStruct); // Never copy past the smaller of the two structs.
+ memcpy(destination, source, sizeToCopy);
+ destination->pExtra = extraNew;
+ destination->flags = flags;
+ memcpy(destination->pExtra, source->pExtra, extraSize);
+ fixPointer(source, destination, destination->context); // Re-point fields that referenced the source's own storage.
+ fixPointer(source, destination, destination->p);
+ fixPointer(source, destination, destination->q);
+ ASSERT(!destination->r);
+ const void* chunkContents = static_cast<const void*>(destination->chunkContents); // fixPointer takes a const void*&, so go through a temporary.
+ fixPointer(source, destination, chunkContents);
+ destination->chunkContents = static_cast<const UChar*>(chunkContents);
+ return destination;
+}
+
+} // namespace WTF
diff --git a/Source/WTF/wtf/text/icu/UTextProvider.h b/Source/WTF/wtf/text/icu/UTextProvider.h
new file mode 100644
index 000000000..2f0af9972
--- /dev/null
+++ b/Source/WTF/wtf/text/icu/UTextProvider.h
@@ -0,0 +1,111 @@
+/*
+ * Copyright (C) 2014 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef UTextProvider_h
+#define UTextProvider_h
+
+#include <unicode/utext.h>
+
+namespace WTF {
+
+enum class UTextProviderContext { // Identifies which text a context-aware provider's current chunk refers to.
+ NoContext, // No chunk has been set up.
+ PriorContext, // The prior-context text.
+ PrimaryContext // The primary string.
+};
+
+inline UTextProviderContext uTextProviderContext(const UText* text, int64_t nativeIndex, UBool forward) // Selects the context that covers nativeIndex; text->b holds the prior-context length.
+{
+ if (!text->b || nativeIndex > text->b) // No prior context at all, or the index lies past it.
+ return UTextProviderContext::PrimaryContext;
+ if (nativeIndex == text->b) // Exactly on the boundary: the access direction decides.
+ return forward ? UTextProviderContext::PrimaryContext : UTextProviderContext::PriorContext;
+ return UTextProviderContext::PriorContext;
+}
+
+inline void initializeContextAwareUTextProvider(UText* text, const UTextFuncs* funcs, const void* string, unsigned length, const UChar* priorContext, int priorContextLength) // Fills in the UText fields shared by context-aware providers.
+{
+ text->pFuncs = funcs;
+ text->providerProperties = 1 << UTEXT_PROVIDER_STABLE_CHUNKS;
+ text->context = string;
+ text->p = string; // Primary string...
+ text->a = length; // ...and its length.
+ text->q = priorContext; // Prior context...
+ text->b = priorContextLength; // ...and its length.
+}
+
+// Shared implementation for the UTextClone function on UTextFuncs.
+
+UText* uTextCloneImpl(UText* destination, const UText* source, UBool deep, UErrorCode* status);
+
+
+// Helpers for the UTextAccess function on UTextFuncs.
+
+inline int64_t uTextAccessPinIndex(int64_t& index, int64_t limit) // Clamps index into [0, limit] in place and also returns the clamped value.
+{
+ if (index < 0)
+ index = 0;
+ else if (index > limit)
+ index = limit;
+ return index;
+}
+
+inline bool uTextAccessInChunkOrOutOfRange(UText* text, int64_t nativeIndex, int64_t nativeLength, UBool forward, UBool& isAccessible) // Returns true if the access was resolved against the current chunk (isAccessible holds the result); returns false, leaving isAccessible unset, if the caller must load another chunk.
+{
+ if (forward) {
+ if (nativeIndex >= text->chunkNativeStart && nativeIndex < text->chunkNativeLimit) { // Index is inside the current chunk.
+ int64_t offset = nativeIndex - text->chunkNativeStart;
+ // Ensure chunk offset is well formed if computed offset exceeds int32_t range.
+ ASSERT(offset < std::numeric_limits<int32_t>::max());
+ text->chunkOffset = offset < std::numeric_limits<int32_t>::max() ? static_cast<int32_t>(offset) : 0;
+ isAccessible = TRUE;
+ return true;
+ }
+ if (nativeIndex >= nativeLength && text->chunkNativeLimit == nativeLength) { // At or past the end, and the chunk already reaches the end.
+ text->chunkOffset = text->chunkLength;
+ isAccessible = FALSE;
+ return true;
+ }
+ } else {
+ if (nativeIndex > text->chunkNativeStart && nativeIndex <= text->chunkNativeLimit) { // Backward access: the range is (start, limit] rather than [start, limit).
+ int64_t offset = nativeIndex - text->chunkNativeStart;
+ // Ensure chunk offset is well formed if computed offset exceeds int32_t range.
+ ASSERT(offset < std::numeric_limits<int32_t>::max());
+ text->chunkOffset = offset < std::numeric_limits<int32_t>::max() ? static_cast<int32_t>(offset) : 0;
+ isAccessible = TRUE;
+ return true;
+ }
+ if (nativeIndex <= 0 && !text->chunkNativeStart) { // At or before the start, and the chunk already begins at 0.
+ text->chunkOffset = 0;
+ isAccessible = FALSE;
+ return true;
+ }
+ }
+ return false;
+}
+
+} // namespace WTF
+
+#endif // UTextProvider_h
diff --git a/Source/WTF/wtf/text/icu/UTextProviderLatin1.cpp b/Source/WTF/wtf/text/icu/UTextProviderLatin1.cpp
new file mode 100644
index 000000000..25a0e1e86
--- /dev/null
+++ b/Source/WTF/wtf/text/icu/UTextProviderLatin1.cpp
@@ -0,0 +1,394 @@
+/*
+ * Copyright (C) 2014 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "UTextProviderLatin1.h"
+
+#include "UTextProvider.h"
+#include <wtf/text/StringImpl.h>
+
+namespace WTF {
+
+// Latin1 provider
+
+static UText* uTextLatin1Clone(UText*, const UText*, UBool, UErrorCode*);
+static int64_t uTextLatin1NativeLength(UText*);
+static UBool uTextLatin1Access(UText*, int64_t, UBool);
+static int32_t uTextLatin1Extract(UText*, int64_t, int64_t, UChar*, int32_t, UErrorCode*);
+static int64_t uTextLatin1MapOffsetToNative(const UText*);
+static int32_t uTextLatin1MapNativeIndexToUTF16(const UText*, int64_t);
+static void uTextLatin1Close(UText*);
+
+static const struct UTextFuncs uTextLatin1Funcs = { // Positional initializer; slot names below follow ICU's UTextFuncs declaration.
+ sizeof(UTextFuncs), // tableSize
+ 0, // reserved1
+ 0, // reserved2
+ 0, // reserved3
+ uTextLatin1Clone, // clone
+ uTextLatin1NativeLength, // nativeLength
+ uTextLatin1Access, // access
+ uTextLatin1Extract, // extract
+ nullptr, // replace (not provided)
+ nullptr, // copy (not provided)
+ uTextLatin1MapOffsetToNative, // mapOffsetToNative
+ uTextLatin1MapNativeIndexToUTF16, // mapNativeIndexToUTF16
+ uTextLatin1Close, // close
+ nullptr, // spare1
+ nullptr, // spare2
+ nullptr // spare3
+};
+
+static UText* uTextLatin1Clone(UText* destination, const UText* source, UBool deep, UErrorCode* status) // Sets up a UText over the same Latin-1 string as source, with its own zeroed UChar buffer.
+{
+ ASSERT_UNUSED(deep, !deep); // Deep clones are not expected by this implementation.
+
+ if (U_FAILURE(*status))
+ return 0;
+
+ UText* result = utext_setup(destination, sizeof(UChar) * UTextWithBufferInlineCapacity, status); // Requests extra storage for the UChar chunk buffer.
+ if (U_FAILURE(*status))
+ return destination;
+
+ result->providerProperties = source->providerProperties;
+
+ // Point at the same position, but with an empty buffer.
+ result->chunkNativeStart = source->chunkNativeStart;
+ result->chunkNativeLimit = source->chunkNativeStart;
+ result->nativeIndexingLimit = static_cast<int32_t>(source->chunkNativeStart);
+ result->chunkOffset = 0;
+ result->context = source->context; // The source string is shared, not copied.
+ result->a = source->a;
+ result->pFuncs = &uTextLatin1Funcs;
+ result->chunkContents = (UChar*)result->pExtra;
+ memset(const_cast<UChar*>(result->chunkContents), 0, sizeof(UChar) * UTextWithBufferInlineCapacity);
+
+ return result;
+}
+
+static int64_t uTextLatin1NativeLength(UText* uText)
+{
+ return uText->a; // The string length is stored in field a by openLatin1UTextProvider.
+}
+
+static UBool uTextLatin1Access(UText* uText, int64_t index, UBool forward) // Makes index reachable in the given direction, refilling the UChar chunk buffer from the Latin-1 string when it is not already covered.
+{
+ int64_t length = uText->a;
+
+ if (forward) {
+ if (index < uText->chunkNativeLimit && index >= uText->chunkNativeStart) {
+ // Already inside the buffer. Set the new offset.
+ uText->chunkOffset = static_cast<int32_t>(index - uText->chunkNativeStart);
+ return TRUE;
+ }
+ if (index >= length && uText->chunkNativeLimit == length) {
+ // Off the end of the buffer, but we can't get it.
+ uText->chunkOffset = static_cast<int32_t>(index - uText->chunkNativeStart); // NOTE(review): for index > length this exceeds chunkLength; confirm callers tolerate that.
+ return FALSE;
+ }
+ } else {
+ if (index <= uText->chunkNativeLimit && index > uText->chunkNativeStart) {
+ // Already inside the buffer. Set the new offset.
+ uText->chunkOffset = static_cast<int32_t>(index - uText->chunkNativeStart);
+ return TRUE;
+ }
+ if (!index && !uText->chunkNativeStart) {
+ // Already at the beginning; can't go any farther.
+ uText->chunkOffset = 0;
+ return FALSE;
+ }
+ }
+
+ if (forward) { // Not covered by the current chunk: pick a new window that starts at index...
+ uText->chunkNativeStart = index;
+ uText->chunkNativeLimit = uText->chunkNativeStart + UTextWithBufferInlineCapacity;
+ if (uText->chunkNativeLimit > length)
+ uText->chunkNativeLimit = length;
+
+ uText->chunkOffset = 0;
+ } else { // ...or, going backward, one that ends at index.
+ uText->chunkNativeLimit = index;
+ if (uText->chunkNativeLimit > length)
+ uText->chunkNativeLimit = length;
+
+ uText->chunkNativeStart = uText->chunkNativeLimit - UTextWithBufferInlineCapacity;
+ if (uText->chunkNativeStart < 0)
+ uText->chunkNativeStart = 0;
+
+ uText->chunkOffset = static_cast<int32_t>(index - uText->chunkNativeStart);
+ }
+ uText->chunkLength = static_cast<int32_t>(uText->chunkNativeLimit - uText->chunkNativeStart);
+
+ StringImpl::copyChars(const_cast<UChar*>(uText->chunkContents), static_cast<const LChar*>(uText->context) + uText->chunkNativeStart, static_cast<unsigned>(uText->chunkLength)); // Fills the chunk buffer from the LChar source.
+
+ uText->nativeIndexingLimit = uText->chunkLength;
+
+ return TRUE;
+}
+
+static int32_t uTextLatin1Extract(UText* uText, int64_t start, int64_t limit, UChar* dest, int32_t destCapacity, UErrorCode* status)
+{
+ int64_t length = uText->a;
+ if (U_FAILURE(*status))
+ return 0;
+
+ if (destCapacity < 0 || (!dest && destCapacity > 0)) {
+ *status = U_ILLEGAL_ARGUMENT_ERROR;
+ return 0;
+ }
+
+ if (start < 0 || start > limit || (limit - start) > INT32_MAX) {
+ *status = U_INDEX_OUTOFBOUNDS_ERROR;
+ return 0;
+ }
+
+ if (start > length)
+ start = length;
+ if (limit > length)
+ limit = length;
+
+ length = limit - start;
+
+ if (!length)
+ return 0;
+
+ if (destCapacity > 0 && !dest) {
+ int32_t trimmedLength = static_cast<int32_t>(length);
+ if (trimmedLength > destCapacity)
+ trimmedLength = destCapacity;
+
+ StringImpl::copyChars(dest, static_cast<const LChar*>(uText->context) + start, static_cast<unsigned>(trimmedLength));
+ }
+
+ if (length < destCapacity) {
+ dest[length] = 0;
+ if (*status == U_STRING_NOT_TERMINATED_WARNING)
+ *status = U_ZERO_ERROR;
+ } else if (length == destCapacity)
+ *status = U_STRING_NOT_TERMINATED_WARNING;
+ else
+ *status = U_BUFFER_OVERFLOW_ERROR;
+
+ return static_cast<int32_t>(length);
+}
+
+static int64_t uTextLatin1MapOffsetToNative(const UText* uText)
+{
+ return uText->chunkNativeStart + uText->chunkOffset; // Chunk start plus the offset within the chunk.
+}
+
+static int32_t uTextLatin1MapNativeIndexToUTF16(const UText* uText, int64_t nativeIndex) // Returns nativeIndex unchanged, narrowed to int32_t.
+{
+ ASSERT_UNUSED(uText, uText->chunkNativeStart >= nativeIndex); // NOTE(review): as a lower-bound check this looks inverted (expected chunkNativeStart <= nativeIndex); confirm intent before relying on it.
+ ASSERT_UNUSED(uText, nativeIndex < uText->chunkNativeLimit);
+ return static_cast<int32_t>(nativeIndex);
+}
+
+static void uTextLatin1Close(UText* uText)
+{
+ uText->context = nullptr; // Only the pointer is cleared; nothing is freed here.
+}
+
+UText* openLatin1UTextProvider(UTextWithBuffer* utWithBuffer, const LChar* string, unsigned length, UErrorCode* status) // Sets up utWithBuffer->text as a UText over the given 8-bit string; returns nullptr on failure.
+{
+ if (U_FAILURE(*status))
+ return nullptr;
+ if (!string || length > static_cast<unsigned>(std::numeric_limits<int32_t>::max())) { // Reject a null string or a length above the int32_t maximum.
+ *status = U_ILLEGAL_ARGUMENT_ERROR;
+ return nullptr;
+ }
+ UText* text = utext_setup(&utWithBuffer->text, sizeof(utWithBuffer->buffer), status);
+ if (U_FAILURE(*status)) {
+ ASSERT(!text);
+ return nullptr;
+ }
+
+ text->context = string; // The string is referenced, not copied.
+ text->a = length;
+ text->pFuncs = &uTextLatin1Funcs;
+ text->chunkContents = (UChar*)text->pExtra; // Chunks are served out of the extra storage requested above.
+ memset(const_cast<UChar*>(text->chunkContents), 0, sizeof(UChar) * UTextWithBufferInlineCapacity);
+
+ return text;
+}
+
+
+// Latin1ContextAware provider
+
+static UText* uTextLatin1ContextAwareClone(UText*, const UText*, UBool, UErrorCode*);
+static int64_t uTextLatin1ContextAwareNativeLength(UText*);
+static UBool uTextLatin1ContextAwareAccess(UText*, int64_t, UBool);
+static int32_t uTextLatin1ContextAwareExtract(UText*, int64_t, int64_t, UChar*, int32_t, UErrorCode*);
+static void uTextLatin1ContextAwareClose(UText*);
+
+static const struct UTextFuncs textLatin1ContextAwareFuncs = { // Positional initializer; slot names below follow ICU's UTextFuncs declaration.
+ sizeof(UTextFuncs), // tableSize
+ 0, // reserved1
+ 0, // reserved2
+ 0, // reserved3
+ uTextLatin1ContextAwareClone, // clone
+ uTextLatin1ContextAwareNativeLength, // nativeLength
+ uTextLatin1ContextAwareAccess, // access
+ uTextLatin1ContextAwareExtract, // extract
+ nullptr, // replace (not provided)
+ nullptr, // copy (not provided)
+ nullptr, // mapOffsetToNative (not provided)
+ nullptr, // mapNativeIndexToUTF16 (not provided)
+ uTextLatin1ContextAwareClose, // close
+ nullptr, // spare1
+ nullptr, // spare2
+ nullptr // spare3
+};
+
+static inline UTextProviderContext textLatin1ContextAwareGetCurrentContext(const UText* text) // Infers the current context from where chunkContents points.
+{
+ if (!text->chunkContents)
+ return UTextProviderContext::NoContext;
+ return text->chunkContents == text->pExtra ? UTextProviderContext::PrimaryContext : UTextProviderContext::PriorContext; // Primary chunks live in the extra buffer; anything else is treated as prior context.
+}
+
+static void textLatin1ContextAwareMoveInPrimaryContext(UText* text, int64_t nativeIndex, int64_t nativeLength, UBool forward) // Repositions the chunk window inside the primary string and refills the UChar buffer.
+{
+ ASSERT(text->chunkContents == text->pExtra);
+ if (forward) {
+ ASSERT(nativeIndex >= text->b && nativeIndex < nativeLength);
+ text->chunkNativeStart = nativeIndex;
+ text->chunkNativeLimit = nativeIndex + text->extraSize / sizeof(UChar); // Window size is the extra buffer's capacity in UChars.
+ if (text->chunkNativeLimit > nativeLength)
+ text->chunkNativeLimit = nativeLength;
+ } else {
+ ASSERT(nativeIndex > text->b && nativeIndex <= nativeLength);
+ text->chunkNativeLimit = nativeIndex;
+ text->chunkNativeStart = nativeIndex - text->extraSize / sizeof(UChar);
+ if (text->chunkNativeStart < text->b) // Do not let the window reach back into the prior context.
+ text->chunkNativeStart = text->b;
+ }
+ int64_t length = text->chunkNativeLimit - text->chunkNativeStart;
+ // Ensure chunk length is well defined if computed length exceeds int32_t range.
+ ASSERT(length < std::numeric_limits<int32_t>::max());
+ text->chunkLength = length < std::numeric_limits<int32_t>::max() ? static_cast<int32_t>(length) : 0;
+ text->nativeIndexingLimit = text->chunkLength;
+ text->chunkOffset = forward ? 0 : text->chunkLength;
+ StringImpl::copyChars(const_cast<UChar*>(text->chunkContents), static_cast<const LChar*>(text->p) + (text->chunkNativeStart - text->b), static_cast<unsigned>(text->chunkLength)); // Native indices include the prior context, so subtract text->b to index into the primary string.
+}
+
+static void textLatin1ContextAwareSwitchToPrimaryContext(UText* text, int64_t nativeIndex, int64_t nativeLength, UBool forward) // Enters the primary context from no context or from the prior context.
+{
+ ASSERT(!text->chunkContents || text->chunkContents == text->q);
+ text->chunkContents = static_cast<const UChar*>(text->pExtra); // Primary chunks are served from the extra buffer.
+ textLatin1ContextAwareMoveInPrimaryContext(text, nativeIndex, nativeLength, forward);
+}
+
+static void textLatin1ContextAwareMoveInPriorContext(UText* text, int64_t nativeIndex, int64_t nativeLength, UBool forward) // Exposes the whole prior context as a single chunk and places the offset at nativeIndex.
+{
+ ASSERT(text->chunkContents == text->q);
+ ASSERT(forward ? nativeIndex < text->b : nativeIndex <= text->b);
+ ASSERT_UNUSED(nativeLength, forward ? nativeIndex < nativeLength : nativeIndex <= nativeLength);
+ ASSERT_UNUSED(forward, forward ? nativeIndex < nativeLength : nativeIndex <= nativeLength); // Same condition as the line above; presumably repeated so that `forward` is also marked used when assertions are compiled out.
+ text->chunkNativeStart = 0;
+ text->chunkNativeLimit = text->b;
+ text->chunkLength = text->b;
+ text->nativeIndexingLimit = text->chunkLength;
+ int64_t offset = nativeIndex - text->chunkNativeStart;
+ // Ensure chunk offset is well defined if computed offset exceeds int32_t range or chunk length.
+ ASSERT(offset < std::numeric_limits<int32_t>::max());
+ text->chunkOffset = std::min(offset < std::numeric_limits<int32_t>::max() ? static_cast<int32_t>(offset) : 0, text->chunkLength);
+}
+
+static void textLatin1ContextAwareSwitchToPriorContext(UText* text, int64_t nativeIndex, int64_t nativeLength, UBool forward) // Enters the prior context from no context or from the primary context.
+{
+ ASSERT(!text->chunkContents || text->chunkContents == text->pExtra);
+ text->chunkContents = static_cast<const UChar*>(text->q); // The chunk points directly at the prior-context UChars; nothing is copied.
+ textLatin1ContextAwareMoveInPriorContext(text, nativeIndex, nativeLength, forward);
+}
+
+static UText* uTextLatin1ContextAwareClone(UText* destination, const UText* source, UBool deep, UErrorCode* status)
+{
+ return uTextCloneImpl(destination, source, deep, status); // Delegates to the shared clone implementation declared in UTextProvider.h.
+}
+
+static int64_t uTextLatin1ContextAwareNativeLength(UText* text)
+{
+ return text->a + text->b; // Primary string length plus prior-context length.
+}
+
+static UBool uTextLatin1ContextAwareAccess(UText* text, int64_t nativeIndex, UBool forward) // Makes nativeIndex reachable, moving within or switching between the prior and primary contexts as needed.
+{
+ if (!text->context) // Cleared by the close function; nothing to access.
+ return FALSE;
+ int64_t nativeLength = uTextLatin1ContextAwareNativeLength(text);
+ UBool isAccessible;
+ if (uTextAccessInChunkOrOutOfRange(text, nativeIndex, nativeLength, forward, isAccessible)) // Resolved against the current chunk; no reload needed.
+ return isAccessible;
+ nativeIndex = uTextAccessPinIndex(nativeIndex, nativeLength);
+ UTextProviderContext currentContext = textLatin1ContextAwareGetCurrentContext(text);
+ UTextProviderContext newContext = uTextProviderContext(text, nativeIndex, forward);
+ ASSERT(newContext != UTextProviderContext::NoContext);
+ if (newContext == currentContext) { // Same context: just move the chunk.
+ if (currentContext == UTextProviderContext::PrimaryContext)
+ textLatin1ContextAwareMoveInPrimaryContext(text, nativeIndex, nativeLength, forward);
+ else
+ textLatin1ContextAwareMoveInPriorContext(text, nativeIndex, nativeLength, forward);
+ } else if (newContext == UTextProviderContext::PrimaryContext)
+ textLatin1ContextAwareSwitchToPrimaryContext(text, nativeIndex, nativeLength, forward);
+ else {
+ ASSERT(newContext == UTextProviderContext::PriorContext);
+ textLatin1ContextAwareSwitchToPriorContext(text, nativeIndex, nativeLength, forward);
+ }
+ return TRUE;
+}
+
+static int32_t uTextLatin1ContextAwareExtract(UText*, int64_t, int64_t, UChar*, int32_t, UErrorCode* errorCode) // Extraction is not implemented for this provider; always reports U_UNSUPPORTED_ERROR and returns 0.
+{
+ // In the present context, this text provider is used only with ICU functions
+ // that do not perform an extract operation.
+ ASSERT_NOT_REACHED();
+ *errorCode = U_UNSUPPORTED_ERROR;
+ return 0;
+}
+
+static void uTextLatin1ContextAwareClose(UText* text)
+{
+ text->context = nullptr; // uTextLatin1ContextAwareAccess returns FALSE once context is null.
+}
+
+UText* openLatin1ContextAwareUTextProvider(UTextWithBuffer* utWithBuffer, const LChar* string, unsigned length, const UChar* priorContext, int priorContextLength, UErrorCode* status) // Sets up utWithBuffer->text as a context-aware UText over the 8-bit string plus a UChar prior context; returns 0 on failure.
+{
+ if (U_FAILURE(*status))
+ return 0;
+ if (!string || length > static_cast<unsigned>(std::numeric_limits<int32_t>::max())) { // Reject a null string or a length above the int32_t maximum.
+ *status = U_ILLEGAL_ARGUMENT_ERROR;
+ return 0;
+ }
+ UText* text = utext_setup(&utWithBuffer->text, sizeof(utWithBuffer->buffer), status);
+ if (U_FAILURE(*status)) {
+ ASSERT(!text);
+ return 0;
+ }
+
+ initializeContextAwareUTextProvider(text, &textLatin1ContextAwareFuncs, string, length, priorContext, priorContextLength);
+ return text;
+}
+
+} // namespace WTF
diff --git a/Source/WTF/wtf/text/icu/UTextProviderLatin1.h b/Source/WTF/wtf/text/icu/UTextProviderLatin1.h
new file mode 100644
index 000000000..f17b34d56
--- /dev/null
+++ b/Source/WTF/wtf/text/icu/UTextProviderLatin1.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (C) 2014 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef UTextProviderLatin1_h
+#define UTextProviderLatin1_h
+
+#include <unicode/utext.h>
+#include <wtf/text/LChar.h>
+
+namespace WTF {
+
+ // Number of UChar elements in the buffer member of UTextWithBuffer.
+ const int UTextWithBufferInlineCapacity = 16;
+
+ // A UText immediately followed by a fixed-size UChar buffer.
+ // openLatin1ContextAwareUTextProvider() passes &text and sizeof(buffer) to
+ // utext_setup() when it opens a provider on an instance of this struct.
+ struct UTextWithBuffer {
+ UText text;
+ UChar buffer[UTextWithBufferInlineCapacity];
+ };
+
+ // Opens a Latin-1 UText provider on |utWithBuffer| for |string| / |length|.
+ // NOTE(review): description inferred from the signature only; the definition is
+ // not part of this header — confirm against UTextProviderLatin1.cpp.
+ UText* openLatin1UTextProvider(UTextWithBuffer* utWithBuffer, const LChar* string, unsigned length, UErrorCode* status);
+ // Opens a Latin-1 provider that additionally receives |priorContext| /
+ // |priorContextLength|. Returns null without doing anything if *status already
+ // reports a failure; returns null with U_ILLEGAL_ARGUMENT_ERROR if |string| is
+ // null or |length| exceeds the int32_t maximum; returns null if utext_setup() fails.
+ UText* openLatin1ContextAwareUTextProvider(UTextWithBuffer* utWithBuffer, const LChar* string, unsigned length, const UChar* priorContext, int priorContextLength, UErrorCode* status);
+
+} // namespace WTF
+
+#endif // UTextProviderLatin1_h
diff --git a/Source/WTF/wtf/text/icu/UTextProviderUTF16.cpp b/Source/WTF/wtf/text/icu/UTextProviderUTF16.cpp
new file mode 100644
index 000000000..e1fc2eab9
--- /dev/null
+++ b/Source/WTF/wtf/text/icu/UTextProviderUTF16.cpp
@@ -0,0 +1,184 @@
+/*
+ * Copyright (C) 2014 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "UTextProviderUTF16.h"
+
+#include "UTextProvider.h"
+#include <algorithm>
+
+namespace WTF {
+
+// UTF16ContextAware provider
+
+ // Forward declarations of the provider callbacks so that the function table
+ // below can reference them before their definitions.
+ static UText* uTextUTF16ContextAwareClone(UText*, const UText*, UBool, UErrorCode*);
+ static int64_t uTextUTF16ContextAwareNativeLength(UText*);
+ static UBool uTextUTF16ContextAwareAccess(UText*, int64_t, UBool);
+ static int32_t uTextUTF16ContextAwareExtract(UText*, int64_t, int64_t, UChar*, int32_t, UErrorCode*);
+ static void uTextUTF16ContextAwareClose(UText*);
+
+ // Callback table that openUTF16ContextAwareUTextProvider() hands to
+ // initializeContextAwareUTextProvider(). Only the clone, native-length, access,
+ // extract and close callbacks are supplied; every other function slot is null.
+ // NOTE(review): the initializers are positional, so their order has to match the
+ // member order of ICU's UTextFuncs in <unicode/utext.h> — confirm before editing.
+ static const struct UTextFuncs textUTF16ContextAwareFuncs = {
+ sizeof(UTextFuncs),
+ 0,
+ 0,
+ 0,
+ uTextUTF16ContextAwareClone,
+ uTextUTF16ContextAwareNativeLength,
+ uTextUTF16ContextAwareAccess,
+ uTextUTF16ContextAwareExtract,
+ nullptr,
+ nullptr,
+ nullptr,
+ nullptr,
+ uTextUTF16ContextAwareClose,
+ nullptr,
+ nullptr,
+ nullptr
+ };
+
+ // Reports which backing buffer the currently loaded chunk points into:
+ // NoContext when no chunk is loaded (chunkContents is null), PrimaryContext when
+ // chunkContents equals text->p, and PriorContext in every other case.
+ static inline UTextProviderContext textUTF16ContextAwareGetCurrentContext(const UText* text)
+ {
+     const void* loadedChunk = text->chunkContents;
+     if (!loadedChunk)
+         return UTextProviderContext::NoContext;
+     if (loadedChunk == text->p)
+         return UTextProviderContext::PrimaryContext;
+     return UTextProviderContext::PriorContext;
+ }
+
+ // Recomputes the chunk bookkeeping for an access that stays inside the primary
+ // string (chunkContents must already be text->p). The chunk is made to span the
+ // native range [text->b, nativeLength) and chunkOffset is positioned at nativeIndex.
+ // |forward| is only consulted by the assertions.
+ static void textUTF16ContextAwareMoveInPrimaryContext(UText* text, int64_t nativeIndex, int64_t nativeLength, UBool forward)
+ {
+     ASSERT(text->chunkContents == text->p);
+     ASSERT_UNUSED(forward, forward ? nativeIndex >= text->b : nativeIndex > text->b);
+     ASSERT_UNUSED(forward, forward ? nativeIndex < nativeLength : nativeIndex <= nativeLength);
+
+     const int32_t int32Maximum = std::numeric_limits<int32_t>::max();
+
+     text->chunkNativeStart = text->b;
+     text->chunkNativeLimit = nativeLength;
+
+     // Keep the chunk length well defined even if the computed span does not fit in int32_t.
+     const int64_t chunkSpan = text->chunkNativeLimit - text->chunkNativeStart;
+     ASSERT(chunkSpan < int32Maximum);
+     int32_t chunkLength = 0;
+     if (chunkSpan < int32Maximum)
+         chunkLength = static_cast<int32_t>(chunkSpan);
+     text->chunkLength = chunkLength;
+     text->nativeIndexingLimit = text->chunkLength;
+
+     // Keep the chunk offset well defined even if it does not fit in int32_t or
+     // would land beyond the chunk length.
+     const int64_t offsetInChunk = nativeIndex - text->chunkNativeStart;
+     ASSERT(offsetInChunk < int32Maximum);
+     int32_t chunkOffset = 0;
+     if (offsetInChunk < int32Maximum)
+         chunkOffset = static_cast<int32_t>(offsetInChunk);
+     text->chunkOffset = std::min(chunkOffset, text->chunkLength);
+ }
+
+ // Makes the primary string (text->p) the active chunk and then recomputes the
+ // chunk bookkeeping for |nativeIndex| via textUTF16ContextAwareMoveInPrimaryContext().
+ // Expects that either no chunk is loaded yet or the loaded chunk is text->q.
+ static void textUTF16ContextAwareSwitchToPrimaryContext(UText* text, int64_t nativeIndex, int64_t nativeLength, UBool forward)
+ {
+ ASSERT(!text->chunkContents || text->chunkContents == text->q);
+ // chunkContents must be updated first: the move helper asserts that it equals text->p.
+ text->chunkContents = static_cast<const UChar*>(text->p);
+ textUTF16ContextAwareMoveInPrimaryContext(text, nativeIndex, nativeLength, forward);
+ }
+
+ // Recomputes the chunk bookkeeping for an access that stays inside the prior
+ // context (chunkContents must already be text->q). The chunk is made to span the
+ // native range [0, text->b) and chunkOffset is positioned at nativeIndex.
+ // |nativeLength| and |forward| are only consulted by the assertions.
+ static void textUTF16ContextAwareMoveInPriorContext(UText* text, int64_t nativeIndex, int64_t nativeLength, UBool forward)
+ {
+     ASSERT(text->chunkContents == text->q);
+     ASSERT(forward ? nativeIndex < text->b : nativeIndex <= text->b);
+     ASSERT_UNUSED(nativeLength, forward ? nativeIndex < nativeLength : nativeIndex <= nativeLength);
+     ASSERT_UNUSED(forward, forward ? nativeIndex < nativeLength : nativeIndex <= nativeLength);
+
+     const int32_t int32Maximum = std::numeric_limits<int32_t>::max();
+
+     text->chunkNativeStart = 0;
+     text->chunkNativeLimit = text->b;
+     text->chunkLength = text->b;
+     text->nativeIndexingLimit = text->chunkLength;
+
+     // Keep the chunk offset well defined even if it does not fit in int32_t or
+     // would land beyond the chunk length.
+     const int64_t offsetInChunk = nativeIndex - text->chunkNativeStart;
+     ASSERT(offsetInChunk < int32Maximum);
+     int32_t chunkOffset = 0;
+     if (offsetInChunk < int32Maximum)
+         chunkOffset = static_cast<int32_t>(offsetInChunk);
+     text->chunkOffset = std::min(chunkOffset, text->chunkLength);
+ }
+
+ // Makes the prior context (text->q) the active chunk and then recomputes the
+ // chunk bookkeeping for |nativeIndex| via textUTF16ContextAwareMoveInPriorContext().
+ // Expects that either no chunk is loaded yet or the loaded chunk is text->p.
+ static void textUTF16ContextAwareSwitchToPriorContext(UText* text, int64_t nativeIndex, int64_t nativeLength, UBool forward)
+ {
+ ASSERT(!text->chunkContents || text->chunkContents == text->p);
+ // chunkContents must be updated first: the move helper asserts that it equals text->q.
+ text->chunkContents = static_cast<const UChar*>(text->q);
+ textUTF16ContextAwareMoveInPriorContext(text, nativeIndex, nativeLength, forward);
+ }
+
+ // Clone callback: forwards all arguments to uTextCloneImpl() and returns its result.
+ static UText* uTextUTF16ContextAwareClone(UText* destination, const UText* source, UBool deep, UErrorCode* status)
+ {
+ return uTextCloneImpl(destination, source, deep, status);
+ }
+
+ // Native-length callback: returns text->a + text->b.
+ // text->b is the limit of the prior-context chunk and the start of the primary
+ // chunk (see the MoveIn*Context helpers); presumably text->a is the primary
+ // string's length — confirm against initializeContextAwareUTextProvider().
+ static inline int64_t uTextUTF16ContextAwareNativeLength(UText* text)
+ {
+ return text->a + text->b;
+ }
+
+ // Access callback: positions the UText's chunk so that |nativeIndex| can be read
+ // in the direction given by |forward|.
+ //
+ // Returns FALSE when the UText has no context (e.g. after close). When
+ // uTextAccessInChunkOrOutOfRange() handles the request, its verdict is returned
+ // as is. Otherwise the index is pinned via uTextAccessPinIndex(), the chunk is
+ // moved within, or switched to, the context that uTextProviderContext() selects,
+ // and TRUE is returned.
+ static UBool uTextUTF16ContextAwareAccess(UText* text, int64_t nativeIndex, UBool forward)
+ {
+     if (!text->context)
+         return FALSE;
+
+     int64_t nativeLength = uTextUTF16ContextAwareNativeLength(text);
+
+     UBool isAccessible;
+     if (uTextAccessInChunkOrOutOfRange(text, nativeIndex, nativeLength, forward, isAccessible))
+         return isAccessible;
+
+     nativeIndex = uTextAccessPinIndex(nativeIndex, nativeLength);
+
+     UTextProviderContext activeContext = textUTF16ContextAwareGetCurrentContext(text);
+     UTextProviderContext targetContext = uTextProviderContext(text, nativeIndex, forward);
+     ASSERT(targetContext != UTextProviderContext::NoContext);
+
+     const bool staysInActiveContext = targetContext == activeContext;
+     if (targetContext == UTextProviderContext::PrimaryContext) {
+         if (staysInActiveContext)
+             textUTF16ContextAwareMoveInPrimaryContext(text, nativeIndex, nativeLength, forward);
+         else
+             textUTF16ContextAwareSwitchToPrimaryContext(text, nativeIndex, nativeLength, forward);
+     } else if (staysInActiveContext)
+         textUTF16ContextAwareMoveInPriorContext(text, nativeIndex, nativeLength, forward);
+     else {
+         ASSERT(targetContext == UTextProviderContext::PriorContext);
+         textUTF16ContextAwareSwitchToPriorContext(text, nativeIndex, nativeLength, forward);
+     }
+     return TRUE;
+ }
+
+ // Extract callback installed in textUTF16ContextAwareFuncs. Extraction is not
+ // supported: the call hits ASSERT_NOT_REACHED(), stores U_UNSUPPORTED_ERROR in
+ // *errorCode and returns 0. All other arguments are ignored.
+ static int32_t uTextUTF16ContextAwareExtract(UText*, int64_t, int64_t, UChar*, int32_t, UErrorCode* errorCode)
+ {
+ // In the present context, this text provider is used only with ICU functions
+ // that do not perform an extract operation.
+ ASSERT_NOT_REACHED();
+ *errorCode = U_UNSUPPORTED_ERROR;
+ return 0;
+ }
+
+ // Close callback: clears the UText's context pointer. Afterwards
+ // uTextUTF16ContextAwareAccess() returns FALSE for this UText, because it
+ // refuses to operate on a null context.
+ static void uTextUTF16ContextAwareClose(UText* text)
+ {
+ text->context = nullptr;
+ }
+
+ // Sets up |text| (through utext_setup(), requesting no extra space) as a UTF-16
+ // context-aware provider over |string| (|length| UChars). |priorContext| and
+ // |priorContextLength| are forwarded unchanged to initializeContextAwareUTextProvider().
+ //
+ // Returns the initialized UText, or null when:
+ //  - *status already reports a failure on entry (nothing is touched);
+ //  - |string| is null or |length| exceeds the int32_t maximum
+ //    (*status becomes U_ILLEGAL_ARGUMENT_ERROR);
+ //  - utext_setup() reports a failure through *status.
+ UText* openUTF16ContextAwareUTextProvider(UText* text, const UChar* string, unsigned length, const UChar* priorContext, int priorContextLength, UErrorCode* status)
+ {
+     if (U_FAILURE(*status))
+         return nullptr;
+
+     const unsigned maximumLength = static_cast<unsigned>(std::numeric_limits<int32_t>::max());
+     const bool hasUsableString = string && length <= maximumLength;
+     if (!hasUsableString) {
+         *status = U_ILLEGAL_ARGUMENT_ERROR;
+         return nullptr;
+     }
+
+     text = utext_setup(text, 0, status);
+     if (U_FAILURE(*status)) {
+         ASSERT(!text);
+         return nullptr;
+     }
+
+     initializeContextAwareUTextProvider(text, &textUTF16ContextAwareFuncs, string, length, priorContext, priorContextLength);
+     return text;
+ }
+
+} // namespace WTF
diff --git a/Source/WTF/wtf/text/icu/UTextProviderUTF16.h b/Source/WTF/wtf/text/icu/UTextProviderUTF16.h
new file mode 100644
index 000000000..bcc2c2c8e
--- /dev/null
+++ b/Source/WTF/wtf/text/icu/UTextProviderUTF16.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2014 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef UTextProviderUTF16_h
+#define UTextProviderUTF16_h
+
+#include <unicode/utext.h>
+
+namespace WTF {
+
+ // Sets up the given UText as a UTF-16 context-aware provider over the string
+ // (|length| UChars) together with |priorContext| / |priorContextLength|.
+ // Returns null without doing anything if the UErrorCode already reports a
+ // failure; returns null with U_ILLEGAL_ARGUMENT_ERROR if the string is null or
+ // |length| exceeds the int32_t maximum; returns null if utext_setup() fails.
+ UText* openUTF16ContextAwareUTextProvider(UText*, const UChar*, unsigned length, const UChar* priorContext, int priorContextLength, UErrorCode*);
+
+} // namespace WTF
+
+#endif // UTextProviderUTF16_h
diff --git a/Source/WTF/wtf/text/unix/TextBreakIteratorInternalICUUnix.cpp b/Source/WTF/wtf/text/unix/TextBreakIteratorInternalICUUnix.cpp
new file mode 100644
index 000000000..44983421c
--- /dev/null
+++ b/Source/WTF/wtf/text/unix/TextBreakIteratorInternalICUUnix.cpp
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2017 Igalia S.L.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#include "config.h"
+#include "TextBreakIteratorInternalICU.h"
+
+#include <locale.h>
+
+namespace WTF {
+
+ // Returns the process's current LC_MESSAGES locale name as reported by
+ // setlocale(), or the empty string when setlocale() yields null.
+ const char* currentSearchLocaleID()
+ {
+     const char* messagesLocale = setlocale(LC_MESSAGES, nullptr);
+     return messagesLocale ? messagesLocale : "";
+ }
+
+ // Returns the process's current LC_MESSAGES locale name as reported by
+ // setlocale(), or "en_us" when setlocale() yields null.
+ const char* currentTextBreakLocaleID()
+ {
+     const char* messagesLocale = setlocale(LC_MESSAGES, nullptr);
+     return messagesLocale ? messagesLocale : "en_us";
+ }
+
+}