diff options
author | Lorry Tar Creator <lorry-tar-importer@lorry> | 2017-06-27 06:07:23 +0000 |
---|---|---|
committer | Lorry Tar Creator <lorry-tar-importer@lorry> | 2017-06-27 06:07:23 +0000 |
commit | 1bf1084f2b10c3b47fd1a588d85d21ed0eb41d0c (patch) | |
tree | 46dcd36c86e7fbc6e5df36deb463b33e9967a6f7 /Source/WTF/wtf/text | |
parent | 32761a6cee1d0dee366b885b7b9c777e67885688 (diff) | |
download | WebKitGtk-tarball-master.tar.gz |
webkitgtk-2.16.5HEADwebkitgtk-2.16.5master
Diffstat (limited to 'Source/WTF/wtf/text')
47 files changed, 6825 insertions, 2903 deletions
diff --git a/Source/WTF/wtf/text/ASCIIFastPath.h b/Source/WTF/wtf/text/ASCIIFastPath.h index d057a6fa1..eb54828a2 100644 --- a/Source/WTF/wtf/text/ASCIIFastPath.h +++ b/Source/WTF/wtf/text/ASCIIFastPath.h @@ -22,12 +22,14 @@ #ifndef ASCIIFastPath_h #define ASCIIFastPath_h -#if OS(DARWIN) && (CPU(X86) || CPU(X86_64)) -#include <emmintrin.h> -#endif #include <stdint.h> +#include <unicode/utypes.h> #include <wtf/StdLibExtras.h> -#include <wtf/unicode/Unicode.h> +#include <wtf/text/LChar.h> + +#if CPU(X86_SSE2) +#include <emmintrin.h> +#endif namespace WTF { @@ -107,7 +109,7 @@ inline bool charactersAreAllASCII(const CharacterType* characters, size_t length inline void copyLCharsFromUCharSource(LChar* destination, const UChar* source, size_t length) { -#if OS(DARWIN) && (CPU(X86) || CPU(X86_64)) +#if CPU(X86_SSE2) const uintptr_t memoryAccessSize = 16; // Memory accesses on 16 byte (128 bit) alignment const uintptr_t memoryAccessMask = memoryAccessSize - 1; @@ -137,7 +139,7 @@ inline void copyLCharsFromUCharSource(LChar* destination, const UChar* source, s ASSERT(!(source[i] & 0xff00)); destination[i] = static_cast<LChar>(source[i]); } -#elif COMPILER(GCC) && CPU(ARM64) && defined(NDEBUG) +#elif COMPILER(GCC_OR_CLANG) && CPU(ARM64) && defined(NDEBUG) const LChar* const end = destination + length; const uintptr_t memoryAccessSize = 16; @@ -158,7 +160,7 @@ inline void copyLCharsFromUCharSource(LChar* destination, const UChar* source, s while (destination != end) *destination++ = static_cast<LChar>(*source++); -#elif COMPILER(GCC) && CPU(ARM_NEON) && !(PLATFORM(BIG_ENDIAN) || PLATFORM(MIDDLE_ENDIAN)) && defined(NDEBUG) +#elif COMPILER(GCC_OR_CLANG) && CPU(ARM_NEON) && !(CPU(BIG_ENDIAN) || CPU(MIDDLE_ENDIAN)) && defined(NDEBUG) const LChar* const end = destination + length; const uintptr_t memoryAccessSize = 8; diff --git a/Source/WTF/wtf/text/AtomicString.cpp b/Source/WTF/wtf/text/AtomicString.cpp index 5803dd018..cd8ef8ffc 100644 --- 
a/Source/WTF/wtf/text/AtomicString.cpp +++ b/Source/WTF/wtf/text/AtomicString.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2013 Apple Inc. All rights reserved. + * Copyright (C) 2004-2008, 2013-2014, 2016 Apple Inc. All rights reserved. * Copyright (C) 2010 Patrick Gansterer <paroga@paroga.com> * Copyright (C) 2012 Google Inc. All rights reserved. * @@ -23,452 +23,81 @@ #include "config.h" #include "AtomicString.h" -#include "AtomicStringTable.h" -#include "HashSet.h" #include "IntegerToStringConversion.h" -#include "StringHash.h" -#include "Threading.h" -#include "WTFThreadData.h" #include "dtoa.h" -#include <wtf/unicode/UTF8.h> #if USE(WEB_THREAD) -#include "TCSpinLock.h" +#include "Lock.h" #endif namespace WTF { -using namespace Unicode; - -static_assert(sizeof(AtomicString) == sizeof(String), "AtomicString and String must be same size!"); - -#if USE(WEB_THREAD) - -class AtomicStringTableLocker : public SpinLockHolder { - WTF_MAKE_NONCOPYABLE(AtomicStringTableLocker); - - static SpinLock s_stringTableLock; -public: - AtomicStringTableLocker() - : SpinLockHolder(&s_stringTableLock) - { - } -}; - -SpinLock AtomicStringTableLocker::s_stringTableLock = SPINLOCK_INITIALIZER; - -#else - -class AtomicStringTableLocker { - WTF_MAKE_NONCOPYABLE(AtomicStringTableLocker); -public: - AtomicStringTableLocker() { } -}; - -#endif // USE(WEB_THREAD) - -static ALWAYS_INLINE HashSet<StringImpl*>& stringTable() -{ - return wtfThreadData().atomicStringTable()->table(); -} - -template<typename T, typename HashTranslator> -static inline PassRefPtr<StringImpl> addToStringTable(const T& value) -{ - AtomicStringTableLocker locker; - - HashSet<StringImpl*>::AddResult addResult = stringTable().add<HashTranslator>(value); - - // If the string is newly-translated, then we need to adopt it. - // The boolean in the pair tells us if that is so. - return addResult.isNewEntry ? 
adoptRef(*addResult.iterator) : *addResult.iterator; -} - -struct CStringTranslator { - static unsigned hash(const LChar* c) - { - return StringHasher::computeHashAndMaskTop8Bits(c); - } - - static inline bool equal(StringImpl* r, const LChar* s) - { - return WTF::equal(r, s); - } - - static void translate(StringImpl*& location, const LChar* const& c, unsigned hash) - { - location = &StringImpl::create(c).leakRef(); - location->setHash(hash); - location->setIsAtomic(true); - } -}; - -PassRefPtr<StringImpl> AtomicString::add(const LChar* c) +template<AtomicString::CaseConvertType type> +ALWAYS_INLINE AtomicString AtomicString::convertASCIICase() const { - if (!c) - return 0; - if (!*c) - return StringImpl::empty(); - - return addToStringTable<const LChar*, CStringTranslator>(c); -} - -template<typename CharacterType> -struct HashTranslatorCharBuffer { - const CharacterType* s; - unsigned length; -}; - -typedef HashTranslatorCharBuffer<UChar> UCharBuffer; -struct UCharBufferTranslator { - static unsigned hash(const UCharBuffer& buf) - { - return StringHasher::computeHashAndMaskTop8Bits(buf.s, buf.length); - } - - static bool equal(StringImpl* const& str, const UCharBuffer& buf) - { - return WTF::equal(str, buf.s, buf.length); - } - - static void translate(StringImpl*& location, const UCharBuffer& buf, unsigned hash) - { - location = &StringImpl::create8BitIfPossible(buf.s, buf.length).leakRef(); - location->setHash(hash); - location->setIsAtomic(true); - } -}; - -template<typename CharacterType> -struct HashAndCharacters { - unsigned hash; - const CharacterType* characters; - unsigned length; -}; - -template<typename CharacterType> -struct HashAndCharactersTranslator { - static unsigned hash(const HashAndCharacters<CharacterType>& buffer) - { - ASSERT(buffer.hash == StringHasher::computeHashAndMaskTop8Bits(buffer.characters, buffer.length)); - return buffer.hash; - } - - static bool equal(StringImpl* const& string, const HashAndCharacters<CharacterType>& buffer) - { 
- return WTF::equal(string, buffer.characters, buffer.length); - } - - static void translate(StringImpl*& location, const HashAndCharacters<CharacterType>& buffer, unsigned hash) - { - location = &StringImpl::create(buffer.characters, buffer.length).leakRef(); - location->setHash(hash); - location->setIsAtomic(true); - } -}; + StringImpl* impl = this->impl(); + if (UNLIKELY(!impl)) + return nullAtom; -struct HashAndUTF8Characters { - unsigned hash; - const char* characters; + // Convert short strings without allocating a new StringImpl, since + // there's a good chance these strings are already in the atomic + // string table and so no memory allocation will be required. unsigned length; - unsigned utf16Length; -}; - -struct HashAndUTF8CharactersTranslator { - static unsigned hash(const HashAndUTF8Characters& buffer) - { - return buffer.hash; - } - - static bool equal(StringImpl* const& string, const HashAndUTF8Characters& buffer) - { - if (buffer.utf16Length != string->length()) - return false; - - // If buffer contains only ASCII characters UTF-8 and UTF16 length are the same. - if (buffer.utf16Length != buffer.length) { - const UChar* stringCharacters = string->deprecatedCharacters(); - - return equalUTF16WithUTF8(stringCharacters, stringCharacters + string->length(), buffer.characters, buffer.characters + buffer.length); - } - - if (string->is8Bit()) { - const LChar* stringCharacters = string->characters8(); - - for (unsigned i = 0; i < buffer.length; ++i) { - ASSERT(isASCII(buffer.characters[i])); - if (stringCharacters[i] != buffer.characters[i]) - return false; + const unsigned localBufferSize = 100; + if (impl->is8Bit() && (length = impl->length()) <= localBufferSize) { + const LChar* characters = impl->characters8(); + unsigned failingIndex; + for (unsigned i = 0; i < length; ++i) { + if (type == CaseConvertType::Lower ? 
UNLIKELY(isASCIIUpper(characters[i])) : LIKELY(isASCIILower(characters[i]))) { + failingIndex = i; + goto SlowPath; } - - return true; } - - const UChar* stringCharacters = string->characters16(); - - for (unsigned i = 0; i < buffer.length; ++i) { - ASSERT(isASCII(buffer.characters[i])); - if (stringCharacters[i] != buffer.characters[i]) - return false; - } - - return true; - } - - static void translate(StringImpl*& location, const HashAndUTF8Characters& buffer, unsigned hash) - { - UChar* target; - RefPtr<StringImpl> newString = StringImpl::createUninitialized(buffer.utf16Length, target); - - bool isAllASCII; - const char* source = buffer.characters; - if (convertUTF8ToUTF16(&source, source + buffer.length, &target, target + buffer.utf16Length, &isAllASCII) != conversionOK) - ASSERT_NOT_REACHED(); - - if (isAllASCII) - newString = StringImpl::create(buffer.characters, buffer.length); - - location = newString.release().leakRef(); - location->setHash(hash); - location->setIsAtomic(true); - } -}; - -PassRefPtr<StringImpl> AtomicString::add(const UChar* s, unsigned length) -{ - if (!s) - return 0; - - if (!length) - return StringImpl::empty(); - - UCharBuffer buffer = { s, length }; - return addToStringTable<UCharBuffer, UCharBufferTranslator>(buffer); -} - -PassRefPtr<StringImpl> AtomicString::add(const UChar* s, unsigned length, unsigned existingHash) -{ - ASSERT(s); - ASSERT(existingHash); - - if (!length) - return StringImpl::empty(); - - HashAndCharacters<UChar> buffer = { existingHash, s, length }; - return addToStringTable<HashAndCharacters<UChar>, HashAndCharactersTranslator<UChar>>(buffer); -} - -PassRefPtr<StringImpl> AtomicString::add(const UChar* s) -{ - if (!s) - return 0; - - unsigned length = 0; - while (s[length] != UChar(0)) - ++length; - - if (!length) - return StringImpl::empty(); - - UCharBuffer buffer = { s, length }; - return addToStringTable<UCharBuffer, UCharBufferTranslator>(buffer); -} - -struct SubstringLocation { - StringImpl* baseString; - 
unsigned start; - unsigned length; -}; - -struct SubstringTranslator { - static unsigned hash(const SubstringLocation& buffer) - { - return StringHasher::computeHashAndMaskTop8Bits(buffer.baseString->deprecatedCharacters() + buffer.start, buffer.length); - } - - static bool equal(StringImpl* const& string, const SubstringLocation& buffer) - { - return WTF::equal(string, buffer.baseString->deprecatedCharacters() + buffer.start, buffer.length); + return *this; +SlowPath: + LChar localBuffer[localBufferSize]; + for (unsigned i = 0; i < failingIndex; ++i) + localBuffer[i] = characters[i]; + for (unsigned i = failingIndex; i < length; ++i) + localBuffer[i] = type == CaseConvertType::Lower ? toASCIILower(characters[i]) : toASCIIUpper(characters[i]); + return AtomicString(localBuffer, length); } - static void translate(StringImpl*& location, const SubstringLocation& buffer, unsigned hash) - { - location = &StringImpl::create(buffer.baseString, buffer.start, buffer.length).leakRef(); - location->setHash(hash); - location->setIsAtomic(true); - } -}; - -PassRefPtr<StringImpl> AtomicString::add(StringImpl* baseString, unsigned start, unsigned length) -{ - if (!baseString) - return 0; - - if (!length || start >= baseString->length()) - return StringImpl::empty(); + Ref<StringImpl> convertedString = type == CaseConvertType::Lower ? 
impl->convertToASCIILowercase() : impl->convertToASCIIUppercase(); + if (LIKELY(convertedString.ptr() == impl)) + return *this; - unsigned maxLength = baseString->length() - start; - if (length >= maxLength) { - if (!start) - return add(baseString); - length = maxLength; - } - - SubstringLocation buffer = { baseString, start, length }; - return addToStringTable<SubstringLocation, SubstringTranslator>(buffer); + AtomicString result; + result.m_string = AtomicStringImpl::add(convertedString.ptr()); + return result; } - -typedef HashTranslatorCharBuffer<LChar> LCharBuffer; -struct LCharBufferTranslator { - static unsigned hash(const LCharBuffer& buf) - { - return StringHasher::computeHashAndMaskTop8Bits(buf.s, buf.length); - } - - static bool equal(StringImpl* const& str, const LCharBuffer& buf) - { - return WTF::equal(str, buf.s, buf.length); - } - - static void translate(StringImpl*& location, const LCharBuffer& buf, unsigned hash) - { - location = &StringImpl::create(buf.s, buf.length).leakRef(); - location->setHash(hash); - location->setIsAtomic(true); - } -}; - -typedef HashTranslatorCharBuffer<char> CharBuffer; -struct CharBufferFromLiteralDataTranslator { - static unsigned hash(const CharBuffer& buf) - { - return StringHasher::computeHashAndMaskTop8Bits(reinterpret_cast<const LChar*>(buf.s), buf.length); - } - static bool equal(StringImpl* const& str, const CharBuffer& buf) - { - return WTF::equal(str, buf.s, buf.length); - } - - static void translate(StringImpl*& location, const CharBuffer& buf, unsigned hash) - { - location = &StringImpl::createFromLiteral(buf.s, buf.length).leakRef(); - location->setHash(hash); - location->setIsAtomic(true); - } -}; - -PassRefPtr<StringImpl> AtomicString::add(const LChar* s, unsigned length) +AtomicString AtomicString::convertToASCIILowercase() const { - if (!s) - return 0; - - if (!length) - return StringImpl::empty(); - - LCharBuffer buffer = { s, length }; - return addToStringTable<LCharBuffer, 
LCharBufferTranslator>(buffer); + return convertASCIICase<CaseConvertType::Lower>(); } -PassRefPtr<StringImpl> AtomicString::addFromLiteralData(const char* characters, unsigned length) +AtomicString AtomicString::convertToASCIIUppercase() const { - ASSERT(characters); - ASSERT(length); - - CharBuffer buffer = { characters, length }; - return addToStringTable<CharBuffer, CharBufferFromLiteralDataTranslator>(buffer); + return convertASCIICase<CaseConvertType::Upper>(); } -PassRefPtr<StringImpl> AtomicString::addSlowCase(StringImpl* string) -{ - if (!string->length()) - return StringImpl::empty(); - - ASSERT_WITH_MESSAGE(!string->isAtomic(), "AtomicString should not hit the slow case if the string is already atomic."); - - AtomicStringTableLocker locker; - HashSet<StringImpl*>::AddResult addResult = stringTable().add(string); - - if (addResult.isNewEntry) { - ASSERT(*addResult.iterator == string); - string->setIsAtomic(true); - } - - return *addResult.iterator; -} - -template<typename CharacterType> -static inline HashSet<StringImpl*>::iterator findString(const StringImpl* stringImpl) -{ - HashAndCharacters<CharacterType> buffer = { stringImpl->existingHash(), stringImpl->getCharacters<CharacterType>(), stringImpl->length() }; - return stringTable().find<HashAndCharactersTranslator<CharacterType>>(buffer); -} - -AtomicStringImpl* AtomicString::find(const StringImpl* stringImpl) -{ - ASSERT(stringImpl); - ASSERT(stringImpl->existingHash()); - - if (!stringImpl->length()) - return static_cast<AtomicStringImpl*>(StringImpl::empty()); - - AtomicStringTableLocker locker; - HashSet<StringImpl*>::iterator iterator; - if (stringImpl->is8Bit()) - iterator = findString<LChar>(stringImpl); - else - iterator = findString<UChar>(stringImpl); - if (iterator == stringTable().end()) - return 0; - return static_cast<AtomicStringImpl*>(*iterator); -} - -void AtomicString::remove(StringImpl* string) -{ - ASSERT(string->isAtomic()); - AtomicStringTableLocker locker; - 
HashSet<StringImpl*>& atomicStringTable = stringTable(); - HashSet<StringImpl*>::iterator iterator = atomicStringTable.find(string); - ASSERT_WITH_MESSAGE(iterator != atomicStringTable.end(), "The string being removed is atomic in the string table of an other thread!"); - atomicStringTable.remove(iterator); -} - -AtomicString AtomicString::lower() const +AtomicString AtomicString::number(int number) { - // Note: This is a hot function in the Dromaeo benchmark. - StringImpl* impl = this->impl(); - if (UNLIKELY(!impl)) - return AtomicString(); - - RefPtr<StringImpl> lowerImpl = impl->lower(); - AtomicString returnValue; - if (LIKELY(lowerImpl == impl)) - returnValue.m_string = lowerImpl.release(); - else - returnValue.m_string = addSlowCase(lowerImpl.get()); - return returnValue; + return numberToStringSigned<AtomicString>(number); } -AtomicString AtomicString::fromUTF8Internal(const char* charactersStart, const char* charactersEnd) +AtomicString AtomicString::number(unsigned number) { - HashAndUTF8Characters buffer; - buffer.characters = charactersStart; - buffer.hash = calculateStringHashAndLengthFromUTF8MaskingTop8Bits(charactersStart, charactersEnd, buffer.length, buffer.utf16Length); - - if (!buffer.hash) - return nullAtom; - - AtomicString atomicString; - atomicString.m_string = addToStringTable<HashAndUTF8Characters, HashAndUTF8CharactersTranslator>(buffer); - return atomicString; + return numberToStringUnsigned<AtomicString>(number); } -AtomicString AtomicString::number(int number) +AtomicString AtomicString::number(unsigned long number) { - return numberToStringSigned<AtomicString>(number); + return numberToStringUnsigned<AtomicString>(number); } -AtomicString AtomicString::number(unsigned number) +AtomicString AtomicString::number(unsigned long long number) { return numberToStringUnsigned<AtomicString>(number); } @@ -479,13 +108,13 @@ AtomicString AtomicString::number(double number) return String(numberToFixedPrecisionString(number, 6, buffer, true)); } 
-#if !ASSERT_DISABLED -bool AtomicString::isInAtomicStringTable(StringImpl* string) +AtomicString AtomicString::fromUTF8Internal(const char* charactersStart, const char* charactersEnd) { - AtomicStringTableLocker locker; - return stringTable().contains(string); + auto impl = AtomicStringImpl::addUTF8(charactersStart, charactersEnd); + if (!impl) + return nullAtom; + return impl.get(); } -#endif #ifndef NDEBUG void AtomicString::show() const diff --git a/Source/WTF/wtf/text/AtomicString.h b/Source/WTF/wtf/text/AtomicString.h index 4142de142..91bb20a8b 100644 --- a/Source/WTF/wtf/text/AtomicString.h +++ b/Source/WTF/wtf/text/AtomicString.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2004, 2005, 2006, 2008 Apple Inc. All rights reserved. + * Copyright (C) 2004-2006, 2008, 2014-2016 Apple Inc. All rights reserved. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Library General Public @@ -23,6 +23,7 @@ #include <utility> #include <wtf/text/AtomicStringImpl.h> +#include <wtf/text/IntegerToStringConversion.h> #include <wtf/text/WTFString.h> // Define 'NO_IMPLICIT_ATOMICSTRING' before including this header, @@ -41,34 +42,38 @@ class AtomicString { public: WTF_EXPORT_PRIVATE static void init(); - AtomicString() { } - AtomicString(const LChar* s) : m_string(add(s)) { } - AtomicString(const char* s) : m_string(add(s)) { } - AtomicString(const LChar* s, unsigned length) : m_string(add(s, length)) { } - AtomicString(const UChar* s, unsigned length) : m_string(add(s, length)) { } - AtomicString(const UChar* s, unsigned length, unsigned existingHash) : m_string(add(s, length, existingHash)) { } - AtomicString(const UChar* s) : m_string(add(s)) { } + AtomicString(); + AtomicString(const LChar*); + AtomicString(const char*); + AtomicString(const LChar*, unsigned length); + AtomicString(const UChar*, unsigned length); + AtomicString(const UChar*, unsigned length, unsigned existingHash); + AtomicString(const UChar*); template<size_t 
inlineCapacity> explicit AtomicString(const Vector<UChar, inlineCapacity>& characters) - : m_string(add(characters.data(), characters.size())) + : m_string(AtomicStringImpl::add(characters.data(), characters.size())) { } - ATOMICSTRING_CONVERSION AtomicString(StringImpl* imp) : m_string(add(imp)) { } - AtomicString(AtomicStringImpl* imp) : m_string(imp) { } - ATOMICSTRING_CONVERSION AtomicString(const String& s) : m_string(add(s.impl())) { } - AtomicString(StringImpl* baseString, unsigned start, unsigned length) : m_string(add(baseString, start, length)) { } + AtomicString(AtomicStringImpl*); + AtomicString(RefPtr<AtomicStringImpl>&&); + ATOMICSTRING_CONVERSION AtomicString(StringImpl*); + ATOMICSTRING_CONVERSION AtomicString(const String&); + AtomicString(StringImpl* baseString, unsigned start, unsigned length); + + // FIXME: AtomicString doesn’t always have AtomicStringImpl, so one of those two names needs to change.. + AtomicString(UniquedStringImpl* uid); enum ConstructFromLiteralTag { ConstructFromLiteral }; AtomicString(const char* characters, unsigned length, ConstructFromLiteralTag) - : m_string(addFromLiteralData(characters, length)) + : m_string(AtomicStringImpl::addLiteral(characters, length)) { } template<unsigned charactersCount> ALWAYS_INLINE AtomicString(const char (&characters)[charactersCount], ConstructFromLiteralTag) - : m_string(addFromLiteralData(characters, charactersCount - 1)) + : m_string(AtomicStringImpl::addLiteral(characters, charactersCount - 1)) { COMPILE_ASSERT(charactersCount > 1, AtomicStringFromLiteralNotEmpty); COMPILE_ASSERT((charactersCount - 1 <= ((unsigned(~0) - sizeof(StringImpl)) / sizeof(LChar))), AtomicStringFromLiteralCannotOverflow); @@ -77,15 +82,15 @@ public: // We have to declare the copy constructor and copy assignment operator as well, otherwise // they'll be implicitly deleted by adding the move constructor and move assignment operator. 
AtomicString(const AtomicString& other) : m_string(other.m_string) { } - AtomicString(AtomicString&& other) : m_string(std::move(other.m_string)) { } + AtomicString(AtomicString&& other) : m_string(WTFMove(other.m_string)) { } AtomicString& operator=(const AtomicString& other) { m_string = other.m_string; return *this; } - AtomicString& operator=(AtomicString&& other) { m_string = std::move(other.m_string); return *this; } + AtomicString& operator=(AtomicString&& other) { m_string = WTFMove(other.m_string); return *this; } // Hash table deleted values, which are only constructed and never copied or destroyed. AtomicString(WTF::HashTableDeletedValueType) : m_string(WTF::HashTableDeletedValue) { } bool isHashTableDeletedValue() const { return m_string.isHashTableDeletedValue(); } - WTF_EXPORT_STRING_API static AtomicStringImpl* find(const StringImpl*); + unsigned existingHash() const { return isNull() ? 0 : impl()->existingHash(); } operator const String&() const { return m_string; } const String& string() const { return m_string; }; @@ -93,31 +98,46 @@ public: AtomicStringImpl* impl() const { return static_cast<AtomicStringImpl *>(m_string.impl()); } bool is8Bit() const { return m_string.is8Bit(); } - const UChar* characters() const { return m_string.deprecatedCharacters(); } // FIXME: Delete this. 
const LChar* characters8() const { return m_string.characters8(); } const UChar* characters16() const { return m_string.characters16(); } unsigned length() const { return m_string.length(); } - + UChar operator[](unsigned int i) const { return m_string[i]; } WTF_EXPORT_STRING_API static AtomicString number(int); WTF_EXPORT_STRING_API static AtomicString number(unsigned); + WTF_EXPORT_STRING_API static AtomicString number(unsigned long); + WTF_EXPORT_STRING_API static AtomicString number(unsigned long long); WTF_EXPORT_STRING_API static AtomicString number(double); // If we need more overloads of the number function, we can add all the others that String has, but these seem to do for now. bool contains(UChar c) const { return m_string.contains(c); } bool contains(const LChar* s, bool caseSensitive = true) const { return m_string.contains(s, caseSensitive); } - bool contains(const String& s, bool caseSensitive = true) const + bool contains(const String& s) const + { return m_string.contains(s); } + bool contains(const String& s, bool caseSensitive) const { return m_string.contains(s, caseSensitive); } + bool containsIgnoringASCIICase(const String& s) const + { return m_string.containsIgnoringASCIICase(s); } size_t find(UChar c, unsigned start = 0) const { return m_string.find(c, start); } size_t find(const LChar* s, unsigned start = 0, bool caseSentitive = true) const { return m_string.find(s, start, caseSentitive); } size_t find(const String& s, unsigned start = 0, bool caseSentitive = true) const { return m_string.find(s, start, caseSentitive); } - - bool startsWith(const String& s, bool caseSensitive = true) const + size_t findIgnoringASCIICase(const String& s) const + { return m_string.findIgnoringASCIICase(s); } + size_t findIgnoringASCIICase(const String& s, unsigned startOffset) const + { return m_string.findIgnoringASCIICase(s, startOffset); } + size_t find(CharacterMatchFunctionPtr matchFunction, unsigned start = 0) const + { return 
m_string.find(matchFunction, start); } + + bool startsWith(const String& s) const + { return m_string.startsWith(s); } + bool startsWithIgnoringASCIICase(const String& s) const + { return m_string.startsWithIgnoringASCIICase(s); } + bool startsWith(const String& s, bool caseSensitive) const { return m_string.startsWith(s, caseSensitive); } bool startsWith(UChar character) const { return m_string.startsWith(character); } @@ -125,17 +145,21 @@ public: bool startsWith(const char (&prefix)[matchLength], bool caseSensitive = true) const { return m_string.startsWith<matchLength>(prefix, caseSensitive); } - bool endsWith(const String& s, bool caseSensitive = true) const + bool endsWith(const String& s) const + { return m_string.endsWith(s); } + bool endsWithIgnoringASCIICase(const String& s) const + { return m_string.endsWithIgnoringASCIICase(s); } + bool endsWith(const String& s, bool caseSensitive) const { return m_string.endsWith(s, caseSensitive); } bool endsWith(UChar character) const { return m_string.endsWith(character); } template<unsigned matchLength> bool endsWith(const char (&prefix)[matchLength], bool caseSensitive = true) const { return m_string.endsWith<matchLength>(prefix, caseSensitive); } - - WTF_EXPORT_STRING_API AtomicString lower() const; - AtomicString upper() const { return AtomicString(impl()->upper()); } - + + WTF_EXPORT_STRING_API AtomicString convertToASCIILowercase() const; + WTF_EXPORT_STRING_API AtomicString convertToASCIIUppercase() const; + int toInt(bool* ok = 0) const { return m_string.toInt(ok); } double toDouble(bool* ok = 0) const { return m_string.toDouble(ok); } float toFloat(bool* ok = 0) const { return m_string.toFloat(ok); } @@ -144,13 +168,11 @@ public: bool isNull() const { return m_string.isNull(); } bool isEmpty() const { return m_string.isEmpty(); } - static void remove(StringImpl*); - #if USE(CF) - AtomicString(CFStringRef s) : m_string(add(s)) { } -#endif + AtomicString(CFStringRef); +#endif #ifdef __OBJC__ - 
AtomicString(NSString* s) : m_string(add((CFStringRef)s)) { } + AtomicString(NSString*); operator NSString*() const { return m_string; } #endif @@ -167,37 +189,16 @@ private: // The explicit constructors with AtomicString::ConstructFromLiteral must be used for literals. AtomicString(ASCIILiteral); - String m_string; - - WTF_EXPORT_STRING_API static PassRefPtr<StringImpl> add(const LChar*); - ALWAYS_INLINE static PassRefPtr<StringImpl> add(const char* s) { return add(reinterpret_cast<const LChar*>(s)); }; - WTF_EXPORT_STRING_API static PassRefPtr<StringImpl> add(const LChar*, unsigned length); - WTF_EXPORT_STRING_API static PassRefPtr<StringImpl> add(const UChar*, unsigned length); - ALWAYS_INLINE static PassRefPtr<StringImpl> add(const char* s, unsigned length) { return add(reinterpret_cast<const LChar*>(s), length); }; - WTF_EXPORT_STRING_API static PassRefPtr<StringImpl> add(const UChar*, unsigned length, unsigned existingHash); - WTF_EXPORT_STRING_API static PassRefPtr<StringImpl> add(const UChar*); - WTF_EXPORT_STRING_API static PassRefPtr<StringImpl> add(StringImpl*, unsigned offset, unsigned length); - ALWAYS_INLINE static PassRefPtr<StringImpl> add(StringImpl* string) - { - if (!string || string->isAtomic()) { - ASSERT_WITH_MESSAGE(!string || isInAtomicStringTable(string), "The atomic string comes from an other thread!"); - return string; - } - return addSlowCase(string); - } - WTF_EXPORT_STRING_API static PassRefPtr<StringImpl> addFromLiteralData(const char* characters, unsigned length); - WTF_EXPORT_STRING_API static PassRefPtr<StringImpl> addSlowCase(StringImpl*); -#if USE(CF) - WTF_EXPORT_STRING_API static PassRefPtr<StringImpl> add(CFStringRef); -#endif + enum class CaseConvertType { Upper, Lower }; + template<CaseConvertType> AtomicString convertASCIICase() const; WTF_EXPORT_STRING_API static AtomicString fromUTF8Internal(const char*, const char*); -#if !ASSERT_DISABLED - WTF_EXPORT_STRING_API static bool isInAtomicStringTable(StringImpl*); -#endif + 
String m_string; }; +static_assert(sizeof(AtomicString) == sizeof(String), "AtomicString and String must be same size!"); + inline bool operator==(const AtomicString& a, const AtomicString& b) { return a.impl() == b.impl(); } bool operator==(const AtomicString&, const LChar*); inline bool operator==(const AtomicString& a, const char* b) { return WTF::equal(a.impl(), reinterpret_cast<const LChar*>(b)); } @@ -216,25 +217,99 @@ inline bool operator!=(const LChar* a, const AtomicString& b) { return !(b == a) inline bool operator!=(const String& a, const AtomicString& b) { return !equal(a.impl(), b.impl()); } inline bool operator!=(const Vector<UChar>& a, const AtomicString& b) { return !(a == b); } -inline bool equalIgnoringCase(const AtomicString& a, const AtomicString& b) { return equalIgnoringCase(a.impl(), b.impl()); } -inline bool equalIgnoringCase(const AtomicString& a, const LChar* b) { return equalIgnoringCase(a.impl(), b); } -inline bool equalIgnoringCase(const AtomicString& a, const char* b) { return equalIgnoringCase(a.impl(), reinterpret_cast<const LChar*>(b)); } -inline bool equalIgnoringCase(const AtomicString& a, const String& b) { return equalIgnoringCase(a.impl(), b.impl()); } -inline bool equalIgnoringCase(const LChar* a, const AtomicString& b) { return equalIgnoringCase(a, b.impl()); } -inline bool equalIgnoringCase(const char* a, const AtomicString& b) { return equalIgnoringCase(reinterpret_cast<const LChar*>(a), b.impl()); } -inline bool equalIgnoringCase(const String& a, const AtomicString& b) { return equalIgnoringCase(a.impl(), b.impl()); } +bool equalIgnoringASCIICase(const AtomicString&, const AtomicString&); +bool equalIgnoringASCIICase(const AtomicString&, const String&); +bool equalIgnoringASCIICase(const String&, const AtomicString&); +bool equalIgnoringASCIICase(const AtomicString&, const char*); + +template<unsigned length> bool equalLettersIgnoringASCIICase(const AtomicString&, const char (&lowercaseLetters)[length]); + +inline 
AtomicString::AtomicString() +{ +} + +inline AtomicString::AtomicString(const LChar* s) + : m_string(AtomicStringImpl::add(s)) +{ +} + +inline AtomicString::AtomicString(const char* s) + : m_string(AtomicStringImpl::add(s)) +{ +} + +inline AtomicString::AtomicString(const LChar* s, unsigned length) + : m_string(AtomicStringImpl::add(s, length)) +{ +} + +inline AtomicString::AtomicString(const UChar* s, unsigned length) + : m_string(AtomicStringImpl::add(s, length)) +{ +} + +inline AtomicString::AtomicString(const UChar* s, unsigned length, unsigned existingHash) + : m_string(AtomicStringImpl::add(s, length, existingHash)) +{ +} + +inline AtomicString::AtomicString(const UChar* s) + : m_string(AtomicStringImpl::add(s)) +{ +} + +inline AtomicString::AtomicString(AtomicStringImpl* imp) + : m_string(imp) +{ +} + +inline AtomicString::AtomicString(RefPtr<AtomicStringImpl>&& imp) + : m_string(WTFMove(imp)) +{ +} + +inline AtomicString::AtomicString(StringImpl* imp) + : m_string(AtomicStringImpl::add(imp)) +{ +} + +inline AtomicString::AtomicString(const String& s) + : m_string(AtomicStringImpl::add(s.impl())) +{ +} + +inline AtomicString::AtomicString(StringImpl* baseString, unsigned start, unsigned length) + : m_string(AtomicStringImpl::add(baseString, start, length)) +{ +} + +inline AtomicString::AtomicString(UniquedStringImpl* uid) + : m_string(uid) +{ +} + +#if USE(CF) +inline AtomicString::AtomicString(CFStringRef s) + : m_string(AtomicStringImpl::add(s)) +{ +} +#endif + +#ifdef __OBJC__ +inline AtomicString::AtomicString(NSString* s) + : m_string(AtomicStringImpl::add((__bridge CFStringRef)s)) +{ +} +#endif // Define external global variables for the commonly used atomic strings. // These are only usable from the main thread. 
#ifndef ATOMICSTRING_HIDE_GLOBALS extern const WTF_EXPORTDATA AtomicString nullAtom; extern const WTF_EXPORTDATA AtomicString emptyAtom; -extern const WTF_EXPORTDATA AtomicString textAtom; -extern const WTF_EXPORTDATA AtomicString commentAtom; extern const WTF_EXPORTDATA AtomicString starAtom; extern const WTF_EXPORTDATA AtomicString xmlAtom; extern const WTF_EXPORTDATA AtomicString xmlnsAtom; -extern const WTF_EXPORTDATA AtomicString xlinkAtom; inline AtomicString AtomicString::fromUTF8(const char* characters, size_t length) { @@ -251,7 +326,7 @@ inline AtomicString AtomicString::fromUTF8(const char* characters) return nullAtom; if (!*characters) return emptyAtom; - return fromUTF8Internal(characters, 0); + return fromUTF8Internal(characters, nullptr); } #endif @@ -261,19 +336,48 @@ template<> struct DefaultHash<AtomicString> { typedef AtomicStringHash Hash; }; +template<unsigned length> inline bool equalLettersIgnoringASCIICase(const AtomicString& string, const char (&lowercaseLetters)[length]) +{ + return equalLettersIgnoringASCIICase(string.string(), lowercaseLetters); +} + +inline bool equalIgnoringASCIICase(const AtomicString& a, const AtomicString& b) +{ + return equalIgnoringASCIICase(a.string(), b.string()); +} + +inline bool equalIgnoringASCIICase(const AtomicString& a, const String& b) +{ + return equalIgnoringASCIICase(a.string(), b); +} + +inline bool equalIgnoringASCIICase(const String& a, const AtomicString& b) +{ + return equalIgnoringASCIICase(a, b.string()); +} + +inline bool equalIgnoringASCIICase(const AtomicString& a, const char* b) +{ + return equalIgnoringASCIICase(a.string(), b); +} + +template<> struct IntegerToStringConversionTrait<AtomicString> { + using ReturnType = AtomicString; + using AdditionalArgumentType = void; + static AtomicString flush(LChar* characters, unsigned length, void*) { return { characters, length }; } +}; + } // namespace WTF #ifndef ATOMICSTRING_HIDE_GLOBALS using WTF::AtomicString; using WTF::nullAtom; using 
WTF::emptyAtom; -using WTF::textAtom; -using WTF::commentAtom; using WTF::starAtom; using WTF::xmlAtom; using WTF::xmlnsAtom; -using WTF::xlinkAtom; #endif #include <wtf/text/StringConcatenate.h> + #endif // AtomicString_h diff --git a/Source/WTF/wtf/text/AtomicStringHash.h b/Source/WTF/wtf/text/AtomicStringHash.h index 6130d9493..417619350 100644 --- a/Source/WTF/wtf/text/AtomicStringHash.h +++ b/Source/WTF/wtf/text/AtomicStringHash.h @@ -10,7 +10,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 3. Neither the name of Apple Computer, Inc. ("Apple") nor the names of + * 3. Neither the name of Apple Inc. ("Apple") nor the names of * its contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * @@ -48,11 +48,20 @@ namespace WTF { static const bool safeToCompareToEmptyOrDeleted = false; }; - // AtomicStringHash is the default hash for AtomicString - template<> struct HashTraits<WTF::AtomicString> : GenericHashTraits<WTF::AtomicString> { - static const bool emptyValueIsZero = true; - static void constructDeletedValue(WTF::AtomicString& slot) { new (NotNull, &slot) WTF::AtomicString(HashTableDeletedValue); } - static bool isDeletedValue(const WTF::AtomicString& slot) { return slot.isHashTableDeletedValue(); } + template<> struct HashTraits<WTF::AtomicString> : SimpleClassHashTraits<WTF::AtomicString> { + static const bool hasIsEmptyValueFunction = true; + static bool isEmptyValue(const AtomicString& value) + { + return value.isNull(); + } + + static void customDeleteBucket(AtomicString& value) + { + // See unique_ptr's customDeleteBucket() for an explanation. 
+ ASSERT(!isDeletedValue(value)); + AtomicString valueToBeDestroyed = WTFMove(value); + constructDeletedValue(value); + } }; } diff --git a/Source/WTF/wtf/text/AtomicStringImpl.cpp b/Source/WTF/wtf/text/AtomicStringImpl.cpp new file mode 100644 index 000000000..fb50b7fdd --- /dev/null +++ b/Source/WTF/wtf/text/AtomicStringImpl.cpp @@ -0,0 +1,540 @@ +/* + * Copyright (C) 2004-2008, 2013-2014 Apple Inc. All rights reserved. + * Copyright (C) 2010 Patrick Gansterer <paroga@paroga.com> + * Copyright (C) 2012 Google Inc. All rights reserved. + * Copyright (C) 2015 Yusuke Suzuki<utatane.tea@gmail.com>. All rights reserved. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public License + * along with this library; see the file COPYING.LIB. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ * + */ + +#include "config.h" +#include "AtomicStringImpl.h" + +#include "AtomicStringTable.h" +#include "CommaPrinter.h" +#include "DataLog.h" +#include "HashSet.h" +#include "IntegerToStringConversion.h" +#include "StringHash.h" +#include "StringPrintStream.h" +#include "Threading.h" +#include "WTFThreadData.h" +#include <wtf/unicode/UTF8.h> + +#if USE(WEB_THREAD) +#include "Lock.h" +#endif + +namespace WTF { + +using namespace Unicode; + +#if USE(WEB_THREAD) + +class AtomicStringTableLocker : public LockHolder { + WTF_MAKE_NONCOPYABLE(AtomicStringTableLocker); + + static StaticLock s_stringTableLock; +public: + AtomicStringTableLocker() + : LockHolder(&s_stringTableLock) + { + } +}; + +StaticLock AtomicStringTableLocker::s_stringTableLock; + +#else + +class AtomicStringTableLocker { + WTF_MAKE_NONCOPYABLE(AtomicStringTableLocker); +public: + AtomicStringTableLocker() { } +}; + +#endif // USE(WEB_THREAD) + +using StringTableImpl = HashSet<StringImpl*>; + +static ALWAYS_INLINE StringTableImpl& stringTable() +{ + return wtfThreadData().atomicStringTable()->table(); +} + +template<typename T, typename HashTranslator> +static inline Ref<AtomicStringImpl> addToStringTable(AtomicStringTableLocker&, StringTableImpl& atomicStringTable, const T& value) +{ + auto addResult = atomicStringTable.add<HashTranslator>(value); + + // If the string is newly-translated, then we need to adopt it. + // The boolean in the pair tells us if that is so. 
+ if (addResult.isNewEntry) + return adoptRef(static_cast<AtomicStringImpl&>(**addResult.iterator)); + return *static_cast<AtomicStringImpl*>(*addResult.iterator); +} + +template<typename T, typename HashTranslator> +static inline Ref<AtomicStringImpl> addToStringTable(const T& value) +{ + AtomicStringTableLocker locker; + return addToStringTable<T, HashTranslator>(locker, stringTable(), value); +} + +struct CStringTranslator { + static unsigned hash(const LChar* c) + { + return StringHasher::computeHashAndMaskTop8Bits(c); + } + + static inline bool equal(StringImpl* r, const LChar* s) + { + return WTF::equal(r, s); + } + + static void translate(StringImpl*& location, const LChar* const& c, unsigned hash) + { + location = &StringImpl::create(c).leakRef(); + location->setHash(hash); + location->setIsAtomic(true); + } +}; + +RefPtr<AtomicStringImpl> AtomicStringImpl::add(const LChar* c) +{ + if (!c) + return nullptr; + if (!*c) + return static_cast<AtomicStringImpl*>(StringImpl::empty()); + + return addToStringTable<const LChar*, CStringTranslator>(c); +} + +template<typename CharacterType> +struct HashTranslatorCharBuffer { + const CharacterType* s; + unsigned length; +}; + +typedef HashTranslatorCharBuffer<UChar> UCharBuffer; +struct UCharBufferTranslator { + static unsigned hash(const UCharBuffer& buf) + { + return StringHasher::computeHashAndMaskTop8Bits(buf.s, buf.length); + } + + static bool equal(StringImpl* const& str, const UCharBuffer& buf) + { + return WTF::equal(str, buf.s, buf.length); + } + + static void translate(StringImpl*& location, const UCharBuffer& buf, unsigned hash) + { + location = &StringImpl::create8BitIfPossible(buf.s, buf.length).leakRef(); + location->setHash(hash); + location->setIsAtomic(true); + } +}; + +template<typename CharacterType> +struct HashAndCharacters { + unsigned hash; + const CharacterType* characters; + unsigned length; +}; + +template<typename CharacterType> +struct HashAndCharactersTranslator { + static unsigned 
hash(const HashAndCharacters<CharacterType>& buffer) + { + ASSERT(buffer.hash == StringHasher::computeHashAndMaskTop8Bits(buffer.characters, buffer.length)); + return buffer.hash; + } + + static bool equal(StringImpl* const& string, const HashAndCharacters<CharacterType>& buffer) + { + return WTF::equal(string, buffer.characters, buffer.length); + } + + static void translate(StringImpl*& location, const HashAndCharacters<CharacterType>& buffer, unsigned hash) + { + location = &StringImpl::create(buffer.characters, buffer.length).leakRef(); + location->setHash(hash); + location->setIsAtomic(true); + } +}; + +struct HashAndUTF8Characters { + unsigned hash; + const char* characters; + unsigned length; + unsigned utf16Length; +}; + +struct HashAndUTF8CharactersTranslator { + static unsigned hash(const HashAndUTF8Characters& buffer) + { + return buffer.hash; + } + + static bool equal(StringImpl* const& string, const HashAndUTF8Characters& buffer) + { + if (buffer.utf16Length != string->length()) + return false; + + // If buffer contains only ASCII characters UTF-8 and UTF16 length are the same. 
+ if (buffer.utf16Length != buffer.length) { + if (string->is8Bit()) + return equalLatin1WithUTF8(string->characters8(), buffer.characters, buffer.characters + buffer.length); + + return equalUTF16WithUTF8(string->characters16(), buffer.characters, buffer.characters + buffer.length); + } + + if (string->is8Bit()) { + const LChar* stringCharacters = string->characters8(); + + for (unsigned i = 0; i < buffer.length; ++i) { + ASSERT(isASCII(buffer.characters[i])); + if (stringCharacters[i] != buffer.characters[i]) + return false; + } + + return true; + } + + const UChar* stringCharacters = string->characters16(); + + for (unsigned i = 0; i < buffer.length; ++i) { + ASSERT(isASCII(buffer.characters[i])); + if (stringCharacters[i] != buffer.characters[i]) + return false; + } + + return true; + } + + static void translate(StringImpl*& location, const HashAndUTF8Characters& buffer, unsigned hash) + { + UChar* target; + auto newString = StringImpl::createUninitialized(buffer.utf16Length, target); + + bool isAllASCII; + const char* source = buffer.characters; + if (convertUTF8ToUTF16(&source, source + buffer.length, &target, target + buffer.utf16Length, &isAllASCII) != conversionOK) + ASSERT_NOT_REACHED(); + + if (isAllASCII) + newString = StringImpl::create(buffer.characters, buffer.length); + + location = &newString.leakRef(); + location->setHash(hash); + location->setIsAtomic(true); + } +}; + +RefPtr<AtomicStringImpl> AtomicStringImpl::add(const UChar* s, unsigned length) +{ + if (!s) + return nullptr; + + if (!length) + return static_cast<AtomicStringImpl*>(StringImpl::empty()); + + UCharBuffer buffer = { s, length }; + return addToStringTable<UCharBuffer, UCharBufferTranslator>(buffer); +} + +Ref<AtomicStringImpl> AtomicStringImpl::add(const UChar* s, unsigned length, unsigned existingHash) +{ + ASSERT(s); + ASSERT(existingHash); + + if (!length) + return *static_cast<AtomicStringImpl*>(StringImpl::empty()); + + HashAndCharacters<UChar> buffer = { existingHash, s, 
length }; + return addToStringTable<HashAndCharacters<UChar>, HashAndCharactersTranslator<UChar>>(buffer); +} + +RefPtr<AtomicStringImpl> AtomicStringImpl::add(const UChar* s) +{ + if (!s) + return nullptr; + + unsigned length = 0; + while (s[length] != UChar(0)) + ++length; + + if (!length) + return static_cast<AtomicStringImpl*>(StringImpl::empty()); + + UCharBuffer buffer = { s, length }; + return addToStringTable<UCharBuffer, UCharBufferTranslator>(buffer); +} + +struct SubstringLocation { + StringImpl* baseString; + unsigned start; + unsigned length; +}; + +struct SubstringTranslator { + static void translate(StringImpl*& location, const SubstringLocation& buffer, unsigned hash) + { + location = &StringImpl::createSubstringSharingImpl(*buffer.baseString, buffer.start, buffer.length).leakRef(); + location->setHash(hash); + location->setIsAtomic(true); + } +}; + +struct SubstringTranslator8 : SubstringTranslator { + static unsigned hash(const SubstringLocation& buffer) + { + return StringHasher::computeHashAndMaskTop8Bits(buffer.baseString->characters8() + buffer.start, buffer.length); + } + + static bool equal(StringImpl* const& string, const SubstringLocation& buffer) + { + return WTF::equal(string, buffer.baseString->characters8() + buffer.start, buffer.length); + } +}; + +struct SubstringTranslator16 : SubstringTranslator { + static unsigned hash(const SubstringLocation& buffer) + { + return StringHasher::computeHashAndMaskTop8Bits(buffer.baseString->characters16() + buffer.start, buffer.length); + } + + static bool equal(StringImpl* const& string, const SubstringLocation& buffer) + { + return WTF::equal(string, buffer.baseString->characters16() + buffer.start, buffer.length); + } +}; + +RefPtr<AtomicStringImpl> AtomicStringImpl::add(StringImpl* baseString, unsigned start, unsigned length) +{ + if (!baseString) + return nullptr; + + if (!length || start >= baseString->length()) + return static_cast<AtomicStringImpl*>(StringImpl::empty()); + + unsigned 
maxLength = baseString->length() - start; + if (length >= maxLength) { + if (!start) + return add(baseString); + length = maxLength; + } + + SubstringLocation buffer = { baseString, start, length }; + if (baseString->is8Bit()) + return addToStringTable<SubstringLocation, SubstringTranslator8>(buffer); + return addToStringTable<SubstringLocation, SubstringTranslator16>(buffer); +} + +typedef HashTranslatorCharBuffer<LChar> LCharBuffer; +struct LCharBufferTranslator { + static unsigned hash(const LCharBuffer& buf) + { + return StringHasher::computeHashAndMaskTop8Bits(buf.s, buf.length); + } + + static bool equal(StringImpl* const& str, const LCharBuffer& buf) + { + return WTF::equal(str, buf.s, buf.length); + } + + static void translate(StringImpl*& location, const LCharBuffer& buf, unsigned hash) + { + location = &StringImpl::create(buf.s, buf.length).leakRef(); + location->setHash(hash); + location->setIsAtomic(true); + } +}; + +typedef HashTranslatorCharBuffer<char> CharBuffer; +struct CharBufferFromLiteralDataTranslator { + static unsigned hash(const CharBuffer& buf) + { + return StringHasher::computeHashAndMaskTop8Bits(reinterpret_cast<const LChar*>(buf.s), buf.length); + } + + static bool equal(StringImpl* const& str, const CharBuffer& buf) + { + return WTF::equal(str, buf.s, buf.length); + } + + static void translate(StringImpl*& location, const CharBuffer& buf, unsigned hash) + { + location = &StringImpl::createFromLiteral(buf.s, buf.length).leakRef(); + location->setHash(hash); + location->setIsAtomic(true); + } +}; + +RefPtr<AtomicStringImpl> AtomicStringImpl::add(const LChar* s, unsigned length) +{ + if (!s) + return nullptr; + + if (!length) + return static_cast<AtomicStringImpl*>(StringImpl::empty()); + + LCharBuffer buffer = { s, length }; + return addToStringTable<LCharBuffer, LCharBufferTranslator>(buffer); +} + +Ref<AtomicStringImpl> AtomicStringImpl::addLiteral(const char* characters, unsigned length) +{ + ASSERT(characters); + ASSERT(length); + + 
CharBuffer buffer = { characters, length }; + return addToStringTable<CharBuffer, CharBufferFromLiteralDataTranslator>(buffer); +} + +static inline Ref<AtomicStringImpl> addSubstring(AtomicStringTableLocker& locker, StringTableImpl& atomicStringTable, StringImpl& base) +{ + ASSERT(base.length()); + ASSERT(base.isSymbol() || base.isStatic()); + + SubstringLocation buffer = { &base, 0, base.length() }; + if (base.is8Bit()) + return addToStringTable<SubstringLocation, SubstringTranslator8>(locker, atomicStringTable, buffer); + return addToStringTable<SubstringLocation, SubstringTranslator16>(locker, atomicStringTable, buffer); +} + +static inline Ref<AtomicStringImpl> addSubstring(StringImpl& base) +{ + AtomicStringTableLocker locker; + return addSubstring(locker, stringTable(), base); +} + +Ref<AtomicStringImpl> AtomicStringImpl::addSlowCase(StringImpl& string) +{ + if (!string.length()) + return *static_cast<AtomicStringImpl*>(StringImpl::empty()); + + if (string.isSymbol() || string.isStatic()) + return addSubstring(string); + + ASSERT_WITH_MESSAGE(!string.isAtomic(), "AtomicStringImpl should not hit the slow case if the string is already atomic."); + + AtomicStringTableLocker locker; + auto addResult = stringTable().add(&string); + + if (addResult.isNewEntry) { + ASSERT(*addResult.iterator == &string); + string.setIsAtomic(true); + } + + return *static_cast<AtomicStringImpl*>(*addResult.iterator); +} + +Ref<AtomicStringImpl> AtomicStringImpl::addSlowCase(AtomicStringTable& stringTable, StringImpl& string) +{ + if (!string.length()) + return *static_cast<AtomicStringImpl*>(StringImpl::empty()); + + if (string.isSymbol() || string.isStatic()) { + AtomicStringTableLocker locker; + return addSubstring(locker, stringTable.table(), string); + } + + ASSERT_WITH_MESSAGE(!string.isAtomic(), "AtomicStringImpl should not hit the slow case if the string is already atomic."); + + AtomicStringTableLocker locker; + auto addResult = stringTable.table().add(&string); + + if 
(addResult.isNewEntry) { + ASSERT(*addResult.iterator == &string); + string.setIsAtomic(true); + } + + return *static_cast<AtomicStringImpl*>(*addResult.iterator); +} + +void AtomicStringImpl::remove(AtomicStringImpl* string) +{ + ASSERT(string->isAtomic()); + AtomicStringTableLocker locker; + auto& atomicStringTable = stringTable(); + auto iterator = atomicStringTable.find(string); + ASSERT_WITH_MESSAGE(iterator != atomicStringTable.end(), "The string being removed is atomic in the string table of an other thread!"); + ASSERT(string == *iterator); + atomicStringTable.remove(iterator); +} + +RefPtr<AtomicStringImpl> AtomicStringImpl::lookUpSlowCase(StringImpl& string) +{ + ASSERT_WITH_MESSAGE(!string.isAtomic(), "AtomicStringImpls should return from the fast case."); + + if (!string.length()) + return static_cast<AtomicStringImpl*>(StringImpl::empty()); + + AtomicStringTableLocker locker; + auto& atomicStringTable = stringTable(); + auto iterator = atomicStringTable.find(&string); + if (iterator != atomicStringTable.end()) + return static_cast<AtomicStringImpl*>(*iterator); + return nullptr; +} + +RefPtr<AtomicStringImpl> AtomicStringImpl::addUTF8(const char* charactersStart, const char* charactersEnd) +{ + HashAndUTF8Characters buffer; + buffer.characters = charactersStart; + buffer.hash = calculateStringHashAndLengthFromUTF8MaskingTop8Bits(charactersStart, charactersEnd, buffer.length, buffer.utf16Length); + + if (!buffer.hash) + return nullptr; + + return addToStringTable<HashAndUTF8Characters, HashAndUTF8CharactersTranslator>(buffer); +} + +RefPtr<AtomicStringImpl> AtomicStringImpl::lookUpInternal(const LChar* characters, unsigned length) +{ + AtomicStringTableLocker locker; + auto& table = stringTable(); + + LCharBuffer buffer = { characters, length }; + auto iterator = table.find<LCharBufferTranslator>(buffer); + if (iterator != table.end()) + return static_cast<AtomicStringImpl*>(*iterator); + return nullptr; +} + +RefPtr<AtomicStringImpl> 
AtomicStringImpl::lookUpInternal(const UChar* characters, unsigned length) +{ + AtomicStringTableLocker locker; + auto& table = stringTable(); + + UCharBuffer buffer = { characters, length }; + auto iterator = table.find<UCharBufferTranslator>(buffer); + if (iterator != table.end()) + return static_cast<AtomicStringImpl*>(*iterator); + return nullptr; +} + +#if !ASSERT_DISABLED +bool AtomicStringImpl::isInAtomicStringTable(StringImpl* string) +{ + AtomicStringTableLocker locker; + return stringTable().contains(string); +} +#endif + +} // namespace WTF diff --git a/Source/WTF/wtf/text/AtomicStringImpl.h b/Source/WTF/wtf/text/AtomicStringImpl.h index 45114aca5..1cde4b0ed 100644 --- a/Source/WTF/wtf/text/AtomicStringImpl.h +++ b/Source/WTF/wtf/text/AtomicStringImpl.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2006 Apple Computer, Inc. + * Copyright (C) 2006 Apple Inc. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Library General Public @@ -21,18 +21,97 @@ #ifndef AtomicStringImpl_h #define AtomicStringImpl_h -#include <wtf/text/StringImpl.h> +#include <wtf/text/UniquedStringImpl.h> namespace WTF { -class AtomicStringImpl : public StringImpl -{ +class AtomicStringTable; + +class AtomicStringImpl : public UniquedStringImpl { public: - AtomicStringImpl() : StringImpl(0) {} + static RefPtr<AtomicStringImpl> lookUp(LChar* characters, unsigned length) + { + return lookUpInternal(characters, length); + } + static RefPtr<AtomicStringImpl> lookUp(UChar* characters, unsigned length) + { + return lookUpInternal(characters, length); + } + static RefPtr<AtomicStringImpl> lookUp(StringImpl* string) + { + if (!string || string->isAtomic()) + return static_cast<AtomicStringImpl*>(string); + return lookUpSlowCase(*string); + } + + static void remove(AtomicStringImpl*); + + WTF_EXPORT_STRING_API static RefPtr<AtomicStringImpl> add(const LChar*); + ALWAYS_INLINE static RefPtr<AtomicStringImpl> add(const char* s) { return 
add(reinterpret_cast<const LChar*>(s)); }; + WTF_EXPORT_STRING_API static RefPtr<AtomicStringImpl> add(const LChar*, unsigned length); + WTF_EXPORT_STRING_API static RefPtr<AtomicStringImpl> add(const UChar*, unsigned length); + ALWAYS_INLINE static RefPtr<AtomicStringImpl> add(const char* s, unsigned length) { return add(reinterpret_cast<const LChar*>(s), length); }; + WTF_EXPORT_STRING_API static Ref<AtomicStringImpl> add(const UChar*, unsigned length, unsigned existingHash); + WTF_EXPORT_STRING_API static RefPtr<AtomicStringImpl> add(const UChar*); + WTF_EXPORT_STRING_API static RefPtr<AtomicStringImpl> add(StringImpl*, unsigned offset, unsigned length); + ALWAYS_INLINE static RefPtr<AtomicStringImpl> add(StringImpl* string) + { + if (!string) + return static_cast<AtomicStringImpl*>(string); + return add(*string); + } + WTF_EXPORT_STRING_API static Ref<AtomicStringImpl> addLiteral(const char* characters, unsigned length); + + // Returns null if the input data contains an invalid UTF-8 sequence. 
+ WTF_EXPORT_STRING_API static RefPtr<AtomicStringImpl> addUTF8(const char* start, const char* end); +#if USE(CF) + WTF_EXPORT_STRING_API static RefPtr<AtomicStringImpl> add(CFStringRef); +#endif + + template<typename StringTableProvider> + ALWAYS_INLINE static RefPtr<AtomicStringImpl> addWithStringTableProvider(StringTableProvider& stringTableProvider, StringImpl* string) + { + if (!string) + return nullptr; + return add(*stringTableProvider.atomicStringTable(), *string); + } + +#if !ASSERT_DISABLED + WTF_EXPORT_STRING_API static bool isInAtomicStringTable(StringImpl*); +#endif + +private: + AtomicStringImpl() = delete; + + ALWAYS_INLINE static Ref<AtomicStringImpl> add(StringImpl& string) + { + if (string.isAtomic()) { + ASSERT_WITH_MESSAGE(!string.length() || isInAtomicStringTable(&string), "The atomic string comes from an other thread!"); + return static_cast<AtomicStringImpl&>(string); + } + return addSlowCase(string); + } + + ALWAYS_INLINE static Ref<AtomicStringImpl> add(AtomicStringTable& stringTable, StringImpl& string) + { + if (string.isAtomic()) { + ASSERT_WITH_MESSAGE(!string.length() || isInAtomicStringTable(&string), "The atomic string comes from an other thread!"); + return static_cast<AtomicStringImpl&>(string); + } + return addSlowCase(stringTable, string); + } + + WTF_EXPORT_STRING_API static Ref<AtomicStringImpl> addSlowCase(StringImpl&); + WTF_EXPORT_STRING_API static Ref<AtomicStringImpl> addSlowCase(AtomicStringTable&, StringImpl&); + + WTF_EXPORT_STRING_API static RefPtr<AtomicStringImpl> lookUpSlowCase(StringImpl&); + + WTF_EXPORT_STRING_API static RefPtr<AtomicStringImpl> lookUpInternal(const LChar*, unsigned length); + WTF_EXPORT_STRING_API static RefPtr<AtomicStringImpl> lookUpInternal(const UChar*, unsigned length); }; #if !ASSERT_DISABLED -// AtomicStringImpls created from StaticASCIILiteral will ASSERT +// AtomicStringImpls created from StaticStringImpl will ASSERT // in the generic ValueCheck<T>::checkConsistency // as they are not 
allocated by fastMalloc. // We don't currently have any way to detect that case diff --git a/Source/WTF/wtf/text/AtomicStringTable.cpp b/Source/WTF/wtf/text/AtomicStringTable.cpp index d961b17e2..fe8a4884d 100644 --- a/Source/WTF/wtf/text/AtomicStringTable.cpp +++ b/Source/WTF/wtf/text/AtomicStringTable.cpp @@ -37,25 +37,28 @@ void AtomicStringTable::create(WTFThreadData& data) bool currentThreadIsWebThread = isWebThread(); if (currentThreadIsWebThread || isUIThread()) - data.m_atomicStringTable = sharedStringTable; + data.m_defaultAtomicStringTable = sharedStringTable; else - data.m_atomicStringTable = new AtomicStringTable; + data.m_defaultAtomicStringTable = new AtomicStringTable; // We do the following so that its destruction happens only // once - on the main UI thread. if (!currentThreadIsWebThread) data.m_atomicStringTableDestructor = AtomicStringTable::destroy; #else - data.m_atomicStringTable = new AtomicStringTable; + data.m_defaultAtomicStringTable = new AtomicStringTable; data.m_atomicStringTableDestructor = AtomicStringTable::destroy; #endif // USE(WEB_THREAD) } +AtomicStringTable::~AtomicStringTable() +{ + for (auto* string : m_table) + string->setIsAtomic(false); +} + void AtomicStringTable::destroy(AtomicStringTable* table) { - HashSet<StringImpl*>::iterator end = table->m_table.end(); - for (HashSet<StringImpl*>::iterator iter = table->m_table.begin(); iter != end; ++iter) - (*iter)->setIsAtomic(false); delete table; } diff --git a/Source/WTF/wtf/text/AtomicStringTable.h b/Source/WTF/wtf/text/AtomicStringTable.h index 57826cb71..71d956d27 100644 --- a/Source/WTF/wtf/text/AtomicStringTable.h +++ b/Source/WTF/wtf/text/AtomicStringTable.h @@ -33,6 +33,7 @@ class StringImpl; class AtomicStringTable { WTF_MAKE_FAST_ALLOCATED; public: + WTF_EXPORT_PRIVATE ~AtomicStringTable(); static void create(WTFThreadData&); HashSet<StringImpl*>& table() { return m_table; } diff --git a/Source/WTF/wtf/text/Base64.cpp b/Source/WTF/wtf/text/Base64.cpp index 
2323f3fa3..714a7ead4 100644 --- a/Source/WTF/wtf/text/Base64.cpp +++ b/Source/WTF/wtf/text/Base64.cpp @@ -1,7 +1,7 @@ /* Copyright (C) 2000-2001 Dawit Alemayehu <adawit@kde.org> Copyright (C) 2006 Alexey Proskuryakov <ap@webkit.org> - Copyright (C) 2007, 2008, 2013 Apple Inc. All rights reserved. + Copyright (C) 2007, 2008, 2013, 2016 Apple Inc. All rights reserved. Copyright (C) 2010 Patrick Gansterer <paroga@paroga.com> This program is free software; you can redistribute it and/or modify @@ -92,7 +92,7 @@ static const char base64URLDecMap[128] = { 0x31, 0x32, 0x33, nonAlphabet, nonAlphabet, nonAlphabet, nonAlphabet, nonAlphabet }; -static inline void base64EncodeInternal(const char* data, unsigned len, Vector<char>& out, Base64EncodePolicy policy, const char (&encodeMap)[64]) +static inline void base64EncodeInternal(const unsigned char* data, unsigned len, Vector<char>& out, Base64EncodePolicy policy, const char (&encodeMap)[64]) { out.clear(); if (!len) @@ -160,29 +160,29 @@ static inline void base64EncodeInternal(const char* data, unsigned len, Vector<c String base64Encode(const void* data, unsigned length, Base64EncodePolicy policy) { Vector<char> result; - base64EncodeInternal(static_cast<const char*>(data), length, result, policy, base64EncMap); + base64EncodeInternal(static_cast<const unsigned char*>(data), length, result, policy, base64EncMap); return String(result.data(), result.size()); } void base64Encode(const void* data, unsigned len, Vector<char>& out, Base64EncodePolicy policy) { - base64EncodeInternal(static_cast<const char*>(data), len, out, policy, base64EncMap); + base64EncodeInternal(static_cast<const unsigned char*>(data), len, out, policy, base64EncMap); } String base64URLEncode(const void* data, unsigned length) { Vector<char> result; - base64EncodeInternal(static_cast<const char*>(data), length, result, Base64URLPolicy, base64URLEncMap); + base64EncodeInternal(static_cast<const unsigned char*>(data), length, result, Base64URLPolicy, 
base64URLEncMap); return String(result.data(), result.size()); } void base64URLEncode(const void* data, unsigned len, Vector<char>& out) { - base64EncodeInternal(static_cast<const char*>(data), len, out, Base64URLPolicy, base64URLEncMap); + base64EncodeInternal(static_cast<const unsigned char*>(data), len, out, Base64URLPolicy, base64URLEncMap); } template<typename T> -static inline bool base64DecodeInternal(const T* data, unsigned length, Vector<char>& out, Base64DecodePolicy policy, const char (&decodeMap)[128]) +static inline bool base64DecodeInternal(const T* data, unsigned length, SignedOrUnsignedCharVectorAdapter& out, unsigned options, const char (&decodeMap)[128]) { out.clear(); if (!length) @@ -192,29 +192,47 @@ static inline bool base64DecodeInternal(const T* data, unsigned length, Vector<c unsigned equalsSignCount = 0; unsigned outLength = 0; + bool hadError = false; for (unsigned idx = 0; idx < length; ++idx) { unsigned ch = data[idx]; if (ch == '=') { ++equalsSignCount; - // There should be no padding if length is a multiple of 4, and there - // should never be more than 2 padding characters. - if (policy == Base64FailOnInvalidCharacterOrExcessPadding && (length % 4 || equalsSignCount > 2)) - return false; + // There should never be more than 2 padding characters. + if (options & Base64ValidatePadding && equalsSignCount > 2) { + hadError = true; + break; + } } else { char decodedCharacter = ch < WTF_ARRAY_LENGTH(decodeMap) ? 
decodeMap[ch] : nonAlphabet; if (decodedCharacter != nonAlphabet) { - if (equalsSignCount) - return false; - out[outLength] = decodedCharacter; - ++outLength; - } else if (policy == Base64FailOnInvalidCharacterOrExcessPadding || policy == Base64FailOnInvalidCharacter || (policy == Base64IgnoreWhitespace && !isSpaceOrNewline(ch))) - return false; + if (equalsSignCount) { + hadError = true; + break; + } + out[outLength++] = decodedCharacter; + } else if (!(options & Base64IgnoreSpacesAndNewLines) || !isSpaceOrNewline(ch)) { + hadError = true; + break; + } } } + // Make sure we shrink back the Vector before returning. outLength may be shorter than expected + // in case of error or in case of ignored spaces. + if (outLength < out.size()) + out.shrink(outLength); + + if (hadError) + return false; + if (!outLength) return !equalsSignCount; + // The should be no padding if length is a multiple of 4. + // We use (outLength + equalsSignCount) instead of length because we don't want to account for ignored characters (i.e. spaces). + if (options & Base64ValidatePadding && equalsSignCount && (outLength + equalsSignCount) % 4) + return false; + // Valid data is (n * 4 + [0,2,3]) characters long. 
if ((outLength % 4) == 1) return false; @@ -248,12 +266,15 @@ static inline bool base64DecodeInternal(const T* data, unsigned length, Vector<c return true; } -bool base64Decode(const String& in, SignedOrUnsignedCharVectorAdapter out, Base64DecodePolicy policy) +bool base64Decode(const String& in, SignedOrUnsignedCharVectorAdapter out, unsigned options) { - return base64DecodeInternal<UChar>(in.deprecatedCharacters(), in.length(), out, policy, base64DecMap); + unsigned length = in.length(); + if (!length || in.is8Bit()) + return base64DecodeInternal(in.characters8(), length, out, options, base64DecMap); + return base64DecodeInternal(in.characters16(), length, out, options, base64DecMap); } -bool base64Decode(const Vector<char>& in, SignedOrUnsignedCharVectorAdapter out, Base64DecodePolicy policy) +bool base64Decode(const Vector<char>& in, SignedOrUnsignedCharVectorAdapter out, unsigned options) { out.clear(); @@ -261,17 +282,20 @@ bool base64Decode(const Vector<char>& in, SignedOrUnsignedCharVectorAdapter out, if (in.size() > UINT_MAX) return false; - return base64DecodeInternal<char>(in.data(), in.size(), out, policy, base64DecMap); + return base64DecodeInternal(reinterpret_cast<const LChar*>(in.data()), in.size(), out, options, base64DecMap); } -bool base64Decode(const char* data, unsigned len, SignedOrUnsignedCharVectorAdapter out, Base64DecodePolicy policy) +bool base64Decode(const char* data, unsigned len, SignedOrUnsignedCharVectorAdapter out, unsigned options) { - return base64DecodeInternal<char>(data, len, out, policy, base64DecMap); + return base64DecodeInternal(reinterpret_cast<const LChar*>(data), len, out, options, base64DecMap); } bool base64URLDecode(const String& in, SignedOrUnsignedCharVectorAdapter out) { - return base64DecodeInternal<UChar>(in.deprecatedCharacters(), in.length(), out, Base64FailOnInvalidCharacter, base64URLDecMap); + unsigned length = in.length(); + if (!length || in.is8Bit()) + return base64DecodeInternal(in.characters8(), 
length, out, Base64Default, base64URLDecMap); + return base64DecodeInternal(in.characters16(), length, out, Base64Default, base64URLDecMap); } bool base64URLDecode(const Vector<char>& in, SignedOrUnsignedCharVectorAdapter out) @@ -282,12 +306,12 @@ bool base64URLDecode(const Vector<char>& in, SignedOrUnsignedCharVectorAdapter o if (in.size() > UINT_MAX) return false; - return base64DecodeInternal<char>(in.data(), in.size(), out, Base64FailOnInvalidCharacter, base64URLDecMap); + return base64DecodeInternal(reinterpret_cast<const LChar*>(in.data()), in.size(), out, Base64Default, base64URLDecMap); } bool base64URLDecode(const char* data, unsigned len, SignedOrUnsignedCharVectorAdapter out) { - return base64DecodeInternal<char>(data, len, out, Base64FailOnInvalidCharacter, base64URLDecMap); + return base64DecodeInternal(reinterpret_cast<const LChar*>(data), len, out, Base64Default, base64URLDecMap); } } // namespace WTF diff --git a/Source/WTF/wtf/text/Base64.h b/Source/WTF/wtf/text/Base64.h index 1dfcf2698..820557558 100644 --- a/Source/WTF/wtf/text/Base64.h +++ b/Source/WTF/wtf/text/Base64.h @@ -1,7 +1,7 @@ /* * Copyright (C) 2006 Alexey Proskuryakov <ap@webkit.org> * Copyright (C) 2010 Patrick Gansterer <paroga@paroga.com> - * Copyright (C) 2013 Apple Inc. All rights reserved. + * Copyright (C) 2013, 2016 Apple Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -12,10 +12,10 @@ * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * - * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. 
OR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR @@ -40,22 +40,70 @@ enum Base64EncodePolicy { Base64URLPolicy // No padding, no LFs. }; -enum Base64DecodePolicy { - Base64FailOnInvalidCharacterOrExcessPadding, - Base64FailOnInvalidCharacter, - Base64IgnoreWhitespace, - Base64IgnoreInvalidCharacters +enum Base64DecodeOptions { + Base64Default = 0, + Base64ValidatePadding = 1 << 0, + Base64IgnoreSpacesAndNewLines = 1 << 1, }; class SignedOrUnsignedCharVectorAdapter { public: - SignedOrUnsignedCharVectorAdapter(Vector<char>& vector) { m_vector.c = &vector; } - SignedOrUnsignedCharVectorAdapter(Vector<uint8_t>& vector) { m_vector.u = &vector; } - - operator Vector<char>&() { return *m_vector.c; } - void clear() { m_vector.c->clear(); } + SignedOrUnsignedCharVectorAdapter(Vector<char>& vector) + : m_isSigned(true) + { + m_vector.c = &vector; + } + SignedOrUnsignedCharVectorAdapter(Vector<uint8_t>& vector) + : m_isSigned(false) + { + m_vector.u = &vector; + } + + uint8_t* data() + { + if (m_isSigned) + return reinterpret_cast<uint8_t*>(m_vector.c->data()); + return m_vector.u->data(); + } + + size_t size() const + { + if (m_isSigned) + return m_vector.c->size(); + return m_vector.u->size(); + } + + void clear() + { + if (m_isSigned) { + m_vector.c->clear(); + return; + } + m_vector.u->clear(); + } + + void grow(size_t size) + { + if (m_isSigned) { + m_vector.c->grow(size); + return; + } + m_vector.u->grow(size); + } + + void shrink(size_t size) + { + if (m_isSigned) { + m_vector.c->shrink(size); + return; + } + m_vector.u->shrink(size); + } + + uint8_t& operator[](size_t position) { return data()[position]; } private: + bool m_isSigned; union { Vector<char>* c; Vector<uint8_t>* u; @@ -64,14 +112,32 @@ private: class 
ConstSignedOrUnsignedCharVectorAdapter { public: - ConstSignedOrUnsignedCharVectorAdapter(const Vector<char>& vector) { m_vector.c = &vector; } - ConstSignedOrUnsignedCharVectorAdapter(const Vector<uint8_t>& vector) { m_vector.u = &vector; } - - operator const Vector<char>&() { return *m_vector.c; } - const char* data() const { return m_vector.c->data(); } - size_t size() const { return m_vector.c->size(); } + ConstSignedOrUnsignedCharVectorAdapter(const Vector<char>& vector) + : m_isSigned(false) + { + m_vector.c = &vector; + } + ConstSignedOrUnsignedCharVectorAdapter(const Vector<uint8_t>& vector) + : m_isSigned(true) + { + m_vector.u = &vector; + } + + const uint8_t* data() const + { + if (m_isSigned) + return reinterpret_cast<const uint8_t*>(m_vector.c->data()); + return m_vector.u->data(); + } + size_t size() const + { + if (m_isSigned) + return m_vector.c->size(); + return m_vector.u->size(); + } private: + bool m_isSigned; union { const Vector<char>* c; const Vector<uint8_t>* u; @@ -79,15 +145,15 @@ private: }; WTF_EXPORT_PRIVATE void base64Encode(const void*, unsigned, Vector<char>&, Base64EncodePolicy = Base64DoNotInsertLFs); -WTF_EXPORT_PRIVATE void base64Encode(ConstSignedOrUnsignedCharVectorAdapter, Vector<char>&, Base64EncodePolicy = Base64DoNotInsertLFs); -WTF_EXPORT_PRIVATE void base64Encode(const CString&, Vector<char>&, Base64EncodePolicy = Base64DoNotInsertLFs); +void base64Encode(ConstSignedOrUnsignedCharVectorAdapter, Vector<char>&, Base64EncodePolicy = Base64DoNotInsertLFs); +void base64Encode(const CString&, Vector<char>&, Base64EncodePolicy = Base64DoNotInsertLFs); WTF_EXPORT_PRIVATE String base64Encode(const void*, unsigned, Base64EncodePolicy = Base64DoNotInsertLFs); -WTF_EXPORT_PRIVATE String base64Encode(ConstSignedOrUnsignedCharVectorAdapter, Base64EncodePolicy = Base64DoNotInsertLFs); -WTF_EXPORT_PRIVATE String base64Encode(const CString&, Base64EncodePolicy = Base64DoNotInsertLFs); +String 
base64Encode(ConstSignedOrUnsignedCharVectorAdapter, Base64EncodePolicy = Base64DoNotInsertLFs); +String base64Encode(const CString&, Base64EncodePolicy = Base64DoNotInsertLFs); -WTF_EXPORT_PRIVATE bool base64Decode(const String&, SignedOrUnsignedCharVectorAdapter, Base64DecodePolicy = Base64FailOnInvalidCharacter); -WTF_EXPORT_PRIVATE bool base64Decode(const Vector<char>&, SignedOrUnsignedCharVectorAdapter, Base64DecodePolicy = Base64FailOnInvalidCharacter); -WTF_EXPORT_PRIVATE bool base64Decode(const char*, unsigned, SignedOrUnsignedCharVectorAdapter, Base64DecodePolicy = Base64FailOnInvalidCharacter); +WTF_EXPORT_PRIVATE bool base64Decode(const String&, SignedOrUnsignedCharVectorAdapter, unsigned options = Base64Default); +WTF_EXPORT_PRIVATE bool base64Decode(const Vector<char>&, SignedOrUnsignedCharVectorAdapter, unsigned options = Base64Default); +WTF_EXPORT_PRIVATE bool base64Decode(const char*, unsigned, SignedOrUnsignedCharVectorAdapter, unsigned options = Base64Default); inline void base64Encode(ConstSignedOrUnsignedCharVectorAdapter in, Vector<char>& out, Base64EncodePolicy policy) { @@ -115,11 +181,12 @@ inline String base64Encode(const CString& in, Base64EncodePolicy policy) // ====================================================================================== WTF_EXPORT_PRIVATE void base64URLEncode(const void*, unsigned, Vector<char>&); -WTF_EXPORT_PRIVATE void base64URLEncode(ConstSignedOrUnsignedCharVectorAdapter, Vector<char>&); -WTF_EXPORT_PRIVATE void base64URLEncode(const CString&, Vector<char>&); +void base64URLEncode(ConstSignedOrUnsignedCharVectorAdapter, Vector<char>&); +void base64URLEncode(const CString&, Vector<char>&); + WTF_EXPORT_PRIVATE String base64URLEncode(const void*, unsigned); -WTF_EXPORT_PRIVATE String base64URLEncode(ConstSignedOrUnsignedCharVectorAdapter); -WTF_EXPORT_PRIVATE String base64URLEncode(const CString&); +String base64URLEncode(ConstSignedOrUnsignedCharVectorAdapter); +String base64URLEncode(const CString&); 
WTF_EXPORT_PRIVATE bool base64URLDecode(const String&, SignedOrUnsignedCharVectorAdapter); WTF_EXPORT_PRIVATE bool base64URLDecode(const Vector<char>&, SignedOrUnsignedCharVectorAdapter); @@ -150,11 +217,8 @@ inline String base64URLEncode(const CString& in) using WTF::Base64EncodePolicy; using WTF::Base64DoNotInsertLFs; using WTF::Base64InsertLFs; -using WTF::Base64DecodePolicy; -using WTF::Base64FailOnInvalidCharacterOrExcessPadding; -using WTF::Base64FailOnInvalidCharacter; -using WTF::Base64IgnoreWhitespace; -using WTF::Base64IgnoreInvalidCharacters; +using WTF::Base64ValidatePadding; +using WTF::Base64IgnoreSpacesAndNewLines; using WTF::base64Encode; using WTF::base64Decode; using WTF::base64URLDecode; diff --git a/Source/WTF/wtf/text/CString.cpp b/Source/WTF/wtf/text/CString.cpp index e44a96e80..21b37eba8 100644 --- a/Source/WTF/wtf/text/CString.cpp +++ b/Source/WTF/wtf/text/CString.cpp @@ -10,10 +10,10 @@ * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * - * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. 
OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR @@ -28,18 +28,18 @@ #include "CString.h" #include <string.h> -#include <wtf/StringHasher.h> +#include <wtf/Hasher.h> namespace WTF { -PassRefPtr<CStringBuffer> CStringBuffer::createUninitialized(size_t length) +Ref<CStringBuffer> CStringBuffer::createUninitialized(size_t length) { RELEASE_ASSERT(length < (std::numeric_limits<unsigned>::max() - sizeof(CStringBuffer))); // The +1 is for the terminating null character. size_t size = sizeof(CStringBuffer) + length + 1; CStringBuffer* stringBuffer = static_cast<CStringBuffer*>(fastMalloc(size)); - return adoptRef(new (NotNull, stringBuffer) CStringBuffer(length)); + return adoptRef(*new (NotNull, stringBuffer) CStringBuffer(length)); } CString::CString(const char* str) @@ -76,7 +76,7 @@ char* CString::mutableData() return 0; return m_buffer->mutableData(); } - + CString CString::newUninitialized(size_t length, char*& characterBuffer) { CString result; @@ -92,7 +92,7 @@ void CString::copyBufferIfNeeded() if (!m_buffer || m_buffer->hasOneRef()) return; - RefPtr<CStringBuffer> buffer = m_buffer.release(); + RefPtr<CStringBuffer> buffer = WTFMove(m_buffer); size_t length = buffer->length(); m_buffer = CStringBuffer::createUninitialized(length); memcpy(m_buffer->mutableData(), buffer->data(), length + 1); diff --git a/Source/WTF/wtf/text/CString.h b/Source/WTF/wtf/text/CString.h index 1941a2dbe..4d8d80399 100644 --- a/Source/WTF/wtf/text/CString.h +++ b/Source/WTF/wtf/text/CString.h @@ -10,10 +10,10 @@ * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * - * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY + * THIS SOFTWARE IS PROVIDED BY APPLE INC. 
``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR @@ -28,7 +28,7 @@ #include <wtf/HashFunctions.h> #include <wtf/HashTraits.h> -#include <wtf/PassRefPtr.h> +#include <wtf/Ref.h> #include <wtf/RefCounted.h> namespace WTF { @@ -43,7 +43,7 @@ public: private: friend class CString; - static PassRefPtr<CStringBuffer> createUninitialized(size_t length); + static Ref<CStringBuffer> createUninitialized(size_t length); CStringBuffer(size_t length) : m_length(length) { } char* mutableData() { return reinterpret_cast_ptr<char*>(this + 1); } diff --git a/Source/WTF/wtf/text/IntegerToStringConversion.h b/Source/WTF/wtf/text/IntegerToStringConversion.h index 649fb05ef..563614d4f 100644 --- a/Source/WTF/wtf/text/IntegerToStringConversion.h +++ b/Source/WTF/wtf/text/IntegerToStringConversion.h @@ -22,8 +22,6 @@ #ifndef IntegerToStringConversion_h #define IntegerToStringConversion_h -#include "StringBuilder.h" - namespace WTF { enum PositiveOrNegativeNumber { @@ -33,22 +31,6 @@ enum PositiveOrNegativeNumber { template<typename T> struct IntegerToStringConversionTrait; -template<> struct IntegerToStringConversionTrait<AtomicString> { - typedef AtomicString ReturnType; - typedef void AdditionalArgumentType; - static ReturnType flush(LChar* characters, unsigned length, void*) { return AtomicString(characters, length); } -}; -template<> struct IntegerToStringConversionTrait<String> { - typedef String ReturnType; - typedef void AdditionalArgumentType; - static ReturnType flush(LChar* characters, unsigned length, void*) { return String(characters, 
length); } -}; -template<> struct IntegerToStringConversionTrait<StringBuilder> { - typedef void ReturnType; - typedef StringBuilder AdditionalArgumentType; - static ReturnType flush(LChar* characters, unsigned length, StringBuilder* stringBuilder) { stringBuilder->append(characters, length); } -}; - template<typename T, typename UnsignedIntegerType, PositiveOrNegativeNumber NumberType, typename AdditionalArgumentType> static typename IntegerToStringConversionTrait<T>::ReturnType numberToStringImpl(UnsignedIntegerType number, AdditionalArgumentType additionalArgument) { @@ -81,6 +63,72 @@ inline typename IntegerToStringConversionTrait<T>::ReturnType numberToStringUnsi return numberToStringImpl<T, UnsignedIntegerType, PositiveNumber>(number, additionalArgument); } + +template<typename CharacterType, typename UnsignedIntegerType, PositiveOrNegativeNumber NumberType> +static void writeNumberToBufferImpl(UnsignedIntegerType number, CharacterType* destination) +{ + LChar buf[sizeof(UnsignedIntegerType) * 3 + 1]; + LChar* end = buf + WTF_ARRAY_LENGTH(buf); + LChar* p = end; + + do { + *--p = static_cast<LChar>((number % 10) + '0'); + number /= 10; + } while (number); + + if (NumberType == NegativeNumber) + *--p = '-'; + + while (p < end) + *destination++ = static_cast<CharacterType>(*p++); +} + +template<typename CharacterType, typename SignedIntegerType> +inline void writeNumberToBufferSigned(SignedIntegerType number, CharacterType* destination) +{ + if (number < 0) + return writeNumberToBufferImpl<CharacterType, typename std::make_unsigned<SignedIntegerType>::type, NegativeNumber>(-number, destination); + return writeNumberToBufferImpl<CharacterType, typename std::make_unsigned<SignedIntegerType>::type, PositiveNumber>(number, destination); +} + +template<typename CharacterType, typename UnsignedIntegerType> +inline void writeNumberToBufferUnsigned(UnsignedIntegerType number, CharacterType* destination) +{ + return writeNumberToBufferImpl<CharacterType, 
UnsignedIntegerType, PositiveNumber>(number, destination); +} + + +template<typename UnsignedIntegerType, PositiveOrNegativeNumber NumberType> +static unsigned lengthOfNumberAsStringImpl(UnsignedIntegerType number) +{ + unsigned length = 0; + + do { + ++length; + number /= 10; + } while (number); + + if (NumberType == NegativeNumber) + ++length; + + return length; +} + +template<typename SignedIntegerType> +inline unsigned lengthOfNumberAsStringSigned(SignedIntegerType number) +{ + if (number < 0) + return lengthOfNumberAsStringImpl<typename std::make_unsigned<SignedIntegerType>::type, NegativeNumber>(-number); + return lengthOfNumberAsStringImpl<typename std::make_unsigned<SignedIntegerType>::type, PositiveNumber>(number); +} + +template<typename UnsignedIntegerType> +inline unsigned lengthOfNumberAsStringUnsigned(UnsignedIntegerType number) +{ + return lengthOfNumberAsStringImpl<UnsignedIntegerType, PositiveNumber>(number); +} + + } // namespace WTF #endif // IntegerToStringConversion_h diff --git a/Source/WTF/wtf/text/LChar.h b/Source/WTF/wtf/text/LChar.h index b7bb89794..4d31dafb9 100644 --- a/Source/WTF/wtf/text/LChar.h +++ b/Source/WTF/wtf/text/LChar.h @@ -10,17 +10,17 @@ * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * - * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. 
OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * */ @@ -29,7 +29,7 @@ // A type to hold a single Latin-1 character. // This type complements the UChar type that we get from the ICU library. -// To parallel that type, we put it outside any namespace. +// To parallel that type, we put this one in the global namespace. typedef unsigned char LChar; #endif diff --git a/Source/WTF/wtf/text/LineBreakIteratorPoolICU.h b/Source/WTF/wtf/text/LineBreakIteratorPoolICU.h new file mode 100644 index 000000000..0cbae4030 --- /dev/null +++ b/Source/WTF/wtf/text/LineBreakIteratorPoolICU.h @@ -0,0 +1,132 @@ +/* + * Copyright (C) 2011 Apple Inc. All Rights Reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. 
OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +#include "TextBreakIterator.h" +#include <unicode/uloc.h> +#include <wtf/HashMap.h> +#include <wtf/NeverDestroyed.h> +#include <wtf/ThreadSpecific.h> +#include <wtf/text/AtomicString.h> + +namespace WTF { + +class LineBreakIteratorPool { + WTF_MAKE_NONCOPYABLE(LineBreakIteratorPool); +public: + LineBreakIteratorPool() = default; + + static LineBreakIteratorPool& sharedPool() + { + static NeverDestroyed<WTF::ThreadSpecific<LineBreakIteratorPool>> pool; + return *pool.get(); + } + + static AtomicString makeLocaleWithBreakKeyword(const AtomicString& locale, LineBreakIteratorMode mode) + { + // The uloc functions model locales as char*, so we have to downconvert our AtomicString. + auto utf8Locale = locale.string().utf8(); + if (!utf8Locale.length()) + return locale; + Vector<char> scratchBuffer(utf8Locale.length() + 11, 0); + memcpy(scratchBuffer.data(), utf8Locale.data(), utf8Locale.length()); + + const char* keywordValue = nullptr; + switch (mode) { + case LineBreakIteratorMode::Default: + // nullptr will cause any existing values to be removed. 
+ break; + case LineBreakIteratorMode::Loose: + keywordValue = "loose"; + break; + case LineBreakIteratorMode::Normal: + keywordValue = "normal"; + break; + case LineBreakIteratorMode::Strict: + keywordValue = "strict"; + break; + } + + UErrorCode status = U_ZERO_ERROR; + int32_t lengthNeeded = uloc_setKeywordValue("lb", keywordValue, scratchBuffer.data(), scratchBuffer.size(), &status); + if (U_SUCCESS(status)) + return AtomicString::fromUTF8(scratchBuffer.data(), lengthNeeded); + if (status == U_BUFFER_OVERFLOW_ERROR) { + scratchBuffer.grow(lengthNeeded + 1); + memset(scratchBuffer.data() + utf8Locale.length(), 0, scratchBuffer.size() - utf8Locale.length()); + status = U_ZERO_ERROR; + int32_t lengthNeeded2 = uloc_setKeywordValue("lb", keywordValue, scratchBuffer.data(), scratchBuffer.size(), &status); + if (!U_SUCCESS(status) || lengthNeeded != lengthNeeded2) + return locale; + return AtomicString::fromUTF8(scratchBuffer.data(), lengthNeeded); + } + return locale; + } + + UBreakIterator* take(const AtomicString& locale, LineBreakIteratorMode mode) + { + auto localeWithOptionalBreakKeyword = makeLocaleWithBreakKeyword(locale, mode); + + UBreakIterator* iterator = nullptr; + for (size_t i = 0; i < m_pool.size(); ++i) { + if (m_pool[i].first == localeWithOptionalBreakKeyword) { + iterator = m_pool[i].second; + m_pool.remove(i); + break; + } + } + + if (!iterator) { + iterator = openLineBreakIterator(localeWithOptionalBreakKeyword); + if (!iterator) + return nullptr; + } + + ASSERT(!m_vendedIterators.contains(iterator)); + m_vendedIterators.add(iterator, localeWithOptionalBreakKeyword); + return iterator; + } + + void put(UBreakIterator* iterator) + { + ASSERT(m_vendedIterators.contains(iterator)); + if (m_pool.size() == capacity) { + closeLineBreakIterator(m_pool[0].second); + m_pool.remove(0); + } + m_pool.uncheckedAppend({ m_vendedIterators.take(iterator), iterator }); + } + +private: + static constexpr size_t capacity = 4; + + Vector<std::pair<AtomicString, 
UBreakIterator*>, capacity> m_pool; + HashMap<UBreakIterator*, AtomicString> m_vendedIterators; + + friend WTF::ThreadSpecific<LineBreakIteratorPool>::operator LineBreakIteratorPool*(); +}; + +} diff --git a/Source/WTF/wtf/text/OrdinalNumber.h b/Source/WTF/wtf/text/OrdinalNumber.h new file mode 100644 index 000000000..bb5d62d66 --- /dev/null +++ b/Source/WTF/wtf/text/OrdinalNumber.h @@ -0,0 +1,54 @@ +/* + * Copyright (C) 2010, Google Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +namespace WTF { + +// An abstract number of element in a sequence. The sequence has a first element. 
+// This type should be used instead of integer because 2 contradicting traditions can +// call a first element '0' or '1' which makes integer type ambiguous. +class OrdinalNumber { +public: + static OrdinalNumber beforeFirst() { return OrdinalNumber(-1); } + static OrdinalNumber fromZeroBasedInt(int zeroBasedInt) { return OrdinalNumber(zeroBasedInt); } + static OrdinalNumber fromOneBasedInt(int oneBasedInt) { return OrdinalNumber(oneBasedInt - 1); } + + OrdinalNumber() : m_zeroBasedValue(0) { } + + int zeroBasedInt() const { return m_zeroBasedValue; } + int oneBasedInt() const { return m_zeroBasedValue + 1; } + + bool operator==(OrdinalNumber other) { return m_zeroBasedValue == other.m_zeroBasedValue; } + bool operator!=(OrdinalNumber other) { return !((*this) == other); } + bool operator>(OrdinalNumber other) { return m_zeroBasedValue > other.m_zeroBasedValue; } + +private: + OrdinalNumber(int zeroBasedInt) : m_zeroBasedValue(zeroBasedInt) { } + int m_zeroBasedValue; +}; + +} + +using WTF::OrdinalNumber; diff --git a/Source/WTF/wtf/text/StringBuffer.h b/Source/WTF/wtf/text/StringBuffer.h index 22e161101..f293d333d 100644 --- a/Source/WTF/wtf/text/StringBuffer.h +++ b/Source/WTF/wtf/text/StringBuffer.h @@ -30,8 +30,8 @@ #define StringBuffer_h #include <wtf/Assertions.h> -#include <wtf/unicode/Unicode.h> #include <limits> +#include <unicode/utypes.h> namespace WTF { diff --git a/Source/WTF/wtf/text/StringBuilder.cpp b/Source/WTF/wtf/text/StringBuilder.cpp index c483ba146..436015a43 100644 --- a/Source/WTF/wtf/text/StringBuilder.cpp +++ b/Source/WTF/wtf/text/StringBuilder.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2010, 2013 Apple Inc. All rights reserved. + * Copyright (C) 2010, 2013, 2016 Apple Inc. All rights reserved. * Copyright (C) 2012 Google Inc. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -28,6 +28,7 @@ #include "StringBuilder.h" #include "IntegerToStringConversion.h" +#include "MathExtras.h" #include "WTFString.h" #include <wtf/dtoa.h> @@ -58,12 +59,7 @@ void StringBuilder::reifyString() const if (m_length == m_buffer->length()) m_string = m_buffer.get(); else - m_string = StringImpl::create(m_buffer, 0, m_length); - - if (m_buffer->has16BitShadow() && m_valid16BitShadowLength < m_length) - m_buffer->upconvertCharacters(m_valid16BitShadowLength, m_length); - - m_valid16BitShadowLength = m_length; + m_string = StringImpl::createSubstringSharingImpl(*m_buffer, 0, m_length); } void StringBuilder::resize(unsigned newSize) @@ -84,6 +80,7 @@ void StringBuilder::resize(unsigned newSize) allocateBuffer(m_buffer->characters16(), m_buffer->length()); } m_length = newSize; + ASSERT(m_buffer->length() >= m_length); return; } @@ -92,7 +89,7 @@ void StringBuilder::resize(unsigned newSize) ASSERT(m_length == m_string.length()); ASSERT(newSize < m_string.length()); m_length = newSize; - m_string = StringImpl::create(m_string.impl(), 0, newSize); + m_string = StringImpl::createSubstringSharingImpl(*m_string.impl(), 0, newSize); } // Allocate a new 8 bit buffer, copying in currentCharacters (these may come from either m_string @@ -101,12 +98,13 @@ void StringBuilder::allocateBuffer(const LChar* currentCharacters, unsigned requ { ASSERT(m_is8Bit); // Copy the existing data into a new buffer, set result to point to the end of the existing data. - RefPtr<StringImpl> buffer = StringImpl::createUninitialized(requiredLength, m_bufferCharacters8); + auto buffer = StringImpl::createUninitialized(requiredLength, m_bufferCharacters8); memcpy(m_bufferCharacters8, currentCharacters, static_cast<size_t>(m_length) * sizeof(LChar)); // This can't overflow. // Update the builder state. 
- m_buffer = buffer.release(); + m_buffer = WTFMove(buffer); m_string = String(); + ASSERT(m_buffer->length() == requiredLength); } // Allocate a new 16 bit buffer, copying in currentCharacters (these may come from either m_string @@ -115,12 +113,13 @@ void StringBuilder::allocateBuffer(const UChar* currentCharacters, unsigned requ { ASSERT(!m_is8Bit); // Copy the existing data into a new buffer, set result to point to the end of the existing data. - RefPtr<StringImpl> buffer = StringImpl::createUninitialized(requiredLength, m_bufferCharacters16); + auto buffer = StringImpl::createUninitialized(requiredLength, m_bufferCharacters16); memcpy(m_bufferCharacters16, currentCharacters, static_cast<size_t>(m_length) * sizeof(UChar)); // This can't overflow. // Update the builder state. - m_buffer = buffer.release(); + m_buffer = WTFMove(buffer); m_string = String(); + ASSERT(m_buffer->length() == requiredLength); } // Allocate a new 16 bit buffer, copying in currentCharacters (which is 8 bit and may come @@ -128,16 +127,18 @@ void StringBuilder::allocateBuffer(const UChar* currentCharacters, unsigned requ void StringBuilder::allocateBufferUpConvert(const LChar* currentCharacters, unsigned requiredLength) { ASSERT(m_is8Bit); + ASSERT(requiredLength >= m_length); // Copy the existing data into a new buffer, set result to point to the end of the existing data. - RefPtr<StringImpl> buffer = StringImpl::createUninitialized(requiredLength, m_bufferCharacters16); + auto buffer = StringImpl::createUninitialized(requiredLength, m_bufferCharacters16); for (unsigned i = 0; i < m_length; ++i) m_bufferCharacters16[i] = currentCharacters[i]; m_is8Bit = false; // Update the builder state. 
- m_buffer = buffer.release(); + m_buffer = WTFMove(buffer); m_string = String(); + ASSERT(m_buffer->length() == requiredLength); } template <> @@ -151,9 +152,10 @@ void StringBuilder::reallocateBuffer<LChar>(unsigned requiredLength) ASSERT(m_buffer->is8Bit()); if (m_buffer->hasOneRef()) - m_buffer = StringImpl::reallocate(m_buffer.release(), requiredLength, m_bufferCharacters8); + m_buffer = StringImpl::reallocate(m_buffer.releaseNonNull(), requiredLength, m_bufferCharacters8); else allocateBuffer(m_buffer->characters8(), requiredLength); + ASSERT(m_buffer->length() == requiredLength); } template <> @@ -166,9 +168,10 @@ void StringBuilder::reallocateBuffer<UChar>(unsigned requiredLength) if (m_buffer->is8Bit()) allocateBufferUpConvert(m_buffer->characters8(), requiredLength); else if (m_buffer->hasOneRef()) - m_buffer = StringImpl::reallocate(m_buffer.release(), requiredLength, m_bufferCharacters16); + m_buffer = StringImpl::reallocate(m_buffer.releaseNonNull(), requiredLength, m_bufferCharacters16); else allocateBuffer(m_buffer->characters16(), requiredLength); + ASSERT(m_buffer->length() == requiredLength); } void StringBuilder::reserveCapacity(unsigned newCapacity) @@ -193,6 +196,7 @@ void StringBuilder::reserveCapacity(unsigned newCapacity) allocateBuffer(m_string.characters16(), newCapacity); } } + ASSERT(!newCapacity || m_buffer->length() >= newCapacity); } // Make 'length' additional capacity be available in m_buffer, update m_string & m_length, @@ -233,11 +237,12 @@ CharType* StringBuilder::appendUninitializedSlow(unsigned requiredLength) reallocateBuffer<CharType>(expandedCapacity(capacity(), requiredLength)); } else { ASSERT(m_string.length() == m_length); - allocateBuffer(m_length ? m_string.getCharacters<CharType>() : 0, expandedCapacity(capacity(), requiredLength)); + allocateBuffer(m_length ? 
m_string.characters<CharType>() : 0, expandedCapacity(capacity(), requiredLength)); } CharType* result = getBufferCharacters<CharType>() + m_length; m_length = requiredLength; + ASSERT(m_buffer->length() >= m_length); return result; } @@ -271,10 +276,11 @@ void StringBuilder::append(const UChar* characters, unsigned length) allocateBufferUpConvert(m_string.isNull() ? 0 : m_string.characters8(), expandedCapacity(capacity(), requiredLength)); } - memcpy(m_bufferCharacters16 + m_length, characters, static_cast<size_t>(length) * sizeof(UChar)); + memcpy(m_bufferCharacters16 + m_length, characters, static_cast<size_t>(length) * sizeof(UChar)); m_length = requiredLength; } else memcpy(appendUninitialized<UChar>(length), characters, static_cast<size_t>(length) * sizeof(UChar)); + ASSERT(m_buffer->length() >= m_length); } void StringBuilder::append(const LChar* characters, unsigned length) @@ -300,6 +306,20 @@ void StringBuilder::append(const LChar* characters, unsigned length) } } +#if USE(CF) + +void StringBuilder::append(CFStringRef string) +{ + // Fast path: avoid constructing a temporary String when possible. 
+ if (auto* characters = CFStringGetCStringPtr(string, kCFStringEncodingISOLatin1)) { + append(reinterpret_cast<const LChar*>(characters), CFStringGetLength(string)); + return; + } + append(String(string)); +} + +#endif + void StringBuilder::appendNumber(int number) { numberToStringSigned<StringBuilder>(number, this); @@ -361,8 +381,103 @@ void StringBuilder::shrinkToFit() reallocateBuffer<LChar>(m_length); else reallocateBuffer<UChar>(m_length); - m_string = m_buffer.release(); + m_string = WTFMove(m_buffer); + } +} + +template <typename OutputCharacterType, typename InputCharacterType> +static void appendQuotedJSONStringInternalSlow(OutputCharacterType*& output, const InputCharacterType character) +{ + switch (character) { + case '\t': + *output++ = '\\'; + *output++ = 't'; + break; + case '\r': + *output++ = '\\'; + *output++ = 'r'; + break; + case '\n': + *output++ = '\\'; + *output++ = 'n'; + break; + case '\f': + *output++ = '\\'; + *output++ = 'f'; + break; + case '\b': + *output++ = '\\'; + *output++ = 'b'; + break; + default: + ASSERT(!(character & 0xFF00)); + *output++ = '\\'; + *output++ = 'u'; + *output++ = '0'; + *output++ = '0'; + *output++ = upperNibbleToLowercaseASCIIHexDigit(character); + *output++ = lowerNibbleToLowercaseASCIIHexDigit(character); + break; + } +} + +template <typename OutputCharacterType, typename InputCharacterType> +static void appendQuotedJSONStringInternal(OutputCharacterType*& output, const InputCharacterType* input, unsigned length) +{ + for (const InputCharacterType* end = input + length; input != end; ++input) { + const InputCharacterType character = *input; + if (LIKELY(character != '"' && character != '\\' && character > 0x1F)) { + *output++ = character; + continue; + } + + if (character == '"' || character == '\\') { + *output++ = '\\'; + *output++ = character; + continue; + } + + appendQuotedJSONStringInternalSlow(output, character); + } +} + +void StringBuilder::appendQuotedJSONString(const String& string) +{ + // Make 
sure we have enough buffer space to append this string without having + // to worry about reallocating in the middle. + // The 2 is for the '"' quotes on each end. + // The 6 is for characters that need to be \uNNNN encoded. + Checked<unsigned> stringLength = string.length(); + Checked<unsigned> maximumCapacityRequired = length(); + maximumCapacityRequired += 2 + stringLength * 6; + unsigned allocationSize = maximumCapacityRequired.unsafeGet(); + // This max() is here to allow us to allocate sizes between the range [2^31, 2^32 - 2] because roundUpToPowerOfTwo(1<<31 + some int smaller than 1<<31) == 0. + allocationSize = std::max(allocationSize, roundUpToPowerOfTwo(allocationSize)); + + if (is8Bit() && !string.is8Bit()) + allocateBufferUpConvert(m_bufferCharacters8, allocationSize); + else + reserveCapacity(allocationSize); + ASSERT(m_buffer->length() >= allocationSize); + + if (is8Bit()) { + ASSERT(string.is8Bit()); + LChar* output = m_bufferCharacters8 + m_length; + *output++ = '"'; + appendQuotedJSONStringInternal(output, string.characters8(), string.length()); + *output++ = '"'; + m_length = output - m_bufferCharacters8; + } else { + UChar* output = m_bufferCharacters16 + m_length; + *output++ = '"'; + if (string.is8Bit()) + appendQuotedJSONStringInternal(output, string.characters8(), string.length()); + else + appendQuotedJSONStringInternal(output, string.characters16(), string.length()); + *output++ = '"'; + m_length = output - m_bufferCharacters16; } + ASSERT(m_buffer->length() >= m_length); } } // namespace WTF diff --git a/Source/WTF/wtf/text/StringBuilder.h b/Source/WTF/wtf/text/StringBuilder.h index 26be90633..d02737a02 100644 --- a/Source/WTF/wtf/text/StringBuilder.h +++ b/Source/WTF/wtf/text/StringBuilder.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2009, 2010, 2012, 2013 Apple Inc. All rights reserved. + * Copyright (C) 2009-2010, 2012-2013, 2016 Apple Inc. All rights reserved. * Copyright (C) 2012 Google Inc. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -28,6 +28,8 @@ #define StringBuilder_h #include <wtf/text/AtomicString.h> +#include <wtf/text/IntegerToStringConversion.h> +#include <wtf/text/StringView.h> #include <wtf/text/WTFString.h> namespace WTF { @@ -40,7 +42,6 @@ public: StringBuilder() : m_length(0) , m_is8Bit(true) - , m_valid16BitShadowLength(0) , m_bufferCharacters8(0) { } @@ -50,6 +51,11 @@ public: ALWAYS_INLINE void append(const char* characters, unsigned length) { append(reinterpret_cast<const LChar*>(characters), length); } + void append(const AtomicString& atomicString) + { + append(atomicString.string()); + } + void append(const String& string) { if (!string.length()) @@ -89,6 +95,21 @@ public: append(other.characters16(), other.m_length); } + void append(StringView stringView) + { + if (stringView.is8Bit()) + append(stringView.characters8(), stringView.length()); + else + append(stringView.characters16(), stringView.length()); + } + +#if USE(CF) + WTF_EXPORT_PRIVATE void append(CFStringRef); +#endif +#if USE(CF) && defined(__OBJC__) + void append(NSString *string) { append((__bridge CFStringRef)string); } +#endif + void append(const String& string, unsigned offset, unsigned length) { if (!string.length()) @@ -151,6 +172,8 @@ public: append(U16_TRAIL(c)); } + WTF_EXPORT_PRIVATE void appendQuotedJSONString(const String&); + template<unsigned charactersCount> ALWAYS_INLINE void appendLiteral(const char (&characters)[charactersCount]) { append(characters, charactersCount - 1); } @@ -248,32 +271,15 @@ public: return m_buffer->characters16(); } - const UChar* characters() const { return deprecatedCharacters(); } // FIXME: Delete this. 
- const UChar* deprecatedCharacters() const - { - if (!m_length) - return 0; - if (!m_string.isNull()) - return m_string.deprecatedCharacters(); - ASSERT(m_buffer); - if (m_buffer->has16BitShadow() && m_valid16BitShadowLength < m_length) - m_buffer->upconvertCharacters(m_valid16BitShadowLength, m_length); - - m_valid16BitShadowLength = m_length; - - return m_buffer->deprecatedCharacters(); - } - bool is8Bit() const { return m_is8Bit; } void clear() { m_length = 0; m_string = String(); - m_buffer = 0; + m_buffer = nullptr; m_bufferCharacters8 = 0; m_is8Bit = true; - m_valid16BitShadowLength = 0; } void swap(StringBuilder& stringBuilder) @@ -282,8 +288,8 @@ public: m_string.swap(stringBuilder.m_string); m_buffer.swap(stringBuilder.m_buffer); std::swap(m_is8Bit, stringBuilder.m_is8Bit); - std::swap(m_valid16BitShadowLength, stringBuilder.m_valid16BitShadowLength); std::swap(m_bufferCharacters8, stringBuilder.m_bufferCharacters8); + ASSERT(!m_buffer || m_buffer->length() >= m_length); } private: @@ -304,7 +310,6 @@ private: mutable String m_string; RefPtr<StringImpl> m_buffer; bool m_is8Bit; - mutable unsigned m_valid16BitShadowLength; union { LChar* m_bufferCharacters8; UChar* m_bufferCharacters16; @@ -364,6 +369,12 @@ inline bool operator!=(const StringBuilder& a, const String& b) { return !equal( inline bool operator==(const String& a, const StringBuilder& b) { return equal(b, a); } inline bool operator!=(const String& a, const StringBuilder& b) { return !equal(b, a); } +template<> struct IntegerToStringConversionTrait<StringBuilder> { + using ReturnType = void; + using AdditionalArgumentType = StringBuilder; + static void flush(LChar* characters, unsigned length, StringBuilder* stringBuilder) { stringBuilder->append(characters, length); } +}; + } // namespace WTF using WTF::StringBuilder; diff --git a/Source/WTF/wtf/text/StringCommon.h b/Source/WTF/wtf/text/StringCommon.h new file mode 100644 index 000000000..d35d8905d --- /dev/null +++ 
b/Source/WTF/wtf/text/StringCommon.h @@ -0,0 +1,656 @@ +/* + * Copyright (C) 2015-2016 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef StringCommon_h +#define StringCommon_h + +#include <unicode/uchar.h> +#include <wtf/ASCIICType.h> + +namespace WTF { + +template<typename CharacterTypeA, typename CharacterTypeB> bool equalIgnoringASCIICase(const CharacterTypeA*, const CharacterTypeB*, unsigned length); +template<typename CharacterTypeA, typename CharacterTypeB> bool equalIgnoringASCIICase(const CharacterTypeA*, unsigned lengthA, const CharacterTypeB*, unsigned lengthB); + +template<typename StringClassA, typename StringClassB> bool equalIgnoringASCIICaseCommon(const StringClassA&, const StringClassB&); + +template<typename CharacterType> bool equalLettersIgnoringASCIICase(const CharacterType*, const char* lowercaseLetters, unsigned length); +template<typename CharacterType, unsigned lowercaseLettersLength> bool equalLettersIgnoringASCIICase(const CharacterType*, unsigned charactersLength, const char (&lowercaseLetters)[lowercaseLettersLength]); + +template<typename StringClass, unsigned length> bool equalLettersIgnoringASCIICaseCommon(const StringClass&, const char (&lowercaseLetters)[length]); + +template<typename T> +inline T loadUnaligned(const char* s) +{ +#if COMPILER(CLANG) + T tmp; + memcpy(&tmp, s, sizeof(T)); + return tmp; +#else + // This may result in undefined behavior due to unaligned access. + return *reinterpret_cast<const T*>(s); +#endif +} + +// Do comparisons 8 or 4 bytes-at-a-time on architectures where it's safe. 
+#if (CPU(X86_64) || CPU(ARM64)) && !ASAN_ENABLED +ALWAYS_INLINE bool equal(const LChar* aLChar, const LChar* bLChar, unsigned length) +{ + unsigned dwordLength = length >> 3; + + const char* a = reinterpret_cast<const char*>(aLChar); + const char* b = reinterpret_cast<const char*>(bLChar); + + if (dwordLength) { + for (unsigned i = 0; i != dwordLength; ++i) { + if (loadUnaligned<uint64_t>(a) != loadUnaligned<uint64_t>(b)) + return false; + + a += sizeof(uint64_t); + b += sizeof(uint64_t); + } + } + + if (length & 4) { + if (loadUnaligned<uint32_t>(a) != loadUnaligned<uint32_t>(b)) + return false; + + a += sizeof(uint32_t); + b += sizeof(uint32_t); + } + + if (length & 2) { + if (loadUnaligned<uint16_t>(a) != loadUnaligned<uint16_t>(b)) + return false; + + a += sizeof(uint16_t); + b += sizeof(uint16_t); + } + + if (length & 1 && (*reinterpret_cast<const LChar*>(a) != *reinterpret_cast<const LChar*>(b))) + return false; + + return true; +} + +ALWAYS_INLINE bool equal(const UChar* aUChar, const UChar* bUChar, unsigned length) +{ + unsigned dwordLength = length >> 2; + + const char* a = reinterpret_cast<const char*>(aUChar); + const char* b = reinterpret_cast<const char*>(bUChar); + + if (dwordLength) { + for (unsigned i = 0; i != dwordLength; ++i) { + if (loadUnaligned<uint64_t>(a) != loadUnaligned<uint64_t>(b)) + return false; + + a += sizeof(uint64_t); + b += sizeof(uint64_t); + } + } + + if (length & 2) { + if (loadUnaligned<uint32_t>(a) != loadUnaligned<uint32_t>(b)) + return false; + + a += sizeof(uint32_t); + b += sizeof(uint32_t); + } + + if (length & 1 && (*reinterpret_cast<const UChar*>(a) != *reinterpret_cast<const UChar*>(b))) + return false; + + return true; +} +#elif CPU(X86) && !ASAN_ENABLED +ALWAYS_INLINE bool equal(const LChar* aLChar, const LChar* bLChar, unsigned length) +{ + const char* a = reinterpret_cast<const char*>(aLChar); + const char* b = reinterpret_cast<const char*>(bLChar); + + unsigned wordLength = length >> 2; + for (unsigned i = 0; i 
!= wordLength; ++i) { + if (loadUnaligned<uint32_t>(a) != loadUnaligned<uint32_t>(b)) + return false; + a += sizeof(uint32_t); + b += sizeof(uint32_t); + } + + length &= 3; + + if (length) { + const LChar* aRemainder = reinterpret_cast<const LChar*>(a); + const LChar* bRemainder = reinterpret_cast<const LChar*>(b); + + for (unsigned i = 0; i < length; ++i) { + if (aRemainder[i] != bRemainder[i]) + return false; + } + } + + return true; +} + +ALWAYS_INLINE bool equal(const UChar* aUChar, const UChar* bUChar, unsigned length) +{ + const char* a = reinterpret_cast<const char*>(aUChar); + const char* b = reinterpret_cast<const char*>(bUChar); + + unsigned wordLength = length >> 1; + for (unsigned i = 0; i != wordLength; ++i) { + if (loadUnaligned<uint32_t>(a) != loadUnaligned<uint32_t>(b)) + return false; + a += sizeof(uint32_t); + b += sizeof(uint32_t); + } + + if (length & 1 && *reinterpret_cast<const UChar*>(a) != *reinterpret_cast<const UChar*>(b)) + return false; + + return true; +} +#elif PLATFORM(IOS) && WTF_ARM_ARCH_AT_LEAST(7) && !ASAN_ENABLED +ALWAYS_INLINE bool equal(const LChar* a, const LChar* b, unsigned length) +{ + bool isEqual = false; + uint32_t aValue; + uint32_t bValue; + asm("subs %[length], #4\n" + "blo 2f\n" + + "0:\n" // Label 0 = Start of loop over 32 bits. + "ldr %[aValue], [%[a]], #4\n" + "ldr %[bValue], [%[b]], #4\n" + "cmp %[aValue], %[bValue]\n" + "bne 66f\n" + "subs %[length], #4\n" + "bhs 0b\n" + + // At this point, length can be: + // -0: 00000000000000000000000000000000 (0 bytes left) + // -1: 11111111111111111111111111111111 (3 bytes left) + // -2: 11111111111111111111111111111110 (2 bytes left) + // -3: 11111111111111111111111111111101 (1 byte left) + // -4: 11111111111111111111111111111100 (length was 0) + // The pointers are at the correct position. + "2:\n" // Label 2 = End of loop over 32 bits, check for pair of characters. 
+ "tst %[length], #2\n" + "beq 1f\n" + "ldrh %[aValue], [%[a]], #2\n" + "ldrh %[bValue], [%[b]], #2\n" + "cmp %[aValue], %[bValue]\n" + "bne 66f\n" + + "1:\n" // Label 1 = Check for a single character left. + "tst %[length], #1\n" + "beq 42f\n" + "ldrb %[aValue], [%[a]]\n" + "ldrb %[bValue], [%[b]]\n" + "cmp %[aValue], %[bValue]\n" + "bne 66f\n" + + "42:\n" // Label 42 = Success. + "mov %[isEqual], #1\n" + "66:\n" // Label 66 = End without changing isEqual to 1. + : [length]"+r"(length), [isEqual]"+r"(isEqual), [a]"+r"(a), [b]"+r"(b), [aValue]"+r"(aValue), [bValue]"+r"(bValue) + : + : + ); + return isEqual; +} + +ALWAYS_INLINE bool equal(const UChar* a, const UChar* b, unsigned length) +{ + bool isEqual = false; + uint32_t aValue; + uint32_t bValue; + asm("subs %[length], #2\n" + "blo 1f\n" + + "0:\n" // Label 0 = Start of loop over 32 bits. + "ldr %[aValue], [%[a]], #4\n" + "ldr %[bValue], [%[b]], #4\n" + "cmp %[aValue], %[bValue]\n" + "bne 66f\n" + "subs %[length], #2\n" + "bhs 0b\n" + + // At this point, length can be: + // -0: 00000000000000000000000000000000 (0 bytes left) + // -1: 11111111111111111111111111111111 (1 character left, 2 bytes) + // -2: 11111111111111111111111111111110 (length was zero) + // The pointers are at the correct position. + "1:\n" // Label 1 = Check for a single character left. + "tst %[length], #1\n" + "beq 42f\n" + "ldrh %[aValue], [%[a]]\n" + "ldrh %[bValue], [%[b]]\n" + "cmp %[aValue], %[bValue]\n" + "bne 66f\n" + + "42:\n" // Label 42 = Success. + "mov %[isEqual], #1\n" + "66:\n" // Label 66 = End without changing isEqual to 1. 
+ : [length]"+r"(length), [isEqual]"+r"(isEqual), [a]"+r"(a), [b]"+r"(b), [aValue]"+r"(aValue), [bValue]"+r"(bValue) + : + : + ); + return isEqual; +} +#elif !ASAN_ENABLED +ALWAYS_INLINE bool equal(const LChar* a, const LChar* b, unsigned length) { return !memcmp(a, b, length); } +ALWAYS_INLINE bool equal(const UChar* a, const UChar* b, unsigned length) { return !memcmp(a, b, length * sizeof(UChar)); } +#else +ALWAYS_INLINE bool equal(const LChar* a, const LChar* b, unsigned length) +{ + for (unsigned i = 0; i < length; ++i) { + if (a[i] != b[i]) + return false; + } + return true; +} +ALWAYS_INLINE bool equal(const UChar* a, const UChar* b, unsigned length) +{ + for (unsigned i = 0; i < length; ++i) { + if (a[i] != b[i]) + return false; + } + return true; +} +#endif + +ALWAYS_INLINE bool equal(const LChar* a, const UChar* b, unsigned length) +{ + for (unsigned i = 0; i < length; ++i) { + if (a[i] != b[i]) + return false; + } + return true; +} + +ALWAYS_INLINE bool equal(const UChar* a, const LChar* b, unsigned length) { return equal(b, a, length); } + +template<typename StringClassA, typename StringClassB> +ALWAYS_INLINE bool equalCommon(const StringClassA& a, const StringClassB& b) +{ + unsigned length = a.length(); + if (length != b.length()) + return false; + + if (a.is8Bit()) { + if (b.is8Bit()) + return equal(a.characters8(), b.characters8(), length); + + return equal(a.characters8(), b.characters16(), length); + } + + if (b.is8Bit()) + return equal(a.characters16(), b.characters8(), length); + + return equal(a.characters16(), b.characters16(), length); +} + +template<typename StringClassA, typename StringClassB> +ALWAYS_INLINE bool equalCommon(const StringClassA* a, const StringClassB* b) +{ + if (a == b) + return true; + if (!a || !b) + return false; + return equal(*a, *b); +} + +template<typename StringClass, unsigned length> bool equal(const StringClass& a, const UChar (&codeUnits)[length]) +{ + if (a.length() != length) + return false; + + if (a.is8Bit()) 
+ return equal(a.characters8(), codeUnits, length); + + return equal(a.characters16(), codeUnits, length); +} + +template<typename CharacterTypeA, typename CharacterTypeB> +inline bool equalIgnoringASCIICase(const CharacterTypeA* a, const CharacterTypeB* b, unsigned length) +{ + for (unsigned i = 0; i < length; ++i) { + if (toASCIILower(a[i]) != toASCIILower(b[i])) + return false; + } + return true; +} + +template<typename CharacterTypeA, typename CharacterTypeB> inline bool equalIgnoringASCIICase(const CharacterTypeA* a, unsigned lengthA, const CharacterTypeB* b, unsigned lengthB) +{ + return lengthA == lengthB && equalIgnoringASCIICase(a, b, lengthA); +} + +template<typename StringClassA, typename StringClassB> +bool equalIgnoringASCIICaseCommon(const StringClassA& a, const StringClassB& b) +{ + unsigned length = a.length(); + if (length != b.length()) + return false; + + if (a.is8Bit()) { + if (b.is8Bit()) + return equalIgnoringASCIICase(a.characters8(), b.characters8(), length); + + return equalIgnoringASCIICase(a.characters8(), b.characters16(), length); + } + + if (b.is8Bit()) + return equalIgnoringASCIICase(a.characters16(), b.characters8(), length); + + return equalIgnoringASCIICase(a.characters16(), b.characters16(), length); +} + +template<typename StringClassA> bool equalIgnoringASCIICaseCommon(const StringClassA& a, const char* b) +{ + unsigned length = a.length(); + if (length != strlen(b)) + return false; + + if (a.is8Bit()) + return equalIgnoringASCIICase(a.characters8(), b, length); + + return equalIgnoringASCIICase(a.characters16(), b, length); +} + +template<typename StringClassA, typename StringClassB> +bool startsWith(const StringClassA& reference, const StringClassB& prefix) +{ + unsigned prefixLength = prefix.length(); + if (prefixLength > reference.length()) + return false; + + if (reference.is8Bit()) { + if (prefix.is8Bit()) + return equal(reference.characters8(), prefix.characters8(), prefixLength); + return equal(reference.characters8(), 
prefix.characters16(), prefixLength); + } + if (prefix.is8Bit()) + return equal(reference.characters16(), prefix.characters8(), prefixLength); + return equal(reference.characters16(), prefix.characters16(), prefixLength); +} + +template<typename StringClassA, typename StringClassB> +bool startsWithIgnoringASCIICase(const StringClassA& reference, const StringClassB& prefix) +{ + unsigned prefixLength = prefix.length(); + if (prefixLength > reference.length()) + return false; + + if (reference.is8Bit()) { + if (prefix.is8Bit()) + return equalIgnoringASCIICase(reference.characters8(), prefix.characters8(), prefixLength); + return equalIgnoringASCIICase(reference.characters8(), prefix.characters16(), prefixLength); + } + if (prefix.is8Bit()) + return equalIgnoringASCIICase(reference.characters16(), prefix.characters8(), prefixLength); + return equalIgnoringASCIICase(reference.characters16(), prefix.characters16(), prefixLength); +} + +template<typename StringClassA, typename StringClassB> +bool endsWith(const StringClassA& reference, const StringClassB& suffix) +{ + unsigned suffixLength = suffix.length(); + unsigned referenceLength = reference.length(); + if (suffixLength > referenceLength) + return false; + + unsigned startOffset = referenceLength - suffixLength; + + if (reference.is8Bit()) { + if (suffix.is8Bit()) + return equal(reference.characters8() + startOffset, suffix.characters8(), suffixLength); + return equal(reference.characters8() + startOffset, suffix.characters16(), suffixLength); + } + if (suffix.is8Bit()) + return equal(reference.characters16() + startOffset, suffix.characters8(), suffixLength); + return equal(reference.characters16() + startOffset, suffix.characters16(), suffixLength); +} + +template<typename StringClassA, typename StringClassB> +bool endsWithIgnoringASCIICase(const StringClassA& reference, const StringClassB& suffix) +{ + unsigned suffixLength = suffix.length(); + unsigned referenceLength = reference.length(); + if (suffixLength > 
referenceLength) + return false; + + unsigned startOffset = referenceLength - suffixLength; + + if (reference.is8Bit()) { + if (suffix.is8Bit()) + return equalIgnoringASCIICase(reference.characters8() + startOffset, suffix.characters8(), suffixLength); + return equalIgnoringASCIICase(reference.characters8() + startOffset, suffix.characters16(), suffixLength); + } + if (suffix.is8Bit()) + return equalIgnoringASCIICase(reference.characters16() + startOffset, suffix.characters8(), suffixLength); + return equalIgnoringASCIICase(reference.characters16() + startOffset, suffix.characters16(), suffixLength); +} + +template <typename SearchCharacterType, typename MatchCharacterType> +size_t findIgnoringASCIICase(const SearchCharacterType* source, const MatchCharacterType* matchCharacters, unsigned startOffset, unsigned searchLength, unsigned matchLength) +{ + ASSERT(searchLength >= matchLength); + + const SearchCharacterType* startSearchedCharacters = source + startOffset; + + // delta is the number of additional times to test; delta == 0 means test only once. + unsigned delta = searchLength - matchLength; + + for (unsigned i = 0; i <= delta; ++i) { + if (equalIgnoringASCIICase(startSearchedCharacters + i, matchCharacters, matchLength)) + return startOffset + i; + } + return notFound; +} + +template<typename StringClassA, typename StringClassB> +size_t findIgnoringASCIICase(const StringClassA& source, const StringClassB& stringToFind, unsigned startOffset) +{ + unsigned sourceStringLength = source.length(); + unsigned matchLength = stringToFind.length(); + if (!matchLength) + return std::min(startOffset, sourceStringLength); + + // Check startOffset & matchLength are in range. 
+ if (startOffset > sourceStringLength) + return notFound; + unsigned searchLength = sourceStringLength - startOffset; + if (matchLength > searchLength) + return notFound; + + if (source.is8Bit()) { + if (stringToFind.is8Bit()) + return findIgnoringASCIICase(source.characters8(), stringToFind.characters8(), startOffset, searchLength, matchLength); + return findIgnoringASCIICase(source.characters8(), stringToFind.characters16(), startOffset, searchLength, matchLength); + } + + if (stringToFind.is8Bit()) + return findIgnoringASCIICase(source.characters16(), stringToFind.characters8(), startOffset, searchLength, matchLength); + + return findIgnoringASCIICase(source.characters16(), stringToFind.characters16(), startOffset, searchLength, matchLength); +} + +template <typename SearchCharacterType, typename MatchCharacterType> +ALWAYS_INLINE static size_t findInner(const SearchCharacterType* searchCharacters, const MatchCharacterType* matchCharacters, unsigned index, unsigned searchLength, unsigned matchLength) +{ + // Optimization: keep a running hash of the strings, + // only call equal() if the hashes match. + + // delta is the number of additional times to test; delta == 0 means test only once. 
+ unsigned delta = searchLength - matchLength; + + unsigned searchHash = 0; + unsigned matchHash = 0; + + for (unsigned i = 0; i < matchLength; ++i) { + searchHash += searchCharacters[i]; + matchHash += matchCharacters[i]; + } + + unsigned i = 0; + // keep looping until we match + while (searchHash != matchHash || !equal(searchCharacters + i, matchCharacters, matchLength)) { + if (i == delta) + return notFound; + searchHash += searchCharacters[i + matchLength]; + searchHash -= searchCharacters[i]; + ++i; + } + return index + i; +} + +template<typename CharacterType> +inline size_t find(const CharacterType* characters, unsigned length, CharacterType matchCharacter, unsigned index = 0) +{ + while (index < length) { + if (characters[index] == matchCharacter) + return index; + ++index; + } + return notFound; +} + +ALWAYS_INLINE size_t find(const UChar* characters, unsigned length, LChar matchCharacter, unsigned index = 0) +{ + return find(characters, length, static_cast<UChar>(matchCharacter), index); +} + +inline size_t find(const LChar* characters, unsigned length, UChar matchCharacter, unsigned index = 0) +{ + if (matchCharacter & ~0xFF) + return notFound; + return find(characters, length, static_cast<LChar>(matchCharacter), index); +} + +template<typename StringClass> +size_t findCommon(const StringClass& haystack, const StringClass& needle, unsigned start) +{ + unsigned needleLength = needle.length(); + + if (needleLength == 1) { + if (haystack.is8Bit()) + return WTF::find(haystack.characters8(), haystack.length(), needle[0], start); + return WTF::find(haystack.characters16(), haystack.length(), needle[0], start); + } + + if (!needleLength) + return std::min(start, haystack.length()); + + if (start > haystack.length()) + return notFound; + unsigned searchLength = haystack.length() - start; + if (needleLength > searchLength) + return notFound; + + if (haystack.is8Bit()) { + if (needle.is8Bit()) + return findInner(haystack.characters8() + start, 
needle.characters8(), start, searchLength, needleLength); + return findInner(haystack.characters8() + start, needle.characters16(), start, searchLength, needleLength); + } + + if (needle.is8Bit()) + return findInner(haystack.characters16() + start, needle.characters8(), start, searchLength, needleLength); + + return findInner(haystack.characters16() + start, needle.characters16(), start, searchLength, needleLength); +} + +// This is marked inline since it's mostly used in non-inline functions for each string type. +// When used directly in code it's probably OK to be inline; maybe the loop will be unrolled. +template<typename CharacterType> inline bool equalLettersIgnoringASCIICase(const CharacterType* characters, const char* lowercaseLetters, unsigned length) +{ + for (unsigned i = 0; i < length; ++i) { + if (!isASCIIAlphaCaselessEqual(characters[i], lowercaseLetters[i])) + return false; + } + return true; +} + +template<typename CharacterType, unsigned lowercaseLettersLength> inline bool equalLettersIgnoringASCIICase(const CharacterType* characters, unsigned charactersLength, const char (&lowercaseLetters)[lowercaseLettersLength]) +{ + ASSERT(strlen(lowercaseLetters) == lowercaseLettersLength - 1); + unsigned lowercaseLettersStringLength = lowercaseLettersLength - 1; + return charactersLength == lowercaseLettersStringLength && equalLettersIgnoringASCIICase(characters, lowercaseLetters, lowercaseLettersStringLength); +} + +template<typename StringClass> bool inline hasPrefixWithLettersIgnoringASCIICaseCommon(const StringClass& string, const char* lowercaseLetters, unsigned length) +{ +#if !ASSERT_DISABLED + ASSERT(*lowercaseLetters); + for (const char* letter = lowercaseLetters; *letter; ++letter) + ASSERT(toASCIILowerUnchecked(*letter) == *letter); +#endif + ASSERT(string.length() >= length); + + if (string.is8Bit()) + return equalLettersIgnoringASCIICase(string.characters8(), lowercaseLetters, length); + return 
equalLettersIgnoringASCIICase(string.characters16(), lowercaseLetters, length); +} + +// This is intentionally not marked inline because it's used often and is not speed-critical enough to want it inlined everywhere. +template<typename StringClass> bool equalLettersIgnoringASCIICaseCommonWithoutLength(const StringClass& string, const char* lowercaseLetters) +{ + unsigned length = string.length(); + if (length != strlen(lowercaseLetters)) + return false; + return hasPrefixWithLettersIgnoringASCIICaseCommon(string, lowercaseLetters, length); +} + +template<typename StringClass> bool startsWithLettersIgnoringASCIICaseCommonWithoutLength(const StringClass& string, const char* lowercaseLetters) +{ + size_t prefixLength = strlen(lowercaseLetters); + if (!prefixLength) + return true; + if (string.length() < prefixLength) + return false; + return hasPrefixWithLettersIgnoringASCIICaseCommon(string, lowercaseLetters, prefixLength); +} + +template<typename StringClass, unsigned length> inline bool equalLettersIgnoringASCIICaseCommon(const StringClass& string, const char (&lowercaseLetters)[length]) +{ + // Don't actually use the length; we are choosing code size over speed. 
+ ASSERT(strlen(lowercaseLetters) == length - 1); + const char* pointer = lowercaseLetters; + return equalLettersIgnoringASCIICaseCommonWithoutLength(string, pointer); +} + +template<typename StringClass, unsigned length> inline bool startsWithLettersIgnoringASCIICaseCommon(const StringClass& string, const char (&lowercaseLetters)[length]) +{ + const char* pointer = lowercaseLetters; + return startsWithLettersIgnoringASCIICaseCommonWithoutLength(string, pointer); +} + +} + +using WTF::equalIgnoringASCIICase; +using WTF::equalLettersIgnoringASCIICase; + +#endif // StringCommon_h diff --git a/Source/WTF/wtf/text/StringConcatenate.h b/Source/WTF/wtf/text/StringConcatenate.h index baeccc1d2..affb7e195 100644 --- a/Source/WTF/wtf/text/StringConcatenate.h +++ b/Source/WTF/wtf/text/StringConcatenate.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2010 Apple Inc. All rights reserved. + * Copyright (C) 2010-2016 Apple Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -28,10 +28,14 @@ #include <string.h> -#ifndef WTFString_h +#ifndef AtomicString_h #include <wtf/text/AtomicString.h> #endif +#ifndef StringView_h +#include <wtf/text/StringView.h> +#endif + // This macro is helpful for testing how many intermediate Strings are created while evaluating an // expression containing operator+. 
#ifndef WTF_STRINGTYPEADAPTER_COPIED_WTF_STRING @@ -41,382 +45,221 @@ namespace WTF { template<typename StringType> -class StringTypeAdapter { -}; +class StringTypeAdapter; template<> class StringTypeAdapter<char> { public: - StringTypeAdapter<char>(char buffer) - : m_buffer(buffer) + StringTypeAdapter<char>(char character) + : m_character(character) { } unsigned length() { return 1; } - bool is8Bit() { return true; } - void writeTo(LChar* destination) + void writeTo(LChar* destination) const { - *destination = m_buffer; + *destination = m_character; } - void writeTo(UChar* destination) { *destination = m_buffer; } - -private: - unsigned char m_buffer; -}; - -template<> -class StringTypeAdapter<LChar> { -public: - StringTypeAdapter<LChar>(LChar buffer) - : m_buffer(buffer) - { - } - - unsigned length() { return 1; } - - bool is8Bit() { return true; } - - void writeTo(LChar* destination) + void writeTo(UChar* destination) const { - *destination = m_buffer; + *destination = m_character; } - void writeTo(UChar* destination) { *destination = m_buffer; } + String toString() const { return String(&m_character, 1); } private: - LChar m_buffer; + char m_character; }; template<> class StringTypeAdapter<UChar> { public: - StringTypeAdapter<UChar>(UChar buffer) - : m_buffer(buffer) + StringTypeAdapter<UChar>(UChar character) + : m_character(character) { } - unsigned length() { return 1; } - - bool is8Bit() { return m_buffer <= 0xff; } + unsigned length() const { return 1; } + bool is8Bit() const { return m_character <= 0xff; } - void writeTo(LChar* destination) + void writeTo(LChar* destination) const { ASSERT(is8Bit()); - *destination = static_cast<LChar>(m_buffer); + *destination = static_cast<LChar>(m_character); } - void writeTo(UChar* destination) { *destination = m_buffer; } - -private: - UChar m_buffer; -}; - -template<> -class StringTypeAdapter<char*> { -public: - StringTypeAdapter<char*>(char* buffer) - : m_buffer(buffer) - , m_length(strlen(buffer)) - { - } - - 
unsigned length() { return m_length; } - - bool is8Bit() { return true; } - - void writeTo(LChar* destination) + void writeTo(UChar* destination) const { - for (unsigned i = 0; i < m_length; ++i) - destination[i] = static_cast<LChar>(m_buffer[i]); + *destination = m_character; } - void writeTo(UChar* destination) - { - for (unsigned i = 0; i < m_length; ++i) { - unsigned char c = m_buffer[i]; - destination[i] = c; - } - } + String toString() const { return String(&m_character, 1); } private: - const char* m_buffer; - unsigned m_length; + UChar m_character; }; template<> -class StringTypeAdapter<LChar*> { +class StringTypeAdapter<const LChar*> { public: - StringTypeAdapter<LChar*>(LChar* buffer) - : m_buffer(buffer) - , m_length(strlen(reinterpret_cast<char*>(buffer))) + StringTypeAdapter(const LChar* characters) + : m_characters(characters) + , m_length(strlen(reinterpret_cast<const char*>(characters))) { } - unsigned length() { return m_length; } - - bool is8Bit() { return true; } + unsigned length() const { return m_length; } + bool is8Bit() const { return true; } - void writeTo(LChar* destination) + void writeTo(LChar* destination) const { - memcpy(destination, m_buffer, m_length * sizeof(LChar)); + StringView(m_characters, m_length).getCharactersWithUpconvert(destination); } - void writeTo(UChar* destination) + void writeTo(UChar* destination) const { - StringImpl::copyChars(destination, m_buffer, m_length); + StringView(m_characters, m_length).getCharactersWithUpconvert(destination); } + String toString() const { return String(m_characters, m_length); } + private: - const LChar* m_buffer; + const LChar* m_characters; unsigned m_length; }; template<> class StringTypeAdapter<const UChar*> { public: - StringTypeAdapter<const UChar*>(const UChar* buffer) - : m_buffer(buffer) + StringTypeAdapter(const UChar* characters) + : m_characters(characters) { - size_t len = 0; - while (m_buffer[len] != UChar(0)) - ++len; + unsigned length = 0; + while (m_characters[length]) 
+ ++length; - if (len > std::numeric_limits<unsigned>::max()) + if (length > std::numeric_limits<unsigned>::max()) // FIXME this is silly https://bugs.webkit.org/show_bug.cgi?id=165790 CRASH(); - m_length = len; + m_length = length; } - unsigned length() { return m_length; } + unsigned length() const { return m_length; } + bool is8Bit() const { return false; } - bool is8Bit() { return false; } - - NO_RETURN_DUE_TO_CRASH void writeTo(LChar*) + NO_RETURN_DUE_TO_CRASH void writeTo(LChar*) const { - CRASH(); + CRASH(); // FIXME make this a compile-time failure https://bugs.webkit.org/show_bug.cgi?id=165791 } - void writeTo(UChar* destination) + void writeTo(UChar* destination) const { - memcpy(destination, m_buffer, m_length * sizeof(UChar)); + memcpy(destination, m_characters, m_length * sizeof(UChar)); } + String toString() const { return String(m_characters, m_length); } + private: - const UChar* m_buffer; + const UChar* m_characters; unsigned m_length; }; template<> -class StringTypeAdapter<const char*> { +class StringTypeAdapter<const char*> : public StringTypeAdapter<const LChar*> { public: - StringTypeAdapter<const char*>(const char* buffer) - : m_buffer(buffer) - , m_length(strlen(buffer)) - { - } - - unsigned length() { return m_length; } - - bool is8Bit() { return true; } - - void writeTo(LChar* destination) - { - memcpy(destination, m_buffer, static_cast<size_t>(m_length) * sizeof(LChar)); - } - - void writeTo(UChar* destination) + StringTypeAdapter(const char* characters) + : StringTypeAdapter<const LChar*>(reinterpret_cast<const LChar*>(characters)) { - for (unsigned i = 0; i < m_length; ++i) { - unsigned char c = m_buffer[i]; - destination[i] = c; - } } - -private: - const char* m_buffer; - unsigned m_length; }; template<> -class StringTypeAdapter<const LChar*> { +class StringTypeAdapter<char*> : public StringTypeAdapter<const char*> { public: - StringTypeAdapter<const LChar*>(const LChar* buffer) - : m_buffer(buffer) - , 
m_length(strlen(reinterpret_cast<const char*>(buffer))) - { - } - - unsigned length() { return m_length; } - - bool is8Bit() { return true; } - - void writeTo(LChar* destination) + StringTypeAdapter(const char* characters) + : StringTypeAdapter<const char*>(characters) { - memcpy(destination, m_buffer, static_cast<size_t>(m_length) * sizeof(LChar)); } - - void writeTo(UChar* destination) - { - StringImpl::copyChars(destination, m_buffer, m_length); - } - -private: - const LChar* m_buffer; - unsigned m_length; }; template<> -class StringTypeAdapter<ASCIILiteral> { +class StringTypeAdapter<ASCIILiteral> : public StringTypeAdapter<const char*> { public: - StringTypeAdapter<ASCIILiteral>(ASCIILiteral buffer) - : m_buffer(reinterpret_cast<const LChar*>(static_cast<const char*>(buffer))) - , m_length(strlen(buffer)) - { - } - - size_t length() { return m_length; } - - bool is8Bit() { return true; } - - void writeTo(LChar* destination) - { - memcpy(destination, m_buffer, static_cast<size_t>(m_length)); - } - - void writeTo(UChar* destination) + StringTypeAdapter(ASCIILiteral characters) + : StringTypeAdapter<const char*>(characters) { - StringImpl::copyChars(destination, m_buffer, m_length); } - -private: - const LChar* m_buffer; - unsigned m_length; }; template<> class StringTypeAdapter<Vector<char>> { public: - StringTypeAdapter<Vector<char>>(const Vector<char>& buffer) - : m_buffer(buffer) + StringTypeAdapter(const Vector<char>& vector) + : m_vector(vector) { } - size_t length() { return m_buffer.size(); } - - bool is8Bit() { return true; } - - void writeTo(LChar* destination) - { - for (size_t i = 0; i < m_buffer.size(); ++i) - destination[i] = static_cast<unsigned char>(m_buffer[i]); - } + size_t length() const { return m_vector.size(); } + bool is8Bit() const { return true; } - void writeTo(UChar* destination) - { - for (size_t i = 0; i < m_buffer.size(); ++i) - destination[i] = static_cast<unsigned char>(m_buffer[i]); - } - -private: - const Vector<char>& m_buffer; 
-}; - -template<> -class StringTypeAdapter<Vector<LChar>> { -public: - StringTypeAdapter<Vector<LChar>>(const Vector<LChar>& buffer) - : m_buffer(buffer) + void writeTo(LChar* destination) const { + StringView(reinterpret_cast<const LChar*>(m_vector.data()), m_vector.size()).getCharactersWithUpconvert(destination); } - size_t length() { return m_buffer.size(); } - - bool is8Bit() { return true; } - - void writeTo(LChar* destination) + void writeTo(UChar* destination) const { - for (size_t i = 0; i < m_buffer.size(); ++i) - destination[i] = m_buffer[i]; + StringView(reinterpret_cast<const LChar*>(m_vector.data()), m_vector.size()).getCharactersWithUpconvert(destination); } - void writeTo(UChar* destination) - { - for (size_t i = 0; i < m_buffer.size(); ++i) - destination[i] = m_buffer[i]; - } + String toString() const { return String(m_vector.data(), m_vector.size()); } private: - const Vector<LChar>& m_buffer; + const Vector<char>& m_vector; }; template<> class StringTypeAdapter<String> { public: StringTypeAdapter<String>(const String& string) - : m_buffer(string) + : m_string(string) { } - unsigned length() { return m_buffer.length(); } + unsigned length() const { return m_string.length(); } + bool is8Bit() const { return m_string.isNull() || m_string.is8Bit(); } - bool is8Bit() { return m_buffer.isNull() || m_buffer.is8Bit(); } - - void writeTo(LChar* destination) + void writeTo(LChar* destination) const { - unsigned length = m_buffer.length(); - - ASSERT(is8Bit()); - const LChar* data = m_buffer.characters8(); - for (unsigned i = 0; i < length; ++i) - destination[i] = data[i]; - + StringView(m_string).getCharactersWithUpconvert(destination); WTF_STRINGTYPEADAPTER_COPIED_WTF_STRING(); } - void writeTo(UChar* destination) + void writeTo(UChar* destination) const { - unsigned length = m_buffer.length(); - - if (is8Bit()) { - const LChar* data = m_buffer.characters8(); - for (unsigned i = 0; i < length; ++i) - destination[i] = data[i]; - } else { - const UChar* data 
= m_buffer.characters16(); - for (unsigned i = 0; i < length; ++i) - destination[i] = data[i]; - } - + StringView(m_string).getCharactersWithUpconvert(destination); WTF_STRINGTYPEADAPTER_COPIED_WTF_STRING(); } + String toString() const { return m_string; } + private: - const String& m_buffer; + const String& m_string; }; template<> -class StringTypeAdapter<AtomicString> { +class StringTypeAdapter<AtomicString> : public StringTypeAdapter<String> { public: - StringTypeAdapter<AtomicString>(const AtomicString& string) - : m_adapter(string.string()) + StringTypeAdapter(const AtomicString& string) + : StringTypeAdapter<String>(string.string()) { } - - unsigned length() { return m_adapter.length(); } - - bool is8Bit() { return m_adapter.is8Bit(); } - - void writeTo(LChar* destination) { m_adapter.writeTo(destination); } - void writeTo(UChar* destination) { m_adapter.writeTo(destination); } - -private: - StringTypeAdapter<String> m_adapter; }; -inline void sumWithOverflow(unsigned& total, unsigned addend, bool& overflow) +inline void sumWithOverflow(bool& overflow, unsigned& total, unsigned addend) { unsigned oldTotal = total; total = oldTotal + addend; @@ -424,569 +267,97 @@ inline void sumWithOverflow(unsigned& total, unsigned addend, bool& overflow) overflow = true; } -template<typename StringType1, typename StringType2> -PassRefPtr<StringImpl> tryMakeString(StringType1 string1, StringType2 string2) -{ - StringTypeAdapter<StringType1> adapter1(string1); - StringTypeAdapter<StringType2> adapter2(string2); - - bool overflow = false; - unsigned length = adapter1.length(); - sumWithOverflow(length, adapter2.length(), overflow); - if (overflow) - return 0; - - if (adapter1.is8Bit() && adapter2.is8Bit()) { - LChar* buffer; - RefPtr<StringImpl> resultImpl = StringImpl::tryCreateUninitialized(length, buffer); - if (!resultImpl) - return 0; - - LChar* result = buffer; - adapter1.writeTo(result); - result += adapter1.length(); - adapter2.writeTo(result); - - return 
resultImpl.release(); - } - - UChar* buffer; - RefPtr<StringImpl> resultImpl = StringImpl::tryCreateUninitialized(length, buffer); - if (!resultImpl) - return 0; - - UChar* result = buffer; - adapter1.writeTo(result); - result += adapter1.length(); - adapter2.writeTo(result); - - return resultImpl.release(); -} - -template<typename StringType1, typename StringType2, typename StringType3> -PassRefPtr<StringImpl> tryMakeString(StringType1 string1, StringType2 string2, StringType3 string3) +template<typename... Unsigned> +inline void sumWithOverflow(bool& overflow, unsigned& total, unsigned addend, Unsigned ...addends) { - StringTypeAdapter<StringType1> adapter1(string1); - StringTypeAdapter<StringType2> adapter2(string2); - StringTypeAdapter<StringType3> adapter3(string3); - - bool overflow = false; - unsigned length = adapter1.length(); - sumWithOverflow(length, adapter2.length(), overflow); - sumWithOverflow(length, adapter3.length(), overflow); - if (overflow) - return 0; - - if (adapter1.is8Bit() && adapter2.is8Bit() && adapter3.is8Bit()) { - LChar* buffer; - RefPtr<StringImpl> resultImpl = StringImpl::tryCreateUninitialized(length, buffer); - if (!resultImpl) - return 0; - - LChar* result = buffer; - adapter1.writeTo(result); - result += adapter1.length(); - adapter2.writeTo(result); - result += adapter2.length(); - adapter3.writeTo(result); - - return resultImpl.release(); - } - - UChar* buffer = 0; - RefPtr<StringImpl> resultImpl = StringImpl::tryCreateUninitialized(length, buffer); - if (!resultImpl) - return 0; - - UChar* result = buffer; - adapter1.writeTo(result); - result += adapter1.length(); - adapter2.writeTo(result); - result += adapter2.length(); - adapter3.writeTo(result); - - return resultImpl.release(); + unsigned oldTotal = total; + total = oldTotal + addend; + if (total < oldTotal) + overflow = true; + sumWithOverflow(overflow, total, addends...); } -template<typename StringType1, typename StringType2, typename StringType3, typename StringType4> 
-PassRefPtr<StringImpl> tryMakeString(StringType1 string1, StringType2 string2, StringType3 string3, StringType4 string4) +template<typename Adapter> +inline bool are8Bit(Adapter adapter) { - StringTypeAdapter<StringType1> adapter1(string1); - StringTypeAdapter<StringType2> adapter2(string2); - StringTypeAdapter<StringType3> adapter3(string3); - StringTypeAdapter<StringType4> adapter4(string4); - - bool overflow = false; - unsigned length = adapter1.length(); - sumWithOverflow(length, adapter2.length(), overflow); - sumWithOverflow(length, adapter3.length(), overflow); - sumWithOverflow(length, adapter4.length(), overflow); - if (overflow) - return 0; - - if (adapter1.is8Bit() && adapter2.is8Bit() && adapter3.is8Bit() && adapter4.is8Bit()) { - LChar* buffer; - RefPtr<StringImpl> resultImpl = StringImpl::tryCreateUninitialized(length, buffer); - if (!resultImpl) - return 0; - - LChar* result = buffer; - adapter1.writeTo(result); - result += adapter1.length(); - adapter2.writeTo(result); - result += adapter2.length(); - adapter3.writeTo(result); - result += adapter3.length(); - adapter4.writeTo(result); - - return resultImpl.release(); - } - - UChar* buffer; - RefPtr<StringImpl> resultImpl = StringImpl::tryCreateUninitialized(length, buffer); - if (!resultImpl) - return 0; - - UChar* result = buffer; - adapter1.writeTo(result); - result += adapter1.length(); - adapter2.writeTo(result); - result += adapter2.length(); - adapter3.writeTo(result); - result += adapter3.length(); - adapter4.writeTo(result); - - return resultImpl.release(); + return adapter.is8Bit(); } -template<typename StringType1, typename StringType2, typename StringType3, typename StringType4, typename StringType5> -PassRefPtr<StringImpl> tryMakeString(StringType1 string1, StringType2 string2, StringType3 string3, StringType4 string4, StringType5 string5) +template<typename Adapter, typename... 
Adapters> +inline bool are8Bit(Adapter adapter, Adapters ...adapters) { - StringTypeAdapter<StringType1> adapter1(string1); - StringTypeAdapter<StringType2> adapter2(string2); - StringTypeAdapter<StringType3> adapter3(string3); - StringTypeAdapter<StringType4> adapter4(string4); - StringTypeAdapter<StringType5> adapter5(string5); - - bool overflow = false; - unsigned length = adapter1.length(); - sumWithOverflow(length, adapter2.length(), overflow); - sumWithOverflow(length, adapter3.length(), overflow); - sumWithOverflow(length, adapter4.length(), overflow); - sumWithOverflow(length, adapter5.length(), overflow); - if (overflow) - return 0; - - if (adapter1.is8Bit() && adapter2.is8Bit() && adapter3.is8Bit() && adapter4.is8Bit() && adapter5.is8Bit()) { - LChar* buffer; - RefPtr<StringImpl> resultImpl = StringImpl::tryCreateUninitialized(length, buffer); - if (!resultImpl) - return 0; - - LChar* result = buffer; - adapter1.writeTo(result); - result += adapter1.length(); - adapter2.writeTo(result); - result += adapter2.length(); - adapter3.writeTo(result); - result += adapter3.length(); - adapter4.writeTo(result); - result += adapter4.length(); - adapter5.writeTo(result); - - return resultImpl.release(); - } - - UChar* buffer; - RefPtr<StringImpl> resultImpl = StringImpl::tryCreateUninitialized(length, buffer); - if (!resultImpl) - return 0; - - UChar* result = buffer; - adapter1.writeTo(result); - result += adapter1.length(); - adapter2.writeTo(result); - result += adapter2.length(); - adapter3.writeTo(result); - result += adapter3.length(); - adapter4.writeTo(result); - result += adapter4.length(); - adapter5.writeTo(result); - - return resultImpl.release(); + return adapter.is8Bit() && are8Bit(adapters...); } -template<typename StringType1, typename StringType2, typename StringType3, typename StringType4, typename StringType5, typename StringType6> -PassRefPtr<StringImpl> tryMakeString(StringType1 string1, StringType2 string2, StringType3 string3, StringType4 
string4, StringType5 string5, StringType6 string6) +template<typename ResultType, typename Adapter> +inline void makeStringAccumulator(ResultType* result, Adapter adapter) { - StringTypeAdapter<StringType1> adapter1(string1); - StringTypeAdapter<StringType2> adapter2(string2); - StringTypeAdapter<StringType3> adapter3(string3); - StringTypeAdapter<StringType4> adapter4(string4); - StringTypeAdapter<StringType5> adapter5(string5); - StringTypeAdapter<StringType6> adapter6(string6); - - bool overflow = false; - unsigned length = adapter1.length(); - sumWithOverflow(length, adapter2.length(), overflow); - sumWithOverflow(length, adapter3.length(), overflow); - sumWithOverflow(length, adapter4.length(), overflow); - sumWithOverflow(length, adapter5.length(), overflow); - sumWithOverflow(length, adapter6.length(), overflow); - if (overflow) - return 0; - - if (adapter1.is8Bit() && adapter2.is8Bit() && adapter3.is8Bit() && adapter4.is8Bit() && adapter5.is8Bit() && adapter6.is8Bit()) { - LChar* buffer; - RefPtr<StringImpl> resultImpl = StringImpl::tryCreateUninitialized(length, buffer); - if (!resultImpl) - return 0; - - LChar* result = buffer; - adapter1.writeTo(result); - result += adapter1.length(); - adapter2.writeTo(result); - result += adapter2.length(); - adapter3.writeTo(result); - result += adapter3.length(); - adapter4.writeTo(result); - result += adapter4.length(); - adapter5.writeTo(result); - result += adapter5.length(); - adapter6.writeTo(result); - - return resultImpl.release(); - } - - UChar* buffer; - RefPtr<StringImpl> resultImpl = StringImpl::tryCreateUninitialized(length, buffer); - if (!resultImpl) - return 0; - - UChar* result = buffer; - adapter1.writeTo(result); - result += adapter1.length(); - adapter2.writeTo(result); - result += adapter2.length(); - adapter3.writeTo(result); - result += adapter3.length(); - adapter4.writeTo(result); - result += adapter4.length(); - adapter5.writeTo(result); - result += adapter5.length(); - 
adapter6.writeTo(result); - - return resultImpl.release(); + adapter.writeTo(result); } -template<typename StringType1, typename StringType2, typename StringType3, typename StringType4, typename StringType5, typename StringType6, typename StringType7> -PassRefPtr<StringImpl> tryMakeString(StringType1 string1, StringType2 string2, StringType3 string3, StringType4 string4, StringType5 string5, StringType6 string6, StringType7 string7) +template<typename ResultType, typename Adapter, typename... Adapters> +inline void makeStringAccumulator(ResultType* result, Adapter adapter, Adapters ...adapters) { - StringTypeAdapter<StringType1> adapter1(string1); - StringTypeAdapter<StringType2> adapter2(string2); - StringTypeAdapter<StringType3> adapter3(string3); - StringTypeAdapter<StringType4> adapter4(string4); - StringTypeAdapter<StringType5> adapter5(string5); - StringTypeAdapter<StringType6> adapter6(string6); - StringTypeAdapter<StringType7> adapter7(string7); - - bool overflow = false; - unsigned length = adapter1.length(); - sumWithOverflow(length, adapter2.length(), overflow); - sumWithOverflow(length, adapter3.length(), overflow); - sumWithOverflow(length, adapter4.length(), overflow); - sumWithOverflow(length, adapter5.length(), overflow); - sumWithOverflow(length, adapter6.length(), overflow); - sumWithOverflow(length, adapter7.length(), overflow); - if (overflow) - return 0; - - if (adapter1.is8Bit() && adapter2.is8Bit() && adapter3.is8Bit() && adapter4.is8Bit() && adapter5.is8Bit() && adapter6.is8Bit() && adapter7.is8Bit()) { - LChar* buffer; - RefPtr<StringImpl> resultImpl = StringImpl::tryCreateUninitialized(length, buffer); - if (!resultImpl) - return 0; - - LChar* result = buffer; - adapter1.writeTo(result); - result += adapter1.length(); - adapter2.writeTo(result); - result += adapter2.length(); - adapter3.writeTo(result); - result += adapter3.length(); - adapter4.writeTo(result); - result += adapter4.length(); - adapter5.writeTo(result); - result += 
adapter5.length(); - adapter6.writeTo(result); - result += adapter6.length(); - adapter7.writeTo(result); - - return resultImpl.release(); - } - - UChar* buffer; - RefPtr<StringImpl> resultImpl = StringImpl::tryCreateUninitialized(length, buffer); - if (!resultImpl) - return 0; - - UChar* result = buffer; - adapter1.writeTo(result); - result += adapter1.length(); - adapter2.writeTo(result); - result += adapter2.length(); - adapter3.writeTo(result); - result += adapter3.length(); - adapter4.writeTo(result); - result += adapter4.length(); - adapter5.writeTo(result); - result += adapter5.length(); - adapter6.writeTo(result); - result += adapter6.length(); - adapter7.writeTo(result); - - return resultImpl.release(); + adapter.writeTo(result); + makeStringAccumulator(result + adapter.length(), adapters...); } -template<typename StringType1, typename StringType2, typename StringType3, typename StringType4, typename StringType5, typename StringType6, typename StringType7, typename StringType8> -PassRefPtr<StringImpl> tryMakeString(StringType1 string1, StringType2 string2, StringType3 string3, StringType4 string4, StringType5 string5, StringType6 string6, StringType7 string7, StringType8 string8) +template<typename StringTypeAdapter, typename... 
StringTypeAdapters> +String tryMakeStringFromAdapters(StringTypeAdapter adapter, StringTypeAdapters ...adapters) { - StringTypeAdapter<StringType1> adapter1(string1); - StringTypeAdapter<StringType2> adapter2(string2); - StringTypeAdapter<StringType3> adapter3(string3); - StringTypeAdapter<StringType4> adapter4(string4); - StringTypeAdapter<StringType5> adapter5(string5); - StringTypeAdapter<StringType6> adapter6(string6); - StringTypeAdapter<StringType7> adapter7(string7); - StringTypeAdapter<StringType8> adapter8(string8); - bool overflow = false; - unsigned length = adapter1.length(); - sumWithOverflow(length, adapter2.length(), overflow); - sumWithOverflow(length, adapter3.length(), overflow); - sumWithOverflow(length, adapter4.length(), overflow); - sumWithOverflow(length, adapter5.length(), overflow); - sumWithOverflow(length, adapter6.length(), overflow); - sumWithOverflow(length, adapter7.length(), overflow); - sumWithOverflow(length, adapter8.length(), overflow); + unsigned length = adapter.length(); + sumWithOverflow(overflow, length, adapters.length()...); if (overflow) - return 0; + return String(); - if (adapter1.is8Bit() && adapter2.is8Bit() && adapter3.is8Bit() && adapter4.is8Bit() && adapter5.is8Bit() && adapter6.is8Bit() && adapter7.is8Bit() && adapter8.is8Bit()) { + if (are8Bit(adapter, adapters...)) { LChar* buffer; RefPtr<StringImpl> resultImpl = StringImpl::tryCreateUninitialized(length, buffer); if (!resultImpl) - return 0; - - LChar* result = buffer; - adapter1.writeTo(result); - result += adapter1.length(); - adapter2.writeTo(result); - result += adapter2.length(); - adapter3.writeTo(result); - result += adapter3.length(); - adapter4.writeTo(result); - result += adapter4.length(); - adapter5.writeTo(result); - result += adapter5.length(); - adapter6.writeTo(result); - result += adapter6.length(); - adapter7.writeTo(result); - result += adapter7.length(); - adapter8.writeTo(result); - - return resultImpl.release(); - } - - UChar* buffer; - 
RefPtr<StringImpl> resultImpl = StringImpl::tryCreateUninitialized(length, buffer); - if (!resultImpl) - return 0; - - UChar* result = buffer; - adapter1.writeTo(result); - result += adapter1.length(); - adapter2.writeTo(result); - result += adapter2.length(); - adapter3.writeTo(result); - result += adapter3.length(); - adapter4.writeTo(result); - result += adapter4.length(); - adapter5.writeTo(result); - result += adapter5.length(); - adapter6.writeTo(result); - result += adapter6.length(); - adapter7.writeTo(result); - result += adapter7.length(); - adapter8.writeTo(result); - - return resultImpl.release(); -} - -template<typename StringType1, typename StringType2, typename StringType3, typename StringType4, typename StringType5, typename StringType6, typename StringType7, typename StringType8, typename StringType9> -PassRefPtr<StringImpl> tryMakeString(StringType1 string1, StringType2 string2, StringType3 string3, StringType4 string4, StringType5 string5, StringType6 string6, StringType7 string7, StringType8 string8, StringType9 string9) -{ - StringTypeAdapter<StringType1> adapter1(string1); - StringTypeAdapter<StringType2> adapter2(string2); - StringTypeAdapter<StringType3> adapter3(string3); - StringTypeAdapter<StringType4> adapter4(string4); - StringTypeAdapter<StringType5> adapter5(string5); - StringTypeAdapter<StringType6> adapter6(string6); - StringTypeAdapter<StringType7> adapter7(string7); - StringTypeAdapter<StringType8> adapter8(string8); - StringTypeAdapter<StringType9> adapter9(string9); + return String(); - bool overflow = false; - unsigned length = adapter1.length(); - sumWithOverflow(length, adapter2.length(), overflow); - sumWithOverflow(length, adapter3.length(), overflow); - sumWithOverflow(length, adapter4.length(), overflow); - sumWithOverflow(length, adapter5.length(), overflow); - sumWithOverflow(length, adapter6.length(), overflow); - sumWithOverflow(length, adapter7.length(), overflow); - sumWithOverflow(length, adapter8.length(), 
overflow); - sumWithOverflow(length, adapter9.length(), overflow); - if (overflow) - return 0; + makeStringAccumulator(buffer, adapter, adapters...); - if (adapter1.is8Bit() && adapter2.is8Bit() && adapter3.is8Bit() && adapter4.is8Bit() && adapter5.is8Bit() && adapter6.is8Bit() && adapter7.is8Bit() && adapter8.is8Bit() && adapter9.is8Bit()) { - LChar* buffer; - RefPtr<StringImpl> resultImpl = StringImpl::tryCreateUninitialized(length, buffer); - if (!resultImpl) - return 0; - - LChar* result = buffer; - adapter1.writeTo(result); - result += adapter1.length(); - adapter2.writeTo(result); - result += adapter2.length(); - adapter3.writeTo(result); - result += adapter3.length(); - adapter4.writeTo(result); - result += adapter4.length(); - adapter5.writeTo(result); - result += adapter5.length(); - adapter6.writeTo(result); - result += adapter6.length(); - adapter7.writeTo(result); - result += adapter7.length(); - adapter8.writeTo(result); - result += adapter8.length(); - adapter9.writeTo(result); - - return resultImpl.release(); + return WTFMove(resultImpl); } UChar* buffer; RefPtr<StringImpl> resultImpl = StringImpl::tryCreateUninitialized(length, buffer); if (!resultImpl) - return 0; - - UChar* result = buffer; - adapter1.writeTo(result); - result += adapter1.length(); - adapter2.writeTo(result); - result += adapter2.length(); - adapter3.writeTo(result); - result += adapter3.length(); - adapter4.writeTo(result); - result += adapter4.length(); - adapter5.writeTo(result); - result += adapter5.length(); - adapter6.writeTo(result); - result += adapter6.length(); - adapter7.writeTo(result); - result += adapter7.length(); - adapter8.writeTo(result); - result += adapter8.length(); - adapter9.writeTo(result); - - return resultImpl.release(); -} - - -// Convenience only. 
-template<typename StringType1> -String makeString(StringType1 string1) -{ - return String(string1); -} - -template<typename StringType1, typename StringType2> -String makeString(StringType1 string1, StringType2 string2) -{ - RefPtr<StringImpl> resultImpl = tryMakeString(string1, string2); - if (!resultImpl) - CRASH(); - return resultImpl.release(); -} - -template<typename StringType1, typename StringType2, typename StringType3> -String makeString(StringType1 string1, StringType2 string2, StringType3 string3) -{ - RefPtr<StringImpl> resultImpl = tryMakeString(string1, string2, string3); - if (!resultImpl) - CRASH(); - return resultImpl.release(); -} - -template<typename StringType1, typename StringType2, typename StringType3, typename StringType4> -String makeString(StringType1 string1, StringType2 string2, StringType3 string3, StringType4 string4) -{ - RefPtr<StringImpl> resultImpl = tryMakeString(string1, string2, string3, string4); - if (!resultImpl) - CRASH(); - return resultImpl.release(); -} + return String(); -template<typename StringType1, typename StringType2, typename StringType3, typename StringType4, typename StringType5> -String makeString(StringType1 string1, StringType2 string2, StringType3 string3, StringType4 string4, StringType5 string5) -{ - RefPtr<StringImpl> resultImpl = tryMakeString(string1, string2, string3, string4, string5); - if (!resultImpl) - CRASH(); - return resultImpl.release(); -} + makeStringAccumulator(buffer, adapter, adapters...); -template<typename StringType1, typename StringType2, typename StringType3, typename StringType4, typename StringType5, typename StringType6> -String makeString(StringType1 string1, StringType2 string2, StringType3 string3, StringType4 string4, StringType5 string5, StringType6 string6) -{ - RefPtr<StringImpl> resultImpl = tryMakeString(string1, string2, string3, string4, string5, string6); - if (!resultImpl) - CRASH(); - return resultImpl.release(); + return WTFMove(resultImpl); } -template<typename 
StringType1, typename StringType2, typename StringType3, typename StringType4, typename StringType5, typename StringType6, typename StringType7> -String makeString(StringType1 string1, StringType2 string2, StringType3 string3, StringType4 string4, StringType5 string5, StringType6 string6, StringType7 string7) +template<typename... StringTypes> +String tryMakeString(StringTypes ...strings) { - RefPtr<StringImpl> resultImpl = tryMakeString(string1, string2, string3, string4, string5, string6, string7); - if (!resultImpl) - CRASH(); - return resultImpl.release(); + return tryMakeStringFromAdapters(StringTypeAdapter<StringTypes>(strings)...); } -template<typename StringType1, typename StringType2, typename StringType3, typename StringType4, typename StringType5, typename StringType6, typename StringType7, typename StringType8> -String makeString(StringType1 string1, StringType2 string2, StringType3 string3, StringType4 string4, StringType5 string5, StringType6 string6, StringType7 string7, StringType8 string8) +// Convenience only. +template<typename StringType> +String makeString(StringType string) { - RefPtr<StringImpl> resultImpl = tryMakeString(string1, string2, string3, string4, string5, string6, string7, string8); - if (!resultImpl) - CRASH(); - return resultImpl.release(); + return String(string); } -template<typename StringType1, typename StringType2, typename StringType3, typename StringType4, typename StringType5, typename StringType6, typename StringType7, typename StringType8, typename StringType9> -String makeString(StringType1 string1, StringType2 string2, StringType3 string3, StringType4 string4, StringType5 string5, StringType6 string6, StringType7 string7, StringType8 string8, StringType9 string9) +template<typename... StringTypes> +String makeString(StringTypes... 
strings) { - RefPtr<StringImpl> resultImpl = tryMakeString(string1, string2, string3, string4, string5, string6, string7, string8, string9); - if (!resultImpl) + String result = tryMakeString(strings...); + if (!result) CRASH(); - return resultImpl.release(); + return result; } } // namespace WTF using WTF::makeString; +using WTF::tryMakeString; #include <wtf/text/StringOperators.h> #endif diff --git a/Source/WTF/wtf/text/StringConcatenateNumbers.h b/Source/WTF/wtf/text/StringConcatenateNumbers.h new file mode 100644 index 000000000..293e74504 --- /dev/null +++ b/Source/WTF/wtf/text/StringConcatenateNumbers.h @@ -0,0 +1,175 @@ +/* + * Copyright (C) 2017 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#pragma once + +#include <wtf/dtoa.h> +#include <wtf/text/IntegerToStringConversion.h> +#include <wtf/text/StringConcatenate.h> + +namespace WTF { + +template<> +class StringTypeAdapter<int> { +public: + StringTypeAdapter<int>(int number) + : m_number(number) + { + } + + unsigned length() const { return lengthOfNumberAsStringSigned(m_number); } + bool is8Bit() const { return true; } + + void writeTo(LChar* destination) const { writeNumberToBufferSigned(m_number, destination); } + void writeTo(UChar* destination) const { writeNumberToBufferSigned(m_number, destination); } + + String toString() const { return String::number(m_number); } + +private: + int m_number; +}; + +template<> +class StringTypeAdapter<unsigned> { +public: + StringTypeAdapter<unsigned>(unsigned number) + : m_number(number) + { + } + + unsigned length() const { return lengthOfNumberAsStringUnsigned(m_number); } + bool is8Bit() const { return true; } + + void writeTo(LChar* destination) const { writeNumberToBufferUnsigned(m_number, destination); } + void writeTo(UChar* destination) const { writeNumberToBufferUnsigned(m_number, destination); } + + String toString() const { return String::number(m_number); } + +private: + unsigned m_number; +}; + +template<> +class StringTypeAdapter<double> { +public: + StringTypeAdapter<double>(double number) + { + numberToString(number, m_buffer); + m_length = strlen(m_buffer); + } + + unsigned length() const { return m_length; } + bool is8Bit() const { return true; } + + void writeTo(LChar* destination) const + { + for (unsigned i = 0; i < m_length; ++i) + destination[i] = m_buffer[i]; + } + + void writeTo(UChar* destination) const + { + for (unsigned i = 0; i < m_length; ++i) + destination[i] = m_buffer[i]; + } + + String toString() const { return { m_buffer, m_length }; } + +private: + NumberToStringBuffer m_buffer; + unsigned m_length; +}; + +template<> +class StringTypeAdapter<float> : public StringTypeAdapter<double> { +public: + 
StringTypeAdapter<float>(float number) + : StringTypeAdapter<double>(number) + { + } +}; + +class FormattedNumber { +public: + static FormattedNumber fixedPrecision(double number, unsigned significantFigures = 6, bool truncateTrailingZeros = false) + { + FormattedNumber numberFormatter; + numberToFixedPrecisionString(number, significantFigures, numberFormatter.m_buffer, truncateTrailingZeros); + numberFormatter.m_length = strlen(numberFormatter.m_buffer); + return numberFormatter; + } + + static FormattedNumber fixedWidth(double number, unsigned decimalPlaces) + { + FormattedNumber numberFormatter; + numberToFixedWidthString(number, decimalPlaces, numberFormatter.m_buffer); + numberFormatter.m_length = strlen(numberFormatter.m_buffer); + return numberFormatter; + } + + unsigned length() const { return m_length; } + const LChar* buffer() const { return reinterpret_cast<const LChar*>(m_buffer); } + +private: + NumberToStringBuffer m_buffer; + unsigned m_length; +}; + +template<> +class StringTypeAdapter<FormattedNumber> { +public: + StringTypeAdapter<FormattedNumber>(const FormattedNumber& numberFormatter) + : m_numberFormatter(numberFormatter) + { + } + + unsigned length() const { return m_numberFormatter.length(); } + bool is8Bit() const { return true; } + + void writeTo(LChar* destination) const + { + auto buffer = m_numberFormatter.buffer(); + auto length = m_numberFormatter.length(); + for (unsigned i = 0; i < length; ++i) + destination[i] = buffer[i]; + } + + void writeTo(UChar* destination) const + { + auto buffer = m_numberFormatter.buffer(); + auto length = m_numberFormatter.length(); + for (unsigned i = 0; i < length; ++i) + destination[i] = buffer[i]; + } + + String toString() const { return { m_numberFormatter.buffer(), m_numberFormatter.length() }; } + +private: + const FormattedNumber& m_numberFormatter; +}; + +} + +using WTF::FormattedNumber; diff --git a/Source/WTF/wtf/text/StringHash.h b/Source/WTF/wtf/text/StringHash.h index 88bdd9369..139b5169e 
100644 --- a/Source/WTF/wtf/text/StringHash.h +++ b/Source/WTF/wtf/text/StringHash.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2006, 2007, 2008, 2012, 2013 Apple Inc. All rights reserved + * Copyright (C) 2006-2008, 2012-2013, 2016 Apple Inc. All rights reserved * Copyright (C) Research In Motion Limited 2009. All rights reserved. * * This library is free software; you can redistribute it and/or @@ -24,7 +24,7 @@ #include <wtf/text/AtomicString.h> #include <wtf/HashTraits.h> -#include <wtf/StringHasher.h> +#include <wtf/Hasher.h> namespace WTF { @@ -33,7 +33,15 @@ namespace WTF { return value.isNull(); } - // The hash() functions on StringHash and CaseFoldingHash do not support + inline void HashTraits<String>::customDeleteBucket(String& value) + { + // See unique_ptr's customDeleteBucket() for an explanation. + ASSERT(!isDeletedValue(value)); + String valueToBeDestroyed = WTFMove(value); + constructDeletedValue(value); + } + + // The hash() functions on StringHash and ASCIICaseInsensitiveHash do not support // null strings. get(), contains(), and add() on HashMap<String,..., StringHash> // cause a null-pointer dereference when passed null strings. 
@@ -45,7 +53,7 @@ namespace WTF { static unsigned hash(StringImpl* key) { return key->hash(); } static inline bool equal(const StringImpl* a, const StringImpl* b) { - return equalNonNull(a, b); + return WTF::equal(*a, *b); } static unsigned hash(const RefPtr<StringImpl>& key) { return key->hash(); } @@ -71,14 +79,11 @@ namespace WTF { static const bool safeToCompareToEmptyOrDeleted = false; }; - class CaseFoldingHash { + class ASCIICaseInsensitiveHash { public: template<typename T> static inline UChar foldCase(T character) { - if (std::is_same<T, LChar>::value) - return StringImpl::latin1CaseFoldTable[character]; - - return u_foldCase(character, U_FOLD_CASE_DEFAULT); + return toASCIILower(character); } static unsigned hash(const UChar* data, unsigned length) @@ -105,17 +110,23 @@ namespace WTF { static inline unsigned hash(const char* data, unsigned length) { - return CaseFoldingHash::hash(reinterpret_cast<const LChar*>(data), length); + return hash(reinterpret_cast<const LChar*>(data), length); } + static inline bool equal(const StringImpl& a, const StringImpl& b) + { + return equalIgnoringASCIICase(a, b); + } static inline bool equal(const StringImpl* a, const StringImpl* b) { - return equalIgnoringCaseNonNull(a, b); + ASSERT(a); + ASSERT(b); + return equal(*a, *b); } static unsigned hash(const RefPtr<StringImpl>& key) { - return hash(*key); + return hash(key.get()); } static bool equal(const RefPtr<StringImpl>& a, const RefPtr<StringImpl>& b) @@ -167,8 +178,8 @@ namespace WTF { } +using WTF::ASCIICaseInsensitiveHash; using WTF::AlreadyHashed; -using WTF::CaseFoldingHash; using WTF::StringHash; #endif diff --git a/Source/WTF/wtf/text/StringImpl.cpp b/Source/WTF/wtf/text/StringImpl.cpp index 34794258c..ee66daf25 100644 --- a/Source/WTF/wtf/text/StringImpl.cpp +++ b/Source/WTF/wtf/text/StringImpl.cpp @@ -2,7 +2,7 @@ * Copyright (C) 1999 Lars Knoll (knoll@kde.org) * (C) 1999 Antti Koivisto (koivisto@kde.org) * (C) 2001 Dirk Mueller ( mueller@kde.org ) - * Copyright 
(C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2013 Apple Inc. All rights reserved. + * Copyright (C) 2003-2009, 2013-2016 Apple Inc. All rights reserved. * Copyright (C) 2006 Andrew Wellington (proton@wiretapped.net) * * This library is free software; you can redistribute it and/or @@ -30,12 +30,14 @@ #include "StringHash.h" #include <wtf/ProcessID.h> #include <wtf/StdLibExtras.h> -#include <wtf/WTFThreadData.h> #include <wtf/text/CString.h> +#include <wtf/text/StringView.h> +#include <wtf/text/SymbolImpl.h> +#include <wtf/text/SymbolRegistry.h> #include <wtf/unicode/CharacterNames.h> #include <wtf/unicode/UTF8.h> -#ifdef STRING_STATS +#if STRING_STATS #include <unistd.h> #include <wtf/DataLog.h> #endif @@ -44,27 +46,21 @@ namespace WTF { using namespace Unicode; -COMPILE_ASSERT(sizeof(StringImpl) == 2 * sizeof(int) + 3 * sizeof(void*), StringImpl_should_stay_small); +static_assert(sizeof(StringImpl) == 2 * sizeof(int) + 2 * sizeof(void*), "StringImpl should stay small"); -#ifdef STRING_STATS +#if STRING_STATS StringStats StringImpl::m_stringStats; -unsigned StringStats::s_stringRemovesTillPrintStats = StringStats::s_printStringStatsFrequency; +std::atomic<unsigned> StringStats::s_stringRemovesTillPrintStats(s_printStringStatsFrequency); -void StringStats::removeString(StringImpl* string) +void StringStats::removeString(StringImpl& string) { - unsigned length = string->length(); - bool isSubString = string->isSubString(); + unsigned length = string.length(); + bool isSubString = string.isSubString(); --m_totalNumberStrings; - if (string->has16BitShadow()) { - --m_numberUpconvertedStrings; - if (!isSubString) - m_totalUpconvertedData -= length; - } - - if (string->is8Bit()) { + if (string.is8Bit()) { --m_number8BitStrings; if (!isSubString) m_total8BitData -= length; @@ -87,46 +83,46 @@ void StringStats::printStats() unsigned long long totalNumberCharacters = m_total8BitData + m_total16BitData; double percent8Bit = m_totalNumberStrings ? 
((double)m_number8BitStrings * 100) / (double)m_totalNumberStrings : 0.0; double average8bitLength = m_number8BitStrings ? (double)m_total8BitData / (double)m_number8BitStrings : 0.0; - dataLogF("%8u (%5.2f%%) 8 bit %12llu chars %12llu bytes avg length %6.1f\n", m_number8BitStrings, percent8Bit, m_total8BitData, m_total8BitData, average8bitLength); + dataLogF("%8u (%5.2f%%) 8 bit %12llu chars %12llu bytes avg length %6.1f\n", m_number8BitStrings.load(), percent8Bit, m_total8BitData.load(), m_total8BitData.load(), average8bitLength); double percent16Bit = m_totalNumberStrings ? ((double)m_number16BitStrings * 100) / (double)m_totalNumberStrings : 0.0; double average16bitLength = m_number16BitStrings ? (double)m_total16BitData / (double)m_number16BitStrings : 0.0; - dataLogF("%8u (%5.2f%%) 16 bit %12llu chars %12llu bytes avg length %6.1f\n", m_number16BitStrings, percent16Bit, m_total16BitData, m_total16BitData * 2, average16bitLength); - - double percentUpconverted = m_totalNumberStrings ? ((double)m_numberUpconvertedStrings * 100) / (double)m_number8BitStrings : 0.0; - double averageUpconvertedLength = m_numberUpconvertedStrings ? (double)m_totalUpconvertedData / (double)m_numberUpconvertedStrings : 0.0; - dataLogF("%8u (%5.2f%%) upconverted %12llu chars %12llu bytes avg length %6.1f\n", m_numberUpconvertedStrings, percentUpconverted, m_totalUpconvertedData, m_totalUpconvertedData * 2, averageUpconvertedLength); + dataLogF("%8u (%5.2f%%) 16 bit %12llu chars %12llu bytes avg length %6.1f\n", m_number16BitStrings.load(), percent16Bit, m_total16BitData.load(), m_total16BitData * 2, average16bitLength); double averageLength = m_totalNumberStrings ? 
(double)totalNumberCharacters / (double)m_totalNumberStrings : 0.0; - unsigned long long totalDataBytes = m_total8BitData + (m_total16BitData + m_totalUpconvertedData) * 2; - dataLogF("%8u Total %12llu chars %12llu bytes avg length %6.1f\n", m_totalNumberStrings, totalNumberCharacters, totalDataBytes, averageLength); - unsigned long long totalSavedBytes = m_total8BitData - m_totalUpconvertedData; + unsigned long long totalDataBytes = m_total8BitData + m_total16BitData * 2; + dataLogF("%8u Total %12llu chars %12llu bytes avg length %6.1f\n", m_totalNumberStrings.load(), totalNumberCharacters, totalDataBytes, averageLength); + unsigned long long totalSavedBytes = m_total8BitData; double percentSavings = totalSavedBytes ? ((double)totalSavedBytes * 100) / (double)(totalDataBytes + totalSavedBytes) : 0.0; dataLogF(" Total savings %12llu bytes (%5.2f%%)\n", totalSavedBytes, percentSavings); + + dataLogF("%8u StringImpl::ref calls\n", m_refCalls.load()); + dataLogF("%8u StringImpl::deref calls\n", m_derefCalls.load()); } #endif +StringImpl::StaticStringImpl StringImpl::s_atomicEmptyString("", StringImpl::StringAtomic); StringImpl::~StringImpl() { ASSERT(!isStatic()); - STRING_STATS_REMOVE_STRING(this); + StringView::invalidate(*this); - if (isAtomic()) - AtomicString::remove(this); - if (isIdentifier()) { - if (!wtfThreadData().currentIdentifierTable()->remove(this)) - CRASH(); - } + STRING_STATS_REMOVE_STRING(*this); - BufferOwnership ownership = bufferOwnership(); + if (isAtomic() && length() && !isSymbol()) + AtomicStringImpl::remove(static_cast<AtomicStringImpl*>(this)); - if (has16BitShadow()) { - ASSERT(m_copyData16); - fastFree(m_copyData16); + if (isSymbol()) { + auto& symbol = static_cast<SymbolImpl&>(*this); + auto* symbolRegistry = symbol.symbolRegistry(); + if (symbolRegistry) + symbolRegistry->remove(symbol); } + BufferOwnership ownership = bufferOwnership(); + if (ownership == BufferInternal) return; if (ownership == BufferOwned) { @@ -137,8 +133,8 @@ 
StringImpl::~StringImpl() } ASSERT(ownership == BufferSubstring); - ASSERT(m_substringBuffer); - m_substringBuffer->deref(); + ASSERT(substringBuffer()); + substringBuffer()->deref(); } void StringImpl::destroy(StringImpl* stringImpl) @@ -147,19 +143,19 @@ void StringImpl::destroy(StringImpl* stringImpl) fastFree(stringImpl); } -PassRef<StringImpl> StringImpl::createFromLiteral(const char* characters, unsigned length) +Ref<StringImpl> StringImpl::createFromLiteral(const char* characters, unsigned length) { ASSERT_WITH_MESSAGE(length, "Use StringImpl::empty() to create an empty string"); ASSERT(charactersAreAllASCII<LChar>(reinterpret_cast<const LChar*>(characters), length)); return adoptRef(*new StringImpl(reinterpret_cast<const LChar*>(characters), length, ConstructWithoutCopying)); } -PassRef<StringImpl> StringImpl::createFromLiteral(const char* characters) +Ref<StringImpl> StringImpl::createFromLiteral(const char* characters) { return createFromLiteral(characters, strlen(characters)); } -PassRef<StringImpl> StringImpl::createWithoutCopying(const UChar* characters, unsigned length) +Ref<StringImpl> StringImpl::createWithoutCopying(const UChar* characters, unsigned length) { if (!length) return *empty(); @@ -167,7 +163,7 @@ PassRef<StringImpl> StringImpl::createWithoutCopying(const UChar* characters, un return adoptRef(*new StringImpl(characters, length, ConstructWithoutCopying)); } -PassRef<StringImpl> StringImpl::createWithoutCopying(const LChar* characters, unsigned length) +Ref<StringImpl> StringImpl::createWithoutCopying(const LChar* characters, unsigned length) { if (!length) return *empty(); @@ -176,7 +172,7 @@ PassRef<StringImpl> StringImpl::createWithoutCopying(const LChar* characters, un } template <typename CharType> -inline PassRef<StringImpl> StringImpl::createUninitializedInternal(unsigned length, CharType*& data) +inline Ref<StringImpl> StringImpl::createUninitializedInternal(unsigned length, CharType*& data) { if (!length) { data = 0; @@ -186,7 
+182,7 @@ inline PassRef<StringImpl> StringImpl::createUninitializedInternal(unsigned leng } template <typename CharType> -inline PassRef<StringImpl> StringImpl::createUninitializedInternalNonEmpty(unsigned length, CharType*& data) +inline Ref<StringImpl> StringImpl::createUninitializedInternalNonEmpty(unsigned length, CharType*& data) { ASSERT(length); @@ -195,26 +191,25 @@ inline PassRef<StringImpl> StringImpl::createUninitializedInternalNonEmpty(unsig // heap allocation from this call. if (length > ((std::numeric_limits<unsigned>::max() - sizeof(StringImpl)) / sizeof(CharType))) CRASH(); - size_t size = sizeof(StringImpl) + length * sizeof(CharType); - StringImpl* string = static_cast<StringImpl*>(fastMalloc(size)); + StringImpl* string = static_cast<StringImpl*>(fastMalloc(allocationSize<CharType>(length))); - data = reinterpret_cast<CharType*>(string + 1); + data = string->tailPointer<CharType>(); return constructInternal<CharType>(string, length); } -PassRef<StringImpl> StringImpl::createUninitialized(unsigned length, LChar*& data) +Ref<StringImpl> StringImpl::createUninitialized(unsigned length, LChar*& data) { return createUninitializedInternal(length, data); } -PassRef<StringImpl> StringImpl::createUninitialized(unsigned length, UChar*& data) +Ref<StringImpl> StringImpl::createUninitialized(unsigned length, UChar*& data) { return createUninitializedInternal(length, data); } template <typename CharType> -inline PassRef<StringImpl> StringImpl::reallocateInternal(PassRefPtr<StringImpl> originalString, unsigned length, CharType*& data) -{ +inline Ref<StringImpl> StringImpl::reallocateInternal(Ref<StringImpl>&& originalString, unsigned length, CharType*& data) +{ ASSERT(originalString->hasOneRef()); ASSERT(originalString->bufferOwnership() == BufferInternal); @@ -226,28 +221,28 @@ inline PassRef<StringImpl> StringImpl::reallocateInternal(PassRefPtr<StringImpl> // Same as createUninitialized() except here we use fastRealloc. 
if (length > ((std::numeric_limits<unsigned>::max() - sizeof(StringImpl)) / sizeof(CharType))) CRASH(); - size_t size = sizeof(StringImpl) + length * sizeof(CharType); + originalString->~StringImpl(); - StringImpl* string = static_cast<StringImpl*>(fastRealloc(originalString.leakRef(), size)); + auto* string = static_cast<StringImpl*>(fastRealloc(&originalString.leakRef(), allocationSize<CharType>(length))); - data = reinterpret_cast<CharType*>(string + 1); + data = string->tailPointer<CharType>(); return constructInternal<CharType>(string, length); } -PassRef<StringImpl> StringImpl::reallocate(PassRefPtr<StringImpl> originalString, unsigned length, LChar*& data) +Ref<StringImpl> StringImpl::reallocate(Ref<StringImpl>&& originalString, unsigned length, LChar*& data) { ASSERT(originalString->is8Bit()); - return reallocateInternal(originalString, length, data); + return reallocateInternal(WTFMove(originalString), length, data); } -PassRef<StringImpl> StringImpl::reallocate(PassRefPtr<StringImpl> originalString, unsigned length, UChar*& data) +Ref<StringImpl> StringImpl::reallocate(Ref<StringImpl>&& originalString, unsigned length, UChar*& data) { ASSERT(!originalString->is8Bit()); - return reallocateInternal(originalString, length, data); + return reallocateInternal(WTFMove(originalString), length, data); } template <typename CharType> -inline PassRef<StringImpl> StringImpl::createInternal(const CharType* characters, unsigned length) +inline Ref<StringImpl> StringImpl::createInternal(const CharType* characters, unsigned length) { if (!characters || !length) return *empty(); @@ -258,23 +253,23 @@ inline PassRef<StringImpl> StringImpl::createInternal(const CharType* characters return string; } -PassRef<StringImpl> StringImpl::create(const UChar* characters, unsigned length) +Ref<StringImpl> StringImpl::create(const UChar* characters, unsigned length) { return createInternal(characters, length); } -PassRef<StringImpl> StringImpl::create(const LChar* characters, unsigned 
length) +Ref<StringImpl> StringImpl::create(const LChar* characters, unsigned length) { return createInternal(characters, length); } -PassRef<StringImpl> StringImpl::create8BitIfPossible(const UChar* characters, unsigned length) +Ref<StringImpl> StringImpl::create8BitIfPossible(const UChar* characters, unsigned length) { if (!characters || !length) return *empty(); LChar* data; - RefPtr<StringImpl> string = createUninitializedInternalNonEmpty(length, data); + auto string = createUninitializedInternalNonEmpty(length, data); for (size_t i = 0; i < length; ++i) { if (characters[i] & 0xff00) @@ -282,15 +277,15 @@ PassRef<StringImpl> StringImpl::create8BitIfPossible(const UChar* characters, un data[i] = static_cast<LChar>(characters[i]); } - return string.releaseNonNull(); + return string; } -PassRef<StringImpl> StringImpl::create8BitIfPossible(const UChar* string) +Ref<StringImpl> StringImpl::create8BitIfPossible(const UChar* string) { return StringImpl::create8BitIfPossible(string, lengthOfNullTerminatedString(string)); } -PassRef<StringImpl> StringImpl::create(const LChar* string) +Ref<StringImpl> StringImpl::create(const LChar* string) { if (!string) return *empty(); @@ -300,41 +295,6 @@ PassRef<StringImpl> StringImpl::create(const LChar* string) return create(string, length); } -const UChar* StringImpl::getData16SlowCase() const -{ - if (has16BitShadow()) - return m_copyData16; - - if (bufferOwnership() == BufferSubstring) { - // If this is a substring, return a pointer into the parent string. 
- // TODO: Consider severing this string from the parent string - unsigned offset = m_data8 - m_substringBuffer->characters8(); - return m_substringBuffer->deprecatedCharacters() + offset; - } - - STRING_STATS_ADD_UPCONVERTED_STRING(m_length); - - unsigned len = length(); - - m_copyData16 = static_cast<UChar*>(fastMalloc(len * sizeof(UChar))); - - m_hashAndFlags |= s_hashFlagHas16BitShadow; - - upconvertCharacters(0, len); - - return m_copyData16; -} - -void StringImpl::upconvertCharacters(unsigned start, unsigned end) const -{ - ASSERT(is8Bit()); - ASSERT(has16BitShadow()); - - for (size_t i = start; i < end; ++i) - m_copyData16[i] = m_data8[i]; -} - - bool StringImpl::containsOnlyWhitespace() { // FIXME: The definition of whitespace here includes a number of characters @@ -358,7 +318,7 @@ bool StringImpl::containsOnlyWhitespace() return true; } -PassRef<StringImpl> StringImpl::substring(unsigned start, unsigned length) +Ref<StringImpl> StringImpl::substring(unsigned start, unsigned length) { if (start >= m_length) return *empty(); @@ -385,42 +345,23 @@ UChar32 StringImpl::characterStartingAt(unsigned i) return 0; } -PassRef<StringImpl> StringImpl::lower() +Ref<StringImpl> StringImpl::convertToLowercaseWithoutLocale() { - // Note: This is a hot function in the Dromaeo benchmark, specifically the - // no-op code path up through the first 'return' statement. + // Note: At one time this was a hot function in the Dromaeo benchmark, specifically the + // no-op code path that may return ourself if we find no upper case letters and no invalid + // ASCII letters. 
// First scan the string for uppercase and non-ASCII characters: if (is8Bit()) { - unsigned failingIndex; for (unsigned i = 0; i < m_length; ++i) { LChar character = m_data8[i]; - if (UNLIKELY((character & ~0x7F) || isASCIIUpper(character))) { - failingIndex = i; - goto SlowPath8bitLower; - } - } - return *this; - -SlowPath8bitLower: - LChar* data8; - auto newImpl = createUninitializedInternalNonEmpty(m_length, data8); - - for (unsigned i = 0; i < failingIndex; ++i) - data8[i] = m_data8[i]; - - for (unsigned i = failingIndex; i < m_length; ++i) { - LChar character = m_data8[i]; - if (!(character & ~0x7F)) - data8[i] = toASCIILower(character); - else { - ASSERT(u_tolower(character) <= 0xFF); - data8[i] = static_cast<LChar>(u_tolower(character)); - } + if (UNLIKELY((character & ~0x7F) || isASCIIUpper(character))) + return convertToLowercaseWithoutLocaleStartingAtFailingIndex8Bit(i); } - return newImpl; + return *this; } + bool noUpper = true; unsigned ored = 0; @@ -451,26 +392,51 @@ SlowPath8bitLower: // Do a slower implementation for cases that include non-ASCII characters. 
UChar* data16; - RefPtr<StringImpl> newImpl = createUninitializedInternalNonEmpty(m_length, data16); + auto newImpl = createUninitializedInternalNonEmpty(m_length, data16); UErrorCode status = U_ZERO_ERROR; int32_t realLength = u_strToLower(data16, length, m_data16, m_length, "", &status); if (U_SUCCESS(status) && realLength == length) - return newImpl.releaseNonNull(); + return newImpl; newImpl = createUninitialized(realLength, data16); status = U_ZERO_ERROR; u_strToLower(data16, realLength, m_data16, m_length, "", &status); if (U_FAILURE(status)) return *this; - return newImpl.releaseNonNull(); + return newImpl; +} + +Ref<StringImpl> StringImpl::convertToLowercaseWithoutLocaleStartingAtFailingIndex8Bit(unsigned failingIndex) +{ + ASSERT(is8Bit()); + LChar* data8; + auto newImpl = createUninitializedInternalNonEmpty(m_length, data8); + + for (unsigned i = 0; i < failingIndex; ++i) { + ASSERT(!(m_data8[i] & ~0x7F) && !isASCIIUpper(m_data8[i])); + data8[i] = m_data8[i]; + } + + for (unsigned i = failingIndex; i < m_length; ++i) { + LChar character = m_data8[i]; + if (!(character & ~0x7F)) + data8[i] = toASCIILower(character); + else { + ASSERT(u_tolower(character) <= 0xFF); + data8[i] = static_cast<LChar>(u_tolower(character)); + } + } + + return newImpl; } -PassRef<StringImpl> StringImpl::upper() +Ref<StringImpl> StringImpl::convertToUppercaseWithoutLocale() { - // This function could be optimized for no-op cases the way lower() is, - // but in empirical testing, few actual calls to upper() are no-ops, so - // it wouldn't be worth the extra time for pre-scanning. + // This function could be optimized for no-op cases the way + // convertToLowercaseWithoutLocale() is, but in empirical testing, + // few actual calls to upper() are no-ops, so it wouldn't be worth + // the extra time for pre-scanning. 
if (m_length > static_cast<unsigned>(std::numeric_limits<int32_t>::max())) CRASH(); @@ -478,30 +444,23 @@ PassRef<StringImpl> StringImpl::upper() if (is8Bit()) { LChar* data8; - RefPtr<StringImpl> newImpl = createUninitialized(m_length, data8); + auto newImpl = createUninitialized(m_length, data8); // Do a faster loop for the case where all the characters are ASCII. unsigned ored = 0; for (int i = 0; i < length; ++i) { LChar c = m_data8[i]; ored |= c; -#if CPU(X86) && defined(_MSC_VER) && _MSC_VER >=1700 - // Workaround for an MSVC 2012 x86 optimizer bug. Remove once the bug is fixed. - // See https://connect.microsoft.com/VisualStudio/feedback/details/780362/optimization-bug-of-range-comparison - // for more details. - data8[i] = c >= 'a' && c <= 'z' ? c & ~0x20 : c; -#else data8[i] = toASCIIUpper(c); -#endif } if (!(ored & ~0x7F)) - return newImpl.releaseNonNull(); + return newImpl; // Do a slower implementation for cases that include non-ASCII Latin-1 characters. int numberSharpSCharacters = 0; // There are two special cases. - // 1. latin-1 characters when converted to upper case are 16 bit characters. + // 1. Some Latin-1 characters when converted to upper case are 16 bit characters. // 2. Lower case sharp-S converts to "SS" (two characters) for (int32_t i = 0; i < length; ++i) { LChar c = m_data8[i]; @@ -509,7 +468,7 @@ PassRef<StringImpl> StringImpl::upper() ++numberSharpSCharacters; ASSERT(u_toupper(c) <= 0xFFFF); UChar upper = u_toupper(c); - if (UNLIKELY(upper > 0xff)) { + if (UNLIKELY(upper > 0xFF)) { // Since this upper-cased character does not fit in an 8-bit string, we need to take the 16-bit path. goto upconvert; } @@ -517,7 +476,7 @@ PassRef<StringImpl> StringImpl::upper() } if (!numberSharpSCharacters) - return newImpl.releaseNonNull(); + return newImpl; // We have numberSSCharacters sharp-s characters, but none of the other special characters. 
newImpl = createUninitialized(m_length + numberSharpSCharacters, data8); @@ -535,14 +494,15 @@ PassRef<StringImpl> StringImpl::upper() } } - return newImpl.releaseNonNull(); + return newImpl; } upconvert: - const UChar* source16 = deprecatedCharacters(); + auto upconvertedCharacters = StringView(*this).upconvertedCharacters(); + const UChar* source16 = upconvertedCharacters; UChar* data16; - RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16); + auto newImpl = createUninitialized(m_length, data16); // Do a faster loop for the case where all the characters are ASCII. unsigned ored = 0; @@ -552,19 +512,19 @@ upconvert: data16[i] = toASCIIUpper(c); } if (!(ored & ~0x7F)) - return newImpl.releaseNonNull(); + return newImpl; // Do a slower implementation for cases that include non-ASCII characters. UErrorCode status = U_ZERO_ERROR; int32_t realLength = u_strToUpper(data16, length, source16, m_length, "", &status); if (U_SUCCESS(status) && realLength == length) - return newImpl.releaseNonNull(); + return newImpl; newImpl = createUninitialized(realLength, data16); status = U_ZERO_ERROR; u_strToUpper(data16, realLength, source16, m_length, "", &status); if (U_FAILURE(status)) return *this; - return newImpl.releaseNonNull(); + return newImpl; } static inline bool needsTurkishCasingRules(const AtomicString& localeIdentifier) @@ -577,14 +537,14 @@ static inline bool needsTurkishCasingRules(const AtomicString& localeIdentifier) && (localeIdentifier.length() == 2 || localeIdentifier[2] == '-'); } -PassRef<StringImpl> StringImpl::lower(const AtomicString& localeIdentifier) +Ref<StringImpl> StringImpl::convertToLowercaseWithLocale(const AtomicString& localeIdentifier) { // Use the more-optimized code path most of the time. // Assuming here that the only locale-specific lowercasing is the Turkish casing rules. // FIXME: Could possibly optimize further by looking for the specific sequences // that have locale-specific lowercasing. There are only three of them. 
if (!needsTurkishCasingRules(localeIdentifier)) - return lower(); + return convertToLowercaseWithoutLocale(); // FIXME: Could share more code with the main StringImpl::lower by factoring out // this last part into a shared function that takes a locale string, since this is @@ -597,28 +557,29 @@ PassRef<StringImpl> StringImpl::lower(const AtomicString& localeIdentifier) // Below, we pass in the hardcoded locale "tr". Passing that is more efficient than // allocating memory just to turn localeIdentifier into a C string, and we assume // there is no difference between the uppercasing for "tr" and "az" locales. - const UChar* source16 = deprecatedCharacters(); + auto upconvertedCharacters = StringView(*this).upconvertedCharacters(); + const UChar* source16 = upconvertedCharacters; UChar* data16; - RefPtr<StringImpl> newString = createUninitialized(length, data16); + auto newString = createUninitialized(length, data16); UErrorCode status = U_ZERO_ERROR; int realLength = u_strToLower(data16, length, source16, length, "tr", &status); if (U_SUCCESS(status) && realLength == length) - return newString.releaseNonNull(); + return newString; newString = createUninitialized(realLength, data16); status = U_ZERO_ERROR; u_strToLower(data16, realLength, source16, length, "tr", &status); if (U_FAILURE(status)) return *this; - return newString.releaseNonNull(); + return newString; } -PassRef<StringImpl> StringImpl::upper(const AtomicString& localeIdentifier) +Ref<StringImpl> StringImpl::convertToUppercaseWithLocale(const AtomicString& localeIdentifier) { // Use the more-optimized code path most of the time. // Assuming here that the only locale-specific lowercasing is the Turkish casing rules, // and that the only affected character is lowercase "i". 
if (!needsTurkishCasingRules(localeIdentifier) || find('i') == notFound) - return upper(); + return convertToUppercaseWithoutLocale(); if (m_length > static_cast<unsigned>(std::numeric_limits<int32_t>::max())) CRASH(); @@ -627,95 +588,145 @@ PassRef<StringImpl> StringImpl::upper(const AtomicString& localeIdentifier) // Below, we pass in the hardcoded locale "tr". Passing that is more efficient than // allocating memory just to turn localeIdentifier into a C string, and we assume // there is no difference between the uppercasing for "tr" and "az" locales. - const UChar* source16 = deprecatedCharacters(); + auto upconvertedCharacters = StringView(*this).upconvertedCharacters(); + const UChar* source16 = upconvertedCharacters; UChar* data16; - RefPtr<StringImpl> newString = createUninitialized(length, data16); + auto newString = createUninitialized(length, data16); UErrorCode status = U_ZERO_ERROR; int realLength = u_strToUpper(data16, length, source16, length, "tr", &status); if (U_SUCCESS(status) && realLength == length) - return newString.releaseNonNull(); + return newString; newString = createUninitialized(realLength, data16); status = U_ZERO_ERROR; u_strToUpper(data16, realLength, source16, length, "tr", &status); if (U_FAILURE(status)) return *this; - return newString.releaseNonNull(); -} - -PassRef<StringImpl> StringImpl::fill(UChar character) -{ - if (!(character & ~0x7F)) { - LChar* data; - auto newImpl = createUninitialized(m_length, data); - for (unsigned i = 0; i < m_length; ++i) - data[i] = character; - return newImpl; - } - UChar* data; - auto newImpl = createUninitialized(m_length, data); - for (unsigned i = 0; i < m_length; ++i) - data[i] = character; - return newImpl; + return newString; } -PassRef<StringImpl> StringImpl::foldCase() +Ref<StringImpl> StringImpl::foldCase() { - if (m_length > static_cast<unsigned>(std::numeric_limits<int32_t>::max())) - CRASH(); - int32_t length = m_length; - if (is8Bit()) { - // Do a faster loop for the case where all 
the characters are ASCII. - LChar* data; - auto newImpl = createUninitialized(m_length, data); - LChar ored = 0; - - for (int32_t i = 0; i < length; ++i) { - LChar c = m_data8[i]; - data[i] = toASCIILower(c); - ored |= c; + unsigned failingIndex; + for (unsigned i = 0; i < m_length; ++i) { + auto character = m_data8[i]; + if (UNLIKELY(!isASCII(character) || isASCIIUpper(character))) { + failingIndex = i; + goto SlowPath; + } } + // String was all ASCII and no uppercase, so just return as-is. + return *this; - if (!(ored & ~0x7F)) - return newImpl; - - // Do a slower implementation for cases that include non-ASCII Latin-1 characters. - // FIXME: Shouldn't this use u_foldCase instead of u_tolower? - for (int32_t i = 0; i < length; ++i) { - ASSERT(u_tolower(m_data8[i]) <= 0xFF); - data[i] = static_cast<LChar>(u_tolower(m_data8[i])); +SlowPath: + bool need16BitCharacters = false; + for (unsigned i = failingIndex; i < m_length; ++i) { + auto character = m_data8[i]; + if (character == 0xB5 || character == 0xDF) { + need16BitCharacters = true; + break; + } } - return newImpl; + if (!need16BitCharacters) { + LChar* data8; + auto folded = createUninitializedInternalNonEmpty(m_length, data8); + for (unsigned i = 0; i < failingIndex; ++i) + data8[i] = m_data8[i]; + for (unsigned i = failingIndex; i < m_length; ++i) { + auto character = m_data8[i]; + if (isASCII(character)) + data8[i] = toASCIILower(character); + else { + ASSERT(u_foldCase(character, U_FOLD_CASE_DEFAULT) <= 0xFF); + data8[i] = static_cast<LChar>(u_foldCase(character, U_FOLD_CASE_DEFAULT)); + } + } + return folded; + } + } else { + // FIXME: Unclear why we use goto in the 8-bit case, and a different approach in the 16-bit case. 
+ bool noUpper = true; + unsigned ored = 0; + for (unsigned i = 0; i < m_length; ++i) { + UChar character = m_data16[i]; + if (UNLIKELY(isASCIIUpper(character))) + noUpper = false; + ored |= character; + } + if (!(ored & ~0x7F)) { + if (noUpper) { + // String was all ASCII and no uppercase, so just return as-is. + return *this; + } + UChar* data16; + auto folded = createUninitializedInternalNonEmpty(m_length, data16); + for (unsigned i = 0; i < m_length; ++i) + data16[i] = toASCIILower(m_data16[i]); + return folded; + } } - // Do a faster loop for the case where all the characters are ASCII. - UChar* data; - RefPtr<StringImpl> newImpl = createUninitialized(m_length, data); - UChar ored = 0; - for (int32_t i = 0; i < length; ++i) { - UChar c = m_data16[i]; - ored |= c; - data[i] = toASCIILower(c); - } - if (!(ored & ~0x7F)) - return newImpl.releaseNonNull(); + if (m_length > static_cast<unsigned>(std::numeric_limits<int32_t>::max())) + CRASH(); - // Do a slower implementation for cases that include non-ASCII characters. 
+ auto upconvertedCharacters = StringView(*this).upconvertedCharacters(); + + UChar* data; + auto folded = createUninitializedInternalNonEmpty(m_length, data); + int32_t length = m_length; UErrorCode status = U_ZERO_ERROR; - int32_t realLength = u_strFoldCase(data, length, m_data16, m_length, U_FOLD_CASE_DEFAULT, &status); + int32_t realLength = u_strFoldCase(data, length, upconvertedCharacters, length, U_FOLD_CASE_DEFAULT, &status); if (U_SUCCESS(status) && realLength == length) - return newImpl.releaseNonNull(); - newImpl = createUninitialized(realLength, data); + return folded; + ASSERT(realLength > length); + folded = createUninitializedInternalNonEmpty(realLength, data); status = U_ZERO_ERROR; - u_strFoldCase(data, realLength, m_data16, m_length, U_FOLD_CASE_DEFAULT, &status); + u_strFoldCase(data, realLength, upconvertedCharacters, length, U_FOLD_CASE_DEFAULT, &status); if (U_FAILURE(status)) return *this; - return newImpl.releaseNonNull(); + return folded; +} + +template<StringImpl::CaseConvertType type, typename CharacterType> +ALWAYS_INLINE Ref<StringImpl> StringImpl::convertASCIICase(StringImpl& impl, const CharacterType* data, unsigned length) +{ + unsigned failingIndex; + for (unsigned i = 0; i < length; ++i) { + CharacterType character = data[i]; + if (type == CaseConvertType::Lower ? UNLIKELY(isASCIIUpper(character)) : LIKELY(isASCIILower(character))) { + failingIndex = i; + goto SlowPath; + } + } + return impl; + +SlowPath: + CharacterType* newData; + auto newImpl = createUninitializedInternalNonEmpty(length, newData); + for (unsigned i = 0; i < failingIndex; ++i) + newData[i] = data[i]; + for (unsigned i = failingIndex; i < length; ++i) + newData[i] = type == CaseConvertType::Lower ? 
toASCIILower(data[i]) : toASCIIUpper(data[i]); + return newImpl; +} + +Ref<StringImpl> StringImpl::convertToASCIILowercase() +{ + if (is8Bit()) + return convertASCIICase<CaseConvertType::Lower>(*this, m_data8, m_length); + return convertASCIICase<CaseConvertType::Lower>(*this, m_data16, m_length); +} + +Ref<StringImpl> StringImpl::convertToASCIIUppercase() +{ + if (is8Bit()) + return convertASCIICase<CaseConvertType::Upper>(*this, m_data8, m_length); + return convertASCIICase<CaseConvertType::Upper>(*this, m_data16, m_length); } template <class UCharPredicate> -inline PassRef<StringImpl> StringImpl::stripMatchedCharacters(UCharPredicate predicate) +inline Ref<StringImpl> StringImpl::stripMatchedCharacters(UCharPredicate predicate) { if (!m_length) return *this; @@ -763,18 +774,18 @@ public: } }; -PassRef<StringImpl> StringImpl::stripWhiteSpace() +Ref<StringImpl> StringImpl::stripWhiteSpace() { return stripMatchedCharacters(SpaceOrNewlinePredicate()); } -PassRef<StringImpl> StringImpl::stripWhiteSpace(IsWhiteSpaceFunctionPtr isWhiteSpace) +Ref<StringImpl> StringImpl::stripWhiteSpace(IsWhiteSpaceFunctionPtr isWhiteSpace) { return stripMatchedCharacters(UCharPredicate(isWhiteSpace)); } template <typename CharType> -ALWAYS_INLINE PassRef<StringImpl> StringImpl::removeCharacters(const CharType* characters, CharacterMatchFunctionPtr findMatch) +ALWAYS_INLINE Ref<StringImpl> StringImpl::removeCharacters(const CharType* characters, CharacterMatchFunctionPtr findMatch) { const CharType* from = characters; const CharType* fromend = from + m_length; @@ -803,10 +814,10 @@ ALWAYS_INLINE PassRef<StringImpl> StringImpl::removeCharacters(const CharType* c data.shrink(outc); - return adopt(data); + return adopt(WTFMove(data)); } -PassRef<StringImpl> StringImpl::removeCharacters(CharacterMatchFunctionPtr findMatch) +Ref<StringImpl> StringImpl::removeCharacters(CharacterMatchFunctionPtr findMatch) { if (is8Bit()) return removeCharacters(characters8(), findMatch); @@ -814,11 +825,11 
@@ PassRef<StringImpl> StringImpl::removeCharacters(CharacterMatchFunctionPtr findM } template <typename CharType, class UCharPredicate> -inline PassRef<StringImpl> StringImpl::simplifyMatchedCharactersToSpace(UCharPredicate predicate) +inline Ref<StringImpl> StringImpl::simplifyMatchedCharactersToSpace(UCharPredicate predicate) { StringBuffer<CharType> data(m_length); - const CharType* from = getCharacters<CharType>(); + const CharType* from = characters<CharType>(); const CharType* fromend = from + m_length; int outc = 0; bool changedToSpace = false; @@ -847,17 +858,17 @@ inline PassRef<StringImpl> StringImpl::simplifyMatchedCharactersToSpace(UCharPre data.shrink(outc); - return adopt(data); + return adopt(WTFMove(data)); } -PassRef<StringImpl> StringImpl::simplifyWhiteSpace() +Ref<StringImpl> StringImpl::simplifyWhiteSpace() { if (is8Bit()) return StringImpl::simplifyMatchedCharactersToSpace<LChar>(SpaceOrNewlinePredicate()); return StringImpl::simplifyMatchedCharactersToSpace<UChar>(SpaceOrNewlinePredicate()); } -PassRef<StringImpl> StringImpl::simplifyWhiteSpace(IsWhiteSpaceFunctionPtr isWhiteSpace) +Ref<StringImpl> StringImpl::simplifyWhiteSpace(IsWhiteSpaceFunctionPtr isWhiteSpace) { if (is8Bit()) return StringImpl::simplifyMatchedCharactersToSpace<LChar>(UCharPredicate(isWhiteSpace)); @@ -948,24 +959,54 @@ float StringImpl::toFloat(bool* ok) return charactersToFloat(characters16(), m_length, ok); } -bool equalIgnoringCase(const LChar* a, const LChar* b, unsigned length) +// Table is based on ftp://ftp.unicode.org/Public/UNIDATA/CaseFolding.txt +static const UChar latin1CaseFoldTable[256] = { + 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f, + 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, 0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f, + 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 
0x002d, 0x002e, 0x002f, + 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f, + 0x0040, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, + 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 0x0078, 0x0079, 0x007a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f, + 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, + 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f, + 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, + 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, + 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, + 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x03bc, 0x00b6, 0x00b7, 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf, + 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, + 0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00d7, 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00df, + 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, + 0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7, 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff, +}; + +static inline bool equalCompatibilityCaseless(const LChar* a, const LChar* b, unsigned length) { while (length--) { - if (StringImpl::latin1CaseFoldTable[*a++] != StringImpl::latin1CaseFoldTable[*b++]) + if (latin1CaseFoldTable[*a++] != latin1CaseFoldTable[*b++]) return false; 
} return true; } -bool equalIgnoringCase(const UChar* a, const LChar* b, unsigned length) +static inline bool equalCompatibilityCaseless(const UChar* a, const LChar* b, unsigned length) { while (length--) { - if (u_foldCase(*a++, U_FOLD_CASE_DEFAULT) != StringImpl::latin1CaseFoldTable[*b++]) + if (u_foldCase(*a++, U_FOLD_CASE_DEFAULT) != latin1CaseFoldTable[*b++]) return false; } return true; } +static inline bool equalCompatibilityCaseless(const LChar* a, const UChar* b, unsigned length) +{ + return equalCompatibilityCaseless(b, a, length); +} + +static inline bool equalCompatibilityCaseless(const UChar* a, const UChar* b, unsigned length) +{ + return !u_memcasecmp(a, b, length, U_FOLD_CASE_DEFAULT); +} + size_t StringImpl::find(CharacterMatchFunctionPtr matchFunction, unsigned start) { if (is8Bit()) @@ -986,8 +1027,11 @@ size_t StringImpl::find(const LChar* matchString, unsigned index) return std::min(index, length()); // Optimization 1: fast case for strings of length 1. - if (matchLength == 1) + if (matchLength == 1) { + if (is8Bit()) + return WTF::find(characters8(), length(), matchString[0], index); return WTF::find(characters16(), length(), *matchString, index); + } // Check index & matchLength are in range. if (index > length()) @@ -998,10 +1042,32 @@ size_t StringImpl::find(const LChar* matchString, unsigned index) // delta is the number of additional times to test; delta == 0 means test only once. unsigned delta = searchLength - matchLength; - const UChar* searchCharacters = deprecatedCharacters() + index; - // Optimization 2: keep a running hash of the strings, // only call equal if the hashes match. 
+ + if (is8Bit()) { + const LChar* searchCharacters = characters8() + index; + + unsigned searchHash = 0; + unsigned matchHash = 0; + for (unsigned i = 0; i < matchLength; ++i) { + searchHash += searchCharacters[i]; + matchHash += matchString[i]; + } + + unsigned i = 0; + while (searchHash != matchHash || !equal(searchCharacters + i, matchString, matchLength)) { + if (i == delta) + return notFound; + searchHash += searchCharacters[i + matchLength]; + searchHash -= searchCharacters[i]; + ++i; + } + return index + i; + } + + const UChar* searchCharacters = characters16() + index; + unsigned searchHash = 0; unsigned matchHash = 0; for (unsigned i = 0; i < matchLength; ++i) { @@ -1010,7 +1076,6 @@ size_t StringImpl::find(const LChar* matchString, unsigned index) } unsigned i = 0; - // keep looping until we match while (searchHash != matchHash || !equal(searchCharacters + i, matchString, matchLength)) { if (i == delta) return notFound; @@ -1042,45 +1107,27 @@ size_t StringImpl::findIgnoringCase(const LChar* matchString, unsigned index) // delta is the number of additional times to test; delta == 0 means test only once. 
unsigned delta = searchLength - matchLength; - const UChar* searchCharacters = deprecatedCharacters() + index; + if (is8Bit()) { + const LChar* searchCharacters = characters8() + index; - unsigned i = 0; - // keep looping until we match - while (!equalIgnoringCase(searchCharacters + i, matchString, matchLength)) { - if (i == delta) - return notFound; - ++i; + unsigned i = 0; + while (!equalCompatibilityCaseless(searchCharacters + i, matchString, matchLength)) { + if (i == delta) + return notFound; + ++i; + } + return index + i; } - return index + i; -} - -template <typename SearchCharacterType, typename MatchCharacterType> -ALWAYS_INLINE static size_t findInner(const SearchCharacterType* searchCharacters, const MatchCharacterType* matchCharacters, unsigned index, unsigned searchLength, unsigned matchLength) -{ - // Optimization: keep a running hash of the strings, - // only call equal() if the hashes match. - // delta is the number of additional times to test; delta == 0 means test only once. - unsigned delta = searchLength - matchLength; - - unsigned searchHash = 0; - unsigned matchHash = 0; - - for (unsigned i = 0; i < matchLength; ++i) { - searchHash += searchCharacters[i]; - matchHash += matchCharacters[i]; - } + const UChar* searchCharacters = characters16() + index; unsigned i = 0; - // keep looping until we match - while (searchHash != matchHash || !equal(searchCharacters + i, matchCharacters, matchLength)) { + while (!equalCompatibilityCaseless(searchCharacters + i, matchString, matchLength)) { if (i == delta) return notFound; - searchHash += searchCharacters[i + matchLength]; - searchHash -= searchCharacters[i]; ++i; } - return index + i; + return index + i; } size_t StringImpl::find(StringImpl* matchString) @@ -1128,35 +1175,7 @@ size_t StringImpl::find(StringImpl* matchString, unsigned index) if (UNLIKELY(!matchString)) return notFound; - unsigned matchLength = matchString->length(); - - // Optimization 1: fast case for strings of length 1. 
- if (matchLength == 1) { - if (is8Bit()) - return WTF::find(characters8(), length(), (*matchString)[0], index); - return WTF::find(characters16(), length(), (*matchString)[0], index); - } - - if (UNLIKELY(!matchLength)) - return std::min(index, length()); - - // Check index & matchLength are in range. - if (index > length()) - return notFound; - unsigned searchLength = length() - index; - if (matchLength > searchLength) - return notFound; - - if (is8Bit()) { - if (matchString->is8Bit()) - return findInner(characters8() + index, matchString->characters8(), index, searchLength, matchLength); - return findInner(characters8() + index, matchString->characters16(), index, searchLength, matchLength); - } - - if (matchString->is8Bit()) - return findInner(characters16() + index, matchString->characters8(), index, searchLength, matchLength); - - return findInner(characters16() + index, matchString->characters16(), index, searchLength, matchLength); + return findCommon(*this, *matchString, index); } template <typename SearchCharacterType, typename MatchCharacterType> @@ -1167,7 +1186,7 @@ ALWAYS_INLINE static size_t findIgnoringCaseInner(const SearchCharacterType* sea unsigned i = 0; // keep looping until we match - while (!equalIgnoringCase(searchCharacters + i, matchCharacters, matchLength)) { + while (!equalCompatibilityCaseless(searchCharacters + i, matchCharacters, matchLength)) { if (i == delta) return notFound; ++i; @@ -1203,11 +1222,28 @@ size_t StringImpl::findIgnoringCase(StringImpl* matchString, unsigned index) return findIgnoringCaseInner(characters16() + index, matchString->characters16(), index, searchLength, matchLength); } -size_t StringImpl::findNextLineStart(unsigned index) +size_t StringImpl::findIgnoringASCIICase(const StringImpl& matchString) const { - if (is8Bit()) - return WTF::findNextLineStart(characters8(), m_length, index); - return WTF::findNextLineStart(characters16(), m_length, index); + return ::WTF::findIgnoringASCIICase(*this, matchString, 
0); +} + +size_t StringImpl::findIgnoringASCIICase(const StringImpl& matchString, unsigned startOffset) const +{ + return ::WTF::findIgnoringASCIICase(*this, matchString, startOffset); +} + +size_t StringImpl::findIgnoringASCIICase(const StringImpl* matchString) const +{ + if (!matchString) + return notFound; + return ::WTF::findIgnoringASCIICase(*this, *matchString, 0); +} + +size_t StringImpl::findIgnoringASCIICase(const StringImpl* matchString, unsigned startOffset) const +{ + if (!matchString) + return notFound; + return ::WTF::findIgnoringASCIICase(*this, *matchString, startOffset); } size_t StringImpl::reverseFind(UChar c, unsigned index) @@ -1284,7 +1320,7 @@ ALWAYS_INLINE static size_t reverseFindIgnoringCaseInner(const SearchCharacterTy unsigned delta = std::min(index, length - matchLength); // keep looping until we match - while (!equalIgnoringCase(searchCharacters + delta, matchCharacters, matchLength)) { + while (!equalCompatibilityCaseless(searchCharacters + delta, matchCharacters, matchLength)) { if (!delta) return notFound; --delta; @@ -1330,26 +1366,52 @@ ALWAYS_INLINE static bool equalInner(const StringImpl* stringImpl, unsigned star return equal(stringImpl->characters16() + startOffset, reinterpret_cast<const LChar*>(matchString), matchLength); } if (stringImpl->is8Bit()) - return equalIgnoringCase(stringImpl->characters8() + startOffset, reinterpret_cast<const LChar*>(matchString), matchLength); - return equalIgnoringCase(stringImpl->characters16() + startOffset, reinterpret_cast<const LChar*>(matchString), matchLength); + return equalCompatibilityCaseless(stringImpl->characters8() + startOffset, reinterpret_cast<const LChar*>(matchString), matchLength); + return equalCompatibilityCaseless(stringImpl->characters16() + startOffset, reinterpret_cast<const LChar*>(matchString), matchLength); +} + +ALWAYS_INLINE static bool equalInner(const StringImpl& stringImpl, unsigned startOffset, const StringImpl& matchString) +{ + if (startOffset > 
stringImpl.length()) + return false; + if (matchString.length() > stringImpl.length()) + return false; + if (matchString.length() + startOffset > stringImpl.length()) + return false; + + if (stringImpl.is8Bit()) { + if (matchString.is8Bit()) + return equal(stringImpl.characters8() + startOffset, matchString.characters8(), matchString.length()); + return equal(stringImpl.characters8() + startOffset, matchString.characters16(), matchString.length()); + } + if (matchString.is8Bit()) + return equal(stringImpl.characters16() + startOffset, matchString.characters8(), matchString.length()); + return equal(stringImpl.characters16() + startOffset, matchString.characters16(), matchString.length()); } bool StringImpl::startsWith(const StringImpl* str) const { if (!str) return false; + return ::WTF::startsWith(*this, *str); +} - if (str->length() > length()) +bool StringImpl::startsWith(const StringImpl& str) const +{ + return ::WTF::startsWith(*this, str); +} + +bool StringImpl::startsWithIgnoringASCIICase(const StringImpl* prefix) const +{ + if (!prefix) return false; - if (is8Bit()) { - if (str->is8Bit()) - return equal(characters8(), str->characters8(), str->length()); - return equal(characters8(), str->characters16(), str->length()); - } - if (str->is8Bit()) - return equal(characters16(), str->characters8(), str->length()); - return equal(characters16(), str->characters16(), str->length()); + return ::WTF::startsWithIgnoringASCIICase(*this, *prefix); +} + +bool StringImpl::startsWithIgnoringASCIICase(const StringImpl& prefix) const +{ + return ::WTF::startsWithIgnoringASCIICase(*this, prefix); } bool StringImpl::startsWith(UChar character) const @@ -1365,6 +1427,24 @@ bool StringImpl::startsWith(const char* matchString, unsigned matchLength, bool return equalInner(this, 0, matchString, matchLength, caseSensitive); } +bool StringImpl::hasInfixStartingAt(const StringImpl& matchString, unsigned startOffset) const +{ + return equalInner(*this, startOffset, matchString); +} + 
+bool StringImpl::endsWith(StringImpl* suffix) +{ + if (!suffix) + return false; + + return ::WTF::endsWith(*this, *suffix); +} + +bool StringImpl::endsWith(StringImpl& suffix) +{ + return ::WTF::endsWith(*this, suffix); +} + bool StringImpl::endsWith(StringImpl* matchString, bool caseSensitive) { ASSERT(matchString); @@ -1375,6 +1455,19 @@ bool StringImpl::endsWith(StringImpl* matchString, bool caseSensitive) return false; } +bool StringImpl::endsWithIgnoringASCIICase(const StringImpl* suffix) const +{ + if (!suffix) + return false; + + return ::WTF::endsWithIgnoringASCIICase(*this, *suffix); +} + +bool StringImpl::endsWithIgnoringASCIICase(const StringImpl& suffix) const +{ + return ::WTF::endsWithIgnoringASCIICase(*this, suffix); +} + bool StringImpl::endsWith(UChar character) const { return m_length && (*this)[m_length - 1] == character; @@ -1389,7 +1482,14 @@ bool StringImpl::endsWith(const char* matchString, unsigned matchLength, bool ca return equalInner(this, startOffset, matchString, matchLength, caseSensitive); } -PassRef<StringImpl> StringImpl::replace(UChar oldC, UChar newC) +bool StringImpl::hasInfixEndingAt(const StringImpl& matchString, unsigned endOffset) const +{ + if (endOffset < matchString.length()) + return false; + return equalInner(*this, endOffset - matchString.length(), matchString); +} + +Ref<StringImpl> StringImpl::replace(UChar oldC, UChar newC) { if (oldC == newC) return *this; @@ -1450,7 +1550,7 @@ PassRef<StringImpl> StringImpl::replace(UChar oldC, UChar newC) return newImpl; } -PassRef<StringImpl> StringImpl::replace(unsigned position, unsigned lengthToReplace, StringImpl* str) +Ref<StringImpl> StringImpl::replace(unsigned position, unsigned lengthToReplace, StringImpl* str) { position = std::min(position, length()); lengthToReplace = std::min(lengthToReplace, length() - position); @@ -1495,7 +1595,7 @@ PassRef<StringImpl> StringImpl::replace(unsigned position, unsigned lengthToRepl return newImpl; } -PassRef<StringImpl> 
StringImpl::replace(UChar pattern, StringImpl* replacement) +Ref<StringImpl> StringImpl::replace(UChar pattern, StringImpl* replacement) { if (!replacement) return *this; @@ -1506,7 +1606,7 @@ PassRef<StringImpl> StringImpl::replace(UChar pattern, StringImpl* replacement) return replace(pattern, replacement->m_data16, replacement->length()); } -PassRef<StringImpl> StringImpl::replace(UChar pattern, const LChar* replacement, unsigned repStrLength) +Ref<StringImpl> StringImpl::replace(UChar pattern, const LChar* replacement, unsigned repStrLength) { ASSERT(replacement); @@ -1583,7 +1683,7 @@ PassRef<StringImpl> StringImpl::replace(UChar pattern, const LChar* replacement, return newImpl; } -PassRef<StringImpl> StringImpl::replace(UChar pattern, const UChar* replacement, unsigned repStrLength) +Ref<StringImpl> StringImpl::replace(UChar pattern, const UChar* replacement, unsigned repStrLength) { ASSERT(replacement); @@ -1663,7 +1763,7 @@ PassRef<StringImpl> StringImpl::replace(UChar pattern, const UChar* replacement, return newImpl; } -PassRef<StringImpl> StringImpl::replace(StringImpl* pattern, StringImpl* replacement) +Ref<StringImpl> StringImpl::replace(StringImpl* pattern, StringImpl* replacement) { if (!pattern || !replacement) return *this; @@ -1770,34 +1870,9 @@ PassRef<StringImpl> StringImpl::replace(StringImpl* pattern, StringImpl* replace return newImpl; } -static inline bool stringImplContentEqual(const StringImpl* a, const StringImpl* b) -{ - unsigned aLength = a->length(); - unsigned bLength = b->length(); - if (aLength != bLength) - return false; - - if (a->is8Bit()) { - if (b->is8Bit()) - return equal(a->characters8(), b->characters8(), aLength); - - return equal(a->characters8(), b->characters16(), aLength); - } - - if (b->is8Bit()) - return equal(a->characters16(), b->characters8(), aLength); - - return equal(a->characters16(), b->characters16(), aLength); -} - bool equal(const StringImpl* a, const StringImpl* b) { - if (a == b) - return true; - if (!a 
|| !b) - return false; - - return stringImplContentEqual(a, b); + return equalCommon(a, b); } template <typename CharType> @@ -1860,109 +1935,34 @@ bool equal(const StringImpl* a, const LChar* b) return !b[length]; } -bool equalNonNull(const StringImpl* a, const StringImpl* b) +bool equal(const StringImpl& a, const StringImpl& b) { - ASSERT(a && b); - if (a == b) - return true; - - return stringImplContentEqual(a, b); + return equalCommon(a, b); } -bool equalIgnoringCase(const StringImpl* a, const StringImpl* b) +bool equalIgnoringNullity(StringImpl* a, StringImpl* b) { - if (a == b) + if (!a && b && !b->length()) return true; - if (!a || !b) - return false; - - return CaseFoldingHash::equal(a, b); -} - -bool equalIgnoringCase(const StringImpl* a, const LChar* b) -{ - if (!a) - return !b; - if (!b) - return !a; - - unsigned length = a->length(); - - // Do a faster loop for the case where all the characters are ASCII. - UChar ored = 0; - bool equal = true; - if (a->is8Bit()) { - const LChar* as = a->characters8(); - for (unsigned i = 0; i != length; ++i) { - LChar bc = b[i]; - if (!bc) - return false; - UChar ac = as[i]; - ored |= ac; - equal = equal && (toASCIILower(ac) == toASCIILower(bc)); - } - - // Do a slower implementation for cases that include non-ASCII characters. - if (ored & ~0x7F) { - equal = true; - for (unsigned i = 0; i != length; ++i) - equal = equal && u_foldCase(as[i], U_FOLD_CASE_DEFAULT) == u_foldCase(b[i], U_FOLD_CASE_DEFAULT); - } - - return equal && !b[length]; - } - - const UChar* as = a->characters16(); - for (unsigned i = 0; i != length; ++i) { - LChar bc = b[i]; - if (!bc) - return false; - UChar ac = as[i]; - ored |= ac; - equal = equal && (toASCIILower(ac) == toASCIILower(bc)); - } - - // Do a slower implementation for cases that include non-ASCII characters. 
- if (ored & ~0x7F) { - equal = true; - for (unsigned i = 0; i != length; ++i) { - equal = equal && u_foldCase(as[i], U_FOLD_CASE_DEFAULT) == u_foldCase(b[i], U_FOLD_CASE_DEFAULT); - } - } - - return equal && !b[length]; + if (!b && a && !a->length()) + return true; + return equal(a, b); } -bool equalIgnoringCaseNonNull(const StringImpl* a, const StringImpl* b) +bool equalIgnoringASCIICase(const StringImpl* a, const StringImpl* b) { - ASSERT(a && b); if (a == b) return true; - - unsigned length = a->length(); - if (length != b->length()) + if (!a || !b) return false; - - if (a->is8Bit()) { - if (b->is8Bit()) - return equalIgnoringCase(a->characters8(), b->characters8(), length); - - return equalIgnoringCase(b->characters16(), a->characters8(), length); - } - - if (b->is8Bit()) - return equalIgnoringCase(a->characters16(), b->characters8(), length); - - return equalIgnoringCase(a->characters16(), b->characters16(), length); + return equalIgnoringASCIICaseCommon(*a, *b); } -bool equalIgnoringNullity(StringImpl* a, StringImpl* b) +bool equalIgnoringASCIICaseNonNull(const StringImpl* a, const StringImpl* b) { - if (!a && b && !b->length()) - return true; - if (!b && a && !a->length()) - return true; - return equal(a, b); + ASSERT(a); + ASSERT(b); + return equalIgnoringASCIICase(*a, *b); } UCharDirection StringImpl::defaultWritingDirection(bool* hasStrongDirectionality) @@ -1985,7 +1985,7 @@ UCharDirection StringImpl::defaultWritingDirection(bool* hasStrongDirectionality return U_LEFT_TO_RIGHT; } -PassRef<StringImpl> StringImpl::adopt(StringBuffer<LChar>& buffer) +Ref<StringImpl> StringImpl::adopt(StringBuffer<LChar>&& buffer) { unsigned length = buffer.length(); if (!length) @@ -1993,7 +1993,7 @@ PassRef<StringImpl> StringImpl::adopt(StringBuffer<LChar>& buffer) return adoptRef(*new StringImpl(buffer.release(), length)); } -PassRef<StringImpl> StringImpl::adopt(StringBuffer<UChar>& buffer) +Ref<StringImpl> StringImpl::adopt(StringBuffer<UChar>&& buffer) { unsigned 
length = buffer.length(); if (!length) @@ -2005,11 +2005,7 @@ size_t StringImpl::sizeInBytes() const { // FIXME: support substrings size_t size = length(); - if (is8Bit()) { - if (has16BitShadow()) { - size += 2 * size; - } - } else + if (!is8Bit()) size *= 2; return size + sizeof(*this); } @@ -2023,8 +2019,7 @@ static inline void putUTF8Triple(char*& buffer, UChar ch) *buffer++ = static_cast<char>((ch & 0x3F) | 0x80); } -bool StringImpl::utf8Impl( - const UChar* characters, unsigned length, char*& buffer, size_t bufferSize, ConversionMode mode) +bool StringImpl::utf8Impl(const UChar* characters, unsigned length, char*& buffer, size_t bufferSize, ConversionMode mode) { if (mode == StrictConversionReplacingUnpairedSurrogatesWithFFFD) { const UChar* charactersEnd = characters + length; @@ -2075,8 +2070,21 @@ bool StringImpl::utf8Impl( return true; } -CString StringImpl::utf8ForCharacters( - const UChar* characters, unsigned length, ConversionMode mode) +CString StringImpl::utf8ForCharacters(const LChar* characters, unsigned length) +{ + if (!length) + return CString("", 0); + if (length > std::numeric_limits<unsigned>::max() / 3) + return CString(); + Vector<char, 1024> bufferVector(length * 3); + char* buffer = bufferVector.data(); + const LChar* source = characters; + ConversionResult result = convertLatin1ToUTF8(&source, source + length, &buffer, buffer + bufferVector.size()); + ASSERT_UNUSED(result, result != targetExhausted); // (length * 3) should be sufficient for any conversion + return CString(bufferVector.data(), buffer - bufferVector.data()); +} + +CString StringImpl::utf8ForCharacters(const UChar* characters, unsigned length, ConversionMode mode) { if (!length) return CString("", 0); @@ -2131,25 +2139,21 @@ CString StringImpl::utf8(ConversionMode mode) const return utf8ForRange(0, length(), mode); } -// Table is based on ftp://ftp.unicode.org/Public/UNIDATA/CaseFolding.txt -const UChar StringImpl::latin1CaseFoldTable[256] = { - 0x0000, 0x0001, 0x0002, 
0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f, - 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, 0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f, - 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f, - 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f, - 0x0040, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, - 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 0x0078, 0x0079, 0x007a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f, - 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, - 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f, - 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, - 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, - 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, - 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x03bc, 0x00b6, 0x00b7, 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf, - 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, - 0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00d7, 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00df, - 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, - 0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7, 0x00f8, 
0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff, -}; - +bool equalIgnoringNullity(const UChar* a, size_t aLength, StringImpl* b) +{ + if (!b) + return !aLength; + if (aLength != b->length()) + return false; + if (b->is8Bit()) { + const LChar* bCharacters = b->characters8(); + for (unsigned i = 0; i < aLength; ++i) { + if (a[i] != bCharacters[i]) + return false; + } + return true; + } + return !memcmp(a, b->characters16(), b->length() * sizeof(UChar)); +} } // namespace WTF diff --git a/Source/WTF/wtf/text/StringImpl.h b/Source/WTF/wtf/text/StringImpl.h index 770acf000..b2c45e8fa 100644 --- a/Source/WTF/wtf/text/StringImpl.h +++ b/Source/WTF/wtf/text/StringImpl.h @@ -1,6 +1,6 @@ /* * Copyright (C) 1999 Lars Knoll (knoll@kde.org) - * Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2013 Apple Inc. All rights reserved. + * Copyright (C) 2005-2010, 2013-2016 Apple Inc. All rights reserved. * Copyright (C) 2009 Google Inc. All rights reserved. * * This library is free software; you can redistribute it and/or @@ -24,16 +24,16 @@ #define StringImpl_h #include <limits.h> +#include <unicode/uchar.h> +#include <unicode/ustring.h> #include <wtf/ASCIICType.h> -#include <wtf/CompilationThread.h> -#include <wtf/CryptographicallyRandomNumber.h> #include <wtf/Forward.h> +#include <wtf/Hasher.h> #include <wtf/MathExtras.h> #include <wtf/StdLibExtras.h> -#include <wtf/StringHasher.h> #include <wtf/Vector.h> #include <wtf/text/ConversionMode.h> -#include <wtf/unicode/Unicode.h> +#include <wtf/text/StringCommon.h> #if USE(CF) typedef const struct __CFString * CFStringRef; @@ -43,27 +43,28 @@ typedef const struct __CFString * CFStringRef; @class NSString; #endif -// FIXME: This is a temporary layering violation while we move string code to WTF. -// Landing the file moves in one patch, will follow on with patches to change the namespaces. 
namespace JSC { -struct IdentifierASCIIStringTranslator; namespace LLInt { class Data; } class LLIntOffsetsExtractor; -template <typename T> struct IdentifierCharBufferTranslator; -struct IdentifierLCharFromUCharTranslator; } namespace WTF { +class SymbolImpl; +class SymbolRegistry; + struct CStringTranslator; -template<typename CharacterType> struct HashAndCharactersTranslator; +struct CharBufferFromLiteralDataTranslator; struct HashAndUTF8CharactersTranslator; struct LCharBufferTranslator; -struct CharBufferFromLiteralDataTranslator; +struct StringHash; struct SubstringTranslator; struct UCharBufferTranslator; + template<typename> class RetainPtr; +template<typename> struct HashAndCharactersTranslator; + enum TextCaseSensitivity { TextCaseSensitive, TextCaseInsensitive @@ -72,10 +73,10 @@ enum TextCaseSensitivity { typedef bool (*CharacterMatchFunctionPtr)(UChar); typedef bool (*IsWhiteSpaceFunctionPtr)(UChar); -// Define STRING_STATS to turn on run time statistics of string sizes and memory usage -#undef STRING_STATS +// Define STRING_STATS to 1 turn on run time statistics of string sizes and memory usage +#define STRING_STATS 0 -#ifdef STRING_STATS +#if STRING_STATS struct StringStats { inline void add8BitString(unsigned length, bool isSubString = false) { @@ -93,33 +94,29 @@ struct StringStats { m_total16BitData += length; } - inline void addUpconvertedString(unsigned length) - { - ++m_numberUpconvertedStrings; - m_totalUpconvertedData += length; - } - - void removeString(StringImpl*); + void removeString(StringImpl&); void printStats(); static const unsigned s_printStringStatsFrequency = 5000; - static unsigned s_stringRemovesTillPrintStats; - - unsigned m_totalNumberStrings; - unsigned m_number8BitStrings; - unsigned m_number16BitStrings; - unsigned m_numberUpconvertedStrings; - unsigned long long m_total8BitData; - unsigned long long m_total16BitData; - unsigned long long m_totalUpconvertedData; + static std::atomic<unsigned> s_stringRemovesTillPrintStats; 
+ + std::atomic<unsigned> m_refCalls; + std::atomic<unsigned> m_derefCalls; + + std::atomic<unsigned> m_totalNumberStrings; + std::atomic<unsigned> m_number8BitStrings; + std::atomic<unsigned> m_number16BitStrings; + std::atomic<unsigned long long> m_total8BitData; + std::atomic<unsigned long long> m_total16BitData; }; #define STRING_STATS_ADD_8BIT_STRING(length) StringImpl::stringStats().add8BitString(length) #define STRING_STATS_ADD_8BIT_STRING2(length, isSubString) StringImpl::stringStats().add8BitString(length, isSubString) #define STRING_STATS_ADD_16BIT_STRING(length) StringImpl::stringStats().add16BitString(length) #define STRING_STATS_ADD_16BIT_STRING2(length, isSubString) StringImpl::stringStats().add16BitString(length, isSubString) -#define STRING_STATS_ADD_UPCONVERTED_STRING(length) StringImpl::stringStats().addUpconvertedString(length) #define STRING_STATS_REMOVE_STRING(string) StringImpl::stringStats().removeString(string) +#define STRING_STATS_REF_STRING(string) ++StringImpl::stringStats().m_refCalls; +#define STRING_STATS_DEREF_STRING(string) ++StringImpl::stringStats().m_derefCalls; #else #define STRING_STATS_ADD_8BIT_STRING(length) ((void)0) #define STRING_STATS_ADD_8BIT_STRING2(length, isSubString) ((void)0) @@ -127,14 +124,12 @@ struct StringStats { #define STRING_STATS_ADD_16BIT_STRING2(length, isSubString) ((void)0) #define STRING_STATS_ADD_UPCONVERTED_STRING(length) ((void)0) #define STRING_STATS_REMOVE_STRING(string) ((void)0) +#define STRING_STATS_REF_STRING(string) ((void)0) +#define STRING_STATS_DEREF_STRING(string) ((void)0) #endif class StringImpl { WTF_MAKE_NONCOPYABLE(StringImpl); WTF_MAKE_FAST_ALLOCATED; - friend struct JSC::IdentifierASCIIStringTranslator; - friend struct JSC::IdentifierCharBufferTranslator<LChar>; - friend struct JSC::IdentifierCharBufferTranslator<UChar>; - friend struct JSC::IdentifierLCharFromUCharTranslator; friend struct WTF::CStringTranslator; template<typename CharacterType> friend struct 
WTF::HashAndCharactersTranslator; friend struct WTF::HashAndUTF8CharactersTranslator; @@ -142,9 +137,9 @@ class StringImpl { friend struct WTF::LCharBufferTranslator; friend struct WTF::SubstringTranslator; friend struct WTF::UCharBufferTranslator; - friend class AtomicStringImpl; friend class JSC::LLInt::Data; friend class JSC::LLIntOffsetsExtractor; + friend class SymbolImpl; private: enum BufferOwnership { @@ -153,42 +148,26 @@ private: BufferSubstring, }; - // Used to construct static strings, which have an special refCount that can never hit zero. - // This means that the static string will never be destroyed, which is important because - // static strings will be shared across threads & ref-counted in a non-threadsafe manner. - enum ConstructStaticStringTag { ConstructStaticString }; - StringImpl(const UChar* characters, unsigned length, ConstructStaticStringTag) - : m_refCount(s_refCountFlagIsStaticString) - , m_length(length) - , m_data16(characters) - , m_buffer(0) - , m_hashAndFlags(s_hashFlagIsIdentifier | BufferOwned) - { - // Ensure that the hash is computed so that AtomicStringHash can call existingHash() - // with impunity. The empty string is special because it is never entered into - // AtomicString's HashKey, but still needs to compare correctly. - STRING_STATS_ADD_16BIT_STRING(m_length); - - hash(); - } - - // Used to construct static strings, which have an special refCount that can never hit zero. - // This means that the static string will never be destroyed, which is important because - // static strings will be shared across threads & ref-counted in a non-threadsafe manner. 
- StringImpl(const LChar* characters, unsigned length, ConstructStaticStringTag) - : m_refCount(s_refCountFlagIsStaticString) - , m_length(length) - , m_data8(characters) - , m_buffer(0) - , m_hashAndFlags(s_hashFlag8BitBuffer | s_hashFlagIsIdentifier | BufferOwned) - { - // Ensure that the hash is computed so that AtomicStringHash can call existingHash() - // with impunity. The empty string is special because it is never entered into - // AtomicString's HashKey, but still needs to compare correctly. - STRING_STATS_ADD_8BIT_STRING(m_length); - - hash(); - } + // The bottom 6 bits in the hash are flags. +public: + static constexpr const unsigned s_flagCount = 6; +private: + static constexpr const unsigned s_flagMask = (1u << s_flagCount) - 1; + static_assert(s_flagCount <= StringHasher::flagCount, "StringHasher reserves enough bits for StringImpl flags"); + static constexpr const unsigned s_flagStringKindCount = 4; + + static constexpr const unsigned s_hashFlagStringKindIsAtomic = 1u << (s_flagStringKindCount); + static constexpr const unsigned s_hashFlagStringKindIsSymbol = 1u << (s_flagStringKindCount + 1); + static constexpr const unsigned s_hashMaskStringKind = s_hashFlagStringKindIsAtomic | s_hashFlagStringKindIsSymbol; + static constexpr const unsigned s_hashFlag8BitBuffer = 1u << 3; + static constexpr const unsigned s_hashFlagDidReportCost = 1u << 2; + static constexpr const unsigned s_hashMaskBufferOwnership = (1u << 0) | (1u << 1); + + enum StringKind { + StringNormal = 0u, // non-symbol, non-atomic + StringAtomic = s_hashFlagStringKindIsAtomic, // non-symbol, atomic + StringSymbol = s_hashFlagStringKindIsSymbol, // symbol, non-atomic + }; // FIXME: there has to be a less hacky way to do this. 
enum Force8Bit { Force8BitConstructor }; @@ -196,9 +175,8 @@ private: StringImpl(unsigned length, Force8Bit) : m_refCount(s_refCountIncrement) , m_length(length) - , m_data8(reinterpret_cast<const LChar*>(this + 1)) - , m_buffer(0) - , m_hashAndFlags(s_hashFlag8BitBuffer | BufferInternal) + , m_data8(tailPointer<LChar>()) + , m_hashAndFlags(s_hashFlag8BitBuffer | StringNormal | BufferInternal) { ASSERT(m_data8); ASSERT(m_length); @@ -210,9 +188,8 @@ private: StringImpl(unsigned length) : m_refCount(s_refCountIncrement) , m_length(length) - , m_data16(reinterpret_cast<const UChar*>(this + 1)) - , m_buffer(0) - , m_hashAndFlags(BufferInternal) + , m_data16(tailPointer<UChar>()) + , m_hashAndFlags(StringNormal | BufferInternal) { ASSERT(m_data16); ASSERT(m_length); @@ -225,8 +202,7 @@ private: : m_refCount(s_refCountIncrement) , m_length(length) , m_data8(characters.leakPtr()) - , m_buffer(0) - , m_hashAndFlags(s_hashFlag8BitBuffer | BufferOwned) + , m_hashAndFlags(s_hashFlag8BitBuffer | StringNormal | BufferOwned) { ASSERT(m_data8); ASSERT(m_length); @@ -239,26 +215,24 @@ private: : m_refCount(s_refCountIncrement) , m_length(length) , m_data16(characters) - , m_buffer(0) - , m_hashAndFlags(BufferInternal) + , m_hashAndFlags(StringNormal | BufferInternal) { ASSERT(m_data16); ASSERT(m_length); - STRING_STATS_ADD_16BIT_STRING(0); + STRING_STATS_ADD_16BIT_STRING(m_length); } StringImpl(const LChar* characters, unsigned length, ConstructWithoutCopyingTag) : m_refCount(s_refCountIncrement) , m_length(length) , m_data8(characters) - , m_buffer(0) - , m_hashAndFlags(s_hashFlag8BitBuffer | BufferInternal) + , m_hashAndFlags(s_hashFlag8BitBuffer | StringNormal | BufferInternal) { ASSERT(m_data8); ASSERT(m_length); - STRING_STATS_ADD_8BIT_STRING(0); + STRING_STATS_ADD_8BIT_STRING(m_length); } // Create a StringImpl adopting ownership of the provided buffer (BufferOwned) @@ -266,8 +240,7 @@ private: : m_refCount(s_refCountIncrement) , m_length(length) , 
m_data16(characters.leakPtr()) - , m_buffer(0) - , m_hashAndFlags(BufferOwned) + , m_hashAndFlags(StringNormal | BufferOwned) { ASSERT(m_data16); ASSERT(m_length); @@ -276,109 +249,74 @@ private: } // Used to create new strings that are a substring of an existing 8-bit StringImpl (BufferSubstring) - StringImpl(const LChar* characters, unsigned length, PassRefPtr<StringImpl> base) + StringImpl(const LChar* characters, unsigned length, Ref<StringImpl>&& base) : m_refCount(s_refCountIncrement) , m_length(length) , m_data8(characters) - , m_substringBuffer(base.leakRef()) - , m_hashAndFlags(s_hashFlag8BitBuffer | BufferSubstring) + , m_hashAndFlags(s_hashFlag8BitBuffer | StringNormal | BufferSubstring) { ASSERT(is8Bit()); ASSERT(m_data8); ASSERT(m_length); - ASSERT(m_substringBuffer->bufferOwnership() != BufferSubstring); + ASSERT(base->bufferOwnership() != BufferSubstring); + + substringBuffer() = &base.leakRef(); STRING_STATS_ADD_8BIT_STRING2(m_length, true); } // Used to create new strings that are a substring of an existing 16-bit StringImpl (BufferSubstring) - StringImpl(const UChar* characters, unsigned length, PassRefPtr<StringImpl> base) + StringImpl(const UChar* characters, unsigned length, Ref<StringImpl>&& base) : m_refCount(s_refCountIncrement) , m_length(length) , m_data16(characters) - , m_substringBuffer(base.leakRef()) - , m_hashAndFlags(BufferSubstring) + , m_hashAndFlags(StringNormal | BufferSubstring) { ASSERT(!is8Bit()); ASSERT(m_data16); ASSERT(m_length); - ASSERT(m_substringBuffer->bufferOwnership() != BufferSubstring); + ASSERT(base->bufferOwnership() != BufferSubstring); - STRING_STATS_ADD_16BIT_STRING2(m_length, true); - } + substringBuffer() = &base.leakRef(); - enum CreateEmptyUnique_T { CreateEmptyUnique }; - StringImpl(CreateEmptyUnique_T) - : m_refCount(s_refCountIncrement) - , m_length(0) - // We expect m_buffer to be initialized to 0 as we use it - // to represent a null terminated buffer. 
- , m_data16(reinterpret_cast<const UChar*>(&m_buffer)) - , m_buffer(0) - { - ASSERT(m_data16); - // Set the hash early, so that all empty unique StringImpls have a hash, - // and don't use the normal hashing algorithm - the unique nature of these - // keys means that we don't need them to match any other string (in fact, - // that's exactly the oposite of what we want!), and teh normal hash would - // lead to lots of conflicts. - unsigned hash = cryptographicallyRandomNumber() | 1; - hash <<= s_flagCount; - if (!hash) - hash = 1 << s_flagCount; - m_hashAndFlags = hash | BufferInternal; - - STRING_STATS_ADD_16BIT_STRING(m_length); + STRING_STATS_ADD_16BIT_STRING2(m_length, true); } - ~StringImpl(); - public: WTF_EXPORT_STRING_API static void destroy(StringImpl*); - WTF_EXPORT_STRING_API static PassRef<StringImpl> create(const UChar*, unsigned length); - WTF_EXPORT_STRING_API static PassRef<StringImpl> create(const LChar*, unsigned length); - WTF_EXPORT_STRING_API static PassRef<StringImpl> create8BitIfPossible(const UChar*, unsigned length); + WTF_EXPORT_STRING_API static Ref<StringImpl> create(const UChar*, unsigned length); + WTF_EXPORT_STRING_API static Ref<StringImpl> create(const LChar*, unsigned length); + WTF_EXPORT_STRING_API static Ref<StringImpl> create8BitIfPossible(const UChar*, unsigned length); template<size_t inlineCapacity> - static PassRef<StringImpl> create8BitIfPossible(const Vector<UChar, inlineCapacity>& vector) + static Ref<StringImpl> create8BitIfPossible(const Vector<UChar, inlineCapacity>& vector) { return create8BitIfPossible(vector.data(), vector.size()); } - WTF_EXPORT_STRING_API static PassRef<StringImpl> create8BitIfPossible(const UChar*); + WTF_EXPORT_STRING_API static Ref<StringImpl> create8BitIfPossible(const UChar*); - ALWAYS_INLINE static PassRef<StringImpl> create(const char* s, unsigned length) { return create(reinterpret_cast<const LChar*>(s), length); } - WTF_EXPORT_STRING_API static PassRef<StringImpl> create(const LChar*); - 
ALWAYS_INLINE static PassRef<StringImpl> create(const char* s) { return create(reinterpret_cast<const LChar*>(s)); } + ALWAYS_INLINE static Ref<StringImpl> create(const char* s, unsigned length) { return create(reinterpret_cast<const LChar*>(s), length); } + WTF_EXPORT_STRING_API static Ref<StringImpl> create(const LChar*); + ALWAYS_INLINE static Ref<StringImpl> create(const char* s) { return create(reinterpret_cast<const LChar*>(s)); } - static ALWAYS_INLINE PassRef<StringImpl> create8(PassRefPtr<StringImpl> rep, unsigned offset, unsigned length) + static ALWAYS_INLINE Ref<StringImpl> createSubstringSharingImpl(StringImpl& rep, unsigned offset, unsigned length) { - ASSERT(rep); - ASSERT(length <= rep->length()); + ASSERT(length <= rep.length()); if (!length) return *empty(); - ASSERT(rep->is8Bit()); - StringImpl* ownerRep = (rep->bufferOwnership() == BufferSubstring) ? rep->m_substringBuffer : rep.get(); - return adoptRef(*new StringImpl(rep->m_data8 + offset, length, ownerRep)); - } - - static ALWAYS_INLINE PassRef<StringImpl> create(PassRefPtr<StringImpl> rep, unsigned offset, unsigned length) - { - ASSERT(rep); - ASSERT(length <= rep->length()); - - if (!length) - return *empty(); + auto* ownerRep = ((rep.bufferOwnership() == BufferSubstring) ? rep.substringBuffer() : &rep); - StringImpl* ownerRep = (rep->bufferOwnership() == BufferSubstring) ? rep->m_substringBuffer : rep.get(); - if (rep->is8Bit()) - return adoptRef(*new StringImpl(rep->m_data8 + offset, length, ownerRep)); - return adoptRef(*new StringImpl(rep->m_data16 + offset, length, ownerRep)); + // We allocate a buffer that contains both the StringImpl struct as well as the pointer to the owner string. 
+ auto* stringImpl = static_cast<StringImpl*>(fastMalloc(allocationSize<StringImpl*>(1))); + if (rep.is8Bit()) + return adoptRef(*new (NotNull, stringImpl) StringImpl(rep.m_data8 + offset, length, *ownerRep)); + return adoptRef(*new (NotNull, stringImpl) StringImpl(rep.m_data16 + offset, length, *ownerRep)); } template<unsigned charactersCount> - ALWAYS_INLINE static PassRef<StringImpl> createFromLiteral(const char (&characters)[charactersCount]) + ALWAYS_INLINE static Ref<StringImpl> createFromLiteral(const char (&characters)[charactersCount]) { COMPILE_ASSERT(charactersCount > 1, StringImplFromLiteralNotEmpty); COMPILE_ASSERT((charactersCount - 1 <= ((unsigned(~0) - sizeof(StringImpl)) / sizeof(LChar))), StringImplFromLiteralCannotOverflow); @@ -387,53 +325,50 @@ public: } // FIXME: Transition off of these functions to createWithoutCopying instead. - WTF_EXPORT_STRING_API static PassRef<StringImpl> createFromLiteral(const char* characters, unsigned length); - WTF_EXPORT_STRING_API static PassRef<StringImpl> createFromLiteral(const char* characters); + WTF_EXPORT_STRING_API static Ref<StringImpl> createFromLiteral(const char* characters, unsigned length); + WTF_EXPORT_STRING_API static Ref<StringImpl> createFromLiteral(const char* characters); - WTF_EXPORT_STRING_API static PassRef<StringImpl> createWithoutCopying(const UChar* characters, unsigned length); - WTF_EXPORT_STRING_API static PassRef<StringImpl> createWithoutCopying(const LChar* characters, unsigned length); + WTF_EXPORT_STRING_API static Ref<StringImpl> createWithoutCopying(const UChar* characters, unsigned length); + WTF_EXPORT_STRING_API static Ref<StringImpl> createWithoutCopying(const LChar* characters, unsigned length); - WTF_EXPORT_STRING_API static PassRef<StringImpl> createUninitialized(unsigned length, LChar*& data); - WTF_EXPORT_STRING_API static PassRef<StringImpl> createUninitialized(unsigned length, UChar*& data); - template <typename T> static ALWAYS_INLINE PassRefPtr<StringImpl> 
tryCreateUninitialized(unsigned length, T*& output) + WTF_EXPORT_STRING_API static Ref<StringImpl> createUninitialized(unsigned length, LChar*& data); + WTF_EXPORT_STRING_API static Ref<StringImpl> createUninitialized(unsigned length, UChar*& data); + template <typename T> static ALWAYS_INLINE RefPtr<StringImpl> tryCreateUninitialized(unsigned length, T*& output) { if (!length) { - output = 0; + output = nullptr; return empty(); } if (length > ((std::numeric_limits<unsigned>::max() - sizeof(StringImpl)) / sizeof(T))) { - output = 0; - return 0; + output = nullptr; + return nullptr; } StringImpl* resultImpl; - if (!tryFastMalloc(sizeof(T) * length + sizeof(StringImpl)).getValue(resultImpl)) { - output = 0; - return 0; + if (!tryFastMalloc(allocationSize<T>(length)).getValue(resultImpl)) { + output = nullptr; + return nullptr; } - output = reinterpret_cast<T*>(resultImpl + 1); + output = resultImpl->tailPointer<T>(); return constructInternal<T>(resultImpl, length); } - static PassRef<StringImpl> createEmptyUnique() - { - return adoptRef(*new StringImpl(CreateEmptyUnique)); - } - - // Reallocate the StringImpl. The originalString must be only owned by the PassRefPtr, + // Reallocate the StringImpl. The originalString must be only owned by the Ref, // and the buffer ownership must be BufferInternal. Just like the input pointer of realloc(), // the originalString can't be used after this function. 
- static PassRef<StringImpl> reallocate(PassRefPtr<StringImpl> originalString, unsigned length, LChar*& data); - static PassRef<StringImpl> reallocate(PassRefPtr<StringImpl> originalString, unsigned length, UChar*& data); + static Ref<StringImpl> reallocate(Ref<StringImpl>&& originalString, unsigned length, LChar*& data); + static Ref<StringImpl> reallocate(Ref<StringImpl>&& originalString, unsigned length, UChar*& data); static unsigned flagsOffset() { return OBJECT_OFFSETOF(StringImpl, m_hashAndFlags); } static unsigned flagIs8Bit() { return s_hashFlag8BitBuffer; } - static unsigned flagIsIdentifier() { return s_hashFlagIsIdentifier; } + static unsigned flagIsAtomic() { return s_hashFlagStringKindIsAtomic; } + static unsigned flagIsSymbol() { return s_hashFlagStringKindIsSymbol; } + static unsigned maskStringKind() { return s_hashMaskStringKind; } static unsigned dataOffset() { return OBJECT_OFFSETOF(StringImpl, m_data8); } template<typename CharType, size_t inlineCapacity, typename OverflowHandler> - static PassRef<StringImpl> adopt(Vector<CharType, inlineCapacity, OverflowHandler>& vector) + static Ref<StringImpl> adopt(Vector<CharType, inlineCapacity, OverflowHandler>&& vector) { if (size_t size = vector.size()) { ASSERT(vector.data()); @@ -444,31 +379,24 @@ public: return *empty(); } - WTF_EXPORT_STRING_API static PassRef<StringImpl> adopt(StringBuffer<UChar>&); - WTF_EXPORT_STRING_API static PassRef<StringImpl> adopt(StringBuffer<LChar>&); + WTF_EXPORT_STRING_API static Ref<StringImpl> adopt(StringBuffer<UChar>&&); + WTF_EXPORT_STRING_API static Ref<StringImpl> adopt(StringBuffer<LChar>&&); unsigned length() const { return m_length; } + static ptrdiff_t lengthMemoryOffset() { return OBJECT_OFFSETOF(StringImpl, m_length); } bool is8Bit() const { return m_hashAndFlags & s_hashFlag8BitBuffer; } ALWAYS_INLINE const LChar* characters8() const { ASSERT(is8Bit()); return m_data8; } ALWAYS_INLINE const UChar* characters16() const { ASSERT(!is8Bit()); return 
m_data16; } - const UChar* characters() const { return deprecatedCharacters(); } // FIXME: Delete this. - ALWAYS_INLINE const UChar* deprecatedCharacters() const - { - if (!is8Bit()) - return m_data16; - - return getData16SlowCase(); - } template <typename CharType> - ALWAYS_INLINE const CharType * getCharacters() const; + ALWAYS_INLINE const CharType *characters() const; size_t cost() const { // For substrings, return the cost of the base string. if (bufferOwnership() == BufferSubstring) - return m_substringBuffer->cost(); + return substringBuffer()->cost(); if (m_hashAndFlags & s_hashFlagDidReportCost) return 0; @@ -486,7 +414,7 @@ public: return 0; if (bufferOwnership() == BufferSubstring) - return divideRoundedUp(m_substringBuffer->costDuringGC(), refCount()); + return divideRoundedUp(substringBuffer()->costDuringGC(), refCount()); size_t result = m_length; if (!is8Bit()) @@ -496,40 +424,28 @@ public: WTF_EXPORT_STRING_API size_t sizeInBytes() const; - bool has16BitShadow() const { return m_hashAndFlags & s_hashFlagHas16BitShadow; } - WTF_EXPORT_STRING_API void upconvertCharacters(unsigned, unsigned) const; - bool isIdentifier() const { return m_hashAndFlags & s_hashFlagIsIdentifier; } - bool isIdentifierOrUnique() const { return isIdentifier() || isEmptyUnique(); } - void setIsIdentifier(bool isIdentifier) - { - ASSERT(!isStatic()); - ASSERT(!isEmptyUnique()); - if (isIdentifier) - m_hashAndFlags |= s_hashFlagIsIdentifier; - else - m_hashAndFlags &= ~s_hashFlagIsIdentifier; - } - - bool isEmptyUnique() const - { - return !length() && !isStatic(); - } + StringKind stringKind() const { return static_cast<StringKind>(m_hashAndFlags & s_hashMaskStringKind); } + bool isSymbol() const { return m_hashAndFlags & s_hashFlagStringKindIsSymbol; } + bool isAtomic() const { return m_hashAndFlags & s_hashFlagStringKindIsAtomic; } - bool isAtomic() const { return m_hashAndFlags & s_hashFlagIsAtomic; } void setIsAtomic(bool isAtomic) { ASSERT(!isStatic()); - 
ASSERT(!isEmptyUnique()); - if (isAtomic) - m_hashAndFlags |= s_hashFlagIsAtomic; - else - m_hashAndFlags &= ~s_hashFlagIsAtomic; + ASSERT(!isSymbol()); + if (isAtomic) { + m_hashAndFlags |= s_hashFlagStringKindIsAtomic; + ASSERT(stringKind() == StringAtomic); + } else { + m_hashAndFlags &= ~s_hashFlagStringKindIsAtomic; + ASSERT(stringKind() == StringNormal); + } } -#ifdef STRING_STATS - bool isSubString() const { return bufferOwnership() == BufferSubstring; } +#if STRING_STATS + bool isSubString() const { return bufferOwnership() == BufferSubstring; } #endif + static WTF_EXPORT_STRING_API CString utf8ForCharacters(const LChar* characters, unsigned length); static WTF_EXPORT_STRING_API CString utf8ForCharacters(const UChar* characters, unsigned length, ConversionMode = LenientConversion); WTF_EXPORT_STRING_API CString utf8ForRange(unsigned offset, unsigned length, ConversionMode = LenientConversion) const; WTF_EXPORT_STRING_API CString utf8(ConversionMode = LenientConversion) const; @@ -577,7 +493,12 @@ public: return existingHash(); return hashSlowCase(); } - + + WTF_EXPORT_PRIVATE unsigned concurrentHash() const; + + unsigned symbolAwareHash() const; + unsigned existingSymbolAwareHash() const; + bool isStatic() const { return m_refCount & s_refCountFlagIsStaticString; } inline size_t refCount() const @@ -598,13 +519,15 @@ public: inline void ref() { - ASSERT(!isCompilationThread()); + STRING_STATS_REF_STRING(*this); + m_refCount += s_refCountIncrement; } inline void deref() { - ASSERT(!isCompilationThread()); + STRING_STATS_DEREF_STRING(*this); + unsigned tempRefCount = m_refCount - s_refCountIncrement; if (!tempRefCount) { StringImpl::destroy(this); @@ -613,7 +536,47 @@ public: m_refCount = tempRefCount; } - WTF_EXPORT_PRIVATE static StringImpl* empty(); + class StaticStringImpl { + WTF_MAKE_NONCOPYABLE(StaticStringImpl); + public: + // Used to construct static strings, which have an special refCount that can never hit zero. 
+ // This means that the static string will never be destroyed, which is important because + // static strings will be shared across threads & ref-counted in a non-threadsafe manner. + template<unsigned charactersCount> + constexpr StaticStringImpl(const char (&characters)[charactersCount], StringKind stringKind = StringNormal) + : m_refCount(s_refCountFlagIsStaticString) + , m_length(charactersCount - 1) + , m_data8(characters) + , m_hashAndFlags(s_hashFlag8BitBuffer | stringKind | BufferInternal | (StringHasher::computeLiteralHashAndMaskTop8Bits(characters) << s_flagCount)) + { + } + + template<unsigned charactersCount> + constexpr StaticStringImpl(const char16_t (&characters)[charactersCount], StringKind stringKind = StringNormal) + : m_refCount(s_refCountFlagIsStaticString) + , m_length(charactersCount - 1) + , m_data16(characters) + , m_hashAndFlags(stringKind | BufferInternal | (StringHasher::computeLiteralHashAndMaskTop8Bits(characters) << s_flagCount)) + { + } + + operator StringImpl&() + { + return *reinterpret_cast<StringImpl*>(this); + } + + // These member variables must match the layout of StringImpl. + unsigned m_refCount; + unsigned m_length; + union { + const char* m_data8; + const char16_t* m_data16; + }; + unsigned m_hashAndFlags; + }; + + WTF_EXPORTDATA static StaticStringImpl s_atomicEmptyString; + ALWAYS_INLINE static StringImpl* empty() { return reinterpret_cast<StringImpl*>(&s_atomicEmptyString); } // FIXME: Does this really belong in StringImpl? template <typename T> static void copyChars(T* destination, const T* source, unsigned numCharacters) @@ -652,9 +615,9 @@ public: // Some string features, like refcounting and the atomicity flag, are not // thread-safe. We achieve thread safety by isolation, giving each thread // its own copy of the string. 
- PassRef<StringImpl> isolatedCopy() const; + Ref<StringImpl> isolatedCopy() const; - WTF_EXPORT_STRING_API PassRef<StringImpl> substring(unsigned pos, unsigned len = UINT_MAX); + WTF_EXPORT_STRING_API Ref<StringImpl> substring(unsigned pos, unsigned len = UINT_MAX); UChar at(unsigned i) const { @@ -686,23 +649,24 @@ public: double toDouble(bool* ok = 0); float toFloat(bool* ok = 0); - WTF_EXPORT_STRING_API PassRef<StringImpl> lower(); - WTF_EXPORT_STRING_API PassRef<StringImpl> upper(); - WTF_EXPORT_STRING_API PassRef<StringImpl> lower(const AtomicString& localeIdentifier); - WTF_EXPORT_STRING_API PassRef<StringImpl> upper(const AtomicString& localeIdentifier); + WTF_EXPORT_STRING_API Ref<StringImpl> convertToASCIILowercase(); + WTF_EXPORT_STRING_API Ref<StringImpl> convertToASCIIUppercase(); + WTF_EXPORT_STRING_API Ref<StringImpl> convertToLowercaseWithoutLocale(); + WTF_EXPORT_STRING_API Ref<StringImpl> convertToLowercaseWithoutLocaleStartingAtFailingIndex8Bit(unsigned); + WTF_EXPORT_STRING_API Ref<StringImpl> convertToUppercaseWithoutLocale(); + WTF_EXPORT_STRING_API Ref<StringImpl> convertToLowercaseWithLocale(const AtomicString& localeIdentifier); + WTF_EXPORT_STRING_API Ref<StringImpl> convertToUppercaseWithLocale(const AtomicString& localeIdentifier); - WTF_EXPORT_STRING_API PassRef<StringImpl> fill(UChar); - // FIXME: Do we need fill(char) or can we just do the right thing if UChar is ASCII? 
- PassRef<StringImpl> foldCase(); + Ref<StringImpl> foldCase(); - PassRef<StringImpl> stripWhiteSpace(); - PassRef<StringImpl> stripWhiteSpace(IsWhiteSpaceFunctionPtr); - WTF_EXPORT_STRING_API PassRef<StringImpl> simplifyWhiteSpace(); - PassRef<StringImpl> simplifyWhiteSpace(IsWhiteSpaceFunctionPtr); + Ref<StringImpl> stripWhiteSpace(); + Ref<StringImpl> stripWhiteSpace(IsWhiteSpaceFunctionPtr); + WTF_EXPORT_STRING_API Ref<StringImpl> simplifyWhiteSpace(); + Ref<StringImpl> simplifyWhiteSpace(IsWhiteSpaceFunctionPtr); - PassRef<StringImpl> removeCharacters(CharacterMatchFunctionPtr); + Ref<StringImpl> removeCharacters(CharacterMatchFunctionPtr); template <typename CharType> - ALWAYS_INLINE PassRef<StringImpl> removeCharacters(const CharType* characters, CharacterMatchFunctionPtr); + ALWAYS_INLINE Ref<StringImpl> removeCharacters(const CharType* characters, CharacterMatchFunctionPtr); size_t find(LChar character, unsigned start = 0); size_t find(char character, unsigned start = 0); @@ -715,33 +679,44 @@ public: size_t findIgnoringCase(const LChar*, unsigned index = 0); ALWAYS_INLINE size_t findIgnoringCase(const char* s, unsigned index = 0) { return findIgnoringCase(reinterpret_cast<const LChar*>(s), index); } WTF_EXPORT_STRING_API size_t findIgnoringCase(StringImpl*, unsigned index = 0); - - WTF_EXPORT_STRING_API size_t findNextLineStart(unsigned index = UINT_MAX); + WTF_EXPORT_STRING_API size_t findIgnoringASCIICase(const StringImpl&) const; + WTF_EXPORT_STRING_API size_t findIgnoringASCIICase(const StringImpl&, unsigned startOffset) const; + WTF_EXPORT_STRING_API size_t findIgnoringASCIICase(const StringImpl*) const; + WTF_EXPORT_STRING_API size_t findIgnoringASCIICase(const StringImpl*, unsigned startOffset) const; WTF_EXPORT_STRING_API size_t reverseFind(UChar, unsigned index = UINT_MAX); WTF_EXPORT_STRING_API size_t reverseFind(StringImpl*, unsigned index = UINT_MAX); WTF_EXPORT_STRING_API size_t reverseFindIgnoringCase(StringImpl*, unsigned index = UINT_MAX); 
WTF_EXPORT_STRING_API bool startsWith(const StringImpl*) const; + WTF_EXPORT_STRING_API bool startsWith(const StringImpl&) const; + WTF_EXPORT_STRING_API bool startsWithIgnoringASCIICase(const StringImpl*) const; + WTF_EXPORT_STRING_API bool startsWithIgnoringASCIICase(const StringImpl&) const; bool startsWith(StringImpl* str, bool caseSensitive) { return caseSensitive ? startsWith(str) : (reverseFindIgnoringCase(str, 0) == 0); } WTF_EXPORT_STRING_API bool startsWith(UChar) const; WTF_EXPORT_STRING_API bool startsWith(const char*, unsigned matchLength, bool caseSensitive) const; template<unsigned matchLength> bool startsWith(const char (&prefix)[matchLength], bool caseSensitive = true) const { return startsWith(prefix, matchLength - 1, caseSensitive); } + WTF_EXPORT_STRING_API bool hasInfixStartingAt(const StringImpl&, unsigned startOffset) const; - WTF_EXPORT_STRING_API bool endsWith(StringImpl*, bool caseSensitive = true); + WTF_EXPORT_STRING_API bool endsWith(StringImpl*); + WTF_EXPORT_STRING_API bool endsWith(StringImpl&); + WTF_EXPORT_STRING_API bool endsWithIgnoringASCIICase(const StringImpl*) const; + WTF_EXPORT_STRING_API bool endsWithIgnoringASCIICase(const StringImpl&) const; + WTF_EXPORT_STRING_API bool endsWith(StringImpl*, bool caseSensitive); WTF_EXPORT_STRING_API bool endsWith(UChar) const; WTF_EXPORT_STRING_API bool endsWith(const char*, unsigned matchLength, bool caseSensitive) const; template<unsigned matchLength> bool endsWith(const char (&prefix)[matchLength], bool caseSensitive = true) const { return endsWith(prefix, matchLength - 1, caseSensitive); } + WTF_EXPORT_STRING_API bool hasInfixEndingAt(const StringImpl&, unsigned endOffset) const; - WTF_EXPORT_STRING_API PassRef<StringImpl> replace(UChar, UChar); - WTF_EXPORT_STRING_API PassRef<StringImpl> replace(UChar, StringImpl*); - ALWAYS_INLINE PassRef<StringImpl> replace(UChar pattern, const char* replacement, unsigned replacementLength) { return replace(pattern, reinterpret_cast<const 
LChar*>(replacement), replacementLength); } - WTF_EXPORT_STRING_API PassRef<StringImpl> replace(UChar, const LChar*, unsigned replacementLength); - PassRef<StringImpl> replace(UChar, const UChar*, unsigned replacementLength); - WTF_EXPORT_STRING_API PassRef<StringImpl> replace(StringImpl*, StringImpl*); - WTF_EXPORT_STRING_API PassRef<StringImpl> replace(unsigned index, unsigned len, StringImpl*); + WTF_EXPORT_STRING_API Ref<StringImpl> replace(UChar, UChar); + WTF_EXPORT_STRING_API Ref<StringImpl> replace(UChar, StringImpl*); + ALWAYS_INLINE Ref<StringImpl> replace(UChar pattern, const char* replacement, unsigned replacementLength) { return replace(pattern, reinterpret_cast<const LChar*>(replacement), replacementLength); } + WTF_EXPORT_STRING_API Ref<StringImpl> replace(UChar, const LChar*, unsigned replacementLength); + Ref<StringImpl> replace(UChar, const UChar*, unsigned replacementLength); + WTF_EXPORT_STRING_API Ref<StringImpl> replace(StringImpl*, StringImpl*); + WTF_EXPORT_STRING_API Ref<StringImpl> replace(unsigned index, unsigned len, StringImpl*); WTF_EXPORT_STRING_API UCharDirection defaultWritingDirection(bool* hasStrongDirectionality = nullptr); @@ -749,14 +724,70 @@ public: RetainPtr<CFStringRef> createCFString(); #endif #ifdef __OBJC__ - WTF_EXPORT_STRING_API operator NSString*(); + WTF_EXPORT_STRING_API operator NSString *(); #endif -#ifdef STRING_STATS +#if STRING_STATS ALWAYS_INLINE static StringStats& stringStats() { return m_stringStats; } #endif - WTF_EXPORT_STRING_API static const UChar latin1CaseFoldTable[256]; +protected: + ~StringImpl(); + + enum CreateSymbolTag { CreateSymbol }; + + // Used to create new symbol strings that holds existing 8-bit [[Description]] string as a substring buffer (BufferSubstring). 
+ StringImpl(CreateSymbolTag, const LChar* characters, unsigned length) + : m_refCount(s_refCountIncrement) + , m_length(length) + , m_data8(characters) + , m_hashAndFlags(s_hashFlag8BitBuffer | StringSymbol | BufferSubstring) + { + ASSERT(is8Bit()); + ASSERT(m_data8); + STRING_STATS_ADD_8BIT_STRING2(m_length, true); + } + + // Used to create new symbol strings that holds existing 16-bit [[Description]] string as a substring buffer (BufferSubstring). + StringImpl(CreateSymbolTag, const UChar* characters, unsigned length) + : m_refCount(s_refCountIncrement) + , m_length(length) + , m_data16(characters) + , m_hashAndFlags(StringSymbol | BufferSubstring) + { + ASSERT(!is8Bit()); + ASSERT(m_data16); + STRING_STATS_ADD_16BIT_STRING2(m_length, true); + } + + // Null symbol. + StringImpl(CreateSymbolTag) + : m_refCount(s_refCountIncrement) + , m_length(0) + , m_data8(empty()->characters8()) + , m_hashAndFlags(s_hashFlag8BitBuffer | StringSymbol | BufferSubstring) + { + ASSERT(is8Bit()); + ASSERT(m_data8); + STRING_STATS_ADD_8BIT_STRING2(m_length, true); + } + + template<typename T> + static size_t allocationSize(unsigned tailElementCount) + { + return tailOffset<T>() + tailElementCount * sizeof(T); + } + + template<typename T> + static ptrdiff_t tailOffset() + { +#if COMPILER(MSVC) + // MSVC doesn't support alignof yet. 
+ return roundUpToMultipleOf<sizeof(T)>(sizeof(StringImpl)); +#else + return roundUpToMultipleOf<alignof(T)>(offsetof(StringImpl, m_hashAndFlags) + sizeof(StringImpl::m_hashAndFlags)); +#endif + } private: bool requiresCopy() const @@ -765,59 +796,61 @@ private: return true; if (is8Bit()) - return reinterpret_cast<const void*>(m_data8) == reinterpret_cast<const void*>(this + 1); - return reinterpret_cast<const void*>(m_data16) == reinterpret_cast<const void*>(this + 1); + return m_data8 == tailPointer<LChar>(); + return m_data16 == tailPointer<UChar>(); + } + + template<typename T> + const T* tailPointer() const + { + return reinterpret_cast_ptr<const T*>(reinterpret_cast<const uint8_t*>(this) + tailOffset<T>()); + } + + template<typename T> + T* tailPointer() + { + return reinterpret_cast_ptr<T*>(reinterpret_cast<uint8_t*>(this) + tailOffset<T>()); + } + + StringImpl* const& substringBuffer() const + { + ASSERT(bufferOwnership() == BufferSubstring); + + return *tailPointer<StringImpl*>(); + } + + StringImpl*& substringBuffer() + { + ASSERT(bufferOwnership() == BufferSubstring); + + return *tailPointer<StringImpl*>(); } // This number must be at least 2 to avoid sharing empty, null as well as 1 character strings from SmallStrings. 
static const unsigned s_copyCharsInlineCutOff = 20; + enum class CaseConvertType { Upper, Lower }; + template<CaseConvertType type, typename CharacterType> static Ref<StringImpl> convertASCIICase(StringImpl&, const CharacterType*, unsigned); + BufferOwnership bufferOwnership() const { return static_cast<BufferOwnership>(m_hashAndFlags & s_hashMaskBufferOwnership); } - template <class UCharPredicate> PassRef<StringImpl> stripMatchedCharacters(UCharPredicate); - template <typename CharType, class UCharPredicate> PassRef<StringImpl> simplifyMatchedCharactersToSpace(UCharPredicate); - template <typename CharType> static PassRef<StringImpl> constructInternal(StringImpl*, unsigned); - template <typename CharType> static PassRef<StringImpl> createUninitializedInternal(unsigned, CharType*&); - template <typename CharType> static PassRef<StringImpl> createUninitializedInternalNonEmpty(unsigned, CharType*&); - template <typename CharType> static PassRef<StringImpl> reallocateInternal(PassRefPtr<StringImpl>, unsigned, CharType*&); - template <typename CharType> static PassRef<StringImpl> createInternal(const CharType*, unsigned); - WTF_EXPORT_STRING_API NEVER_INLINE const UChar* getData16SlowCase() const; + template <class UCharPredicate> Ref<StringImpl> stripMatchedCharacters(UCharPredicate); + template <typename CharType, class UCharPredicate> Ref<StringImpl> simplifyMatchedCharactersToSpace(UCharPredicate); + template <typename CharType> static Ref<StringImpl> constructInternal(StringImpl*, unsigned); + template <typename CharType> static Ref<StringImpl> createUninitializedInternal(unsigned, CharType*&); + template <typename CharType> static Ref<StringImpl> createUninitializedInternalNonEmpty(unsigned, CharType*&); + template <typename CharType> static Ref<StringImpl> reallocateInternal(Ref<StringImpl>&&, unsigned, CharType*&); + template <typename CharType> static Ref<StringImpl> createInternal(const CharType*, unsigned); WTF_EXPORT_PRIVATE NEVER_INLINE unsigned 
hashSlowCase() const; // The bottom bit in the ref count indicates a static (immortal) string. static const unsigned s_refCountFlagIsStaticString = 0x1; static const unsigned s_refCountIncrement = 0x2; // This allows us to ref / deref without disturbing the static string flag. - // The bottom 7 bits in the hash are flags. - static const unsigned s_flagCount = 7; - static const unsigned s_flagMask = (1u << s_flagCount) - 1; - COMPILE_ASSERT(s_flagCount <= StringHasher::flagCount, StringHasher_reserves_enough_bits_for_StringImpl_flags); - - static const unsigned s_hashFlagHas16BitShadow = 1u << 6; - static const unsigned s_hashFlag8BitBuffer = 1u << 5; - static const unsigned s_hashFlagIsAtomic = 1u << 4; - static const unsigned s_hashFlagDidReportCost = 1u << 3; - static const unsigned s_hashFlagIsIdentifier = 1u << 2; - static const unsigned s_hashMaskBufferOwnership = 1u | (1u << 1); - -#ifdef STRING_STATS +#if STRING_STATS WTF_EXPORTDATA static StringStats m_stringStats; #endif public: - struct StaticASCIILiteral { - // These member variables must match the layout of StringImpl. - unsigned m_refCount; - unsigned m_length; - const LChar* m_data8; - void* m_buffer; - unsigned m_hashAndFlags; - - // These values mimic ConstructFromLiteral. - static const unsigned s_initialRefCount = s_refCountIncrement; - static const unsigned s_initialFlags = s_hashFlag8BitBuffer | BufferInternal; - static const unsigned s_hashShift = s_flagCount; - }; - #ifndef NDEBUG void assertHashIsCorrect() { @@ -827,25 +860,20 @@ public: #endif private: - // These member variables must match the layout of StaticASCIILiteral. + // These member variables must match the layout of StaticStringImpl. 
unsigned m_refCount; unsigned m_length; union { const LChar* m_data8; const UChar* m_data16; }; - union { - void* m_buffer; - StringImpl* m_substringBuffer; - mutable UChar* m_copyData16; - }; mutable unsigned m_hashAndFlags; }; -COMPILE_ASSERT(sizeof(StringImpl) == sizeof(StringImpl::StaticASCIILiteral), StringImpl_should_match_its_StaticASCIILiteral); +static_assert(sizeof(StringImpl) == sizeof(StringImpl::StaticStringImpl), ""); #if !ASSERT_DISABLED -// StringImpls created from StaticASCIILiteral will ASSERT +// StringImpls created from StaticStringImpl will ASSERT // in the generic ValueCheck<T>::checkConsistency // as they are not allocated by fastMalloc. // We don't currently have any way to detect that case @@ -857,15 +885,15 @@ ValueCheck<StringImpl*> { #endif template <> -ALWAYS_INLINE PassRef<StringImpl> StringImpl::constructInternal<LChar>(StringImpl* impl, unsigned length) { return adoptRef(*new (NotNull, impl) StringImpl(length, Force8BitConstructor)); } +ALWAYS_INLINE Ref<StringImpl> StringImpl::constructInternal<LChar>(StringImpl* impl, unsigned length) { return adoptRef(*new (NotNull, impl) StringImpl(length, Force8BitConstructor)); } template <> -ALWAYS_INLINE PassRef<StringImpl> StringImpl::constructInternal<UChar>(StringImpl* impl, unsigned length) { return adoptRef(*new (NotNull, impl) StringImpl(length)); } +ALWAYS_INLINE Ref<StringImpl> StringImpl::constructInternal<UChar>(StringImpl* impl, unsigned length) { return adoptRef(*new (NotNull, impl) StringImpl(length)); } template <> -ALWAYS_INLINE const LChar* StringImpl::getCharacters<LChar>() const { return characters8(); } +ALWAYS_INLINE const LChar* StringImpl::characters<LChar>() const { return characters8(); } template <> -ALWAYS_INLINE const UChar* StringImpl::getCharacters<UChar>() const { return deprecatedCharacters(); } +ALWAYS_INLINE const UChar* StringImpl::characters<UChar>() const { return characters16(); } WTF_EXPORT_STRING_API bool equal(const StringImpl*, const StringImpl*); 
WTF_EXPORT_STRING_API bool equal(const StringImpl*, const LChar*); @@ -875,266 +903,20 @@ WTF_EXPORT_STRING_API bool equal(const StringImpl*, const UChar*, unsigned); inline bool equal(const StringImpl* a, const char* b, unsigned length) { return equal(a, reinterpret_cast<const LChar*>(b), length); } inline bool equal(const LChar* a, StringImpl* b) { return equal(b, a); } inline bool equal(const char* a, StringImpl* b) { return equal(b, reinterpret_cast<const LChar*>(a)); } -WTF_EXPORT_STRING_API bool equalNonNull(const StringImpl* a, const StringImpl* b); - -// Do comparisons 8 or 4 bytes-at-a-time on architectures where it's safe. -#if CPU(X86_64) || CPU(ARM64) -ALWAYS_INLINE bool equal(const LChar* a, const LChar* b, unsigned length) -{ - unsigned dwordLength = length >> 3; - - if (dwordLength) { - const uint64_t* aDWordCharacters = reinterpret_cast<const uint64_t*>(a); - const uint64_t* bDWordCharacters = reinterpret_cast<const uint64_t*>(b); - - for (unsigned i = 0; i != dwordLength; ++i) { - if (*aDWordCharacters++ != *bDWordCharacters++) - return false; - } - - a = reinterpret_cast<const LChar*>(aDWordCharacters); - b = reinterpret_cast<const LChar*>(bDWordCharacters); - } - - if (length & 4) { - if (*reinterpret_cast<const uint32_t*>(a) != *reinterpret_cast<const uint32_t*>(b)) - return false; - - a += 4; - b += 4; - } - - if (length & 2) { - if (*reinterpret_cast<const uint16_t*>(a) != *reinterpret_cast<const uint16_t*>(b)) - return false; - - a += 2; - b += 2; - } - - if (length & 1 && (*a != *b)) - return false; - - return true; -} - -ALWAYS_INLINE bool equal(const UChar* a, const UChar* b, unsigned length) -{ - unsigned dwordLength = length >> 2; - - if (dwordLength) { - const uint64_t* aDWordCharacters = reinterpret_cast<const uint64_t*>(a); - const uint64_t* bDWordCharacters = reinterpret_cast<const uint64_t*>(b); - - for (unsigned i = 0; i != dwordLength; ++i) { - if (*aDWordCharacters++ != *bDWordCharacters++) - return false; - } - - a = 
reinterpret_cast<const UChar*>(aDWordCharacters); - b = reinterpret_cast<const UChar*>(bDWordCharacters); - } - - if (length & 2) { - if (*reinterpret_cast<const uint32_t*>(a) != *reinterpret_cast<const uint32_t*>(b)) - return false; - - a += 2; - b += 2; - } - - if (length & 1 && (*a != *b)) - return false; - - return true; -} -#elif CPU(X86) -ALWAYS_INLINE bool equal(const LChar* a, const LChar* b, unsigned length) -{ - const uint32_t* aCharacters = reinterpret_cast<const uint32_t*>(a); - const uint32_t* bCharacters = reinterpret_cast<const uint32_t*>(b); - - unsigned wordLength = length >> 2; - for (unsigned i = 0; i != wordLength; ++i) { - if (*aCharacters++ != *bCharacters++) - return false; - } - - length &= 3; - - if (length) { - const LChar* aRemainder = reinterpret_cast<const LChar*>(aCharacters); - const LChar* bRemainder = reinterpret_cast<const LChar*>(bCharacters); - - for (unsigned i = 0; i < length; ++i) { - if (aRemainder[i] != bRemainder[i]) - return false; - } - } - - return true; -} - -ALWAYS_INLINE bool equal(const UChar* a, const UChar* b, unsigned length) -{ - const uint32_t* aCharacters = reinterpret_cast<const uint32_t*>(a); - const uint32_t* bCharacters = reinterpret_cast<const uint32_t*>(b); - - unsigned wordLength = length >> 1; - for (unsigned i = 0; i != wordLength; ++i) { - if (*aCharacters++ != *bCharacters++) - return false; - } - - if (length & 1 && *reinterpret_cast<const UChar*>(aCharacters) != *reinterpret_cast<const UChar*>(bCharacters)) - return false; - - return true; -} -#elif PLATFORM(IOS) && WTF_ARM_ARCH_AT_LEAST(7) -ALWAYS_INLINE bool equal(const LChar* a, const LChar* b, unsigned length) -{ - bool isEqual = false; - uint32_t aValue; - uint32_t bValue; - asm("subs %[length], #4\n" - "blo 2f\n" - - "0:\n" // Label 0 = Start of loop over 32 bits. 
- "ldr %[aValue], [%[a]], #4\n" - "ldr %[bValue], [%[b]], #4\n" - "cmp %[aValue], %[bValue]\n" - "bne 66f\n" - "subs %[length], #4\n" - "bhs 0b\n" - - // At this point, length can be: - // -0: 00000000000000000000000000000000 (0 bytes left) - // -1: 11111111111111111111111111111111 (3 bytes left) - // -2: 11111111111111111111111111111110 (2 bytes left) - // -3: 11111111111111111111111111111101 (1 byte left) - // -4: 11111111111111111111111111111100 (length was 0) - // The pointers are at the correct position. - "2:\n" // Label 2 = End of loop over 32 bits, check for pair of characters. - "tst %[length], #2\n" - "beq 1f\n" - "ldrh %[aValue], [%[a]], #2\n" - "ldrh %[bValue], [%[b]], #2\n" - "cmp %[aValue], %[bValue]\n" - "bne 66f\n" - - "1:\n" // Label 1 = Check for a single character left. - "tst %[length], #1\n" - "beq 42f\n" - "ldrb %[aValue], [%[a]]\n" - "ldrb %[bValue], [%[b]]\n" - "cmp %[aValue], %[bValue]\n" - "bne 66f\n" - - "42:\n" // Label 42 = Success. - "mov %[isEqual], #1\n" - "66:\n" // Label 66 = End without changing isEqual to 1. - : [length]"+r"(length), [isEqual]"+r"(isEqual), [a]"+r"(a), [b]"+r"(b), [aValue]"+r"(aValue), [bValue]"+r"(bValue) - : - : - ); - return isEqual; -} - -ALWAYS_INLINE bool equal(const UChar* a, const UChar* b, unsigned length) -{ - bool isEqual = false; - uint32_t aValue; - uint32_t bValue; - asm("subs %[length], #2\n" - "blo 1f\n" - - "0:\n" // Label 0 = Start of loop over 32 bits. - "ldr %[aValue], [%[a]], #4\n" - "ldr %[bValue], [%[b]], #4\n" - "cmp %[aValue], %[bValue]\n" - "bne 66f\n" - "subs %[length], #2\n" - "bhs 0b\n" - - // At this point, length can be: - // -0: 00000000000000000000000000000000 (0 bytes left) - // -1: 11111111111111111111111111111111 (1 character left, 2 bytes) - // -2: 11111111111111111111111111111110 (length was zero) - // The pointers are at the correct position. - "1:\n" // Label 1 = Check for a single character left. 
- "tst %[length], #1\n" - "beq 42f\n" - "ldrh %[aValue], [%[a]]\n" - "ldrh %[bValue], [%[b]]\n" - "cmp %[aValue], %[bValue]\n" - "bne 66f\n" - - "42:\n" // Label 42 = Success. - "mov %[isEqual], #1\n" - "66:\n" // Label 66 = End without changing isEqual to 1. - : [length]"+r"(length), [isEqual]"+r"(isEqual), [a]"+r"(a), [b]"+r"(b), [aValue]"+r"(aValue), [bValue]"+r"(bValue) - : - : - ); - return isEqual; -} -#else -ALWAYS_INLINE bool equal(const LChar* a, const LChar* b, unsigned length) { return !memcmp(a, b, length); } -ALWAYS_INLINE bool equal(const UChar* a, const UChar* b, unsigned length) { return !memcmp(a, b, length * sizeof(UChar)); } -#endif - -ALWAYS_INLINE bool equal(const LChar* a, const UChar* b, unsigned length) -{ - for (unsigned i = 0; i < length; ++i) { - if (a[i] != b[i]) - return false; - } - return true; -} - -ALWAYS_INLINE bool equal(const UChar* a, const LChar* b, unsigned length) { return equal(b, a, length); } - -WTF_EXPORT_STRING_API bool equalIgnoringCase(const StringImpl*, const StringImpl*); -WTF_EXPORT_STRING_API bool equalIgnoringCase(const StringImpl*, const LChar*); -inline bool equalIgnoringCase(const LChar* a, const StringImpl* b) { return equalIgnoringCase(b, a); } -WTF_EXPORT_STRING_API bool equalIgnoringCase(const LChar*, const LChar*, unsigned); -WTF_EXPORT_STRING_API bool equalIgnoringCase(const UChar*, const LChar*, unsigned); -inline bool equalIgnoringCase(const UChar* a, const char* b, unsigned length) { return equalIgnoringCase(a, reinterpret_cast<const LChar*>(b), length); } -inline bool equalIgnoringCase(const LChar* a, const UChar* b, unsigned length) { return equalIgnoringCase(b, a, length); } -inline bool equalIgnoringCase(const char* a, const UChar* b, unsigned length) { return equalIgnoringCase(b, reinterpret_cast<const LChar*>(a), length); } -inline bool equalIgnoringCase(const char* a, const LChar* b, unsigned length) { return equalIgnoringCase(b, reinterpret_cast<const LChar*>(a), length); } -inline bool 
equalIgnoringCase(const UChar* a, const UChar* b, int length) -{ - ASSERT(length >= 0); - return !u_memcasecmp(a, b, length, U_FOLD_CASE_DEFAULT); -} -WTF_EXPORT_STRING_API bool equalIgnoringCaseNonNull(const StringImpl*, const StringImpl*); +WTF_EXPORT_STRING_API bool equal(const StringImpl& a, const StringImpl& b); WTF_EXPORT_STRING_API bool equalIgnoringNullity(StringImpl*, StringImpl*); +WTF_EXPORT_STRING_API bool equalIgnoringNullity(const UChar*, size_t length, StringImpl*); -template<typename CharacterType> -inline size_t find(const CharacterType* characters, unsigned length, CharacterType matchCharacter, unsigned index = 0) -{ - while (index < length) { - if (characters[index] == matchCharacter) - return index; - ++index; - } - return notFound; -} +bool equalIgnoringASCIICase(const StringImpl&, const StringImpl&); +WTF_EXPORT_STRING_API bool equalIgnoringASCIICase(const StringImpl*, const StringImpl*); +bool equalIgnoringASCIICase(const StringImpl&, const char*); +bool equalIgnoringASCIICase(const StringImpl*, const char*); -ALWAYS_INLINE size_t find(const UChar* characters, unsigned length, LChar matchCharacter, unsigned index = 0) -{ - return find(characters, length, static_cast<UChar>(matchCharacter), index); -} +WTF_EXPORT_STRING_API bool equalIgnoringASCIICaseNonNull(const StringImpl*, const StringImpl*); -inline size_t find(const LChar* characters, unsigned length, UChar matchCharacter, unsigned index = 0) -{ - if (matchCharacter & ~0xFF) - return notFound; - return find(characters, length, static_cast<LChar>(matchCharacter), index); -} +template<unsigned length> bool equalLettersIgnoringASCIICase(const StringImpl&, const char (&lowercaseLetters)[length]); +template<unsigned length> bool equalLettersIgnoringASCIICase(const StringImpl*, const char (&lowercaseLetters)[length]); inline size_t find(const LChar* characters, unsigned length, CharacterMatchFunctionPtr matchFunction, unsigned index = 0) { @@ -1157,37 +939,6 @@ inline size_t find(const UChar* 
characters, unsigned length, CharacterMatchFunct } template<typename CharacterType> -inline size_t findNextLineStart(const CharacterType* characters, unsigned length, unsigned index = 0) -{ - while (index < length) { - CharacterType c = characters[index++]; - if ((c != '\n') && (c != '\r')) - continue; - - // There can only be a start of a new line if there are more characters - // beyond the current character. - if (index < length) { - // The 3 common types of line terminators are 1. \r\n (Windows), - // 2. \r (old MacOS) and 3. \n (Unix'es). - - if (c == '\n') - return index; // Case 3: just \n. - - CharacterType c2 = characters[index]; - if (c2 != '\n') - return index; // Case 2: just \r. - - // Case 1: \r\n. - // But, there's only a start of a new line if there are more - // characters beyond the \r\n. - if (++index < length) - return index; - } - } - return notFound; -} - -template<typename CharacterType> inline size_t reverseFindLineTerminator(const CharacterType* characters, unsigned length, unsigned index = UINT_MAX) { if (!length) @@ -1248,18 +999,13 @@ inline size_t StringImpl::find(UChar character, unsigned start) return WTF::find(characters16(), m_length, character, start); } -template<size_t inlineCapacity> -bool equalIgnoringNullity(const Vector<UChar, inlineCapacity>& a, StringImpl* b) +template<size_t inlineCapacity> inline bool equalIgnoringNullity(const Vector<UChar, inlineCapacity>& a, StringImpl* b) { - if (!b) - return !a.size(); - if (a.size() != b->length()) - return false; - return !memcmp(a.data(), b->deprecatedCharacters(), b->length() * sizeof(UChar)); + return equalIgnoringNullity(a.data(), a.size(), b); } template<typename CharacterType1, typename CharacterType2> -static inline int codePointCompare(unsigned l1, unsigned l2, const CharacterType1* c1, const CharacterType2* c2) +inline int codePointCompare(unsigned l1, unsigned l2, const CharacterType1* c1, const CharacterType2* c2) { const unsigned lmin = l1 < l2 ? 
l1 : l2; unsigned pos = 0; @@ -1278,22 +1024,22 @@ static inline int codePointCompare(unsigned l1, unsigned l2, const CharacterType return (l1 > l2) ? 1 : -1; } -static inline int codePointCompare8(const StringImpl* string1, const StringImpl* string2) +inline int codePointCompare8(const StringImpl* string1, const StringImpl* string2) { return codePointCompare(string1->length(), string2->length(), string1->characters8(), string2->characters8()); } -static inline int codePointCompare16(const StringImpl* string1, const StringImpl* string2) +inline int codePointCompare16(const StringImpl* string1, const StringImpl* string2) { return codePointCompare(string1->length(), string2->length(), string1->characters16(), string2->characters16()); } -static inline int codePointCompare8To16(const StringImpl* string1, const StringImpl* string2) +inline int codePointCompare8To16(const StringImpl* string1, const StringImpl* string2) { return codePointCompare(string1->length(), string2->length(), string1->characters8(), string2->characters16()); } -static inline int codePointCompare(const StringImpl* string1, const StringImpl* string2) +inline int codePointCompare(const StringImpl* string1, const StringImpl* string2) { if (!string1) return (string2 && string2->length()) ? -1 : 0; @@ -1313,7 +1059,7 @@ static inline int codePointCompare(const StringImpl* string1, const StringImpl* return codePointCompare16(string1, string2); } -static inline bool isSpaceOrNewline(UChar c) +inline bool isSpaceOrNewline(UChar c) { // Use isASCIISpace() for basic Latin-1. // This will include newlines, which aren't included in Unicode DirWS. 
@@ -1332,7 +1078,7 @@ inline unsigned lengthOfNullTerminatedString(const CharacterType* string) return static_cast<unsigned>(length); } -inline PassRef<StringImpl> StringImpl::isolatedCopy() const +inline Ref<StringImpl> StringImpl::isolatedCopy() const { if (!requiresCopy()) { if (is8Bit()) @@ -1345,8 +1091,6 @@ inline PassRef<StringImpl> StringImpl::isolatedCopy() const return create(m_data16, m_length); } -struct StringHash; - // StringHash is the default hash for StringImpl* and RefPtr<StringImpl> template<typename T> struct DefaultHash; template<> struct DefaultHash<StringImpl*> { @@ -1356,11 +1100,45 @@ template<> struct DefaultHash<RefPtr<StringImpl>> { typedef StringHash Hash; }; +inline bool equalIgnoringASCIICase(const StringImpl& a, const StringImpl& b) +{ + return equalIgnoringASCIICaseCommon(a, b); +} + +inline bool equalIgnoringASCIICase(const StringImpl& a, const char* b) +{ + return equalIgnoringASCIICaseCommon(a, b); +} + +inline bool equalIgnoringASCIICase(const StringImpl* a, const char* b) +{ + return a && equalIgnoringASCIICase(*a, b); +} + +template<unsigned length> inline bool startsWithLettersIgnoringASCIICase(const StringImpl& string, const char (&lowercaseLetters)[length]) +{ + return startsWithLettersIgnoringASCIICaseCommon(string, lowercaseLetters); +} + +template<unsigned length> inline bool startsWithLettersIgnoringASCIICase(const StringImpl* string, const char (&lowercaseLetters)[length]) +{ + return string && startsWithLettersIgnoringASCIICase(*string, lowercaseLetters); +} + +template<unsigned length> inline bool equalLettersIgnoringASCIICase(const StringImpl& string, const char (&lowercaseLetters)[length]) +{ + return equalLettersIgnoringASCIICaseCommon(string, lowercaseLetters); +} + +template<unsigned length> inline bool equalLettersIgnoringASCIICase(const StringImpl* string, const char (&lowercaseLetters)[length]) +{ + return string && equalLettersIgnoringASCIICase(*string, lowercaseLetters); +} + } // namespace WTF using 
WTF::StringImpl; using WTF::equal; -using WTF::equalNonNull; using WTF::TextCaseSensitivity; using WTF::TextCaseSensitive; using WTF::TextCaseInsensitive; diff --git a/Source/WTF/wtf/text/StringOperators.h b/Source/WTF/wtf/text/StringOperators.h index cfd2f6d6c..5a2435658 100644 --- a/Source/WTF/wtf/text/StringOperators.h +++ b/Source/WTF/wtf/text/StringOperators.h @@ -35,10 +35,10 @@ public: operator String() const { - RefPtr<StringImpl> resultImpl = tryMakeString(m_string1, m_string2); - if (!resultImpl) + String result = tryMakeString(m_string1, m_string2); + if (!result) CRASH(); - return resultImpl.release(); + return result; } operator AtomicString() const @@ -97,6 +97,8 @@ public: void writeTo(LChar* destination) { m_buffer.writeTo(destination); } void writeTo(UChar* destination) { m_buffer.writeTo(destination); } + String toString() const { return m_buffer; } + private: StringAppend<StringType1, StringType2>& m_buffer; }; diff --git a/Source/WTF/wtf/text/StringStatics.cpp b/Source/WTF/wtf/text/StringStatics.cpp index 8f0c74cc0..0c2119c1d 100644 --- a/Source/WTF/wtf/text/StringStatics.cpp +++ b/Source/WTF/wtf/text/StringStatics.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2010 Apple Inc. All Rights Reserved. + * Copyright (C) 2010, 2016 Apple Inc. All Rights Reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -30,8 +30,8 @@ #endif #include "AtomicString.h" -#include "DynamicAnnotations.h" #include "MainThread.h" +#include "NeverDestroyed.h" #include "StaticConstructors.h" #include "StringImpl.h" @@ -41,29 +41,11 @@ namespace WTF { -StringImpl* StringImpl::empty() -{ - // FIXME: This works around a bug in our port of PCRE, that a regular expression - // run on the empty string may still perform a read from the first element, and - // as such we need this to be a valid pointer. 
No code should ever be reading - // from a zero length string, so this should be able to be a non-null pointer - // into the zero-page. - // Replace this with 'reinterpret_cast<UChar*>(static_cast<intptr_t>(1))' once - // PCRE goes away. - static LChar emptyLCharData = 0; - DEFINE_STATIC_LOCAL(StringImpl, emptyString, (&emptyLCharData, 0, ConstructStaticString)); - WTF_ANNOTATE_BENIGN_RACE(&emptyString, "Benign race on StringImpl::emptyString reference counter"); - return &emptyString; -} - WTF_EXPORTDATA DEFINE_GLOBAL(AtomicString, nullAtom) WTF_EXPORTDATA DEFINE_GLOBAL(AtomicString, emptyAtom) -WTF_EXPORTDATA DEFINE_GLOBAL(AtomicString, textAtom) -WTF_EXPORTDATA DEFINE_GLOBAL(AtomicString, commentAtom) WTF_EXPORTDATA DEFINE_GLOBAL(AtomicString, starAtom) WTF_EXPORTDATA DEFINE_GLOBAL(AtomicString, xmlAtom) WTF_EXPORTDATA DEFINE_GLOBAL(AtomicString, xmlnsAtom) -WTF_EXPORTDATA DEFINE_GLOBAL(AtomicString, xlinkAtom) NEVER_INLINE unsigned StringImpl::hashSlowCase() const { @@ -74,6 +56,17 @@ NEVER_INLINE unsigned StringImpl::hashSlowCase() const return existingHash(); } +unsigned StringImpl::concurrentHash() const +{ + unsigned hash; + if (is8Bit()) + hash = StringHasher::computeHashAndMaskTop8Bits(m_data8, m_length); + else + hash = StringHasher::computeHashAndMaskTop8Bits(m_data16, m_length); + ASSERT(((hash << s_flagCount) >> s_flagCount) == hash); + return hash; +} + void AtomicString::init() { static bool initialized; @@ -84,12 +77,9 @@ void AtomicString::init() // Use placement new to initialize the globals. 
new (NotNull, (void*)&nullAtom) AtomicString; new (NotNull, (void*)&emptyAtom) AtomicString(""); - new (NotNull, (void*)&textAtom) AtomicString("#text", AtomicString::ConstructFromLiteral); - new (NotNull, (void*)&commentAtom) AtomicString("#comment", AtomicString::ConstructFromLiteral); new (NotNull, (void*)&starAtom) AtomicString("*", AtomicString::ConstructFromLiteral); new (NotNull, (void*)&xmlAtom) AtomicString("xml", AtomicString::ConstructFromLiteral); new (NotNull, (void*)&xmlnsAtom) AtomicString("xmlns", AtomicString::ConstructFromLiteral); - new (NotNull, (void*)&xlinkAtom) AtomicString("xlink", AtomicString::ConstructFromLiteral); initialized = true; } diff --git a/Source/WTF/wtf/text/StringView.cpp b/Source/WTF/wtf/text/StringView.cpp new file mode 100644 index 000000000..580799765 --- /dev/null +++ b/Source/WTF/wtf/text/StringView.cpp @@ -0,0 +1,285 @@ +/* + +Copyright (C) 2014-2017 Apple Inc. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL APPLE INC. 
OR ITS CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include "config.h" +#include "StringView.h" + +#include <mutex> +#include <unicode/ubrk.h> +#include <wtf/HashMap.h> +#include <wtf/Lock.h> +#include <wtf/NeverDestroyed.h> +#include <wtf/Optional.h> +#include <wtf/text/TextBreakIterator.h> +#include <wtf/unicode/UTF8.h> + +namespace WTF { + +using namespace Unicode; + +bool StringView::containsIgnoringASCIICase(const StringView& matchString) const +{ + return findIgnoringASCIICase(matchString) != notFound; +} + +bool StringView::containsIgnoringASCIICase(const StringView& matchString, unsigned startOffset) const +{ + return findIgnoringASCIICase(matchString, startOffset) != notFound; +} + +size_t StringView::findIgnoringASCIICase(const StringView& matchString) const +{ + return ::WTF::findIgnoringASCIICase(*this, matchString, 0); +} + +size_t StringView::findIgnoringASCIICase(const StringView& matchString, unsigned startOffset) const +{ + return ::WTF::findIgnoringASCIICase(*this, matchString, startOffset); +} + +bool StringView::startsWith(const StringView& prefix) const +{ + return ::WTF::startsWith(*this, prefix); +} + +bool StringView::startsWithIgnoringASCIICase(const StringView& prefix) const +{ + return ::WTF::startsWithIgnoringASCIICase(*this, prefix); +} + +bool StringView::endsWith(const StringView& suffix) const +{ + return ::WTF::endsWith(*this, suffix); +} + +bool StringView::endsWithIgnoringASCIICase(const StringView& suffix) const +{ + return ::WTF::endsWithIgnoringASCIICase(*this, suffix); +} + +CString 
StringView::utf8(ConversionMode mode) const +{ + if (isNull()) + return CString("", 0); + if (is8Bit()) + return StringImpl::utf8ForCharacters(characters8(), length()); + return StringImpl::utf8ForCharacters(characters16(), length(), mode); +} + +size_t StringView::find(StringView matchString, unsigned start) const +{ + return findCommon(*this, matchString, start); +} + +void StringView::SplitResult::Iterator::findNextSubstring() +{ + for (size_t separatorPosition; (separatorPosition = m_result.m_string.find(m_result.m_separator, m_position)) != notFound; ++m_position) { + if (separatorPosition > m_position) { + m_length = separatorPosition - m_position; + return; + } + } + m_length = m_result.m_string.length() - m_position; +} + +auto StringView::SplitResult::Iterator::operator++() -> Iterator& +{ + ASSERT(m_position < m_result.m_string.length()); + m_position += m_length; + if (m_position < m_result.m_string.length()) { + ++m_position; + findNextSubstring(); + } + return *this; +} + +class StringView::GraphemeClusters::Iterator::Impl { +public: + Impl(const StringView& stringView, std::optional<NonSharedCharacterBreakIterator>&& iterator, unsigned index) + : m_stringView(stringView) + , m_iterator(WTFMove(iterator)) + , m_index(index) + , m_indexEnd(computeIndexEnd()) + { + } + + void operator++() + { + ASSERT(m_indexEnd > m_index); + m_index = m_indexEnd; + m_indexEnd = computeIndexEnd(); + } + + StringView operator*() const + { + if (m_stringView.is8Bit()) + return StringView(m_stringView.characters8() + m_index, m_indexEnd - m_index); + return StringView(m_stringView.characters16() + m_index, m_indexEnd - m_index); + } + + bool operator==(const Impl& other) const + { + ASSERT(&m_stringView == &other.m_stringView); + auto result = m_index == other.m_index; + ASSERT(!result || m_indexEnd == other.m_indexEnd); + return result; + } + + unsigned computeIndexEnd() + { + if (!m_iterator) + return 0; + if (m_index == m_stringView.length()) + return m_index; + return 
ubrk_following(m_iterator.value(), m_index); + } + +private: + const StringView& m_stringView; + std::optional<NonSharedCharacterBreakIterator> m_iterator; + unsigned m_index; + unsigned m_indexEnd; +}; + +StringView::GraphemeClusters::Iterator::Iterator(const StringView& stringView, unsigned index) + : m_impl(std::make_unique<Impl>(stringView, stringView.isNull() ? std::nullopt : std::optional<NonSharedCharacterBreakIterator>(NonSharedCharacterBreakIterator(stringView)), index)) +{ +} + +StringView::GraphemeClusters::Iterator::~Iterator() +{ +} + +StringView::GraphemeClusters::Iterator::Iterator(Iterator&& other) + : m_impl(WTFMove(other.m_impl)) +{ +} + +auto StringView::GraphemeClusters::Iterator::operator++() -> Iterator& +{ + ++(*m_impl); + return *this; +} + +StringView StringView::GraphemeClusters::Iterator::operator*() const +{ + return **m_impl; +} + +bool StringView::GraphemeClusters::Iterator::operator==(const Iterator& other) const +{ + return *m_impl == *(other.m_impl); +} + +bool StringView::GraphemeClusters::Iterator::operator!=(const Iterator& other) const +{ + return !(*this == other); +} + +#if CHECK_STRINGVIEW_LIFETIME + +// Manage reference count manually so UnderlyingString does not need to be defined in the header. 
+ +struct StringView::UnderlyingString { + std::atomic_uint refCount { 1u }; + bool isValid { true }; + const StringImpl& string; + explicit UnderlyingString(const StringImpl&); +}; + +StringView::UnderlyingString::UnderlyingString(const StringImpl& string) + : string(string) +{ +} + +static StaticLock underlyingStringsMutex; + +static HashMap<const StringImpl*, StringView::UnderlyingString*>& underlyingStrings() +{ + static NeverDestroyed<HashMap<const StringImpl*, StringView::UnderlyingString*>> map; + return map; +} + +void StringView::invalidate(const StringImpl& stringToBeDestroyed) +{ + UnderlyingString* underlyingString; + { + std::lock_guard<StaticLock> lock(underlyingStringsMutex); + underlyingString = underlyingStrings().take(&stringToBeDestroyed); + if (!underlyingString) + return; + } + ASSERT(underlyingString->isValid); + underlyingString->isValid = false; +} + +bool StringView::underlyingStringIsValid() const +{ + return !m_underlyingString || m_underlyingString->isValid; +} + +void StringView::adoptUnderlyingString(UnderlyingString* underlyingString) +{ + if (m_underlyingString) { + std::lock_guard<StaticLock> lock(underlyingStringsMutex); + if (!--m_underlyingString->refCount) { + if (m_underlyingString->isValid) { + underlyingStrings().remove(&m_underlyingString->string); + } + delete m_underlyingString; + } + } + m_underlyingString = underlyingString; +} + +void StringView::setUnderlyingString(const StringImpl* string) +{ + UnderlyingString* underlyingString; + if (!string) + underlyingString = nullptr; + else { + std::lock_guard<StaticLock> lock(underlyingStringsMutex); + auto result = underlyingStrings().add(string, nullptr); + if (result.isNewEntry) + result.iterator->value = new UnderlyingString(*string); + else + ++result.iterator->value->refCount; + underlyingString = result.iterator->value; + } + adoptUnderlyingString(underlyingString); +} + +void StringView::setUnderlyingString(const StringView& otherString) +{ + UnderlyingString* 
underlyingString = otherString.m_underlyingString; + if (underlyingString) + ++underlyingString->refCount; + adoptUnderlyingString(underlyingString); +} + +#endif // CHECK_STRINGVIEW_LIFETIME + +} // namespace WTF diff --git a/Source/WTF/wtf/text/StringView.h b/Source/WTF/wtf/text/StringView.h index 70f4eb0cb..ef209f947 100644 --- a/Source/WTF/wtf/text/StringView.h +++ b/Source/WTF/wtf/text/StringView.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2014 Apple Inc. All rights reserved. + * Copyright (C) 2014-2017 Apple Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -26,132 +26,916 @@ #ifndef StringView_h #define StringView_h -#include <wtf/text/WTFString.h> +#include <limits.h> +#include <unicode/utypes.h> +#include <wtf/Forward.h> +#include <wtf/RetainPtr.h> +#include <wtf/Vector.h> +#include <wtf/text/CString.h> +#include <wtf/text/ConversionMode.h> +#include <wtf/text/LChar.h> +#include <wtf/text/StringCommon.h> + +// FIXME: Enabling the StringView lifetime checking causes the MSVC build to fail. Figure out why. +#if defined(NDEBUG) || COMPILER(MSVC) +#define CHECK_STRINGVIEW_LIFETIME 0 +#else +#define CHECK_STRINGVIEW_LIFETIME 1 +#endif namespace WTF { +using CharacterMatchFunction = bool (*)(UChar); + // StringView is a non-owning reference to a string, similar to the proposed std::string_view. -// Whether the string is 8-bit or 16-bit is encoded in the upper bit of the length member. -// This means that strings longer than 2 Gigabytes can not be represented. If that turns out to be -// a problem we can investigate alternative solutions. 
class StringView { public: - StringView() - : m_characters(nullptr) - , m_length(0) - { + StringView(); +#if CHECK_STRINGVIEW_LIFETIME + ~StringView(); + StringView(StringView&&); + StringView(const StringView&); + StringView& operator=(StringView&&); + StringView& operator=(const StringView&); +#endif + + StringView(const AtomicString&); + StringView(const String&); + StringView(const StringImpl&); + StringView(const StringImpl*); + StringView(const LChar*, unsigned length); + StringView(const UChar*, unsigned length); + StringView(const char*); + + static StringView empty(); + + unsigned length() const; + bool isEmpty() const; + + explicit operator bool() const; + bool isNull() const; + + UChar operator[](unsigned index) const; + + class CodeUnits; + CodeUnits codeUnits() const; + + class CodePoints; + CodePoints codePoints() const; + + class GraphemeClusters; + GraphemeClusters graphemeClusters() const; + + bool is8Bit() const; + const LChar* characters8() const; + const UChar* characters16() const; + + String toString() const; + String toStringWithoutCopying() const; + AtomicString toAtomicString() const; + +#if USE(CF) + // This function converts null strings to empty strings. + WTF_EXPORT_STRING_API RetainPtr<CFStringRef> createCFStringWithoutCopying() const; +#endif + +#ifdef __OBJC__ + // These functions convert null strings to empty strings. 
+ WTF_EXPORT_STRING_API RetainPtr<NSString> createNSString() const; + WTF_EXPORT_STRING_API RetainPtr<NSString> createNSStringWithoutCopying() const; +#endif + + WTF_EXPORT_STRING_API CString utf8(ConversionMode = LenientConversion) const; + + class UpconvertedCharacters; + UpconvertedCharacters upconvertedCharacters() const; + + void getCharactersWithUpconvert(LChar*) const; + void getCharactersWithUpconvert(UChar*) const; + + StringView substring(unsigned start, unsigned length = std::numeric_limits<unsigned>::max()) const; + StringView left(unsigned len) const { return substring(0, len); } + StringView right(unsigned len) const { return substring(length() - len, len); } + + class SplitResult; + SplitResult split(UChar) const; + + size_t find(UChar, unsigned start = 0) const; + size_t find(CharacterMatchFunction, unsigned start = 0) const; + + WTF_EXPORT_STRING_API size_t find(StringView, unsigned start) const; + + size_t reverseFind(UChar, unsigned index = UINT_MAX) const; + + WTF_EXPORT_STRING_API size_t findIgnoringASCIICase(const StringView&) const; + WTF_EXPORT_STRING_API size_t findIgnoringASCIICase(const StringView&, unsigned startOffset) const; + + bool contains(UChar) const; + WTF_EXPORT_STRING_API bool containsIgnoringASCIICase(const StringView&) const; + WTF_EXPORT_STRING_API bool containsIgnoringASCIICase(const StringView&, unsigned startOffset) const; + + WTF_EXPORT_STRING_API bool startsWith(const StringView&) const; + WTF_EXPORT_STRING_API bool startsWithIgnoringASCIICase(const StringView&) const; + + WTF_EXPORT_STRING_API bool endsWith(const StringView&) const; + WTF_EXPORT_STRING_API bool endsWithIgnoringASCIICase(const StringView&) const; + + int toInt() const; + int toInt(bool& isValid) const; + int toIntStrict(bool& isValid) const; + float toFloat(bool& isValid) const; + + static void invalidate(const StringImpl&); + + struct UnderlyingString; + +private: + friend bool equal(StringView, StringView); + + void initialize(const LChar*, unsigned 
length); + void initialize(const UChar*, unsigned length); + +#if CHECK_STRINGVIEW_LIFETIME + WTF_EXPORT_STRING_API bool underlyingStringIsValid() const; + WTF_EXPORT_STRING_API void setUnderlyingString(const StringImpl*); + WTF_EXPORT_STRING_API void setUnderlyingString(const StringView&); +#else + bool underlyingStringIsValid() const { return true; } + void setUnderlyingString(const StringImpl*) { } + void setUnderlyingString(const StringView&) { } +#endif + void clear(); + + const void* m_characters { nullptr }; + unsigned m_length { 0 }; + bool m_is8Bit { true }; + +#if CHECK_STRINGVIEW_LIFETIME + void adoptUnderlyingString(UnderlyingString*); + UnderlyingString* m_underlyingString { nullptr }; +#endif +}; + +template<typename CharacterType, size_t inlineCapacity> void append(Vector<CharacterType, inlineCapacity>&, StringView); + +bool equal(StringView, StringView); +bool equal(StringView, const LChar*); +bool equal(StringView, const char*); + +bool equalIgnoringASCIICase(StringView, StringView); +bool equalIgnoringASCIICase(StringView, const char*); + +template<unsigned length> bool equalLettersIgnoringASCIICase(StringView, const char (&lowercaseLetters)[length]); + +inline bool operator==(StringView a, StringView b) { return equal(a, b); } +inline bool operator==(StringView a, const LChar* b) { return equal(a, b); } +inline bool operator==(StringView a, const char* b) { return equal(a, b); } +inline bool operator==(const LChar* a, StringView b) { return equal(b, a); } +inline bool operator==(const char* a, StringView b) { return equal(b, a); } + +inline bool operator!=(StringView a, StringView b) { return !equal(a, b); } +inline bool operator!=(StringView a, const LChar* b) { return !equal(a, b); } +inline bool operator!=(StringView a, const char* b) { return !equal(a, b); } +inline bool operator!=(const LChar* a, StringView b) { return !equal(b, a); } +inline bool operator!=(const char* a, StringView b) { return !equal(b, a); } + +} + +#include 
<wtf/text/AtomicString.h> +#include <wtf/text/WTFString.h> + +namespace WTF { + +inline StringView::StringView() +{ + // FIXME: It's peculiar that null strings are 16-bit and empty strings return 8-bit (according to the is8Bit function). +} + +#if CHECK_STRINGVIEW_LIFETIME +inline StringView::~StringView() +{ + setUnderlyingString(nullptr); +} + +inline StringView::StringView(StringView&& other) + : m_characters(other.m_characters) + , m_length(other.m_length) + , m_is8Bit(other.m_is8Bit) +{ + ASSERT(other.underlyingStringIsValid()); + + other.clear(); + + setUnderlyingString(other); + other.setUnderlyingString(nullptr); +} + +inline StringView::StringView(const StringView& other) + : m_characters(other.m_characters) + , m_length(other.m_length) + , m_is8Bit(other.m_is8Bit) +{ + ASSERT(other.underlyingStringIsValid()); + + setUnderlyingString(other); +} + +inline StringView& StringView::operator=(StringView&& other) +{ + ASSERT(other.underlyingStringIsValid()); + + m_characters = other.m_characters; + m_length = other.m_length; + m_is8Bit = other.m_is8Bit; + + other.clear(); + + setUnderlyingString(other); + other.setUnderlyingString(nullptr); + + return *this; +} + +inline StringView& StringView::operator=(const StringView& other) +{ + ASSERT(other.underlyingStringIsValid()); + + m_characters = other.m_characters; + m_length = other.m_length; + m_is8Bit = other.m_is8Bit; + + setUnderlyingString(other); + + return *this; +} +#endif // CHECK_STRINGVIEW_LIFETIME + +inline void StringView::initialize(const LChar* characters, unsigned length) +{ + m_characters = characters; + m_length = length; + m_is8Bit = true; +} + +inline void StringView::initialize(const UChar* characters, unsigned length) +{ + m_characters = characters; + m_length = length; + m_is8Bit = false; +} + +inline StringView::StringView(const LChar* characters, unsigned length) +{ + initialize(characters, length); +} + +inline StringView::StringView(const UChar* characters, unsigned length) +{ + 
initialize(characters, length); +} + +inline StringView::StringView(const char* characters) +{ + initialize(reinterpret_cast<const LChar*>(characters), strlen(characters)); +} + +inline StringView::StringView(const StringImpl& string) +{ + setUnderlyingString(&string); + if (string.is8Bit()) + initialize(string.characters8(), string.length()); + else + initialize(string.characters16(), string.length()); +} + +inline StringView::StringView(const StringImpl* string) +{ + if (!string) + return; + + setUnderlyingString(string); + if (string->is8Bit()) + initialize(string->characters8(), string->length()); + else + initialize(string->characters16(), string->length()); +} + +inline StringView::StringView(const String& string) +{ + setUnderlyingString(string.impl()); + if (!string.impl()) { + clear(); + return; + } + if (string.is8Bit()) { + initialize(string.characters8(), string.length()); + return; } + initialize(string.characters16(), string.length()); +} - StringView(const LChar* characters, unsigned length) - { - initialize(characters, length); +inline StringView::StringView(const AtomicString& atomicString) + : StringView(atomicString.string()) +{ +} + +inline void StringView::clear() +{ + m_characters = nullptr; + m_length = 0; + m_is8Bit = true; +} + +inline StringView StringView::empty() +{ + return StringView(reinterpret_cast<const LChar*>(""), 0); +} + +inline const LChar* StringView::characters8() const +{ + ASSERT(is8Bit()); + ASSERT(underlyingStringIsValid()); + return static_cast<const LChar*>(m_characters); +} + +inline const UChar* StringView::characters16() const +{ + ASSERT(!is8Bit()); + ASSERT(underlyingStringIsValid()); + return static_cast<const UChar*>(m_characters); +} + +class StringView::UpconvertedCharacters { +public: + explicit UpconvertedCharacters(const StringView&); + operator const UChar*() const { return m_characters; } + const UChar* get() const { return m_characters; } +private: + Vector<UChar, 32> m_upconvertedCharacters; + const 
UChar* m_characters; +}; + +inline StringView::UpconvertedCharacters StringView::upconvertedCharacters() const +{ + return UpconvertedCharacters(*this); +} + +inline bool StringView::isNull() const +{ + return !m_characters; +} + +inline bool StringView::isEmpty() const +{ + return !length(); +} + +inline unsigned StringView::length() const +{ + return m_length; +} + +inline StringView::operator bool() const +{ + return !isNull(); +} + +inline bool StringView::is8Bit() const +{ + return m_is8Bit; +} + +inline StringView StringView::substring(unsigned start, unsigned length) const +{ + if (start >= this->length()) + return empty(); + unsigned maxLength = this->length() - start; + + if (length >= maxLength) { + if (!start) + return *this; + length = maxLength; } - StringView(const UChar* characters, unsigned length) - { - initialize(characters, length); + if (is8Bit()) { + StringView result(characters8() + start, length); + result.setUnderlyingString(*this); + return result; } + StringView result(characters16() + start, length); + result.setUnderlyingString(*this); + return result; +} - StringView(const String& string) - : m_characters(nullptr) - , m_length(0) - { - if (!string.impl()) - return; - - if (string.is8Bit()) - initialize(string.characters8(), string.length()); - else - initialize(string.characters16(), string.length()); +inline UChar StringView::operator[](unsigned index) const +{ + ASSERT(index < length()); + if (is8Bit()) + return characters8()[index]; + return characters16()[index]; +} + +inline bool StringView::contains(UChar character) const +{ + return find(character) != notFound; +} + +inline void StringView::getCharactersWithUpconvert(LChar* destination) const +{ + ASSERT(is8Bit()); + auto characters8 = this->characters8(); + for (unsigned i = 0; i < m_length; ++i) + destination[i] = characters8[i]; +} + +inline void StringView::getCharactersWithUpconvert(UChar* destination) const +{ + if (is8Bit()) { + auto characters8 = this->characters8(); + 
for (unsigned i = 0; i < m_length; ++i) + destination[i] = characters8[i]; + return; } + auto characters16 = this->characters16(); + for (unsigned i = 0; i < m_length; ++i) + destination[i] = characters16[i]; +} - static StringView empty() - { - return StringView(reinterpret_cast<const LChar*>(""), 0); +inline StringView::UpconvertedCharacters::UpconvertedCharacters(const StringView& string) +{ + if (!string.is8Bit()) { + m_characters = string.characters16(); + return; } + const LChar* characters8 = string.characters8(); + unsigned length = string.m_length; + m_upconvertedCharacters.reserveInitialCapacity(length); + for (unsigned i = 0; i < length; ++i) + m_upconvertedCharacters.uncheckedAppend(characters8[i]); + m_characters = m_upconvertedCharacters.data(); +} - const LChar* characters8() const - { - ASSERT(is8Bit()); +inline String StringView::toString() const +{ + if (is8Bit()) + return String(characters8(), m_length); + return String(characters16(), m_length); +} - return static_cast<const LChar*>(m_characters); - } +inline AtomicString StringView::toAtomicString() const +{ + if (is8Bit()) + return AtomicString(characters8(), m_length); + return AtomicString(characters16(), m_length); +} - const UChar* characters16() const - { - ASSERT(!is8Bit()); +inline float StringView::toFloat(bool& isValid) const +{ + if (is8Bit()) + return charactersToFloat(characters8(), m_length, &isValid); + return charactersToFloat(characters16(), m_length, &isValid); +} - return static_cast<const UChar*>(m_characters); - } +inline int StringView::toInt() const +{ + bool isValid; + return toInt(isValid); +} - bool isNull() const { return !m_characters; } - bool isEmpty() const { return !length(); } - unsigned length() const { return m_length & ~is16BitStringFlag; } +inline int StringView::toInt(bool& isValid) const +{ + if (is8Bit()) + return charactersToInt(characters8(), m_length, &isValid); + return charactersToInt(characters16(), m_length, &isValid); +} - explicit operator bool() 
const { return !isNull(); } +inline int StringView::toIntStrict(bool& isValid) const +{ + if (is8Bit()) + return charactersToIntStrict(characters8(), m_length, &isValid); + return charactersToIntStrict(characters16(), m_length, &isValid); +} - bool is8Bit() const { return !(m_length & is16BitStringFlag); } +inline String StringView::toStringWithoutCopying() const +{ + if (is8Bit()) + return StringImpl::createWithoutCopying(characters8(), m_length); + return StringImpl::createWithoutCopying(characters16(), m_length); +} - StringView substring(unsigned start, unsigned length = std::numeric_limits<unsigned>::max()) const - { - if (start >= this->length()) - return empty(); - unsigned maxLength = this->length() - start; +inline size_t StringView::find(UChar character, unsigned start) const +{ + if (is8Bit()) + return WTF::find(characters8(), m_length, character, start); + return WTF::find(characters16(), m_length, character, start); +} - if (length >= maxLength) { - if (!start) - return *this; - length = maxLength; - } +inline size_t StringView::find(CharacterMatchFunction matchFunction, unsigned start) const +{ + if (is8Bit()) + return WTF::find(characters8(), m_length, matchFunction, start); + return WTF::find(characters16(), m_length, matchFunction, start); +} - if (is8Bit()) - return StringView(characters8() + start, length); +inline size_t StringView::reverseFind(UChar character, unsigned index) const +{ + if (is8Bit()) + return WTF::reverseFind(characters8(), m_length, character, index); + return WTF::reverseFind(characters16(), m_length, character, index); +} - return StringView(characters16() + start, length); - } +#if !CHECK_STRINGVIEW_LIFETIME +inline void StringView::invalidate(const StringImpl&) +{ +} +#endif - String toString() const - { - if (is8Bit()) - return String(characters8(), length()); +template<typename StringType> class StringTypeAdapter; - return String(characters16(), length()); +template<> class StringTypeAdapter<StringView> { +public: + 
StringTypeAdapter<StringView>(StringView string) + : m_string(string) + { } - String toStringWithoutCopying() const - { - if (is8Bit()) - return StringImpl::createWithoutCopying(characters8(), length()); + unsigned length() { return m_string.length(); } + bool is8Bit() { return m_string.is8Bit(); } + void writeTo(LChar* destination) { m_string.getCharactersWithUpconvert(destination); } + void writeTo(UChar* destination) { m_string.getCharactersWithUpconvert(destination); } - return StringImpl::createWithoutCopying(characters16(), length()); - } + String toString() const { return m_string.toString(); } private: - void initialize(const LChar* characters, unsigned length) - { - ASSERT(!(length & is16BitStringFlag)); - - m_characters = characters; - m_length = length; - } + StringView m_string; +}; - void initialize(const UChar* characters, unsigned length) - { - ASSERT(!(length & is16BitStringFlag)); - - m_characters = characters; - m_length = is16BitStringFlag | length; +template<typename CharacterType, size_t inlineCapacity> void append(Vector<CharacterType, inlineCapacity>& buffer, StringView string) +{ + unsigned oldSize = buffer.size(); + buffer.grow(oldSize + string.length()); + string.getCharactersWithUpconvert(buffer.data() + oldSize); +} + +inline bool equal(StringView a, StringView b) +{ + if (a.m_characters == b.m_characters) { + ASSERT(a.is8Bit() == b.is8Bit()); + return a.length() == b.length(); } + + return equalCommon(a, b); +} + +inline bool equal(StringView a, const LChar* b) +{ + if (!b) + return !a.isEmpty(); + if (a.isEmpty()) + return !b; + unsigned aLength = a.length(); + if (a.is8Bit()) + return equal(a.characters8(), b, aLength); + return equal(a.characters16(), b, aLength); +} - static const unsigned is16BitStringFlag = 1u << 31; +inline bool equal(StringView a, const char* b) +{ + return equal(a, reinterpret_cast<const LChar*>(b)); +} - const void* m_characters; +inline bool equalIgnoringASCIICase(StringView a, StringView b) +{ + return 
equalIgnoringASCIICaseCommon(a, b); +} + +inline bool equalIgnoringASCIICase(StringView a, const char* b) +{ + return equalIgnoringASCIICaseCommon(a, b); +} + +class StringView::SplitResult { +public: + explicit SplitResult(StringView, UChar separator); + + class Iterator; + Iterator begin() const; + Iterator end() const; + +private: + StringView m_string; + UChar m_separator; +}; + +class StringView::GraphemeClusters { +public: + explicit GraphemeClusters(const StringView&); + + class Iterator; + Iterator begin() const; + Iterator end() const; + +private: + StringView m_stringView; +}; + +class StringView::CodePoints { +public: + explicit CodePoints(const StringView&); + + class Iterator; + Iterator begin() const; + Iterator end() const; + +private: + StringView m_stringView; +}; + +class StringView::CodeUnits { +public: + explicit CodeUnits(const StringView&); + + class Iterator; + Iterator begin() const; + Iterator end() const; + +private: + StringView m_stringView; +}; + +class StringView::SplitResult::Iterator { +public: + StringView operator*() const; + + WTF_EXPORT_PRIVATE Iterator& operator++(); + + bool operator==(const Iterator&) const; + bool operator!=(const Iterator&) const; + +private: + enum PositionTag { AtEnd }; + Iterator(const SplitResult&); + Iterator(const SplitResult&, PositionTag); + + WTF_EXPORT_PRIVATE void findNextSubstring(); + + friend SplitResult; + + const SplitResult& m_result; + unsigned m_position { 0 }; unsigned m_length; }; +class StringView::GraphemeClusters::Iterator { +public: + WTF_EXPORT_PRIVATE Iterator() = delete; + WTF_EXPORT_PRIVATE Iterator(const StringView&, unsigned index); + WTF_EXPORT_PRIVATE ~Iterator(); + + Iterator(const Iterator&) = delete; + WTF_EXPORT_PRIVATE Iterator(Iterator&&); + Iterator& operator=(const Iterator&) = delete; + Iterator& operator=(Iterator&&) = delete; + + WTF_EXPORT_PRIVATE StringView operator*() const; + WTF_EXPORT_PRIVATE Iterator& operator++(); + + WTF_EXPORT_PRIVATE bool 
operator==(const Iterator&) const; + WTF_EXPORT_PRIVATE bool operator!=(const Iterator&) const; + +private: + class Impl; + + std::unique_ptr<Impl> m_impl; +}; + +class StringView::CodePoints::Iterator { +public: + Iterator(const StringView&, unsigned index); + + UChar32 operator*() const; + Iterator& operator++(); + + bool operator==(const Iterator&) const; + bool operator!=(const Iterator&) const; + Iterator& operator=(const Iterator&); + +private: + std::reference_wrapper<const StringView> m_stringView; + std::optional<unsigned> m_nextCodePointOffset; + UChar32 m_codePoint; +}; + +class StringView::CodeUnits::Iterator { +public: + Iterator(const StringView&, unsigned index); + + UChar operator*() const; + Iterator& operator++(); + + bool operator==(const Iterator&) const; + bool operator!=(const Iterator&) const; + +private: + const StringView& m_stringView; + unsigned m_index; +}; + +inline auto StringView::graphemeClusters() const -> GraphemeClusters +{ + return GraphemeClusters(*this); +} + +inline auto StringView::codePoints() const -> CodePoints +{ + return CodePoints(*this); +} + +inline auto StringView::codeUnits() const -> CodeUnits +{ + return CodeUnits(*this); +} + +inline StringView::GraphemeClusters::GraphemeClusters(const StringView& stringView) + : m_stringView(stringView) +{ +} + +inline auto StringView::GraphemeClusters::begin() const -> Iterator +{ + return Iterator(m_stringView, 0); +} + +inline auto StringView::GraphemeClusters::end() const -> Iterator +{ + return Iterator(m_stringView, m_stringView.length()); +} + +inline StringView::CodePoints::CodePoints(const StringView& stringView) + : m_stringView(stringView) +{ +} + +inline StringView::CodePoints::Iterator::Iterator(const StringView& stringView, unsigned index) + : m_stringView(stringView) + , m_nextCodePointOffset(index) +{ + operator++(); +} + +inline auto StringView::CodePoints::Iterator::operator++() -> Iterator& +{ + ASSERT(m_nextCodePointOffset); + if 
(m_nextCodePointOffset.value() == m_stringView.get().length()) { + m_nextCodePointOffset = std::nullopt; + return *this; + } + if (m_stringView.get().is8Bit()) + m_codePoint = m_stringView.get().characters8()[m_nextCodePointOffset.value()++]; + else + U16_NEXT(m_stringView.get().characters16(), m_nextCodePointOffset.value(), m_stringView.get().length(), m_codePoint); + ASSERT(m_nextCodePointOffset.value() <= m_stringView.get().length()); + return *this; +} + +inline auto StringView::CodePoints::Iterator::operator=(const Iterator& other) -> Iterator& +{ + m_stringView = other.m_stringView; + m_nextCodePointOffset = other.m_nextCodePointOffset; + m_codePoint = other.m_codePoint; + return *this; +} + +inline UChar32 StringView::CodePoints::Iterator::operator*() const +{ + ASSERT(m_nextCodePointOffset); + return m_codePoint; +} + +inline bool StringView::CodePoints::Iterator::operator==(const Iterator& other) const +{ + ASSERT(&m_stringView.get() == &other.m_stringView.get()); + return m_nextCodePointOffset == other.m_nextCodePointOffset; +} + +inline bool StringView::CodePoints::Iterator::operator!=(const Iterator& other) const +{ + return !(*this == other); +} + +inline auto StringView::CodePoints::begin() const -> Iterator +{ + return Iterator(m_stringView, 0); +} + +inline auto StringView::CodePoints::end() const -> Iterator +{ + return Iterator(m_stringView, m_stringView.length()); +} + +inline StringView::CodeUnits::CodeUnits(const StringView& stringView) + : m_stringView(stringView) +{ +} + +inline StringView::CodeUnits::Iterator::Iterator(const StringView& stringView, unsigned index) + : m_stringView(stringView) + , m_index(index) +{ +} + +inline auto StringView::CodeUnits::Iterator::operator++() -> Iterator& +{ + ++m_index; + return *this; +} + +inline UChar StringView::CodeUnits::Iterator::operator*() const +{ + return m_stringView[m_index]; +} + +inline bool StringView::CodeUnits::Iterator::operator==(const Iterator& other) const +{ + ASSERT(&m_stringView == 
&other.m_stringView); + return m_index == other.m_index; +} + +inline bool StringView::CodeUnits::Iterator::operator!=(const Iterator& other) const +{ + return !(*this == other); +} + +inline auto StringView::CodeUnits::begin() const -> Iterator +{ + return Iterator(m_stringView, 0); +} + +inline auto StringView::CodeUnits::end() const -> Iterator +{ + return Iterator(m_stringView, m_stringView.length()); +} + +inline auto StringView::split(UChar separator) const -> SplitResult +{ + return SplitResult { *this, separator }; +} + +inline StringView::SplitResult::SplitResult(StringView stringView, UChar separator) + : m_string { stringView } + , m_separator { separator } +{ +} + +inline auto StringView::SplitResult::begin() const -> Iterator +{ + return Iterator { *this }; +} + +inline auto StringView::SplitResult::end() const -> Iterator +{ + return Iterator { *this, Iterator::AtEnd }; +} + +inline StringView::SplitResult::Iterator::Iterator(const SplitResult& result) + : m_result { result } +{ + findNextSubstring(); +} + +inline StringView::SplitResult::Iterator::Iterator(const SplitResult& result, PositionTag) + : m_result { result } + , m_position { result.m_string.length() } +{ +} + +inline StringView StringView::SplitResult::Iterator::operator*() const +{ + ASSERT(m_position < m_result.m_string.length()); + return m_result.m_string.substring(m_position, m_length); +} + +inline bool StringView::SplitResult::Iterator::operator==(const Iterator& other) const +{ + ASSERT(&m_result == &other.m_result); + return m_position == other.m_position; +} + +inline bool StringView::SplitResult::Iterator::operator!=(const Iterator& other) const +{ + return !(*this == other); +} + +template<unsigned length> inline bool equalLettersIgnoringASCIICase(StringView string, const char (&lowercaseLetters)[length]) +{ + return equalLettersIgnoringASCIICaseCommon(string, lowercaseLetters); +} + } // namespace WTF +using WTF::append; +using WTF::equal; using WTF::StringView; #endif // 
StringView_h diff --git a/Source/WTF/wtf/text/SymbolImpl.cpp b/Source/WTF/wtf/text/SymbolImpl.cpp new file mode 100644 index 000000000..18ebea9e0 --- /dev/null +++ b/Source/WTF/wtf/text/SymbolImpl.cpp @@ -0,0 +1,59 @@ +/* + * Copyright (C) 2016 Yusuke Suzuki <utatane.tea@gmail.com>. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "SymbolImpl.h" + +namespace WTF { + +// In addition to the normal hash value, store specialized hash value for +// symbolized StringImpl*. And don't use the normal hash value for symbolized +// StringImpl* when they are treated as Identifiers. 
Unique nature of these +// symbolized StringImpl* keys means that we don't need them to match any other +// string (in fact, that's exactly the opposite of what we want!), and the +// normal hash would lead to lots of conflicts. +unsigned SymbolImpl::nextHashForSymbol() +{ + static unsigned s_nextHashForSymbol = 0; + s_nextHashForSymbol += 1 << s_flagCount; + s_nextHashForSymbol |= 1 << 31; + return s_nextHashForSymbol; +} + +Ref<SymbolImpl> SymbolImpl::create(StringImpl& rep) +{ + auto* ownerRep = (rep.bufferOwnership() == BufferSubstring) ? rep.substringBuffer() : &rep; + ASSERT(ownerRep->bufferOwnership() != BufferSubstring); + if (rep.is8Bit()) + return adoptRef(*new SymbolImpl(rep.m_data8, rep.length(), *ownerRep)); + return adoptRef(*new SymbolImpl(rep.m_data16, rep.length(), *ownerRep)); +} + +Ref<SymbolImpl> SymbolImpl::createNullSymbol() +{ + return adoptRef(*new SymbolImpl); +} + +} // namespace WTF diff --git a/Source/WTF/wtf/text/SymbolImpl.h b/Source/WTF/wtf/text/SymbolImpl.h new file mode 100644 index 000000000..293da0a59 --- /dev/null +++ b/Source/WTF/wtf/text/SymbolImpl.h @@ -0,0 +1,126 @@ +/* + * Copyright (C) 2015-2016 Yusuke Suzuki <utatane.tea@gmail.com>. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. 
OR ITS CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +#include <wtf/text/UniquedStringImpl.h> + +namespace WTF { + +// SymbolImpl is used to represent the symbol string impl. +// It is uniqued string impl, but is not registered in Atomic String tables, so it's not atomic. +class SymbolImpl : public UniquedStringImpl { +private: + static constexpr const unsigned s_flagIsNullSymbol = 1u; + +public: + unsigned hashForSymbol() const { return m_hashForSymbol; } + SymbolRegistry* const& symbolRegistry() const { return m_symbolRegistry; } + SymbolRegistry*& symbolRegistry() { return m_symbolRegistry; } + bool isNullSymbol() const { return m_flags & s_flagIsNullSymbol; } + + WTF_EXPORT_STRING_API static Ref<SymbolImpl> createNullSymbol(); + WTF_EXPORT_STRING_API static Ref<SymbolImpl> create(StringImpl& rep); + + Ref<StringImpl> extractFoldedString() + { + ASSERT(substringBuffer()); + ASSERT(substringBuffer() == m_owner); + ASSERT(!substringBuffer()->isSymbol()); + return createSubstringSharingImpl(*this, 0, length()); + } + +private: + WTF_EXPORT_PRIVATE static unsigned nextHashForSymbol(); + + friend class StringImpl; + + SymbolImpl(const LChar* characters, unsigned length, Ref<StringImpl>&& base) + : UniquedStringImpl(CreateSymbol, characters, length) + , m_owner(&base.leakRef()) + , m_hashForSymbol(nextHashForSymbol()) + { + ASSERT(StringImpl::tailOffset<StringImpl*>() == OBJECT_OFFSETOF(SymbolImpl, m_owner)); + } + + SymbolImpl(const UChar* characters, unsigned length, Ref<StringImpl>&& base) + : 
UniquedStringImpl(CreateSymbol, characters, length) + , m_owner(&base.leakRef()) + , m_hashForSymbol(nextHashForSymbol()) + { + ASSERT(StringImpl::tailOffset<StringImpl*>() == OBJECT_OFFSETOF(SymbolImpl, m_owner)); + } + + SymbolImpl() + : UniquedStringImpl(CreateSymbol) + , m_owner(StringImpl::empty()) + , m_hashForSymbol(nextHashForSymbol()) + , m_flags(s_flagIsNullSymbol) + { + ASSERT(StringImpl::tailOffset<StringImpl*>() == OBJECT_OFFSETOF(SymbolImpl, m_owner)); + } + + // The pointer to the owner string should be immediately following after the StringImpl layout, + // since we would like to align the layout of SymbolImpl to the one of BufferSubstring StringImpl. + StringImpl* m_owner; + SymbolRegistry* m_symbolRegistry { nullptr }; + unsigned m_hashForSymbol; + unsigned m_flags { 0 }; +}; + +inline unsigned StringImpl::symbolAwareHash() const +{ + if (isSymbol()) + return static_cast<const SymbolImpl*>(this)->hashForSymbol(); + return hash(); +} + +inline unsigned StringImpl::existingSymbolAwareHash() const +{ + if (isSymbol()) + return static_cast<const SymbolImpl*>(this)->hashForSymbol(); + return existingHash(); +} + +#if !ASSERT_DISABLED +// SymbolImpls created from StaticStringImpl will ASSERT +// in the generic ValueCheck<T>::checkConsistency +// as they are not allocated by fastMalloc. +// We don't currently have any way to detect that case +// so we ignore the consistency check for all SymbolImpls*. +template<> struct +ValueCheck<SymbolImpl*> { + static void checkConsistency(const SymbolImpl*) { } +}; + +template<> struct +ValueCheck<const SymbolImpl*> { + static void checkConsistency(const SymbolImpl*) { } +}; +#endif + +} // namespace WTF + +using WTF::SymbolImpl; diff --git a/Source/WTF/wtf/text/SymbolRegistry.cpp b/Source/WTF/wtf/text/SymbolRegistry.cpp new file mode 100644 index 000000000..264bc5ca4 --- /dev/null +++ b/Source/WTF/wtf/text/SymbolRegistry.cpp @@ -0,0 +1,63 @@ +/* + * Copyright (C) 2015 Yusuke Suzuki <utatane.tea@gmail.com>. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "config.h" +#include "SymbolRegistry.h" + +namespace WTF { + +SymbolRegistry::~SymbolRegistry() +{ + for (auto& key : m_table) + static_cast<SymbolImpl&>(*key.impl()).symbolRegistry() = nullptr; +} + +Ref<SymbolImpl> SymbolRegistry::symbolForKey(const String& rep) +{ + auto addResult = m_table.add(SymbolRegistryKey(rep.impl())); + if (!addResult.isNewEntry) + return *static_cast<SymbolImpl*>(addResult.iterator->impl()); + + auto symbol = SymbolImpl::create(*rep.impl()); + symbol->symbolRegistry() = this; + *addResult.iterator = SymbolRegistryKey(&symbol.get()); + return symbol; +} + +String SymbolRegistry::keyForSymbol(SymbolImpl& uid) +{ + ASSERT(uid.symbolRegistry() == this); + return uid.extractFoldedString(); +} + +void SymbolRegistry::remove(SymbolImpl& uid) +{ + ASSERT(uid.symbolRegistry() == this); + auto iterator = m_table.find(SymbolRegistryKey(&uid)); + ASSERT_WITH_MESSAGE(iterator != m_table.end(), "The string being removed is registered in the string table of an other thread!"); + m_table.remove(iterator); +} + +} diff --git a/Source/WTF/wtf/text/SymbolRegistry.h b/Source/WTF/wtf/text/SymbolRegistry.h new file mode 100644 index 000000000..06d276834 --- /dev/null +++ b/Source/WTF/wtf/text/SymbolRegistry.h @@ -0,0 +1,113 @@ +/* + * Copyright (C) 2015 Yusuke Suzuki <utatane.tea@gmail.com>. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. 
AND ITS CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef WTF_SymbolRegistry_h +#define WTF_SymbolRegistry_h + +#include <wtf/HashSet.h> +#include <wtf/text/StringHash.h> +#include <wtf/text/SymbolImpl.h> +#include <wtf/text/WTFString.h> + +namespace WTF { + +// Since StringImpl* used for Symbol uid doesn't have a hash value reflecting the string content, +// to compare with an external string in string contents, introduce SymbolRegistryKey. +// SymbolRegistryKey holds a hash value reflecting the string content additionally. 
+class SymbolRegistryKey { +public: + SymbolRegistryKey() = default; + explicit SymbolRegistryKey(StringImpl* uid); + SymbolRegistryKey(WTF::HashTableDeletedValueType); + + unsigned hash() const { return m_hash; } + StringImpl* impl() const { return m_impl; } + + bool isHashTableDeletedValue() const { return m_impl == hashTableDeletedValue(); } + +private: + static StringImpl* hashTableDeletedValue() { return reinterpret_cast<StringImpl*>(-1); } + + StringImpl* m_impl { nullptr }; + unsigned m_hash { 0 }; +}; + +template<typename T> struct DefaultHash; +template<> struct DefaultHash<SymbolRegistryKey> { + struct Hash : StringHash { + static unsigned hash(const SymbolRegistryKey& key) + { + return key.hash(); + } + static bool equal(const SymbolRegistryKey& a, const SymbolRegistryKey& b) + { + return StringHash::equal(a.impl(), b.impl()); + } + }; +}; + +template<> struct HashTraits<SymbolRegistryKey> : SimpleClassHashTraits<SymbolRegistryKey> { + static const bool hasIsEmptyValueFunction = true; + static bool isEmptyValue(const SymbolRegistryKey& key) + { + return key.impl() == nullptr; + } +}; + +class SymbolRegistry { + WTF_MAKE_NONCOPYABLE(SymbolRegistry); +public: + SymbolRegistry() = default; + WTF_EXPORT_PRIVATE ~SymbolRegistry(); + + WTF_EXPORT_PRIVATE Ref<SymbolImpl> symbolForKey(const String&); + WTF_EXPORT_PRIVATE String keyForSymbol(SymbolImpl&); + + void remove(SymbolImpl&); + +private: + HashSet<SymbolRegistryKey> m_table; +}; + +inline SymbolRegistryKey::SymbolRegistryKey(StringImpl* uid) + : m_impl(uid) +{ + if (uid->isSymbol()) { + if (uid->is8Bit()) + m_hash = StringHasher::computeHashAndMaskTop8Bits(uid->characters8(), uid->length()); + else + m_hash = StringHasher::computeHashAndMaskTop8Bits(uid->characters16(), uid->length()); + } else + m_hash = uid->hash(); +} + +inline SymbolRegistryKey::SymbolRegistryKey(WTF::HashTableDeletedValueType) + : m_impl(hashTableDeletedValue()) +{ +} + +} + +#endif diff --git 
a/Source/WTF/wtf/text/TextBreakIterator.cpp b/Source/WTF/wtf/text/TextBreakIterator.cpp new file mode 100644 index 000000000..1edc32a5d --- /dev/null +++ b/Source/WTF/wtf/text/TextBreakIterator.cpp @@ -0,0 +1,448 @@ +/* + * (C) 1999 Lars Knoll (knoll@kde.org) + * Copyright (C) 2004-2016 Apple Inc. All rights reserved. + * Copyright (C) 2007-2009 Torch Mobile, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public License + * along with this library; see the file COPYING.LIB. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ */ + +#include "config.h" +#include "TextBreakIterator.h" + +#include "LineBreakIteratorPoolICU.h" +#include "TextBreakIteratorInternalICU.h" +#include "UTextProviderLatin1.h" +#include "UTextProviderUTF16.h" +#include <atomic> +#include <mutex> +#include <unicode/ubrk.h> +#include <wtf/text/StringBuilder.h> + +// FIXME: This needs a better name +#define ADDITIONAL_EMOJI_SUPPORT (PLATFORM(IOS) || (PLATFORM(MAC) && __MAC_OS_X_VERSION_MIN_REQUIRED >= 101100)) + +namespace WTF { + +// Iterator initialization + +static UBreakIterator* initializeIterator(UBreakIteratorType type, const char* locale = currentTextBreakLocaleID()) +{ + UErrorCode openStatus = U_ZERO_ERROR; + UBreakIterator* iterator = ubrk_open(type, locale, 0, 0, &openStatus); + ASSERT_WITH_MESSAGE(U_SUCCESS(openStatus), "ICU could not open a break iterator: %s (%d)", u_errorName(openStatus), openStatus); + return iterator; +} + +#if !PLATFORM(IOS) + +static UBreakIterator* initializeIteratorWithRules(const char* breakRules) +{ + UParseError parseStatus; + UErrorCode openStatus = U_ZERO_ERROR; + unsigned length = strlen(breakRules); + auto upconvertedCharacters = StringView(reinterpret_cast<const LChar*>(breakRules), length).upconvertedCharacters(); + UBreakIterator* iterator = ubrk_openRules(upconvertedCharacters, length, 0, 0, &parseStatus, &openStatus); + ASSERT_WITH_MESSAGE(U_SUCCESS(openStatus), "ICU could not open a break iterator: %s (%d)", u_errorName(openStatus), openStatus); + return iterator; +} + +#endif + + +// Iterator text setting + +static UBreakIterator* setTextForIterator(UBreakIterator& iterator, StringView string) +{ + if (string.is8Bit()) { + UTextWithBuffer textLocal; + textLocal.text = UTEXT_INITIALIZER; + textLocal.text.extraSize = sizeof(textLocal.buffer); + textLocal.text.pExtra = textLocal.buffer; + + UErrorCode openStatus = U_ZERO_ERROR; + UText* text = openLatin1UTextProvider(&textLocal, string.characters8(), string.length(), &openStatus); + if (U_FAILURE(openStatus)) { + 
LOG_ERROR("uTextOpenLatin1 failed with status %d", openStatus); + return nullptr; + } + + UErrorCode setTextStatus = U_ZERO_ERROR; + ubrk_setUText(&iterator, text, &setTextStatus); + if (U_FAILURE(setTextStatus)) { + LOG_ERROR("ubrk_setUText failed with status %d", setTextStatus); + return nullptr; + } + + utext_close(text); + } else { + UErrorCode setTextStatus = U_ZERO_ERROR; + ubrk_setText(&iterator, string.characters16(), string.length(), &setTextStatus); + if (U_FAILURE(setTextStatus)) + return nullptr; + } + + return &iterator; +} + +static UBreakIterator* setContextAwareTextForIterator(UBreakIterator& iterator, StringView string, const UChar* priorContext, unsigned priorContextLength) +{ + if (string.is8Bit()) { + UTextWithBuffer textLocal; + textLocal.text = UTEXT_INITIALIZER; + textLocal.text.extraSize = sizeof(textLocal.buffer); + textLocal.text.pExtra = textLocal.buffer; + + UErrorCode openStatus = U_ZERO_ERROR; + UText* text = openLatin1ContextAwareUTextProvider(&textLocal, string.characters8(), string.length(), priorContext, priorContextLength, &openStatus); + if (U_FAILURE(openStatus)) { + LOG_ERROR("openLatin1ContextAwareUTextProvider failed with status %d", openStatus); + return nullptr; + } + + UErrorCode setTextStatus = U_ZERO_ERROR; + ubrk_setUText(&iterator, text, &setTextStatus); + if (U_FAILURE(setTextStatus)) { + LOG_ERROR("ubrk_setUText failed with status %d", setTextStatus); + return nullptr; + } + + utext_close(text); + } else { + UText textLocal = UTEXT_INITIALIZER; + + UErrorCode openStatus = U_ZERO_ERROR; + UText* text = openUTF16ContextAwareUTextProvider(&textLocal, string.characters16(), string.length(), priorContext, priorContextLength, &openStatus); + if (U_FAILURE(openStatus)) { + LOG_ERROR("openUTF16ContextAwareUTextProvider failed with status %d", openStatus); + return 0; + } + + UErrorCode setTextStatus = U_ZERO_ERROR; + ubrk_setUText(&iterator, text, &setTextStatus); + if (U_FAILURE(setTextStatus)) { + LOG_ERROR("ubrk_setUText 
failed with status %d", setTextStatus); + return nullptr; + } + + utext_close(text); + } + + return &iterator; +} + + +// Static iterators + +UBreakIterator* wordBreakIterator(StringView string) +{ + static UBreakIterator* staticWordBreakIterator = initializeIterator(UBRK_WORD); + if (!staticWordBreakIterator) + return nullptr; + + return setTextForIterator(*staticWordBreakIterator, string); +} + +UBreakIterator* sentenceBreakIterator(StringView string) +{ + static UBreakIterator* staticSentenceBreakIterator = initializeIterator(UBRK_SENTENCE); + if (!staticSentenceBreakIterator) + return nullptr; + + return setTextForIterator(*staticSentenceBreakIterator, string); +} + +UBreakIterator* cursorMovementIterator(StringView string) +{ +#if !PLATFORM(IOS) + // This rule set is based on character-break iterator rules of ICU 57 + // <http://source.icu-project.org/repos/icu/icu/tags/release-57-1/source/data/brkitr/>. + // The major differences from the original ones are listed below: + // * Replaced '[\p{Grapheme_Cluster_Break = SpacingMark}]' with '[\p{General_Category = Spacing Mark} - $Extend]' for ICU 3.8 or earlier; + // * Removed rules that prevent a cursor from moving after prepend characters (Bug 24342); + // * Added rules that prevent a cursor from moving after virama signs of Indic languages except Tamil (Bug 15790), and; + // * Added rules that prevent a cursor from moving before Japanese half-width katakara voiced marks. + // * Added rules for regional indicator symbols. 
+ static const char* kRules = + "$CR = [\\p{Grapheme_Cluster_Break = CR}];" + "$LF = [\\p{Grapheme_Cluster_Break = LF}];" + "$Control = [\\p{Grapheme_Cluster_Break = Control}];" + "$VoiceMarks = [\\uFF9E\\uFF9F];" // Japanese half-width katakana voiced marks + "$Extend = [\\p{Grapheme_Cluster_Break = Extend} $VoiceMarks - [\\u0E30 \\u0E32 \\u0E45 \\u0EB0 \\u0EB2]];" + "$SpacingMark = [[\\p{General_Category = Spacing Mark}] - $Extend];" + "$L = [\\p{Grapheme_Cluster_Break = L}];" + "$V = [\\p{Grapheme_Cluster_Break = V}];" + "$T = [\\p{Grapheme_Cluster_Break = T}];" + "$LV = [\\p{Grapheme_Cluster_Break = LV}];" + "$LVT = [\\p{Grapheme_Cluster_Break = LVT}];" + "$Hin0 = [\\u0905-\\u0939];" // Devanagari Letter A,...,Ha + "$HinV = \\u094D;" // Devanagari Sign Virama + "$Hin1 = [\\u0915-\\u0939];" // Devanagari Letter Ka,...,Ha + "$Ben0 = [\\u0985-\\u09B9];" // Bengali Letter A,...,Ha + "$BenV = \\u09CD;" // Bengali Sign Virama + "$Ben1 = [\\u0995-\\u09B9];" // Bengali Letter Ka,...,Ha + "$Pan0 = [\\u0A05-\\u0A39];" // Gurmukhi Letter A,...,Ha + "$PanV = \\u0A4D;" // Gurmukhi Sign Virama + "$Pan1 = [\\u0A15-\\u0A39];" // Gurmukhi Letter Ka,...,Ha + "$Guj0 = [\\u0A85-\\u0AB9];" // Gujarati Letter A,...,Ha + "$GujV = \\u0ACD;" // Gujarati Sign Virama + "$Guj1 = [\\u0A95-\\u0AB9];" // Gujarati Letter Ka,...,Ha + "$Ori0 = [\\u0B05-\\u0B39];" // Oriya Letter A,...,Ha + "$OriV = \\u0B4D;" // Oriya Sign Virama + "$Ori1 = [\\u0B15-\\u0B39];" // Oriya Letter Ka,...,Ha + "$Tel0 = [\\u0C05-\\u0C39];" // Telugu Letter A,...,Ha + "$TelV = \\u0C4D;" // Telugu Sign Virama + "$Tel1 = [\\u0C14-\\u0C39];" // Telugu Letter Ka,...,Ha (NOTE(review): range starts at U+0C14, which is Letter Au; the other scripts' *1 sets start at xx15/xx95 - confirm this is intentional) + "$Kan0 = [\\u0C85-\\u0CB9];" // Kannada Letter A,...,Ha + "$KanV = \\u0CCD;" // Kannada Sign Virama + "$Kan1 = [\\u0C95-\\u0CB9];" // Kannada Letter Ka,...,Ha + "$Mal0 = [\\u0D05-\\u0D39];" // Malayalam Letter A,...,Ha + "$MalV = \\u0D4D;" // Malayalam Sign Virama + "$Mal1 = [\\u0D15-\\u0D39];" // Malayalam Letter Ka,...,Ha + "$RI = 
[\\U0001F1E6-\\U0001F1FF];" // Emoji regional indicators + "$ZWJ = \\u200D;" // Zero width joiner + "$EmojiVar = [\\uFE0F];" // Emoji-style variation selector +#if ADDITIONAL_EMOJI_SUPPORT + "$EmojiForSeqs = [\\u2640 \\u2642 \\u26F9 \\u2764 \\U0001F308 \\U0001F3C3-\\U0001F3C4 \\U0001F3CA-\\U0001F3CC \\U0001F3F3 \\U0001F441 \\U0001F466-\\U0001F469 \\U0001F46E-\\U0001F46F \\U0001F471 \\U0001F473 \\U0001F477 \\U0001F481-\\U0001F482 \\U0001F486-\\U0001F487 \\U0001F48B \\U0001F575 \\U0001F5E8 \\U0001F645-\\U0001F647 \\U0001F64B \\U0001F64D-\\U0001F64E \\U0001F6A3 \\U0001F6B4-\\U0001F6B6 \\u2695-\\u2696 \\u2708 \\U0001F33E \\U0001F373 \\U0001F393 \\U0001F3A4 \\U0001F3A8 \\U0001F3EB \\U0001F3ED \\U0001F4BB-\\U0001F4BC \\U0001F527 \\U0001F52C \\U0001F680 \\U0001F692 \\U0001F926 \\U0001F937-\\U0001F939 \\U0001F93C-\\U0001F93E];" // Emoji that participate in ZWJ sequences + "$EmojiForMods = [\\u261D \\u26F9 \\u270A-\\u270D \\U0001F385 \\U0001F3C3-\\U0001F3C4 \\U0001F3CA \\U0001F3CB \\U0001F442-\\U0001F443 \\U0001F446-\\U0001F450 \\U0001F466-\\U0001F478 \\U0001F47C \\U0001F481-\\U0001F483 \\U0001F485-\\U0001F487 \\U0001F4AA \\U0001F575 \\U0001F590 \\U0001F595 \\U0001F596 \\U0001F645-\\U0001F647 \\U0001F64B-\\U0001F64F \\U0001F6A3 \\U0001F6B4-\\U0001F6B6 \\U0001F6C0 \\U0001F918 \\U0001F3C2 \\U0001F3C7 \\U0001F3CC \\U0001F574 \\U0001F57A \\U0001F6CC \\U0001F919-\\U0001F91E \\U0001F926 \\U0001F930 \\U0001F933-\\U0001F939 \\U0001F93C-\\U0001F93E] ;" // Emoji that take Fitzpatrick modifiers +#else + "$EmojiForSeqs = [\\u2764 \\U0001F466-\\U0001F469 \\U0001F48B];" // Emoji that participate in ZWJ sequences + "$EmojiForMods = [\\u261D \\u270A-\\u270C \\U0001F385 \\U0001F3C3-\\U0001F3C4 \\U0001F3C7 \\U0001F3CA \\U0001F442-\\U0001F443 \\U0001F446-\\U0001F450 \\U0001F466-\\U0001F469 \\U0001F46E-\\U0001F478 \\U0001F47C \\U0001F481-\\U0001F483 \\U0001F485-\\U0001F487 \\U0001F4AA \\U0001F596 \\U0001F645-\\U0001F647 \\U0001F64B-\\U0001F64F \\U0001F6A3 \\U0001F6B4-\\U0001F6B6 \\U0001F6C0] 
;" // Emoji that take Fitzpatrick modifiers +#endif + "$EmojiMods = [\\U0001F3FB-\\U0001F3FF];" // Fitzpatrick modifiers + "!!chain;" +#if ADDITIONAL_EMOJI_SUPPORT + "!!RINoChain;" +#endif + "!!forward;" + "$CR $LF;" + "$L ($L | $V | $LV | $LVT);" + "($LV | $V) ($V | $T);" + "($LVT | $T) $T;" +#if ADDITIONAL_EMOJI_SUPPORT + "$RI $RI $Extend* / $RI;" + "$RI $RI $Extend*;" + "[^$Control $CR $LF] $Extend;" + "[^$Control $CR $LF] $SpacingMark;" +#else + "[^$Control $CR $LF] $Extend;" + "[^$Control $CR $LF] $SpacingMark;" + "$RI $RI / $RI;" + "$RI $RI;" +#endif + "$Hin0 $HinV $Hin1;" // Devanagari Virama (forward) + "$Ben0 $BenV $Ben1;" // Bengali Virama (forward) + "$Pan0 $PanV $Pan1;" // Gurmukhi Virama (forward) + "$Guj0 $GujV $Guj1;" // Gujarati Virama (forward) + "$Ori0 $OriV $Ori1;" // Oriya Virama (forward) + "$Tel0 $TelV $Tel1;" // Telugu Virama (forward) + "$Kan0 $KanV $Kan1;" // Kannada Virama (forward) + "$Mal0 $MalV $Mal1;" // Malayalam Virama (forward) + "$ZWJ $EmojiForSeqs;" // Don't break in emoji ZWJ sequences + "$EmojiForMods $EmojiVar? 
$EmojiMods;" // Don't break between relevant emoji (possibly with variation selector) and Fitzpatrick modifier + "!!reverse;" + "$LF $CR;" + "($L | $V | $LV | $LVT) $L;" + "($V | $T) ($LV | $V);" + "$T ($LVT | $T);" +#if ADDITIONAL_EMOJI_SUPPORT + "$Extend* $RI $RI / $Extend* $RI $RI;" + "$Extend* $RI $RI;" + "$Extend [^$Control $CR $LF];" + "$SpacingMark [^$Control $CR $LF];" +#else + "$Extend [^$Control $CR $LF];" + "$SpacingMark [^$Control $CR $LF];" + "$RI $RI / $RI $RI;" + "$RI $RI;" +#endif + "$Hin1 $HinV $Hin0;" // Devanagari Virama (backward) + "$Ben1 $BenV $Ben0;" // Bengali Virama (backward) + "$Pan1 $PanV $Pan0;" // Gurmukhi Virama (backward) + "$Guj1 $GujV $Guj0;" // Gujarati Virama (backward) + "$Ori1 $OriV $Ori0;" // Oriya Virama (backward) + "$Tel1 $TelV $Tel0;" // Telugu Virama (backward) + "$Kan1 $KanV $Kan0;" // Kannada Virama (backward) + "$Mal1 $MalV $Mal0;" // Malayalam Virama (backward) + "$EmojiForSeqs $ZWJ;" // Don't break in emoji ZWJ sequences + "$EmojiMods $EmojiVar? 
$EmojiForMods;" // Don't break between relevant emoji (possibly with variation selector) and Fitzpatrick modifier +#if ADDITIONAL_EMOJI_SUPPORT + "!!safe_reverse;" + "$RI $RI+;" + "[$EmojiVar $EmojiMods]+ $EmojiForMods;" + "!!safe_forward;" + "$RI $RI+;" + "$EmojiForMods [$EmojiVar $EmojiMods]+;"; +#else + "[$EmojiVar $EmojiMods]+ $EmojiForMods;" + "$EmojiForMods [$EmojiVar $EmojiMods]+;" + "!!safe_reverse;" + "!!safe_forward;"; +#endif + static UBreakIterator* staticCursorMovementIterator = initializeIteratorWithRules(kRules); +#else // PLATFORM(IOS) + // Use the special Thai character break iterator for all locales + static UBreakIterator* staticCursorMovementIterator = initializeIterator(UBRK_CHARACTER, "th"); +#endif // !PLATFORM(IOS) + + if (!staticCursorMovementIterator) + return nullptr; + + return setTextForIterator(*staticCursorMovementIterator, string); +} + +UBreakIterator* acquireLineBreakIterator(StringView string, const AtomicString& locale, const UChar* priorContext, unsigned priorContextLength, LineBreakIteratorMode mode) +{ + UBreakIterator* iterator = LineBreakIteratorPool::sharedPool().take(locale, mode); + if (!iterator) + return nullptr; + + return setContextAwareTextForIterator(*iterator, string, priorContext, priorContextLength); +} + +void releaseLineBreakIterator(UBreakIterator* iterator) +{ + ASSERT_ARG(iterator, iterator); + + LineBreakIteratorPool::sharedPool().put(iterator); +} + +UBreakIterator* openLineBreakIterator(const AtomicString& locale) +{ + bool localeIsEmpty = locale.isEmpty(); + UErrorCode openStatus = U_ZERO_ERROR; + UBreakIterator* ubrkIter = ubrk_open(UBRK_LINE, localeIsEmpty ? currentTextBreakLocaleID() : locale.string().utf8().data(), 0, 0, &openStatus); + // locale comes from a web page and it can be invalid, leading ICU + // to fail, in which case we fall back to the default locale. 
+ if (!localeIsEmpty && U_FAILURE(openStatus)) { + openStatus = U_ZERO_ERROR; + ubrkIter = ubrk_open(UBRK_LINE, currentTextBreakLocaleID(), 0, 0, &openStatus); + } + + if (U_FAILURE(openStatus)) { + LOG_ERROR("ubrk_open failed with status %d", openStatus); + return nullptr; + } + + return ubrkIter; +} + +void closeLineBreakIterator(UBreakIterator*& iterator) +{ + UBreakIterator* ubrkIter = iterator; + ASSERT(ubrkIter); + ubrk_close(ubrkIter); + iterator = nullptr; +} + +static std::atomic<UBreakIterator*> nonSharedCharacterBreakIterator = ATOMIC_VAR_INIT(nullptr); + +static inline UBreakIterator* getNonSharedCharacterBreakIterator() +{ + if (auto *res = nonSharedCharacterBreakIterator.exchange(nullptr, std::memory_order_acquire)) + return res; + return initializeIterator(UBRK_CHARACTER); +} + +static inline void cacheNonSharedCharacterBreakIterator(UBreakIterator* cacheMe) +{ + if (auto *old = nonSharedCharacterBreakIterator.exchange(cacheMe, std::memory_order_release)) + ubrk_close(old); +} + +NonSharedCharacterBreakIterator::NonSharedCharacterBreakIterator(StringView string) +{ + // Hold the acquired iterator in a local: setTextForIterator returns nullptr on failure, and + // assigning that straight over m_iterator would drop the only pointer to the iterator, + // so the destructor could never hand it back to the cache. + UBreakIterator* iterator = getNonSharedCharacterBreakIterator(); + m_iterator = iterator ? setTextForIterator(*iterator, string) : nullptr; + if (iterator && !m_iterator) + cacheNonSharedCharacterBreakIterator(iterator); +} + +NonSharedCharacterBreakIterator::~NonSharedCharacterBreakIterator() +{ + if (m_iterator) + cacheNonSharedCharacterBreakIterator(m_iterator); +} + +NonSharedCharacterBreakIterator::NonSharedCharacterBreakIterator(NonSharedCharacterBreakIterator&& other) + : m_iterator(nullptr) +{ + std::swap(m_iterator, other.m_iterator); +} + +// Iterator implementation. + +bool isWordTextBreak(UBreakIterator* iterator) +{ + int ruleStatus = ubrk_getRuleStatus(iterator); + return ruleStatus != UBRK_WORD_NONE; +} + +unsigned numGraphemeClusters(StringView string) +{ + unsigned stringLength = string.length(); + + if (!stringLength) + return 0; + + // The only Latin-1 Extended Grapheme Cluster is CRLF. 
+ if (string.is8Bit()) { + auto* characters = string.characters8(); + unsigned numCRLF = 0; + for (unsigned i = 1; i < stringLength; ++i) + numCRLF += characters[i - 1] == '\r' && characters[i] == '\n'; + return stringLength - numCRLF; + } + + NonSharedCharacterBreakIterator iterator { string }; + if (!iterator) { + ASSERT_NOT_REACHED(); + return stringLength; + } + + unsigned numGraphemeClusters = 0; + while (ubrk_next(iterator) != UBRK_DONE) + ++numGraphemeClusters; + return numGraphemeClusters; +} + +unsigned numCharactersInGraphemeClusters(StringView string, unsigned numGraphemeClusters) +{ + unsigned stringLength = string.length(); + + if (stringLength <= numGraphemeClusters) + return stringLength; + + // The only Latin-1 Extended Grapheme Cluster is CRLF. + if (string.is8Bit()) { + auto* characters = string.characters8(); + unsigned i, j; + for (i = 0, j = 0; i < numGraphemeClusters && j + 1 < stringLength; ++i, ++j) + j += characters[j] == '\r' && characters[j + 1] == '\n'; + return j + (i < numGraphemeClusters); + } + + NonSharedCharacterBreakIterator iterator { string }; + if (!iterator) { + ASSERT_NOT_REACHED(); + return stringLength; + } + + for (unsigned i = 0; i < numGraphemeClusters; ++i) { + if (ubrk_next(iterator) == UBRK_DONE) + return stringLength; + } + return ubrk_current(iterator); +} + +} // namespace WTF diff --git a/Source/WTF/wtf/text/TextBreakIterator.h b/Source/WTF/wtf/text/TextBreakIterator.h new file mode 100644 index 000000000..2bb5f9ca4 --- /dev/null +++ b/Source/WTF/wtf/text/TextBreakIterator.h @@ -0,0 +1,191 @@ +/* + * Copyright (C) 2006 Lars Knoll <lars@trolltech.com> + * Copyright (C) 2007-2016 Apple Inc. All rights reserved. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public License + * along with this library; see the file COPYING.LIB. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + +#pragma once + +#include <wtf/text/StringView.h> + +namespace WTF { + +// Note: The returned iterator is good only until you get another iterator, with the exception of acquireLineBreakIterator. + +enum class LineBreakIteratorMode { Default, Loose, Normal, Strict }; + +// This is similar to character break iterator in most cases, but is subject to +// platform UI conventions. One notable example where this can be different +// from character break iterator is Thai prepend characters, see bug 24342. +// Use this for insertion point and selection manipulations. 
+WTF_EXPORT_PRIVATE UBreakIterator* cursorMovementIterator(StringView); + +WTF_EXPORT_PRIVATE UBreakIterator* wordBreakIterator(StringView); +WTF_EXPORT_PRIVATE UBreakIterator* sentenceBreakIterator(StringView); + +WTF_EXPORT_PRIVATE UBreakIterator* acquireLineBreakIterator(StringView, const AtomicString& locale, const UChar* priorContext, unsigned priorContextLength, LineBreakIteratorMode); +WTF_EXPORT_PRIVATE void releaseLineBreakIterator(UBreakIterator*); +UBreakIterator* openLineBreakIterator(const AtomicString& locale); +void closeLineBreakIterator(UBreakIterator*&); + +WTF_EXPORT_PRIVATE bool isWordTextBreak(UBreakIterator*); + +class LazyLineBreakIterator { +public: + LazyLineBreakIterator() + { + resetPriorContext(); + } + + explicit LazyLineBreakIterator(StringView stringView, const AtomicString& locale = AtomicString(), LineBreakIteratorMode mode = LineBreakIteratorMode::Default) + : m_stringView(stringView) + , m_locale(locale) + , m_mode(mode) + { + resetPriorContext(); + } + + ~LazyLineBreakIterator() + { + if (m_iterator) + releaseLineBreakIterator(m_iterator); + } + + StringView stringView() const { return m_stringView; } + LineBreakIteratorMode mode() const { return m_mode; } + + UChar lastCharacter() const + { + static_assert(WTF_ARRAY_LENGTH(m_priorContext) == 2, "UBreakIterator unexpected prior context length"); + return m_priorContext[1]; + } + + UChar secondToLastCharacter() const + { + static_assert(WTF_ARRAY_LENGTH(m_priorContext) == 2, "UBreakIterator unexpected prior context length"); + return m_priorContext[0]; + } + + void setPriorContext(UChar last, UChar secondToLast) + { + static_assert(WTF_ARRAY_LENGTH(m_priorContext) == 2, "UBreakIterator unexpected prior context length"); + m_priorContext[0] = secondToLast; + m_priorContext[1] = last; + } + + void updatePriorContext(UChar last) + { + static_assert(WTF_ARRAY_LENGTH(m_priorContext) == 2, "UBreakIterator unexpected prior context length"); + m_priorContext[0] = m_priorContext[1]; + 
m_priorContext[1] = last; + } + + void resetPriorContext() + { + static_assert(WTF_ARRAY_LENGTH(m_priorContext) == 2, "UBreakIterator unexpected prior context length"); + m_priorContext[0] = 0; + m_priorContext[1] = 0; + } + + unsigned priorContextLength() const + { + unsigned priorContextLength = 0; + static_assert(WTF_ARRAY_LENGTH(m_priorContext) == 2, "UBreakIterator unexpected prior context length"); + if (m_priorContext[1]) { + ++priorContextLength; + if (m_priorContext[0]) + ++priorContextLength; + } + return priorContextLength; + } + + // Obtain text break iterator, possibly previously cached, where this iterator is (or has been) + // initialized to use the previously stored string as the primary breaking context and using + // previously stored prior context if non-empty. + UBreakIterator* get(unsigned priorContextLength) + { + ASSERT(priorContextLength <= priorContextCapacity); + const UChar* priorContext = priorContextLength ? &m_priorContext[priorContextCapacity - priorContextLength] : 0; + if (!m_iterator) { + m_iterator = acquireLineBreakIterator(m_stringView, m_locale, priorContext, priorContextLength, m_mode); + m_cachedPriorContext = priorContext; + m_cachedPriorContextLength = priorContextLength; + } else if (priorContext != m_cachedPriorContext || priorContextLength != m_cachedPriorContextLength) { + resetStringAndReleaseIterator(m_stringView, m_locale, m_mode); + return this->get(priorContextLength); + } + return m_iterator; + } + + void resetStringAndReleaseIterator(StringView stringView, const AtomicString& locale, LineBreakIteratorMode mode) + { + if (m_iterator) + releaseLineBreakIterator(m_iterator); + m_stringView = stringView; + m_locale = locale; + m_iterator = nullptr; + m_cachedPriorContext = nullptr; + m_mode = mode; + m_cachedPriorContextLength = 0; + } + +private: + static constexpr unsigned priorContextCapacity = 2; + StringView m_stringView; + AtomicString m_locale; + UBreakIterator* m_iterator { nullptr }; + const UChar* 
m_cachedPriorContext { nullptr }; + LineBreakIteratorMode m_mode { LineBreakIteratorMode::Default }; + unsigned m_cachedPriorContextLength { 0 }; + UChar m_priorContext[priorContextCapacity]; +}; + +// Iterates over "extended grapheme clusters", as defined in UAX #29. +// Note that platform implementations may be less sophisticated - e.g. ICU prior to +// version 4.0 only supports "legacy grapheme clusters". +// Use this for general text processing, e.g. string truncation. + +class NonSharedCharacterBreakIterator { + WTF_MAKE_NONCOPYABLE(NonSharedCharacterBreakIterator); +public: + WTF_EXPORT_PRIVATE NonSharedCharacterBreakIterator(StringView); + WTF_EXPORT_PRIVATE ~NonSharedCharacterBreakIterator(); + + NonSharedCharacterBreakIterator(NonSharedCharacterBreakIterator&&); + + operator UBreakIterator*() const { return m_iterator; } + +private: + UBreakIterator* m_iterator; +}; + +// Counts the number of grapheme clusters. A surrogate pair or a sequence +// of a non-combining character and following combining characters is +// counted as 1 grapheme cluster. +WTF_EXPORT_PRIVATE unsigned numGraphemeClusters(StringView); + +// Returns the number of characters which will be less than or equal to +// the specified grapheme cluster length. +WTF_EXPORT_PRIVATE unsigned numCharactersInGraphemeClusters(StringView, unsigned); + +} + +using WTF::LazyLineBreakIterator; +using WTF::LineBreakIteratorMode; +using WTF::NonSharedCharacterBreakIterator; +using WTF::isWordTextBreak; diff --git a/Source/WTF/wtf/text/TextBreakIteratorInternalICU.h b/Source/WTF/wtf/text/TextBreakIteratorInternalICU.h new file mode 100644 index 000000000..70a301c88 --- /dev/null +++ b/Source/WTF/wtf/text/TextBreakIteratorInternalICU.h @@ -0,0 +1,37 @@ +/* + * Copyright (C) 2007 Apple Inc. All rights reserved. 
+ * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public License + * along with this library; see the file COPYING.LIB. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + +#ifndef TextBreakIteratorInternalICU_h +#define TextBreakIteratorInternalICU_h + +// FIXME: Now that this handles locales for ICU, not just for text breaking, +// this file and the various implementation files should be renamed. + +namespace WTF { + +WTF_EXPORT_PRIVATE const char* currentSearchLocaleID(); +WTF_EXPORT_PRIVATE const char* currentTextBreakLocaleID(); + +} + +using WTF::currentSearchLocaleID; +using WTF::currentTextBreakLocaleID; + +#endif diff --git a/Source/WTF/wtf/text/TextPosition.h b/Source/WTF/wtf/text/TextPosition.h index be49c157a..2f108b038 100644 --- a/Source/WTF/wtf/text/TextPosition.h +++ b/Source/WTF/wtf/text/TextPosition.h @@ -22,37 +22,12 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#ifndef TextPosition_h -#define TextPosition_h +#pragma once -#include <wtf/Assertions.h> +#include "OrdinalNumber.h" namespace WTF { -// An abstract number of element in a sequence. The sequence has a first element. -// This type should be used instead of integer because 2 contradicting traditions can -// call a first element '0' or '1' which makes integer type ambiguous. 
-class OrdinalNumber { -public: - static OrdinalNumber fromZeroBasedInt(int zeroBasedInt) { return OrdinalNumber(zeroBasedInt); } - static OrdinalNumber fromOneBasedInt(int oneBasedInt) { return OrdinalNumber(oneBasedInt - 1); } - OrdinalNumber() : m_zeroBasedValue(0) { } - - int zeroBasedInt() const { return m_zeroBasedValue; } - int oneBasedInt() const { return m_zeroBasedValue + 1; } - - bool operator==(OrdinalNumber other) { return m_zeroBasedValue == other.m_zeroBasedValue; } - bool operator!=(OrdinalNumber other) { return !((*this) == other); } - - static OrdinalNumber first() { return OrdinalNumber(0); } - static OrdinalNumber beforeFirst() { return OrdinalNumber(-1); } - -private: - OrdinalNumber(int zeroBasedInt) : m_zeroBasedValue(zeroBasedInt) { } - int m_zeroBasedValue; -}; - - // TextPosition structure specifies coordinates within an text resource. It is used mostly // for saving script source position. class TextPosition { @@ -62,13 +37,11 @@ public: , m_column(column) { } + TextPosition() { } bool operator==(const TextPosition& other) { return m_line == other.m_line && m_column == other.m_column; } bool operator!=(const TextPosition& other) { return !((*this) == other); } - // A 'minimum' value of position, used as a default value. - static TextPosition minimumPosition() { return TextPosition(OrdinalNumber::first(), OrdinalNumber::first()); } - // A value with line value less than a minimum; used as an impossible position. static TextPosition belowRangePosition() { return TextPosition(OrdinalNumber::beforeFirst(), OrdinalNumber::beforeFirst()); } @@ -78,8 +51,4 @@ public: } -using WTF::OrdinalNumber; - using WTF::TextPosition; - -#endif // TextPosition_h diff --git a/Source/WTF/wtf/text/UniquedStringImpl.h b/Source/WTF/wtf/text/UniquedStringImpl.h new file mode 100644 index 000000000..09aba85cf --- /dev/null +++ b/Source/WTF/wtf/text/UniquedStringImpl.h @@ -0,0 +1,65 @@ +/* + * Copyright (C) 2015 Yusuke Suzuki <utatane.tea@gmail.com>. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef UniquedStringImpl_h +#define UniquedStringImpl_h + +#include <wtf/text/StringImpl.h> + +namespace WTF { + +// It represents that the string impl is uniqued in some ways. +// When the given 2 string impls are both uniqued string impls, we can compare it just using pointer comparison. 
+class UniquedStringImpl : public StringImpl { +private: + UniquedStringImpl() = delete; +protected: + UniquedStringImpl(CreateSymbolTag, const LChar* characters, unsigned length) : StringImpl(CreateSymbol, characters, length) { } + UniquedStringImpl(CreateSymbolTag, const UChar* characters, unsigned length) : StringImpl(CreateSymbol, characters, length) { } + UniquedStringImpl(CreateSymbolTag) : StringImpl(CreateSymbol) { } +}; + +#if !ASSERT_DISABLED +// UniquedStringImpls created from StaticStringImpl will ASSERT +// in the generic ValueCheck<T>::checkConsistency +// as they are not allocated by fastMalloc. +// We don't currently have any way to detect that case +// so we ignore the consistency check for all UniquedStringImpls*. +template<> struct +ValueCheck<UniquedStringImpl*> { + static void checkConsistency(const UniquedStringImpl*) { } +}; + +template<> struct +ValueCheck<const UniquedStringImpl*> { + static void checkConsistency(const UniquedStringImpl*) { } +}; +#endif + +} // namespace WTF + +using WTF::UniquedStringImpl; + +#endif // UniquedStringImpl_h diff --git a/Source/WTF/wtf/text/WTFString.cpp b/Source/WTF/wtf/text/WTFString.cpp index 45ba8af52..4f49ebca1 100644 --- a/Source/WTF/wtf/text/WTFString.cpp +++ b/Source/WTF/wtf/text/WTFString.cpp @@ -35,7 +35,6 @@ #include <wtf/dtoa.h> #include <wtf/unicode/CharacterNames.h> #include <wtf/unicode/UTF8.h> -#include <wtf/unicode/Unicode.h> namespace WTF { @@ -90,64 +89,75 @@ String::String(ASCIILiteral characters) void String::append(const String& str) { + // FIXME: This is extremely inefficient. So much so that we might want to take this out of String's API. + if (str.isEmpty()) return; - // FIXME: This is extremely inefficient. So much so that we might want to take this - // out of String's API. We can make it better by optimizing the case where exactly - // one String is pointing at this StringImpl, but even then it's going to require a - // call to fastMalloc every single time. 
if (str.m_impl) { if (m_impl) { if (m_impl->is8Bit() && str.m_impl->is8Bit()) { LChar* data; if (str.length() > std::numeric_limits<unsigned>::max() - m_impl->length()) CRASH(); - RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(m_impl->length() + str.length(), data); + auto newImpl = StringImpl::createUninitialized(m_impl->length() + str.length(), data); memcpy(data, m_impl->characters8(), m_impl->length() * sizeof(LChar)); memcpy(data + m_impl->length(), str.characters8(), str.length() * sizeof(LChar)); - m_impl = newImpl.release(); + m_impl = WTFMove(newImpl); return; } UChar* data; if (str.length() > std::numeric_limits<unsigned>::max() - m_impl->length()) CRASH(); - RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(m_impl->length() + str.length(), data); - memcpy(data, m_impl->deprecatedCharacters(), m_impl->length() * sizeof(UChar)); - memcpy(data + m_impl->length(), str.deprecatedCharacters(), str.length() * sizeof(UChar)); - m_impl = newImpl.release(); + auto newImpl = StringImpl::createUninitialized(m_impl->length() + str.length(), data); + StringView(*m_impl).getCharactersWithUpconvert(data); + StringView(str).getCharactersWithUpconvert(data + m_impl->length()); + m_impl = WTFMove(newImpl); } else m_impl = str.m_impl; } } -template <typename CharacterType> -inline void String::appendInternal(CharacterType c) +void String::append(LChar character) { - // FIXME: This is extremely inefficient. So much so that we might want to take this - // out of String's API. We can make it better by optimizing the case where exactly - // one String is pointing at this StringImpl, but even then it's going to require a - // call to fastMalloc every single time. 
- if (m_impl) { - UChar* data; - if (m_impl->length() >= std::numeric_limits<unsigned>::max()) - CRASH(); - RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(m_impl->length() + 1, data); - memcpy(data, m_impl->deprecatedCharacters(), m_impl->length() * sizeof(UChar)); - data[m_impl->length()] = c; - m_impl = newImpl.release(); - } else - m_impl = StringImpl::create(&c, 1); -} + // FIXME: This is extremely inefficient. So much so that we might want to take this out of String's API. -void String::append(LChar c) -{ - appendInternal(c); + if (!m_impl) { + m_impl = StringImpl::create(&character, 1); + return; + } + if (!is8Bit()) { + append(static_cast<UChar>(character)); + return; + } + if (m_impl->length() >= std::numeric_limits<unsigned>::max()) + CRASH(); + LChar* data; + auto newImpl = StringImpl::createUninitialized(m_impl->length() + 1, data); + memcpy(data, m_impl->characters8(), m_impl->length()); + data[m_impl->length()] = character; + m_impl = WTFMove(newImpl); } -void String::append(UChar c) +void String::append(UChar character) { - appendInternal(c); + // FIXME: This is extremely inefficient. So much so that we might want to take this out of String's API. 
+ + if (!m_impl) { + m_impl = StringImpl::create(&character, 1); + return; + } + if (character <= 0xFF && is8Bit()) { + append(static_cast<LChar>(character)); + return; + } + if (m_impl->length() >= std::numeric_limits<unsigned>::max()) + CRASH(); + UChar* data; + auto newImpl = StringImpl::createUninitialized(m_impl->length() + 1, data); + StringView(*m_impl).getCharactersWithUpconvert(data); + data[m_impl->length()] = character; + m_impl = WTFMove(newImpl); } int codePointCompare(const String& a, const String& b) @@ -155,20 +165,49 @@ int codePointCompare(const String& a, const String& b) return codePointCompare(a.impl(), b.impl()); } -void String::insert(const String& str, unsigned pos) +void String::insert(const String& string, unsigned position) { - if (str.isEmpty()) { - if (str.isNull()) + // FIXME: This is extremely inefficient. So much so that we might want to take this out of String's API. + + unsigned lengthToInsert = string.length(); + + if (!lengthToInsert) { + if (string.isNull()) return; if (isNull()) - m_impl = str.impl(); + m_impl = string.impl(); return; } - insert(str.deprecatedCharacters(), str.length(), pos); + + if (position >= length()) { + append(string); + return; + } + + if (lengthToInsert > std::numeric_limits<unsigned>::max() - length()) + CRASH(); + + if (is8Bit() && string.is8Bit()) { + LChar* data; + auto newString = StringImpl::createUninitialized(length() + lengthToInsert, data); + StringView(*m_impl).substring(0, position).getCharactersWithUpconvert(data); + StringView(string).getCharactersWithUpconvert(data + position); + StringView(*m_impl).substring(position).getCharactersWithUpconvert(data + position + lengthToInsert); + m_impl = WTFMove(newString); + } else { + UChar* data; + auto newString = StringImpl::createUninitialized(length() + lengthToInsert, data); + StringView(*m_impl).substring(0, position).getCharactersWithUpconvert(data); + StringView(string).getCharactersWithUpconvert(data + position); + 
StringView(*m_impl).substring(position).getCharactersWithUpconvert(data + position + lengthToInsert); + m_impl = WTFMove(newString); + } } void String::append(const LChar* charactersToAppend, unsigned lengthToAppend) { + // FIXME: This is extremely inefficient. So much so that we might want to take this out of String's API. + if (!m_impl) { if (!charactersToAppend) return; @@ -187,24 +226,26 @@ void String::append(const LChar* charactersToAppend, unsigned lengthToAppend) if (lengthToAppend > std::numeric_limits<unsigned>::max() - strLength) CRASH(); LChar* data; - RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(strLength + lengthToAppend, data); + auto newImpl = StringImpl::createUninitialized(strLength + lengthToAppend, data); StringImpl::copyChars(data, m_impl->characters8(), strLength); StringImpl::copyChars(data + strLength, charactersToAppend, lengthToAppend); - m_impl = newImpl.release(); + m_impl = WTFMove(newImpl); return; } if (lengthToAppend > std::numeric_limits<unsigned>::max() - strLength) CRASH(); UChar* data; - RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(length() + lengthToAppend, data); + auto newImpl = StringImpl::createUninitialized(length() + lengthToAppend, data); StringImpl::copyChars(data, m_impl->characters16(), strLength); StringImpl::copyChars(data + strLength, charactersToAppend, lengthToAppend); - m_impl = newImpl.release(); + m_impl = WTFMove(newImpl); } void String::append(const UChar* charactersToAppend, unsigned lengthToAppend) { + // FIXME: This is extremely inefficient. So much so that we might want to take this out of String's API. 
+ if (!m_impl) { if (!charactersToAppend) return; @@ -221,39 +262,16 @@ void String::append(const UChar* charactersToAppend, unsigned lengthToAppend) if (lengthToAppend > std::numeric_limits<unsigned>::max() - strLength) CRASH(); UChar* data; - RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(strLength + lengthToAppend, data); + auto newImpl = StringImpl::createUninitialized(strLength + lengthToAppend, data); if (m_impl->is8Bit()) StringImpl::copyChars(data, characters8(), strLength); else StringImpl::copyChars(data, characters16(), strLength); StringImpl::copyChars(data + strLength, charactersToAppend, lengthToAppend); - m_impl = newImpl.release(); + m_impl = WTFMove(newImpl); } -void String::insert(const UChar* charactersToInsert, unsigned lengthToInsert, unsigned position) -{ - if (position >= length()) { - append(charactersToInsert, lengthToInsert); - return; - } - - ASSERT(m_impl); - - if (!lengthToInsert) - return; - - ASSERT(charactersToInsert); - UChar* data; - if (lengthToInsert > std::numeric_limits<unsigned>::max() - length()) - CRASH(); - RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(length() + lengthToInsert, data); - memcpy(data, deprecatedCharacters(), position * sizeof(UChar)); - memcpy(data + position, charactersToInsert, lengthToInsert * sizeof(UChar)); - memcpy(data + position + lengthToInsert, deprecatedCharacters() + position, (length() - position) * sizeof(UChar)); - m_impl = newImpl.release(); -} - UChar32 String::characterStartingAt(unsigned i) const { if (!m_impl || i >= m_impl->length()) @@ -263,24 +281,20 @@ UChar32 String::characterStartingAt(unsigned i) const void String::truncate(unsigned position) { - if (position >= length()) - return; - UChar* data; - RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(position, data); - memcpy(data, deprecatedCharacters(), position * sizeof(UChar)); - m_impl = newImpl.release(); + if (m_impl) + m_impl = m_impl->substring(0, position); } template <typename 
CharacterType> inline void String::removeInternal(const CharacterType* characters, unsigned position, int lengthToRemove) { CharacterType* data; - RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(length() - lengthToRemove, data); + auto newImpl = StringImpl::createUninitialized(length() - lengthToRemove, data); memcpy(data, characters, position * sizeof(CharacterType)); memcpy(data + position, characters + position + lengthToRemove, (length() - lengthToRemove - position) * sizeof(CharacterType)); - m_impl = newImpl.release(); + m_impl = WTFMove(newImpl); } void String::remove(unsigned position, int lengthToRemove) @@ -318,35 +332,58 @@ String String::substringSharingImpl(unsigned offset, unsigned length) const if (!offset && length == stringLength) return *this; - return String(StringImpl::create(m_impl, offset, length)); + return String(StringImpl::createSubstringSharingImpl(*m_impl, offset, length)); +} + +String String::convertToASCIILowercase() const +{ + // FIXME: Should this function, and the many others like it, be inlined? + if (!m_impl) + return String(); + return m_impl->convertToASCIILowercase(); +} + +String String::convertToASCIIUppercase() const +{ + // FIXME: Should this function, and the many others like it, be inlined? 
+ if (!m_impl) + return String(); + return m_impl->convertToASCIIUppercase(); +} + +String String::convertToLowercaseWithoutLocale() const +{ + if (!m_impl) + return String(); + return m_impl->convertToLowercaseWithoutLocale(); } -String String::lower() const +String String::convertToLowercaseWithoutLocaleStartingAtFailingIndex8Bit(unsigned failingIndex) const { if (!m_impl) return String(); - return m_impl->lower(); + return m_impl->convertToLowercaseWithoutLocaleStartingAtFailingIndex8Bit(failingIndex); } -String String::upper() const +String String::convertToUppercaseWithoutLocale() const { if (!m_impl) return String(); - return m_impl->upper(); + return m_impl->convertToUppercaseWithoutLocale(); } -String String::lower(const AtomicString& localeIdentifier) const +String String::convertToLowercaseWithLocale(const AtomicString& localeIdentifier) const { if (!m_impl) return String(); - return m_impl->lower(localeIdentifier); + return m_impl->convertToLowercaseWithLocale(localeIdentifier); } -String String::upper(const AtomicString& localeIdentifier) const +String String::convertToUppercaseWithLocale(const AtomicString& localeIdentifier) const { if (!m_impl) return String(); - return m_impl->upper(localeIdentifier); + return m_impl->convertToUppercaseWithLocale(localeIdentifier); } String String::stripWhiteSpace() const @@ -399,7 +436,10 @@ bool String::percentage(int& result) const if ((*m_impl)[m_impl->length() - 1] != '%') return false; - result = charactersToIntStrict(m_impl->deprecatedCharacters(), m_impl->length() - 1); + if (m_impl->is8Bit()) + result = charactersToIntStrict(m_impl->characters8(), m_impl->length() - 1); + else + result = charactersToIntStrict(m_impl->characters16(), m_impl->length() - 1); return true; } @@ -427,33 +467,26 @@ Vector<UChar> String::charactersWithNullTermination() const String String::format(const char *format, ...) 
{ -#if OS(WINCE) va_list args; va_start(args, format); - Vector<char, 256> buffer; +#if USE(CF) && !OS(WINDOWS) + if (strstr(format, "%@")) { + RetainPtr<CFStringRef> cfFormat = adoptCF(CFStringCreateWithCString(kCFAllocatorDefault, format, kCFStringEncodingUTF8)); - int bufferSize = 256; - buffer.resize(bufferSize); - for (;;) { - int written = vsnprintf(buffer.data(), bufferSize, format, args); - va_end(args); +#if COMPILER(CLANG) +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wformat-nonliteral" +#endif + RetainPtr<CFStringRef> result = adoptCF(CFStringCreateWithFormatAndArguments(kCFAllocatorDefault, nullptr, cfFormat.get(), args)); +#if COMPILER(CLANG) +#pragma clang diagnostic pop +#endif - if (written == 0) - return String(""); - if (written > 0) - return StringImpl::create(reinterpret_cast<const LChar*>(buffer.data()), written); - - bufferSize <<= 1; - buffer.resize(bufferSize); - va_start(args, format); + va_end(args); + return result.get(); } - -#else - va_list args; - va_start(args, format); - - Vector<char, 256> buffer; +#endif // USE(CF) && !OS(WINDOWS) // Do the format once to get the length. #if COMPILER(MSVC) @@ -461,30 +494,25 @@ String String::format(const char *format, ...) #else char ch; int result = vsnprintf(&ch, 1, format, args); - // We need to call va_end() and then va_start() again here, as the - // contents of args is undefined after the call to vsnprintf - // according to http://man.cx/snprintf(3) - // - // Not calling va_end/va_start here happens to work on lots of - // systems, but fails e.g. on 64bit Linux. - va_end(args); - va_start(args, format); #endif + va_end(args); if (result == 0) return String(""); if (result < 0) return String(); + + Vector<char, 256> buffer; unsigned len = result; buffer.grow(len + 1); + va_start(args, format); // Now do the formatting again, guaranteed to fit. 
vsnprintf(buffer.data(), buffer.size(), format, args); va_end(args); return StringImpl::create(reinterpret_cast<const LChar*>(buffer.data()), len); -#endif } String String::number(int number) @@ -663,12 +691,12 @@ String String::isolatedCopy() const & return m_impl->isolatedCopy(); } -String String::isolatedCopy() const && +String String::isolatedCopy() && { if (isSafeToSendToAnotherThread()) { // Since we know that our string is a temporary that will be destroyed // we can just steal the m_impl from it, thus avoiding a copy. - return String(std::move(*this)); + return String(WTFMove(*this)); } if (!m_impl) @@ -689,14 +717,14 @@ bool String::isSafeToSendToAnotherThread() const { if (!impl()) return true; + if (isEmpty()) + return true; // AtomicStrings are not safe to send between threads as ~StringImpl() // will try to remove them from the wrong AtomicStringTable. if (impl()->isAtomic()) return false; if (impl()->hasOneRef()) return true; - if (isEmpty()) - return true; return false; } @@ -802,6 +830,11 @@ CString String::utf8(ConversionMode mode) const return m_impl->utf8(mode); } +CString String::utf8() const +{ + return utf8(LenientConversion); +} + String String::make8BitFrom16BitSource(const UChar* source, size_t length) { if (!length) @@ -1181,7 +1214,7 @@ String* string(const char* s) Vector<char> asciiDebug(StringImpl* impl) { if (!impl) - return asciiDebug(String("[null]").impl()); + return asciiDebug(String(ASCIILiteral("[null]")).impl()); Vector<char> buffer; for (unsigned i = 0; i < impl->length(); ++i) { diff --git a/Source/WTF/wtf/text/WTFString.h b/Source/WTF/wtf/text/WTFString.h index 5c8a0af95..cb4232d58 100644 --- a/Source/WTF/wtf/text/WTFString.h +++ b/Source/WTF/wtf/text/WTFString.h @@ -1,6 +1,6 @@ /* * (C) 1999 Lars Knoll (knoll@kde.org) - * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2012, 2013 Apple Inc. All rights reserved. + * Copyright (C) 2004-2016 Apple Inc. All rights reserved. 
* * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Library General Public @@ -26,6 +26,7 @@ // on systems without case-sensitive file systems. #include <wtf/text/ASCIIFastPath.h> +#include <wtf/text/IntegerToStringConversion.h> #include <wtf/text/StringImpl.h> #ifdef __OBJC__ @@ -34,52 +35,45 @@ namespace WTF { -class CString; -struct StringHash; +class ASCIILiteral; // Declarations of string operations -WTF_EXPORT_STRING_API int charactersToIntStrict(const LChar*, size_t, bool* ok = 0, int base = 10); -WTF_EXPORT_STRING_API int charactersToIntStrict(const UChar*, size_t, bool* ok = 0, int base = 10); -WTF_EXPORT_STRING_API unsigned charactersToUIntStrict(const LChar*, size_t, bool* ok = 0, int base = 10); -WTF_EXPORT_STRING_API unsigned charactersToUIntStrict(const UChar*, size_t, bool* ok = 0, int base = 10); -int64_t charactersToInt64Strict(const LChar*, size_t, bool* ok = 0, int base = 10); -int64_t charactersToInt64Strict(const UChar*, size_t, bool* ok = 0, int base = 10); -uint64_t charactersToUInt64Strict(const LChar*, size_t, bool* ok = 0, int base = 10); -uint64_t charactersToUInt64Strict(const UChar*, size_t, bool* ok = 0, int base = 10); -intptr_t charactersToIntPtrStrict(const LChar*, size_t, bool* ok = 0, int base = 10); -intptr_t charactersToIntPtrStrict(const UChar*, size_t, bool* ok = 0, int base = 10); - -int charactersToInt(const LChar*, size_t, bool* ok = 0); // ignores trailing garbage -WTF_EXPORT_STRING_API int charactersToInt(const UChar*, size_t, bool* ok = 0); // ignores trailing garbage -unsigned charactersToUInt(const LChar*, size_t, bool* ok = 0); // ignores trailing garbage -unsigned charactersToUInt(const UChar*, size_t, bool* ok = 0); // ignores trailing garbage -int64_t charactersToInt64(const LChar*, size_t, bool* ok = 0); // ignores trailing garbage -int64_t charactersToInt64(const UChar*, size_t, bool* ok = 0); // ignores trailing garbage -uint64_t charactersToUInt64(const 
LChar*, size_t, bool* ok = 0); // ignores trailing garbage -uint64_t charactersToUInt64(const UChar*, size_t, bool* ok = 0); // ignores trailing garbage -intptr_t charactersToIntPtr(const LChar*, size_t, bool* ok = 0); // ignores trailing garbage -intptr_t charactersToIntPtr(const UChar*, size_t, bool* ok = 0); // ignores trailing garbage +WTF_EXPORT_STRING_API int charactersToIntStrict(const LChar*, size_t, bool* ok = nullptr, int base = 10); +WTF_EXPORT_STRING_API int charactersToIntStrict(const UChar*, size_t, bool* ok = nullptr, int base = 10); +WTF_EXPORT_STRING_API unsigned charactersToUIntStrict(const LChar*, size_t, bool* ok = nullptr, int base = 10); +WTF_EXPORT_STRING_API unsigned charactersToUIntStrict(const UChar*, size_t, bool* ok = nullptr, int base = 10); +int64_t charactersToInt64Strict(const LChar*, size_t, bool* ok = nullptr, int base = 10); +int64_t charactersToInt64Strict(const UChar*, size_t, bool* ok = nullptr, int base = 10); +uint64_t charactersToUInt64Strict(const LChar*, size_t, bool* ok = nullptr, int base = 10); +uint64_t charactersToUInt64Strict(const UChar*, size_t, bool* ok = nullptr, int base = 10); +intptr_t charactersToIntPtrStrict(const LChar*, size_t, bool* ok = nullptr, int base = 10); +intptr_t charactersToIntPtrStrict(const UChar*, size_t, bool* ok = nullptr, int base = 10); + +WTF_EXPORT_STRING_API int charactersToInt(const LChar*, size_t, bool* ok = nullptr); // ignores trailing garbage +WTF_EXPORT_STRING_API int charactersToInt(const UChar*, size_t, bool* ok = nullptr); // ignores trailing garbage +unsigned charactersToUInt(const LChar*, size_t, bool* ok = nullptr); // ignores trailing garbage +unsigned charactersToUInt(const UChar*, size_t, bool* ok = nullptr); // ignores trailing garbage +int64_t charactersToInt64(const LChar*, size_t, bool* ok = nullptr); // ignores trailing garbage +int64_t charactersToInt64(const UChar*, size_t, bool* ok = nullptr); // ignores trailing garbage +uint64_t charactersToUInt64(const LChar*, 
size_t, bool* ok = nullptr); // ignores trailing garbage +WTF_EXPORT_STRING_API uint64_t charactersToUInt64(const UChar*, size_t, bool* ok = nullptr); // ignores trailing garbage +intptr_t charactersToIntPtr(const LChar*, size_t, bool* ok = nullptr); // ignores trailing garbage +intptr_t charactersToIntPtr(const UChar*, size_t, bool* ok = nullptr); // ignores trailing garbage // FIXME: Like the strict functions above, these give false for "ok" when there is trailing garbage. // Like the non-strict functions above, these return the value when there is trailing garbage. // It would be better if these were more consistent with the above functions instead. -WTF_EXPORT_STRING_API double charactersToDouble(const LChar*, size_t, bool* ok = 0); -WTF_EXPORT_STRING_API double charactersToDouble(const UChar*, size_t, bool* ok = 0); -float charactersToFloat(const LChar*, size_t, bool* ok = 0); -WTF_EXPORT_STRING_API float charactersToFloat(const UChar*, size_t, bool* ok = 0); +WTF_EXPORT_STRING_API double charactersToDouble(const LChar*, size_t, bool* ok = nullptr); +WTF_EXPORT_STRING_API double charactersToDouble(const UChar*, size_t, bool* ok = nullptr); +WTF_EXPORT_STRING_API float charactersToFloat(const LChar*, size_t, bool* ok = nullptr); +WTF_EXPORT_STRING_API float charactersToFloat(const UChar*, size_t, bool* ok = nullptr); WTF_EXPORT_STRING_API float charactersToFloat(const LChar*, size_t, size_t& parsedLength); WTF_EXPORT_STRING_API float charactersToFloat(const UChar*, size_t, size_t& parsedLength); -class ASCIILiteral; - -enum TrailingZerosTruncatingPolicy { - KeepTrailingZeros, - TruncateTrailingZeros -}; +template<bool isSpecialCharacter(UChar), typename CharacterType> bool isAllSpecialCharacters(const CharacterType*, size_t); -template<bool isSpecialCharacter(UChar), typename CharacterType> -bool isAllSpecialCharacters(const CharacterType*, size_t); +enum TrailingZerosTruncatingPolicy { KeepTrailingZeros, TruncateTrailingZeros }; class String { public: @@ 
-112,10 +106,13 @@ public: WTF_EXPORT_STRING_API String(const char* characters); // Construct a string referencing an existing StringImpl. - String(StringImpl* impl) : m_impl(impl) { } - String(PassRefPtr<StringImpl> impl) : m_impl(impl) { } - String(PassRef<StringImpl> impl) : m_impl(std::move(impl)) { } - String(RefPtr<StringImpl>&& impl) : m_impl(impl) { } + String(StringImpl&); + String(StringImpl*); + String(Ref<StringImpl>&&); + String(RefPtr<StringImpl>&&); + + String(Ref<AtomicStringImpl>&&); + String(RefPtr<AtomicStringImpl>&&); // Construct a string from a constant string literal. WTF_EXPORT_STRING_API String(ASCIILiteral characters); @@ -128,26 +125,30 @@ public: // We have to declare the copy constructor and copy assignment operator as well, otherwise // they'll be implicitly deleted by adding the move constructor and move assignment operator. - String(const String& other) : m_impl(other.m_impl) { } - String(String&& other) : m_impl(other.m_impl.release()) { } + String(const String& other) + : m_impl(other.m_impl) + { } + String(String&& other) + : m_impl(WTFMove(other.m_impl)) + { } String& operator=(const String& other) { m_impl = other.m_impl; return *this; } - String& operator=(String&& other) { m_impl = other.m_impl.release(); return *this; } + String& operator=(String&& other) { m_impl = WTFMove(other.m_impl); return *this; } // Inline the destructor. 
ALWAYS_INLINE ~String() { } void swap(String& o) { m_impl.swap(o.m_impl); } - static String adopt(StringBuffer<LChar>& buffer) { return StringImpl::adopt(buffer); } - static String adopt(StringBuffer<UChar>& buffer) { return StringImpl::adopt(buffer); } + static String adopt(StringBuffer<LChar>&& buffer) { return StringImpl::adopt(WTFMove(buffer)); } + static String adopt(StringBuffer<UChar>&& buffer) { return StringImpl::adopt(WTFMove(buffer)); } template<typename CharacterType, size_t inlineCapacity, typename OverflowHandler> - static String adopt(Vector<CharacterType, inlineCapacity, OverflowHandler>& vector) { return StringImpl::adopt(vector); } + static String adopt(Vector<CharacterType, inlineCapacity, OverflowHandler>&& vector) { return StringImpl::adopt(WTFMove(vector)); } bool isNull() const { return !m_impl; } bool isEmpty() const { return !m_impl || !m_impl->length(); } StringImpl* impl() const { return m_impl.get(); } - PassRefPtr<StringImpl> releaseImpl() { return m_impl.release(); } + RefPtr<StringImpl> releaseImpl() { return WTFMove(m_impl); } unsigned length() const { @@ -156,14 +157,6 @@ public: return m_impl->length(); } - const UChar* characters() const { return deprecatedCharacters(); } // FIXME: Delete this. - const UChar* deprecatedCharacters() const - { - if (!m_impl) - return 0; - return m_impl->deprecatedCharacters(); - } - const LChar* characters8() const { if (!m_impl) @@ -182,11 +175,7 @@ public: // Return characters8() or characters16() depending on CharacterType. template <typename CharacterType> - inline const CharacterType* getCharacters() const; - - // Like getCharacters() and upconvert if CharacterType is UChar on a 8bit string. 
- template <typename CharacterType> - inline const CharacterType* getCharactersWithUpconvert() const; + inline const CharacterType* characters() const; bool is8Bit() const { return m_impl->is8Bit(); } @@ -200,7 +189,8 @@ public: WTF_EXPORT_STRING_API CString ascii() const; WTF_EXPORT_STRING_API CString latin1() const; - WTF_EXPORT_STRING_API CString utf8(ConversionMode = LenientConversion) const; + WTF_EXPORT_STRING_API CString utf8(ConversionMode) const; + WTF_EXPORT_STRING_API CString utf8() const; UChar at(unsigned index) const { @@ -231,15 +221,16 @@ public: { return m_impl ? m_impl->find(str.impl()) : notFound; } size_t find(const String& str, unsigned start) const { return m_impl ? m_impl->find(str.impl(), start) : notFound; } + size_t findIgnoringASCIICase(const String& str) const + { return m_impl ? m_impl->findIgnoringASCIICase(str.impl()) : notFound; } + size_t findIgnoringASCIICase(const String& str, unsigned startOffset) const + { return m_impl ? m_impl->findIgnoringASCIICase(str.impl(), startOffset) : notFound; } size_t find(CharacterMatchFunctionPtr matchFunction, unsigned start = 0) const { return m_impl ? m_impl->find(matchFunction, start) : notFound; } size_t find(const LChar* str, unsigned start = 0) const { return m_impl ? m_impl->find(str, start) : notFound; } - size_t findNextLineStart(unsigned start = 0) const - { return m_impl ? m_impl->findNextLineStart(start) : notFound; } - // Find the last instance of a single character or string. size_t reverseFind(UChar c, unsigned start = UINT_MAX) const { return m_impl ? m_impl->reverseFind(c, start) : notFound; } @@ -267,11 +258,21 @@ public: WTF_EXPORT_STRING_API UChar32 characterStartingAt(unsigned) const; // Ditto. 
bool contains(UChar c) const { return find(c) != notFound; } - bool contains(const LChar* str, bool caseSensitive = true) const { return find(str, 0, caseSensitive) != notFound; } - bool contains(const String& str, bool caseSensitive = true) const { return find(str, 0, caseSensitive) != notFound; } + bool contains(const LChar* str, bool caseSensitive = true, unsigned startOffset = 0) const + { return find(str, startOffset, caseSensitive) != notFound; } + bool contains(const String& str) const + { return find(str) != notFound; } + bool contains(const String& str, bool caseSensitive, unsigned startOffset = 0) const + { return find(str, startOffset, caseSensitive) != notFound; } + bool containsIgnoringASCIICase(const String& str) const + { return findIgnoringASCIICase(str) != notFound; } + bool containsIgnoringASCIICase(const String& str, unsigned startOffset) const + { return findIgnoringASCIICase(str, startOffset) != notFound; } bool startsWith(const String& s) const { return m_impl ? m_impl->startsWith(s.impl()) : s.isEmpty(); } + bool startsWithIgnoringASCIICase(const String& s) const + { return m_impl ? m_impl->startsWithIgnoringASCIICase(s.impl()) : s.isEmpty(); } bool startsWith(const String& s, bool caseSensitive) const { return m_impl ? m_impl->startsWith(s.impl(), caseSensitive) : s.isEmpty(); } bool startsWith(UChar character) const @@ -279,14 +280,23 @@ public: template<unsigned matchLength> bool startsWith(const char (&prefix)[matchLength], bool caseSensitive = true) const { return m_impl ? m_impl->startsWith<matchLength>(prefix, caseSensitive) : !matchLength; } - - bool endsWith(const String& s, bool caseSensitive = true) const + bool hasInfixStartingAt(const String& prefix, unsigned startOffset) const + { return m_impl && prefix.impl() ? m_impl->hasInfixStartingAt(*prefix.impl(), startOffset) : false; } + + bool endsWith(const String& s) const + { return m_impl ? 
m_impl->endsWith(s.impl()) : s.isEmpty(); } + bool endsWithIgnoringASCIICase(const String& s) const + { return m_impl ? m_impl->endsWithIgnoringASCIICase(s.impl()) : s.isEmpty(); } + bool endsWith(const String& s, bool caseSensitive) const { return m_impl ? m_impl->endsWith(s.impl(), caseSensitive) : s.isEmpty(); } bool endsWith(UChar character) const { return m_impl ? m_impl->endsWith(character) : false; } + bool endsWith(char character) const { return endsWith(static_cast<UChar>(character)); } template<unsigned matchLength> bool endsWith(const char (&prefix)[matchLength], bool caseSensitive = true) const { return m_impl ? m_impl->endsWith<matchLength>(prefix, caseSensitive) : !matchLength; } + bool hasInfixEndingAt(const String& suffix, unsigned endOffset) const + { return m_impl && suffix.impl() ? m_impl->hasInfixEndingAt(*suffix.impl(), endOffset) : false; } WTF_EXPORT_STRING_API void append(const String&); WTF_EXPORT_STRING_API void append(LChar); @@ -295,7 +305,6 @@ public: WTF_EXPORT_STRING_API void append(const LChar*, unsigned length); WTF_EXPORT_STRING_API void append(const UChar*, unsigned length); WTF_EXPORT_STRING_API void insert(const String&, unsigned pos); - void insert(const UChar*, unsigned length, unsigned pos); String& replace(UChar a, UChar b) { if (m_impl) m_impl = m_impl->replace(a, b); return *this; } String& replace(UChar a, const String& b) { if (m_impl) m_impl = m_impl->replace(a, b.impl()); return *this; } @@ -311,8 +320,6 @@ public: return *this; } - void fill(UChar c) { if (m_impl) m_impl = m_impl->fill(c); } - WTF_EXPORT_STRING_API void truncate(unsigned len); WTF_EXPORT_STRING_API void remove(unsigned pos, int len = 1); @@ -321,12 +328,13 @@ public: String left(unsigned len) const { return substring(0, len); } String right(unsigned len) const { return substring(length() - len, len); } - // Returns a lowercase/uppercase version of the string - WTF_EXPORT_STRING_API String lower() const; - WTF_EXPORT_STRING_API String upper() const; - 
- WTF_EXPORT_STRING_API String lower(const AtomicString& localeIdentifier) const; - WTF_EXPORT_STRING_API String upper(const AtomicString& localeIdentifier) const; + WTF_EXPORT_STRING_API String convertToASCIILowercase() const; + WTF_EXPORT_STRING_API String convertToASCIIUppercase() const; + WTF_EXPORT_STRING_API String convertToLowercaseWithoutLocale() const; + WTF_EXPORT_STRING_API String convertToLowercaseWithoutLocaleStartingAtFailingIndex8Bit(unsigned) const; + WTF_EXPORT_STRING_API String convertToUppercaseWithoutLocale() const; + WTF_EXPORT_STRING_API String convertToLowercaseWithLocale(const AtomicString& localeIdentifier) const; + WTF_EXPORT_STRING_API String convertToUppercaseWithLocale(const AtomicString& localeIdentifier) const; WTF_EXPORT_STRING_API String stripWhiteSpace() const; WTF_EXPORT_STRING_API String stripWhiteSpace(IsWhiteSpaceFunctionPtr) const; @@ -336,7 +344,8 @@ public: WTF_EXPORT_STRING_API String removeCharacters(CharacterMatchFunctionPtr) const; template<bool isSpecialCharacter(UChar)> bool isAllSpecialCharacters() const; - // Return the string with case folded for case insensitive comparison. + // Returns the string with case folded for case insensitive comparison. + // Use convertToASCIILowercase instead if ASCII case insensitive comparison is desired. WTF_EXPORT_STRING_API String foldCase() const; WTF_EXPORT_STRING_API static String format(const char *, ...) 
WTF_ATTRIBUTE_PRINTF(1, 2); @@ -358,29 +367,29 @@ public: split(separator, false, result); } - WTF_EXPORT_STRING_API int toIntStrict(bool* ok = 0, int base = 10) const; - WTF_EXPORT_STRING_API unsigned toUIntStrict(bool* ok = 0, int base = 10) const; - WTF_EXPORT_STRING_API int64_t toInt64Strict(bool* ok = 0, int base = 10) const; - WTF_EXPORT_STRING_API uint64_t toUInt64Strict(bool* ok = 0, int base = 10) const; - intptr_t toIntPtrStrict(bool* ok = 0, int base = 10) const; + WTF_EXPORT_STRING_API int toIntStrict(bool* ok = nullptr, int base = 10) const; + WTF_EXPORT_STRING_API unsigned toUIntStrict(bool* ok = nullptr, int base = 10) const; + WTF_EXPORT_STRING_API int64_t toInt64Strict(bool* ok = nullptr, int base = 10) const; + WTF_EXPORT_STRING_API uint64_t toUInt64Strict(bool* ok = nullptr, int base = 10) const; + WTF_EXPORT_STRING_API intptr_t toIntPtrStrict(bool* ok = nullptr, int base = 10) const; - WTF_EXPORT_STRING_API int toInt(bool* ok = 0) const; - WTF_EXPORT_STRING_API unsigned toUInt(bool* ok = 0) const; - WTF_EXPORT_STRING_API int64_t toInt64(bool* ok = 0) const; - WTF_EXPORT_STRING_API uint64_t toUInt64(bool* ok = 0) const; - WTF_EXPORT_STRING_API intptr_t toIntPtr(bool* ok = 0) const; + WTF_EXPORT_STRING_API int toInt(bool* ok = nullptr) const; + WTF_EXPORT_STRING_API unsigned toUInt(bool* ok = nullptr) const; + WTF_EXPORT_STRING_API int64_t toInt64(bool* ok = nullptr) const; + WTF_EXPORT_STRING_API uint64_t toUInt64(bool* ok = nullptr) const; + WTF_EXPORT_STRING_API intptr_t toIntPtr(bool* ok = nullptr) const; // FIXME: Like the strict functions above, these give false for "ok" when there is trailing garbage. // Like the non-strict functions above, these return the value when there is trailing garbage. // It would be better if these were more consistent with the above functions instead. 
- WTF_EXPORT_STRING_API double toDouble(bool* ok = 0) const; - WTF_EXPORT_STRING_API float toFloat(bool* ok = 0) const; + WTF_EXPORT_STRING_API double toDouble(bool* ok = nullptr) const; + WTF_EXPORT_STRING_API float toFloat(bool* ok = nullptr) const; bool percentage(int& percentage) const; #if COMPILER_SUPPORTS(CXX_REFERENCE_QUALIFIED_FUNCTIONS) WTF_EXPORT_STRING_API String isolatedCopy() const &; - WTF_EXPORT_STRING_API String isolatedCopy() const &&; + WTF_EXPORT_STRING_API String isolatedCopy() &&; #else WTF_EXPORT_STRING_API String isolatedCopy() const; #endif @@ -388,7 +397,7 @@ public: WTF_EXPORT_STRING_API bool isSafeToSendToAnotherThread() const; // Prevent Strings from being implicitly convertable to bool as it will be ambiguous on any platform that - // allows implicit conversion to another pointer type (e.g., Mac allows implicit conversion to NSString*). + // allows implicit conversion to another pointer type (e.g., Mac allows implicit conversion to NSString *). typedef struct ImplicitConversionFromWTFStringToBoolDisallowedA* (String::*UnspecifiedBoolTypeA); typedef struct ImplicitConversionFromWTFStringToBoolDisallowedB* (String::*UnspecifiedBoolTypeB); operator UnspecifiedBoolTypeA() const; @@ -400,11 +409,12 @@ public: #endif #ifdef __OBJC__ - WTF_EXPORT_STRING_API String(NSString*); + WTF_EXPORT_STRING_API String(NSString *); - // This conversion maps NULL to "", which loses the meaning of NULL, but we - // need this mapping because AppKit crashes when passed nil NSStrings. - operator NSString*() const { if (!m_impl) return @""; return *m_impl; } + // This conversion converts the null string to an empty NSString rather than to nil. + // Given Cocoa idioms, this is a more useful default. Clients that need to preserve the + // null string can check isNull explicitly. 
+ operator NSString *() const; #endif WTF_EXPORT_STRING_API static String make8BitFrom16BitSource(const UChar*, size_t); @@ -446,6 +456,8 @@ public: String(WTF::HashTableDeletedValueType) : m_impl(WTF::HashTableDeletedValue) { } bool isHashTableDeletedValue() const { return m_impl.isHashTableDeletedValue(); } + unsigned existingHash() const { return isNull() ? 0 : impl()->existingHash(); } + #ifndef NDEBUG WTF_EXPORT_STRING_API void show() const; #endif @@ -458,6 +470,14 @@ public: return (*m_impl)[index]; } + // Turns this String empty if the StringImpl is not referenced by anyone else. + // This is useful for clearing String-based caches. + void clearImplIfNotShared() + { + if (m_impl && m_impl->hasOneRef()) + m_impl = nullptr; + } + private: template <typename CharacterType> void removeInternal(const CharacterType*, unsigned, int); @@ -473,10 +493,8 @@ inline bool operator==(const String& a, const LChar* b) { return equal(a.impl(), inline bool operator==(const String& a, const char* b) { return equal(a.impl(), reinterpret_cast<const LChar*>(b)); } inline bool operator==(const LChar* a, const String& b) { return equal(a, b.impl()); } inline bool operator==(const char* a, const String& b) { return equal(reinterpret_cast<const LChar*>(a), b.impl()); } -template<size_t inlineCapacity> -inline bool operator==(const Vector<char, inlineCapacity>& a, const String& b) { return equal(b.impl(), a.data(), a.size()); } -template<size_t inlineCapacity> -inline bool operator==(const String& a, const Vector<char, inlineCapacity>& b) { return b == a; } +template<size_t inlineCapacity> inline bool operator==(const Vector<char, inlineCapacity>& a, const String& b) { return equal(b.impl(), a.data(), a.size()); } +template<size_t inlineCapacity> inline bool operator==(const String& a, const Vector<char, inlineCapacity>& b) { return b == a; } inline bool operator!=(const String& a, const String& b) { return !equal(a.impl(), b.impl()); } @@ -484,64 +502,79 @@ inline bool 
operator!=(const String& a, const LChar* b) { return !equal(a.impl() inline bool operator!=(const String& a, const char* b) { return !equal(a.impl(), reinterpret_cast<const LChar*>(b)); } inline bool operator!=(const LChar* a, const String& b) { return !equal(a, b.impl()); } inline bool operator!=(const char* a, const String& b) { return !equal(reinterpret_cast<const LChar*>(a), b.impl()); } -template<size_t inlineCapacity> -inline bool operator!=(const Vector<char, inlineCapacity>& a, const String& b) { return !(a == b); } -template<size_t inlineCapacity> -inline bool operator!=(const String& a, const Vector<char, inlineCapacity>& b) { return b != a; } - -inline bool equalIgnoringCase(const String& a, const String& b) { return equalIgnoringCase(a.impl(), b.impl()); } -inline bool equalIgnoringCase(const String& a, const LChar* b) { return equalIgnoringCase(a.impl(), b); } -inline bool equalIgnoringCase(const String& a, const char* b) { return equalIgnoringCase(a.impl(), reinterpret_cast<const LChar*>(b)); } -inline bool equalIgnoringCase(const LChar* a, const String& b) { return equalIgnoringCase(a, b.impl()); } -inline bool equalIgnoringCase(const char* a, const String& b) { return equalIgnoringCase(reinterpret_cast<const LChar*>(a), b.impl()); } - -inline bool equalPossiblyIgnoringCase(const String& a, const String& b, bool ignoreCase) -{ - return ignoreCase ? 
equalIgnoringCase(a, b) : (a == b); -} +template<size_t inlineCapacity> inline bool operator!=(const Vector<char, inlineCapacity>& a, const String& b) { return !(a == b); } +template<size_t inlineCapacity> inline bool operator!=(const String& a, const Vector<char, inlineCapacity>& b) { return b != a; } -inline bool equalIgnoringNullity(const String& a, const String& b) { return equalIgnoringNullity(a.impl(), b.impl()); } +bool equalIgnoringASCIICase(const String&, const String&); +bool equalIgnoringASCIICase(const String&, const char*); -template<size_t inlineCapacity> -inline bool equalIgnoringNullity(const Vector<UChar, inlineCapacity>& a, const String& b) { return equalIgnoringNullity(a, b.impl()); } +template<unsigned length> bool equalLettersIgnoringASCIICase(const String&, const char (&lowercaseLetters)[length]); +template<unsigned length> bool startsWithLettersIgnoringASCIICase(const String&, const char (&lowercaseLetters)[length]); + +inline bool equalIgnoringNullity(const String& a, const String& b) { return equalIgnoringNullity(a.impl(), b.impl()); } +template<size_t inlineCapacity> inline bool equalIgnoringNullity(const Vector<UChar, inlineCapacity>& a, const String& b) { return equalIgnoringNullity(a, b.impl()); } inline bool operator!(const String& str) { return str.isNull(); } inline void swap(String& a, String& b) { a.swap(b); } +#ifdef __OBJC__ + +// Used in a small number of places where the long standing behavior has been "nil if empty". +NSString * nsStringNilIfEmpty(const String&); + +#endif + // Definitions of string operations -template<size_t inlineCapacity, typename OverflowHandler> -String::String(const Vector<UChar, inlineCapacity, OverflowHandler>& vector) - : m_impl(vector.size() ? 
StringImpl::create(vector.data(), vector.size()) : *StringImpl::empty()) +inline String::String(StringImpl& impl) + : m_impl(&impl) { } -template<> -inline const LChar* String::getCharacters<LChar>() const +inline String::String(StringImpl* impl) + : m_impl(impl) { - ASSERT(is8Bit()); - return characters8(); } -template<> -inline const UChar* String::getCharacters<UChar>() const +inline String::String(Ref<StringImpl>&& impl) + : m_impl(WTFMove(impl)) +{ +} + +inline String::String(RefPtr<StringImpl>&& impl) + : m_impl(WTFMove(impl)) +{ +} + +inline String::String(Ref<AtomicStringImpl>&& impl) + : m_impl(WTFMove(impl)) +{ +} + +inline String::String(RefPtr<AtomicStringImpl>&& impl) + : m_impl(WTFMove(impl)) +{ +} + +template<size_t inlineCapacity, typename OverflowHandler> +String::String(const Vector<UChar, inlineCapacity, OverflowHandler>& vector) + : m_impl(vector.size() ? StringImpl::create(vector.data(), vector.size()) : Ref<StringImpl>(*StringImpl::empty())) { - ASSERT(!is8Bit()); - return characters16(); } template<> -inline const LChar* String::getCharactersWithUpconvert<LChar>() const +inline const LChar* String::characters<LChar>() const { ASSERT(is8Bit()); return characters8(); } template<> -inline const UChar* String::getCharactersWithUpconvert<UChar>() const +inline const UChar* String::characters<UChar>() const { - return deprecatedCharacters(); + ASSERT(!is8Bit()); + return characters16(); } inline bool String::containsOnlyLatin1() const @@ -559,12 +592,22 @@ inline bool String::containsOnlyLatin1() const return !(ored & 0xFF00); } - #ifdef __OBJC__ -// This is for situations in WebKit where the long standing behavior has been -// "nil if empty", so we try to maintain longstanding behavior for the sake of -// entrenched clients -inline NSString* nsStringNilIfEmpty(const String& str) { return str.isEmpty() ? 
nil : (NSString*)str; } + +inline String::operator NSString *() const +{ + if (!m_impl) + return @""; + return *m_impl; +} + +inline NSString * nsStringNilIfEmpty(const String& string) +{ + if (string.isEmpty()) + return nil; + return *string.impl(); +} + #endif inline bool String::containsOnlyASCII() const @@ -585,12 +628,6 @@ inline bool codePointCompareLessThan(const String& a, const String& b) return codePointCompare(a.impl(), b.impl()) < 0; } -template<size_t inlineCapacity> -inline void append(Vector<UChar, inlineCapacity>& vector, const String& string) -{ - vector.append(string.deprecatedCharacters(), string.length()); -} - template<typename CharacterType> inline void appendNumber(Vector<CharacterType>& vector, unsigned char number) { @@ -657,13 +694,38 @@ private: // Shared global empty string. WTF_EXPORT_STRING_API const String& emptyString(); +template<unsigned length> inline bool equalLettersIgnoringASCIICase(const String& string, const char (&lowercaseLetters)[length]) +{ + return equalLettersIgnoringASCIICase(string.impl(), lowercaseLetters); +} + +inline bool equalIgnoringASCIICase(const String& a, const String& b) +{ + return equalIgnoringASCIICase(a.impl(), b.impl()); +} + +inline bool equalIgnoringASCIICase(const String& a, const char* b) +{ + return equalIgnoringASCIICase(a.impl(), b); +} + +template<unsigned length> inline bool startsWithLettersIgnoringASCIICase(const String& string, const char (&lowercaseLetters)[length]) +{ + return startsWithLettersIgnoringASCIICase(string.impl(), lowercaseLetters); +} + +template<> struct IntegerToStringConversionTrait<String> { + using ReturnType = String; + using AdditionalArgumentType = void; + static String flush(LChar* characters, unsigned length, void*) { return { characters, length }; } +}; + } using WTF::CString; using WTF::KeepTrailingZeros; using WTF::String; using WTF::emptyString; -using WTF::append; using WTF::appendNumber; using WTF::charactersAreAllASCII; using WTF::charactersToIntStrict; @@ 
-679,7 +741,6 @@ using WTF::charactersToIntPtr; using WTF::charactersToDouble; using WTF::charactersToFloat; using WTF::equal; -using WTF::equalIgnoringCase; using WTF::find; using WTF::isAllSpecialCharacters; using WTF::isSpaceOrNewline; @@ -687,4 +748,5 @@ using WTF::reverseFind; using WTF::ASCIILiteral; #include <wtf/text/AtomicString.h> + #endif diff --git a/Source/WTF/wtf/text/icu/UTextProvider.cpp b/Source/WTF/wtf/text/icu/UTextProvider.cpp new file mode 100644 index 000000000..7388fdbf7 --- /dev/null +++ b/Source/WTF/wtf/text/icu/UTextProvider.cpp @@ -0,0 +1,72 @@ +/* + * Copyright (C) 2014 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "config.h" +#include "UTextProvider.h" + +#include <algorithm> +#include <string.h> + +namespace WTF { + +// Relocate pointer from source into destination as required. +static inline void fixPointer(const UText* source, UText* destination, const void*& pointer) +{ + if (pointer >= source->pExtra && pointer < static_cast<char*>(source->pExtra) + source->extraSize) { + // Pointer references source extra buffer. + pointer = static_cast<char*>(destination->pExtra) + (static_cast<const char*>(pointer) - static_cast<const char*>(source->pExtra)); + } else if (pointer >= source && pointer < reinterpret_cast<const char*>(source) + source->sizeOfStruct) { + // Pointer references source text structure, but not source extra buffer. + pointer = reinterpret_cast<char*>(destination) + (static_cast<const char*>(pointer) - reinterpret_cast<const char*>(source)); + } +} + +UText* uTextCloneImpl(UText* destination, const UText* source, UBool deep, UErrorCode* status) +{ + ASSERT_UNUSED(deep, !deep); + if (U_FAILURE(*status)) + return nullptr; + int32_t extraSize = source->extraSize; + destination = utext_setup(destination, extraSize, status); + if (U_FAILURE(*status)) + return destination; + void* extraNew = destination->pExtra; + int32_t flags = destination->flags; + int sizeToCopy = std::min(source->sizeOfStruct, destination->sizeOfStruct); + memcpy(destination, source, sizeToCopy); + destination->pExtra = extraNew; + destination->flags = flags; + memcpy(destination->pExtra, source->pExtra, extraSize); + fixPointer(source, destination, destination->context); + fixPointer(source, destination, destination->p); + fixPointer(source, destination, destination->q); + ASSERT(!destination->r); + const void* chunkContents = static_cast<const void*>(destination->chunkContents); + fixPointer(source, destination, chunkContents); + destination->chunkContents = static_cast<const UChar*>(chunkContents); + return destination; +} + +} // namespace WTF diff --git 
a/Source/WTF/wtf/text/icu/UTextProvider.h b/Source/WTF/wtf/text/icu/UTextProvider.h new file mode 100644 index 000000000..2f0af9972 --- /dev/null +++ b/Source/WTF/wtf/text/icu/UTextProvider.h @@ -0,0 +1,111 @@ +/* + * Copyright (C) 2014 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef UTextProvider_h +#define UTextProvider_h + +#include <unicode/utext.h> + +namespace WTF { + +enum class UTextProviderContext { + NoContext, + PriorContext, + PrimaryContext +}; + +inline UTextProviderContext uTextProviderContext(const UText* text, int64_t nativeIndex, UBool forward) +{ + if (!text->b || nativeIndex > text->b) + return UTextProviderContext::PrimaryContext; + if (nativeIndex == text->b) + return forward ? UTextProviderContext::PrimaryContext : UTextProviderContext::PriorContext; + return UTextProviderContext::PriorContext; +} + +inline void initializeContextAwareUTextProvider(UText* text, const UTextFuncs* funcs, const void* string, unsigned length, const UChar* priorContext, int priorContextLength) +{ + text->pFuncs = funcs; + text->providerProperties = 1 << UTEXT_PROVIDER_STABLE_CHUNKS; + text->context = string; + text->p = string; + text->a = length; + text->q = priorContext; + text->b = priorContextLength; +} + +// Shared implementation for the UTextClone function on UTextFuncs. + +UText* uTextCloneImpl(UText* destination, const UText* source, UBool deep, UErrorCode* status); + + +// Helpers for the UTextAccess function on UTextFuncs. + +inline int64_t uTextAccessPinIndex(int64_t& index, int64_t limit) +{ + if (index < 0) + index = 0; + else if (index > limit) + index = limit; + return index; +} + +inline bool uTextAccessInChunkOrOutOfRange(UText* text, int64_t nativeIndex, int64_t nativeLength, UBool forward, UBool& isAccessible) +{ + if (forward) { + if (nativeIndex >= text->chunkNativeStart && nativeIndex < text->chunkNativeLimit) { + int64_t offset = nativeIndex - text->chunkNativeStart; + // Ensure chunk offset is well formed if computed offset exceeds int32_t range. + ASSERT(offset < std::numeric_limits<int32_t>::max()); + text->chunkOffset = offset < std::numeric_limits<int32_t>::max() ? 
static_cast<int32_t>(offset) : 0; + isAccessible = TRUE; + return true; + } + if (nativeIndex >= nativeLength && text->chunkNativeLimit == nativeLength) { + text->chunkOffset = text->chunkLength; + isAccessible = FALSE; + return true; + } + } else { + if (nativeIndex > text->chunkNativeStart && nativeIndex <= text->chunkNativeLimit) { + int64_t offset = nativeIndex - text->chunkNativeStart; + // Ensure chunk offset is well formed if computed offset exceeds int32_t range. + ASSERT(offset < std::numeric_limits<int32_t>::max()); + text->chunkOffset = offset < std::numeric_limits<int32_t>::max() ? static_cast<int32_t>(offset) : 0; + isAccessible = TRUE; + return true; + } + if (nativeIndex <= 0 && !text->chunkNativeStart) { + text->chunkOffset = 0; + isAccessible = FALSE; + return true; + } + } + return false; +} + +} // namespace WTF + +#endif // UTextProvider_h diff --git a/Source/WTF/wtf/text/icu/UTextProviderLatin1.cpp b/Source/WTF/wtf/text/icu/UTextProviderLatin1.cpp new file mode 100644 index 000000000..25a0e1e86 --- /dev/null +++ b/Source/WTF/wtf/text/icu/UTextProviderLatin1.cpp @@ -0,0 +1,394 @@ +/* + * Copyright (C) 2014 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. 
OR ITS CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "UTextProviderLatin1.h" + +#include "UTextProvider.h" +#include <wtf/text/StringImpl.h> + +namespace WTF { + +// Latin1 provider + +static UText* uTextLatin1Clone(UText*, const UText*, UBool, UErrorCode*); +static int64_t uTextLatin1NativeLength(UText*); +static UBool uTextLatin1Access(UText*, int64_t, UBool); +static int32_t uTextLatin1Extract(UText*, int64_t, int64_t, UChar*, int32_t, UErrorCode*); +static int64_t uTextLatin1MapOffsetToNative(const UText*); +static int32_t uTextLatin1MapNativeIndexToUTF16(const UText*, int64_t); +static void uTextLatin1Close(UText*); + +static const struct UTextFuncs uTextLatin1Funcs = { + sizeof(UTextFuncs), + 0, + 0, + 0, + uTextLatin1Clone, + uTextLatin1NativeLength, + uTextLatin1Access, + uTextLatin1Extract, + nullptr, + nullptr, + uTextLatin1MapOffsetToNative, + uTextLatin1MapNativeIndexToUTF16, + uTextLatin1Close, + nullptr, + nullptr, + nullptr +}; + +static UText* uTextLatin1Clone(UText* destination, const UText* source, UBool deep, UErrorCode* status) +{ + ASSERT_UNUSED(deep, !deep); + + if (U_FAILURE(*status)) + return 0; + + UText* result = utext_setup(destination, sizeof(UChar) * UTextWithBufferInlineCapacity, status); + if (U_FAILURE(*status)) + return destination; + + result->providerProperties = source->providerProperties; + + // Point at the same position, but with an empty buffer. 
+ result->chunkNativeStart = source->chunkNativeStart; + result->chunkNativeLimit = source->chunkNativeStart; + result->nativeIndexingLimit = static_cast<int32_t>(source->chunkNativeStart); + result->chunkOffset = 0; + result->context = source->context; + result->a = source->a; + result->pFuncs = &uTextLatin1Funcs; + result->chunkContents = (UChar*)result->pExtra; + memset(const_cast<UChar*>(result->chunkContents), 0, sizeof(UChar) * UTextWithBufferInlineCapacity); + + return result; +} + +static int64_t uTextLatin1NativeLength(UText* uText) +{ + return uText->a; +} + +static UBool uTextLatin1Access(UText* uText, int64_t index, UBool forward) +{ + int64_t length = uText->a; + + if (forward) { + if (index < uText->chunkNativeLimit && index >= uText->chunkNativeStart) { + // Already inside the buffer. Set the new offset. + uText->chunkOffset = static_cast<int32_t>(index - uText->chunkNativeStart); + return TRUE; + } + if (index >= length && uText->chunkNativeLimit == length) { + // Off the end of the buffer, but we can't get it. + uText->chunkOffset = static_cast<int32_t>(index - uText->chunkNativeStart); + return FALSE; + } + } else { + if (index <= uText->chunkNativeLimit && index > uText->chunkNativeStart) { + // Already inside the buffer. Set the new offset. + uText->chunkOffset = static_cast<int32_t>(index - uText->chunkNativeStart); + return TRUE; + } + if (!index && !uText->chunkNativeStart) { + // Already at the beginning; can't go any farther. 
+ uText->chunkOffset = 0; + return FALSE; + } + } + + if (forward) { + uText->chunkNativeStart = index; + uText->chunkNativeLimit = uText->chunkNativeStart + UTextWithBufferInlineCapacity; + if (uText->chunkNativeLimit > length) + uText->chunkNativeLimit = length; + + uText->chunkOffset = 0; + } else { + uText->chunkNativeLimit = index; + if (uText->chunkNativeLimit > length) + uText->chunkNativeLimit = length; + + uText->chunkNativeStart = uText->chunkNativeLimit - UTextWithBufferInlineCapacity; + if (uText->chunkNativeStart < 0) + uText->chunkNativeStart = 0; + + uText->chunkOffset = static_cast<int32_t>(index - uText->chunkNativeStart); + } + uText->chunkLength = static_cast<int32_t>(uText->chunkNativeLimit - uText->chunkNativeStart); + + StringImpl::copyChars(const_cast<UChar*>(uText->chunkContents), static_cast<const LChar*>(uText->context) + uText->chunkNativeStart, static_cast<unsigned>(uText->chunkLength)); + + uText->nativeIndexingLimit = uText->chunkLength; + + return TRUE; +} + +/* Copies the Latin-1 characters of the native range [start, limit), clamped to the text length held in uText->a, into dest as UChars, writing at most destCapacity of them. Returns the length of the clamped range, which may exceed destCapacity. Sets *status to U_ILLEGAL_ARGUMENT_ERROR for a negative capacity or a null dest with a positive capacity, U_INDEX_OUTOFBOUNDS_ERROR for an invalid range, U_STRING_NOT_TERMINATED_WARNING when the range exactly fills dest, and U_BUFFER_OVERFLOW_ERROR when it does not fit. An empty range returns 0 without touching dest. */ static int32_t uTextLatin1Extract(UText* uText, int64_t start, int64_t limit, UChar* dest, int32_t destCapacity, UErrorCode* status) +{ + int64_t length = uText->a; + if (U_FAILURE(*status)) + return 0; + + if (destCapacity < 0 || (!dest && destCapacity > 0)) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + if (start < 0 || start > limit || (limit - start) > INT32_MAX) { + *status = U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + + if (start > length) + start = length; + if (limit > length) + limit = length; + + length = limit - start; + + if (!length) + return 0; + + /* The argument check above guarantees dest is non-null whenever destCapacity > 0, so copy as many characters as fit. */ if (destCapacity > 0 && dest) { + int32_t trimmedLength = static_cast<int32_t>(length); + if (trimmedLength > destCapacity) + trimmedLength = destCapacity; + + StringImpl::copyChars(dest, static_cast<const LChar*>(uText->context) + start, static_cast<unsigned>(trimmedLength)); + } + + /* NUL-terminate only when there is a spare slot after the copied characters. */ if (length < destCapacity) { + dest[length] = 0; + if (*status == U_STRING_NOT_TERMINATED_WARNING) + *status = U_ZERO_ERROR; 
+ } else if (length == destCapacity) + *status = U_STRING_NOT_TERMINATED_WARNING; + else + *status = U_BUFFER_OVERFLOW_ERROR; + + return static_cast<int32_t>(length); +} + +/* Returns the native index of the current position: chunkNativeStart + chunkOffset. */ static int64_t uTextLatin1MapOffsetToNative(const UText* uText) +{ + return uText->chunkNativeStart + uText->chunkOffset; +} + +/* Returns nativeIndex narrowed to int32_t. NOTE(review): the first assertion requires nativeIndex <= chunkNativeStart, which looks inverted for an index inside the chunk, and the native index is returned without subtracting chunkNativeStart; confirm against the UText mapNativeIndexToUTF16 contract before relying on this callback. */ static int32_t uTextLatin1MapNativeIndexToUTF16(const UText* uText, int64_t nativeIndex) +{ + ASSERT_UNUSED(uText, uText->chunkNativeStart >= nativeIndex); + ASSERT_UNUSED(uText, nativeIndex < uText->chunkNativeLimit); + return static_cast<int32_t>(nativeIndex); +} + +/* Clears uText->context. */ static void uTextLatin1Close(UText* uText) +{ + uText->context = nullptr; +} + +/* Sets up utWithBuffer->text as a UText over the length Latin-1 characters at string, requesting sizeof(utWithBuffer->buffer) bytes of extra space from utext_setup and using that space (pExtra), zero-filled, as the UChar chunk buffer. Returns nullptr if *status already indicates failure, if utext_setup fails, or (with U_ILLEGAL_ARGUMENT_ERROR) if string is null or length exceeds the int32_t maximum. */ UText* openLatin1UTextProvider(UTextWithBuffer* utWithBuffer, const LChar* string, unsigned length, UErrorCode* status) +{ + if (U_FAILURE(*status)) + return nullptr; + if (!string || length > static_cast<unsigned>(std::numeric_limits<int32_t>::max())) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + return nullptr; + } + UText* text = utext_setup(&utWithBuffer->text, sizeof(utWithBuffer->buffer), status); + if (U_FAILURE(*status)) { + ASSERT(!text); + return nullptr; + } + + text->context = string; + text->a = length; + text->pFuncs = &uTextLatin1Funcs; + text->chunkContents = static_cast<UChar*>(text->pExtra); + memset(const_cast<UChar*>(text->chunkContents), 0, sizeof(UChar) * UTextWithBufferInlineCapacity); + + return text; +} + + +// Latin1ContextAware provider + +static UText* uTextLatin1ContextAwareClone(UText*, const UText*, UBool, UErrorCode*); +static int64_t uTextLatin1ContextAwareNativeLength(UText*); +static UBool uTextLatin1ContextAwareAccess(UText*, int64_t, UBool); +static int32_t uTextLatin1ContextAwareExtract(UText*, int64_t, int64_t, UChar*, int32_t, UErrorCode*); +static void uTextLatin1ContextAwareClose(UText*); + +static const struct UTextFuncs textLatin1ContextAwareFuncs = { + sizeof(UTextFuncs), + 0, + 0, + 0, + uTextLatin1ContextAwareClone, + uTextLatin1ContextAwareNativeLength, + uTextLatin1ContextAwareAccess, + 
uTextLatin1ContextAwareExtract, + nullptr, + nullptr, + nullptr, + nullptr, + uTextLatin1ContextAwareClose, + nullptr, + nullptr, + nullptr +}; + +/* Reports which text the current chunk points into: NoContext when chunkContents is null, PrimaryContext when it is the pExtra buffer, PriorContext otherwise. */ static inline UTextProviderContext textLatin1ContextAwareGetCurrentContext(const UText* text) +{ + if (!text->chunkContents) + return UTextProviderContext::NoContext; + return text->chunkContents == text->pExtra ? UTextProviderContext::PrimaryContext : UTextProviderContext::PriorContext; +} + +/* Repositions the chunk inside the primary text. For forward access the chunk starts at nativeIndex, for backward access it ends there; it spans at most extraSize / sizeof(UChar) units and is clamped to [text->b, nativeLength]. The covered characters are then widened from the LChars at text->p (offset by text->b) into the chunk buffer. */ static void textLatin1ContextAwareMoveInPrimaryContext(UText* text, int64_t nativeIndex, int64_t nativeLength, UBool forward) +{ + ASSERT(text->chunkContents == text->pExtra); + if (forward) { + ASSERT(nativeIndex >= text->b && nativeIndex < nativeLength); + text->chunkNativeStart = nativeIndex; + text->chunkNativeLimit = nativeIndex + text->extraSize / sizeof(UChar); + if (text->chunkNativeLimit > nativeLength) + text->chunkNativeLimit = nativeLength; + } else { + ASSERT(nativeIndex > text->b && nativeIndex <= nativeLength); + text->chunkNativeLimit = nativeIndex; + text->chunkNativeStart = nativeIndex - text->extraSize / sizeof(UChar); + if (text->chunkNativeStart < text->b) + text->chunkNativeStart = text->b; + } + int64_t length = text->chunkNativeLimit - text->chunkNativeStart; + // Ensure chunk length is well defined if computed length exceeds int32_t range. + ASSERT(length < std::numeric_limits<int32_t>::max()); + text->chunkLength = length < std::numeric_limits<int32_t>::max() ? static_cast<int32_t>(length) : 0; + text->nativeIndexingLimit = text->chunkLength; + text->chunkOffset = forward ? 
0 : text->chunkLength; + StringImpl::copyChars(const_cast<UChar*>(text->chunkContents), static_cast<const LChar*>(text->p) + (text->chunkNativeStart - text->b), static_cast<unsigned>(text->chunkLength)); +} + +/* Points chunkContents at the pExtra buffer and positions the chunk in the primary text. */ static void textLatin1ContextAwareSwitchToPrimaryContext(UText* text, int64_t nativeIndex, int64_t nativeLength, UBool forward) +{ + ASSERT(!text->chunkContents || text->chunkContents == text->q); + text->chunkContents = static_cast<const UChar*>(text->pExtra); + textLatin1ContextAwareMoveInPrimaryContext(text, nativeIndex, nativeLength, forward); +} + +/* Makes the native range [0, text->b) the current chunk and sets chunkOffset to nativeIndex, capped at the chunk length. */ static void textLatin1ContextAwareMoveInPriorContext(UText* text, int64_t nativeIndex, int64_t nativeLength, UBool forward) +{ + ASSERT(text->chunkContents == text->q); + ASSERT(forward ? nativeIndex < text->b : nativeIndex <= text->b); + ASSERT_UNUSED(nativeLength, forward ? nativeIndex < nativeLength : nativeIndex <= nativeLength); + ASSERT_UNUSED(forward, forward ? nativeIndex < nativeLength : nativeIndex <= nativeLength); + text->chunkNativeStart = 0; + text->chunkNativeLimit = text->b; + text->chunkLength = text->b; + text->nativeIndexingLimit = text->chunkLength; + int64_t offset = nativeIndex - text->chunkNativeStart; + // Ensure chunk offset is well defined if computed offset exceeds int32_t range or chunk length. + ASSERT(offset < std::numeric_limits<int32_t>::max()); + text->chunkOffset = std::min(offset < std::numeric_limits<int32_t>::max() ? 
static_cast<int32_t>(offset) : 0, text->chunkLength); +} + +/* Points chunkContents at text->q and positions the chunk in the prior context. */ static void textLatin1ContextAwareSwitchToPriorContext(UText* text, int64_t nativeIndex, int64_t nativeLength, UBool forward) +{ + ASSERT(!text->chunkContents || text->chunkContents == text->pExtra); + text->chunkContents = static_cast<const UChar*>(text->q); + textLatin1ContextAwareMoveInPriorContext(text, nativeIndex, nativeLength, forward); +} + +/* Clone callback: forwards to uTextCloneImpl. */ static UText* uTextLatin1ContextAwareClone(UText* destination, const UText* source, UBool deep, UErrorCode* status) +{ + return uTextCloneImpl(destination, source, deep, status); +} + +/* Native length callback: the sum of text->a and text->b. */ static int64_t uTextLatin1ContextAwareNativeLength(UText* text) +{ + return text->a + text->b; +} + +/* Access callback. Returns FALSE once the provider has been closed (null context). Otherwise defers to uTextAccessInChunkOrOutOfRange; when that does not settle the request, pins nativeIndex, determines which context it falls in, and either moves within the current context or switches to the other one. */ static UBool uTextLatin1ContextAwareAccess(UText* text, int64_t nativeIndex, UBool forward) +{ + if (!text->context) + return FALSE; + int64_t nativeLength = uTextLatin1ContextAwareNativeLength(text); + UBool isAccessible; + if (uTextAccessInChunkOrOutOfRange(text, nativeIndex, nativeLength, forward, isAccessible)) + return isAccessible; + nativeIndex = uTextAccessPinIndex(nativeIndex, nativeLength); + UTextProviderContext currentContext = textLatin1ContextAwareGetCurrentContext(text); + UTextProviderContext newContext = uTextProviderContext(text, nativeIndex, forward); + ASSERT(newContext != UTextProviderContext::NoContext); + if (newContext == currentContext) { + if (currentContext == UTextProviderContext::PrimaryContext) + textLatin1ContextAwareMoveInPrimaryContext(text, nativeIndex, nativeLength, forward); + else + textLatin1ContextAwareMoveInPriorContext(text, nativeIndex, nativeLength, forward); + } else if (newContext == UTextProviderContext::PrimaryContext) + textLatin1ContextAwareSwitchToPrimaryContext(text, nativeIndex, nativeLength, forward); + else { + ASSERT(newContext == UTextProviderContext::PriorContext); + textLatin1ContextAwareSwitchToPriorContext(text, nativeIndex, nativeLength, forward); + } + return TRUE; +} + +/* Extract callback: unsupported; asserts, sets U_UNSUPPORTED_ERROR and returns 0. */ static int32_t 
uTextLatin1ContextAwareExtract(UText*, int64_t, int64_t, UChar*, int32_t, UErrorCode* errorCode) +{ + // In the present context, this text provider is used only with ICU functions + // that do not perform an extract operation. + ASSERT_NOT_REACHED(); + *errorCode = U_UNSUPPORTED_ERROR; + return 0; +} + +/* Close callback: clears text->context, which makes later access calls return FALSE. */ static void uTextLatin1ContextAwareClose(UText* text) +{ + text->context = nullptr; +} + +/* Sets up utWithBuffer->text via utext_setup, requesting sizeof(utWithBuffer->buffer) bytes of extra space, and hands the result to initializeContextAwareUTextProvider together with the Latin-1 string and the prior context. Returns nullptr if *status already indicates failure, if utext_setup fails, or (with U_ILLEGAL_ARGUMENT_ERROR) if string is null or length exceeds the int32_t maximum. */ UText* openLatin1ContextAwareUTextProvider(UTextWithBuffer* utWithBuffer, const LChar* string, unsigned length, const UChar* priorContext, int priorContextLength, UErrorCode* status) +{ + if (U_FAILURE(*status)) + return nullptr; + if (!string || length > static_cast<unsigned>(std::numeric_limits<int32_t>::max())) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + return nullptr; + } + UText* text = utext_setup(&utWithBuffer->text, sizeof(utWithBuffer->buffer), status); + if (U_FAILURE(*status)) { + ASSERT(!text); + return nullptr; + } + + initializeContextAwareUTextProvider(text, &textLatin1ContextAwareFuncs, string, length, priorContext, priorContextLength); + return text; +} + +} // namespace WTF diff --git a/Source/WTF/wtf/text/icu/UTextProviderLatin1.h b/Source/WTF/wtf/text/icu/UTextProviderLatin1.h new file mode 100644 index 000000000..f17b34d56 --- /dev/null +++ b/Source/WTF/wtf/text/icu/UTextProviderLatin1.h @@ -0,0 +1,46 @@ +/* + * Copyright (C) 2014 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. 
AND ITS CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef UTextProviderLatin1_h +#define UTextProviderLatin1_h + +#include <unicode/utext.h> +#include <wtf/text/LChar.h> + +namespace WTF { + +/* Number of UChar slots in UTextWithBuffer::buffer. */ const int UTextWithBufferInlineCapacity = 16; + +/* A UText paired with a fixed-size UChar array; the open functions declared below pass sizeof(buffer) to utext_setup as the extra-space size. */ struct UTextWithBuffer { + UText text; + UChar buffer[UTextWithBufferInlineCapacity]; +}; + +/* Opens a UText over length Latin-1 characters at string; status is an in/out ICU error code. */ UText* openLatin1UTextProvider(UTextWithBuffer* utWithBuffer, const LChar* string, unsigned length, UErrorCode* status); +/* As above, additionally taking a UTF-16 prior context of priorContextLength code units. */ UText* openLatin1ContextAwareUTextProvider(UTextWithBuffer* utWithBuffer, const LChar* string, unsigned length, const UChar* priorContext, int priorContextLength, UErrorCode* status); + +} // namespace WTF + +#endif // UTextProviderLatin1_h diff --git a/Source/WTF/wtf/text/icu/UTextProviderUTF16.cpp b/Source/WTF/wtf/text/icu/UTextProviderUTF16.cpp new file mode 100644 index 000000000..e1fc2eab9 --- /dev/null +++ b/Source/WTF/wtf/text/icu/UTextProviderUTF16.cpp @@ -0,0 +1,184 @@ +/* + * Copyright (C) 2014 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "config.h" +#include "UTextProviderUTF16.h" + +#include "UTextProvider.h" +#include <algorithm> + +namespace WTF { + +// UTF16ContextAware provider + +static UText* uTextUTF16ContextAwareClone(UText*, const UText*, UBool, UErrorCode*); +static int64_t uTextUTF16ContextAwareNativeLength(UText*); +static UBool uTextUTF16ContextAwareAccess(UText*, int64_t, UBool); +static int32_t uTextUTF16ContextAwareExtract(UText*, int64_t, int64_t, UChar*, int32_t, UErrorCode*); +static void uTextUTF16ContextAwareClose(UText*); + +static const struct UTextFuncs textUTF16ContextAwareFuncs = { + sizeof(UTextFuncs), + 0, + 0, + 0, + uTextUTF16ContextAwareClone, + uTextUTF16ContextAwareNativeLength, + uTextUTF16ContextAwareAccess, + uTextUTF16ContextAwareExtract, + nullptr, + nullptr, + nullptr, + nullptr, + uTextUTF16ContextAwareClose, + nullptr, + nullptr, + nullptr +}; + +/* Reports which text the current chunk points into: NoContext when chunkContents is null, PrimaryContext when it equals text->p, PriorContext otherwise. */ static inline UTextProviderContext textUTF16ContextAwareGetCurrentContext(const UText* text) +{ + if (!text->chunkContents) + return UTextProviderContext::NoContext; + return text->chunkContents == text->p ? UTextProviderContext::PrimaryContext : UTextProviderContext::PriorContext; +} + +/* Makes the native range [text->b, nativeLength) the current chunk and sets chunkOffset to nativeIndex - text->b, capped at the chunk length. No characters are copied; the chunk aliases the buffer chunkContents already points at. */ static void textUTF16ContextAwareMoveInPrimaryContext(UText* text, int64_t nativeIndex, int64_t nativeLength, UBool forward) +{ + ASSERT(text->chunkContents == text->p); + ASSERT_UNUSED(forward, forward ? nativeIndex >= text->b : nativeIndex > text->b); + ASSERT_UNUSED(forward, forward ? nativeIndex < nativeLength : nativeIndex <= nativeLength); + text->chunkNativeStart = text->b; + text->chunkNativeLimit = nativeLength; + int64_t length = text->chunkNativeLimit - text->chunkNativeStart; + // Ensure chunk length is well defined if computed length exceeds int32_t range. + ASSERT(length < std::numeric_limits<int32_t>::max()); + text->chunkLength = length < std::numeric_limits<int32_t>::max() ? 
static_cast<int32_t>(length) : 0; + text->nativeIndexingLimit = text->chunkLength; + int64_t offset = nativeIndex - text->chunkNativeStart; + // Ensure chunk offset is well defined if computed offset exceeds int32_t range or chunk length. + ASSERT(offset < std::numeric_limits<int32_t>::max()); + text->chunkOffset = std::min(offset < std::numeric_limits<int32_t>::max() ? static_cast<int32_t>(offset) : 0, text->chunkLength); +} + +/* Points chunkContents at text->p and positions the chunk in the primary text. */ static void textUTF16ContextAwareSwitchToPrimaryContext(UText* text, int64_t nativeIndex, int64_t nativeLength, UBool forward) +{ + ASSERT(!text->chunkContents || text->chunkContents == text->q); + text->chunkContents = static_cast<const UChar*>(text->p); + textUTF16ContextAwareMoveInPrimaryContext(text, nativeIndex, nativeLength, forward); +} + +/* Makes the native range [0, text->b) the current chunk and sets chunkOffset to nativeIndex, capped at the chunk length. */ static void textUTF16ContextAwareMoveInPriorContext(UText* text, int64_t nativeIndex, int64_t nativeLength, UBool forward) +{ + ASSERT(text->chunkContents == text->q); + ASSERT(forward ? nativeIndex < text->b : nativeIndex <= text->b); + ASSERT_UNUSED(nativeLength, forward ? nativeIndex < nativeLength : nativeIndex <= nativeLength); + ASSERT_UNUSED(forward, forward ? nativeIndex < nativeLength : nativeIndex <= nativeLength); + text->chunkNativeStart = 0; + text->chunkNativeLimit = text->b; + text->chunkLength = text->b; + text->nativeIndexingLimit = text->chunkLength; + int64_t offset = nativeIndex - text->chunkNativeStart; + // Ensure chunk offset is well defined if computed offset exceeds int32_t range or chunk length. + ASSERT(offset < std::numeric_limits<int32_t>::max()); + text->chunkOffset = std::min(offset < std::numeric_limits<int32_t>::max() ? 
static_cast<int32_t>(offset) : 0, text->chunkLength); +} + +/* Points chunkContents at text->q and positions the chunk in the prior context. */ static void textUTF16ContextAwareSwitchToPriorContext(UText* text, int64_t nativeIndex, int64_t nativeLength, UBool forward) +{ + ASSERT(!text->chunkContents || text->chunkContents == text->p); + text->chunkContents = static_cast<const UChar*>(text->q); + textUTF16ContextAwareMoveInPriorContext(text, nativeIndex, nativeLength, forward); +} + +/* Clone callback: forwards to uTextCloneImpl. */ static UText* uTextUTF16ContextAwareClone(UText* destination, const UText* source, UBool deep, UErrorCode* status) +{ + return uTextCloneImpl(destination, source, deep, status); +} + +/* Native length callback: the sum of text->a and text->b. */ static inline int64_t uTextUTF16ContextAwareNativeLength(UText* text) +{ + return text->a + text->b; +} + +/* Access callback. Returns FALSE once the provider has been closed (null context). Otherwise defers to uTextAccessInChunkOrOutOfRange; when that does not settle the request, pins nativeIndex, determines which context it falls in, and either moves within the current context or switches to the other one. */ static UBool uTextUTF16ContextAwareAccess(UText* text, int64_t nativeIndex, UBool forward) +{ + if (!text->context) + return FALSE; + int64_t nativeLength = uTextUTF16ContextAwareNativeLength(text); + UBool isAccessible; + if (uTextAccessInChunkOrOutOfRange(text, nativeIndex, nativeLength, forward, isAccessible)) + return isAccessible; + nativeIndex = uTextAccessPinIndex(nativeIndex, nativeLength); + UTextProviderContext currentContext = textUTF16ContextAwareGetCurrentContext(text); + UTextProviderContext newContext = uTextProviderContext(text, nativeIndex, forward); + ASSERT(newContext != UTextProviderContext::NoContext); + if (newContext == currentContext) { + if (currentContext == UTextProviderContext::PrimaryContext) + textUTF16ContextAwareMoveInPrimaryContext(text, nativeIndex, nativeLength, forward); + else + textUTF16ContextAwareMoveInPriorContext(text, nativeIndex, nativeLength, forward); + } else if (newContext == UTextProviderContext::PrimaryContext) + textUTF16ContextAwareSwitchToPrimaryContext(text, nativeIndex, nativeLength, forward); + else { + ASSERT(newContext == UTextProviderContext::PriorContext); + textUTF16ContextAwareSwitchToPriorContext(text, nativeIndex, nativeLength, forward); + } + return TRUE; +} + +/* Extract callback: unsupported; asserts, sets U_UNSUPPORTED_ERROR and returns 0. */ static int32_t uTextUTF16ContextAwareExtract(UText*, 
int64_t, int64_t, UChar*, int32_t, UErrorCode* errorCode) +{ + // In the present context, this text provider is used only with ICU functions + // that do not perform an extract operation. + ASSERT_NOT_REACHED(); + *errorCode = U_UNSUPPORTED_ERROR; + return 0; +} + +/* Close callback: clears text->context, which makes later access calls return FALSE. */ static void uTextUTF16ContextAwareClose(UText* text) +{ + text->context = nullptr; +} + +/* Sets up text via utext_setup with no extra space and hands the result to initializeContextAwareUTextProvider together with the UTF-16 string and the prior context. Returns nullptr if *status already indicates failure, if utext_setup fails, or (with U_ILLEGAL_ARGUMENT_ERROR) if string is null or length exceeds the int32_t maximum. */ UText* openUTF16ContextAwareUTextProvider(UText* text, const UChar* string, unsigned length, const UChar* priorContext, int priorContextLength, UErrorCode* status) +{ + if (U_FAILURE(*status)) + return nullptr; + if (!string || length > static_cast<unsigned>(std::numeric_limits<int32_t>::max())) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + return nullptr; + } + text = utext_setup(text, 0, status); + if (U_FAILURE(*status)) { + ASSERT(!text); + return nullptr; + } + + initializeContextAwareUTextProvider(text, &textUTF16ContextAwareFuncs, string, length, priorContext, priorContextLength); + return text; +} + +} // namespace WTF diff --git a/Source/WTF/wtf/text/icu/UTextProviderUTF16.h b/Source/WTF/wtf/text/icu/UTextProviderUTF16.h new file mode 100644 index 000000000..bcc2c2c8e --- /dev/null +++ b/Source/WTF/wtf/text/icu/UTextProviderUTF16.h @@ -0,0 +1,37 @@ +/* + * Copyright (C) 2014 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. 
AND ITS CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef UTextProviderUTF16_h +#define UTextProviderUTF16_h + +#include <unicode/utext.h> + +namespace WTF { + +/* Opens a UText over length UTF-16 code units plus a UTF-16 prior context of priorContextLength code units; the trailing UErrorCode pointer is an in/out ICU status. */ UText* openUTF16ContextAwareUTextProvider(UText*, const UChar*, unsigned length, const UChar* priorContext, int priorContextLength, UErrorCode*); + +} // namespace WTF + +#endif // UTextProviderUTF16_h diff --git a/Source/WTF/wtf/text/unix/TextBreakIteratorInternalICUUnix.cpp b/Source/WTF/wtf/text/unix/TextBreakIteratorInternalICUUnix.cpp new file mode 100644 index 000000000..44983421c --- /dev/null +++ b/Source/WTF/wtf/text/unix/TextBreakIteratorInternalICUUnix.cpp @@ -0,0 +1,41 @@ +/* + * Copyright (C) 2017 Igalia S.L. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. 
+ * + * You should have received a copy of the GNU Library General Public License + * along with this library; see the file COPYING.LIB. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + */ + +#include "config.h" +#include "TextBreakIteratorInternalICU.h" + +#include <locale.h> + +namespace WTF { + +/* Returns the locale name reported by setlocale(LC_MESSAGES, nullptr), a query that does not change the locale, or the empty string if that call returns null. */ const char* currentSearchLocaleID() +{ + if (auto* localeDefault = setlocale(LC_MESSAGES, nullptr)) + return localeDefault; + return ""; +} + +/* Same query as currentSearchLocaleID, but falls back to the literal en_us when setlocale returns null. */ const char* currentTextBreakLocaleID() +{ + if (auto* localeDefault = setlocale(LC_MESSAGES, nullptr)) + return localeDefault; + return "en_us"; +} + +} |