diff options
author | Lorry Tar Creator <lorry-tar-importer@lorry> | 2017-06-27 06:07:23 +0000 |
---|---|---|
committer | Lorry Tar Creator <lorry-tar-importer@lorry> | 2017-06-27 06:07:23 +0000 |
commit | 1bf1084f2b10c3b47fd1a588d85d21ed0eb41d0c (patch) | |
tree | 46dcd36c86e7fbc6e5df36deb463b33e9967a6f7 /Source/WTF/wtf/text/StringImpl.h | |
parent | 32761a6cee1d0dee366b885b7b9c777e67885688 (diff) | |
download | WebKitGtk-tarball-master.tar.gz |
webkitgtk-2.16.5 HEAD webkitgtk-2.16.5 master
Diffstat (limited to 'Source/WTF/wtf/text/StringImpl.h')
-rw-r--r-- | Source/WTF/wtf/text/StringImpl.h | 974 |
1 file changed, 376 insertions, 598 deletions
diff --git a/Source/WTF/wtf/text/StringImpl.h b/Source/WTF/wtf/text/StringImpl.h index 770acf000..b2c45e8fa 100644 --- a/Source/WTF/wtf/text/StringImpl.h +++ b/Source/WTF/wtf/text/StringImpl.h @@ -1,6 +1,6 @@ /* * Copyright (C) 1999 Lars Knoll (knoll@kde.org) - * Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2013 Apple Inc. All rights reserved. + * Copyright (C) 2005-2010, 2013-2016 Apple Inc. All rights reserved. * Copyright (C) 2009 Google Inc. All rights reserved. * * This library is free software; you can redistribute it and/or @@ -24,16 +24,16 @@ #define StringImpl_h #include <limits.h> +#include <unicode/uchar.h> +#include <unicode/ustring.h> #include <wtf/ASCIICType.h> -#include <wtf/CompilationThread.h> -#include <wtf/CryptographicallyRandomNumber.h> #include <wtf/Forward.h> +#include <wtf/Hasher.h> #include <wtf/MathExtras.h> #include <wtf/StdLibExtras.h> -#include <wtf/StringHasher.h> #include <wtf/Vector.h> #include <wtf/text/ConversionMode.h> -#include <wtf/unicode/Unicode.h> +#include <wtf/text/StringCommon.h> #if USE(CF) typedef const struct __CFString * CFStringRef; @@ -43,27 +43,28 @@ typedef const struct __CFString * CFStringRef; @class NSString; #endif -// FIXME: This is a temporary layering violation while we move string code to WTF. -// Landing the file moves in one patch, will follow on with patches to change the namespaces. 
namespace JSC { -struct IdentifierASCIIStringTranslator; namespace LLInt { class Data; } class LLIntOffsetsExtractor; -template <typename T> struct IdentifierCharBufferTranslator; -struct IdentifierLCharFromUCharTranslator; } namespace WTF { +class SymbolImpl; +class SymbolRegistry; + struct CStringTranslator; -template<typename CharacterType> struct HashAndCharactersTranslator; +struct CharBufferFromLiteralDataTranslator; struct HashAndUTF8CharactersTranslator; struct LCharBufferTranslator; -struct CharBufferFromLiteralDataTranslator; +struct StringHash; struct SubstringTranslator; struct UCharBufferTranslator; + template<typename> class RetainPtr; +template<typename> struct HashAndCharactersTranslator; + enum TextCaseSensitivity { TextCaseSensitive, TextCaseInsensitive @@ -72,10 +73,10 @@ enum TextCaseSensitivity { typedef bool (*CharacterMatchFunctionPtr)(UChar); typedef bool (*IsWhiteSpaceFunctionPtr)(UChar); -// Define STRING_STATS to turn on run time statistics of string sizes and memory usage -#undef STRING_STATS +// Define STRING_STATS to 1 turn on run time statistics of string sizes and memory usage +#define STRING_STATS 0 -#ifdef STRING_STATS +#if STRING_STATS struct StringStats { inline void add8BitString(unsigned length, bool isSubString = false) { @@ -93,33 +94,29 @@ struct StringStats { m_total16BitData += length; } - inline void addUpconvertedString(unsigned length) - { - ++m_numberUpconvertedStrings; - m_totalUpconvertedData += length; - } - - void removeString(StringImpl*); + void removeString(StringImpl&); void printStats(); static const unsigned s_printStringStatsFrequency = 5000; - static unsigned s_stringRemovesTillPrintStats; - - unsigned m_totalNumberStrings; - unsigned m_number8BitStrings; - unsigned m_number16BitStrings; - unsigned m_numberUpconvertedStrings; - unsigned long long m_total8BitData; - unsigned long long m_total16BitData; - unsigned long long m_totalUpconvertedData; + static std::atomic<unsigned> s_stringRemovesTillPrintStats; 
+ + std::atomic<unsigned> m_refCalls; + std::atomic<unsigned> m_derefCalls; + + std::atomic<unsigned> m_totalNumberStrings; + std::atomic<unsigned> m_number8BitStrings; + std::atomic<unsigned> m_number16BitStrings; + std::atomic<unsigned long long> m_total8BitData; + std::atomic<unsigned long long> m_total16BitData; }; #define STRING_STATS_ADD_8BIT_STRING(length) StringImpl::stringStats().add8BitString(length) #define STRING_STATS_ADD_8BIT_STRING2(length, isSubString) StringImpl::stringStats().add8BitString(length, isSubString) #define STRING_STATS_ADD_16BIT_STRING(length) StringImpl::stringStats().add16BitString(length) #define STRING_STATS_ADD_16BIT_STRING2(length, isSubString) StringImpl::stringStats().add16BitString(length, isSubString) -#define STRING_STATS_ADD_UPCONVERTED_STRING(length) StringImpl::stringStats().addUpconvertedString(length) #define STRING_STATS_REMOVE_STRING(string) StringImpl::stringStats().removeString(string) +#define STRING_STATS_REF_STRING(string) ++StringImpl::stringStats().m_refCalls; +#define STRING_STATS_DEREF_STRING(string) ++StringImpl::stringStats().m_derefCalls; #else #define STRING_STATS_ADD_8BIT_STRING(length) ((void)0) #define STRING_STATS_ADD_8BIT_STRING2(length, isSubString) ((void)0) @@ -127,14 +124,12 @@ struct StringStats { #define STRING_STATS_ADD_16BIT_STRING2(length, isSubString) ((void)0) #define STRING_STATS_ADD_UPCONVERTED_STRING(length) ((void)0) #define STRING_STATS_REMOVE_STRING(string) ((void)0) +#define STRING_STATS_REF_STRING(string) ((void)0) +#define STRING_STATS_DEREF_STRING(string) ((void)0) #endif class StringImpl { WTF_MAKE_NONCOPYABLE(StringImpl); WTF_MAKE_FAST_ALLOCATED; - friend struct JSC::IdentifierASCIIStringTranslator; - friend struct JSC::IdentifierCharBufferTranslator<LChar>; - friend struct JSC::IdentifierCharBufferTranslator<UChar>; - friend struct JSC::IdentifierLCharFromUCharTranslator; friend struct WTF::CStringTranslator; template<typename CharacterType> friend struct 
WTF::HashAndCharactersTranslator; friend struct WTF::HashAndUTF8CharactersTranslator; @@ -142,9 +137,9 @@ class StringImpl { friend struct WTF::LCharBufferTranslator; friend struct WTF::SubstringTranslator; friend struct WTF::UCharBufferTranslator; - friend class AtomicStringImpl; friend class JSC::LLInt::Data; friend class JSC::LLIntOffsetsExtractor; + friend class SymbolImpl; private: enum BufferOwnership { @@ -153,42 +148,26 @@ private: BufferSubstring, }; - // Used to construct static strings, which have an special refCount that can never hit zero. - // This means that the static string will never be destroyed, which is important because - // static strings will be shared across threads & ref-counted in a non-threadsafe manner. - enum ConstructStaticStringTag { ConstructStaticString }; - StringImpl(const UChar* characters, unsigned length, ConstructStaticStringTag) - : m_refCount(s_refCountFlagIsStaticString) - , m_length(length) - , m_data16(characters) - , m_buffer(0) - , m_hashAndFlags(s_hashFlagIsIdentifier | BufferOwned) - { - // Ensure that the hash is computed so that AtomicStringHash can call existingHash() - // with impunity. The empty string is special because it is never entered into - // AtomicString's HashKey, but still needs to compare correctly. - STRING_STATS_ADD_16BIT_STRING(m_length); - - hash(); - } - - // Used to construct static strings, which have an special refCount that can never hit zero. - // This means that the static string will never be destroyed, which is important because - // static strings will be shared across threads & ref-counted in a non-threadsafe manner. 
- StringImpl(const LChar* characters, unsigned length, ConstructStaticStringTag) - : m_refCount(s_refCountFlagIsStaticString) - , m_length(length) - , m_data8(characters) - , m_buffer(0) - , m_hashAndFlags(s_hashFlag8BitBuffer | s_hashFlagIsIdentifier | BufferOwned) - { - // Ensure that the hash is computed so that AtomicStringHash can call existingHash() - // with impunity. The empty string is special because it is never entered into - // AtomicString's HashKey, but still needs to compare correctly. - STRING_STATS_ADD_8BIT_STRING(m_length); - - hash(); - } + // The bottom 6 bits in the hash are flags. +public: + static constexpr const unsigned s_flagCount = 6; +private: + static constexpr const unsigned s_flagMask = (1u << s_flagCount) - 1; + static_assert(s_flagCount <= StringHasher::flagCount, "StringHasher reserves enough bits for StringImpl flags"); + static constexpr const unsigned s_flagStringKindCount = 4; + + static constexpr const unsigned s_hashFlagStringKindIsAtomic = 1u << (s_flagStringKindCount); + static constexpr const unsigned s_hashFlagStringKindIsSymbol = 1u << (s_flagStringKindCount + 1); + static constexpr const unsigned s_hashMaskStringKind = s_hashFlagStringKindIsAtomic | s_hashFlagStringKindIsSymbol; + static constexpr const unsigned s_hashFlag8BitBuffer = 1u << 3; + static constexpr const unsigned s_hashFlagDidReportCost = 1u << 2; + static constexpr const unsigned s_hashMaskBufferOwnership = (1u << 0) | (1u << 1); + + enum StringKind { + StringNormal = 0u, // non-symbol, non-atomic + StringAtomic = s_hashFlagStringKindIsAtomic, // non-symbol, atomic + StringSymbol = s_hashFlagStringKindIsSymbol, // symbol, non-atomic + }; // FIXME: there has to be a less hacky way to do this. 
enum Force8Bit { Force8BitConstructor }; @@ -196,9 +175,8 @@ private: StringImpl(unsigned length, Force8Bit) : m_refCount(s_refCountIncrement) , m_length(length) - , m_data8(reinterpret_cast<const LChar*>(this + 1)) - , m_buffer(0) - , m_hashAndFlags(s_hashFlag8BitBuffer | BufferInternal) + , m_data8(tailPointer<LChar>()) + , m_hashAndFlags(s_hashFlag8BitBuffer | StringNormal | BufferInternal) { ASSERT(m_data8); ASSERT(m_length); @@ -210,9 +188,8 @@ private: StringImpl(unsigned length) : m_refCount(s_refCountIncrement) , m_length(length) - , m_data16(reinterpret_cast<const UChar*>(this + 1)) - , m_buffer(0) - , m_hashAndFlags(BufferInternal) + , m_data16(tailPointer<UChar>()) + , m_hashAndFlags(StringNormal | BufferInternal) { ASSERT(m_data16); ASSERT(m_length); @@ -225,8 +202,7 @@ private: : m_refCount(s_refCountIncrement) , m_length(length) , m_data8(characters.leakPtr()) - , m_buffer(0) - , m_hashAndFlags(s_hashFlag8BitBuffer | BufferOwned) + , m_hashAndFlags(s_hashFlag8BitBuffer | StringNormal | BufferOwned) { ASSERT(m_data8); ASSERT(m_length); @@ -239,26 +215,24 @@ private: : m_refCount(s_refCountIncrement) , m_length(length) , m_data16(characters) - , m_buffer(0) - , m_hashAndFlags(BufferInternal) + , m_hashAndFlags(StringNormal | BufferInternal) { ASSERT(m_data16); ASSERT(m_length); - STRING_STATS_ADD_16BIT_STRING(0); + STRING_STATS_ADD_16BIT_STRING(m_length); } StringImpl(const LChar* characters, unsigned length, ConstructWithoutCopyingTag) : m_refCount(s_refCountIncrement) , m_length(length) , m_data8(characters) - , m_buffer(0) - , m_hashAndFlags(s_hashFlag8BitBuffer | BufferInternal) + , m_hashAndFlags(s_hashFlag8BitBuffer | StringNormal | BufferInternal) { ASSERT(m_data8); ASSERT(m_length); - STRING_STATS_ADD_8BIT_STRING(0); + STRING_STATS_ADD_8BIT_STRING(m_length); } // Create a StringImpl adopting ownership of the provided buffer (BufferOwned) @@ -266,8 +240,7 @@ private: : m_refCount(s_refCountIncrement) , m_length(length) , 
m_data16(characters.leakPtr()) - , m_buffer(0) - , m_hashAndFlags(BufferOwned) + , m_hashAndFlags(StringNormal | BufferOwned) { ASSERT(m_data16); ASSERT(m_length); @@ -276,109 +249,74 @@ private: } // Used to create new strings that are a substring of an existing 8-bit StringImpl (BufferSubstring) - StringImpl(const LChar* characters, unsigned length, PassRefPtr<StringImpl> base) + StringImpl(const LChar* characters, unsigned length, Ref<StringImpl>&& base) : m_refCount(s_refCountIncrement) , m_length(length) , m_data8(characters) - , m_substringBuffer(base.leakRef()) - , m_hashAndFlags(s_hashFlag8BitBuffer | BufferSubstring) + , m_hashAndFlags(s_hashFlag8BitBuffer | StringNormal | BufferSubstring) { ASSERT(is8Bit()); ASSERT(m_data8); ASSERT(m_length); - ASSERT(m_substringBuffer->bufferOwnership() != BufferSubstring); + ASSERT(base->bufferOwnership() != BufferSubstring); + + substringBuffer() = &base.leakRef(); STRING_STATS_ADD_8BIT_STRING2(m_length, true); } // Used to create new strings that are a substring of an existing 16-bit StringImpl (BufferSubstring) - StringImpl(const UChar* characters, unsigned length, PassRefPtr<StringImpl> base) + StringImpl(const UChar* characters, unsigned length, Ref<StringImpl>&& base) : m_refCount(s_refCountIncrement) , m_length(length) , m_data16(characters) - , m_substringBuffer(base.leakRef()) - , m_hashAndFlags(BufferSubstring) + , m_hashAndFlags(StringNormal | BufferSubstring) { ASSERT(!is8Bit()); ASSERT(m_data16); ASSERT(m_length); - ASSERT(m_substringBuffer->bufferOwnership() != BufferSubstring); + ASSERT(base->bufferOwnership() != BufferSubstring); - STRING_STATS_ADD_16BIT_STRING2(m_length, true); - } + substringBuffer() = &base.leakRef(); - enum CreateEmptyUnique_T { CreateEmptyUnique }; - StringImpl(CreateEmptyUnique_T) - : m_refCount(s_refCountIncrement) - , m_length(0) - // We expect m_buffer to be initialized to 0 as we use it - // to represent a null terminated buffer. 
- , m_data16(reinterpret_cast<const UChar*>(&m_buffer)) - , m_buffer(0) - { - ASSERT(m_data16); - // Set the hash early, so that all empty unique StringImpls have a hash, - // and don't use the normal hashing algorithm - the unique nature of these - // keys means that we don't need them to match any other string (in fact, - // that's exactly the oposite of what we want!), and teh normal hash would - // lead to lots of conflicts. - unsigned hash = cryptographicallyRandomNumber() | 1; - hash <<= s_flagCount; - if (!hash) - hash = 1 << s_flagCount; - m_hashAndFlags = hash | BufferInternal; - - STRING_STATS_ADD_16BIT_STRING(m_length); + STRING_STATS_ADD_16BIT_STRING2(m_length, true); } - ~StringImpl(); - public: WTF_EXPORT_STRING_API static void destroy(StringImpl*); - WTF_EXPORT_STRING_API static PassRef<StringImpl> create(const UChar*, unsigned length); - WTF_EXPORT_STRING_API static PassRef<StringImpl> create(const LChar*, unsigned length); - WTF_EXPORT_STRING_API static PassRef<StringImpl> create8BitIfPossible(const UChar*, unsigned length); + WTF_EXPORT_STRING_API static Ref<StringImpl> create(const UChar*, unsigned length); + WTF_EXPORT_STRING_API static Ref<StringImpl> create(const LChar*, unsigned length); + WTF_EXPORT_STRING_API static Ref<StringImpl> create8BitIfPossible(const UChar*, unsigned length); template<size_t inlineCapacity> - static PassRef<StringImpl> create8BitIfPossible(const Vector<UChar, inlineCapacity>& vector) + static Ref<StringImpl> create8BitIfPossible(const Vector<UChar, inlineCapacity>& vector) { return create8BitIfPossible(vector.data(), vector.size()); } - WTF_EXPORT_STRING_API static PassRef<StringImpl> create8BitIfPossible(const UChar*); + WTF_EXPORT_STRING_API static Ref<StringImpl> create8BitIfPossible(const UChar*); - ALWAYS_INLINE static PassRef<StringImpl> create(const char* s, unsigned length) { return create(reinterpret_cast<const LChar*>(s), length); } - WTF_EXPORT_STRING_API static PassRef<StringImpl> create(const LChar*); - 
ALWAYS_INLINE static PassRef<StringImpl> create(const char* s) { return create(reinterpret_cast<const LChar*>(s)); } + ALWAYS_INLINE static Ref<StringImpl> create(const char* s, unsigned length) { return create(reinterpret_cast<const LChar*>(s), length); } + WTF_EXPORT_STRING_API static Ref<StringImpl> create(const LChar*); + ALWAYS_INLINE static Ref<StringImpl> create(const char* s) { return create(reinterpret_cast<const LChar*>(s)); } - static ALWAYS_INLINE PassRef<StringImpl> create8(PassRefPtr<StringImpl> rep, unsigned offset, unsigned length) + static ALWAYS_INLINE Ref<StringImpl> createSubstringSharingImpl(StringImpl& rep, unsigned offset, unsigned length) { - ASSERT(rep); - ASSERT(length <= rep->length()); + ASSERT(length <= rep.length()); if (!length) return *empty(); - ASSERT(rep->is8Bit()); - StringImpl* ownerRep = (rep->bufferOwnership() == BufferSubstring) ? rep->m_substringBuffer : rep.get(); - return adoptRef(*new StringImpl(rep->m_data8 + offset, length, ownerRep)); - } - - static ALWAYS_INLINE PassRef<StringImpl> create(PassRefPtr<StringImpl> rep, unsigned offset, unsigned length) - { - ASSERT(rep); - ASSERT(length <= rep->length()); - - if (!length) - return *empty(); + auto* ownerRep = ((rep.bufferOwnership() == BufferSubstring) ? rep.substringBuffer() : &rep); - StringImpl* ownerRep = (rep->bufferOwnership() == BufferSubstring) ? rep->m_substringBuffer : rep.get(); - if (rep->is8Bit()) - return adoptRef(*new StringImpl(rep->m_data8 + offset, length, ownerRep)); - return adoptRef(*new StringImpl(rep->m_data16 + offset, length, ownerRep)); + // We allocate a buffer that contains both the StringImpl struct as well as the pointer to the owner string. 
+ auto* stringImpl = static_cast<StringImpl*>(fastMalloc(allocationSize<StringImpl*>(1))); + if (rep.is8Bit()) + return adoptRef(*new (NotNull, stringImpl) StringImpl(rep.m_data8 + offset, length, *ownerRep)); + return adoptRef(*new (NotNull, stringImpl) StringImpl(rep.m_data16 + offset, length, *ownerRep)); } template<unsigned charactersCount> - ALWAYS_INLINE static PassRef<StringImpl> createFromLiteral(const char (&characters)[charactersCount]) + ALWAYS_INLINE static Ref<StringImpl> createFromLiteral(const char (&characters)[charactersCount]) { COMPILE_ASSERT(charactersCount > 1, StringImplFromLiteralNotEmpty); COMPILE_ASSERT((charactersCount - 1 <= ((unsigned(~0) - sizeof(StringImpl)) / sizeof(LChar))), StringImplFromLiteralCannotOverflow); @@ -387,53 +325,50 @@ public: } // FIXME: Transition off of these functions to createWithoutCopying instead. - WTF_EXPORT_STRING_API static PassRef<StringImpl> createFromLiteral(const char* characters, unsigned length); - WTF_EXPORT_STRING_API static PassRef<StringImpl> createFromLiteral(const char* characters); + WTF_EXPORT_STRING_API static Ref<StringImpl> createFromLiteral(const char* characters, unsigned length); + WTF_EXPORT_STRING_API static Ref<StringImpl> createFromLiteral(const char* characters); - WTF_EXPORT_STRING_API static PassRef<StringImpl> createWithoutCopying(const UChar* characters, unsigned length); - WTF_EXPORT_STRING_API static PassRef<StringImpl> createWithoutCopying(const LChar* characters, unsigned length); + WTF_EXPORT_STRING_API static Ref<StringImpl> createWithoutCopying(const UChar* characters, unsigned length); + WTF_EXPORT_STRING_API static Ref<StringImpl> createWithoutCopying(const LChar* characters, unsigned length); - WTF_EXPORT_STRING_API static PassRef<StringImpl> createUninitialized(unsigned length, LChar*& data); - WTF_EXPORT_STRING_API static PassRef<StringImpl> createUninitialized(unsigned length, UChar*& data); - template <typename T> static ALWAYS_INLINE PassRefPtr<StringImpl> 
tryCreateUninitialized(unsigned length, T*& output) + WTF_EXPORT_STRING_API static Ref<StringImpl> createUninitialized(unsigned length, LChar*& data); + WTF_EXPORT_STRING_API static Ref<StringImpl> createUninitialized(unsigned length, UChar*& data); + template <typename T> static ALWAYS_INLINE RefPtr<StringImpl> tryCreateUninitialized(unsigned length, T*& output) { if (!length) { - output = 0; + output = nullptr; return empty(); } if (length > ((std::numeric_limits<unsigned>::max() - sizeof(StringImpl)) / sizeof(T))) { - output = 0; - return 0; + output = nullptr; + return nullptr; } StringImpl* resultImpl; - if (!tryFastMalloc(sizeof(T) * length + sizeof(StringImpl)).getValue(resultImpl)) { - output = 0; - return 0; + if (!tryFastMalloc(allocationSize<T>(length)).getValue(resultImpl)) { + output = nullptr; + return nullptr; } - output = reinterpret_cast<T*>(resultImpl + 1); + output = resultImpl->tailPointer<T>(); return constructInternal<T>(resultImpl, length); } - static PassRef<StringImpl> createEmptyUnique() - { - return adoptRef(*new StringImpl(CreateEmptyUnique)); - } - - // Reallocate the StringImpl. The originalString must be only owned by the PassRefPtr, + // Reallocate the StringImpl. The originalString must be only owned by the Ref, // and the buffer ownership must be BufferInternal. Just like the input pointer of realloc(), // the originalString can't be used after this function. 
- static PassRef<StringImpl> reallocate(PassRefPtr<StringImpl> originalString, unsigned length, LChar*& data); - static PassRef<StringImpl> reallocate(PassRefPtr<StringImpl> originalString, unsigned length, UChar*& data); + static Ref<StringImpl> reallocate(Ref<StringImpl>&& originalString, unsigned length, LChar*& data); + static Ref<StringImpl> reallocate(Ref<StringImpl>&& originalString, unsigned length, UChar*& data); static unsigned flagsOffset() { return OBJECT_OFFSETOF(StringImpl, m_hashAndFlags); } static unsigned flagIs8Bit() { return s_hashFlag8BitBuffer; } - static unsigned flagIsIdentifier() { return s_hashFlagIsIdentifier; } + static unsigned flagIsAtomic() { return s_hashFlagStringKindIsAtomic; } + static unsigned flagIsSymbol() { return s_hashFlagStringKindIsSymbol; } + static unsigned maskStringKind() { return s_hashMaskStringKind; } static unsigned dataOffset() { return OBJECT_OFFSETOF(StringImpl, m_data8); } template<typename CharType, size_t inlineCapacity, typename OverflowHandler> - static PassRef<StringImpl> adopt(Vector<CharType, inlineCapacity, OverflowHandler>& vector) + static Ref<StringImpl> adopt(Vector<CharType, inlineCapacity, OverflowHandler>&& vector) { if (size_t size = vector.size()) { ASSERT(vector.data()); @@ -444,31 +379,24 @@ public: return *empty(); } - WTF_EXPORT_STRING_API static PassRef<StringImpl> adopt(StringBuffer<UChar>&); - WTF_EXPORT_STRING_API static PassRef<StringImpl> adopt(StringBuffer<LChar>&); + WTF_EXPORT_STRING_API static Ref<StringImpl> adopt(StringBuffer<UChar>&&); + WTF_EXPORT_STRING_API static Ref<StringImpl> adopt(StringBuffer<LChar>&&); unsigned length() const { return m_length; } + static ptrdiff_t lengthMemoryOffset() { return OBJECT_OFFSETOF(StringImpl, m_length); } bool is8Bit() const { return m_hashAndFlags & s_hashFlag8BitBuffer; } ALWAYS_INLINE const LChar* characters8() const { ASSERT(is8Bit()); return m_data8; } ALWAYS_INLINE const UChar* characters16() const { ASSERT(!is8Bit()); return 
m_data16; } - const UChar* characters() const { return deprecatedCharacters(); } // FIXME: Delete this. - ALWAYS_INLINE const UChar* deprecatedCharacters() const - { - if (!is8Bit()) - return m_data16; - - return getData16SlowCase(); - } template <typename CharType> - ALWAYS_INLINE const CharType * getCharacters() const; + ALWAYS_INLINE const CharType *characters() const; size_t cost() const { // For substrings, return the cost of the base string. if (bufferOwnership() == BufferSubstring) - return m_substringBuffer->cost(); + return substringBuffer()->cost(); if (m_hashAndFlags & s_hashFlagDidReportCost) return 0; @@ -486,7 +414,7 @@ public: return 0; if (bufferOwnership() == BufferSubstring) - return divideRoundedUp(m_substringBuffer->costDuringGC(), refCount()); + return divideRoundedUp(substringBuffer()->costDuringGC(), refCount()); size_t result = m_length; if (!is8Bit()) @@ -496,40 +424,28 @@ public: WTF_EXPORT_STRING_API size_t sizeInBytes() const; - bool has16BitShadow() const { return m_hashAndFlags & s_hashFlagHas16BitShadow; } - WTF_EXPORT_STRING_API void upconvertCharacters(unsigned, unsigned) const; - bool isIdentifier() const { return m_hashAndFlags & s_hashFlagIsIdentifier; } - bool isIdentifierOrUnique() const { return isIdentifier() || isEmptyUnique(); } - void setIsIdentifier(bool isIdentifier) - { - ASSERT(!isStatic()); - ASSERT(!isEmptyUnique()); - if (isIdentifier) - m_hashAndFlags |= s_hashFlagIsIdentifier; - else - m_hashAndFlags &= ~s_hashFlagIsIdentifier; - } - - bool isEmptyUnique() const - { - return !length() && !isStatic(); - } + StringKind stringKind() const { return static_cast<StringKind>(m_hashAndFlags & s_hashMaskStringKind); } + bool isSymbol() const { return m_hashAndFlags & s_hashFlagStringKindIsSymbol; } + bool isAtomic() const { return m_hashAndFlags & s_hashFlagStringKindIsAtomic; } - bool isAtomic() const { return m_hashAndFlags & s_hashFlagIsAtomic; } void setIsAtomic(bool isAtomic) { ASSERT(!isStatic()); - 
ASSERT(!isEmptyUnique()); - if (isAtomic) - m_hashAndFlags |= s_hashFlagIsAtomic; - else - m_hashAndFlags &= ~s_hashFlagIsAtomic; + ASSERT(!isSymbol()); + if (isAtomic) { + m_hashAndFlags |= s_hashFlagStringKindIsAtomic; + ASSERT(stringKind() == StringAtomic); + } else { + m_hashAndFlags &= ~s_hashFlagStringKindIsAtomic; + ASSERT(stringKind() == StringNormal); + } } -#ifdef STRING_STATS - bool isSubString() const { return bufferOwnership() == BufferSubstring; } +#if STRING_STATS + bool isSubString() const { return bufferOwnership() == BufferSubstring; } #endif + static WTF_EXPORT_STRING_API CString utf8ForCharacters(const LChar* characters, unsigned length); static WTF_EXPORT_STRING_API CString utf8ForCharacters(const UChar* characters, unsigned length, ConversionMode = LenientConversion); WTF_EXPORT_STRING_API CString utf8ForRange(unsigned offset, unsigned length, ConversionMode = LenientConversion) const; WTF_EXPORT_STRING_API CString utf8(ConversionMode = LenientConversion) const; @@ -577,7 +493,12 @@ public: return existingHash(); return hashSlowCase(); } - + + WTF_EXPORT_PRIVATE unsigned concurrentHash() const; + + unsigned symbolAwareHash() const; + unsigned existingSymbolAwareHash() const; + bool isStatic() const { return m_refCount & s_refCountFlagIsStaticString; } inline size_t refCount() const @@ -598,13 +519,15 @@ public: inline void ref() { - ASSERT(!isCompilationThread()); + STRING_STATS_REF_STRING(*this); + m_refCount += s_refCountIncrement; } inline void deref() { - ASSERT(!isCompilationThread()); + STRING_STATS_DEREF_STRING(*this); + unsigned tempRefCount = m_refCount - s_refCountIncrement; if (!tempRefCount) { StringImpl::destroy(this); @@ -613,7 +536,47 @@ public: m_refCount = tempRefCount; } - WTF_EXPORT_PRIVATE static StringImpl* empty(); + class StaticStringImpl { + WTF_MAKE_NONCOPYABLE(StaticStringImpl); + public: + // Used to construct static strings, which have an special refCount that can never hit zero. 
+ // This means that the static string will never be destroyed, which is important because + // static strings will be shared across threads & ref-counted in a non-threadsafe manner. + template<unsigned charactersCount> + constexpr StaticStringImpl(const char (&characters)[charactersCount], StringKind stringKind = StringNormal) + : m_refCount(s_refCountFlagIsStaticString) + , m_length(charactersCount - 1) + , m_data8(characters) + , m_hashAndFlags(s_hashFlag8BitBuffer | stringKind | BufferInternal | (StringHasher::computeLiteralHashAndMaskTop8Bits(characters) << s_flagCount)) + { + } + + template<unsigned charactersCount> + constexpr StaticStringImpl(const char16_t (&characters)[charactersCount], StringKind stringKind = StringNormal) + : m_refCount(s_refCountFlagIsStaticString) + , m_length(charactersCount - 1) + , m_data16(characters) + , m_hashAndFlags(stringKind | BufferInternal | (StringHasher::computeLiteralHashAndMaskTop8Bits(characters) << s_flagCount)) + { + } + + operator StringImpl&() + { + return *reinterpret_cast<StringImpl*>(this); + } + + // These member variables must match the layout of StringImpl. + unsigned m_refCount; + unsigned m_length; + union { + const char* m_data8; + const char16_t* m_data16; + }; + unsigned m_hashAndFlags; + }; + + WTF_EXPORTDATA static StaticStringImpl s_atomicEmptyString; + ALWAYS_INLINE static StringImpl* empty() { return reinterpret_cast<StringImpl*>(&s_atomicEmptyString); } // FIXME: Does this really belong in StringImpl? template <typename T> static void copyChars(T* destination, const T* source, unsigned numCharacters) @@ -652,9 +615,9 @@ public: // Some string features, like refcounting and the atomicity flag, are not // thread-safe. We achieve thread safety by isolation, giving each thread // its own copy of the string. 
- PassRef<StringImpl> isolatedCopy() const; + Ref<StringImpl> isolatedCopy() const; - WTF_EXPORT_STRING_API PassRef<StringImpl> substring(unsigned pos, unsigned len = UINT_MAX); + WTF_EXPORT_STRING_API Ref<StringImpl> substring(unsigned pos, unsigned len = UINT_MAX); UChar at(unsigned i) const { @@ -686,23 +649,24 @@ public: double toDouble(bool* ok = 0); float toFloat(bool* ok = 0); - WTF_EXPORT_STRING_API PassRef<StringImpl> lower(); - WTF_EXPORT_STRING_API PassRef<StringImpl> upper(); - WTF_EXPORT_STRING_API PassRef<StringImpl> lower(const AtomicString& localeIdentifier); - WTF_EXPORT_STRING_API PassRef<StringImpl> upper(const AtomicString& localeIdentifier); + WTF_EXPORT_STRING_API Ref<StringImpl> convertToASCIILowercase(); + WTF_EXPORT_STRING_API Ref<StringImpl> convertToASCIIUppercase(); + WTF_EXPORT_STRING_API Ref<StringImpl> convertToLowercaseWithoutLocale(); + WTF_EXPORT_STRING_API Ref<StringImpl> convertToLowercaseWithoutLocaleStartingAtFailingIndex8Bit(unsigned); + WTF_EXPORT_STRING_API Ref<StringImpl> convertToUppercaseWithoutLocale(); + WTF_EXPORT_STRING_API Ref<StringImpl> convertToLowercaseWithLocale(const AtomicString& localeIdentifier); + WTF_EXPORT_STRING_API Ref<StringImpl> convertToUppercaseWithLocale(const AtomicString& localeIdentifier); - WTF_EXPORT_STRING_API PassRef<StringImpl> fill(UChar); - // FIXME: Do we need fill(char) or can we just do the right thing if UChar is ASCII? 
- PassRef<StringImpl> foldCase(); + Ref<StringImpl> foldCase(); - PassRef<StringImpl> stripWhiteSpace(); - PassRef<StringImpl> stripWhiteSpace(IsWhiteSpaceFunctionPtr); - WTF_EXPORT_STRING_API PassRef<StringImpl> simplifyWhiteSpace(); - PassRef<StringImpl> simplifyWhiteSpace(IsWhiteSpaceFunctionPtr); + Ref<StringImpl> stripWhiteSpace(); + Ref<StringImpl> stripWhiteSpace(IsWhiteSpaceFunctionPtr); + WTF_EXPORT_STRING_API Ref<StringImpl> simplifyWhiteSpace(); + Ref<StringImpl> simplifyWhiteSpace(IsWhiteSpaceFunctionPtr); - PassRef<StringImpl> removeCharacters(CharacterMatchFunctionPtr); + Ref<StringImpl> removeCharacters(CharacterMatchFunctionPtr); template <typename CharType> - ALWAYS_INLINE PassRef<StringImpl> removeCharacters(const CharType* characters, CharacterMatchFunctionPtr); + ALWAYS_INLINE Ref<StringImpl> removeCharacters(const CharType* characters, CharacterMatchFunctionPtr); size_t find(LChar character, unsigned start = 0); size_t find(char character, unsigned start = 0); @@ -715,33 +679,44 @@ public: size_t findIgnoringCase(const LChar*, unsigned index = 0); ALWAYS_INLINE size_t findIgnoringCase(const char* s, unsigned index = 0) { return findIgnoringCase(reinterpret_cast<const LChar*>(s), index); } WTF_EXPORT_STRING_API size_t findIgnoringCase(StringImpl*, unsigned index = 0); - - WTF_EXPORT_STRING_API size_t findNextLineStart(unsigned index = UINT_MAX); + WTF_EXPORT_STRING_API size_t findIgnoringASCIICase(const StringImpl&) const; + WTF_EXPORT_STRING_API size_t findIgnoringASCIICase(const StringImpl&, unsigned startOffset) const; + WTF_EXPORT_STRING_API size_t findIgnoringASCIICase(const StringImpl*) const; + WTF_EXPORT_STRING_API size_t findIgnoringASCIICase(const StringImpl*, unsigned startOffset) const; WTF_EXPORT_STRING_API size_t reverseFind(UChar, unsigned index = UINT_MAX); WTF_EXPORT_STRING_API size_t reverseFind(StringImpl*, unsigned index = UINT_MAX); WTF_EXPORT_STRING_API size_t reverseFindIgnoringCase(StringImpl*, unsigned index = UINT_MAX); 
WTF_EXPORT_STRING_API bool startsWith(const StringImpl*) const; + WTF_EXPORT_STRING_API bool startsWith(const StringImpl&) const; + WTF_EXPORT_STRING_API bool startsWithIgnoringASCIICase(const StringImpl*) const; + WTF_EXPORT_STRING_API bool startsWithIgnoringASCIICase(const StringImpl&) const; bool startsWith(StringImpl* str, bool caseSensitive) { return caseSensitive ? startsWith(str) : (reverseFindIgnoringCase(str, 0) == 0); } WTF_EXPORT_STRING_API bool startsWith(UChar) const; WTF_EXPORT_STRING_API bool startsWith(const char*, unsigned matchLength, bool caseSensitive) const; template<unsigned matchLength> bool startsWith(const char (&prefix)[matchLength], bool caseSensitive = true) const { return startsWith(prefix, matchLength - 1, caseSensitive); } + WTF_EXPORT_STRING_API bool hasInfixStartingAt(const StringImpl&, unsigned startOffset) const; - WTF_EXPORT_STRING_API bool endsWith(StringImpl*, bool caseSensitive = true); + WTF_EXPORT_STRING_API bool endsWith(StringImpl*); + WTF_EXPORT_STRING_API bool endsWith(StringImpl&); + WTF_EXPORT_STRING_API bool endsWithIgnoringASCIICase(const StringImpl*) const; + WTF_EXPORT_STRING_API bool endsWithIgnoringASCIICase(const StringImpl&) const; + WTF_EXPORT_STRING_API bool endsWith(StringImpl*, bool caseSensitive); WTF_EXPORT_STRING_API bool endsWith(UChar) const; WTF_EXPORT_STRING_API bool endsWith(const char*, unsigned matchLength, bool caseSensitive) const; template<unsigned matchLength> bool endsWith(const char (&prefix)[matchLength], bool caseSensitive = true) const { return endsWith(prefix, matchLength - 1, caseSensitive); } + WTF_EXPORT_STRING_API bool hasInfixEndingAt(const StringImpl&, unsigned endOffset) const; - WTF_EXPORT_STRING_API PassRef<StringImpl> replace(UChar, UChar); - WTF_EXPORT_STRING_API PassRef<StringImpl> replace(UChar, StringImpl*); - ALWAYS_INLINE PassRef<StringImpl> replace(UChar pattern, const char* replacement, unsigned replacementLength) { return replace(pattern, reinterpret_cast<const 
LChar*>(replacement), replacementLength); } - WTF_EXPORT_STRING_API PassRef<StringImpl> replace(UChar, const LChar*, unsigned replacementLength); - PassRef<StringImpl> replace(UChar, const UChar*, unsigned replacementLength); - WTF_EXPORT_STRING_API PassRef<StringImpl> replace(StringImpl*, StringImpl*); - WTF_EXPORT_STRING_API PassRef<StringImpl> replace(unsigned index, unsigned len, StringImpl*); + WTF_EXPORT_STRING_API Ref<StringImpl> replace(UChar, UChar); + WTF_EXPORT_STRING_API Ref<StringImpl> replace(UChar, StringImpl*); + ALWAYS_INLINE Ref<StringImpl> replace(UChar pattern, const char* replacement, unsigned replacementLength) { return replace(pattern, reinterpret_cast<const LChar*>(replacement), replacementLength); } + WTF_EXPORT_STRING_API Ref<StringImpl> replace(UChar, const LChar*, unsigned replacementLength); + Ref<StringImpl> replace(UChar, const UChar*, unsigned replacementLength); + WTF_EXPORT_STRING_API Ref<StringImpl> replace(StringImpl*, StringImpl*); + WTF_EXPORT_STRING_API Ref<StringImpl> replace(unsigned index, unsigned len, StringImpl*); WTF_EXPORT_STRING_API UCharDirection defaultWritingDirection(bool* hasStrongDirectionality = nullptr); @@ -749,14 +724,70 @@ public: RetainPtr<CFStringRef> createCFString(); #endif #ifdef __OBJC__ - WTF_EXPORT_STRING_API operator NSString*(); + WTF_EXPORT_STRING_API operator NSString *(); #endif -#ifdef STRING_STATS +#if STRING_STATS ALWAYS_INLINE static StringStats& stringStats() { return m_stringStats; } #endif - WTF_EXPORT_STRING_API static const UChar latin1CaseFoldTable[256]; +protected: + ~StringImpl(); + + enum CreateSymbolTag { CreateSymbol }; + + // Used to create new symbol strings that holds existing 8-bit [[Description]] string as a substring buffer (BufferSubstring). 
+ StringImpl(CreateSymbolTag, const LChar* characters, unsigned length) + : m_refCount(s_refCountIncrement) + , m_length(length) + , m_data8(characters) + , m_hashAndFlags(s_hashFlag8BitBuffer | StringSymbol | BufferSubstring) + { + ASSERT(is8Bit()); + ASSERT(m_data8); + STRING_STATS_ADD_8BIT_STRING2(m_length, true); + } + + // Used to create new symbol strings that holds existing 16-bit [[Description]] string as a substring buffer (BufferSubstring). + StringImpl(CreateSymbolTag, const UChar* characters, unsigned length) + : m_refCount(s_refCountIncrement) + , m_length(length) + , m_data16(characters) + , m_hashAndFlags(StringSymbol | BufferSubstring) + { + ASSERT(!is8Bit()); + ASSERT(m_data16); + STRING_STATS_ADD_16BIT_STRING2(m_length, true); + } + + // Null symbol. + StringImpl(CreateSymbolTag) + : m_refCount(s_refCountIncrement) + , m_length(0) + , m_data8(empty()->characters8()) + , m_hashAndFlags(s_hashFlag8BitBuffer | StringSymbol | BufferSubstring) + { + ASSERT(is8Bit()); + ASSERT(m_data8); + STRING_STATS_ADD_8BIT_STRING2(m_length, true); + } + + template<typename T> + static size_t allocationSize(unsigned tailElementCount) + { + return tailOffset<T>() + tailElementCount * sizeof(T); + } + + template<typename T> + static ptrdiff_t tailOffset() + { +#if COMPILER(MSVC) + // MSVC doesn't support alignof yet. 
+ return roundUpToMultipleOf<sizeof(T)>(sizeof(StringImpl)); +#else + return roundUpToMultipleOf<alignof(T)>(offsetof(StringImpl, m_hashAndFlags) + sizeof(StringImpl::m_hashAndFlags)); +#endif + } private: bool requiresCopy() const @@ -765,59 +796,61 @@ private: return true; if (is8Bit()) - return reinterpret_cast<const void*>(m_data8) == reinterpret_cast<const void*>(this + 1); - return reinterpret_cast<const void*>(m_data16) == reinterpret_cast<const void*>(this + 1); + return m_data8 == tailPointer<LChar>(); + return m_data16 == tailPointer<UChar>(); + } + + template<typename T> + const T* tailPointer() const + { + return reinterpret_cast_ptr<const T*>(reinterpret_cast<const uint8_t*>(this) + tailOffset<T>()); + } + + template<typename T> + T* tailPointer() + { + return reinterpret_cast_ptr<T*>(reinterpret_cast<uint8_t*>(this) + tailOffset<T>()); + } + + StringImpl* const& substringBuffer() const + { + ASSERT(bufferOwnership() == BufferSubstring); + + return *tailPointer<StringImpl*>(); + } + + StringImpl*& substringBuffer() + { + ASSERT(bufferOwnership() == BufferSubstring); + + return *tailPointer<StringImpl*>(); } // This number must be at least 2 to avoid sharing empty, null as well as 1 character strings from SmallStrings. 
static const unsigned s_copyCharsInlineCutOff = 20; + enum class CaseConvertType { Upper, Lower }; + template<CaseConvertType type, typename CharacterType> static Ref<StringImpl> convertASCIICase(StringImpl&, const CharacterType*, unsigned); + BufferOwnership bufferOwnership() const { return static_cast<BufferOwnership>(m_hashAndFlags & s_hashMaskBufferOwnership); } - template <class UCharPredicate> PassRef<StringImpl> stripMatchedCharacters(UCharPredicate); - template <typename CharType, class UCharPredicate> PassRef<StringImpl> simplifyMatchedCharactersToSpace(UCharPredicate); - template <typename CharType> static PassRef<StringImpl> constructInternal(StringImpl*, unsigned); - template <typename CharType> static PassRef<StringImpl> createUninitializedInternal(unsigned, CharType*&); - template <typename CharType> static PassRef<StringImpl> createUninitializedInternalNonEmpty(unsigned, CharType*&); - template <typename CharType> static PassRef<StringImpl> reallocateInternal(PassRefPtr<StringImpl>, unsigned, CharType*&); - template <typename CharType> static PassRef<StringImpl> createInternal(const CharType*, unsigned); - WTF_EXPORT_STRING_API NEVER_INLINE const UChar* getData16SlowCase() const; + template <class UCharPredicate> Ref<StringImpl> stripMatchedCharacters(UCharPredicate); + template <typename CharType, class UCharPredicate> Ref<StringImpl> simplifyMatchedCharactersToSpace(UCharPredicate); + template <typename CharType> static Ref<StringImpl> constructInternal(StringImpl*, unsigned); + template <typename CharType> static Ref<StringImpl> createUninitializedInternal(unsigned, CharType*&); + template <typename CharType> static Ref<StringImpl> createUninitializedInternalNonEmpty(unsigned, CharType*&); + template <typename CharType> static Ref<StringImpl> reallocateInternal(Ref<StringImpl>&&, unsigned, CharType*&); + template <typename CharType> static Ref<StringImpl> createInternal(const CharType*, unsigned); WTF_EXPORT_PRIVATE NEVER_INLINE unsigned 
hashSlowCase() const; // The bottom bit in the ref count indicates a static (immortal) string. static const unsigned s_refCountFlagIsStaticString = 0x1; static const unsigned s_refCountIncrement = 0x2; // This allows us to ref / deref without disturbing the static string flag. - // The bottom 7 bits in the hash are flags. - static const unsigned s_flagCount = 7; - static const unsigned s_flagMask = (1u << s_flagCount) - 1; - COMPILE_ASSERT(s_flagCount <= StringHasher::flagCount, StringHasher_reserves_enough_bits_for_StringImpl_flags); - - static const unsigned s_hashFlagHas16BitShadow = 1u << 6; - static const unsigned s_hashFlag8BitBuffer = 1u << 5; - static const unsigned s_hashFlagIsAtomic = 1u << 4; - static const unsigned s_hashFlagDidReportCost = 1u << 3; - static const unsigned s_hashFlagIsIdentifier = 1u << 2; - static const unsigned s_hashMaskBufferOwnership = 1u | (1u << 1); - -#ifdef STRING_STATS +#if STRING_STATS WTF_EXPORTDATA static StringStats m_stringStats; #endif public: - struct StaticASCIILiteral { - // These member variables must match the layout of StringImpl. - unsigned m_refCount; - unsigned m_length; - const LChar* m_data8; - void* m_buffer; - unsigned m_hashAndFlags; - - // These values mimic ConstructFromLiteral. - static const unsigned s_initialRefCount = s_refCountIncrement; - static const unsigned s_initialFlags = s_hashFlag8BitBuffer | BufferInternal; - static const unsigned s_hashShift = s_flagCount; - }; - #ifndef NDEBUG void assertHashIsCorrect() { @@ -827,25 +860,20 @@ public: #endif private: - // These member variables must match the layout of StaticASCIILiteral. + // These member variables must match the layout of StaticStringImpl. 
unsigned m_refCount; unsigned m_length; union { const LChar* m_data8; const UChar* m_data16; }; - union { - void* m_buffer; - StringImpl* m_substringBuffer; - mutable UChar* m_copyData16; - }; mutable unsigned m_hashAndFlags; }; -COMPILE_ASSERT(sizeof(StringImpl) == sizeof(StringImpl::StaticASCIILiteral), StringImpl_should_match_its_StaticASCIILiteral); +static_assert(sizeof(StringImpl) == sizeof(StringImpl::StaticStringImpl), ""); #if !ASSERT_DISABLED -// StringImpls created from StaticASCIILiteral will ASSERT +// StringImpls created from StaticStringImpl will ASSERT // in the generic ValueCheck<T>::checkConsistency // as they are not allocated by fastMalloc. // We don't currently have any way to detect that case @@ -857,15 +885,15 @@ ValueCheck<StringImpl*> { #endif template <> -ALWAYS_INLINE PassRef<StringImpl> StringImpl::constructInternal<LChar>(StringImpl* impl, unsigned length) { return adoptRef(*new (NotNull, impl) StringImpl(length, Force8BitConstructor)); } +ALWAYS_INLINE Ref<StringImpl> StringImpl::constructInternal<LChar>(StringImpl* impl, unsigned length) { return adoptRef(*new (NotNull, impl) StringImpl(length, Force8BitConstructor)); } template <> -ALWAYS_INLINE PassRef<StringImpl> StringImpl::constructInternal<UChar>(StringImpl* impl, unsigned length) { return adoptRef(*new (NotNull, impl) StringImpl(length)); } +ALWAYS_INLINE Ref<StringImpl> StringImpl::constructInternal<UChar>(StringImpl* impl, unsigned length) { return adoptRef(*new (NotNull, impl) StringImpl(length)); } template <> -ALWAYS_INLINE const LChar* StringImpl::getCharacters<LChar>() const { return characters8(); } +ALWAYS_INLINE const LChar* StringImpl::characters<LChar>() const { return characters8(); } template <> -ALWAYS_INLINE const UChar* StringImpl::getCharacters<UChar>() const { return deprecatedCharacters(); } +ALWAYS_INLINE const UChar* StringImpl::characters<UChar>() const { return characters16(); } WTF_EXPORT_STRING_API bool equal(const StringImpl*, const StringImpl*); 
WTF_EXPORT_STRING_API bool equal(const StringImpl*, const LChar*); @@ -875,266 +903,20 @@ WTF_EXPORT_STRING_API bool equal(const StringImpl*, const UChar*, unsigned); inline bool equal(const StringImpl* a, const char* b, unsigned length) { return equal(a, reinterpret_cast<const LChar*>(b), length); } inline bool equal(const LChar* a, StringImpl* b) { return equal(b, a); } inline bool equal(const char* a, StringImpl* b) { return equal(b, reinterpret_cast<const LChar*>(a)); } -WTF_EXPORT_STRING_API bool equalNonNull(const StringImpl* a, const StringImpl* b); - -// Do comparisons 8 or 4 bytes-at-a-time on architectures where it's safe. -#if CPU(X86_64) || CPU(ARM64) -ALWAYS_INLINE bool equal(const LChar* a, const LChar* b, unsigned length) -{ - unsigned dwordLength = length >> 3; - - if (dwordLength) { - const uint64_t* aDWordCharacters = reinterpret_cast<const uint64_t*>(a); - const uint64_t* bDWordCharacters = reinterpret_cast<const uint64_t*>(b); - - for (unsigned i = 0; i != dwordLength; ++i) { - if (*aDWordCharacters++ != *bDWordCharacters++) - return false; - } - - a = reinterpret_cast<const LChar*>(aDWordCharacters); - b = reinterpret_cast<const LChar*>(bDWordCharacters); - } - - if (length & 4) { - if (*reinterpret_cast<const uint32_t*>(a) != *reinterpret_cast<const uint32_t*>(b)) - return false; - - a += 4; - b += 4; - } - - if (length & 2) { - if (*reinterpret_cast<const uint16_t*>(a) != *reinterpret_cast<const uint16_t*>(b)) - return false; - - a += 2; - b += 2; - } - - if (length & 1 && (*a != *b)) - return false; - - return true; -} - -ALWAYS_INLINE bool equal(const UChar* a, const UChar* b, unsigned length) -{ - unsigned dwordLength = length >> 2; - - if (dwordLength) { - const uint64_t* aDWordCharacters = reinterpret_cast<const uint64_t*>(a); - const uint64_t* bDWordCharacters = reinterpret_cast<const uint64_t*>(b); - - for (unsigned i = 0; i != dwordLength; ++i) { - if (*aDWordCharacters++ != *bDWordCharacters++) - return false; - } - - a = 
reinterpret_cast<const UChar*>(aDWordCharacters); - b = reinterpret_cast<const UChar*>(bDWordCharacters); - } - - if (length & 2) { - if (*reinterpret_cast<const uint32_t*>(a) != *reinterpret_cast<const uint32_t*>(b)) - return false; - - a += 2; - b += 2; - } - - if (length & 1 && (*a != *b)) - return false; - - return true; -} -#elif CPU(X86) -ALWAYS_INLINE bool equal(const LChar* a, const LChar* b, unsigned length) -{ - const uint32_t* aCharacters = reinterpret_cast<const uint32_t*>(a); - const uint32_t* bCharacters = reinterpret_cast<const uint32_t*>(b); - - unsigned wordLength = length >> 2; - for (unsigned i = 0; i != wordLength; ++i) { - if (*aCharacters++ != *bCharacters++) - return false; - } - - length &= 3; - - if (length) { - const LChar* aRemainder = reinterpret_cast<const LChar*>(aCharacters); - const LChar* bRemainder = reinterpret_cast<const LChar*>(bCharacters); - - for (unsigned i = 0; i < length; ++i) { - if (aRemainder[i] != bRemainder[i]) - return false; - } - } - - return true; -} - -ALWAYS_INLINE bool equal(const UChar* a, const UChar* b, unsigned length) -{ - const uint32_t* aCharacters = reinterpret_cast<const uint32_t*>(a); - const uint32_t* bCharacters = reinterpret_cast<const uint32_t*>(b); - - unsigned wordLength = length >> 1; - for (unsigned i = 0; i != wordLength; ++i) { - if (*aCharacters++ != *bCharacters++) - return false; - } - - if (length & 1 && *reinterpret_cast<const UChar*>(aCharacters) != *reinterpret_cast<const UChar*>(bCharacters)) - return false; - - return true; -} -#elif PLATFORM(IOS) && WTF_ARM_ARCH_AT_LEAST(7) -ALWAYS_INLINE bool equal(const LChar* a, const LChar* b, unsigned length) -{ - bool isEqual = false; - uint32_t aValue; - uint32_t bValue; - asm("subs %[length], #4\n" - "blo 2f\n" - - "0:\n" // Label 0 = Start of loop over 32 bits. 
- "ldr %[aValue], [%[a]], #4\n" - "ldr %[bValue], [%[b]], #4\n" - "cmp %[aValue], %[bValue]\n" - "bne 66f\n" - "subs %[length], #4\n" - "bhs 0b\n" - - // At this point, length can be: - // -0: 00000000000000000000000000000000 (0 bytes left) - // -1: 11111111111111111111111111111111 (3 bytes left) - // -2: 11111111111111111111111111111110 (2 bytes left) - // -3: 11111111111111111111111111111101 (1 byte left) - // -4: 11111111111111111111111111111100 (length was 0) - // The pointers are at the correct position. - "2:\n" // Label 2 = End of loop over 32 bits, check for pair of characters. - "tst %[length], #2\n" - "beq 1f\n" - "ldrh %[aValue], [%[a]], #2\n" - "ldrh %[bValue], [%[b]], #2\n" - "cmp %[aValue], %[bValue]\n" - "bne 66f\n" - - "1:\n" // Label 1 = Check for a single character left. - "tst %[length], #1\n" - "beq 42f\n" - "ldrb %[aValue], [%[a]]\n" - "ldrb %[bValue], [%[b]]\n" - "cmp %[aValue], %[bValue]\n" - "bne 66f\n" - - "42:\n" // Label 42 = Success. - "mov %[isEqual], #1\n" - "66:\n" // Label 66 = End without changing isEqual to 1. - : [length]"+r"(length), [isEqual]"+r"(isEqual), [a]"+r"(a), [b]"+r"(b), [aValue]"+r"(aValue), [bValue]"+r"(bValue) - : - : - ); - return isEqual; -} - -ALWAYS_INLINE bool equal(const UChar* a, const UChar* b, unsigned length) -{ - bool isEqual = false; - uint32_t aValue; - uint32_t bValue; - asm("subs %[length], #2\n" - "blo 1f\n" - - "0:\n" // Label 0 = Start of loop over 32 bits. - "ldr %[aValue], [%[a]], #4\n" - "ldr %[bValue], [%[b]], #4\n" - "cmp %[aValue], %[bValue]\n" - "bne 66f\n" - "subs %[length], #2\n" - "bhs 0b\n" - - // At this point, length can be: - // -0: 00000000000000000000000000000000 (0 bytes left) - // -1: 11111111111111111111111111111111 (1 character left, 2 bytes) - // -2: 11111111111111111111111111111110 (length was zero) - // The pointers are at the correct position. - "1:\n" // Label 1 = Check for a single character left. 
- "tst %[length], #1\n" - "beq 42f\n" - "ldrh %[aValue], [%[a]]\n" - "ldrh %[bValue], [%[b]]\n" - "cmp %[aValue], %[bValue]\n" - "bne 66f\n" - - "42:\n" // Label 42 = Success. - "mov %[isEqual], #1\n" - "66:\n" // Label 66 = End without changing isEqual to 1. - : [length]"+r"(length), [isEqual]"+r"(isEqual), [a]"+r"(a), [b]"+r"(b), [aValue]"+r"(aValue), [bValue]"+r"(bValue) - : - : - ); - return isEqual; -} -#else -ALWAYS_INLINE bool equal(const LChar* a, const LChar* b, unsigned length) { return !memcmp(a, b, length); } -ALWAYS_INLINE bool equal(const UChar* a, const UChar* b, unsigned length) { return !memcmp(a, b, length * sizeof(UChar)); } -#endif - -ALWAYS_INLINE bool equal(const LChar* a, const UChar* b, unsigned length) -{ - for (unsigned i = 0; i < length; ++i) { - if (a[i] != b[i]) - return false; - } - return true; -} - -ALWAYS_INLINE bool equal(const UChar* a, const LChar* b, unsigned length) { return equal(b, a, length); } - -WTF_EXPORT_STRING_API bool equalIgnoringCase(const StringImpl*, const StringImpl*); -WTF_EXPORT_STRING_API bool equalIgnoringCase(const StringImpl*, const LChar*); -inline bool equalIgnoringCase(const LChar* a, const StringImpl* b) { return equalIgnoringCase(b, a); } -WTF_EXPORT_STRING_API bool equalIgnoringCase(const LChar*, const LChar*, unsigned); -WTF_EXPORT_STRING_API bool equalIgnoringCase(const UChar*, const LChar*, unsigned); -inline bool equalIgnoringCase(const UChar* a, const char* b, unsigned length) { return equalIgnoringCase(a, reinterpret_cast<const LChar*>(b), length); } -inline bool equalIgnoringCase(const LChar* a, const UChar* b, unsigned length) { return equalIgnoringCase(b, a, length); } -inline bool equalIgnoringCase(const char* a, const UChar* b, unsigned length) { return equalIgnoringCase(b, reinterpret_cast<const LChar*>(a), length); } -inline bool equalIgnoringCase(const char* a, const LChar* b, unsigned length) { return equalIgnoringCase(b, reinterpret_cast<const LChar*>(a), length); } -inline bool 
equalIgnoringCase(const UChar* a, const UChar* b, int length) -{ - ASSERT(length >= 0); - return !u_memcasecmp(a, b, length, U_FOLD_CASE_DEFAULT); -} -WTF_EXPORT_STRING_API bool equalIgnoringCaseNonNull(const StringImpl*, const StringImpl*); +WTF_EXPORT_STRING_API bool equal(const StringImpl& a, const StringImpl& b); WTF_EXPORT_STRING_API bool equalIgnoringNullity(StringImpl*, StringImpl*); +WTF_EXPORT_STRING_API bool equalIgnoringNullity(const UChar*, size_t length, StringImpl*); -template<typename CharacterType> -inline size_t find(const CharacterType* characters, unsigned length, CharacterType matchCharacter, unsigned index = 0) -{ - while (index < length) { - if (characters[index] == matchCharacter) - return index; - ++index; - } - return notFound; -} +bool equalIgnoringASCIICase(const StringImpl&, const StringImpl&); +WTF_EXPORT_STRING_API bool equalIgnoringASCIICase(const StringImpl*, const StringImpl*); +bool equalIgnoringASCIICase(const StringImpl&, const char*); +bool equalIgnoringASCIICase(const StringImpl*, const char*); -ALWAYS_INLINE size_t find(const UChar* characters, unsigned length, LChar matchCharacter, unsigned index = 0) -{ - return find(characters, length, static_cast<UChar>(matchCharacter), index); -} +WTF_EXPORT_STRING_API bool equalIgnoringASCIICaseNonNull(const StringImpl*, const StringImpl*); -inline size_t find(const LChar* characters, unsigned length, UChar matchCharacter, unsigned index = 0) -{ - if (matchCharacter & ~0xFF) - return notFound; - return find(characters, length, static_cast<LChar>(matchCharacter), index); -} +template<unsigned length> bool equalLettersIgnoringASCIICase(const StringImpl&, const char (&lowercaseLetters)[length]); +template<unsigned length> bool equalLettersIgnoringASCIICase(const StringImpl*, const char (&lowercaseLetters)[length]); inline size_t find(const LChar* characters, unsigned length, CharacterMatchFunctionPtr matchFunction, unsigned index = 0) { @@ -1157,37 +939,6 @@ inline size_t find(const UChar* 
characters, unsigned length, CharacterMatchFunct } template<typename CharacterType> -inline size_t findNextLineStart(const CharacterType* characters, unsigned length, unsigned index = 0) -{ - while (index < length) { - CharacterType c = characters[index++]; - if ((c != '\n') && (c != '\r')) - continue; - - // There can only be a start of a new line if there are more characters - // beyond the current character. - if (index < length) { - // The 3 common types of line terminators are 1. \r\n (Windows), - // 2. \r (old MacOS) and 3. \n (Unix'es). - - if (c == '\n') - return index; // Case 3: just \n. - - CharacterType c2 = characters[index]; - if (c2 != '\n') - return index; // Case 2: just \r. - - // Case 1: \r\n. - // But, there's only a start of a new line if there are more - // characters beyond the \r\n. - if (++index < length) - return index; - } - } - return notFound; -} - -template<typename CharacterType> inline size_t reverseFindLineTerminator(const CharacterType* characters, unsigned length, unsigned index = UINT_MAX) { if (!length) @@ -1248,18 +999,13 @@ inline size_t StringImpl::find(UChar character, unsigned start) return WTF::find(characters16(), m_length, character, start); } -template<size_t inlineCapacity> -bool equalIgnoringNullity(const Vector<UChar, inlineCapacity>& a, StringImpl* b) +template<size_t inlineCapacity> inline bool equalIgnoringNullity(const Vector<UChar, inlineCapacity>& a, StringImpl* b) { - if (!b) - return !a.size(); - if (a.size() != b->length()) - return false; - return !memcmp(a.data(), b->deprecatedCharacters(), b->length() * sizeof(UChar)); + return equalIgnoringNullity(a.data(), a.size(), b); } template<typename CharacterType1, typename CharacterType2> -static inline int codePointCompare(unsigned l1, unsigned l2, const CharacterType1* c1, const CharacterType2* c2) +inline int codePointCompare(unsigned l1, unsigned l2, const CharacterType1* c1, const CharacterType2* c2) { const unsigned lmin = l1 < l2 ? 
l1 : l2; unsigned pos = 0; @@ -1278,22 +1024,22 @@ static inline int codePointCompare(unsigned l1, unsigned l2, const CharacterType return (l1 > l2) ? 1 : -1; } -static inline int codePointCompare8(const StringImpl* string1, const StringImpl* string2) +inline int codePointCompare8(const StringImpl* string1, const StringImpl* string2) { return codePointCompare(string1->length(), string2->length(), string1->characters8(), string2->characters8()); } -static inline int codePointCompare16(const StringImpl* string1, const StringImpl* string2) +inline int codePointCompare16(const StringImpl* string1, const StringImpl* string2) { return codePointCompare(string1->length(), string2->length(), string1->characters16(), string2->characters16()); } -static inline int codePointCompare8To16(const StringImpl* string1, const StringImpl* string2) +inline int codePointCompare8To16(const StringImpl* string1, const StringImpl* string2) { return codePointCompare(string1->length(), string2->length(), string1->characters8(), string2->characters16()); } -static inline int codePointCompare(const StringImpl* string1, const StringImpl* string2) +inline int codePointCompare(const StringImpl* string1, const StringImpl* string2) { if (!string1) return (string2 && string2->length()) ? -1 : 0; @@ -1313,7 +1059,7 @@ static inline int codePointCompare(const StringImpl* string1, const StringImpl* return codePointCompare16(string1, string2); } -static inline bool isSpaceOrNewline(UChar c) +inline bool isSpaceOrNewline(UChar c) { // Use isASCIISpace() for basic Latin-1. // This will include newlines, which aren't included in Unicode DirWS. 
@@ -1332,7 +1078,7 @@ inline unsigned lengthOfNullTerminatedString(const CharacterType* string) return static_cast<unsigned>(length); } -inline PassRef<StringImpl> StringImpl::isolatedCopy() const +inline Ref<StringImpl> StringImpl::isolatedCopy() const { if (!requiresCopy()) { if (is8Bit()) @@ -1345,8 +1091,6 @@ inline PassRef<StringImpl> StringImpl::isolatedCopy() const return create(m_data16, m_length); } -struct StringHash; - // StringHash is the default hash for StringImpl* and RefPtr<StringImpl> template<typename T> struct DefaultHash; template<> struct DefaultHash<StringImpl*> { @@ -1356,11 +1100,45 @@ template<> struct DefaultHash<RefPtr<StringImpl>> { typedef StringHash Hash; }; +inline bool equalIgnoringASCIICase(const StringImpl& a, const StringImpl& b) +{ + return equalIgnoringASCIICaseCommon(a, b); +} + +inline bool equalIgnoringASCIICase(const StringImpl& a, const char* b) +{ + return equalIgnoringASCIICaseCommon(a, b); +} + +inline bool equalIgnoringASCIICase(const StringImpl* a, const char* b) +{ + return a && equalIgnoringASCIICase(*a, b); +} + +template<unsigned length> inline bool startsWithLettersIgnoringASCIICase(const StringImpl& string, const char (&lowercaseLetters)[length]) +{ + return startsWithLettersIgnoringASCIICaseCommon(string, lowercaseLetters); +} + +template<unsigned length> inline bool startsWithLettersIgnoringASCIICase(const StringImpl* string, const char (&lowercaseLetters)[length]) +{ + return string && startsWithLettersIgnoringASCIICase(*string, lowercaseLetters); +} + +template<unsigned length> inline bool equalLettersIgnoringASCIICase(const StringImpl& string, const char (&lowercaseLetters)[length]) +{ + return equalLettersIgnoringASCIICaseCommon(string, lowercaseLetters); +} + +template<unsigned length> inline bool equalLettersIgnoringASCIICase(const StringImpl* string, const char (&lowercaseLetters)[length]) +{ + return string && equalLettersIgnoringASCIICase(*string, lowercaseLetters); +} + } // namespace WTF using 
WTF::StringImpl; using WTF::equal; -using WTF::equalNonNull; using WTF::TextCaseSensitivity; using WTF::TextCaseSensitive; using WTF::TextCaseInsensitive; |