diff options
Diffstat (limited to 'Source/WTF/wtf/Hasher.h')
-rw-r--r-- | Source/WTF/wtf/Hasher.h | 389 |
1 files changed, 389 insertions, 0 deletions
diff --git a/Source/WTF/wtf/Hasher.h b/Source/WTF/wtf/Hasher.h new file mode 100644 index 000000000..c0a2e8cf2 --- /dev/null +++ b/Source/WTF/wtf/Hasher.h @@ -0,0 +1,389 @@ +/* + * Copyright (C) 2005-2006, 2008, 2010, 2013, 2016 Apple Inc. All rights reserved. + * Copyright (C) 2010 Patrick Gansterer <paroga@paroga.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public License + * along with this library; see the file COPYING.LIB. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + +#ifndef WTF_Hasher_h +#define WTF_Hasher_h + +#include <unicode/utypes.h> +#include <wtf/text/LChar.h> + +namespace WTF { + +// Paul Hsieh's SuperFastHash +// http://www.azillionmonkeys.com/qed/hash.html + +// LChar data is interpreted as Latin-1-encoded (zero extended to 16 bits). + +// NOTE: The hash computation here must stay in sync with the create_hash_table script in +// JavaScriptCore and the CodeGeneratorJS.pm script in WebCore. + +// Golden ratio. Arbitrary start value to avoid mapping all zeros to a hash value of zero. +static constexpr const unsigned stringHashingStartValue = 0x9E3779B9U; + +class StringHasher { +public: + static constexpr const unsigned flagCount = 8; // Save 8 bits for StringImpl to use as flags. + static constexpr const unsigned maskHash = (1U << (sizeof(unsigned) * 8 - flagCount)) - 1; + + StringHasher() + : m_hash(stringHashingStartValue) + , m_hasPendingCharacter(false) + , m_pendingCharacter(0) + { + } + + // The hasher hashes two characters at a time, and thus an "aligned" hasher is one + // where an even number of characters have been added. Callers that always add + // characters two at a time can use the "assuming aligned" functions. + void addCharactersAssumingAligned(UChar a, UChar b) + { + ASSERT(!m_hasPendingCharacter); + m_hash += a; + m_hash = (m_hash << 16) ^ ((b << 11) ^ m_hash); + m_hash += m_hash >> 11; + } + + void addCharacter(UChar character) + { + if (m_hasPendingCharacter) { + m_hasPendingCharacter = false; + addCharactersAssumingAligned(m_pendingCharacter, character); + return; + } + + m_pendingCharacter = character; + m_hasPendingCharacter = true; + } + + void addCharacters(UChar a, UChar b) + { + if (m_hasPendingCharacter) { +#if !ASSERT_DISABLED + m_hasPendingCharacter = false; +#endif + addCharactersAssumingAligned(m_pendingCharacter, a); + m_pendingCharacter = b; +#if !ASSERT_DISABLED + m_hasPendingCharacter = true; +#endif + return; + } + + addCharactersAssumingAligned(a, b); + } + + template<typename T, UChar Converter(T)> void addCharactersAssumingAligned(const T* data, unsigned length) + { + ASSERT(!m_hasPendingCharacter); + + bool remainder = length & 1; + length >>= 1; + + while (length--) { + addCharactersAssumingAligned(Converter(data[0]), Converter(data[1])); + data += 2; + } + + if (remainder) + addCharacter(Converter(*data)); + } + + template<typename T> void addCharactersAssumingAligned(const T* data, unsigned length) + { + addCharactersAssumingAligned<T, defaultConverter>(data, length); + } + + template<typename T, UChar Converter(T)> void addCharactersAssumingAligned(const T* data) + { + ASSERT(!m_hasPendingCharacter); + + while (T a = *data++) { + T b = *data++; + if (!b) { + addCharacter(Converter(a)); + break; + } + addCharactersAssumingAligned(Converter(a), Converter(b)); + } + } + + template<typename T> void addCharactersAssumingAligned(const T* data) + { + addCharactersAssumingAligned<T, defaultConverter>(data); + } + + template<typename T, UChar Converter(T)> void addCharacters(const T* data, unsigned length) + { + if (!length) + return; + if (m_hasPendingCharacter) { + m_hasPendingCharacter = false; + addCharactersAssumingAligned(m_pendingCharacter, Converter(*data++)); + --length; + } + addCharactersAssumingAligned<T, Converter>(data, length); + } + + template<typename T> void addCharacters(const T* data, unsigned length) + { + addCharacters<T, defaultConverter>(data, length); + } + + template<typename T, UChar Converter(T)> void addCharacters(const T* data) + { + if (m_hasPendingCharacter && *data) { + m_hasPendingCharacter = false; + addCharactersAssumingAligned(m_pendingCharacter, Converter(*data++)); + } + addCharactersAssumingAligned<T, Converter>(data); + } + + template<typename T> void addCharacters(const T* data) + { + addCharacters<T, defaultConverter>(data); + } + + unsigned hashWithTop8BitsMasked() const + { + return finalizeAndMaskTop8Bits(processPendingCharacter()); + } + + unsigned hash() const + { + return finalize(processPendingCharacter()); + } + + template<typename T, UChar Converter(T)> static unsigned computeHashAndMaskTop8Bits(const T* data, unsigned length) + { + StringHasher hasher; + hasher.addCharactersAssumingAligned<T, Converter>(data, length); + return hasher.hashWithTop8BitsMasked(); + } + + template<typename T, UChar Converter(T)> static unsigned computeHashAndMaskTop8Bits(const T* data) + { + StringHasher hasher; + hasher.addCharactersAssumingAligned<T, Converter>(data); + return hasher.hashWithTop8BitsMasked(); + } + + template<typename T> static unsigned computeHashAndMaskTop8Bits(const T* data, unsigned length) + { + return computeHashAndMaskTop8Bits<T, defaultConverter>(data, length); + } + + template<typename T> static unsigned computeHashAndMaskTop8Bits(const T* data) + { + return computeHashAndMaskTop8Bits<T, defaultConverter>(data); + } + + template<typename T, UChar Converter(T)> static unsigned computeHash(const T* data, unsigned length) + { + StringHasher hasher; + hasher.addCharactersAssumingAligned<T, Converter>(data, length); + return hasher.hash(); + } + + template<typename T, UChar Converter(T)> static unsigned computeHash(const T* data) + { + StringHasher hasher; + hasher.addCharactersAssumingAligned<T, Converter>(data); + return hasher.hash(); + } + + template<typename T> static unsigned computeHash(const T* data, unsigned length) + { + return computeHash<T, defaultConverter>(data, length); + } + + template<typename T> static unsigned computeHash(const T* data) + { + return computeHash<T, defaultConverter>(data); + } + + static unsigned hashMemory(const void* data, unsigned length) + { + size_t lengthInUChar = length / sizeof(UChar); + StringHasher hasher; + hasher.addCharactersAssumingAligned(static_cast<const UChar*>(data), lengthInUChar); + + for (size_t i = 0; i < length % sizeof(UChar); ++i) + hasher.addCharacter(static_cast<const char*>(data)[lengthInUChar * sizeof(UChar) + i]); + + return hasher.hash(); + } + + template<size_t length> static unsigned hashMemory(const void* data) + { + return hashMemory(data, length); + } + + static constexpr unsigned finalize(unsigned hash) + { + return avoidZero(avalancheBits(hash)); + } + + static constexpr unsigned finalizeAndMaskTop8Bits(unsigned hash) + { + // Reserving space from the high bits for flags preserves most of the hash's + // value, since hash lookup typically masks out the high bits anyway. + return avoidZero(avalancheBits(hash) & StringHasher::maskHash); + } + + template<typename T, unsigned charactersCount> + static constexpr unsigned computeLiteralHash(const T (&characters)[charactersCount]) + { + return StringHasher::finalize(computeLiteralHashImpl(stringHashingStartValue, 0, characters, charactersCount - 1)); + } + + template<typename T, unsigned charactersCount> + static constexpr unsigned computeLiteralHashAndMaskTop8Bits(const T (&characters)[charactersCount]) + { + return StringHasher::finalizeAndMaskTop8Bits(computeLiteralHashImpl(stringHashingStartValue, 0, characters, charactersCount - 1)); + } + +private: + static UChar defaultConverter(UChar character) + { + return character; + } + + static UChar defaultConverter(LChar character) + { + return character; + } + + ALWAYS_INLINE static constexpr unsigned avalancheBits3(unsigned hash) + { + return hash ^ (hash << 10); + } + + ALWAYS_INLINE static constexpr unsigned avalancheBits2(unsigned hash) + { + return avalancheBits3(hash + (hash >> 15)); + } + + ALWAYS_INLINE static constexpr unsigned avalancheBits1(unsigned hash) + { + return avalancheBits2(hash ^ (hash << 2)); + } + + ALWAYS_INLINE static constexpr unsigned avalancheBits0(unsigned hash) + { + return avalancheBits1(hash + (hash >> 5)); + } + + ALWAYS_INLINE static constexpr unsigned avalancheBits(unsigned hash) + { + return avalancheBits0(hash ^ (hash << 3)); + } + + // This avoids ever returning a hash code of 0, since that is used to + // signal "hash not computed yet". Setting the high bit maintains + // reasonable fidelity to a hash code of 0 because it is likely to yield + // exactly 0 when hash lookup masks out the high bits. + ALWAYS_INLINE static constexpr unsigned avoidZero(unsigned hash) + { + return hash ? hash : (0x80000000 >> StringHasher::flagCount); + } + + unsigned processPendingCharacter() const + { + unsigned result = m_hash; + + // Handle end case. + if (m_hasPendingCharacter) { + result += m_pendingCharacter; + result ^= result << 11; + result += result >> 17; + } + return result; + } + + + // FIXME: This code limits itself to the older, more limited C++11 constexpr capabilities, using + // recursion instead of looping, for example. Would be nice to rewrite this in a simpler way + // once we no longer need to support compilers like GCC 4.9 that do not yet support it. + static constexpr unsigned calculateWithRemainingLastCharacter1(unsigned hash) + { + return hash + (hash >> 17); + } + + static constexpr unsigned calculateWithRemainingLastCharacter0(unsigned hash) + { + return calculateWithRemainingLastCharacter1((hash << 11) ^ hash); + } + + static constexpr unsigned calculateWithRemainingLastCharacter(unsigned hash, unsigned character) + { + return calculateWithRemainingLastCharacter0(hash + character); + } + + static constexpr unsigned calculate1(unsigned hash) + { + return hash + (hash >> 11); + } + + static constexpr unsigned calculate0(unsigned hash, unsigned secondCharacter) + { + return calculate1((hash << 16) ^ ((secondCharacter << 11) ^ hash)); + } + + static constexpr unsigned calculate(unsigned hash, unsigned firstCharacter, unsigned secondCharacter) + { + return calculate0(hash + firstCharacter, secondCharacter); + } + + static constexpr unsigned computeLiteralHashImpl(unsigned hash, unsigned index, const char* characters, unsigned length) + { + return (index == length) + ? hash : ((index + 1) == length) + ? calculateWithRemainingLastCharacter(hash, characters[index]) + : computeLiteralHashImpl(calculate(hash, characters[index], characters[index + 1]), index + 2, characters, length); + } + + unsigned m_hash; + bool m_hasPendingCharacter; + UChar m_pendingCharacter; +}; + +class IntegerHasher { +public: + void add(unsigned integer) + { + m_underlyingHasher.addCharactersAssumingAligned(integer, integer >> 16); + } + + unsigned hash() const + { + return m_underlyingHasher.hash(); + } + +private: + StringHasher m_underlyingHasher; +}; + +} // namespace WTF + +using WTF::IntegerHasher; +using WTF::StringHasher; + +#endif // WTF_Hasher_h |