diff options
author | Lorry Tar Creator <lorry-tar-importer@lorry> | 2017-06-27 06:07:23 +0000 |
---|---|---|
committer | Lorry Tar Creator <lorry-tar-importer@lorry> | 2017-06-27 06:07:23 +0000 |
commit | 1bf1084f2b10c3b47fd1a588d85d21ed0eb41d0c (patch) | |
tree | 46dcd36c86e7fbc6e5df36deb463b33e9967a6f7 /Source/WebCore/contentextensions/NFAToDFA.cpp | |
parent | 32761a6cee1d0dee366b885b7b9c777e67885688 (diff) | |
download | WebKitGtk-tarball-master.tar.gz |
webkitgtk-2.16.5HEADwebkitgtk-2.16.5master
Diffstat (limited to 'Source/WebCore/contentextensions/NFAToDFA.cpp')
-rw-r--r-- | Source/WebCore/contentextensions/NFAToDFA.cpp | 398 |
1 files changed, 398 insertions, 0 deletions
diff --git a/Source/WebCore/contentextensions/NFAToDFA.cpp b/Source/WebCore/contentextensions/NFAToDFA.cpp new file mode 100644 index 000000000..843df3008 --- /dev/null +++ b/Source/WebCore/contentextensions/NFAToDFA.cpp @@ -0,0 +1,398 @@ +/* + * Copyright (C) 2014, 2015 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "NFAToDFA.h" + +#if ENABLE(CONTENT_EXTENSIONS) + +#include "ContentExtensionsDebugging.h" +#include "DFANode.h" +#include "ImmutableNFA.h" +#include "MutableRangeList.h" +#include "NFA.h" +#include <wtf/DataLog.h> +#include <wtf/HashMap.h> +#include <wtf/HashSet.h> + +namespace WebCore { + +namespace ContentExtensions { + +typedef MutableRange<char, NFANodeIndexSet> NFANodeRange; +typedef MutableRangeList<char, NFANodeIndexSet> NFANodeRangeList; +typedef MutableRangeList<char, NFANodeIndexSet, 128> PreallocatedNFANodeRangeList; +typedef Vector<uint32_t, 0, ContentExtensionsOverflowHandler> UniqueNodeList; +typedef Vector<UniqueNodeList, 0, ContentExtensionsOverflowHandler> NFANodeClosures; + +// FIXME: set a better initial size. +// FIXME: include the hash inside NodeIdSet. +typedef NFANodeIndexSet NodeIdSet; + +static inline void epsilonClosureExcludingSelf(NFA& nfa, unsigned nodeId, UniqueNodeList& output) +{ + NodeIdSet closure({ nodeId }); + Vector<unsigned, 64, ContentExtensionsOverflowHandler> unprocessedNodes({ nodeId }); + + do { + unsigned unprocessedNodeId = unprocessedNodes.takeLast(); + const auto& node = nfa.nodes[unprocessedNodeId]; + + for (uint32_t epsilonTargetIndex = node.epsilonTransitionTargetsStart; epsilonTargetIndex < node.epsilonTransitionTargetsEnd; ++epsilonTargetIndex) { + uint32_t targetNodeId = nfa.epsilonTransitionsTargets[epsilonTargetIndex]; + auto addResult = closure.add(targetNodeId); + if (addResult.isNewEntry) { + unprocessedNodes.append(targetNodeId); + output.append(targetNodeId); + } + } + } while (!unprocessedNodes.isEmpty()); + + output.shrinkToFit(); +} + +static void resolveEpsilonClosures(NFA& nfa, NFANodeClosures& nfaNodeClosures) +{ + unsigned nfaGraphSize = nfa.nodes.size(); + nfaNodeClosures.resize(nfaGraphSize); + + for (unsigned nodeId = 0; nodeId < nfaGraphSize; ++nodeId) + epsilonClosureExcludingSelf(nfa, nodeId, nfaNodeClosures[nodeId]); + + // Every nodes still point to that table, but we won't use it ever again. + // Clear it to get back the memory. That's not pretty but memory is important here, we have both + // graphs existing at the same time. + nfa.epsilonTransitionsTargets.clear(); +} + +static ALWAYS_INLINE void extendSetWithClosure(const NFANodeClosures& nfaNodeClosures, unsigned nodeId, NodeIdSet& set) +{ + ASSERT(set.contains(nodeId)); + const UniqueNodeList& nodeClosure = nfaNodeClosures[nodeId]; + if (!nodeClosure.isEmpty()) + set.add(nodeClosure.begin(), nodeClosure.end()); +} + +struct UniqueNodeIdSetImpl { + unsigned* buffer() + { + return m_buffer; + } + + const unsigned* buffer() const + { + return m_buffer; + } + + unsigned m_size; + unsigned m_hash; + unsigned m_dfaNodeId; +private: + unsigned m_buffer[1]; +}; + +typedef Vector<UniqueNodeIdSetImpl*, 128, ContentExtensionsOverflowHandler> UniqueNodeQueue; + +class UniqueNodeIdSet { +public: + UniqueNodeIdSet() { } + enum EmptyValueTag { EmptyValue }; + enum DeletedValueTag { DeletedValue }; + + UniqueNodeIdSet(EmptyValueTag) { } + UniqueNodeIdSet(DeletedValueTag) + : m_uniqueNodeIdSetBuffer(reinterpret_cast<UniqueNodeIdSetImpl*>(-1)) + { + } + + UniqueNodeIdSet(const NodeIdSet& nodeIdSet, unsigned hash, unsigned dfaNodeId) + { + ASSERT(nodeIdSet.size()); + + unsigned size = nodeIdSet.size(); + size_t byteSize = sizeof(UniqueNodeIdSetImpl) + (size - 1) * sizeof(unsigned); + m_uniqueNodeIdSetBuffer = static_cast<UniqueNodeIdSetImpl*>(fastMalloc(byteSize)); + + m_uniqueNodeIdSetBuffer->m_size = size; + m_uniqueNodeIdSetBuffer->m_hash = hash; + m_uniqueNodeIdSetBuffer->m_dfaNodeId = dfaNodeId; + + unsigned* buffer = m_uniqueNodeIdSetBuffer->buffer(); + for (unsigned nodeId : nodeIdSet) { + *buffer = nodeId; + ++buffer; + } + } + + UniqueNodeIdSet(UniqueNodeIdSet&& other) + : m_uniqueNodeIdSetBuffer(other.m_uniqueNodeIdSetBuffer) + { + other.m_uniqueNodeIdSetBuffer = nullptr; + } + + UniqueNodeIdSet& operator=(UniqueNodeIdSet&& other) + { + m_uniqueNodeIdSetBuffer = other.m_uniqueNodeIdSetBuffer; + other.m_uniqueNodeIdSetBuffer = nullptr; + return *this; + } + + ~UniqueNodeIdSet() + { + fastFree(m_uniqueNodeIdSetBuffer); + } + + bool operator==(const UniqueNodeIdSet& other) const + { + return m_uniqueNodeIdSetBuffer == other.m_uniqueNodeIdSetBuffer; + } + + bool operator==(const NodeIdSet& other) const + { + if (m_uniqueNodeIdSetBuffer->m_size != static_cast<unsigned>(other.size())) + return false; + unsigned* buffer = m_uniqueNodeIdSetBuffer->buffer(); + for (unsigned i = 0; i < m_uniqueNodeIdSetBuffer->m_size; ++i) { + if (!other.contains(buffer[i])) + return false; + } + return true; + } + + UniqueNodeIdSetImpl* impl() const { return m_uniqueNodeIdSetBuffer; } + + unsigned hash() const { return m_uniqueNodeIdSetBuffer->m_hash; } + bool isEmptyValue() const { return !m_uniqueNodeIdSetBuffer; } + bool isDeletedValue() const { return m_uniqueNodeIdSetBuffer == reinterpret_cast<UniqueNodeIdSetImpl*>(-1); } + +private: + UniqueNodeIdSetImpl* m_uniqueNodeIdSetBuffer = nullptr; +}; + +struct UniqueNodeIdSetHash { + static unsigned hash(const UniqueNodeIdSet& p) + { + return p.hash(); + } + + static bool equal(const UniqueNodeIdSet& a, const UniqueNodeIdSet& b) + { + return a == b; + } + // It would be fine to compare empty or deleted here, but not for the HashTranslator. + static const bool safeToCompareToEmptyOrDeleted = false; +}; + +struct UniqueNodeIdSetHashHashTraits : public WTF::CustomHashTraits<UniqueNodeIdSet> { + static const bool emptyValueIsZero = true; + + // FIXME: Get a good size. + static const int minimumTableSize = 128; +}; + +typedef HashSet<std::unique_ptr<UniqueNodeIdSet>, UniqueNodeIdSetHash, UniqueNodeIdSetHashHashTraits> UniqueNodeIdSetTable; + +struct NodeIdSetToUniqueNodeIdSetSource { + NodeIdSetToUniqueNodeIdSetSource(DFA& dfa, const NFA& nfa, const NodeIdSet& nodeIdSet) + : dfa(dfa) + , nfa(nfa) + , nodeIdSet(nodeIdSet) + { + // The hashing operation must be independant of the nodeId. + unsigned hash = 4207445155; + for (unsigned nodeId : nodeIdSet) + hash += nodeId; + this->hash = DefaultHash<unsigned>::Hash::hash(hash); + } + DFA& dfa; + const NFA& nfa; + const NodeIdSet& nodeIdSet; + unsigned hash; +}; + +struct NodeIdSetToUniqueNodeIdSetTranslator { + static unsigned hash(const NodeIdSetToUniqueNodeIdSetSource& source) + { + return source.hash; + } + + static inline bool equal(const UniqueNodeIdSet& a, const NodeIdSetToUniqueNodeIdSetSource& b) + { + return a == b.nodeIdSet; + } + + static void translate(UniqueNodeIdSet& location, const NodeIdSetToUniqueNodeIdSetSource& source, unsigned hash) + { + DFANode newDFANode; + + HashSet<uint64_t, DefaultHash<uint64_t>::Hash, WTF::UnsignedWithZeroKeyHashTraits<uint64_t>> actions; + + for (unsigned nfaNodeId : source.nodeIdSet) { + const auto& nfaNode = source.nfa.nodes[nfaNodeId]; + for (unsigned actionIndex = nfaNode.actionStart; actionIndex < nfaNode.actionEnd; ++actionIndex) + actions.add(source.nfa.actions[actionIndex]); + } + + unsigned actionsStart = source.dfa.actions.size(); + for (uint64_t action : actions) + source.dfa.actions.append(action); + unsigned actionsEnd = source.dfa.actions.size(); + unsigned actionsLength = actionsEnd - actionsStart; + RELEASE_ASSERT_WITH_MESSAGE(actionsLength <= std::numeric_limits<uint16_t>::max(), "Too many actions for the current DFANode size."); + newDFANode.setActions(actionsStart, static_cast<uint16_t>(actionsLength)); + + unsigned dfaNodeId = source.dfa.nodes.size(); + source.dfa.nodes.append(newDFANode); + new (NotNull, &location) UniqueNodeIdSet(source.nodeIdSet, hash, dfaNodeId); + + ASSERT(location.impl()); + } +}; + +class SetTransitions { +public: + NodeIdSet& operator[](unsigned index) + { + ASSERT(index < size()); + return m_targets[index]; + } + + unsigned size() const + { + return WTF_ARRAY_LENGTH(m_targets); + } + + NodeIdSet* begin() + { + return m_targets; + } + + NodeIdSet* end() + { + return m_targets + size(); + } + +private: + NodeIdSet m_targets[128]; +}; + +struct DataConverterWithEpsilonClosure { + const NFANodeClosures& nfaNodeclosures; + + template<typename Iterable> + NFANodeIndexSet convert(const Iterable& iterable) + { + NFANodeIndexSet result; + for (unsigned nodeId : iterable) { + result.add(nodeId); + const UniqueNodeList& nodeClosure = nfaNodeclosures[nodeId]; + result.add(nodeClosure.begin(), nodeClosure.end()); + } + return result; + } + + template<typename Iterable> + void extend(NFANodeIndexSet& destination, const Iterable& iterable) + { + for (unsigned nodeId : iterable) { + auto addResult = destination.add(nodeId); + if (addResult.isNewEntry) { + const UniqueNodeList& nodeClosure = nfaNodeclosures[nodeId]; + destination.add(nodeClosure.begin(), nodeClosure.end()); + } + } + } +}; + +static inline void createCombinedTransition(PreallocatedNFANodeRangeList& combinedRangeList, const UniqueNodeIdSetImpl& sourceNodeSet, const NFA& immutableNFA, const NFANodeClosures& nfaNodeclosures) +{ + combinedRangeList.clear(); + + const unsigned* buffer = sourceNodeSet.buffer(); + + DataConverterWithEpsilonClosure converter { nfaNodeclosures }; + for (unsigned i = 0; i < sourceNodeSet.m_size; ++i) { + unsigned nodeId = buffer[i]; + auto transitions = immutableNFA.transitionsForNode(nodeId); + combinedRangeList.extend(transitions.begin(), transitions.end(), converter); + } +} + +static ALWAYS_INLINE unsigned getOrCreateDFANode(const NodeIdSet& nfaNodeSet, const NFA& nfa, DFA& dfa, UniqueNodeIdSetTable& uniqueNodeIdSetTable, UniqueNodeQueue& unprocessedNodes) +{ + NodeIdSetToUniqueNodeIdSetSource nodeIdSetToUniqueNodeIdSetSource(dfa, nfa, nfaNodeSet); + auto uniqueNodeIdAddResult = uniqueNodeIdSetTable.add<NodeIdSetToUniqueNodeIdSetTranslator>(nodeIdSetToUniqueNodeIdSetSource); + if (uniqueNodeIdAddResult.isNewEntry) + unprocessedNodes.append(uniqueNodeIdAddResult.iterator->impl()); + + return uniqueNodeIdAddResult.iterator->impl()->m_dfaNodeId; +} + +DFA NFAToDFA::convert(NFA& nfa) +{ + NFANodeClosures nfaNodeClosures; + resolveEpsilonClosures(nfa, nfaNodeClosures); + + DFA dfa; + + NodeIdSet initialSet({ nfa.root() }); + extendSetWithClosure(nfaNodeClosures, nfa.root(), initialSet); + + UniqueNodeIdSetTable uniqueNodeIdSetTable; + + NodeIdSetToUniqueNodeIdSetSource initialNodeIdSetToUniqueNodeIdSetSource(dfa, nfa, initialSet); + auto addResult = uniqueNodeIdSetTable.add<NodeIdSetToUniqueNodeIdSetTranslator>(initialNodeIdSetToUniqueNodeIdSetSource); + + UniqueNodeQueue unprocessedNodes; + unprocessedNodes.append(addResult.iterator->impl()); + + PreallocatedNFANodeRangeList combinedRangeList; + do { + UniqueNodeIdSetImpl* uniqueNodeIdSetImpl = unprocessedNodes.takeLast(); + createCombinedTransition(combinedRangeList, *uniqueNodeIdSetImpl, nfa, nfaNodeClosures); + + unsigned transitionsStart = dfa.transitionRanges.size(); + for (const NFANodeRange& range : combinedRangeList) { + unsigned targetNodeId = getOrCreateDFANode(range.data, nfa, dfa, uniqueNodeIdSetTable, unprocessedNodes); + dfa.transitionRanges.append({ range.first, range.last }); + dfa.transitionDestinations.append(targetNodeId); + } + unsigned transitionsEnd = dfa.transitionRanges.size(); + unsigned transitionsLength = transitionsEnd - transitionsStart; + + unsigned dfaNodeId = uniqueNodeIdSetImpl->m_dfaNodeId; + DFANode& dfaSourceNode = dfa.nodes[dfaNodeId]; + dfaSourceNode.setTransitions(transitionsStart, static_cast<uint8_t>(transitionsLength)); + } while (!unprocessedNodes.isEmpty()); + + dfa.shrinkToFit(); + return dfa; +} + +} // namespace ContentExtensions + +} // namespace WebCore + +#endif // ENABLE(CONTENT_EXTENSIONS) |