diff options
Diffstat (limited to 'Source/WebCore/contentextensions')
48 files changed, 8339 insertions, 0 deletions
diff --git a/Source/WebCore/contentextensions/CombinedFiltersAlphabet.cpp b/Source/WebCore/contentextensions/CombinedFiltersAlphabet.cpp new file mode 100644 index 000000000..c2851844f --- /dev/null +++ b/Source/WebCore/contentextensions/CombinedFiltersAlphabet.cpp @@ -0,0 +1,82 @@ +/* + * Copyright (C) 2015 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "CombinedFiltersAlphabet.h" + +#if ENABLE(CONTENT_EXTENSIONS) + +namespace WebCore { + +namespace ContentExtensions { + +struct TermCreatorInput { + const Term& term; + Vector<std::unique_ptr<Term>>& internedTermsStorage; +}; + +struct TermCreatorTranslator { + static unsigned hash(const TermCreatorInput& input) + { + return input.term.hash(); + } + + static inline bool equal(const Term* term, const TermCreatorInput& input) + { + return *term == input.term; + } + + static void translate(const Term*& location, const TermCreatorInput& input, unsigned) + { + std::unique_ptr<Term> newUniqueTerm(new Term(input.term)); + location = newUniqueTerm.get(); + input.internedTermsStorage.append(WTFMove(newUniqueTerm)); + } +}; + +const Term* CombinedFiltersAlphabet::interned(const Term& term) +{ + TermCreatorInput input { term, m_internedTermsStorage }; + auto addResult = m_uniqueTerms.add<TermCreatorTranslator>(input); + return *addResult.iterator; +} + +#if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING +size_t CombinedFiltersAlphabet::memoryUsed() const +{ + size_t termsSize = 0; + for (const auto& termPointer : m_internedTermsStorage) + termsSize += termPointer->memoryUsed(); + return sizeof(CombinedFiltersAlphabet) + + termsSize + + m_uniqueTerms.capacity() * sizeof(Term*) + + m_internedTermsStorage.capacity() * sizeof(std::unique_ptr<Term>); +} +#endif + +} // namespace ContentExtensions +} // namespace WebCore + +#endif // ENABLE(CONTENT_EXTENSIONS) diff --git a/Source/WebCore/contentextensions/CombinedFiltersAlphabet.h b/Source/WebCore/contentextensions/CombinedFiltersAlphabet.h new file mode 100644 index 000000000..c7cf72fce --- /dev/null +++ b/Source/WebCore/contentextensions/CombinedFiltersAlphabet.h @@ -0,0 +1,63 @@ +/* + * Copyright (C) 2015 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +#if ENABLE(CONTENT_EXTENSIONS) + +#include "ContentExtensionsDebugging.h" +#include "Term.h" +#include <wtf/HashSet.h> +#include <wtf/Vector.h> + +namespace WebCore { + +namespace ContentExtensions { + +class CombinedFiltersAlphabet { +public: + const Term* interned(const Term&); +#if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING + size_t memoryUsed() const; +#endif + +private: + struct TermPointerHash { + static unsigned hash(const Term* key) { return key->hash(); } + static inline bool equal(const Term* a, const Term* b) + { + return *a == *b; + } + static const bool safeToCompareToEmptyOrDeleted = false; + }; + + HashSet<const Term*, TermPointerHash> m_uniqueTerms; + Vector<std::unique_ptr<Term>> m_internedTermsStorage; +}; + +} // namespace ContentExtensions +} // namespace WebCore + +#endif // ENABLE(CONTENT_EXTENSIONS) diff --git a/Source/WebCore/contentextensions/CombinedURLFilters.cpp b/Source/WebCore/contentextensions/CombinedURLFilters.cpp new file mode 100644 index 000000000..c3950957e --- /dev/null +++ b/Source/WebCore/contentextensions/CombinedURLFilters.cpp @@ -0,0 +1,513 @@ +/* + * Copyright (C) 2015 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "CombinedURLFilters.h" + +#if ENABLE(CONTENT_EXTENSIONS) + +#include "HashableActionList.h" +#include "Term.h" +#include <wtf/DataLog.h> +#include <wtf/Vector.h> +#include <wtf/text/CString.h> + +namespace WebCore { + +namespace ContentExtensions { + +struct PrefixTreeEdge { + const Term* term; + std::unique_ptr<PrefixTreeVertex> child; +}; + +typedef Vector<PrefixTreeEdge, 0, WTF::CrashOnOverflow, 1> PrefixTreeEdges; + +struct PrefixTreeVertex { + PrefixTreeEdges edges; +}; + +struct ReverseSuffixTreeVertex; +struct ReverseSuffixTreeEdge { + const Term* term; + std::unique_ptr<ReverseSuffixTreeVertex> child; +}; +typedef Vector<ReverseSuffixTreeEdge, 0, WTF::CrashOnOverflow, 1> ReverseSuffixTreeEdges; + +struct ReverseSuffixTreeVertex { + ReverseSuffixTreeEdges edges; + uint32_t nodeId; +}; +typedef HashMap<HashableActionList, ReverseSuffixTreeVertex, HashableActionListHash, HashableActionListHashTraits> ReverseSuffixTreeRoots; + +#if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING +static size_t recursiveMemoryUsed(const PrefixTreeVertex& vertex) +{ + size_t size = sizeof(PrefixTreeVertex) + + vertex.edges.capacity() * sizeof(PrefixTreeEdge); + for (const auto& edge : vertex.edges) { + ASSERT(edge.child); + size += recursiveMemoryUsed(*edge.child.get()); + } + return size; +} + +size_t CombinedURLFilters::memoryUsed() const +{ + ASSERT(m_prefixTreeRoot); + + size_t actionsSize = 0; + for (const auto& slot : m_actions) + actionsSize += slot.value.capacity() * sizeof(uint64_t); + + return sizeof(CombinedURLFilters) + + m_alphabet.memoryUsed() + + recursiveMemoryUsed(*m_prefixTreeRoot.get()) + + sizeof(HashMap<PrefixTreeVertex*, ActionList>) + + m_actions.capacity() * (sizeof(PrefixTreeVertex*) + sizeof(ActionList)) + + actionsSize; +} +#endif + +#if CONTENT_EXTENSIONS_STATE_MACHINE_DEBUGGING +static String prefixTreeVertexToString(const PrefixTreeVertex& vertex, const HashMap<const PrefixTreeVertex*, ActionList>& actions, unsigned depth) +{ + StringBuilder builder; + while (depth--) + builder.appendLiteral(" "); + builder.appendLiteral("vertex actions: "); + + auto actionsSlot = actions.find(&vertex); + if (actionsSlot != actions.end()) { + for (uint64_t action : actionsSlot->value) { + builder.appendNumber(action); + builder.append(','); + } + } + builder.append('\n'); + return builder.toString(); +} + +static void recursivePrint(const PrefixTreeVertex& vertex, const HashMap<const PrefixTreeVertex*, ActionList>& actions, unsigned depth) +{ + dataLogF("%s", prefixTreeVertexToString(vertex, actions, depth).utf8().data()); + for (const auto& edge : vertex.edges) { + StringBuilder builder; + for (unsigned i = 0; i < depth * 2; ++i) + builder.append(' '); + builder.appendLiteral("vertex edge: "); + builder.append(edge.term->toString()); + builder.append('\n'); + dataLogF("%s", builder.toString().utf8().data()); + ASSERT(edge.child); + recursivePrint(*edge.child.get(), actions, depth + 1); + } +} + +void CombinedURLFilters::print() const +{ + recursivePrint(*m_prefixTreeRoot.get(), m_actions, 0); +} +#endif + +CombinedURLFilters::CombinedURLFilters() + : m_prefixTreeRoot(std::make_unique<PrefixTreeVertex>()) +{ +} + +CombinedURLFilters::~CombinedURLFilters() +{ +} + +bool CombinedURLFilters::isEmpty() const +{ + return m_prefixTreeRoot->edges.isEmpty(); +} + +void CombinedURLFilters::addDomain(uint64_t actionId, const String& domain) +{ + unsigned domainLength = domain.length(); + if (domainLength && domain[0] == '*') { + // If domain starts with a '*' then it means match domain and its subdomains, like (^|.)domain$ + // This way a domain of "*webkit.org" will match "bugs.webkit.org" and "webkit.org". + Vector<Term> prependDot; + Vector<Term> prependBeginningOfLine; + prependDot.reserveInitialCapacity(domainLength + 2); + prependBeginningOfLine.reserveInitialCapacity(domainLength); // This is just no .* at the beginning. + + Term canonicalDotStar(Term::UniversalTransition); + canonicalDotStar.quantify(AtomQuantifier::ZeroOrMore); + prependDot.uncheckedAppend(canonicalDotStar); + prependDot.uncheckedAppend(Term('.', true)); + + for (unsigned i = 1; i < domainLength; i++) { + ASSERT(isASCII(domain[i])); + ASSERT(!isASCIIUpper(domain[i])); + prependDot.uncheckedAppend(Term(domain[i], true)); + prependBeginningOfLine.uncheckedAppend(Term(domain[i], true)); + } + prependDot.uncheckedAppend(Term::EndOfLineAssertionTerm); + prependBeginningOfLine.uncheckedAppend(Term::EndOfLineAssertionTerm); + + addPattern(actionId, prependDot); + addPattern(actionId, prependBeginningOfLine); + } else { + // This is like adding ^domain$, but interpreting domain as a series of characters, not a regular expression. + // "webkit.org" will match "webkit.org" but not "bugs.webkit.org". + Vector<Term> prependBeginningOfLine; + prependBeginningOfLine.reserveInitialCapacity(domainLength + 1); // This is just no .* at the beginning. + for (unsigned i = 0; i < domainLength; i++) { + ASSERT(isASCII(domain[i])); + ASSERT(!isASCIIUpper(domain[i])); + prependBeginningOfLine.uncheckedAppend(Term(domain[i], true)); + } + prependBeginningOfLine.uncheckedAppend(Term::EndOfLineAssertionTerm); + addPattern(actionId, prependBeginningOfLine); + } +} + +void CombinedURLFilters::addPattern(uint64_t actionId, const Vector<Term>& pattern) +{ + ASSERT_WITH_MESSAGE(!pattern.isEmpty(), "The parser should have excluded empty patterns before reaching CombinedURLFilters."); + + if (pattern.isEmpty()) + return; + + // Extend the prefix tree with the new pattern. + PrefixTreeVertex* lastPrefixTree = m_prefixTreeRoot.get(); + + for (const Term& term : pattern) { + size_t nextEntryIndex = WTF::notFound; + for (size_t i = 0; i < lastPrefixTree->edges.size(); ++i) { + if (*lastPrefixTree->edges[i].term == term) { + nextEntryIndex = i; + break; + } + } + if (nextEntryIndex != WTF::notFound) + lastPrefixTree = lastPrefixTree->edges[nextEntryIndex].child.get(); + else { + lastPrefixTree->edges.append(PrefixTreeEdge({m_alphabet.interned(term), std::make_unique<PrefixTreeVertex>()})); + lastPrefixTree = lastPrefixTree->edges.last().child.get(); + } + } + + auto addResult = m_actions.add(lastPrefixTree, ActionList()); + ActionList& actions = addResult.iterator->value; + if (actions.find(actionId) == WTF::notFound) + actions.append(actionId); +} + +struct ActiveSubtree { + ActiveSubtree(PrefixTreeVertex& vertex, ImmutableCharNFANodeBuilder&& nfaNode, unsigned edgeIndex) + : vertex(vertex) + , nfaNode(WTFMove(nfaNode)) + , edgeIndex(edgeIndex) + { + } + PrefixTreeVertex& vertex; + ImmutableCharNFANodeBuilder nfaNode; + unsigned edgeIndex; +}; + +static void generateInfixUnsuitableForReverseSuffixTree(NFA& nfa, Vector<ActiveSubtree>& stack, const HashMap<const PrefixTreeVertex*, ActionList>& actions) +{ + // To avoid conflicts, we use the reverse suffix tree for subtrees that do not merge + // in the prefix tree. + // + // We only unify the suffixes to the actions on the leaf. + // If there are actions inside the tree, we generate the part of the subtree up to the action. + // + // If we accidentally insert a node with action inside the reverse-suffix-tree, we would create + // new actions on unrelated pattern when unifying their suffixes. + for (unsigned i = stack.size() - 1; i--;) { + ActiveSubtree& activeSubtree = stack[i]; + if (activeSubtree.nfaNode.isValid()) + return; + + RELEASE_ASSERT_WITH_MESSAGE(i > 0, "The bottom of the stack must be the root of our fixed-length subtree. It should have it the isValid() case above."); + + auto actionsIterator = actions.find(&activeSubtree.vertex); + bool hasActionInsideTree = actionsIterator != actions.end(); + + // Stricto sensu, we should count the number of exit edges with fixed length. + // That is costly and unlikely to matter in practice. + bool hasSingleOutcome = activeSubtree.vertex.edges.size() == 1; + + if (hasActionInsideTree || !hasSingleOutcome) { + // Go back to the end of the subtree that has already been generated. + // From there, generate everything up to the vertex we found. + unsigned end = i; + unsigned beginning = end; + + ActiveSubtree* sourceActiveSubtree = nullptr; + while (beginning--) { + ActiveSubtree& activeSubtree = stack[beginning]; + if (activeSubtree.nfaNode.isValid()) { + sourceActiveSubtree = &activeSubtree; + break; + } + } + ASSERT_WITH_MESSAGE(sourceActiveSubtree, "The root should always have a valid generator."); + + for (unsigned stackIndex = beginning + 1; stackIndex <= end; ++stackIndex) { + ImmutableCharNFANodeBuilder& sourceNode = sourceActiveSubtree->nfaNode; + ASSERT(sourceNode.isValid()); + auto& edge = sourceActiveSubtree->vertex.edges[sourceActiveSubtree->edgeIndex]; + + ActiveSubtree& destinationActiveSubtree = stack[stackIndex]; + destinationActiveSubtree.nfaNode = edge.term->generateGraph(nfa, sourceNode, actions.get(&destinationActiveSubtree.vertex)); + + sourceActiveSubtree = &destinationActiveSubtree; + } + + return; + } + } +} + +static void generateSuffixWithReverseSuffixTree(NFA& nfa, Vector<ActiveSubtree>& stack, const HashMap<const PrefixTreeVertex*, ActionList>& actions, ReverseSuffixTreeRoots& reverseSuffixTreeRoots) +{ + ActiveSubtree& leafSubtree = stack.last(); + ASSERT_WITH_MESSAGE(!leafSubtree.nfaNode.isValid(), "The leaf should never be generated by the code above, it should always be inserted into the prefix tree."); + + ActionList actionList = actions.get(&leafSubtree.vertex); + ASSERT_WITH_MESSAGE(!actionList.isEmpty(), "Our prefix tree should always have actions on the leaves by construction."); + + HashableActionList hashableActionList(actionList); + auto rootAddResult = reverseSuffixTreeRoots.add(hashableActionList, ReverseSuffixTreeVertex()); + if (rootAddResult.isNewEntry) { + ImmutableCharNFANodeBuilder newNode(nfa); + newNode.setActions(actionList.begin(), actionList.end()); + rootAddResult.iterator->value.nodeId = newNode.nodeId(); + } + + ReverseSuffixTreeVertex* activeReverseSuffixTreeVertex = &rootAddResult.iterator->value; + uint32_t destinationNodeId = rootAddResult.iterator->value.nodeId; + + unsigned stackPosition = stack.size() - 2; + while (true) { + ActiveSubtree& source = stack[stackPosition]; + auto& edge = source.vertex.edges[source.edgeIndex]; + + // This is the end condition: when we meet a node that has already been generated, + // we just need to connect our backward tree to the forward tree. + // + // We *must not* add this last node to the reverse-suffix tree. That node can have + // transitions back to earlier part of the prefix tree. If the prefix tree "caches" + // such node, it would create new transitions that did not exist in the source language. + if (source.nfaNode.isValid()) { + stack.shrink(stackPosition + 1); + edge.term->generateGraph(nfa, source.nfaNode, destinationNodeId); + return; + } + --stackPosition; + + ASSERT_WITH_MESSAGE(!actions.contains(&source.vertex), "Any node with final actions should have been created before hitting the reverse suffix-tree."); + + ReverseSuffixTreeEdge* existingEdge = nullptr; + for (ReverseSuffixTreeEdge& potentialExistingEdge : activeReverseSuffixTreeVertex->edges) { + if (edge.term == potentialExistingEdge.term) { + existingEdge = &potentialExistingEdge; + break; + } + } + + if (existingEdge) + activeReverseSuffixTreeVertex = existingEdge->child.get(); + else { + ImmutableCharNFANodeBuilder newNode(nfa); + edge.term->generateGraph(nfa, newNode, destinationNodeId); + std::unique_ptr<ReverseSuffixTreeVertex> newVertex(new ReverseSuffixTreeVertex()); + newVertex->nodeId = newNode.nodeId(); + + ReverseSuffixTreeVertex* newVertexAddress = newVertex.get(); + activeReverseSuffixTreeVertex->edges.append(ReverseSuffixTreeEdge({ edge.term, WTFMove(newVertex) })); + activeReverseSuffixTreeVertex = newVertexAddress; + } + destinationNodeId = activeReverseSuffixTreeVertex->nodeId; + + ASSERT(source.vertex.edges.size() == 1); + source.vertex.edges.clear(); + } + + RELEASE_ASSERT_NOT_REACHED(); +} + +static void clearReverseSuffixTree(ReverseSuffixTreeRoots& reverseSuffixTreeRoots) +{ + // We cannot rely on the destructor being called in order from top to bottom as we may overflow + // the stack. Instead, we go depth first in the reverse-suffix-tree. + + for (auto& slot : reverseSuffixTreeRoots) { + Vector<ReverseSuffixTreeVertex*, 128> stack; + stack.append(&slot.value); + + while (true) { + ReverseSuffixTreeVertex* top = stack.last(); + if (top->edges.isEmpty()) { + stack.removeLast(); + if (stack.isEmpty()) + break; + stack.last()->edges.removeLast(); + } else + stack.append(top->edges.last().child.get()); + } + } + reverseSuffixTreeRoots.clear(); +} + +static void generateNFAForSubtree(NFA& nfa, ImmutableCharNFANodeBuilder&& subtreeRoot, PrefixTreeVertex& root, const HashMap<const PrefixTreeVertex*, ActionList>& actions, size_t maxNFASize) +{ + // This recurses the subtree of the prefix tree. + // For each edge that has fixed length (no quantifiers like ?, *, or +) it generates the nfa graph, + // recurses into children, and deletes any processed leaf nodes. + + ReverseSuffixTreeRoots reverseSuffixTreeRoots; + Vector<ActiveSubtree> stack; + if (!root.edges.isEmpty()) + stack.append(ActiveSubtree(root, WTFMove(subtreeRoot), 0)); + + bool nfaTooBig = false; + + // Generate graphs for each subtree that does not contain any quantifiers. + while (!stack.isEmpty()) { + PrefixTreeVertex& vertex = stack.last().vertex; + const unsigned edgeIndex = stack.last().edgeIndex; + + if (edgeIndex < vertex.edges.size()) { + auto& edge = vertex.edges[edgeIndex]; + + // Clean up any processed leaves and return early if we are past the maxNFASize. + if (nfaTooBig) { + stack.last().edgeIndex = stack.last().vertex.edges.size(); + continue; + } + + // Quantified edges in the subtree will be a part of another NFA. + if (!edge.term->hasFixedLength()) { + stack.last().edgeIndex++; + continue; + } + + ASSERT(edge.child.get()); + ImmutableCharNFANodeBuilder emptyBuilder; + stack.append(ActiveSubtree(*edge.child.get(), WTFMove(emptyBuilder), 0)); + } else { + bool isLeaf = vertex.edges.isEmpty(); + + ASSERT(edgeIndex == vertex.edges.size()); + vertex.edges.removeAllMatching([](PrefixTreeEdge& edge) + { + return !edge.term; + }); + + if (isLeaf) { + generateInfixUnsuitableForReverseSuffixTree(nfa, stack, actions); + generateSuffixWithReverseSuffixTree(nfa, stack, actions, reverseSuffixTreeRoots); + + // Only stop generating an NFA at a leaf to ensure we have a correct NFA. We could go slightly over the maxNFASize. + if (nfa.nodes.size() > maxNFASize) + nfaTooBig = true; + } else + stack.removeLast(); + + if (!stack.isEmpty()) { + auto& activeSubtree = stack.last(); + auto& edge = activeSubtree.vertex.edges[stack.last().edgeIndex]; + if (edge.child->edges.isEmpty()) + edge.term = nullptr; // Mark this leaf for deleting. + activeSubtree.edgeIndex++; + } + } + } + clearReverseSuffixTree(reverseSuffixTreeRoots); +} + +void CombinedURLFilters::processNFAs(size_t maxNFASize, std::function<void(NFA&&)> handler) +{ +#if CONTENT_EXTENSIONS_STATE_MACHINE_DEBUGGING + print(); +#endif + while (true) { + // Traverse out to a leaf. + Vector<PrefixTreeVertex*, 128> stack; + PrefixTreeVertex* vertex = m_prefixTreeRoot.get(); + while (true) { + ASSERT(vertex); + stack.append(vertex); + if (vertex->edges.isEmpty()) + break; + vertex = vertex->edges.last().child.get(); + } + if (stack.size() == 1) + break; // We're done once we have processed and removed all the edges in the prefix tree. + + // Find the prefix root for this NFA. This is the vertex after the last term with a quantifier if there is one, + // or the root if there are no quantifiers left. + while (stack.size() > 1) { + if (!stack[stack.size() - 2]->edges.last().term->hasFixedLength()) + break; + stack.removeLast(); + } + ASSERT_WITH_MESSAGE(!stack.isEmpty(), "At least the root should be in the stack"); + + // Make an NFA with the subtrees for whom this is also the last quantifier (or who also have no quantifier). + NFA nfa; + { + // Put the prefix into the NFA. + ImmutableCharNFANodeBuilder lastNode(nfa); + for (unsigned i = 0; i < stack.size() - 1; ++i) { + const PrefixTreeEdge& edge = stack[i]->edges.last(); + ImmutableCharNFANodeBuilder newNode = edge.term->generateGraph(nfa, lastNode, m_actions.get(edge.child.get())); + lastNode = WTFMove(newNode); + } + + // Put the non-quantified vertices in the subtree into the NFA and delete them. + ASSERT(stack.last()); + generateNFAForSubtree(nfa, WTFMove(lastNode), *stack.last(), m_actions, maxNFASize); + } + nfa.finalize(); + + handler(WTFMove(nfa)); + + // Clean up any processed leaf nodes. + while (true) { + if (stack.size() > 1) { + if (stack[stack.size() - 1]->edges.isEmpty()) { + stack[stack.size() - 2]->edges.removeLast(); + stack.removeLast(); + } else + break; // Vertex is not a leaf. + } else + break; // Leave the empty root. + } + } +} + +} // namespace ContentExtensions +} // namespace WebCore + +#endif // ENABLE(CONTENT_EXTENSIONS) diff --git a/Source/WebCore/contentextensions/CombinedURLFilters.h b/Source/WebCore/contentextensions/CombinedURLFilters.h new file mode 100644 index 000000000..643ed889d --- /dev/null +++ b/Source/WebCore/contentextensions/CombinedURLFilters.h @@ -0,0 +1,69 @@ +/* + * Copyright (C) 2015 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +#if ENABLE(CONTENT_EXTENSIONS) + +#include "CombinedFiltersAlphabet.h" +#include "ContentExtensionsDebugging.h" +#include "NFA.h" +#include <wtf/Forward.h> +#include <wtf/Vector.h> + +namespace WebCore { + +namespace ContentExtensions { + +struct PrefixTreeVertex; + +class WEBCORE_EXPORT CombinedURLFilters { +public: + CombinedURLFilters(); + ~CombinedURLFilters(); + + void addPattern(uint64_t actionId, const Vector<Term>& pattern); + void addDomain(uint64_t actionId, const String& domain); + + void processNFAs(size_t maxNFASize, std::function<void(NFA&&)> handler); + bool isEmpty() const; + +#if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING + size_t memoryUsed() const; +#endif +#if CONTENT_EXTENSIONS_STATE_MACHINE_DEBUGGING + void print() const; +#endif + +private: + CombinedFiltersAlphabet m_alphabet; + std::unique_ptr<PrefixTreeVertex> m_prefixTreeRoot; + HashMap<const PrefixTreeVertex*, ActionList> m_actions; +}; + +} // namespace ContentExtensions +} // namespace WebCore + +#endif // ENABLE(CONTENT_EXTENSIONS) diff --git a/Source/WebCore/contentextensions/CompiledContentExtension.cpp b/Source/WebCore/contentextensions/CompiledContentExtension.cpp new file mode 100644 index 000000000..e577a9127 --- /dev/null +++ b/Source/WebCore/contentextensions/CompiledContentExtension.cpp @@ -0,0 +1,42 @@ +/* + * Copyright (C) 2015 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "CompiledContentExtension.h" +#include "DFABytecodeInterpreter.h" + +#if ENABLE(CONTENT_EXTENSIONS) + +namespace WebCore { +namespace ContentExtensions { + +CompiledContentExtension::~CompiledContentExtension() +{ +} + +} // namespace ContentExtensions +} // namespace WebCore + +#endif // ENABLE(CONTENT_EXTENSIONS) diff --git a/Source/WebCore/contentextensions/CompiledContentExtension.h b/Source/WebCore/contentextensions/CompiledContentExtension.h new file mode 100644 index 000000000..47b3d6269 --- /dev/null +++ b/Source/WebCore/contentextensions/CompiledContentExtension.h @@ -0,0 +1,54 @@ +/* + * Copyright (C) 2015 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +#if ENABLE(CONTENT_EXTENSIONS) + +#include "ContentExtensionRule.h" +#include "DFABytecode.h" +#include <wtf/ThreadSafeRefCounted.h> + +namespace WebCore { +namespace ContentExtensions { + +class WEBCORE_EXPORT CompiledContentExtension : public ThreadSafeRefCounted<CompiledContentExtension> { +public: + virtual ~CompiledContentExtension(); + + virtual const DFABytecode* filtersWithoutDomainsBytecode() const = 0; + virtual unsigned filtersWithoutDomainsBytecodeLength() const = 0; + virtual const DFABytecode* filtersWithDomainsBytecode() const = 0; + virtual unsigned filtersWithDomainsBytecodeLength() const = 0; + virtual const DFABytecode* domainFiltersBytecode() const = 0; + virtual unsigned domainFiltersBytecodeLength() const = 0; + virtual const SerializedActionByte* actions() const = 0; + virtual unsigned actionsLength() const = 0; +}; + +} // namespace ContentExtensions +} // namespace WebCore + +#endif // ENABLE(CONTENT_EXTENSIONS) diff --git a/Source/WebCore/contentextensions/ContentExtension.cpp b/Source/WebCore/contentextensions/ContentExtension.cpp new file mode 100644 index 000000000..dacbefe2a --- /dev/null +++ b/Source/WebCore/contentextensions/ContentExtension.cpp @@ -0,0 +1,156 @@ +/* + * Copyright (C) 2015 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "ContentExtension.h" + +#include "CompiledContentExtension.h" +#include "ContentExtensionsBackend.h" +#include "StyleSheetContents.h" +#include <wtf/text/StringBuilder.h> + +#if ENABLE(CONTENT_EXTENSIONS) + +namespace WebCore { +namespace ContentExtensions { + +RefPtr<ContentExtension> ContentExtension::create(const String& identifier, Ref<CompiledContentExtension>&& compiledExtension) +{ + return adoptRef(*new ContentExtension(identifier, WTFMove(compiledExtension))); +} + +ContentExtension::ContentExtension(const String& identifier, Ref<CompiledContentExtension>&& compiledExtension) + : m_identifier(identifier) + , m_compiledExtension(WTFMove(compiledExtension)) +{ + DFABytecodeInterpreter withoutDomains(m_compiledExtension->filtersWithoutDomainsBytecode(), m_compiledExtension->filtersWithoutDomainsBytecodeLength()); + DFABytecodeInterpreter withDomains(m_compiledExtension->filtersWithDomainsBytecode(), m_compiledExtension->filtersWithDomainsBytecodeLength()); + for (uint64_t action : withoutDomains.actionsMatchingEverything()) { + ASSERT(static_cast<uint32_t>(action) == action); + m_universalActionsWithoutDomains.append(static_cast<uint32_t>(action)); + } + for (uint64_t action : withDomains.actionsMatchingEverything()) { + ASSERT((action & ~IfDomainFlag) == static_cast<uint32_t>(action)); + m_universalActionsWithDomains.append(action); + } + + compileGlobalDisplayNoneStyleSheet(); + m_universalActionsWithoutDomains.shrinkToFit(); + m_universalActionsWithDomains.shrinkToFit(); +} + +uint32_t ContentExtension::findFirstIgnorePreviousRules() const +{ + auto* actions = m_compiledExtension->actions(); + uint32_t actionsLength = m_compiledExtension->actionsLength(); + uint32_t currentActionIndex = 0; + while (currentActionIndex < actionsLength) { + if (Action::deserializeType(actions, actionsLength, currentActionIndex) == ActionType::IgnorePreviousRules) + return currentActionIndex; + currentActionIndex += Action::serializedLength(actions, actionsLength, currentActionIndex); + } + return std::numeric_limits<uint32_t>::max(); +} + +StyleSheetContents* ContentExtension::globalDisplayNoneStyleSheet() +{ + return m_globalDisplayNoneStyleSheet.get(); +} + +void ContentExtension::compileGlobalDisplayNoneStyleSheet() +{ + uint32_t firstIgnorePreviousRules = findFirstIgnorePreviousRules(); + + auto* actions = m_compiledExtension->actions(); + uint32_t actionsLength = m_compiledExtension->actionsLength(); + + auto inGlobalDisplayNoneStyleSheet = [&](const uint32_t location) + { + return location < firstIgnorePreviousRules && Action::deserializeType(actions, actionsLength, location) == ActionType::CSSDisplayNoneSelector; + }; + + StringBuilder css; + for (uint32_t universalActionLocation : m_universalActionsWithoutDomains) { + if (inGlobalDisplayNoneStyleSheet(universalActionLocation)) { + if (!css.isEmpty()) + css.append(','); + Action action = Action::deserialize(actions, actionsLength, universalActionLocation); + ASSERT(action.type() == ActionType::CSSDisplayNoneSelector); + css.append(action.stringArgument()); + } + } + if (css.isEmpty()) + return; + css.append("{"); + css.append(ContentExtensionsBackend::displayNoneCSSRule()); + css.append("}"); + + m_globalDisplayNoneStyleSheet = StyleSheetContents::create(); + m_globalDisplayNoneStyleSheet->setIsUserStyleSheet(true); + if (!m_globalDisplayNoneStyleSheet->parseString(css.toString())) + m_globalDisplayNoneStyleSheet = nullptr; + + // These actions don't need to be applied individually any more. They will all be applied to every page as a precompiled style sheet. + m_universalActionsWithoutDomains.removeAllMatching(inGlobalDisplayNoneStyleSheet); +} + +void ContentExtension::populateDomainCacheIfNeeded(const String& domain) +{ + if (m_cachedDomain != domain) { + DFABytecodeInterpreter interpreter(m_compiledExtension->domainFiltersBytecode(), m_compiledExtension->domainFiltersBytecodeLength()); + const uint16_t allLoadTypesAndResourceTypes = LoadTypeMask | ResourceTypeMask; + auto domainActions = interpreter.interpret(domain.utf8(), allLoadTypesAndResourceTypes); + + m_cachedDomainActions.clear(); + for (uint64_t action : domainActions) + m_cachedDomainActions.add(action); + + m_cachedUniversalDomainActions.clear(); + for (uint64_t action : m_universalActionsWithDomains) { + ASSERT_WITH_MESSAGE((action & ~IfDomainFlag) == static_cast<uint32_t>(action), "Universal actions with domains should not have flags."); + if (!!(action & IfDomainFlag) == m_cachedDomainActions.contains(action)) + m_cachedUniversalDomainActions.append(static_cast<uint32_t>(action)); + } + m_cachedUniversalDomainActions.shrinkToFit(); + m_cachedDomain = domain; + } +} + +const DFABytecodeInterpreter::Actions& ContentExtension::cachedDomainActions(const String& domain) +{ + populateDomainCacheIfNeeded(domain); + return m_cachedDomainActions; +} + +const Vector<uint32_t>& ContentExtension::universalActionsWithDomains(const String& domain) +{ + populateDomainCacheIfNeeded(domain); + return m_cachedUniversalDomainActions; +} + +} // namespace ContentExtensions +} // namespace WebCore + +#endif // ENABLE(CONTENT_EXTENSIONS) diff --git a/Source/WebCore/contentextensions/ContentExtension.h b/Source/WebCore/contentextensions/ContentExtension.h new file mode 100644 index 000000000..5e7c11b2a --- /dev/null +++ b/Source/WebCore/contentextensions/ContentExtension.h @@ -0,0 +1,75 @@ +/* + * Copyright (C) 2015 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +#if ENABLE(CONTENT_EXTENSIONS) + +#include "DFABytecodeInterpreter.h" +#include "StyleSheetContents.h" +#include <wtf/Ref.h> +#include <wtf/RefCounted.h> +#include <wtf/RefPtr.h> + +namespace WebCore { + +namespace ContentExtensions { + +class CompiledContentExtension; + +class ContentExtension : public RefCounted<ContentExtension> { +public: + static RefPtr<ContentExtension> create(const String& identifier, Ref<CompiledContentExtension>&&); + + const String& identifier() const { return m_identifier; } + const CompiledContentExtension& compiledExtension() const { return m_compiledExtension.get(); } + StyleSheetContents* globalDisplayNoneStyleSheet(); + const DFABytecodeInterpreter::Actions& cachedDomainActions(const String& domain); + const Vector<uint32_t>& universalActionsWithoutDomains() { return m_universalActionsWithoutDomains; } + const Vector<uint32_t>& universalActionsWithDomains(const String& domain); + +private: + ContentExtension(const String& identifier, Ref<CompiledContentExtension>&&); + uint32_t findFirstIgnorePreviousRules() const; + + String m_identifier; + Ref<CompiledContentExtension> m_compiledExtension; + + RefPtr<StyleSheetContents> m_globalDisplayNoneStyleSheet; + void compileGlobalDisplayNoneStyleSheet(); + + String m_cachedDomain; + void populateDomainCacheIfNeeded(const String& domain); + DFABytecodeInterpreter::Actions m_cachedDomainActions; + Vector<uint32_t> m_cachedUniversalDomainActions; + + Vector<uint32_t> m_universalActionsWithoutDomains; + Vector<uint64_t> m_universalActionsWithDomains; +}; + +} // namespace ContentExtensions +} // namespace WebCore + +#endif // ENABLE(CONTENT_EXTENSIONS) diff --git a/Source/WebCore/contentextensions/ContentExtensionActions.h b/Source/WebCore/contentextensions/ContentExtensionActions.h new file mode 100644 index 000000000..992b6bbf7 --- /dev/null +++ b/Source/WebCore/contentextensions/ContentExtensionActions.h @@ -0,0 +1,60 @@ +/* + * Copyright (C) 2015 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +#if ENABLE(CONTENT_EXTENSIONS) + +namespace WebCore { + +class ResourceRequest; + +namespace ContentExtensions { + +typedef uint8_t SerializedActionByte; + +enum class ActionType : uint8_t { + BlockLoad, + BlockCookies, + CSSDisplayNoneSelector, + CSSDisplayNoneStyleSheet, + IgnorePreviousRules, + MakeHTTPS, + InvalidAction, +}; + +struct BlockedStatus { + bool blockedLoad { false }; + bool blockedCookies { false }; + bool madeHTTPS { false }; +}; + +void applyBlockedStatusToRequest(const BlockedStatus&, ResourceRequest&); + +} // namespace ContentExtensions + +} // namespace WebCore + +#endif // ENABLE(CONTENT_EXTENSIONS) diff --git a/Source/WebCore/contentextensions/ContentExtensionCompiler.cpp b/Source/WebCore/contentextensions/ContentExtensionCompiler.cpp new file mode 100644 index 000000000..90bfed2db --- /dev/null +++ b/Source/WebCore/contentextensions/ContentExtensionCompiler.cpp @@ -0,0 +1,483 @@ +/* + * Copyright (C) 2015 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "ContentExtensionCompiler.h" + +#if ENABLE(CONTENT_EXTENSIONS) + +#include "CombinedURLFilters.h" +#include "CompiledContentExtension.h" +#include "ContentExtensionActions.h" +#include "ContentExtensionError.h" +#include "ContentExtensionParser.h" +#include "ContentExtensionRule.h" +#include "ContentExtensionsDebugging.h" +#include "DFABytecodeCompiler.h" +#include "DFACombiner.h" +#include "NFA.h" +#include "NFAToDFA.h" +#include "URLFilterParser.h" +#include <wtf/CurrentTime.h> +#include <wtf/DataLog.h> +#include <wtf/text/CString.h> +#include <wtf/text/StringBuilder.h> + +namespace WebCore { +namespace ContentExtensions { + +static void serializeSelector(Vector<SerializedActionByte>& actions, const String& selector) +{ + // Append action type (1 byte). + actions.append(static_cast<SerializedActionByte>(ActionType::CSSDisplayNoneSelector)); + // Append Selector length (4 bytes). + unsigned selectorLength = selector.length(); + actions.resize(actions.size() + sizeof(unsigned)); + *reinterpret_cast<unsigned*>(&actions[actions.size() - sizeof(unsigned)]) = selectorLength; + bool wideCharacters = !selector.is8Bit(); + actions.append(wideCharacters); + // Append Selector. + if (wideCharacters) { + unsigned startIndex = actions.size(); + actions.resize(actions.size() + sizeof(UChar) * selectorLength); + for (unsigned i = 0; i < selectorLength; ++i) + *reinterpret_cast<UChar*>(&actions[startIndex + i * sizeof(UChar)]) = selector[i]; + } else { + for (unsigned i = 0; i < selectorLength; ++i) + actions.append(selector[i]); + } +} + +struct PendingDisplayNoneActions { + Vector<String> selectors; + Vector<unsigned> clientLocations; +}; +typedef HashMap<Trigger, PendingDisplayNoneActions, TriggerHash, TriggerHashTraits> PendingDisplayNoneActionsMap; + +static void resolvePendingDisplayNoneActions(Vector<SerializedActionByte>& actions, Vector<unsigned>& actionLocations, PendingDisplayNoneActionsMap& pendingDisplayNoneActionsMap) +{ + for (auto& slot : pendingDisplayNoneActionsMap) { + PendingDisplayNoneActions& pendingActions = slot.value; + + StringBuilder combinedSelectors; + for (unsigned i = 0; i < pendingActions.selectors.size(); ++i) { + if (i) + combinedSelectors.append(','); + combinedSelectors.append(pendingActions.selectors[i]); + } + + unsigned actionLocation = actions.size(); + serializeSelector(actions, combinedSelectors.toString()); + for (unsigned clientLocation : pendingActions.clientLocations) + actionLocations[clientLocation] = actionLocation; + } + pendingDisplayNoneActionsMap.clear(); +} + +static Vector<unsigned> serializeActions(const Vector<ContentExtensionRule>& ruleList, Vector<SerializedActionByte>& actions) +{ + ASSERT(!actions.size()); + + Vector<unsigned> actionLocations; + + // Order only matters because of IgnorePreviousRules. All other identical actions can be combined between each IgnorePreviousRules + // and CSSDisplayNone strings can be combined if their triggers are identical. + typedef HashMap<uint32_t, uint32_t, DefaultHash<uint32_t>::Hash, WTF::UnsignedWithZeroKeyHashTraits<uint32_t>> ActionMap; + ActionMap blockLoadActionsMap; + ActionMap blockCookiesActionsMap; + PendingDisplayNoneActionsMap cssDisplayNoneActionsMap; + ActionMap ignorePreviousRuleActionsMap; + ActionMap makeHTTPSActionsMap; + + for (unsigned ruleIndex = 0; ruleIndex < ruleList.size(); ++ruleIndex) { + const ContentExtensionRule& rule = ruleList[ruleIndex]; + ActionType actionType = rule.action().type(); + + if (actionType == ActionType::IgnorePreviousRules) { + resolvePendingDisplayNoneActions(actions, actionLocations, cssDisplayNoneActionsMap); + + blockLoadActionsMap.clear(); + blockCookiesActionsMap.clear(); + cssDisplayNoneActionsMap.clear(); + makeHTTPSActionsMap.clear(); + } else + ignorePreviousRuleActionsMap.clear(); + + // Anything with domain is just pushed. + // We could try to merge domains but that case is not common in practice. + if (!rule.trigger().domains.isEmpty()) { + actionLocations.append(actions.size()); + + if (actionType == ActionType::CSSDisplayNoneSelector) + serializeSelector(actions, rule.action().stringArgument()); + else + actions.append(static_cast<SerializedActionByte>(actionType)); + continue; + } + + ResourceFlags flags = rule.trigger().flags; + unsigned actionLocation = std::numeric_limits<unsigned>::max(); + + auto findOrMakeActionLocation = [&] (ActionMap& map) + { + const auto existingAction = map.find(flags); + if (existingAction == map.end()) { + actionLocation = actions.size(); + actions.append(static_cast<SerializedActionByte>(actionType)); + map.set(flags, actionLocation); + } else + actionLocation = existingAction->value; + }; + + switch (actionType) { + case ActionType::CSSDisplayNoneStyleSheet: + case ActionType::InvalidAction: + RELEASE_ASSERT_NOT_REACHED(); + + case ActionType::CSSDisplayNoneSelector: { + const auto addResult = cssDisplayNoneActionsMap.add(rule.trigger(), PendingDisplayNoneActions()); + PendingDisplayNoneActions& pendingDisplayNoneActions = addResult.iterator->value; + pendingDisplayNoneActions.selectors.append(rule.action().stringArgument()); + pendingDisplayNoneActions.clientLocations.append(actionLocations.size()); + + actionLocation = std::numeric_limits<unsigned>::max(); + break; + } + case ActionType::IgnorePreviousRules: + findOrMakeActionLocation(ignorePreviousRuleActionsMap); + break; + case ActionType::BlockLoad: + findOrMakeActionLocation(blockLoadActionsMap); + break; + case ActionType::BlockCookies: + findOrMakeActionLocation(blockCookiesActionsMap); + break; + case ActionType::MakeHTTPS: + findOrMakeActionLocation(makeHTTPSActionsMap); + break; + } + + actionLocations.append(actionLocation); + } + resolvePendingDisplayNoneActions(actions, actionLocations, cssDisplayNoneActionsMap); + return actionLocations; +} + +typedef HashSet<uint64_t, DefaultHash<uint64_t>::Hash, WTF::UnsignedWithZeroKeyHashTraits<uint64_t>> UniversalActionSet; + +static void addUniversalActionsToDFA(DFA& dfa, const UniversalActionSet& universalActions) +{ + if (universalActions.isEmpty()) + return; + + DFANode& root = dfa.nodes[dfa.root]; + ASSERT(!root.actionsLength()); + unsigned actionsStart = dfa.actions.size(); + dfa.actions.reserveCapacity(dfa.actions.size() + universalActions.size()); + for (uint64_t action : universalActions) + dfa.actions.uncheckedAppend(action); + unsigned actionsEnd = dfa.actions.size(); + + unsigned actionsLength = actionsEnd - actionsStart; + RELEASE_ASSERT_WITH_MESSAGE(actionsLength < std::numeric_limits<uint16_t>::max(), "Too many uncombined actions that match everything"); + root.setActions(actionsStart, static_cast<uint16_t>(actionsLength)); +} + +std::error_code compileRuleList(ContentExtensionCompilationClient& client, String&& ruleList) +{ + Vector<ContentExtensionRule> parsedRuleList; + auto parserError = parseRuleList(ruleList, parsedRuleList); + ruleList = String(); + if (parserError) + return parserError; + +#if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING + double patternPartitioningStart = monotonicallyIncreasingTime(); +#endif + + Vector<SerializedActionByte> actions; + Vector<unsigned> actionLocations = serializeActions(parsedRuleList, actions); + client.writeActions(WTFMove(actions)); + LOG_LARGE_STRUCTURES(actions, actions.capacity() * sizeof(SerializedActionByte)); + actions.clear(); + + UniversalActionSet universalActionsWithoutDomains; + UniversalActionSet universalActionsWithDomains; + + // FIXME: These don't all need to be in memory at the same time. + CombinedURLFilters filtersWithoutDomains; + CombinedURLFilters filtersWithDomains; + CombinedURLFilters domainFilters; + URLFilterParser filtersWithoutDomainParser(filtersWithoutDomains); + URLFilterParser filtersWithDomainParser(filtersWithDomains); + + for (unsigned ruleIndex = 0; ruleIndex < parsedRuleList.size(); ++ruleIndex) { + const ContentExtensionRule& contentExtensionRule = parsedRuleList[ruleIndex]; + const Trigger& trigger = contentExtensionRule.trigger(); + ASSERT(trigger.urlFilter.length()); + + // High bits are used for flags. This should match how they are used in DFABytecodeCompiler::compileNode. + ASSERT(!trigger.flags || ActionFlagMask & (static_cast<uint64_t>(trigger.flags) << 32)); + ASSERT(!(~ActionFlagMask & (static_cast<uint64_t>(trigger.flags) << 32))); + uint64_t actionLocationAndFlags = (static_cast<uint64_t>(trigger.flags) << 32) | static_cast<uint64_t>(actionLocations[ruleIndex]); + URLFilterParser::ParseStatus status = URLFilterParser::Ok; + if (trigger.domains.isEmpty()) { + ASSERT(trigger.domainCondition == Trigger::DomainCondition::None); + status = filtersWithoutDomainParser.addPattern(trigger.urlFilter, trigger.urlFilterIsCaseSensitive, actionLocationAndFlags); + if (status == URLFilterParser::MatchesEverything) { + universalActionsWithoutDomains.add(actionLocationAndFlags); + status = URLFilterParser::Ok; + } + if (status != URLFilterParser::Ok) { + dataLogF("Error while parsing %s: %s\n", trigger.urlFilter.utf8().data(), URLFilterParser::statusString(status).utf8().data()); + return ContentExtensionError::JSONInvalidRegex; + } + } else { + if (trigger.domainCondition == Trigger::DomainCondition::IfDomain) + actionLocationAndFlags |= IfDomainFlag; + else { + ASSERT(trigger.domainCondition == Trigger::DomainCondition::UnlessDomain); + ASSERT(!(actionLocationAndFlags & IfDomainFlag)); + } + + status = filtersWithDomainParser.addPattern(trigger.urlFilter, trigger.urlFilterIsCaseSensitive, actionLocationAndFlags); + if (status == URLFilterParser::MatchesEverything) { + universalActionsWithDomains.add(actionLocationAndFlags); + status = URLFilterParser::Ok; + } + if (status != URLFilterParser::Ok) { + dataLogF("Error while parsing %s: %s\n", trigger.urlFilter.utf8().data(), URLFilterParser::statusString(status).utf8().data()); + return ContentExtensionError::JSONInvalidRegex; + } + for (const String& domain : trigger.domains) + domainFilters.addDomain(actionLocationAndFlags, domain); + } + ASSERT(status == URLFilterParser::Ok); + } + LOG_LARGE_STRUCTURES(parsedRuleList, parsedRuleList.capacity() * sizeof(ContentExtensionRule)); // Doesn't include strings. + LOG_LARGE_STRUCTURES(actionLocations, actionLocations.capacity() * sizeof(unsigned)); + parsedRuleList.clear(); + actionLocations.clear(); + +#if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING + double patternPartitioningEnd = monotonicallyIncreasingTime(); + dataLogF(" Time spent partitioning the rules into groups: %f\n", (patternPartitioningEnd - patternPartitioningStart)); +#endif + + LOG_LARGE_STRUCTURES(filtersWithoutDomains, filtersWithoutDomains.memoryUsed()); + LOG_LARGE_STRUCTURES(filtersWithDomains, filtersWithDomains.memoryUsed()); + LOG_LARGE_STRUCTURES(domainFilters, domainFilters.memoryUsed()); + +#if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING + unsigned machinesWithoutDomainsCount = 0; + unsigned totalBytecodeSizeForMachinesWithoutDomains = 0; + unsigned machinesWithDomainsCount = 0; + unsigned totalBytecodeSizeForMachinesWithDomains = 0; + double totalNFAToByteCodeBuildTimeStart = monotonicallyIncreasingTime(); +#endif + + // Smaller maxNFASizes risk high compiling and interpreting times from having too many DFAs, + // larger maxNFASizes use too much memory when compiling. + const unsigned maxNFASize = 75000; + + bool firstNFAWithoutDomainsSeen = false; + + auto lowerFiltersWithoutDomainsDFAToBytecode = [&](DFA&& dfa) + { +#if CONTENT_EXTENSIONS_STATE_MACHINE_DEBUGGING + dataLogF("filtersWithoutDomains DFA\n"); + dfa.debugPrintDot(); +#endif + ASSERT_WITH_MESSAGE(!dfa.nodes[dfa.root].hasActions(), "All actions on the DFA root should come from regular expressions that match everything."); + + if (!firstNFAWithoutDomainsSeen) { + // Put all the universal actions on the first DFA. + addUniversalActionsToDFA(dfa, universalActionsWithoutDomains); + } + + Vector<DFABytecode> bytecode; + DFABytecodeCompiler compiler(dfa, bytecode); + compiler.compile(); + LOG_LARGE_STRUCTURES(bytecode, bytecode.capacity() * sizeof(uint8_t)); +#if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING + ++machinesWithoutDomainsCount; + totalBytecodeSizeForMachinesWithoutDomains += bytecode.size(); +#endif + client.writeFiltersWithoutDomainsBytecode(WTFMove(bytecode)); + + firstNFAWithoutDomainsSeen = true; + }; + + const unsigned smallDFASize = 100; + DFACombiner smallFiltersWithoutDomainsDFACombiner; + filtersWithoutDomains.processNFAs(maxNFASize, [&](NFA&& nfa) { +#if CONTENT_EXTENSIONS_STATE_MACHINE_DEBUGGING + dataLogF("filtersWithoutDomains NFA\n"); + nfa.debugPrintDot(); +#endif + + LOG_LARGE_STRUCTURES(nfa, nfa.memoryUsed()); + DFA dfa = NFAToDFA::convert(nfa); + LOG_LARGE_STRUCTURES(dfa, dfa.memoryUsed()); + + if (dfa.graphSize() < smallDFASize) + smallFiltersWithoutDomainsDFACombiner.addDFA(WTFMove(dfa)); + else { + dfa.minimize(); + lowerFiltersWithoutDomainsDFAToBytecode(WTFMove(dfa)); + } + }); + + + smallFiltersWithoutDomainsDFACombiner.combineDFAs(smallDFASize, [&](DFA&& dfa) { + LOG_LARGE_STRUCTURES(dfa, dfa.memoryUsed()); + lowerFiltersWithoutDomainsDFAToBytecode(WTFMove(dfa)); + }); + + ASSERT(filtersWithoutDomains.isEmpty()); + + if (!firstNFAWithoutDomainsSeen) { + // Our bytecode interpreter expects to have at least one DFA, so if we haven't seen any + // create a dummy one and add any universal actions. + + DFA dummyDFA = DFA::empty(); + addUniversalActionsToDFA(dummyDFA, universalActionsWithoutDomains); + + Vector<DFABytecode> bytecode; + DFABytecodeCompiler compiler(dummyDFA, bytecode); + compiler.compile(); + LOG_LARGE_STRUCTURES(bytecode, bytecode.capacity() * sizeof(uint8_t)); + client.writeFiltersWithoutDomainsBytecode(WTFMove(bytecode)); + } + LOG_LARGE_STRUCTURES(universalActionsWithoutDomains, universalActionsWithoutDomains.capacity() * sizeof(unsigned)); + universalActionsWithoutDomains.clear(); + + bool firstNFAWithDomainsSeen = false; + auto lowerFiltersWithDomainsDFAToBytecode = [&](DFA&& dfa) + { + if (!firstNFAWithDomainsSeen) { + // Put all the universal actions on the first DFA. + addUniversalActionsToDFA(dfa, universalActionsWithDomains); + } + + Vector<DFABytecode> bytecode; + DFABytecodeCompiler compiler(dfa, bytecode); + compiler.compile(); + LOG_LARGE_STRUCTURES(bytecode, bytecode.capacity() * sizeof(uint8_t)); +#if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING + ++machinesWithDomainsCount; + totalBytecodeSizeForMachinesWithDomains += bytecode.size(); +#endif + client.writeFiltersWithDomainsBytecode(WTFMove(bytecode)); + + firstNFAWithDomainsSeen = true; + }; + + DFACombiner smallFiltersWithDomainsDFACombiner; + filtersWithDomains.processNFAs(maxNFASize, [&](NFA&& nfa) { +#if CONTENT_EXTENSIONS_STATE_MACHINE_DEBUGGING + dataLogF("filtersWithDomains NFA\n"); + nfa.debugPrintDot(); +#endif + LOG_LARGE_STRUCTURES(nfa, nfa.memoryUsed()); + DFA dfa = NFAToDFA::convert(nfa); +#if CONTENT_EXTENSIONS_STATE_MACHINE_DEBUGGING + dataLogF("filtersWithDomains PRE MINIMIZING DFA\n"); + dfa.debugPrintDot(); +#endif + LOG_LARGE_STRUCTURES(dfa, dfa.memoryUsed()); + + ASSERT_WITH_MESSAGE(!dfa.nodes[dfa.root].hasActions(), "Filters with domains that match everything are not allowed right now."); + + if (dfa.graphSize() < smallDFASize) + smallFiltersWithDomainsDFACombiner.addDFA(WTFMove(dfa)); + else { + dfa.minimize(); + lowerFiltersWithDomainsDFAToBytecode(WTFMove(dfa)); + } + }); + smallFiltersWithDomainsDFACombiner.combineDFAs(smallDFASize, [&](DFA&& dfa) { + LOG_LARGE_STRUCTURES(dfa, dfa.memoryUsed()); + lowerFiltersWithDomainsDFAToBytecode(WTFMove(dfa)); + }); + ASSERT(filtersWithDomains.isEmpty()); + + if (!firstNFAWithDomainsSeen) { + // Our bytecode interpreter expects to have at least one DFA, so if we haven't seen any + // create a dummy one and add any universal actions. + + DFA dummyDFA = DFA::empty(); + addUniversalActionsToDFA(dummyDFA, universalActionsWithDomains); + + Vector<DFABytecode> bytecode; + DFABytecodeCompiler compiler(dummyDFA, bytecode); + compiler.compile(); + LOG_LARGE_STRUCTURES(bytecode, bytecode.capacity() * sizeof(uint8_t)); + client.writeFiltersWithDomainsBytecode(WTFMove(bytecode)); + } + LOG_LARGE_STRUCTURES(universalActionsWithDomains, universalActionsWithDomains.capacity() * sizeof(unsigned)); + universalActionsWithDomains.clear(); + + domainFilters.processNFAs(maxNFASize, [&](NFA&& nfa) { +#if CONTENT_EXTENSIONS_STATE_MACHINE_DEBUGGING + dataLogF("domainFilters NFA\n"); + nfa.debugPrintDot(); +#endif + LOG_LARGE_STRUCTURES(nfa, nfa.memoryUsed()); + DFA dfa = NFAToDFA::convert(nfa); +#if CONTENT_EXTENSIONS_STATE_MACHINE_DEBUGGING + dataLogF("domainFilters DFA\n"); + dfa.debugPrintDot(); +#endif + LOG_LARGE_STRUCTURES(dfa, dfa.memoryUsed()); + // Minimizing this DFA would not be effective because all actions are unique + // and because of the tree-like structure of this DFA. + ASSERT_WITH_MESSAGE(!dfa.nodes[dfa.root].hasActions(), "There should not be any domains that match everything."); + + Vector<DFABytecode> bytecode; + DFABytecodeCompiler compiler(dfa, bytecode); + compiler.compile(); + LOG_LARGE_STRUCTURES(bytecode, bytecode.capacity() * sizeof(uint8_t)); + client.writeDomainFiltersBytecode(WTFMove(bytecode)); + }); + ASSERT(domainFilters.isEmpty()); + +#if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING + double totalNFAToByteCodeBuildTimeEnd = monotonicallyIncreasingTime(); + dataLogF(" Time spent building and compiling the DFAs: %f\n", (totalNFAToByteCodeBuildTimeEnd - totalNFAToByteCodeBuildTimeStart)); + + dataLogF(" Number of machines without domain filters: %d (total bytecode size = %d)\n", machinesWithoutDomainsCount, totalBytecodeSizeForMachinesWithoutDomains); + dataLogF(" Number of machines with domain filters: %d (total bytecode size = %d)\n", machinesWithDomainsCount, totalBytecodeSizeForMachinesWithDomains); +#endif + + client.finalize(); + + return { }; +} + +} // namespace ContentExtensions +} // namespace WebCore + +#endif // ENABLE(CONTENT_EXTENSIONS) diff --git a/Source/WebCore/contentextensions/ContentExtensionCompiler.h b/Source/WebCore/contentextensions/ContentExtensionCompiler.h new file mode 100644 index 000000000..3928f9544 --- /dev/null +++ b/Source/WebCore/contentextensions/ContentExtensionCompiler.h @@ -0,0 +1,55 @@ +/* + * Copyright (C) 2015 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +#if ENABLE(CONTENT_EXTENSIONS) + +#include "CompiledContentExtension.h" +#include <system_error> +#include <wtf/Forward.h> +#include <wtf/Ref.h> + +namespace WebCore { +namespace ContentExtensions { + +class ContentExtensionCompilationClient { +public: + virtual ~ContentExtensionCompilationClient() { } + + // Functions should be called in this order. All except writeActions and finalize can be called multiple times, though. + virtual void writeActions(Vector<SerializedActionByte>&&) = 0; + virtual void writeFiltersWithoutDomainsBytecode(Vector<DFABytecode>&&) = 0; + virtual void writeFiltersWithDomainsBytecode(Vector<DFABytecode>&&) = 0; + virtual void writeDomainFiltersBytecode(Vector<DFABytecode>&&) = 0; + virtual void finalize() = 0; +}; + +WEBCORE_EXPORT std::error_code compileRuleList(ContentExtensionCompilationClient&, String&&); + +} // namespace ContentExtensions +} // namespace WebCore + +#endif // ENABLE(CONTENT_EXTENSIONS) diff --git a/Source/WebCore/contentextensions/ContentExtensionError.cpp b/Source/WebCore/contentextensions/ContentExtensionError.cpp new file mode 100644 index 000000000..52e9cf2fd --- /dev/null +++ b/Source/WebCore/contentextensions/ContentExtensionError.cpp @@ -0,0 +1,101 @@ +/* + * Copyright (C) 2015 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "ContentExtensionError.h" + +#if ENABLE(CONTENT_EXTENSIONS) + +#include <string> +#include <wtf/NeverDestroyed.h> + +namespace WebCore { +namespace ContentExtensions { + +const char* WebKitContentBlockerDomain = "WebKitContentBlockerDomain"; + +const std::error_category& contentExtensionErrorCategory() +{ + class ContentExtensionErrorCategory : public std::error_category { + const char* name() const noexcept override + { + return "content extension"; + } + + std::string message(int errorCode) const override + { + switch (static_cast<ContentExtensionError>(errorCode)) { + case ContentExtensionError::JSONInvalid: + return "Failed to parse the JSON String."; + case ContentExtensionError::JSONTopLevelStructureNotAnObject: + return "Invalid input, the top level structure is not an object."; + case ContentExtensionError::JSONTopLevelStructureNotAnArray: + return "Invalid input, the top level structure is not an array."; + case ContentExtensionError::JSONInvalidObjectInTopLevelArray: + return "Invalid object in the top level array."; + case ContentExtensionError::JSONInvalidRule: + return "Invalid rule."; + case ContentExtensionError::JSONContainsNoRules: + return "Empty extension."; + case ContentExtensionError::JSONInvalidTrigger: + return "Invalid trigger object."; + case ContentExtensionError::JSONInvalidURLFilterInTrigger: + return "Invalid url-filter object."; + case ContentExtensionError::JSONInvalidTriggerFlagsArray: + return "Invalid trigger flags array."; + case ContentExtensionError::JSONInvalidObjectInTriggerFlagsArray: + return "Invalid object in the trigger flags array."; + case ContentExtensionError::JSONInvalidStringInTriggerFlagsArray: + return "Invalid string in the trigger flags array."; + case ContentExtensionError::JSONInvalidAction: + return "Invalid action object."; + case ContentExtensionError::JSONInvalidActionType: + return "Invalid action type."; + case ContentExtensionError::JSONInvalidCSSDisplayNoneActionType: + return "Invalid css-display-none action type. Requires a selector."; + case ContentExtensionError::JSONInvalidRegex: + return "Invalid or unsupported regular expression."; + case ContentExtensionError::JSONInvalidDomainList: + return "Invalid domain list."; + case ContentExtensionError::JSONTooManyRules: + return "Too many rules in JSON array."; + case ContentExtensionError::JSONDomainNotLowerCaseASCII: + return "Domains must be lower case ASCII. Use punycode to encode non-ASCII characters."; + case ContentExtensionError::JSONUnlessAndIfDomain: + return "A trigger cannot have both unless- and if-domain."; + } + + return std::string(); + } + }; + + static NeverDestroyed<ContentExtensionErrorCategory> contentExtensionErrorCategory; + return contentExtensionErrorCategory; +} + +} // namespace ContentExtensions +} // namespace WebCore + +#endif // ENABLE(CONTENT_EXTENSIONS) diff --git a/Source/WebCore/contentextensions/ContentExtensionError.h b/Source/WebCore/contentextensions/ContentExtensionError.h new file mode 100644 index 000000000..35462bacc --- /dev/null +++ b/Source/WebCore/contentextensions/ContentExtensionError.h @@ -0,0 +1,79 @@ +/* + * Copyright (C) 2015 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +#if ENABLE(CONTENT_EXTENSIONS) + +#include "PlatformExportMacros.h" +#include <system_error> + +namespace WebCore { +namespace ContentExtensions { + +enum class ContentExtensionError { + // JSON parser error + JSONInvalid = 1, + + // JSON semantics error + JSONTopLevelStructureNotAnObject, + JSONTopLevelStructureNotAnArray, + JSONInvalidObjectInTopLevelArray, + JSONInvalidRule, + JSONContainsNoRules, + + JSONInvalidTrigger, + JSONInvalidURLFilterInTrigger, + JSONInvalidTriggerFlagsArray, + JSONInvalidObjectInTriggerFlagsArray, + JSONInvalidStringInTriggerFlagsArray, + JSONInvalidDomainList, + JSONDomainNotLowerCaseASCII, + JSONUnlessAndIfDomain, + JSONTooManyRules, + + JSONInvalidAction, + JSONInvalidActionType, + JSONInvalidCSSDisplayNoneActionType, + JSONInvalidRegex, +}; + +extern const char* WebKitContentBlockerDomain; + +WEBCORE_EXPORT const std::error_category& contentExtensionErrorCategory(); + +inline std::error_code make_error_code(ContentExtensionError error) +{ + return { static_cast<int>(error), contentExtensionErrorCategory() }; +} + +} // namespace ContentExtensions +} // namespace WebCore + +namespace std { + template<> struct is_error_code_enum<WebCore::ContentExtensions::ContentExtensionError> : public true_type { }; +} + +#endif // ENABLE(CONTENT_EXTENSIONS) diff --git a/Source/WebCore/contentextensions/ContentExtensionParser.cpp b/Source/WebCore/contentextensions/ContentExtensionParser.cpp new file mode 100644 index 000000000..c0b3e3bd2 --- /dev/null +++ b/Source/WebCore/contentextensions/ContentExtensionParser.cpp @@ -0,0 +1,335 @@ +/* + * Copyright (C) 2014 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "ContentExtensionParser.h" + +#if ENABLE(CONTENT_EXTENSIONS) + +#include "CSSParser.h" +#include "CSSParserMode.h" +#include "CSSSelectorList.h" +#include "ContentExtensionError.h" +#include "ContentExtensionRule.h" +#include "ContentExtensionsBackend.h" +#include "ContentExtensionsDebugging.h" +#include <JavaScriptCore/JSCInlines.h> +#include <JavaScriptCore/JSGlobalObject.h> +#include <JavaScriptCore/JSONObject.h> +#include <JavaScriptCore/VM.h> +#include <wtf/CurrentTime.h> +#include <wtf/text/WTFString.h> + +using namespace JSC; + +namespace WebCore { + +namespace ContentExtensions { + +static bool containsOnlyASCIIWithNoUppercase(const String& domain) +{ + for (unsigned i = 0; i < domain.length(); ++i) { + UChar c = domain.at(i); + if (!isASCII(c) || isASCIIUpper(c)) + return false; + } + return true; +} + +static std::error_code getDomainList(ExecState& exec, const JSObject* arrayObject, Vector<String>& vector) +{ + VM& vm = exec.vm(); + auto scope = DECLARE_THROW_SCOPE(vm); + + ASSERT(vector.isEmpty()); + if (!arrayObject || !isJSArray(arrayObject)) + return ContentExtensionError::JSONInvalidDomainList; + const JSArray* array = jsCast<const JSArray*>(arrayObject); + + unsigned length = array->length(); + for (unsigned i = 0; i < length; ++i) { + const JSValue value = array->getIndex(&exec, i); + if (scope.exception() || !value.isString()) + return ContentExtensionError::JSONInvalidDomainList; + + // Domains should be punycode encoded lower case. + const String& domain = asString(value)->value(&exec); + if (domain.isEmpty()) + return ContentExtensionError::JSONInvalidDomainList; + if (!containsOnlyASCIIWithNoUppercase(domain)) + return ContentExtensionError::JSONDomainNotLowerCaseASCII; + vector.append(domain); + } + return { }; +} + +static std::error_code getTypeFlags(ExecState& exec, const JSValue& typeValue, ResourceFlags& flags, uint16_t (*stringToType)(const String&)) +{ + VM& vm = exec.vm(); + auto scope = DECLARE_THROW_SCOPE(vm); + + if (!typeValue.isObject()) + return { }; + + const JSObject* object = typeValue.toObject(&exec); + ASSERT(!scope.exception()); + if (!isJSArray(object)) + return ContentExtensionError::JSONInvalidTriggerFlagsArray; + + const JSArray* array = jsCast<const JSArray*>(object); + + unsigned length = array->length(); + for (unsigned i = 0; i < length; ++i) { + const JSValue value = array->getIndex(&exec, i); + if (scope.exception() || !value) + return ContentExtensionError::JSONInvalidObjectInTriggerFlagsArray; + + String name = value.toWTFString(&exec); + uint16_t type = stringToType(name); + if (!type) + return ContentExtensionError::JSONInvalidStringInTriggerFlagsArray; + + flags |= type; + } + + return { }; +} + +static std::error_code loadTrigger(ExecState& exec, const JSObject& ruleObject, Trigger& trigger) +{ + VM& vm = exec.vm(); + auto scope = DECLARE_THROW_SCOPE(vm); + + const JSValue triggerObject = ruleObject.get(&exec, Identifier::fromString(&exec, "trigger")); + if (!triggerObject || scope.exception() || !triggerObject.isObject()) + return ContentExtensionError::JSONInvalidTrigger; + + const JSValue urlFilterObject = triggerObject.get(&exec, Identifier::fromString(&exec, "url-filter")); + if (!urlFilterObject || scope.exception() || !urlFilterObject.isString()) + return ContentExtensionError::JSONInvalidURLFilterInTrigger; + + String urlFilter = asString(urlFilterObject)->value(&exec); + if (urlFilter.isEmpty()) + return ContentExtensionError::JSONInvalidURLFilterInTrigger; + + trigger.urlFilter = urlFilter; + + const JSValue urlFilterCaseValue = triggerObject.get(&exec, Identifier::fromString(&exec, "url-filter-is-case-sensitive")); + if (urlFilterCaseValue && !scope.exception() && urlFilterCaseValue.isBoolean()) + trigger.urlFilterIsCaseSensitive = urlFilterCaseValue.toBoolean(&exec); + + const JSValue resourceTypeValue = triggerObject.get(&exec, Identifier::fromString(&exec, "resource-type")); + if (!scope.exception() && resourceTypeValue.isObject()) { + auto typeFlagsError = getTypeFlags(exec, resourceTypeValue, trigger.flags, readResourceType); + if (typeFlagsError) + return typeFlagsError; + } else if (!resourceTypeValue.isUndefined()) + return ContentExtensionError::JSONInvalidTriggerFlagsArray; + + const JSValue loadTypeValue = triggerObject.get(&exec, Identifier::fromString(&exec, "load-type")); + if (!scope.exception() && loadTypeValue.isObject()) { + auto typeFlagsError = getTypeFlags(exec, loadTypeValue, trigger.flags, readLoadType); + if (typeFlagsError) + return typeFlagsError; + } else if (!loadTypeValue.isUndefined()) + return ContentExtensionError::JSONInvalidTriggerFlagsArray; + + const JSValue ifDomain = triggerObject.get(&exec, Identifier::fromString(&exec, "if-domain")); + if (!scope.exception() && ifDomain.isObject()) { + auto ifDomainError = getDomainList(exec, asObject(ifDomain), trigger.domains); + if (ifDomainError) + return ifDomainError; + if (trigger.domains.isEmpty()) + return ContentExtensionError::JSONInvalidDomainList; + ASSERT(trigger.domainCondition == Trigger::DomainCondition::None); + trigger.domainCondition = Trigger::DomainCondition::IfDomain; + } else if (!ifDomain.isUndefined()) + return ContentExtensionError::JSONInvalidDomainList; + + const JSValue unlessDomain = triggerObject.get(&exec, Identifier::fromString(&exec, "unless-domain")); + if (!scope.exception() && unlessDomain.isObject()) { + if (trigger.domainCondition != Trigger::DomainCondition::None) + return ContentExtensionError::JSONUnlessAndIfDomain; + auto unlessDomainError = getDomainList(exec, asObject(unlessDomain), trigger.domains); + if (unlessDomainError) + return unlessDomainError; + if (trigger.domains.isEmpty()) + return ContentExtensionError::JSONInvalidDomainList; + trigger.domainCondition = Trigger::DomainCondition::UnlessDomain; + } else if (!unlessDomain.isUndefined()) + return ContentExtensionError::JSONInvalidDomainList; + + return { }; +} + +static bool isValidSelector(const String& selector) +{ + CSSParserContext context(HTMLQuirksMode); + CSSParser parser(context); + CSSSelectorList selectorList; + parser.parseSelector(selector, selectorList); + return selectorList.isValid(); +} + +static std::error_code loadAction(ExecState& exec, const JSObject& ruleObject, Action& action, bool& validSelector) +{ + VM& vm = exec.vm(); + auto scope = DECLARE_THROW_SCOPE(vm); + + validSelector = true; + const JSValue actionObject = ruleObject.get(&exec, Identifier::fromString(&exec, "action")); + if (!actionObject || scope.exception() || !actionObject.isObject()) + return ContentExtensionError::JSONInvalidAction; + + const JSValue typeObject = actionObject.get(&exec, Identifier::fromString(&exec, "type")); + if (!typeObject || scope.exception() || !typeObject.isString()) + return ContentExtensionError::JSONInvalidActionType; + + String actionType = asString(typeObject)->value(&exec); + + if (actionType == "block") + action = ActionType::BlockLoad; + else if (actionType == "ignore-previous-rules") + action = ActionType::IgnorePreviousRules; + else if (actionType == "block-cookies") + action = ActionType::BlockCookies; + else if (actionType == "css-display-none") { + JSValue selector = actionObject.get(&exec, Identifier::fromString(&exec, "selector")); + if (!selector || scope.exception() || !selector.isString()) + return ContentExtensionError::JSONInvalidCSSDisplayNoneActionType; + + String selectorString = asString(selector)->value(&exec); + if (!isValidSelector(selectorString)) { + // Skip rules with invalid selectors to be backwards-compatible. + validSelector = false; + return { }; + } + action = Action(ActionType::CSSDisplayNoneSelector, selectorString); + } else if (actionType == "make-https") { + action = ActionType::MakeHTTPS; + } else + return ContentExtensionError::JSONInvalidActionType; + + return { }; +} + +static std::error_code loadRule(ExecState& exec, const JSObject& ruleObject, Vector<ContentExtensionRule>& ruleList) +{ + Trigger trigger; + auto triggerError = loadTrigger(exec, ruleObject, trigger); + if (triggerError) + return triggerError; + + Action action; + bool validSelector; + auto actionError = loadAction(exec, ruleObject, action, validSelector); + if (actionError) + return actionError; + + if (validSelector) + ruleList.append(ContentExtensionRule(trigger, action)); + return { }; +} + +static std::error_code loadEncodedRules(ExecState& exec, const String& rules, Vector<ContentExtensionRule>& ruleList) +{ + VM& vm = exec.vm(); + auto scope = DECLARE_THROW_SCOPE(vm); + + // FIXME: JSONParse should require callbacks instead of an ExecState. + const JSValue decodedRules = JSONParse(&exec, rules); + + if (scope.exception() || !decodedRules) + return ContentExtensionError::JSONInvalid; + + if (!decodedRules.isObject()) + return ContentExtensionError::JSONTopLevelStructureNotAnObject; + + const JSObject* topLevelObject = decodedRules.toObject(&exec); + if (!topLevelObject || scope.exception()) + return ContentExtensionError::JSONTopLevelStructureNotAnObject; + + if (!isJSArray(topLevelObject)) + return ContentExtensionError::JSONTopLevelStructureNotAnArray; + + const JSArray* topLevelArray = jsCast<const JSArray*>(topLevelObject); + + Vector<ContentExtensionRule> localRuleList; + + unsigned length = topLevelArray->length(); + const unsigned maxRuleCount = 50000; + if (length > maxRuleCount) + return ContentExtensionError::JSONTooManyRules; + for (unsigned i = 0; i < length; ++i) { + const JSValue value = topLevelArray->getIndex(&exec, i); + if (scope.exception() || !value) + return ContentExtensionError::JSONInvalidObjectInTopLevelArray; + + const JSObject* ruleObject = value.toObject(&exec); + if (!ruleObject || scope.exception()) + return ContentExtensionError::JSONInvalidRule; + + auto error = loadRule(exec, *ruleObject, localRuleList); + if (error) + return error; + } + + ruleList = WTFMove(localRuleList); + return { }; +} + +std::error_code parseRuleList(const String& rules, Vector<ContentExtensionRule>& ruleList) +{ +#if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING + double loadExtensionStartTime = monotonicallyIncreasingTime(); +#endif + RefPtr<VM> vm = VM::create(); + + JSLockHolder locker(vm.get()); + JSGlobalObject* globalObject = JSGlobalObject::create(*vm, JSGlobalObject::createStructure(*vm, jsNull())); + + ExecState* exec = globalObject->globalExec(); + auto error = loadEncodedRules(*exec, rules, ruleList); + + vm = nullptr; + + if (error) + return error; + + if (ruleList.isEmpty()) + return ContentExtensionError::JSONContainsNoRules; + +#if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING + double loadExtensionEndTime = monotonicallyIncreasingTime(); + dataLogF("Time spent loading extension %f\n", (loadExtensionEndTime - loadExtensionStartTime)); +#endif + + return { }; +} + +} // namespace ContentExtensions +} // namespace WebCore + +#endif // ENABLE(CONTENT_EXTENSIONS) diff --git a/Source/WebCore/contentextensions/ContentExtensionParser.h b/Source/WebCore/contentextensions/ContentExtensionParser.h new file mode 100644 index 000000000..9fbca15e5 --- /dev/null +++ b/Source/WebCore/contentextensions/ContentExtensionParser.h @@ -0,0 +1,45 @@ +/* + * Copyright (C) 2014 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +#if ENABLE(CONTENT_EXTENSIONS) + +#include <system_error> +#include <wtf/Forward.h> +#include <wtf/Vector.h> + +namespace WebCore { + +namespace ContentExtensions { + +class ContentExtensionRule; + +std::error_code parseRuleList(const String& rules, Vector<ContentExtensionRule>&); + +} // namespace ContentExtensions +} // namespace WebCore + +#endif // ENABLE(CONTENT_EXTENSIONS) diff --git a/Source/WebCore/contentextensions/ContentExtensionRule.cpp b/Source/WebCore/contentextensions/ContentExtensionRule.cpp new file mode 100644 index 000000000..2dc73275c --- /dev/null +++ b/Source/WebCore/contentextensions/ContentExtensionRule.cpp @@ -0,0 +1,124 @@ +/* + * Copyright (C) 2014, 2015 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "ContentExtensionRule.h" + +#if ENABLE(CONTENT_EXTENSIONS) + +namespace WebCore { + +namespace ContentExtensions { + +ContentExtensionRule::ContentExtensionRule(const Trigger& trigger, const Action& action) + : m_trigger(trigger) + , m_action(action) +{ + ASSERT(!m_trigger.urlFilter.isEmpty()); +} + +Action Action::deserialize(const SerializedActionByte* actions, const uint32_t actionsLength, uint32_t location) +{ + RELEASE_ASSERT(location < actionsLength); + switch (static_cast<ActionType>(actions[location])) { + case ActionType::BlockCookies: + return Action(ActionType::BlockCookies, location); + case ActionType::BlockLoad: + return Action(ActionType::BlockLoad, location); + case ActionType::IgnorePreviousRules: + return Action(ActionType::IgnorePreviousRules, location); + case ActionType::MakeHTTPS: + return Action(ActionType::MakeHTTPS, location); + case ActionType::CSSDisplayNoneSelector: { + uint32_t headerLength = sizeof(ActionType) + sizeof(uint32_t) + sizeof(bool); + uint32_t stringStartIndex = location + headerLength; + RELEASE_ASSERT(actionsLength >= stringStartIndex); + uint32_t selectorLength = *reinterpret_cast<const unsigned*>(&actions[location + sizeof(ActionType)]); + bool wideCharacters = actions[location + sizeof(ActionType) + sizeof(uint32_t)]; + + if (wideCharacters) { + RELEASE_ASSERT(actionsLength >= stringStartIndex + selectorLength * sizeof(UChar)); + return Action(ActionType::CSSDisplayNoneSelector, String(reinterpret_cast<const UChar*>(&actions[stringStartIndex]), selectorLength), location); + } + RELEASE_ASSERT(actionsLength >= stringStartIndex + selectorLength * sizeof(LChar)); + return Action(ActionType::CSSDisplayNoneSelector, String(reinterpret_cast<const LChar*>(&actions[stringStartIndex]), selectorLength), location); + } + case ActionType::CSSDisplayNoneStyleSheet: + case ActionType::InvalidAction: + default: + RELEASE_ASSERT_NOT_REACHED(); + } +} + +ActionType Action::deserializeType(const SerializedActionByte* actions, const uint32_t actionsLength, uint32_t location) +{ + RELEASE_ASSERT(location < actionsLength); + ActionType type = static_cast<ActionType>(actions[location]); + switch (type) { + case ActionType::BlockCookies: + case ActionType::BlockLoad: + case ActionType::IgnorePreviousRules: + case ActionType::CSSDisplayNoneSelector: + case ActionType::MakeHTTPS: + return type; + case ActionType::CSSDisplayNoneStyleSheet: + case ActionType::InvalidAction: + default: + RELEASE_ASSERT_NOT_REACHED(); + } +} + +uint32_t Action::serializedLength(const SerializedActionByte* actions, const uint32_t actionsLength, uint32_t location) +{ + RELEASE_ASSERT(location < actionsLength); + switch (static_cast<ActionType>(actions[location])) { + case ActionType::BlockCookies: + case ActionType::BlockLoad: + case ActionType::IgnorePreviousRules: + case ActionType::MakeHTTPS: + return sizeof(ActionType); + case ActionType::CSSDisplayNoneSelector: { + uint32_t headerLength = sizeof(ActionType) + sizeof(uint32_t) + sizeof(bool); + uint32_t stringStartIndex = location + headerLength; + RELEASE_ASSERT(actionsLength >= stringStartIndex); + uint32_t selectorLength = *reinterpret_cast<const unsigned*>(&actions[location + sizeof(ActionType)]); + bool wideCharacters = actions[location + sizeof(ActionType) + sizeof(uint32_t)]; + + if (wideCharacters) + return headerLength + selectorLength * sizeof(UChar); + return headerLength + selectorLength * sizeof(LChar); + } + case ActionType::CSSDisplayNoneStyleSheet: + case ActionType::InvalidAction: + default: + RELEASE_ASSERT_NOT_REACHED(); + } +} + +} // namespace ContentExtensions + +} // namespace WebCore + +#endif // ENABLE(CONTENT_EXTENSIONS) diff --git a/Source/WebCore/contentextensions/ContentExtensionRule.h b/Source/WebCore/contentextensions/ContentExtensionRule.h new file mode 100644 index 000000000..8983a8a30 --- /dev/null +++ b/Source/WebCore/contentextensions/ContentExtensionRule.h @@ -0,0 +1,191 @@ +/* + * Copyright (C) 2014, 2015 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +#if ENABLE(CONTENT_EXTENSIONS) + +#include "ContentExtensionActions.h" +#include "ResourceLoadInfo.h" +#include <wtf/text/WTFString.h> + +namespace WebCore { + +namespace ContentExtensions { + +// A ContentExtensionRule is the smallest unit in a ContentExtension. +// +// It is composed of a trigger and an action. The trigger defines on what kind of content this extension should apply. +// The action defines what to perform on that content. + +struct Trigger { + String urlFilter; + bool urlFilterIsCaseSensitive { false }; + ResourceFlags flags { 0 }; + Vector<String> domains; + enum class DomainCondition { + None, + IfDomain, + UnlessDomain, + } domainCondition { DomainCondition::None }; + + ~Trigger() + { + ASSERT(domains.isEmpty() == (domainCondition == DomainCondition::None)); + } + + bool isEmpty() const + { + return urlFilter.isEmpty() + && !urlFilterIsCaseSensitive + && !flags + && domains.isEmpty() + && domainCondition == DomainCondition::None; + } + + bool operator==(const Trigger& other) const + { + return urlFilter == other.urlFilter + && urlFilterIsCaseSensitive == other.urlFilterIsCaseSensitive + && flags == other.flags + && domains == other.domains + && domainCondition == other.domainCondition; + } +}; + +struct TriggerHash { + static unsigned hash(const Trigger& trigger) + { + unsigned hash = trigger.urlFilterIsCaseSensitive ? 10619863 : 40960001; + if (!trigger.urlFilter.isNull()) + hash ^= StringHash::hash(trigger.urlFilter); + hash = WTF::pairIntHash(hash, DefaultHash<ResourceFlags>::Hash::hash(trigger.flags)); + + for (const String& domain : trigger.domains) + hash ^= StringHash::hash(domain); + + if (trigger.domainCondition == Trigger::DomainCondition::IfDomain) + hash |= 1 << 16; + else if (trigger.domainCondition == Trigger::DomainCondition::IfDomain) + hash |= 1 << 31; + return hash; + } + + static bool equal(const Trigger& a, const Trigger& b) + { + return a == b; + } + + static const bool safeToCompareToEmptyOrDeleted = false; +}; + +struct TriggerHashTraits : public WTF::CustomHashTraits<Trigger> { + static const bool emptyValueIsZero = false; + static const bool hasIsEmptyValueFunction = true; + + static void constructDeletedValue(Trigger& trigger) + { + new (NotNull, std::addressof(trigger.urlFilter)) String(WTF::HashTableDeletedValue); + } + + static bool isDeletedValue(const Trigger& trigger) + { + return trigger.urlFilter.isHashTableDeletedValue(); + } + + static Trigger emptyValue() + { + return Trigger(); + } + + static bool isEmptyValue(const Trigger& trigger) + { + return trigger.isEmpty(); + } +}; + +struct Action { + Action() + : m_type(ActionType::InvalidAction) + , m_actionID(std::numeric_limits<uint32_t>::max()) + { + } + + Action(ActionType type, const String& stringArgument, uint32_t actionID = std::numeric_limits<uint32_t>::max()) + : m_type(type) + , m_actionID(actionID) + , m_stringArgument(stringArgument) + { + ASSERT(type == ActionType::CSSDisplayNoneSelector || type == ActionType::CSSDisplayNoneStyleSheet); + } + + Action(ActionType type, uint32_t actionID = std::numeric_limits<uint32_t>::max()) + : m_type(type) + , m_actionID(actionID) + { + ASSERT(type != ActionType::CSSDisplayNoneSelector && type != ActionType::CSSDisplayNoneStyleSheet); + } + + bool operator==(const Action& other) const + { + return m_type == other.m_type + && m_extensionIdentifier == other.m_extensionIdentifier + && m_actionID == other.m_actionID + && m_stringArgument == other.m_stringArgument; + } + + static Action deserialize(const SerializedActionByte* actions, const uint32_t actionsLength, uint32_t location); + static ActionType deserializeType(const SerializedActionByte* actions, const uint32_t actionsLength, uint32_t location); + static uint32_t serializedLength(const SerializedActionByte* actions, const uint32_t actionsLength, uint32_t location); + + void setExtensionIdentifier(const String& extensionIdentifier) { m_extensionIdentifier = extensionIdentifier; } + const String& extensionIdentifier() const { return m_extensionIdentifier; } + ActionType type() const { return m_type; } + uint32_t actionID() const { return m_actionID; } + const String& stringArgument() const { return m_stringArgument; } + +private: + String m_extensionIdentifier; + ActionType m_type; + uint32_t m_actionID; + String m_stringArgument; +}; + +class ContentExtensionRule { +public: + ContentExtensionRule(const Trigger&, const Action&); + + const Trigger& trigger() const { return m_trigger; } + const Action& action() const { return m_action; } + +private: + Trigger m_trigger; + Action m_action; +}; + +} // namespace ContentExtensions +} // namespace WebCore + +#endif // ENABLE(CONTENT_EXTENSIONS) diff --git a/Source/WebCore/contentextensions/ContentExtensionStyleSheet.cpp b/Source/WebCore/contentextensions/ContentExtensionStyleSheet.cpp new file mode 100644 index 000000000..e24a3669d --- /dev/null +++ b/Source/WebCore/contentextensions/ContentExtensionStyleSheet.cpp @@ -0,0 +1,70 @@ +/* + * Copyright (C) 2015-2016 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "ContentExtensionStyleSheet.h" + +#if ENABLE(CONTENT_EXTENSIONS) + +#include "CSSStyleSheet.h" +#include "ContentExtensionsBackend.h" +#include "Document.h" +#include "StyleSheetContents.h" +#include <wtf/text/StringBuilder.h> + +namespace WebCore { +namespace ContentExtensions { + +ContentExtensionStyleSheet::ContentExtensionStyleSheet(Document& document) + : m_styleSheet(CSSStyleSheet::create(StyleSheetContents::create(), document)) +{ + m_styleSheet->contents().setIsUserStyleSheet(true); +} + +ContentExtensionStyleSheet::~ContentExtensionStyleSheet() +{ + m_styleSheet->clearOwnerNode(); +} + +bool ContentExtensionStyleSheet::addDisplayNoneSelector(const String& selector, uint32_t selectorID) +{ + ASSERT(selectorID != std::numeric_limits<uint32_t>::max()); + + if (!m_addedSelectorIDs.add(selectorID).isNewEntry) + return false; + + StringBuilder css; + css.append(selector); + css.append('{'); + css.append(ContentExtensionsBackend::displayNoneCSSRule()); + css.append('}'); + m_styleSheet->contents().parseString(css.toString()); + return true; +} + +} // namespace ContentExtensions +} // namespace WebCore + +#endif // ENABLE(CONTENT_EXTENSIONS) diff --git a/Source/WebCore/contentextensions/ContentExtensionStyleSheet.h b/Source/WebCore/contentextensions/ContentExtensionStyleSheet.h new file mode 100644 index 000000000..307736235 --- /dev/null +++ b/Source/WebCore/contentextensions/ContentExtensionStyleSheet.h @@ -0,0 +1,64 @@ +/* + * Copyright (C) 2015-2016 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +#if ENABLE(CONTENT_EXTENSIONS) + +#include "CSSStyleSheet.h" +#include <wtf/HashSet.h> +#include <wtf/Ref.h> +#include <wtf/RefCounted.h> +#include <wtf/text/WTFString.h> + +namespace WebCore { + +class Document; + +namespace ContentExtensions { + +class ContentExtensionStyleSheet : public RefCounted<ContentExtensionStyleSheet> { +public: + static Ref<ContentExtensionStyleSheet> create(Document& document) + { + return adoptRef(*new ContentExtensionStyleSheet(document)); + } + virtual ~ContentExtensionStyleSheet(); + + bool addDisplayNoneSelector(const String& selector, uint32_t selectorID); + + CSSStyleSheet& styleSheet() { return m_styleSheet.get(); } + +private: + ContentExtensionStyleSheet(Document&); + + Ref<CSSStyleSheet> m_styleSheet; + HashSet<uint32_t, DefaultHash<uint32_t>::Hash, WTF::UnsignedWithZeroKeyHashTraits<uint32_t>> m_addedSelectorIDs; +}; + +} // namespace ContentExtensions +} // namespace WebCore + +#endif // ENABLE(CONTENT_EXTENSIONS) diff --git a/Source/WebCore/contentextensions/ContentExtensionsBackend.cpp b/Source/WebCore/contentextensions/ContentExtensionsBackend.cpp new file mode 100644 index 000000000..83ea96e4e --- /dev/null +++ b/Source/WebCore/contentextensions/ContentExtensionsBackend.cpp @@ -0,0 +1,250 @@ +/* + * Copyright (C) 2014 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "ContentExtensionsBackend.h" + +#if ENABLE(CONTENT_EXTENSIONS) + +#include "CompiledContentExtension.h" +#include "ContentExtension.h" +#include "ContentExtensionsDebugging.h" +#include "DFABytecodeInterpreter.h" +#include "Document.h" +#include "DocumentLoader.h" +#include "ExtensionStyleSheets.h" +#include "Frame.h" +#include "FrameLoaderClient.h" +#include "MainFrame.h" +#include "ResourceLoadInfo.h" +#include "URL.h" +#include "UserContentController.h" +#include <wtf/NeverDestroyed.h> +#include <wtf/text/CString.h> + +namespace WebCore { + +namespace ContentExtensions { + +void ContentExtensionsBackend::addContentExtension(const String& identifier, RefPtr<CompiledContentExtension> compiledContentExtension) +{ + ASSERT(!identifier.isEmpty()); + if (identifier.isEmpty()) + return; + + if (!compiledContentExtension) { + removeContentExtension(identifier); + return; + } + + RefPtr<ContentExtension> extension = ContentExtension::create(identifier, adoptRef(*compiledContentExtension.leakRef())); + m_contentExtensions.set(identifier, WTFMove(extension)); +} + +void ContentExtensionsBackend::removeContentExtension(const String& identifier) +{ + m_contentExtensions.remove(identifier); +} + +void ContentExtensionsBackend::removeAllContentExtensions() +{ + m_contentExtensions.clear(); +} + +Vector<Action> ContentExtensionsBackend::actionsForResourceLoad(const ResourceLoadInfo& resourceLoadInfo) const +{ +#if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING + double addedTimeStart = monotonicallyIncreasingTime(); +#endif + if (resourceLoadInfo.resourceURL.protocolIsData()) + return Vector<Action>(); + + const String& urlString = resourceLoadInfo.resourceURL.string(); + ASSERT_WITH_MESSAGE(urlString.containsOnlyASCII(), "A decoded URL should only contain ASCII characters. The matching algorithm assumes the input is ASCII."); + const CString& urlCString = urlString.utf8(); + + Vector<Action> finalActions; + ResourceFlags flags = resourceLoadInfo.getResourceFlags(); + for (auto& contentExtension : m_contentExtensions.values()) { + RELEASE_ASSERT(contentExtension); + const CompiledContentExtension& compiledExtension = contentExtension->compiledExtension(); + + DFABytecodeInterpreter withoutDomainsInterpreter(compiledExtension.filtersWithoutDomainsBytecode(), compiledExtension.filtersWithoutDomainsBytecodeLength()); + DFABytecodeInterpreter::Actions withoutDomainsActions = withoutDomainsInterpreter.interpret(urlCString, flags); + + String domain = resourceLoadInfo.mainDocumentURL.host(); + DFABytecodeInterpreter withDomainsInterpreter(compiledExtension.filtersWithDomainsBytecode(), compiledExtension.filtersWithDomainsBytecodeLength()); + DFABytecodeInterpreter::Actions withDomainsActions = withDomainsInterpreter.interpretWithDomains(urlCString, flags, contentExtension->cachedDomainActions(domain)); + + const SerializedActionByte* actions = compiledExtension.actions(); + const unsigned actionsLength = compiledExtension.actionsLength(); + + bool sawIgnorePreviousRules = false; + const Vector<uint32_t>& universalWithDomains = contentExtension->universalActionsWithDomains(domain); + const Vector<uint32_t>& universalWithoutDomains = contentExtension->universalActionsWithoutDomains(); + if (!withoutDomainsActions.isEmpty() || !withDomainsActions.isEmpty() || !universalWithDomains.isEmpty() || !universalWithoutDomains.isEmpty()) { + Vector<uint32_t> actionLocations; + actionLocations.reserveInitialCapacity(withoutDomainsActions.size() + withDomainsActions.size() + universalWithoutDomains.size() + universalWithDomains.size()); + for (uint64_t actionLocation : withoutDomainsActions) + actionLocations.uncheckedAppend(static_cast<uint32_t>(actionLocation)); + for (uint64_t actionLocation : withDomainsActions) + actionLocations.uncheckedAppend(static_cast<uint32_t>(actionLocation)); + for (uint32_t actionLocation : universalWithoutDomains) + actionLocations.uncheckedAppend(actionLocation); + for (uint32_t actionLocation : universalWithDomains) + actionLocations.uncheckedAppend(actionLocation); + std::sort(actionLocations.begin(), actionLocations.end()); + + // Add actions in reverse order to properly deal with IgnorePreviousRules. + for (unsigned i = actionLocations.size(); i; i--) { + Action action = Action::deserialize(actions, actionsLength, actionLocations[i - 1]); + action.setExtensionIdentifier(contentExtension->identifier()); + if (action.type() == ActionType::IgnorePreviousRules) { + sawIgnorePreviousRules = true; + break; + } + finalActions.append(action); + } + } + if (!sawIgnorePreviousRules) { + finalActions.append(Action(ActionType::CSSDisplayNoneStyleSheet, contentExtension->identifier())); + finalActions.last().setExtensionIdentifier(contentExtension->identifier()); + } + } +#if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING + double addedTimeEnd = monotonicallyIncreasingTime(); + dataLogF("Time added: %f microseconds %s \n", (addedTimeEnd - addedTimeStart) * 1.0e6, resourceLoadInfo.resourceURL.string().utf8().data()); +#endif + return finalActions; +} + +StyleSheetContents* ContentExtensionsBackend::globalDisplayNoneStyleSheet(const String& identifier) const +{ + const auto& contentExtension = m_contentExtensions.get(identifier); + return contentExtension ? contentExtension->globalDisplayNoneStyleSheet() : nullptr; +} + +BlockedStatus ContentExtensionsBackend::processContentExtensionRulesForLoad(const URL& url, ResourceType resourceType, DocumentLoader& initiatingDocumentLoader) +{ + if (m_contentExtensions.isEmpty()) + return { }; + + Document* currentDocument = nullptr; + URL mainDocumentURL; + + if (Frame* frame = initiatingDocumentLoader.frame()) { + currentDocument = frame->document(); + + if (initiatingDocumentLoader.isLoadingMainResource() + && frame->isMainFrame() + && resourceType == ResourceType::Document) + mainDocumentURL = url; + else if (Document* mainDocument = frame->mainFrame().document()) + mainDocumentURL = mainDocument->url(); + } + + ResourceLoadInfo resourceLoadInfo = { url, mainDocumentURL, resourceType }; + Vector<ContentExtensions::Action> actions = actionsForResourceLoad(resourceLoadInfo); + + bool willBlockLoad = false; + bool willBlockCookies = false; + bool willMakeHTTPS = false; + for (const auto& action : actions) { + switch (action.type()) { + case ContentExtensions::ActionType::BlockLoad: + willBlockLoad = true; + break; + case ContentExtensions::ActionType::BlockCookies: + willBlockCookies = true; + break; + case ContentExtensions::ActionType::CSSDisplayNoneSelector: + if (resourceType == ResourceType::Document) + initiatingDocumentLoader.addPendingContentExtensionDisplayNoneSelector(action.extensionIdentifier(), action.stringArgument(), action.actionID()); + else if (currentDocument) + currentDocument->extensionStyleSheets().addDisplayNoneSelector(action.extensionIdentifier(), action.stringArgument(), action.actionID()); + break; + case ContentExtensions::ActionType::CSSDisplayNoneStyleSheet: { + StyleSheetContents* styleSheetContents = globalDisplayNoneStyleSheet(action.stringArgument()); + if (styleSheetContents) { + if (resourceType == ResourceType::Document) + initiatingDocumentLoader.addPendingContentExtensionSheet(action.stringArgument(), *styleSheetContents); + else if (currentDocument) + currentDocument->extensionStyleSheets().maybeAddContentExtensionSheet(action.stringArgument(), *styleSheetContents); + } + break; + } + case ContentExtensions::ActionType::MakeHTTPS: { + if ((url.protocolIs("http") || url.protocolIs("ws")) + && (!url.port() || isDefaultPortForProtocol(url.port().value(), url.protocol()))) + willMakeHTTPS = true; + break; + } + case ContentExtensions::ActionType::IgnorePreviousRules: + case ContentExtensions::ActionType::InvalidAction: + RELEASE_ASSERT_NOT_REACHED(); + } + } + + if (currentDocument) { + if (willMakeHTTPS) { + ASSERT(url.protocolIs("http") || url.protocolIs("ws")); + String newProtocol = url.protocolIs("http") ? ASCIILiteral("https") : ASCIILiteral("wss"); + currentDocument->addConsoleMessage(MessageSource::ContentBlocker, MessageLevel::Info, makeString("Content blocker promoted URL from ", url.string(), " to ", newProtocol)); + } + if (willBlockLoad) + currentDocument->addConsoleMessage(MessageSource::ContentBlocker, MessageLevel::Info, makeString("Content blocker prevented frame displaying ", mainDocumentURL.string(), " from loading a resource from ", url.string())); + } + return { willBlockLoad, willBlockCookies, willMakeHTTPS }; +} + +const String& ContentExtensionsBackend::displayNoneCSSRule() +{ + static NeverDestroyed<const String> rule(ASCIILiteral("display:none !important;")); + return rule; +} + +void applyBlockedStatusToRequest(const BlockedStatus& status, ResourceRequest& request) +{ + if (status.blockedCookies) + request.setAllowCookies(false); + + if (status.madeHTTPS) { + const URL& originalURL = request.url(); + ASSERT(originalURL.protocolIs("http")); + ASSERT(!originalURL.port() || isDefaultPortForProtocol(originalURL.port().value(), originalURL.protocol())); + + URL newURL = originalURL; + newURL.setProtocol("https"); + if (originalURL.port()) + newURL.setPort(defaultPortForProtocol("https").value()); + request.setURL(newURL); + } +} + +} // namespace ContentExtensions + +} // namespace WebCore + +#endif // ENABLE(CONTENT_EXTENSIONS) diff --git a/Source/WebCore/contentextensions/ContentExtensionsBackend.h b/Source/WebCore/contentextensions/ContentExtensionsBackend.h new file mode 100644 index 000000000..f15ff8c43 --- /dev/null +++ b/Source/WebCore/contentextensions/ContentExtensionsBackend.h @@ -0,0 +1,78 @@ +/* + * Copyright (C) 2014, 2015 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +#if ENABLE(CONTENT_EXTENSIONS) + +#include "ContentExtension.h" +#include "ContentExtensionRule.h" +#include <wtf/HashMap.h> +#include <wtf/text/StringHash.h> +#include <wtf/text/WTFString.h> + +namespace WebCore { + +class DocumentLoader; +class ResourceRequest; +class URL; + +struct ResourceLoadInfo; + +namespace ContentExtensions { + +class CompiledContentExtension; + +// The ContentExtensionsBackend is the internal model of all the content extensions. +// +// It provides two services: +// 1) It stores the rules for each content extension. +// 2) It provides APIs for the WebCore interfaces to use those rules efficiently. +class ContentExtensionsBackend { +public: + // - Rule management interface. This can be used by upper layer. + + // Set a list of rules for a given name. If there were existing rules for the name, they are overriden. + // The identifier cannot be empty. + WEBCORE_EXPORT void addContentExtension(const String& identifier, RefPtr<CompiledContentExtension>); + WEBCORE_EXPORT void removeContentExtension(const String& identifier); + WEBCORE_EXPORT void removeAllContentExtensions(); + + // - Internal WebCore Interface. + WEBCORE_EXPORT Vector<Action> actionsForResourceLoad(const ResourceLoadInfo&) const; + WEBCORE_EXPORT StyleSheetContents* globalDisplayNoneStyleSheet(const String& identifier) const; + + BlockedStatus processContentExtensionRulesForLoad(const URL&, ResourceType, DocumentLoader& initiatingDocumentLoader); + + static const String& displayNoneCSSRule(); + +private: + HashMap<String, RefPtr<ContentExtension>> m_contentExtensions; +}; + +} // namespace ContentExtensions +} // namespace WebCore + +#endif // ENABLE(CONTENT_EXTENSIONS) diff --git a/Source/WebCore/contentextensions/ContentExtensionsDebugging.h b/Source/WebCore/contentextensions/ContentExtensionsDebugging.h new file mode 100644 index 000000000..2100c26a1 --- /dev/null +++ b/Source/WebCore/contentextensions/ContentExtensionsDebugging.h @@ -0,0 +1,51 @@ +/* + * Copyright (C) 2014, 2015 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +#if ENABLE(CONTENT_EXTENSIONS) + +#include <wtf/Vector.h> + +#define CONTENT_EXTENSIONS_STATE_MACHINE_DEBUGGING 0 + +#define CONTENT_EXTENSIONS_PERFORMANCE_REPORTING 0 + +#define CONTENT_EXTENSIONS_MEMORY_REPORTING 0 +#define CONTENT_EXTENSIONS_PAGE_SIZE 16384 + +#if CONTENT_EXTENSIONS_STATE_MACHINE_DEBUGGING +typedef WTF::CrashOnOverflow ContentExtensionsOverflowHandler; +#else +typedef UnsafeVectorOverflow ContentExtensionsOverflowHandler; +#endif + +#if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING +#define LOG_LARGE_STRUCTURES(name, size) if (size > 1000000) { dataLogF("NAME: %s SIZE %d\n", #name, (int)(size)); }; +#else +#define LOG_LARGE_STRUCTURES(name, size) +#endif + +#endif // ENABLE(CONTENT_EXTENSIONS) diff --git a/Source/WebCore/contentextensions/DFA.cpp b/Source/WebCore/contentextensions/DFA.cpp new file mode 100644 index 000000000..40dee4a32 --- /dev/null +++ b/Source/WebCore/contentextensions/DFA.cpp @@ -0,0 +1,180 @@ +/* + * Copyright (C) 2014, 2015 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "DFA.h" + +#if ENABLE(CONTENT_EXTENSIONS) + +#include "DFAMinimizer.h" +#include <wtf/DataLog.h> + +namespace WebCore { + +namespace ContentExtensions { + +DFA DFA::empty() +{ + DFA newDFA; + newDFA.nodes.append(DFANode()); + return newDFA; +} + +size_t DFA::memoryUsed() const +{ + return sizeof(DFA) + + actions.capacity() * sizeof(uint64_t) + + transitionRanges.capacity() * sizeof(CharRange) + + transitionDestinations.capacity() * sizeof(uint32_t) + + nodes.capacity() * sizeof(DFANode); +} + +void DFA::shrinkToFit() +{ + nodes.shrinkToFit(); + actions.shrinkToFit(); + transitionRanges.shrinkToFit(); + transitionDestinations.shrinkToFit(); +} + +void DFA::minimize() +{ + DFAMinimizer::minimize(*this); +} + +unsigned DFA::graphSize() const +{ + unsigned count = 0; + for (const DFANode& node : nodes) { + if (!node.isKilled()) + ++count; + } + return count; +} + +#if CONTENT_EXTENSIONS_STATE_MACHINE_DEBUGGING +static void printRange(bool firstRange, char rangeStart, char rangeEnd) +{ + if (!firstRange) + dataLogF(", "); + if (rangeStart == rangeEnd) { + if (rangeStart == '"' || rangeStart == '\\') + dataLogF("\\%c", rangeStart); + else if (rangeStart >= '!' && rangeStart <= '~') + dataLogF("%c", rangeStart); + else + dataLogF("\\\\%d", rangeStart); + } else + dataLogF("\\\\%d-\\\\%d", rangeStart, rangeEnd); +} + +static void printTransitions(const DFA& dfa, unsigned sourceNodeId) +{ + const DFANode& sourceNode = dfa.nodes[sourceNodeId]; + auto transitions = sourceNode.transitions(dfa); + + if (transitions.begin() == transitions.end()) + return; + + HashMap<unsigned, Vector<uint16_t>, DefaultHash<unsigned>::Hash, WTF::UnsignedWithZeroKeyHashTraits<unsigned>> transitionsPerTarget; + + // First, we build the list of transitions coming to each target node. + for (const auto& transition : transitions) { + unsigned target = transition.target(); + transitionsPerTarget.add(target, Vector<uint16_t>()); + + for (unsigned offset = 0; offset < transition.range().size(); ++offset) + transitionsPerTarget.find(target)->value.append(transition.first() + offset); + } + + // Then we go over each one an display the ranges one by one. + for (const auto& transitionPerTarget : transitionsPerTarget) { + dataLogF(" %d -> %d [label=\"", sourceNodeId, transitionPerTarget.key); + + Vector<uint16_t> incommingCharacters = transitionPerTarget.value; + std::sort(incommingCharacters.begin(), incommingCharacters.end()); + + char rangeStart = incommingCharacters.first(); + char rangeEnd = rangeStart; + bool first = true; + for (unsigned sortedTransitionIndex = 1; sortedTransitionIndex < incommingCharacters.size(); ++sortedTransitionIndex) { + char nextChar = incommingCharacters[sortedTransitionIndex]; + if (nextChar == rangeEnd+1) { + rangeEnd = nextChar; + continue; + } + printRange(first, rangeStart, rangeEnd); + rangeStart = rangeEnd = nextChar; + first = false; + } + printRange(first, rangeStart, rangeEnd); + + dataLogF("\"];\n"); + } +} + +void DFA::debugPrintDot() const +{ + dataLogF("digraph DFA_Transitions {\n"); + dataLogF(" rankdir=LR;\n"); + dataLogF(" node [shape=circle];\n"); + dataLogF(" {\n"); + for (unsigned i = 0; i < nodes.size(); ++i) { + if (nodes[i].isKilled()) + continue; + + dataLogF(" %d [label=<Node %d", i, i); + const Vector<uint64_t>& actions = nodes[i].actions(*this); + if (!actions.isEmpty()) { + dataLogF("<BR/>Actions: "); + for (unsigned actionIndex = 0; actionIndex < actions.size(); ++actionIndex) { + if (actionIndex) + dataLogF(", "); + dataLogF("%llu", actions[actionIndex]); + } + } + dataLogF(">]"); + + if (!actions.isEmpty()) + dataLogF(" [shape=doublecircle]"); + + dataLogF(";\n"); + } + dataLogF(" }\n"); + + dataLogF(" {\n"); + for (unsigned i = 0; i < nodes.size(); ++i) + printTransitions(*this, i); + + dataLogF(" }\n"); + dataLogF("}\n"); +} +#endif + +} // namespace ContentExtensions + +} // namespace WebCore + +#endif // ENABLE(CONTENT_EXTENSIONS) diff --git a/Source/WebCore/contentextensions/DFA.h b/Source/WebCore/contentextensions/DFA.h new file mode 100644 index 000000000..289ae592c --- /dev/null +++ b/Source/WebCore/contentextensions/DFA.h @@ -0,0 +1,87 @@ +/* + * Copyright (C) 2014, 2015 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +#if ENABLE(CONTENT_EXTENSIONS) + +#include "ContentExtensionsDebugging.h" +#include "DFANode.h" +#include "PlatformExportMacros.h" +#include <wtf/Vector.h> + +namespace WebCore { + +namespace ContentExtensions { + +// The DFA abstract a partial DFA graph in a compact form. +struct WEBCORE_EXPORT DFA { + static DFA empty(); + + void shrinkToFit(); + void minimize(); + unsigned graphSize() const; + size_t memoryUsed() const; + +#if CONTENT_EXTENSIONS_STATE_MACHINE_DEBUGGING + void debugPrintDot() const; +#endif + + Vector<DFANode, 0, ContentExtensionsOverflowHandler> nodes; + Vector<uint64_t, 0, ContentExtensionsOverflowHandler> actions; + Vector<CharRange, 0, ContentExtensionsOverflowHandler> transitionRanges; + Vector<uint32_t, 0, ContentExtensionsOverflowHandler> transitionDestinations; + unsigned root { 0 }; +}; + +inline const CharRange& DFANode::ConstRangeIterator::range() const +{ + return dfa.transitionRanges[position]; +} + +inline uint32_t DFANode::ConstRangeIterator::target() const +{ + return dfa.transitionDestinations[position]; +} + +inline const CharRange& DFANode::RangeIterator::range() const +{ + return dfa.transitionRanges[position]; +} + +inline uint32_t DFANode::RangeIterator::target() const +{ + return dfa.transitionDestinations[position]; +} + +inline void DFANode::RangeIterator::resetTarget(uint32_t newTarget) +{ + dfa.transitionDestinations[position] = newTarget; +} + +} // namespace ContentExtensions +} // namespace WebCore + +#endif // ENABLE(CONTENT_EXTENSIONS) diff --git a/Source/WebCore/contentextensions/DFABytecode.h b/Source/WebCore/contentextensions/DFABytecode.h new file mode 100644 index 000000000..50981c278 --- /dev/null +++ b/Source/WebCore/contentextensions/DFABytecode.h @@ -0,0 +1,134 @@ +/* + * Copyright (C) 2015 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +#if ENABLE(CONTENT_EXTENSIONS) + +namespace WebCore { + +namespace ContentExtensions { + +typedef uint8_t DFABytecode; + +// Increment UserContentExtensionStore::CurrentContentExtensionFileVersion +// when making any non-backwards-compatible changes to the bytecode. +// FIXME: Changes here should not require changes in WebKit2. Move all versioning to WebCore. +enum class DFABytecodeInstruction : uint8_t { + + // CheckValue has two arguments: + // The value to check (1 byte), + // The distance to jump if the values are equal (1-4 bytes, signed). + CheckValueCaseInsensitive = 0x0, + CheckValueCaseSensitive = 0x1, + + // Jump table if the input value is within a certain range. + // The lower value (1 byte). + // The higher value (1 byte). + // The distance to jump if the value is in the range + // for every character in the range (1-4 bytes, signed). + JumpTableCaseInsensitive = 0x2, + JumpTableCaseSensitive = 0x3, + + // Jump to an offset if the input value is within a certain range. + // The lower value (1 byte). + // The higher value (1 byte). + // The distance to jump if the value is in the range (1-4 bytes, signed). + CheckValueRangeCaseInsensitive = 0x4, + CheckValueRangeCaseSensitive = 0x5, + + // AppendAction has one argument: + // The action to append (4 bytes). + AppendAction = 0x6, + AppendActionWithIfDomain = 0x7, + + // TestFlagsAndAppendAction has two arguments: + // The flags to check before appending (2 bytes). + // The action to append (4 bytes). + TestFlagsAndAppendAction = 0x8, + TestFlagsAndAppendActionWithIfDomain = 0x9, + + // Terminate has no arguments. + Terminate = 0xA, + + // Jump has one argument: + // The distance to jump (1-4 bytes, signed). + Jump = 0xB, +}; + +// The last four bits contain the instruction type. +const uint8_t DFABytecodeInstructionMask = 0x0F; +const uint8_t DFABytecodeJumpSizeMask = 0xF0; + +// DFA bytecode starts with a 4 byte header which contains the size of this DFA. +typedef uint32_t DFAHeader; + +// A DFABytecodeJumpSize is stored in the top four bits of the DFABytecodeInstructions that have a jump. +enum DFABytecodeJumpSize { + Int8 = 0x10, + Int16 = 0x20, + Int24 = 0x30, + Int32 = 0x40, +}; +const int32_t Int24Max = (1 << 23) - 1; +const int32_t Int24Min = -(1 << 23); + +static inline DFABytecodeJumpSize smallestPossibleJumpSize(int32_t longestPossibleJump) +{ + if (longestPossibleJump <= std::numeric_limits<int8_t>::max() && longestPossibleJump >= std::numeric_limits<int8_t>::min()) + return Int8; + if (longestPossibleJump <= std::numeric_limits<int16_t>::max() && longestPossibleJump >= std::numeric_limits<int16_t>::min()) + return Int16; + if (longestPossibleJump <= Int24Max && longestPossibleJump >= Int24Min) + return Int24; + return Int32; +} + +static inline size_t instructionSizeWithArguments(DFABytecodeInstruction instruction) +{ + switch (instruction) { + case DFABytecodeInstruction::CheckValueCaseSensitive: + case DFABytecodeInstruction::CheckValueCaseInsensitive: + case DFABytecodeInstruction::JumpTableCaseInsensitive: + case DFABytecodeInstruction::JumpTableCaseSensitive: + case DFABytecodeInstruction::CheckValueRangeCaseSensitive: + case DFABytecodeInstruction::CheckValueRangeCaseInsensitive: + case DFABytecodeInstruction::Jump: + RELEASE_ASSERT_NOT_REACHED(); // Variable instruction size. + case DFABytecodeInstruction::AppendAction: + case DFABytecodeInstruction::AppendActionWithIfDomain: + return sizeof(DFABytecodeInstruction) + sizeof(uint32_t); + case DFABytecodeInstruction::TestFlagsAndAppendAction: + case DFABytecodeInstruction::TestFlagsAndAppendActionWithIfDomain: + return sizeof(DFABytecodeInstruction) + sizeof(uint16_t) + sizeof(uint32_t); + case DFABytecodeInstruction::Terminate: + return sizeof(DFABytecodeInstruction); + } +} + +} // namespace ContentExtensions +} // namespace WebCore + +#endif // ENABLE(CONTENT_EXTENSIONS) diff --git a/Source/WebCore/contentextensions/DFABytecodeCompiler.cpp b/Source/WebCore/contentextensions/DFABytecodeCompiler.cpp new file mode 100644 index 000000000..c9c5f8b8e --- /dev/null +++ b/Source/WebCore/contentextensions/DFABytecodeCompiler.cpp @@ -0,0 +1,495 @@ +/* + * Copyright (C) 2015 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "DFABytecodeCompiler.h" + +#if ENABLE(CONTENT_EXTENSIONS) + +#include "ContentExtensionRule.h" +#include "DFA.h" +#include "DFANode.h" + +namespace WebCore { + +namespace ContentExtensions { + +template <typename IntType> +inline void append(Vector<DFABytecode>& bytecode, IntType value) +{ + bytecode.resize(bytecode.size() + sizeof(IntType)); + *reinterpret_cast<IntType*>(&bytecode[bytecode.size() - sizeof(IntType)]) = value; +} + +inline void appendZeroes(Vector<DFABytecode>& bytecode, DFABytecodeJumpSize jumpSize) +{ + switch (jumpSize) { + case DFABytecodeJumpSize::Int8: + append<int8_t>(bytecode, 0); // This value will be set when linking. + break; + case DFABytecodeJumpSize::Int16: + append<int16_t>(bytecode, 0); // This value will be set when linking. + break; + case DFABytecodeJumpSize::Int24: + append<uint16_t>(bytecode, 0); + append<int8_t>(bytecode, 0); // These values will be set when linking. + break; + case DFABytecodeJumpSize::Int32: + append<int32_t>(bytecode, 0); // This value will be set when linking. + break; + } +} + +template <typename IntType> +inline void setBits(Vector<DFABytecode>& bytecode, uint32_t index, IntType value) +{ + RELEASE_ASSERT(index + sizeof(IntType) <= bytecode.size()); + ASSERT_WITH_MESSAGE(!*reinterpret_cast<IntType*>(&bytecode[index]), "Right now we should only be using setBits to overwrite values that were zero as a placeholder."); + *reinterpret_cast<IntType*>(&bytecode[index]) = value; +} + +static unsigned appendActionBytecodeSize(uint64_t action) +{ + if (action & ActionFlagMask) + return sizeof(DFABytecodeInstruction) + sizeof(uint16_t) + sizeof(uint32_t); + return sizeof(DFABytecodeInstruction) + sizeof(uint32_t); +} + +void DFABytecodeCompiler::emitAppendAction(uint64_t action) +{ + // High bits are used to store flags. See compileRuleList. + if (action & ActionFlagMask) { + if (action & IfDomainFlag) + append<DFABytecodeInstruction>(m_bytecode, DFABytecodeInstruction::TestFlagsAndAppendActionWithIfDomain); + else + append<DFABytecodeInstruction>(m_bytecode, DFABytecodeInstruction::TestFlagsAndAppendAction); + append<uint16_t>(m_bytecode, static_cast<uint16_t>(action >> 32)); + append<uint32_t>(m_bytecode, static_cast<uint32_t>(action)); + } else { + if (action & IfDomainFlag) + append<DFABytecodeInstruction>(m_bytecode, DFABytecodeInstruction::AppendActionWithIfDomain); + else + append<DFABytecodeInstruction>(m_bytecode, DFABytecodeInstruction::AppendAction); + append<uint32_t>(m_bytecode, static_cast<uint32_t>(action)); + } +} + +int32_t DFABytecodeCompiler::longestPossibleJump(uint32_t instructionLocation, uint32_t sourceNodeIndex, uint32_t destinationNodeIndex) +{ + if (m_nodeStartOffsets[destinationNodeIndex] == std::numeric_limits<uint32_t>::max()) { + // Jumping to a node that hasn't been compiled yet, we don't know exactly how far forward we will need to jump, + // so make sure we have enough room for the worst possible case, the farthest possible jump + // which would be the distance if there were no compacted branches between this jump and its destination. + ASSERT(instructionLocation >= m_nodeStartOffsets[sourceNodeIndex]); + ASSERT(m_maxNodeStartOffsets[destinationNodeIndex] > m_maxNodeStartOffsets[sourceNodeIndex]); + ASSERT(m_nodeStartOffsets[sourceNodeIndex] != std::numeric_limits<uint32_t>::max()); + return m_maxNodeStartOffsets[destinationNodeIndex] - m_maxNodeStartOffsets[sourceNodeIndex] - (m_nodeStartOffsets[sourceNodeIndex] - instructionLocation); + } + + // Jumping to an already compiled node, we already know exactly where we will need to jump to. + ASSERT(m_nodeStartOffsets[destinationNodeIndex] <= instructionLocation); + return m_nodeStartOffsets[destinationNodeIndex] - instructionLocation; +} + +void DFABytecodeCompiler::emitJump(uint32_t sourceNodeIndex, uint32_t destinationNodeIndex) +{ + uint32_t instructionLocation = m_bytecode.size(); + uint32_t jumpLocation = instructionLocation + sizeof(uint8_t); + int32_t longestPossibleJumpDistance = longestPossibleJump(instructionLocation, sourceNodeIndex, destinationNodeIndex); + DFABytecodeJumpSize jumpSize = smallestPossibleJumpSize(longestPossibleJumpDistance); + append<uint8_t>(m_bytecode, static_cast<uint8_t>(DFABytecodeInstruction::Jump) | jumpSize); + + m_linkRecords.append(LinkRecord({jumpSize, longestPossibleJumpDistance, instructionLocation, jumpLocation, destinationNodeIndex})); + appendZeroes(m_bytecode, jumpSize); +} + +void DFABytecodeCompiler::emitCheckValue(uint8_t value, uint32_t sourceNodeIndex, uint32_t destinationNodeIndex, bool caseSensitive) +{ + uint32_t instructionLocation = m_bytecode.size(); + uint32_t jumpLocation = instructionLocation + 2 * sizeof(uint8_t); + int32_t longestPossibleJumpDistance = longestPossibleJump(instructionLocation, sourceNodeIndex, destinationNodeIndex); + DFABytecodeJumpSize jumpSize = smallestPossibleJumpSize(longestPossibleJumpDistance); + DFABytecodeInstruction instruction = caseSensitive ? DFABytecodeInstruction::CheckValueCaseSensitive : DFABytecodeInstruction::CheckValueCaseInsensitive; + append<uint8_t>(m_bytecode, static_cast<uint8_t>(instruction) | jumpSize); + append<uint8_t>(m_bytecode, value); + m_linkRecords.append(LinkRecord({jumpSize, longestPossibleJumpDistance, instructionLocation, jumpLocation, destinationNodeIndex})); + appendZeroes(m_bytecode, jumpSize); +} + +void DFABytecodeCompiler::emitCheckValueRange(uint8_t lowValue, uint8_t highValue, uint32_t sourceNodeIndex, uint32_t destinationNodeIndex, bool caseSensitive) +{ + ASSERT_WITH_MESSAGE(lowValue < highValue, "The instruction semantic impose lowValue is strictly less than highValue."); + + uint32_t instructionLocation = m_bytecode.size(); + uint32_t jumpLocation = instructionLocation + 3 * sizeof(uint8_t); + int32_t longestPossibleJumpDistance = longestPossibleJump(instructionLocation, sourceNodeIndex, destinationNodeIndex); + DFABytecodeJumpSize jumpSize = smallestPossibleJumpSize(longestPossibleJumpDistance); + DFABytecodeInstruction instruction = caseSensitive ? DFABytecodeInstruction::CheckValueRangeCaseSensitive : DFABytecodeInstruction::CheckValueRangeCaseInsensitive; + append<uint8_t>(m_bytecode, static_cast<uint8_t>(instruction) | jumpSize); + append<uint8_t>(m_bytecode, lowValue); + append<uint8_t>(m_bytecode, highValue); + m_linkRecords.append(LinkRecord({jumpSize, longestPossibleJumpDistance, instructionLocation, jumpLocation, destinationNodeIndex})); + appendZeroes(m_bytecode, jumpSize); +} + +void DFABytecodeCompiler::emitTerminate() +{ + append<DFABytecodeInstruction>(m_bytecode, DFABytecodeInstruction::Terminate); +} + +void DFABytecodeCompiler::compileNode(uint32_t index, bool root) +{ + unsigned startSize = m_bytecode.size(); + + const DFANode& node = m_dfa.nodes[index]; + if (node.isKilled()) { + ASSERT(m_nodeStartOffsets[index] == std::numeric_limits<uint32_t>::max()); + return; + } + + // Record starting index for linking. + if (!root) + m_nodeStartOffsets[index] = m_bytecode.size(); + + for (uint64_t action : node.actions(m_dfa)) + emitAppendAction(action); + + // If we jump to the root, we don't want to re-add its actions to a HashSet. + // We know we have already added them because the root is always compiled first and we always start interpreting at the beginning. + if (root) + m_nodeStartOffsets[index] = m_bytecode.size(); + + compileNodeTransitions(index); + + ASSERT_UNUSED(startSize, m_bytecode.size() - startSize <= compiledNodeMaxBytecodeSize(index)); +} + +unsigned DFABytecodeCompiler::compiledNodeMaxBytecodeSize(uint32_t index) +{ + const DFANode& node = m_dfa.nodes[index]; + if (node.isKilled()) + return 0; + unsigned size = 0; + for (uint64_t action : node.actions(m_dfa)) + size += appendActionBytecodeSize(action); + size += nodeTransitionsMaxBytecodeSize(node); + return size; +} + +DFABytecodeCompiler::JumpTable DFABytecodeCompiler::extractJumpTable(Vector<DFABytecodeCompiler::Range>& ranges, unsigned firstRange, unsigned lastRange) +{ + ASSERT(lastRange > firstRange); + ASSERT(lastRange < ranges.size()); + + JumpTable jumpTable; + jumpTable.min = ranges[firstRange].min; + jumpTable.max = ranges[lastRange].max; + jumpTable.caseSensitive = ranges[lastRange].caseSensitive; + + unsigned size = lastRange - firstRange + 1; + jumpTable.destinations.reserveInitialCapacity(size); + for (unsigned i = firstRange; i <= lastRange; ++i) { + const Range& range = ranges[i]; + + ASSERT(range.caseSensitive == jumpTable.caseSensitive); + ASSERT(range.min == range.max); + ASSERT(range.min >= jumpTable.min); + ASSERT(range.min <= jumpTable.max); + + jumpTable.destinations.uncheckedAppend(range.destination); + } + + ranges.remove(firstRange, size); + + return jumpTable; +} + +DFABytecodeCompiler::Transitions DFABytecodeCompiler::transitions(const DFANode& node) +{ + Transitions transitions; + + uint32_t destinations[128]; + memset(destinations, 0xff, sizeof(destinations)); + const uint32_t noDestination = std::numeric_limits<uint32_t>::max(); + + transitions.useFallbackTransition = node.canUseFallbackTransition(m_dfa); + if (transitions.useFallbackTransition) + transitions.fallbackTransitionTarget = node.bestFallbackTarget(m_dfa); + + for (const auto& transition : node.transitions(m_dfa)) { + uint32_t targetNodeIndex = transition.target(); + if (transitions.useFallbackTransition && transitions.fallbackTransitionTarget == targetNodeIndex) + continue; + + for (uint16_t i = transition.range().first; i <= transition.range().last; ++i) + destinations[i] = targetNodeIndex; + } + + Vector<Range>& ranges = transitions.ranges; + uint8_t rangeMin; + bool hasRangeMin = false; + for (uint8_t i = 0; i < 128; i++) { + if (hasRangeMin) { + if (destinations[i] != destinations[rangeMin]) { + + // This is the end of a range. Check if it can be case insensitive. + uint8_t rangeMax = i - 1; + bool caseSensitive = true; + if (rangeMin >= 'A' && rangeMax <= 'Z') { + caseSensitive = false; + for (uint8_t rangeIndex = rangeMin; rangeIndex <= rangeMax; rangeIndex++) { + if (destinations[rangeMin] != destinations[toASCIILower(rangeIndex)]) { + caseSensitive = true; + break; + } + } + } + + if (!caseSensitive) { + // If all the lower-case destinations are the same as the upper-case destinations, + // then they will be covered by a case-insensitive range and will not need their own range. + for (uint8_t rangeIndex = rangeMin; rangeIndex <= rangeMax; rangeIndex++) { + ASSERT(destinations[rangeMin] == destinations[toASCIILower(rangeIndex)]); + destinations[toASCIILower(rangeIndex)] = noDestination; + } + ranges.append(Range(toASCIILower(rangeMin), toASCIILower(rangeMax), destinations[rangeMin], caseSensitive)); + } else + ranges.append(Range(rangeMin, rangeMax, destinations[rangeMin], caseSensitive)); + + if (destinations[i] == noDestination) + hasRangeMin = false; + else + rangeMin = i; + } + } else { + if (destinations[i] != noDestination) { + rangeMin = i; + hasRangeMin = true; + } + } + } + if (hasRangeMin) { + // Ranges are appended after passing the end of them. + // If a range goes to 127, we will have an uncommitted rangeMin because the loop does not check 128. + // If a range goes to 127, there will never be values higher than it, so checking for case-insensitive ranges would always fail. + ranges.append(Range(rangeMin, 127, destinations[rangeMin], true)); + } + + Vector<JumpTable>& jumpTables = transitions.jumpTables; + unsigned rangePosition = 0; + unsigned baseRangePosition = std::numeric_limits<unsigned>::max(); + Range* baseRange = nullptr; + while (rangePosition < ranges.size()) { + auto& range = ranges[rangePosition]; + if (baseRange) { + if (range.min != range.max + || baseRange->caseSensitive != range.caseSensitive + || ranges[rangePosition - 1].max + 1 != range.min) { + if (rangePosition - baseRangePosition > 1) { + jumpTables.append(extractJumpTable(ranges, baseRangePosition, rangePosition - 1)); + rangePosition = baseRangePosition; + } + baseRangePosition = std::numeric_limits<unsigned>::max(); + baseRange = nullptr; + } + } else { + if (range.min == range.max) { + baseRangePosition = rangePosition; + baseRange = ⦥ + } + } + ++rangePosition; + } + + if (baseRange && ranges.size() - baseRangePosition > 1) + jumpTables.append(extractJumpTable(ranges, baseRangePosition, ranges.size() - 1)); + + return transitions; +} + +unsigned DFABytecodeCompiler::checkForJumpTableMaxBytecodeSize(const JumpTable& jumpTable) +{ + unsigned baselineSize = sizeof(DFABytecodeInstruction::CheckValueRangeCaseInsensitive) + 2 * sizeof(uint8_t); + unsigned targetsSize = (jumpTable.max - jumpTable.min + 1) * sizeof(uint32_t); + return baselineSize + targetsSize; +} + +unsigned DFABytecodeCompiler::checkForRangeMaxBytecodeSize(const Range& range) +{ + if (range.min == range.max) + return sizeof(DFABytecodeInstruction::CheckValueCaseInsensitive) + sizeof(uint8_t) + sizeof(uint32_t); + return sizeof(DFABytecodeInstruction::CheckValueRangeCaseInsensitive) + 2 * sizeof(uint8_t) + sizeof(uint32_t); +} + +void DFABytecodeCompiler::compileJumpTable(uint32_t nodeIndex, const JumpTable& jumpTable) +{ + unsigned startSize = m_bytecode.size(); + ASSERT_WITH_MESSAGE(jumpTable.max < 128, "The DFA engine only supports the ASCII alphabet."); + ASSERT(jumpTable.min <= jumpTable.max); + + uint32_t instructionLocation = m_bytecode.size(); + DFABytecodeJumpSize jumpSize = Int8; + for (uint32_t destinationNodeIndex : jumpTable.destinations) { + int32_t longestPossibleJumpDistance = longestPossibleJump(instructionLocation, nodeIndex, destinationNodeIndex); + DFABytecodeJumpSize localJumpSize = smallestPossibleJumpSize(longestPossibleJumpDistance); + jumpSize = std::max(jumpSize, localJumpSize); + } + + DFABytecodeInstruction instruction = jumpTable.caseSensitive ? DFABytecodeInstruction::JumpTableCaseSensitive : DFABytecodeInstruction::JumpTableCaseInsensitive; + append<uint8_t>(m_bytecode, static_cast<uint8_t>(instruction) | jumpSize); + append<uint8_t>(m_bytecode, jumpTable.min); + append<uint8_t>(m_bytecode, jumpTable.max); + + for (uint32_t destinationNodeIndex : jumpTable.destinations) { + int32_t longestPossibleJumpDistance = longestPossibleJump(instructionLocation, nodeIndex, destinationNodeIndex); + uint32_t jumpLocation = m_bytecode.size(); + m_linkRecords.append(LinkRecord({jumpSize, longestPossibleJumpDistance, instructionLocation, jumpLocation, destinationNodeIndex})); + appendZeroes(m_bytecode, jumpSize); + } + + ASSERT_UNUSED(startSize, m_bytecode.size() - startSize <= checkForJumpTableMaxBytecodeSize(jumpTable)); +} + +void DFABytecodeCompiler::compileCheckForRange(uint32_t nodeIndex, const Range& range) +{ + unsigned startSize = m_bytecode.size(); + ASSERT_WITH_MESSAGE(range.max < 128, "The DFA engine only supports the ASCII alphabet."); + ASSERT(range.min <= range.max); + + if (range.min == range.max) + emitCheckValue(range.min, nodeIndex, range.destination, range.caseSensitive); + else + emitCheckValueRange(range.min, range.max, nodeIndex, range.destination, range.caseSensitive); + + ASSERT_UNUSED(startSize, m_bytecode.size() - startSize <= checkForRangeMaxBytecodeSize(range)); +} + +unsigned DFABytecodeCompiler::nodeTransitionsMaxBytecodeSize(const DFANode& node) +{ + unsigned size = 0; + Transitions nodeTransitions = transitions(node); + for (const auto& jumpTable : nodeTransitions.jumpTables) + size += checkForJumpTableMaxBytecodeSize(jumpTable); + for (const auto& range : nodeTransitions.ranges) + size += checkForRangeMaxBytecodeSize(range); + if (nodeTransitions.useFallbackTransition) + size += sizeof(DFABytecodeInstruction::Jump) + sizeof(uint32_t); + else + size += instructionSizeWithArguments(DFABytecodeInstruction::Terminate); + return size; +} + +void DFABytecodeCompiler::compileNodeTransitions(uint32_t nodeIndex) +{ + const DFANode& node = m_dfa.nodes[nodeIndex]; + unsigned startSize = m_bytecode.size(); + + Transitions nodeTransitions = transitions(node); + for (const auto& jumpTable : nodeTransitions.jumpTables) + compileJumpTable(nodeIndex, jumpTable); + for (const auto& range : nodeTransitions.ranges) + compileCheckForRange(nodeIndex, range); + if (nodeTransitions.useFallbackTransition) + emitJump(nodeIndex, nodeTransitions.fallbackTransitionTarget); + else + emitTerminate(); + + ASSERT_UNUSED(startSize, m_bytecode.size() - startSize <= nodeTransitionsMaxBytecodeSize(node)); +} + +void DFABytecodeCompiler::compile() +{ + uint32_t startLocation = m_bytecode.size(); + append<DFAHeader>(m_bytecode, 0); // This will be set when we are finished compiling this DFA. + + m_nodeStartOffsets.resize(m_dfa.nodes.size()); + for (unsigned i = 0; i < m_dfa.nodes.size(); ++i) + m_nodeStartOffsets[i] = std::numeric_limits<uint32_t>::max(); + + // Populate m_maxNodeStartOffsets with a worst-case index of where the node would be with no branch compaction. + // Compacting the branches using 1-4 byte signed jump distances should only make nodes closer together than this. + ASSERT(m_maxNodeStartOffsets.isEmpty()); + m_maxNodeStartOffsets.clear(); + m_maxNodeStartOffsets.resize(m_dfa.nodes.size()); + unsigned rootActionsSize = 0; + for (uint64_t action : m_dfa.nodes[m_dfa.root].actions(m_dfa)) + rootActionsSize += appendActionBytecodeSize(action); + m_maxNodeStartOffsets[m_dfa.root] = sizeof(DFAHeader) + rootActionsSize; + unsigned nextIndex = sizeof(DFAHeader) + compiledNodeMaxBytecodeSize(m_dfa.root); + for (uint32_t i = 0; i < m_dfa.nodes.size(); i++) { + if (i != m_dfa.root) { + m_maxNodeStartOffsets[i] = nextIndex; + nextIndex += compiledNodeMaxBytecodeSize(i); + } + } + + // Make sure the root is always at the beginning of the bytecode. + compileNode(m_dfa.root, true); + for (uint32_t i = 0; i < m_dfa.nodes.size(); i++) { + if (i != m_dfa.root) + compileNode(i, false); + } + + ASSERT(m_maxNodeStartOffsets.size() == m_nodeStartOffsets.size()); + for (unsigned i = 0; i < m_dfa.nodes.size(); ++i) { + if (m_nodeStartOffsets[i] != std::numeric_limits<uint32_t>::max()) + ASSERT(m_maxNodeStartOffsets[i] >= m_nodeStartOffsets[i]); + } + + // Link. + for (const auto& linkRecord : m_linkRecords) { + uint32_t destination = m_nodeStartOffsets[linkRecord.destinationNodeIndex]; + RELEASE_ASSERT(destination < std::numeric_limits<int32_t>::max()); + int32_t distance = destination - linkRecord.instructionLocation; + ASSERT(abs(distance) <= abs(linkRecord.longestPossibleJump)); + + switch (linkRecord.jumpSize) { + case Int8: + RELEASE_ASSERT(distance == static_cast<int8_t>(distance)); + setBits<int8_t>(m_bytecode, linkRecord.jumpLocation, static_cast<int8_t>(distance)); + break; + case Int16: + RELEASE_ASSERT(distance == static_cast<int16_t>(distance)); + setBits<int16_t>(m_bytecode, linkRecord.jumpLocation, static_cast<int16_t>(distance)); + break; + case Int24: + RELEASE_ASSERT(distance >= Int24Min && distance <= Int24Max); + setBits<uint16_t>(m_bytecode, linkRecord.jumpLocation, static_cast<uint16_t>(distance)); + setBits<int8_t>(m_bytecode, linkRecord.jumpLocation + sizeof(int16_t), static_cast<int8_t>(distance >> 16)); + break; + case Int32: + setBits<int32_t>(m_bytecode, linkRecord.jumpLocation, distance); + break; + } + } + + setBits<DFAHeader>(m_bytecode, startLocation, m_bytecode.size() - startLocation); +} + +} // namespace ContentExtensions + +} // namespace WebCore + +#endif // ENABLE(CONTENT_EXTENSIONS) diff --git a/Source/WebCore/contentextensions/DFABytecodeCompiler.h b/Source/WebCore/contentextensions/DFABytecodeCompiler.h new file mode 100644 index 000000000..5ef0bd789 --- /dev/null +++ b/Source/WebCore/contentextensions/DFABytecodeCompiler.h @@ -0,0 +1,120 @@ +/* + * Copyright (C) 2015 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +#if ENABLE(CONTENT_EXTENSIONS) + +#include "DFABytecode.h" +#include <wtf/Vector.h> + +namespace WebCore { + +namespace ContentExtensions { + +struct DFA; +class DFANode; + +class WEBCORE_EXPORT DFABytecodeCompiler { +public: + DFABytecodeCompiler(const DFA& dfa, Vector<DFABytecode>& bytecode) + : m_bytecode(bytecode) + , m_dfa(dfa) + { + } + + void compile(); + +private: + struct Range { + Range(uint8_t min, uint8_t max, uint32_t destination, bool caseSensitive) + : min(min) + , max(max) + , destination(destination) + , caseSensitive(caseSensitive) + { + } + uint8_t min; + uint8_t max; + uint32_t destination; + bool caseSensitive; + }; + struct JumpTable { + ~JumpTable() + { + ASSERT(min + destinations.size() == max + 1); + ASSERT(min == max || destinations.size() > 1); + } + + uint8_t min { 0 }; + uint8_t max { 0 }; + bool caseSensitive { true }; + Vector<uint32_t> destinations; + }; + struct Transitions { + Vector<JumpTable> jumpTables; + Vector<Range> ranges; + bool useFallbackTransition { false }; + uint32_t fallbackTransitionTarget { std::numeric_limits<uint32_t>::max() }; + }; + JumpTable extractJumpTable(Vector<Range>&, unsigned first, unsigned last); + Transitions transitions(const DFANode&); + + unsigned compiledNodeMaxBytecodeSize(uint32_t index); + void compileNode(uint32_t index, bool root); + unsigned nodeTransitionsMaxBytecodeSize(const DFANode&); + void compileNodeTransitions(uint32_t nodeIndex); + unsigned checkForJumpTableMaxBytecodeSize(const JumpTable&); + unsigned checkForRangeMaxBytecodeSize(const Range&); + void compileJumpTable(uint32_t nodeIndex, const JumpTable&); + void compileCheckForRange(uint32_t nodeIndex, const Range&); + int32_t longestPossibleJump(uint32_t jumpLocation, uint32_t sourceNodeIndex, uint32_t destinationNodeIndex); + + void emitAppendAction(uint64_t); + void emitJump(uint32_t sourceNodeIndex, uint32_t destinationNodeIndex); + void emitCheckValue(uint8_t value, uint32_t sourceNodeIndex, uint32_t destinationNodeIndex, bool caseSensitive); + void emitCheckValueRange(uint8_t lowValue, uint8_t highValue, uint32_t sourceNodeIndex, uint32_t destinationNodeIndex, bool caseSensitive); + void emitTerminate(); + + Vector<DFABytecode>& m_bytecode; + const DFA& m_dfa; + + Vector<uint32_t> m_maxNodeStartOffsets; + Vector<uint32_t> m_nodeStartOffsets; + + struct LinkRecord { + DFABytecodeJumpSize jumpSize; + int32_t longestPossibleJump; + uint32_t instructionLocation; + uint32_t jumpLocation; + uint32_t destinationNodeIndex; + }; + Vector<LinkRecord> m_linkRecords; +}; + +} // namespace ContentExtensions +} // namespace WebCore + +#endif // ENABLE(CONTENT_EXTENSIONS) diff --git a/Source/WebCore/contentextensions/DFABytecodeInterpreter.cpp b/Source/WebCore/contentextensions/DFABytecodeInterpreter.cpp new file mode 100644 index 000000000..a0da084a6 --- /dev/null +++ b/Source/WebCore/contentextensions/DFABytecodeInterpreter.cpp @@ -0,0 +1,362 @@ +/* + * Copyright (C) 2015 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "DFABytecodeInterpreter.h" + +#include "ContentExtensionsDebugging.h" +#include <wtf/text/CString.h> + +#if ENABLE(CONTENT_EXTENSIONS) + +namespace WebCore { + +namespace ContentExtensions { + +template <typename IntType> +static inline IntType getBits(const DFABytecode* bytecode, uint32_t bytecodeLength, uint32_t index) +{ + ASSERT_UNUSED(bytecodeLength, index + sizeof(IntType) <= bytecodeLength); + return *reinterpret_cast<const IntType*>(&bytecode[index]); +} + +static inline DFABytecodeInstruction getInstruction(const DFABytecode* bytecode, uint32_t bytecodeLength, uint32_t index) +{ + return static_cast<DFABytecodeInstruction>(getBits<uint8_t>(bytecode, bytecodeLength, index) & DFABytecodeInstructionMask); +} + +static inline size_t jumpSizeInBytes(DFABytecodeJumpSize jumpSize) +{ + switch (jumpSize) { + case Int8: + return sizeof(int8_t); + case Int16: + return sizeof(int16_t); + case Int24: + return sizeof(uint16_t) + sizeof(int8_t); + case Int32: + return sizeof(int32_t); + default: + RELEASE_ASSERT_NOT_REACHED(); + } +} + +static inline DFABytecodeJumpSize getJumpSize(const DFABytecode* bytecode, uint32_t bytecodeLength, uint32_t index) +{ + DFABytecodeJumpSize jumpSize = static_cast<DFABytecodeJumpSize>(getBits<uint8_t>(bytecode, bytecodeLength, index) & DFABytecodeJumpSizeMask); + ASSERT(jumpSize == DFABytecodeJumpSize::Int32 || jumpSize == DFABytecodeJumpSize::Int24 || jumpSize == DFABytecodeJumpSize::Int16 || jumpSize == DFABytecodeJumpSize::Int8); + return jumpSize; +} + +static inline int32_t getJumpDistance(const DFABytecode* bytecode, uint32_t bytecodeLength, uint32_t index, DFABytecodeJumpSize jumpSize) +{ + switch (jumpSize) { + case Int8: + return getBits<int8_t>(bytecode, bytecodeLength, index); + case Int16: + return getBits<int16_t>(bytecode, bytecodeLength, index); + case Int24: + return getBits<uint16_t>(bytecode, bytecodeLength, index) | (static_cast<int32_t>(getBits<int8_t>(bytecode, bytecodeLength, index + sizeof(uint16_t))) << 16); + case Int32: + return getBits<int32_t>(bytecode, bytecodeLength, index); + default: + RELEASE_ASSERT_NOT_REACHED(); + } +} + +static inline bool matchesDomain(uint64_t actionAndFlags, const DFABytecodeInterpreter::Actions& domainActions) +{ + bool ifDomain = actionAndFlags & IfDomainFlag; + bool domain = domainActions.contains(actionAndFlags); + return ifDomain == domain; +} + +void DFABytecodeInterpreter::interpretAppendAction(uint32_t& programCounter, Actions& actions, bool ifDomain) +{ + ASSERT(getInstruction(m_bytecode, m_bytecodeLength, programCounter) == DFABytecodeInstruction::AppendAction + || getInstruction(m_bytecode, m_bytecodeLength, programCounter) == DFABytecodeInstruction::AppendActionWithIfDomain); + uint64_t action = (ifDomain ? IfDomainFlag : 0) | static_cast<uint64_t>(getBits<uint32_t>(m_bytecode, m_bytecodeLength, programCounter + sizeof(DFABytecodeInstruction))); + if (!m_domainActions || matchesDomain(action, *m_domainActions)) + actions.add(action); + + programCounter += instructionSizeWithArguments(DFABytecodeInstruction::AppendAction); + ASSERT(instructionSizeWithArguments(DFABytecodeInstruction::AppendAction) == instructionSizeWithArguments(DFABytecodeInstruction::AppendActionWithIfDomain)); +} + +void DFABytecodeInterpreter::interpretTestFlagsAndAppendAction(uint32_t& programCounter, uint16_t flags, Actions& actions, bool ifDomain) +{ + ASSERT(getInstruction(m_bytecode, m_bytecodeLength, programCounter) == DFABytecodeInstruction::TestFlagsAndAppendAction + || getInstruction(m_bytecode, m_bytecodeLength, programCounter) == DFABytecodeInstruction::TestFlagsAndAppendActionWithIfDomain); + uint16_t flagsToCheck = getBits<uint16_t>(m_bytecode, m_bytecodeLength, programCounter + sizeof(DFABytecodeInstruction)); + + uint16_t loadTypeFlags = flagsToCheck & LoadTypeMask; + uint16_t resourceTypeFlags = flagsToCheck & ResourceTypeMask; + + bool loadTypeMatches = loadTypeFlags ? (loadTypeFlags & flags) : true; + bool resourceTypeMatches = resourceTypeFlags ? (resourceTypeFlags & flags) : true; + + if (loadTypeMatches && resourceTypeMatches) { + uint64_t actionAndFlags = (ifDomain ? IfDomainFlag : 0) | (static_cast<uint64_t>(flagsToCheck) << 32) | static_cast<uint64_t>(getBits<uint32_t>(m_bytecode, m_bytecodeLength, programCounter + sizeof(DFABytecodeInstruction) + sizeof(uint16_t))); + if (!m_domainActions || matchesDomain(actionAndFlags, *m_domainActions)) + actions.add(actionAndFlags); + } + programCounter += instructionSizeWithArguments(DFABytecodeInstruction::TestFlagsAndAppendAction); + ASSERT(instructionSizeWithArguments(DFABytecodeInstruction::TestFlagsAndAppendAction) == instructionSizeWithArguments(DFABytecodeInstruction::TestFlagsAndAppendActionWithIfDomain)); +} + +template<bool caseSensitive> +inline void DFABytecodeInterpreter::interpetJumpTable(const char* url, uint32_t& urlIndex, uint32_t& programCounter, bool& urlIndexIsAfterEndOfString) +{ + DFABytecodeJumpSize jumpSize = getJumpSize(m_bytecode, m_bytecodeLength, programCounter); + + char character = caseSensitive ? url[urlIndex] : toASCIILower(url[urlIndex]); + uint8_t firstCharacter = getBits<uint8_t>(m_bytecode, m_bytecodeLength, programCounter + sizeof(DFABytecodeInstruction)); + uint8_t lastCharacter = getBits<uint8_t>(m_bytecode, m_bytecodeLength, programCounter + sizeof(DFABytecodeInstruction) + sizeof(uint8_t)); + if (character >= firstCharacter && character <= lastCharacter) { + uint32_t startOffset = programCounter + sizeof(DFABytecodeInstruction) + 2 * sizeof(uint8_t); + uint32_t jumpLocation = startOffset + (character - firstCharacter) * jumpSizeInBytes(jumpSize); + programCounter += getJumpDistance(m_bytecode, m_bytecodeLength, jumpLocation, jumpSize); + if (!character) + urlIndexIsAfterEndOfString = true; + urlIndex++; // This represents an edge in the DFA. + } else + programCounter += sizeof(DFABytecodeInstruction) + 2 * sizeof(uint8_t) + jumpSizeInBytes(jumpSize) * (lastCharacter - firstCharacter + 1); +} + +DFABytecodeInterpreter::Actions DFABytecodeInterpreter::actionsMatchingEverything() +{ + Actions actions; + + // DFA header. + uint32_t dfaBytecodeLength = getBits<uint32_t>(m_bytecode, m_bytecodeLength, 0); + uint32_t programCounter = sizeof(uint32_t); + + while (programCounter < dfaBytecodeLength) { + DFABytecodeInstruction instruction = getInstruction(m_bytecode, m_bytecodeLength, programCounter); + if (instruction == DFABytecodeInstruction::AppendAction) + interpretAppendAction(programCounter, actions, false); + else if (instruction == DFABytecodeInstruction::AppendActionWithIfDomain) + interpretAppendAction(programCounter, actions, true); + else if (instruction == DFABytecodeInstruction::TestFlagsAndAppendAction) + programCounter += instructionSizeWithArguments(DFABytecodeInstruction::TestFlagsAndAppendAction); + else if (instruction == DFABytecodeInstruction::TestFlagsAndAppendActionWithIfDomain) + programCounter += instructionSizeWithArguments(DFABytecodeInstruction::TestFlagsAndAppendActionWithIfDomain); + else + break; + } + return actions; +} + +DFABytecodeInterpreter::Actions DFABytecodeInterpreter::interpretWithDomains(const CString& urlCString, uint16_t flags, const DFABytecodeInterpreter::Actions& domainActions) +{ + ASSERT(!m_domainActions); + m_domainActions = &domainActions; + DFABytecodeInterpreter::Actions actions = interpret(urlCString, flags); + m_domainActions = nullptr; + return actions; +} + +DFABytecodeInterpreter::Actions DFABytecodeInterpreter::interpret(const CString& urlCString, uint16_t flags) +{ + const char* url = urlCString.data(); + ASSERT(url); + + Actions actions; + + uint32_t programCounter = 0; + while (programCounter < m_bytecodeLength) { + + // DFA header. + uint32_t dfaStart = programCounter; + uint32_t dfaBytecodeLength = getBits<uint32_t>(m_bytecode, m_bytecodeLength, programCounter); + programCounter += sizeof(uint32_t); + + // Skip the actions without flags on the DFA root. These are accessed via actionsMatchingEverything. + if (!dfaStart) { + while (programCounter < dfaBytecodeLength) { + DFABytecodeInstruction instruction = getInstruction(m_bytecode, m_bytecodeLength, programCounter); + if (instruction == DFABytecodeInstruction::AppendAction) + programCounter += instructionSizeWithArguments(DFABytecodeInstruction::AppendAction); + else if (instruction == DFABytecodeInstruction::AppendActionWithIfDomain) + programCounter += instructionSizeWithArguments(DFABytecodeInstruction::AppendActionWithIfDomain); + else if (instruction == DFABytecodeInstruction::TestFlagsAndAppendAction) + interpretTestFlagsAndAppendAction(programCounter, flags, actions, false); + else if (instruction == DFABytecodeInstruction::TestFlagsAndAppendActionWithIfDomain) + interpretTestFlagsAndAppendAction(programCounter, flags, actions, true); + else + break; + } + if (programCounter >= m_bytecodeLength) + return actions; + } else { + ASSERT_WITH_MESSAGE(getInstruction(m_bytecode, m_bytecodeLength, programCounter) != DFABytecodeInstruction::AppendAction + && getInstruction(m_bytecode, m_bytecodeLength, programCounter) != DFABytecodeInstruction::AppendActionWithIfDomain + && getInstruction(m_bytecode, m_bytecodeLength, programCounter) != DFABytecodeInstruction::TestFlagsAndAppendAction + && getInstruction(m_bytecode, m_bytecodeLength, programCounter) != DFABytecodeInstruction::TestFlagsAndAppendActionWithIfDomain, + "Triggers that match everything should only be in the first DFA."); + } + + // Interpret the bytecode from this DFA. + // This should always terminate if interpreting correctly compiled bytecode. + uint32_t urlIndex = 0; + bool urlIndexIsAfterEndOfString = false; + while (true) { + ASSERT(programCounter <= m_bytecodeLength); + switch (getInstruction(m_bytecode, m_bytecodeLength, programCounter)) { + + case DFABytecodeInstruction::Terminate: + goto nextDFA; + + case DFABytecodeInstruction::CheckValueCaseSensitive: { + if (urlIndexIsAfterEndOfString) + goto nextDFA; + + // Check to see if the next character in the url is the value stored with the bytecode. + char character = url[urlIndex]; + DFABytecodeJumpSize jumpSize = getJumpSize(m_bytecode, m_bytecodeLength, programCounter); + if (character == getBits<uint8_t>(m_bytecode, m_bytecodeLength, programCounter + sizeof(DFABytecodeInstruction))) { + uint32_t jumpLocation = programCounter + sizeof(DFABytecodeInstruction) + sizeof(uint8_t); + programCounter += getJumpDistance(m_bytecode, m_bytecodeLength, jumpLocation, jumpSize); + if (!character) + urlIndexIsAfterEndOfString = true; + urlIndex++; // This represents an edge in the DFA. + } else + programCounter += sizeof(DFABytecodeInstruction) + sizeof(uint8_t) + jumpSizeInBytes(jumpSize); + break; + } + + case DFABytecodeInstruction::CheckValueCaseInsensitive: { + if (urlIndexIsAfterEndOfString) + goto nextDFA; + + // Check to see if the next character in the url is the value stored with the bytecode. + char character = toASCIILower(url[urlIndex]); + DFABytecodeJumpSize jumpSize = getJumpSize(m_bytecode, m_bytecodeLength, programCounter); + if (character == getBits<uint8_t>(m_bytecode, m_bytecodeLength, programCounter + sizeof(DFABytecodeInstruction))) { + uint32_t jumpLocation = programCounter + sizeof(DFABytecodeInstruction) + sizeof(uint8_t); + programCounter += getJumpDistance(m_bytecode, m_bytecodeLength, jumpLocation, jumpSize); + if (!character) + urlIndexIsAfterEndOfString = true; + urlIndex++; // This represents an edge in the DFA. + } else + programCounter += sizeof(DFABytecodeInstruction) + sizeof(uint8_t) + jumpSizeInBytes(jumpSize); + break; + } + + case DFABytecodeInstruction::JumpTableCaseInsensitive: + if (urlIndexIsAfterEndOfString) + goto nextDFA; + + interpetJumpTable<false>(url, urlIndex, programCounter, urlIndexIsAfterEndOfString); + break; + case DFABytecodeInstruction::JumpTableCaseSensitive: + if (urlIndexIsAfterEndOfString) + goto nextDFA; + + interpetJumpTable<true>(url, urlIndex, programCounter, urlIndexIsAfterEndOfString); + break; + + case DFABytecodeInstruction::CheckValueRangeCaseSensitive: { + if (urlIndexIsAfterEndOfString) + goto nextDFA; + + char character = url[urlIndex]; + DFABytecodeJumpSize jumpSize = getJumpSize(m_bytecode, m_bytecodeLength, programCounter); + if (character >= getBits<uint8_t>(m_bytecode, m_bytecodeLength, programCounter + sizeof(DFABytecodeInstruction)) + && character <= getBits<uint8_t>(m_bytecode, m_bytecodeLength, programCounter + sizeof(DFABytecodeInstruction) + sizeof(uint8_t))) { + uint32_t jumpLocation = programCounter + sizeof(DFABytecodeInstruction) + 2 * sizeof(uint8_t); + programCounter += getJumpDistance(m_bytecode, m_bytecodeLength, jumpLocation, jumpSize); + if (!character) + urlIndexIsAfterEndOfString = true; + urlIndex++; // This represents an edge in the DFA. + } else + programCounter += sizeof(DFABytecodeInstruction) + 2 * sizeof(uint8_t) + jumpSizeInBytes(jumpSize); + break; + } + + case DFABytecodeInstruction::CheckValueRangeCaseInsensitive: { + if (urlIndexIsAfterEndOfString) + goto nextDFA; + + char character = toASCIILower(url[urlIndex]); + DFABytecodeJumpSize jumpSize = getJumpSize(m_bytecode, m_bytecodeLength, programCounter); + if (character >= getBits<uint8_t>(m_bytecode, m_bytecodeLength, programCounter + sizeof(DFABytecodeInstruction)) + && character <= getBits<uint8_t>(m_bytecode, m_bytecodeLength, programCounter + sizeof(DFABytecodeInstruction) + sizeof(uint8_t))) { + uint32_t jumpLocation = programCounter + sizeof(DFABytecodeInstruction) + 2 * sizeof(uint8_t); + programCounter += getJumpDistance(m_bytecode, m_bytecodeLength, jumpLocation, jumpSize); + if (!character) + urlIndexIsAfterEndOfString = true; + urlIndex++; // This represents an edge in the DFA. + } else + programCounter += sizeof(DFABytecodeInstruction) + 2 * sizeof(uint8_t) + jumpSizeInBytes(jumpSize); + break; + } + + case DFABytecodeInstruction::Jump: { + if (!url[urlIndex] || urlIndexIsAfterEndOfString) + goto nextDFA; + + uint32_t jumpLocation = programCounter + sizeof(DFABytecodeInstruction); + DFABytecodeJumpSize jumpSize = getJumpSize(m_bytecode, m_bytecodeLength, programCounter); + programCounter += getJumpDistance(m_bytecode, m_bytecodeLength, jumpLocation, jumpSize); + urlIndex++; // This represents an edge in the DFA. + break; + } + + case DFABytecodeInstruction::AppendAction: + interpretAppendAction(programCounter, actions, false); + break; + + case DFABytecodeInstruction::AppendActionWithIfDomain: + interpretAppendAction(programCounter, actions, true); + break; + + case DFABytecodeInstruction::TestFlagsAndAppendAction: + interpretTestFlagsAndAppendAction(programCounter, flags, actions, false); + break; + + case DFABytecodeInstruction::TestFlagsAndAppendActionWithIfDomain: + interpretTestFlagsAndAppendAction(programCounter, flags, actions, true); + break; + + default: + RELEASE_ASSERT_NOT_REACHED(); // Invalid bytecode. + } + // We should always terminate before or at a null character at the end of a String. + ASSERT(urlIndex <= urlCString.length() || (urlIndexIsAfterEndOfString && urlIndex <= urlCString.length() + 1)); + } + RELEASE_ASSERT_NOT_REACHED(); // The while loop can only be exited using goto nextDFA. + nextDFA: + ASSERT(dfaBytecodeLength); + programCounter = dfaStart + dfaBytecodeLength; + } + return actions; +} + +} // namespace ContentExtensions + +} // namespace WebCore + +#endif // ENABLE(CONTENT_EXTENSIONS) diff --git a/Source/WebCore/contentextensions/DFABytecodeInterpreter.h b/Source/WebCore/contentextensions/DFABytecodeInterpreter.h new file mode 100644 index 000000000..4c69e2660 --- /dev/null +++ b/Source/WebCore/contentextensions/DFABytecodeInterpreter.h @@ -0,0 +1,69 @@ +/* + * Copyright (C) 2015 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +#if ENABLE(CONTENT_EXTENSIONS) + +#include "ContentExtensionRule.h" +#include "ContentExtensionsDebugging.h" +#include "DFABytecode.h" +#include <wtf/DataLog.h> +#include <wtf/HashSet.h> + +namespace WebCore { + +namespace ContentExtensions { + +class WEBCORE_EXPORT DFABytecodeInterpreter { +public: + DFABytecodeInterpreter(const DFABytecode* bytecode, unsigned bytecodeLength) + : m_bytecode(bytecode) + , m_bytecodeLength(bytecodeLength) + { + } + + typedef HashSet<uint64_t, DefaultHash<uint64_t>::Hash, WTF::UnsignedWithZeroKeyHashTraits<uint64_t>> Actions; + + Actions interpret(const CString&, uint16_t flags); + Actions interpretWithDomains(const CString&, uint16_t flags, const DFABytecodeInterpreter::Actions& domainActions); + Actions actionsMatchingEverything(); + +private: + void interpretAppendAction(unsigned& programCounter, Actions&, bool ifDomain); + void interpretTestFlagsAndAppendAction(unsigned& programCounter, uint16_t flags, Actions&, bool ifDomain); + + template<bool caseSensitive> + void interpetJumpTable(const char* url, uint32_t& urlIndex, uint32_t& programCounter, bool& urlIndexIsAfterEndOfString); + + const DFABytecode* m_bytecode; + const unsigned m_bytecodeLength; + const DFABytecodeInterpreter::Actions* m_domainActions { nullptr }; +}; + +} // namespace ContentExtensions +} // namespace WebCore + +#endif // ENABLE(CONTENT_EXTENSIONS) diff --git a/Source/WebCore/contentextensions/DFACombiner.cpp b/Source/WebCore/contentextensions/DFACombiner.cpp new file mode 100644 index 000000000..513567540 --- /dev/null +++ b/Source/WebCore/contentextensions/DFACombiner.cpp @@ -0,0 +1,227 @@ +/* + * Copyright (C) 2015 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "DFACombiner.h" + +#if ENABLE(CONTENT_EXTENSIONS) + +#include "MutableRangeList.h" +#include <wtf/HashMap.h> +#include <wtf/HashSet.h> + +namespace WebCore { + +namespace ContentExtensions { + +class DFAMerger { + typedef MutableRangeList<char, uint64_t, 128> CombinedTransitionsMutableRangeList; + + enum class WhichDFA { + A, + B + }; + + template<WhichDFA whichDFA> + struct TargetConverter { + uint64_t convert(uint32_t target) + { + uint64_t value = 0xffffffffffffffff; + extend(value, target); + return value; + } + + void extend(uint64_t& destination, uint32_t target) + { + setHalfSignature(destination, target); + } + private: + void setHalfSignature(uint64_t& signature, uint32_t value) + { + unsigned shiftAmount = (whichDFA == WhichDFA::A) ? 32 : 0; + uint64_t mask = static_cast<uint64_t>(0xffffffff) << (32 - shiftAmount); + signature = (signature & mask) | static_cast<uint64_t>(value) << shiftAmount; + } + }; + +public: + DFAMerger(const DFA& a, const DFA& b) + : m_dfaA(a) + , m_dfaB(b) + { + } + + DFA merge() + { + if (!m_nodeMapping.isEmpty()) + return m_output; + + uint64_t rootSignature = signatureForIndices(m_dfaA.root, m_dfaB.root); + getOrCreateCombinedNode(rootSignature); + + CombinedTransitionsMutableRangeList combinedTransitions; + while (!m_unprocessedNodes.isEmpty()) { + combinedTransitions.clear(); + + uint64_t unprocessedNode = m_unprocessedNodes.takeLast(); + + uint32_t indexA = extractIndexA(unprocessedNode); + if (indexA != invalidNodeIndex) { + const DFANode& nodeA = m_dfaA.nodes[indexA]; + auto transitionsA = nodeA.transitions(m_dfaA); + TargetConverter<WhichDFA::A> converterA; + combinedTransitions.extend(transitionsA.begin(), transitionsA.end(), converterA); + } + + uint32_t indexB = extractIndexB(unprocessedNode); + if (indexB != invalidNodeIndex) { + const DFANode& nodeB = m_dfaB.nodes[indexB]; + auto transitionsB = nodeB.transitions(m_dfaB); + TargetConverter<WhichDFA::B> converterB; + combinedTransitions.extend(transitionsB.begin(), transitionsB.end(), converterB); + } + + unsigned transitionsStart = m_output.transitionRanges.size(); + for (const auto& range : combinedTransitions) { + unsigned targetNodeId = getOrCreateCombinedNode(range.data); + m_output.transitionRanges.append({ range.first, range.last }); + m_output.transitionDestinations.append(targetNodeId); + } + unsigned transitionsEnd = m_output.transitionRanges.size(); + unsigned transitionsLength = transitionsEnd - transitionsStart; + + uint32_t sourceNodeId = m_nodeMapping.get(unprocessedNode); + DFANode& dfaSourceNode = m_output.nodes[sourceNodeId]; + dfaSourceNode.setTransitions(transitionsStart, static_cast<uint8_t>(transitionsLength)); + } + return m_output; + } + +private: + uint32_t invalidNodeIndex = 0xffffffff; + + static uint64_t signatureForIndices(uint32_t aIndex, uint32_t bIndex) + { + return static_cast<uint64_t>(aIndex) << 32 | bIndex; + } + + static uint32_t extractIndexA(uint64_t signature) + { + return static_cast<uint32_t>(signature >> 32); + } + + static uint32_t extractIndexB(uint64_t signature) + { + return static_cast<uint32_t>(signature); + } + + uint32_t getOrCreateCombinedNode(uint64_t newNodeSignature) + { + auto addResult = m_nodeMapping.add(newNodeSignature, invalidNodeIndex); + if (!addResult.isNewEntry) + return addResult.iterator->value; + + m_output.nodes.append(DFANode()); + uint32_t newNodeIndex = m_output.nodes.size() - 1; + addResult.iterator->value = newNodeIndex; + m_unprocessedNodes.append(newNodeSignature); + + uint32_t indexA = extractIndexA(newNodeSignature); + uint32_t indexB = extractIndexB(newNodeSignature); + + HashSet<uint64_t, DefaultHash<uint64_t>::Hash, WTF::UnsignedWithZeroKeyHashTraits<uint64_t>> actions; + if (indexA != invalidNodeIndex) { + const DFANode& node = m_dfaA.nodes[indexA]; + uint32_t actionsStart = node.actionsStart(); + uint32_t actionsEnd = actionsStart + node.actionsLength(); + for (uint32_t i = actionsStart; i < actionsEnd; ++i) + actions.add(m_dfaA.actions[i]); + } + if (indexB != invalidNodeIndex) { + const DFANode& node = m_dfaB.nodes[indexB]; + uint32_t actionsStart = node.actionsStart(); + uint32_t actionsEnd = actionsStart + node.actionsLength(); + for (uint32_t i = actionsStart; i < actionsEnd; ++i) + actions.add(m_dfaB.actions[i]); + } + + uint32_t actionsStart = m_output.actions.size(); + for (uint64_t action : actions) + m_output.actions.append(action); + uint32_t actionsEnd = m_output.actions.size(); + uint16_t actionsLength = static_cast<uint16_t>(actionsEnd - actionsStart); + + m_output.nodes.last().setActions(actionsStart, actionsLength); + return newNodeIndex; + } + + const DFA& m_dfaA; + const DFA& m_dfaB; + DFA m_output; + HashMap<uint64_t, uint32_t, DefaultHash<uint64_t>::Hash, WTF::UnsignedWithZeroKeyHashTraits<uint64_t>> m_nodeMapping; + Vector<uint64_t, 0, ContentExtensionsOverflowHandler> m_unprocessedNodes; +}; + +void DFACombiner::combineDFAs(unsigned minimumSize, std::function<void(DFA&&)> handler) +{ + if (m_dfas.isEmpty()) + return; + + for (unsigned i = m_dfas.size(); i--;) { + if (m_dfas[i].graphSize() > minimumSize) { + handler(WTFMove(m_dfas[i])); + m_dfas.remove(i); + } + } + + while (!m_dfas.isEmpty()) { + if (m_dfas.size() == 1) { + handler(WTFMove(m_dfas.first())); + return; + } + + DFA a = m_dfas.takeLast(); + DFA b = m_dfas.takeLast(); + DFAMerger dfaMerger(a, b); + DFA c = dfaMerger.merge(); + + if (c.graphSize() > minimumSize || m_dfas.isEmpty()) { + // Minimizing is somewhat expensive. We only do it in bulk when we reach the threshold + // to reduce the load. + c.minimize(); + } + + if (c.graphSize() > minimumSize) + handler(WTFMove(c)); + else + m_dfas.append(c); + } +} + +} + +} // namespace WebCore + +#endif diff --git a/Source/WebCore/contentextensions/DFACombiner.h b/Source/WebCore/contentextensions/DFACombiner.h new file mode 100644 index 000000000..090946b12 --- /dev/null +++ b/Source/WebCore/contentextensions/DFACombiner.h @@ -0,0 +1,55 @@ +/* + * Copyright (C) 2015 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +#if ENABLE(CONTENT_EXTENSIONS) + +#include "DFA.h" +#include <wtf/Vector.h> + +namespace WebCore { + +namespace ContentExtensions { + +class WEBCORE_EXPORT DFACombiner { +public: + void addDFA(DFA&&); + void combineDFAs(unsigned minimumSize, std::function<void(DFA&&)> handler); + +private: + Vector<DFA> m_dfas; +}; + +inline void DFACombiner::addDFA(DFA&& dfa) +{ + dfa.minimize(); + m_dfas.append(WTFMove(dfa)); +} + +} // namespace ContentExtensions +} // namespace WebCore + +#endif // ENABLE(CONTENT_EXTENSIONS) diff --git a/Source/WebCore/contentextensions/DFAMinimizer.cpp b/Source/WebCore/contentextensions/DFAMinimizer.cpp new file mode 100644 index 000000000..4408e84bf --- /dev/null +++ b/Source/WebCore/contentextensions/DFAMinimizer.cpp @@ -0,0 +1,509 @@ +/* + * Copyright (C) 2015 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "DFAMinimizer.h" + +#if ENABLE(CONTENT_EXTENSIONS) + +#include "DFA.h" +#include "DFANode.h" +#include "MutableRangeList.h" +#include <wtf/HashMap.h> +#include <wtf/Hasher.h> +#include <wtf/Vector.h> + +namespace WebCore { +namespace ContentExtensions { + +namespace { + +template<typename VectorType, typename Iterable, typename Function> +static inline void iterateIntersections(const VectorType& singularTransitionsFirsts, const Iterable& iterableTransitionList, const Function& intersectionHandler) +{ + ASSERT(!singularTransitionsFirsts.isEmpty()); + auto otherIterator = iterableTransitionList.begin(); + auto otherEnd = iterableTransitionList.end(); + + if (otherIterator == otherEnd) + return; + + unsigned singularTransitionsLength = singularTransitionsFirsts.size(); + unsigned singularTransitionsFirstsIndex = 0; + for (; otherIterator != otherEnd; ++otherIterator) { + auto firstCharacter = otherIterator.first(); + while (singularTransitionsFirstsIndex < singularTransitionsLength + && singularTransitionsFirsts[singularTransitionsFirstsIndex] != firstCharacter) + ++singularTransitionsFirstsIndex; + + intersectionHandler(singularTransitionsFirstsIndex, otherIterator); + ++singularTransitionsFirstsIndex; + + auto lastCharacter = otherIterator.last(); + while (singularTransitionsFirstsIndex < singularTransitionsLength + && singularTransitionsFirsts[singularTransitionsFirstsIndex] <= lastCharacter) { + intersectionHandler(singularTransitionsFirstsIndex, otherIterator); + ++singularTransitionsFirstsIndex; + } + } +} + +class Partition { +public: + void initialize(unsigned size) + { + if (!size) + return; + + m_sets.reserveInitialCapacity(size); + m_partitionedElements.resize(size); + m_elementPositionInPartitionedNodes.resize(size); + m_elementToSetMap.resize(size); + + for (unsigned i = 0; i < size; ++i) { + m_partitionedElements[i] = i; + m_elementPositionInPartitionedNodes[i] = i; + m_elementToSetMap[i] = 0; + } + m_sets.uncheckedAppend(SetDescriptor { 0, size, 0 }); + } + + void reserveUninitializedCapacity(unsigned elementCount) + { + m_partitionedElements.resize(elementCount); + m_elementPositionInPartitionedNodes.resize(elementCount); + m_elementToSetMap.resize(elementCount); + } + + void addSetUnchecked(unsigned start, unsigned size) + { + m_sets.append(SetDescriptor { start, size, 0 }); + } + + void setElementUnchecked(unsigned elementIndex, unsigned positionInPartition, unsigned setIndex) + { + ASSERT(setIndex < m_sets.size()); + m_partitionedElements[positionInPartition] = elementIndex; + m_elementPositionInPartitionedNodes[elementIndex] = positionInPartition; + m_elementToSetMap[elementIndex] = setIndex; + } + + unsigned startOffsetOfSet(unsigned setIndex) const + { + return m_sets[setIndex].start; + } + + ALWAYS_INLINE void markElementInCurrentGeneration(unsigned elementIndex) + { + // Swap the node with the first unmarked node. + unsigned setIndex = m_elementToSetMap[elementIndex]; + SetDescriptor& setDescriptor = m_sets[setIndex]; + + unsigned elementPositionInPartition = m_elementPositionInPartitionedNodes[elementIndex]; + ASSERT(elementPositionInPartition >= setDescriptor.start); + ASSERT(elementPositionInPartition < setDescriptor.end()); + + unsigned firstUnmarkedElementPositionInPartition = setDescriptor.indexAfterMarkedElements(); + ASSERT(firstUnmarkedElementPositionInPartition >= setDescriptor.start && firstUnmarkedElementPositionInPartition < setDescriptor.end()); + ASSERT(firstUnmarkedElementPositionInPartition >= firstUnmarkedElementPositionInPartition); + + // Swap the nodes in the set. + unsigned firstUnmarkedElement = m_partitionedElements[firstUnmarkedElementPositionInPartition]; + m_partitionedElements[firstUnmarkedElementPositionInPartition] = elementIndex; + m_partitionedElements[elementPositionInPartition] = firstUnmarkedElement; + + // Update their index. + m_elementPositionInPartitionedNodes[elementIndex] = firstUnmarkedElementPositionInPartition; + m_elementPositionInPartitionedNodes[firstUnmarkedElement] = elementPositionInPartition; + + if (!setDescriptor.markedCount) { + ASSERT(!m_setsMarkedInCurrentGeneration.contains(setIndex)); + m_setsMarkedInCurrentGeneration.append(setIndex); + } + ++setDescriptor.markedCount; + } + + // The function passed as argument MUST not modify the partition. + template<typename Function> + void refineGeneration(const Function& function) + { + for (unsigned setIndex : m_setsMarkedInCurrentGeneration) { + SetDescriptor& setDescriptor = m_sets[setIndex]; + if (setDescriptor.markedCount == setDescriptor.size) { + // Everything is marked, there is nothing to refine. + setDescriptor.markedCount = 0; + continue; + } + + SetDescriptor newSet; + bool newSetIsMarkedSet = setDescriptor.markedCount * 2 <= setDescriptor.size; + if (newSetIsMarkedSet) { + // Less than half of the nodes have been marked. + newSet = { setDescriptor.start, setDescriptor.markedCount, 0 }; + setDescriptor.start = setDescriptor.start + setDescriptor.markedCount; + } else + newSet = { setDescriptor.start + setDescriptor.markedCount, setDescriptor.size - setDescriptor.markedCount, 0 }; + setDescriptor.size -= newSet.size; + setDescriptor.markedCount = 0; + + unsigned newSetIndex = m_sets.size(); + m_sets.append(newSet); + + for (unsigned i = newSet.start; i < newSet.end(); ++i) + m_elementToSetMap[m_partitionedElements[i]] = newSetIndex; + + function(newSetIndex); + } + m_setsMarkedInCurrentGeneration.clear(); + } + + // Call Function() on every node of a given subset. + template<typename Function> + void iterateSet(unsigned setIndex, const Function& function) + { + SetDescriptor& setDescriptor = m_sets[setIndex]; + for (unsigned i = setDescriptor.start; i < setDescriptor.end(); ++i) + function(m_partitionedElements[i]); + } + + // Index of the set containing the Node. + unsigned setIndex(unsigned elementIndex) const + { + return m_elementToSetMap[elementIndex]; + } + + // NodeIndex of the first element in the set. + unsigned firstElementInSet(unsigned setIndex) const + { + return m_partitionedElements[m_sets[setIndex].start]; + } + + unsigned size() const + { + return m_sets.size(); + } + +private: + struct SetDescriptor { + unsigned start; + unsigned size; + unsigned markedCount; + + unsigned indexAfterMarkedElements() const { return start + markedCount; } + unsigned end() const { return start + size; } + }; + + // List of sets. + Vector<SetDescriptor, 0, ContentExtensionsOverflowHandler> m_sets; + + // All the element indices such that two elements of the same set never have a element of a different set between them. + Vector<unsigned, 0, ContentExtensionsOverflowHandler> m_partitionedElements; + + // Map elementIndex->position in the partitionedElements. + Vector<unsigned, 0, ContentExtensionsOverflowHandler> m_elementPositionInPartitionedNodes; + + // Map elementIndex->SetIndex. + Vector<unsigned, 0, ContentExtensionsOverflowHandler> m_elementToSetMap; + + // List of sets with any marked node. Each set can appear at most once. + // FIXME: find a good inline size for this. + Vector<unsigned, 128, ContentExtensionsOverflowHandler> m_setsMarkedInCurrentGeneration; +}; + +class FullGraphPartition { + typedef MutableRangeList<char, uint32_t, 128> SingularTransitionsMutableRangeList; +public: + FullGraphPartition(const DFA& dfa) + { + m_nodePartition.initialize(dfa.nodes.size()); + + SingularTransitionsMutableRangeList singularTransitions; + CounterConverter counterConverter; + for (const DFANode& node : dfa.nodes) { + if (node.isKilled()) + continue; + auto transitions = node.transitions(dfa); + singularTransitions.extend(transitions.begin(), transitions.end(), counterConverter); + } + + // Count the number of transition for each singular range. This will give us the bucket size + // for the transition partition, where transitions are partitioned by "symbol". + unsigned rangeIndexAccumulator = 0; + for (const auto& transition : singularTransitions) { + m_transitionPartition.addSetUnchecked(rangeIndexAccumulator, transition.data); + rangeIndexAccumulator += transition.data; + } + + // Count the number of incoming transitions per node. + m_flattenedTransitionsStartOffsetPerNode.resize(dfa.nodes.size()); + memset(m_flattenedTransitionsStartOffsetPerNode.data(), 0, m_flattenedTransitionsStartOffsetPerNode.size() * sizeof(unsigned)); + + Vector<char, 0, ContentExtensionsOverflowHandler> singularTransitionsFirsts; + singularTransitionsFirsts.reserveInitialCapacity(singularTransitions.m_ranges.size()); + for (const auto& transition : singularTransitions) + singularTransitionsFirsts.uncheckedAppend(transition.first); + + for (const DFANode& node : dfa.nodes) { + if (node.isKilled()) + continue; + auto transitions = node.transitions(dfa); + iterateIntersections(singularTransitionsFirsts, transitions, [&](unsigned, const DFANode::ConstRangeIterator& origin) { + uint32_t targetNodeIndex = origin.target(); + ++m_flattenedTransitionsStartOffsetPerNode[targetNodeIndex]; + }); + } + + // Accumulate the offsets. This gives us the start position of each bucket. + unsigned transitionAccumulator = 0; + for (unsigned i = 0; i < m_flattenedTransitionsStartOffsetPerNode.size(); ++i) { + unsigned transitionsCountForNode = m_flattenedTransitionsStartOffsetPerNode[i]; + m_flattenedTransitionsStartOffsetPerNode[i] = transitionAccumulator; + transitionAccumulator += transitionsCountForNode; + } + unsigned flattenedTransitionsSize = transitionAccumulator; + ASSERT_WITH_MESSAGE(flattenedTransitionsSize == rangeIndexAccumulator, "The number of transitions should be the same, regardless of how they are arranged in buckets."); + + m_transitionPartition.reserveUninitializedCapacity(flattenedTransitionsSize); + + // Next, let's fill the transition table and set up the size of each group at the same time. + m_flattenedTransitionsSizePerNode.resize(dfa.nodes.size()); + for (unsigned& counter : m_flattenedTransitionsSizePerNode) + counter = 0; + m_flattenedTransitions.resize(flattenedTransitionsSize); + + Vector<uint32_t> transitionPerRangeOffset(m_transitionPartition.size()); + memset(transitionPerRangeOffset.data(), 0, transitionPerRangeOffset.size() * sizeof(uint32_t)); + + for (unsigned i = 0; i < dfa.nodes.size(); ++i) { + const DFANode& node = dfa.nodes[i]; + if (node.isKilled()) + continue; + + auto transitions = node.transitions(dfa); + iterateIntersections(singularTransitionsFirsts, transitions, [&](unsigned singularTransitonIndex, const DFANode::ConstRangeIterator& origin) { + uint32_t targetNodeIndex = origin.target(); + + unsigned start = m_flattenedTransitionsStartOffsetPerNode[targetNodeIndex]; + unsigned offset = m_flattenedTransitionsSizePerNode[targetNodeIndex]; + unsigned positionInFlattenedTransitions = start + offset; + m_flattenedTransitions[positionInFlattenedTransitions] = Transition({ i }); + + uint32_t& inRangeOffset = transitionPerRangeOffset[singularTransitonIndex]; + unsigned positionInTransitionPartition = m_transitionPartition.startOffsetOfSet(singularTransitonIndex) + inRangeOffset; + ++inRangeOffset; + + m_transitionPartition.setElementUnchecked(positionInFlattenedTransitions, positionInTransitionPartition, singularTransitonIndex); + + ++m_flattenedTransitionsSizePerNode[targetNodeIndex]; + }); + } + } + + void markNode(unsigned nodeIndex) + { + m_nodePartition.markElementInCurrentGeneration(nodeIndex); + } + + void refinePartitions() + { + m_nodePartition.refineGeneration([&](unsigned smallestSetIndex) { + m_nodePartition.iterateSet(smallestSetIndex, [&](unsigned nodeIndex) { + unsigned incomingTransitionsStartForNode = m_flattenedTransitionsStartOffsetPerNode[nodeIndex]; + unsigned incomingTransitionsSizeForNode = m_flattenedTransitionsSizePerNode[nodeIndex]; + + for (unsigned i = 0; i < incomingTransitionsSizeForNode; ++i) + m_transitionPartition.markElementInCurrentGeneration(incomingTransitionsStartForNode + i); + }); + + // We only need to split the transitions, we handle the new sets through the main loop. + m_transitionPartition.refineGeneration([](unsigned) { }); + }); + } + + void splitByUniqueTransitions() + { + for (; m_nextTransitionSetToProcess < m_transitionPartition.size(); ++m_nextTransitionSetToProcess) { + // We use the known splitters to refine the set. + m_transitionPartition.iterateSet(m_nextTransitionSetToProcess, [&](unsigned transitionIndex) { + unsigned sourceNodeIndex = m_flattenedTransitions[transitionIndex].source; + m_nodePartition.markElementInCurrentGeneration(sourceNodeIndex); + }); + + refinePartitions(); + } + } + + unsigned nodeReplacement(unsigned nodeIndex) + { + unsigned setIndex = m_nodePartition.setIndex(nodeIndex); + return m_nodePartition.firstElementInSet(setIndex); + } + +private: + struct Transition { + unsigned source; + }; + + struct CounterConverter { + uint32_t convert(uint32_t) + { + return 1; + } + + void extend(uint32_t& destination, uint32_t) + { + ++destination; + } + }; + + Vector<unsigned, 0, ContentExtensionsOverflowHandler> m_flattenedTransitionsStartOffsetPerNode; + Vector<unsigned, 0, ContentExtensionsOverflowHandler> m_flattenedTransitionsSizePerNode; + Vector<Transition, 0, ContentExtensionsOverflowHandler> m_flattenedTransitions; + + Partition m_nodePartition; + Partition m_transitionPartition; + + unsigned m_nextTransitionSetToProcess { 0 }; +}; + +struct ActionKey { + enum DeletedValueTag { DeletedValue }; + explicit ActionKey(DeletedValueTag) { state = Deleted; } + + enum EmptyValueTag { EmptyValue }; + explicit ActionKey(EmptyValueTag) { state = Empty; } + + explicit ActionKey(const DFA* dfa, uint32_t actionsStart, uint16_t actionsLength) + : dfa(dfa) + , actionsStart(actionsStart) + , actionsLength(actionsLength) + , state(Valid) + { + StringHasher hasher; + hasher.addCharactersAssumingAligned(reinterpret_cast<const UChar*>(&dfa->actions[actionsStart]), actionsLength * sizeof(uint64_t) / sizeof(UChar)); + hash = hasher.hash(); + } + + bool isEmptyValue() const { return state == Empty; } + bool isDeletedValue() const { return state == Deleted; } + + unsigned hash; + + const DFA* dfa; + uint32_t actionsStart; + uint16_t actionsLength; + + enum { + Valid, + Empty, + Deleted + } state; +}; + +struct ActionKeyHash { + static unsigned hash(const ActionKey& actionKey) + { + return actionKey.hash; + } + + static bool equal(const ActionKey& a, const ActionKey& b) + { + if (a.state != b.state + || a.dfa != b.dfa + || a.actionsLength != b.actionsLength) + return false; + for (uint16_t i = 0; i < a.actionsLength; ++i) { + if (a.dfa->actions[a.actionsStart + i] != a.dfa->actions[b.actionsStart + i]) + return false; + } + return true; + } + static const bool safeToCompareToEmptyOrDeleted = false; +}; + +struct ActionKeyHashTraits : public WTF::CustomHashTraits<ActionKey> { + static const bool emptyValueIsZero = true; +}; + +} // anonymous namespace. + +void DFAMinimizer::minimize(DFA& dfa) +{ + FullGraphPartition fullGraphPartition(dfa); + + // Unlike traditional minimization final states can be differentiated by their action. + // Instead of creating a single set for the final state, we partition by actions from + // the start. + HashMap<ActionKey, Vector<unsigned>, ActionKeyHash, ActionKeyHashTraits> finalStates; + for (unsigned i = 0; i < dfa.nodes.size(); ++i) { + const DFANode& node = dfa.nodes[i]; + if (node.hasActions()) { + // FIXME: Sort the actions in the dfa to make nodes that have the same actions in different order equal. + auto addResult = finalStates.add(ActionKey(&dfa, node.actionsStart(), node.actionsLength()), Vector<unsigned>()); + addResult.iterator->value.append(i); + } + } + + for (const auto& slot : finalStates) { + for (unsigned finalStateIndex : slot.value) + fullGraphPartition.markNode(finalStateIndex); + fullGraphPartition.refinePartitions(); + } + + // Use every splitter to refine the node partitions. + fullGraphPartition.splitByUniqueTransitions(); + + Vector<unsigned> relocationVector; + relocationVector.reserveInitialCapacity(dfa.nodes.size()); + for (unsigned i = 0; i < dfa.nodes.size(); ++i) + relocationVector.uncheckedAppend(i); + + // Update all the transitions. + for (unsigned i = 0; i < dfa.nodes.size(); ++i) { + unsigned replacement = fullGraphPartition.nodeReplacement(i); + if (i != replacement) { + relocationVector[i] = replacement; + dfa.nodes[i].kill(dfa); + } + } + + dfa.root = relocationVector[dfa.root]; + for (DFANode& node : dfa.nodes) { + if (node.isKilled()) + continue; + + for (auto& transition : node.transitions(dfa)) { + uint32_t target = transition.target(); + uint32_t relocatedTarget = relocationVector[target]; + if (target != relocatedTarget) + transition.resetTarget(relocatedTarget); + } + } +} + +} // namespace ContentExtensions +} // namespace WebCore + +#endif // ENABLE(CONTENT_EXTENSIONS) diff --git a/Source/WebCore/contentextensions/DFAMinimizer.h b/Source/WebCore/contentextensions/DFAMinimizer.h new file mode 100644 index 000000000..9e0aafc95 --- /dev/null +++ b/Source/WebCore/contentextensions/DFAMinimizer.h @@ -0,0 +1,43 @@ +/* + * Copyright (C) 2015 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +#if ENABLE(CONTENT_EXTENSIONS) + +namespace WebCore { +namespace ContentExtensions { + +struct DFA; + +class DFAMinimizer { +public: + WEBCORE_EXPORT static void minimize(DFA&); +}; + +} // namespace ContentExtensions +} // namespace WebCore + +#endif // ENABLE(CONTENT_EXTENSIONS) diff --git a/Source/WebCore/contentextensions/DFANode.cpp b/Source/WebCore/contentextensions/DFANode.cpp new file mode 100644 index 000000000..762a13558 --- /dev/null +++ b/Source/WebCore/contentextensions/DFANode.cpp @@ -0,0 +1,147 @@ +/* + * Copyright (C) 2014, 2015 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "DFANode.h" + +#include "DFA.h" + +#if ENABLE(CONTENT_EXTENSIONS) + +namespace WebCore { + +namespace ContentExtensions { + +Vector<uint64_t> DFANode::actions(const DFA& dfa) const +{ + // FIXME: Use iterators instead of copying the Vector elements. + Vector<uint64_t> vector; + vector.reserveInitialCapacity(m_actionsLength); + for (uint32_t i = m_actionsStart; i < m_actionsStart + m_actionsLength; ++i) + vector.uncheckedAppend(dfa.actions[i]); + return vector; +} + +bool DFANode::containsTransition(char transition, const DFA& dfa) const +{ + // Called from DFAMinimizer, this loops though a maximum of 128 transitions, so it's not too slow. + ASSERT(m_transitionsLength <= 128); + for (unsigned i = m_transitionsStart; i < m_transitionsStart + m_transitionsLength; ++i) { + if (dfa.transitionRanges[i].first <= transition + && dfa.transitionRanges[i].last >= transition) + return true; + } + return false; +} + +void DFANode::kill(DFA& dfa) +{ + ASSERT(m_flags != IsKilled); + m_flags = IsKilled; // Killed nodes don't have any other flags. + + // Invalidate the now-unused memory in the DFA to make finding bugs easier. + for (unsigned i = m_transitionsStart; i < m_transitionsStart + m_transitionsLength; ++i) { + dfa.transitionRanges[i] = { -1, -1 }; + dfa.transitionDestinations[i] = std::numeric_limits<uint32_t>::max(); + } + for (unsigned i = m_actionsStart; i < m_actionsStart + m_actionsLength; ++i) + dfa.actions[i] = std::numeric_limits<uint64_t>::max(); + + m_actionsStart = 0; + m_actionsLength = 0; + m_transitionsStart = 0; + m_transitionsLength = 0; +}; + +bool DFANode::canUseFallbackTransition(const DFA& dfa) const +{ + // Transitions can contain '\0' if the expression has a end-of-line marker. + // Fallback transitions cover 1-127. We have to be careful with the first. + + IterableConstRange iterableTransitions = transitions(dfa); + auto iterator = iterableTransitions.begin(); + auto end = iterableTransitions.end(); + if (iterator == end) + return false; + + char lastSeenCharacter = 0; + if (!iterator.first()) { + lastSeenCharacter = iterator.last(); + if (lastSeenCharacter == 127) + return true; + ++iterator; + } + + for (;iterator != end; ++iterator) { + ASSERT(iterator.first() > lastSeenCharacter); + if (iterator.first() != lastSeenCharacter + 1) + return false; + + if (iterator.range().last == 127) + return true; + lastSeenCharacter = iterator.last(); + } + return false; +} + +uint32_t DFANode::bestFallbackTarget(const DFA& dfa) const +{ + ASSERT(canUseFallbackTransition(dfa)); + + HashMap<uint32_t, unsigned, DefaultHash<uint32_t>::Hash, WTF::UnsignedWithZeroKeyHashTraits<uint32_t>> histogram; + + IterableConstRange iterableTransitions = transitions(dfa); + auto iterator = iterableTransitions.begin(); + auto end = iterableTransitions.end(); + ASSERT_WITH_MESSAGE(iterator != end, "An empty range list cannot use a fallback transition."); + + if (!iterator.first() && !iterator.last()) + ++iterator; + ASSERT_WITH_MESSAGE(iterator != end, "An empty range list matching only zero cannot use a fallback transition."); + + uint32_t bestTarget = iterator.target(); + unsigned bestTargetScore = !iterator.range().first ? iterator.range().size() - 1 : iterator.range().size(); + histogram.add(bestTarget, bestTargetScore); + ++iterator; + + for (;iterator != end; ++iterator) { + unsigned rangeSize = iterator.range().size(); + uint32_t target = iterator.target(); + auto addResult = histogram.add(target, rangeSize); + if (!addResult.isNewEntry) + addResult.iterator->value += rangeSize; + if (addResult.iterator->value > bestTargetScore) { + bestTargetScore = addResult.iterator->value; + bestTarget = target; + } + } + return bestTarget; +} + +} // namespace ContentExtensions + +} // namespace WebCore + +#endif // ENABLE(CONTENT_EXTENSIONS) diff --git a/Source/WebCore/contentextensions/DFANode.h b/Source/WebCore/contentextensions/DFANode.h new file mode 100644 index 000000000..342ef9167 --- /dev/null +++ b/Source/WebCore/contentextensions/DFANode.h @@ -0,0 +1,176 @@ +/* + * Copyright (C) 2014, 2015 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +#if ENABLE(CONTENT_EXTENSIONS) + +#include "ContentExtensionsDebugging.h" +#include <wtf/HashMap.h> +#include <wtf/Vector.h> + +namespace WebCore { + +namespace ContentExtensions { + +struct DFA; + +struct CharRange { + char first; + char last; + unsigned size() const { return last - first + 1; } +}; + +// A DFANode abstract the transition table out of a DFA state. If a state is accepting, the DFANode also have +// the actions for that state. +class DFANode { +public: + struct ConstRangeIterator { + const DFA& dfa; + uint32_t position; + + const ConstRangeIterator& operator*() const { return *this; } + + bool operator==(const ConstRangeIterator& other) const + { + ASSERT(&dfa == &other.dfa); + return position == other.position; + } + bool operator!=(const ConstRangeIterator& other) const { return !(*this == other); } + + ConstRangeIterator& operator++() + { + ++position; + return *this; + } + + const CharRange& range() const; + uint32_t target() const; + + char first() const { return range().first; } + char last() const { return range().last; } + uint32_t data() const { return target(); } + }; + + struct IterableConstRange { + const DFA& dfa; + uint32_t rangesStart; + uint32_t rangesEnd; + + ConstRangeIterator begin() const { return { dfa, rangesStart }; } + ConstRangeIterator end() const { return { dfa, rangesEnd }; } + }; + + IterableConstRange transitions(const DFA& dfa) const + { + return IterableConstRange { dfa, m_transitionsStart, m_transitionsStart + m_transitionsLength }; + } + + struct RangeIterator { + DFA& dfa; + uint32_t position; + + RangeIterator& operator*() { return *this; } + + bool operator==(const RangeIterator& other) const + { + ASSERT(&dfa == &other.dfa); + return position == other.position; + } + bool operator!=(const RangeIterator& other) const { return !(*this == other); } + + RangeIterator& operator++() + { + ++position; + return *this; + } + + const CharRange& range() const; + uint32_t target() const; + void resetTarget(uint32_t); + + char first() const { return range().first; } + char last() const { return range().last; } + uint32_t data() const { return target(); } + }; + + struct IterableRange { + DFA& dfa; + uint32_t rangesStart; + uint32_t rangesEnd; + + RangeIterator begin() const { return { dfa, rangesStart }; } + RangeIterator end() const { return { dfa, rangesEnd }; } + }; + + IterableRange transitions(DFA& dfa) + { + return IterableRange { dfa, m_transitionsStart, m_transitionsStart + m_transitionsLength }; + } + + // FIXME: Stop minimizing killed nodes and add ASSERT(!isKilled()) in many functions here. + void kill(DFA&); + Vector<uint64_t> actions(const DFA&) const; + bool containsTransition(char, const DFA&) const; + + bool isKilled() const { return m_flags & IsKilled; } + bool hasActions() const { return !!m_actionsLength; } + uint16_t actionsLength() const { return m_actionsLength; } + uint32_t actionsStart() const { return m_actionsStart; } + + bool canUseFallbackTransition(const DFA&) const; + uint32_t bestFallbackTarget(const DFA&) const; + + void setActions(uint32_t start, uint16_t length) + { + ASSERT(!m_actionsStart); + ASSERT(!m_actionsLength); + m_actionsStart = start; + m_actionsLength = length; + } + void setTransitions(uint32_t start, uint16_t length) + { + ASSERT(!m_transitionsStart); + ASSERT(!m_transitionsLength); + m_transitionsStart = start; + m_transitionsLength = length; + } + +private: + uint32_t m_actionsStart { 0 }; + uint32_t m_transitionsStart { 0 }; + uint16_t m_actionsLength { 0 }; + uint8_t m_transitionsLength { 0 }; + + uint8_t m_flags { 0 }; + const uint8_t IsKilled = 0x01; +}; + +COMPILE_ASSERT(sizeof(DFANode) <= 16, Keep the DFANodes small); + +} // namespace ContentExtensions +} // namespace WebCore + +#endif // ENABLE(CONTENT_EXTENSIONS) diff --git a/Source/WebCore/contentextensions/HashableActionList.h b/Source/WebCore/contentextensions/HashableActionList.h new file mode 100644 index 000000000..1039c1da5 --- /dev/null +++ b/Source/WebCore/contentextensions/HashableActionList.h @@ -0,0 +1,93 @@ +/* + * Copyright (C) 2015 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +#include <wtf/Hasher.h> +#include <wtf/Vector.h> + +namespace WebCore { + +namespace ContentExtensions { + +struct HashableActionList { + enum DeletedValueTag { DeletedValue }; + explicit HashableActionList(DeletedValueTag) { state = Deleted; } + + enum EmptyValueTag { EmptyValue }; + explicit HashableActionList(EmptyValueTag) { state = Empty; } + + template<typename AnyVectorType> + explicit HashableActionList(const AnyVectorType& otherActions) + : actions(otherActions) + , state(Valid) + { + std::sort(actions.begin(), actions.end()); + StringHasher hasher; + hasher.addCharactersAssumingAligned(reinterpret_cast<const UChar*>(actions.data()), actions.size() * sizeof(uint64_t) / sizeof(UChar)); + hash = hasher.hash(); + } + + bool isEmptyValue() const { return state == Empty; } + bool isDeletedValue() const { return state == Deleted; } + + bool operator==(const HashableActionList other) const + { + return state == other.state && actions == other.actions; + } + + bool operator!=(const HashableActionList other) const + { + return !(*this == other); + } + + Vector<uint64_t> actions; + unsigned hash; + enum { + Valid, + Empty, + Deleted + } state; +}; + +struct HashableActionListHash { + static unsigned hash(const HashableActionList& actionKey) + { + return actionKey.hash; + } + + static bool equal(const HashableActionList& a, const HashableActionList& b) + { + return a == b; + } + static const bool safeToCompareToEmptyOrDeleted = false; +}; + +struct HashableActionListHashTraits : public WTF::CustomHashTraits<HashableActionList> { + static const bool emptyValueIsZero = false; +}; + +} // namespace ContentExtensions +} // namespace WebCore diff --git a/Source/WebCore/contentextensions/ImmutableNFA.h b/Source/WebCore/contentextensions/ImmutableNFA.h new file mode 100644 index 000000000..f5d21aeba --- /dev/null +++ b/Source/WebCore/contentextensions/ImmutableNFA.h @@ -0,0 +1,182 @@ +/* + * Copyright (C) 2015 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +#if ENABLE(CONTENT_EXTENSIONS) + +#include <wtf/Vector.h> + +namespace WebCore { + +namespace ContentExtensions { + +template <typename CharacterType> +struct ImmutableRange { + uint32_t targetStart; + uint32_t targetEnd; + CharacterType first; + CharacterType last; +}; + +struct ImmutableNFANode { + uint32_t rangesStart { 0 }; + uint32_t rangesEnd { 0 }; + uint32_t epsilonTransitionTargetsStart { 0 }; + uint32_t epsilonTransitionTargetsEnd { 0 }; + uint32_t actionStart { 0 }; + uint32_t actionEnd { 0 }; +}; + +template <typename CharacterType, typename ActionType> +struct ImmutableNFA { + Vector<ImmutableNFANode, 0, ContentExtensionsOverflowHandler> nodes; + Vector<ImmutableRange<CharacterType>, 0, ContentExtensionsOverflowHandler> transitions; + Vector<uint32_t, 0, ContentExtensionsOverflowHandler> targets; + Vector<uint32_t, 0, ContentExtensionsOverflowHandler> epsilonTransitionsTargets; + Vector<ActionType, 0, ContentExtensionsOverflowHandler> actions; + + struct ConstTargetIterator { + const ImmutableNFA& immutableNFA; + uint32_t position; + + const uint32_t& operator*() const { return immutableNFA.targets[position]; } + const uint32_t* operator->() const { return &immutableNFA.targets[position]; } + + bool operator==(const ConstTargetIterator& other) const + { + ASSERT(&immutableNFA == &other.immutableNFA); + return position == other.position; + } + bool operator!=(const ConstTargetIterator& other) const { return !(*this == other); } + + ConstTargetIterator& operator++() + { + ++position; + return *this; + } + }; + + struct IterableConstTargets { + const ImmutableNFA& immutableNFA; + uint32_t targetStart; + uint32_t targetEnd; + + ConstTargetIterator begin() const { return { immutableNFA, targetStart }; } + ConstTargetIterator end() const { return { immutableNFA, targetEnd }; } + }; + + struct ConstRangeIterator { + const ImmutableNFA& immutableNFA; + uint32_t position; + + bool operator==(const ConstRangeIterator& other) const + { + ASSERT(&immutableNFA == &other.immutableNFA); + return position == other.position; + } + bool operator!=(const ConstRangeIterator& other) const { return !(*this == other); } + + ConstRangeIterator& operator++() + { + ++position; + return *this; + } + + CharacterType first() const + { + return range().first; + } + + CharacterType last() const + { + return range().last; + } + + IterableConstTargets data() const + { + const ImmutableRange<CharacterType>& range = this->range(); + return { immutableNFA, range.targetStart, range.targetEnd }; + }; + + private: + const ImmutableRange<CharacterType>& range() const + { + return immutableNFA.transitions[position]; + } + }; + + struct IterableConstRange { + const ImmutableNFA& immutableNFA; + uint32_t rangesStart; + uint32_t rangesEnd; + + ConstRangeIterator begin() const { return { immutableNFA, rangesStart }; } + ConstRangeIterator end() const { return { immutableNFA, rangesEnd }; } + +#if CONTENT_EXTENSIONS_STATE_MACHINE_DEBUGGING + void debugPrint() const + { + for (const auto& range : *this) + WTFLogAlways(" %d-%d", range.first, range.last); + } +#endif + }; + + IterableConstRange transitionsForNode(uint32_t nodeId) const + { + const ImmutableNFANode& node = nodes[nodeId]; + return { *this, node.rangesStart, node.rangesEnd }; + }; + + uint32_t root() const + { + RELEASE_ASSERT(!nodes.isEmpty()); + return 0; + } + + void finalize() + { + nodes.shrinkToFit(); + transitions.shrinkToFit(); + targets.shrinkToFit(); + epsilonTransitionsTargets.shrinkToFit(); + actions.shrinkToFit(); + } + + size_t memoryUsed() const + { + return nodes.capacity() * sizeof(ImmutableNFANode) + + transitions.capacity() * sizeof(ImmutableRange<CharacterType>) + + targets.capacity() * sizeof(uint32_t) + + epsilonTransitionsTargets.capacity() * sizeof(uint32_t) + + actions.capacity() * sizeof(ActionType); + } +}; + +} // namespace ContentExtensions +} // namespace WebCore + +#endif // ENABLE(CONTENT_EXTENSIONS) diff --git a/Source/WebCore/contentextensions/ImmutableNFANodeBuilder.h b/Source/WebCore/contentextensions/ImmutableNFANodeBuilder.h new file mode 100644 index 000000000..e7cd31cf7 --- /dev/null +++ b/Source/WebCore/contentextensions/ImmutableNFANodeBuilder.h @@ -0,0 +1,229 @@ +/* + * Copyright (C) 2015 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +#include "ImmutableNFA.h" +#include "MutableRangeList.h" +#include <wtf/HashSet.h> + +#if ENABLE(CONTENT_EXTENSIONS) + +namespace WebCore { + +namespace ContentExtensions { + +// A ImmutableNFANodeBuilder let you build a NFA node by adding states and linking with other nodes. +// Whe a builder is destructed, all its properties are finalized into the NFA. Using the NA with a live +// builder results in undefined behaviors. +template <typename CharacterType, typename ActionType> +class ImmutableNFANodeBuilder { + typedef ImmutableNFA<CharacterType, ActionType> TypedImmutableNFA; + typedef HashSet<uint32_t, DefaultHash<uint32_t>::Hash, WTF::UnsignedWithZeroKeyHashTraits<uint32_t>> TargetSet; +public: + ImmutableNFANodeBuilder() { } + + ImmutableNFANodeBuilder(TypedImmutableNFA& immutableNFA) + : m_immutableNFA(&immutableNFA) + , m_finalized(false) + { + m_nodeId = m_immutableNFA->nodes.size(); + m_immutableNFA->nodes.append(ImmutableNFANode()); + } + + ImmutableNFANodeBuilder(ImmutableNFANodeBuilder&& other) + : m_immutableNFA(other.m_immutableNFA) + , m_ranges(WTFMove(other.m_ranges)) + , m_epsilonTransitionTargets(WTFMove(other.m_epsilonTransitionTargets)) + , m_actions(WTFMove(other.m_actions)) + , m_nodeId(other.m_nodeId) + , m_finalized(other.m_finalized) + { + other.m_immutableNFA = nullptr; + other.m_finalized = true; + } + + ~ImmutableNFANodeBuilder() + { + if (!m_finalized) + finalize(); + } + + bool isValid() const + { + return !!m_immutableNFA; + } + + uint32_t nodeId() const + { + ASSERT(isValid()); + return m_nodeId; + } + + struct TrivialRange { + CharacterType first; + CharacterType last; + }; + + struct FakeRangeIterator { + CharacterType first() const { return range.first; } + CharacterType last() const { return range.last; } + uint32_t data() const { return targetId; } + bool operator==(const FakeRangeIterator& other) + { + return this->isEnd == other.isEnd; + } + bool operator!=(const FakeRangeIterator& other) { return !(*this == other); } + FakeRangeIterator operator++() + { + isEnd = true; + return *this; + } + + TrivialRange range; + uint32_t targetId; + bool isEnd; + }; + + void addTransition(CharacterType first, CharacterType last, uint32_t targetNodeId) + { + ASSERT(!m_finalized); + ASSERT(m_immutableNFA); + + struct Converter { + TargetSet convert(uint32_t target) + { + return TargetSet({ target }); + } + void extend(TargetSet& existingTargets, uint32_t target) + { + existingTargets.add(target); + } + }; + + Converter converter; + m_ranges.extend(FakeRangeIterator { { first, last }, targetNodeId, false }, FakeRangeIterator { { 0, 0 }, targetNodeId, true }, converter); + } + + void addEpsilonTransition(const ImmutableNFANodeBuilder& target) + { + ASSERT(m_immutableNFA == target.m_immutableNFA); + addEpsilonTransition(target.m_nodeId); + } + + void addEpsilonTransition(uint32_t targetNodeId) + { + ASSERT(!m_finalized); + ASSERT(m_immutableNFA); + m_epsilonTransitionTargets.add(targetNodeId); + } + + template<typename ActionIterator> + void setActions(ActionIterator begin, ActionIterator end) + { + ASSERT(!m_finalized); + ASSERT(m_immutableNFA); + + m_actions.add(begin, end); + } + + ImmutableNFANodeBuilder& operator=(ImmutableNFANodeBuilder&& other) + { + if (!m_finalized) + finalize(); + + m_immutableNFA = other.m_immutableNFA; + m_ranges = WTFMove(other.m_ranges); + m_epsilonTransitionTargets = WTFMove(other.m_epsilonTransitionTargets); + m_actions = WTFMove(other.m_actions); + m_nodeId = other.m_nodeId; + m_finalized = other.m_finalized; + + other.m_immutableNFA = nullptr; + other.m_finalized = true; + return *this; + } + +private: + void finalize() + { + ASSERT_WITH_MESSAGE(!m_finalized, "The API contract is that the builder can only be finalized once."); + m_finalized = true; + ImmutableNFANode& immutableNFANode = m_immutableNFA->nodes[m_nodeId]; + sinkActions(immutableNFANode); + sinkEpsilonTransitions(immutableNFANode); + sinkTransitions(immutableNFANode); + } + + void sinkActions(ImmutableNFANode& immutableNFANode) + { + unsigned actionStart = m_immutableNFA->actions.size(); + for (const ActionType& action : m_actions) + m_immutableNFA->actions.append(action); + unsigned actionEnd = m_immutableNFA->actions.size(); + immutableNFANode.actionStart = actionStart; + immutableNFANode.actionEnd = actionEnd; + } + + void sinkTransitions(ImmutableNFANode& immutableNFANode) + { + unsigned transitionsStart = m_immutableNFA->transitions.size(); + for (const auto& range : m_ranges) { + unsigned targetsStart = m_immutableNFA->targets.size(); + for (uint32_t target : range.data) + m_immutableNFA->targets.append(target); + unsigned targetsEnd = m_immutableNFA->targets.size(); + + m_immutableNFA->transitions.append(ImmutableRange<CharacterType> { targetsStart, targetsEnd, range.first, range.last }); + } + unsigned transitionsEnd = m_immutableNFA->transitions.size(); + + immutableNFANode.rangesStart = transitionsStart; + immutableNFANode.rangesEnd = transitionsEnd; + } + + void sinkEpsilonTransitions(ImmutableNFANode& immutableNFANode) + { + unsigned start = m_immutableNFA->epsilonTransitionsTargets.size(); + for (uint32_t target : m_epsilonTransitionTargets) + m_immutableNFA->epsilonTransitionsTargets.append(target); + unsigned end = m_immutableNFA->epsilonTransitionsTargets.size(); + + immutableNFANode.epsilonTransitionTargetsStart = start; + immutableNFANode.epsilonTransitionTargetsEnd = end; + } + + TypedImmutableNFA* m_immutableNFA { nullptr }; + MutableRangeList<CharacterType, TargetSet> m_ranges; + TargetSet m_epsilonTransitionTargets; + HashSet<ActionType, WTF::IntHash<ActionType>, WTF::UnsignedWithZeroKeyHashTraits<ActionType>> m_actions; + uint32_t m_nodeId; + bool m_finalized { true }; +}; + +} // namespace ContentExtensions +} // namespace WebCore + +#endif // ENABLE(CONTENT_EXTENSIONS) diff --git a/Source/WebCore/contentextensions/MutableRange.h b/Source/WebCore/contentextensions/MutableRange.h new file mode 100644 index 000000000..2802881ff --- /dev/null +++ b/Source/WebCore/contentextensions/MutableRange.h @@ -0,0 +1,96 @@ +/* + * Copyright (C) 2015 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +#if ENABLE(CONTENT_EXTENSIONS) + +namespace WebCore { + +namespace ContentExtensions { + +template <typename CharacterType, typename DataType> +class MutableRange { + typedef MutableRange<CharacterType, DataType> TypedMutableRange; +public: + MutableRange(uint32_t nextRangeIndex, CharacterType first, CharacterType last) + : nextRangeIndex(nextRangeIndex) + , first(first) + , last(last) + { + ASSERT(first <= last); + } + + MutableRange(const DataType& data, uint32_t nextRangeIndex, CharacterType first, CharacterType last) + : data(data) + , nextRangeIndex(nextRangeIndex) + , first(first) + , last(last) + { + ASSERT(first <= last); + } + + MutableRange(DataType&& data, uint32_t nextRangeIndex, CharacterType first, CharacterType last) + : data(WTFMove(data)) + , nextRangeIndex(nextRangeIndex) + , first(first) + , last(last) + { + ASSERT(first <= last); + } + + MutableRange(MutableRange&& other) + : data(WTFMove(other.data)) + , nextRangeIndex(other.nextRangeIndex) + , first(other.first) + , last(other.last) + { + ASSERT(first <= last); + } + + TypedMutableRange& operator=(TypedMutableRange&& other) + { + data = WTFMove(other.data); + nextRangeIndex = WTFMove(other.nextRangeIndex); + first = WTFMove(other.first); + last = WTFMove(other.last); + return *this; + } + + DataType data; + + // We use a funny convention: if there are no nextRange, the nextRangeIndex is zero. + // This is faster to check than a special value in many cases. + // We can use zero because ranges can only form a chain, and the first range is always zero by convention. + // When we insert something in from of the first range, we swap the values. + uint32_t nextRangeIndex; + CharacterType first; + CharacterType last; +}; + +} // namespace ContentExtensions +} // namespace WebCore + +#endif // ENABLE(CONTENT_EXTENSIONS) diff --git a/Source/WebCore/contentextensions/MutableRangeList.h b/Source/WebCore/contentextensions/MutableRangeList.h new file mode 100644 index 000000000..9b20687b8 --- /dev/null +++ b/Source/WebCore/contentextensions/MutableRangeList.h @@ -0,0 +1,266 @@ +/* + * Copyright (C) 2015 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +#include "MutableRange.h" + +#if ENABLE(CONTENT_EXTENSIONS) + +namespace WebCore { + +namespace ContentExtensions { + +// A range list keeps ranges sorted. Ranges are not sorted in the vector, but +template <typename CharacterType, typename DataType, unsigned inlineCapacity = 0> +class MutableRangeList { + typedef MutableRange<CharacterType, DataType> TypedMutableRange; +public: + class ConstIterator { + public: + const MutableRangeList& rangeList; + uint32_t index; + bool atEnd; + + const TypedMutableRange& operator*() const { return rangeList.m_ranges[index]; } + const TypedMutableRange* operator->() const { return &rangeList.m_ranges[index]; } + + CharacterType first() const { return rangeList.m_ranges[index].first; } + CharacterType last() const { return rangeList.m_ranges[index].last; } + CharacterType data() const { return rangeList.m_ranges[index].data; } + + bool operator==(const ConstIterator& other) const + { + ASSERT(&rangeList == &other.rangeList); + if (atEnd || other.atEnd) + return atEnd == other.atEnd; + return index == other.index; + } + bool operator!=(const ConstIterator& other) const { return !(*this == other); } + + ConstIterator& operator++() + { + ASSERT(!atEnd); + index = rangeList.m_ranges[index].nextRangeIndex; + if (!index) + atEnd = true; + return *this; + } + }; + + ConstIterator begin() const { return ConstIterator { *this, 0, m_ranges.isEmpty() }; } + ConstIterator end() const { return ConstIterator { *this, 0, true }; } + + uint32_t appendRange(uint32_t lastRangeIndex, CharacterType first, CharacterType last, const DataType& data) + { + uint32_t newRangeIndex = m_ranges.size(); + m_ranges.append(TypedMutableRange(data, 0, first, last)); + if (!newRangeIndex) + return 0; + + ASSERT(!m_ranges[lastRangeIndex].nextRangeIndex); + ASSERT(m_ranges[lastRangeIndex].last < first); + + m_ranges[lastRangeIndex].nextRangeIndex = newRangeIndex; + return newRangeIndex; + } + + template <typename RangeIterator, typename DataConverter> + void extend(RangeIterator otherIterator, RangeIterator otherEnd, DataConverter dataConverter) + { + if (otherIterator == otherEnd) + return; + + if (m_ranges.isEmpty()) { + initializeFrom(otherIterator, otherEnd, dataConverter); + return; + } + + bool reachedSelfEnd = false; + uint32_t lastSelfRangeIndex = 0; + uint32_t selfRangeIndex = 0; + + auto otherRangeOffset = otherIterator.first(); // To get the right type :) + otherRangeOffset = 0; + + do { + TypedMutableRange* activeSelfRange = &m_ranges[selfRangeIndex]; + + // First, we move forward until we find something interesting. + if (activeSelfRange->last < otherIterator.first() + otherRangeOffset) { + lastSelfRangeIndex = selfRangeIndex; + selfRangeIndex = activeSelfRange->nextRangeIndex; + reachedSelfEnd = !selfRangeIndex; + continue; + } + if (otherIterator.last() < activeSelfRange->first) { + insertBetween(lastSelfRangeIndex, selfRangeIndex, otherIterator.first() + otherRangeOffset, otherIterator.last(), dataConverter.convert(otherIterator.data())); + + ++otherIterator; + otherRangeOffset = 0; + continue; + } + + // If we reached here, we have: + // 1) activeRangeA->last >= activeRangeB->first. + // 2) activeRangeA->first <= activeRangeB->last. + // But we don't know how they collide. + + // Do we have a part on the left? Create a new range for it. + if (activeSelfRange->first < otherIterator.first() + otherRangeOffset) { + DataType copiedData = activeSelfRange->data; + CharacterType newRangeFirstCharacter = activeSelfRange->first; + activeSelfRange->first = otherIterator.first() + otherRangeOffset; + insertBetween(lastSelfRangeIndex, selfRangeIndex, newRangeFirstCharacter, otherIterator.first() + otherRangeOffset - 1, WTFMove(copiedData)); + activeSelfRange = &m_ranges[selfRangeIndex]; + } else if (otherIterator.first() + otherRangeOffset < activeSelfRange->first) { + insertBetween(lastSelfRangeIndex, selfRangeIndex, otherIterator.first() + otherRangeOffset, activeSelfRange->first - 1, dataConverter.convert(otherIterator.data())); + + activeSelfRange = &m_ranges[selfRangeIndex]; + ASSERT_WITH_MESSAGE(otherRangeOffset < activeSelfRange->first - otherIterator.first(), "The offset must move forward or we could get stuck on this operation."); + otherRangeOffset = activeSelfRange->first - otherIterator.first(); + } + + // Here, we know both ranges start at the same point, we need to create the part that intersect + // and merge the data. + + if (activeSelfRange->last == otherIterator.last()) { + // If they finish together, things are really easy: we just add B to A. + dataConverter.extend(activeSelfRange->data, otherIterator.data()); + + lastSelfRangeIndex = selfRangeIndex; + selfRangeIndex = activeSelfRange->nextRangeIndex; + reachedSelfEnd = !selfRangeIndex; + + ++otherIterator; + otherRangeOffset = 0; + continue; + } + + if (activeSelfRange->last > otherIterator.last()) { + // If A is bigger than B, we add a merged version and move A to the right. + + CharacterType combinedPartStart = activeSelfRange->first; + activeSelfRange->first = otherIterator.last() + 1; + + DataType combinedData = activeSelfRange->data; + dataConverter.extend(combinedData, otherIterator.data()); + insertBetween(lastSelfRangeIndex, selfRangeIndex, combinedPartStart, otherIterator.last(), WTFMove(combinedData)); + + ++otherIterator; + otherRangeOffset = 0; + continue; + } + + // If we reached here, B ends after A. We merge the intersection and move on. + ASSERT(otherIterator.last() > activeSelfRange->last); + dataConverter.extend(activeSelfRange->data, otherIterator.data()); + + otherRangeOffset = activeSelfRange->last - otherIterator.first() + 1; + lastSelfRangeIndex = selfRangeIndex; + selfRangeIndex = activeSelfRange->nextRangeIndex; + reachedSelfEnd = !selfRangeIndex; + } while (!reachedSelfEnd && otherIterator != otherEnd); + + while (otherIterator != otherEnd) { + lastSelfRangeIndex = appendRange(lastSelfRangeIndex, otherIterator.first() + otherRangeOffset, otherIterator.last(), dataConverter.convert(otherIterator.data())); + otherRangeOffset = 0; + ++otherIterator; + } + } + + unsigned size() const + { + return m_ranges.size(); + } + + bool isEmpty() const + { + return m_ranges.isEmpty(); + } + + void clear() + { + m_ranges.clear(); + } + +#if CONTENT_EXTENSIONS_STATE_MACHINE_DEBUGGING + void debugPrint() const + { + for (const TypedMutableRange& range : *this) + WTFLogAlways(" %d-%d", range.first, range.last); + } +#endif + + Vector<MutableRange<CharacterType, DataType>, inlineCapacity, ContentExtensionsOverflowHandler> m_ranges; +private: + void insertBetween(uint32_t& leftRangeIndex, uint32_t& rightRangeIndex, CharacterType first, CharacterType last, DataType&& data) + { + ASSERT(m_ranges[rightRangeIndex].first > last); + + if (!rightRangeIndex) { + // This is a special case. We always keep the first range as the first element in the vector. + uint32_t movedRangeIndex = m_ranges.size(); + m_ranges.append(WTFMove(m_ranges.first())); + m_ranges[0] = TypedMutableRange(WTFMove(data), movedRangeIndex, first, last); + leftRangeIndex = 0; + rightRangeIndex = movedRangeIndex; + return; + } + + ASSERT(m_ranges[leftRangeIndex].nextRangeIndex == rightRangeIndex); + ASSERT(m_ranges[leftRangeIndex].last < first); + + uint32_t newRangeIndex = m_ranges.size(); + m_ranges.append(TypedMutableRange(WTFMove(data), rightRangeIndex, first, last)); + m_ranges[leftRangeIndex].nextRangeIndex = newRangeIndex; + leftRangeIndex = newRangeIndex; + } + + template <typename RangeIterator, typename DataConverter> + void initializeFrom(RangeIterator otherIterator, RangeIterator otherEnd, DataConverter dataConverter) + { + ASSERT_WITH_MESSAGE(otherIterator != otherEnd, "We should never do anything when extending with a null range."); + ASSERT_WITH_MESSAGE(m_ranges.isEmpty(), "This code does not handle splitting, it can only be used on empty RangeList."); + + uint32_t loopCounter = 0; + do { + m_ranges.append(TypedMutableRange(dataConverter.convert(otherIterator.data()), + loopCounter + 1, + otherIterator.first(), + otherIterator.last())); + ++loopCounter; + ++otherIterator; + } while (otherIterator != otherEnd); + + if (!m_ranges.isEmpty()) + m_ranges.last().nextRangeIndex = 0; + } +}; + +} // namespace ContentExtensions +} // namespace WebCore + +#endif // ENABLE(CONTENT_EXTENSIONS) diff --git a/Source/WebCore/contentextensions/NFA.cpp b/Source/WebCore/contentextensions/NFA.cpp new file mode 100644 index 000000000..bc9921a79 --- /dev/null +++ b/Source/WebCore/contentextensions/NFA.cpp @@ -0,0 +1,126 @@ +/* + * Copyright (C) 2014, 2015 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "NFA.h" + +#if ENABLE(CONTENT_EXTENSIONS) + +#include <wtf/ASCIICType.h> +#include <wtf/DataLog.h> + +namespace WebCore { + +namespace ContentExtensions { + +#if CONTENT_EXTENSIONS_STATE_MACHINE_DEBUGGING +void NFA::debugPrintDot() const +{ + dataLogF("digraph NFA_Transitions {\n"); + dataLogF(" rankdir=LR;\n"); + dataLogF(" node [shape=circle];\n"); + dataLogF(" {\n"); + + for (unsigned i = 0; i < nodes.size(); ++i) { + dataLogF(" %d [label=<Node %d", i, i); + + const auto& node = nodes[i]; + + if (node.actionStart != node.actionEnd) { + dataLogF("<BR/>(Final: "); + bool isFirst = true; + for (unsigned actionIndex = node.actionStart; actionIndex < node.actionEnd; ++actionIndex) { + if (!isFirst) + dataLogF(", "); + dataLogF("%llu", actions[actionIndex]); + isFirst = false; + } + dataLogF(")"); + } + dataLogF(">]"); + + if (node.actionStart != node.actionEnd) + dataLogF(" [shape=doublecircle]"); + dataLogF(";\n"); + } + dataLogF(" }\n"); + + dataLogF(" {\n"); + for (unsigned i = 0; i < nodes.size(); ++i) { + const auto& node = nodes[i]; + + HashMap<uint32_t, Vector<uint32_t>, DefaultHash<unsigned>::Hash, WTF::UnsignedWithZeroKeyHashTraits<unsigned>> transitionsPerTarget; + + for (uint32_t transitionIndex = node.rangesStart; transitionIndex < node.rangesEnd; ++transitionIndex) { + const ImmutableCharRange& range = transitions[transitionIndex]; + for (uint32_t targetIndex = range.targetStart; targetIndex < range.targetEnd; ++targetIndex) { + uint32_t target = targets[targetIndex]; + auto addResult = transitionsPerTarget.add(target, Vector<uint32_t>()); + addResult.iterator->value.append(transitionIndex); + } + } + + for (const auto& slot : transitionsPerTarget) { + dataLogF(" %d -> %d [label=\"", i, slot.key); + + bool isFirst = true; + for (uint32_t rangeIndex : slot.value) { + if (!isFirst) + dataLogF(", "); + else + isFirst = false; + + const ImmutableCharRange& range = transitions[rangeIndex]; + if (range.first == range.last) { + if (isASCIIPrintable(range.first)) + dataLogF("%c", range.first); + else + dataLogF("%d", range.first); + } else { + if (isASCIIPrintable(range.first) && isASCIIPrintable(range.last)) + dataLogF("%c-%c", range.first, range.last); + else + dataLogF("%d-%d", range.first, range.last); + } + } + dataLogF("\"]\n"); + } + + for (uint32_t epsilonTargetIndex = node.epsilonTransitionTargetsStart; epsilonTargetIndex < node.epsilonTransitionTargetsEnd; ++epsilonTargetIndex) { + uint32_t target = epsilonTransitionsTargets[epsilonTargetIndex]; + dataLogF(" %d -> %d [label=\"É›\"]\n", i, target); + } + } + + dataLogF(" }\n"); + dataLogF("}\n"); +} +#endif + +} // namespace ContentExtensions + +} // namespace WebCore + +#endif // ENABLE(CONTENT_EXTENSIONS) diff --git a/Source/WebCore/contentextensions/NFA.h b/Source/WebCore/contentextensions/NFA.h new file mode 100644 index 000000000..c86f92b52 --- /dev/null +++ b/Source/WebCore/contentextensions/NFA.h @@ -0,0 +1,50 @@ +/* + * Copyright (C) 2014 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +#if ENABLE(CONTENT_EXTENSIONS) + +#include "ContentExtensionsDebugging.h" +#include "ImmutableNFANodeBuilder.h" +#include "NFANode.h" + +namespace WebCore { + +namespace ContentExtensions { + +typedef ImmutableRange<char> ImmutableCharRange; +typedef ImmutableNFANodeBuilder<char, uint64_t> ImmutableCharNFANodeBuilder; + +struct NFA : public ImmutableNFA<char, uint64_t> { +#if CONTENT_EXTENSIONS_STATE_MACHINE_DEBUGGING + void debugPrintDot() const; +#endif +}; + +} // namespace ContentExtensions +} // namespace WebCore + +#endif // ENABLE(CONTENT_EXTENSIONS) diff --git a/Source/WebCore/contentextensions/NFANode.h b/Source/WebCore/contentextensions/NFANode.h new file mode 100644 index 000000000..b70349b3f --- /dev/null +++ b/Source/WebCore/contentextensions/NFANode.h @@ -0,0 +1,56 @@ +/* + * Copyright (C) 2014, 2015 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +#if ENABLE(CONTENT_EXTENSIONS) + +#include "ContentExtensionsDebugging.h" +#include <wtf/HashMap.h> +#include <wtf/HashSet.h> +#include <wtf/Vector.h> + +namespace WebCore { + +namespace ContentExtensions { + +// A NFANode abstract the transition table out of a NFA state. + +typedef Vector<uint64_t, 0, WTF::CrashOnOverflow, 1> ActionList; +typedef HashSet<unsigned, DefaultHash<unsigned>::Hash, WTF::UnsignedWithZeroKeyHashTraits<unsigned>> NFANodeIndexSet; +typedef HashMap<uint16_t, NFANodeIndexSet, DefaultHash<uint16_t>::Hash, WTF::UnsignedWithZeroKeyHashTraits<uint16_t>> NFANodeTransitions; + +class NFANode { +public: + NFANodeTransitions transitions; + NFANodeIndexSet transitionsOnAnyCharacter; + + ActionList finalRuleIds; +}; + +} // namespace ContentExtensions +} // namespace WebCore + +#endif // ENABLE(CONTENT_EXTENSIONS) diff --git a/Source/WebCore/contentextensions/NFAToDFA.cpp b/Source/WebCore/contentextensions/NFAToDFA.cpp new file mode 100644 index 000000000..843df3008 --- /dev/null +++ b/Source/WebCore/contentextensions/NFAToDFA.cpp @@ -0,0 +1,398 @@ +/* + * Copyright (C) 2014, 2015 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "NFAToDFA.h" + +#if ENABLE(CONTENT_EXTENSIONS) + +#include "ContentExtensionsDebugging.h" +#include "DFANode.h" +#include "ImmutableNFA.h" +#include "MutableRangeList.h" +#include "NFA.h" +#include <wtf/DataLog.h> +#include <wtf/HashMap.h> +#include <wtf/HashSet.h> + +namespace WebCore { + +namespace ContentExtensions { + +typedef MutableRange<char, NFANodeIndexSet> NFANodeRange; +typedef MutableRangeList<char, NFANodeIndexSet> NFANodeRangeList; +typedef MutableRangeList<char, NFANodeIndexSet, 128> PreallocatedNFANodeRangeList; +typedef Vector<uint32_t, 0, ContentExtensionsOverflowHandler> UniqueNodeList; +typedef Vector<UniqueNodeList, 0, ContentExtensionsOverflowHandler> NFANodeClosures; + +// FIXME: set a better initial size. +// FIXME: include the hash inside NodeIdSet. +typedef NFANodeIndexSet NodeIdSet; + +static inline void epsilonClosureExcludingSelf(NFA& nfa, unsigned nodeId, UniqueNodeList& output) +{ + NodeIdSet closure({ nodeId }); + Vector<unsigned, 64, ContentExtensionsOverflowHandler> unprocessedNodes({ nodeId }); + + do { + unsigned unprocessedNodeId = unprocessedNodes.takeLast(); + const auto& node = nfa.nodes[unprocessedNodeId]; + + for (uint32_t epsilonTargetIndex = node.epsilonTransitionTargetsStart; epsilonTargetIndex < node.epsilonTransitionTargetsEnd; ++epsilonTargetIndex) { + uint32_t targetNodeId = nfa.epsilonTransitionsTargets[epsilonTargetIndex]; + auto addResult = closure.add(targetNodeId); + if (addResult.isNewEntry) { + unprocessedNodes.append(targetNodeId); + output.append(targetNodeId); + } + } + } while (!unprocessedNodes.isEmpty()); + + output.shrinkToFit(); +} + +static void resolveEpsilonClosures(NFA& nfa, NFANodeClosures& nfaNodeClosures) +{ + unsigned nfaGraphSize = nfa.nodes.size(); + nfaNodeClosures.resize(nfaGraphSize); + + for (unsigned nodeId = 0; nodeId < nfaGraphSize; ++nodeId) + epsilonClosureExcludingSelf(nfa, nodeId, nfaNodeClosures[nodeId]); + + // Every nodes still point to that table, but we won't use it ever again. + // Clear it to get back the memory. That's not pretty but memory is important here, we have both + // graphs existing at the same time. + nfa.epsilonTransitionsTargets.clear(); +} + +static ALWAYS_INLINE void extendSetWithClosure(const NFANodeClosures& nfaNodeClosures, unsigned nodeId, NodeIdSet& set) +{ + ASSERT(set.contains(nodeId)); + const UniqueNodeList& nodeClosure = nfaNodeClosures[nodeId]; + if (!nodeClosure.isEmpty()) + set.add(nodeClosure.begin(), nodeClosure.end()); +} + +struct UniqueNodeIdSetImpl { + unsigned* buffer() + { + return m_buffer; + } + + const unsigned* buffer() const + { + return m_buffer; + } + + unsigned m_size; + unsigned m_hash; + unsigned m_dfaNodeId; +private: + unsigned m_buffer[1]; +}; + +typedef Vector<UniqueNodeIdSetImpl*, 128, ContentExtensionsOverflowHandler> UniqueNodeQueue; + +class UniqueNodeIdSet { +public: + UniqueNodeIdSet() { } + enum EmptyValueTag { EmptyValue }; + enum DeletedValueTag { DeletedValue }; + + UniqueNodeIdSet(EmptyValueTag) { } + UniqueNodeIdSet(DeletedValueTag) + : m_uniqueNodeIdSetBuffer(reinterpret_cast<UniqueNodeIdSetImpl*>(-1)) + { + } + + UniqueNodeIdSet(const NodeIdSet& nodeIdSet, unsigned hash, unsigned dfaNodeId) + { + ASSERT(nodeIdSet.size()); + + unsigned size = nodeIdSet.size(); + size_t byteSize = sizeof(UniqueNodeIdSetImpl) + (size - 1) * sizeof(unsigned); + m_uniqueNodeIdSetBuffer = static_cast<UniqueNodeIdSetImpl*>(fastMalloc(byteSize)); + + m_uniqueNodeIdSetBuffer->m_size = size; + m_uniqueNodeIdSetBuffer->m_hash = hash; + m_uniqueNodeIdSetBuffer->m_dfaNodeId = dfaNodeId; + + unsigned* buffer = m_uniqueNodeIdSetBuffer->buffer(); + for (unsigned nodeId : nodeIdSet) { + *buffer = nodeId; + ++buffer; + } + } + + UniqueNodeIdSet(UniqueNodeIdSet&& other) + : m_uniqueNodeIdSetBuffer(other.m_uniqueNodeIdSetBuffer) + { + other.m_uniqueNodeIdSetBuffer = nullptr; + } + + UniqueNodeIdSet& operator=(UniqueNodeIdSet&& other) + { + m_uniqueNodeIdSetBuffer = other.m_uniqueNodeIdSetBuffer; + other.m_uniqueNodeIdSetBuffer = nullptr; + return *this; + } + + ~UniqueNodeIdSet() + { + fastFree(m_uniqueNodeIdSetBuffer); + } + + bool operator==(const UniqueNodeIdSet& other) const + { + return m_uniqueNodeIdSetBuffer == other.m_uniqueNodeIdSetBuffer; + } + + bool operator==(const NodeIdSet& other) const + { + if (m_uniqueNodeIdSetBuffer->m_size != static_cast<unsigned>(other.size())) + return false; + unsigned* buffer = m_uniqueNodeIdSetBuffer->buffer(); + for (unsigned i = 0; i < m_uniqueNodeIdSetBuffer->m_size; ++i) { + if (!other.contains(buffer[i])) + return false; + } + return true; + } + + UniqueNodeIdSetImpl* impl() const { return m_uniqueNodeIdSetBuffer; } + + unsigned hash() const { return m_uniqueNodeIdSetBuffer->m_hash; } + bool isEmptyValue() const { return !m_uniqueNodeIdSetBuffer; } + bool isDeletedValue() const { return m_uniqueNodeIdSetBuffer == reinterpret_cast<UniqueNodeIdSetImpl*>(-1); } + +private: + UniqueNodeIdSetImpl* m_uniqueNodeIdSetBuffer = nullptr; +}; + +struct UniqueNodeIdSetHash { + static unsigned hash(const UniqueNodeIdSet& p) + { + return p.hash(); + } + + static bool equal(const UniqueNodeIdSet& a, const UniqueNodeIdSet& b) + { + return a == b; + } + // It would be fine to compare empty or deleted here, but not for the HashTranslator. + static const bool safeToCompareToEmptyOrDeleted = false; +}; + +struct UniqueNodeIdSetHashHashTraits : public WTF::CustomHashTraits<UniqueNodeIdSet> { + static const bool emptyValueIsZero = true; + + // FIXME: Get a good size. + static const int minimumTableSize = 128; +}; + +typedef HashSet<std::unique_ptr<UniqueNodeIdSet>, UniqueNodeIdSetHash, UniqueNodeIdSetHashHashTraits> UniqueNodeIdSetTable; + +struct NodeIdSetToUniqueNodeIdSetSource { + NodeIdSetToUniqueNodeIdSetSource(DFA& dfa, const NFA& nfa, const NodeIdSet& nodeIdSet) + : dfa(dfa) + , nfa(nfa) + , nodeIdSet(nodeIdSet) + { + // The hashing operation must be independant of the nodeId. + unsigned hash = 4207445155; + for (unsigned nodeId : nodeIdSet) + hash += nodeId; + this->hash = DefaultHash<unsigned>::Hash::hash(hash); + } + DFA& dfa; + const NFA& nfa; + const NodeIdSet& nodeIdSet; + unsigned hash; +}; + +struct NodeIdSetToUniqueNodeIdSetTranslator { + static unsigned hash(const NodeIdSetToUniqueNodeIdSetSource& source) + { + return source.hash; + } + + static inline bool equal(const UniqueNodeIdSet& a, const NodeIdSetToUniqueNodeIdSetSource& b) + { + return a == b.nodeIdSet; + } + + static void translate(UniqueNodeIdSet& location, const NodeIdSetToUniqueNodeIdSetSource& source, unsigned hash) + { + DFANode newDFANode; + + HashSet<uint64_t, DefaultHash<uint64_t>::Hash, WTF::UnsignedWithZeroKeyHashTraits<uint64_t>> actions; + + for (unsigned nfaNodeId : source.nodeIdSet) { + const auto& nfaNode = source.nfa.nodes[nfaNodeId]; + for (unsigned actionIndex = nfaNode.actionStart; actionIndex < nfaNode.actionEnd; ++actionIndex) + actions.add(source.nfa.actions[actionIndex]); + } + + unsigned actionsStart = source.dfa.actions.size(); + for (uint64_t action : actions) + source.dfa.actions.append(action); + unsigned actionsEnd = source.dfa.actions.size(); + unsigned actionsLength = actionsEnd - actionsStart; + RELEASE_ASSERT_WITH_MESSAGE(actionsLength <= std::numeric_limits<uint16_t>::max(), "Too many actions for the current DFANode size."); + newDFANode.setActions(actionsStart, static_cast<uint16_t>(actionsLength)); + + unsigned dfaNodeId = source.dfa.nodes.size(); + source.dfa.nodes.append(newDFANode); + new (NotNull, &location) UniqueNodeIdSet(source.nodeIdSet, hash, dfaNodeId); + + ASSERT(location.impl()); + } +}; + +class SetTransitions { +public: + NodeIdSet& operator[](unsigned index) + { + ASSERT(index < size()); + return m_targets[index]; + } + + unsigned size() const + { + return WTF_ARRAY_LENGTH(m_targets); + } + + NodeIdSet* begin() + { + return m_targets; + } + + NodeIdSet* end() + { + return m_targets + size(); + } + +private: + NodeIdSet m_targets[128]; +}; + +struct DataConverterWithEpsilonClosure { + const NFANodeClosures& nfaNodeclosures; + + template<typename Iterable> + NFANodeIndexSet convert(const Iterable& iterable) + { + NFANodeIndexSet result; + for (unsigned nodeId : iterable) { + result.add(nodeId); + const UniqueNodeList& nodeClosure = nfaNodeclosures[nodeId]; + result.add(nodeClosure.begin(), nodeClosure.end()); + } + return result; + } + + template<typename Iterable> + void extend(NFANodeIndexSet& destination, const Iterable& iterable) + { + for (unsigned nodeId : iterable) { + auto addResult = destination.add(nodeId); + if (addResult.isNewEntry) { + const UniqueNodeList& nodeClosure = nfaNodeclosures[nodeId]; + destination.add(nodeClosure.begin(), nodeClosure.end()); + } + } + } +}; + +static inline void createCombinedTransition(PreallocatedNFANodeRangeList& combinedRangeList, const UniqueNodeIdSetImpl& sourceNodeSet, const NFA& immutableNFA, const NFANodeClosures& nfaNodeclosures) +{ + combinedRangeList.clear(); + + const unsigned* buffer = sourceNodeSet.buffer(); + + DataConverterWithEpsilonClosure converter { nfaNodeclosures }; + for (unsigned i = 0; i < sourceNodeSet.m_size; ++i) { + unsigned nodeId = buffer[i]; + auto transitions = immutableNFA.transitionsForNode(nodeId); + combinedRangeList.extend(transitions.begin(), transitions.end(), converter); + } +} + +static ALWAYS_INLINE unsigned getOrCreateDFANode(const NodeIdSet& nfaNodeSet, const NFA& nfa, DFA& dfa, UniqueNodeIdSetTable& uniqueNodeIdSetTable, UniqueNodeQueue& unprocessedNodes) +{ + NodeIdSetToUniqueNodeIdSetSource nodeIdSetToUniqueNodeIdSetSource(dfa, nfa, nfaNodeSet); + auto uniqueNodeIdAddResult = uniqueNodeIdSetTable.add<NodeIdSetToUniqueNodeIdSetTranslator>(nodeIdSetToUniqueNodeIdSetSource); + if (uniqueNodeIdAddResult.isNewEntry) + unprocessedNodes.append(uniqueNodeIdAddResult.iterator->impl()); + + return uniqueNodeIdAddResult.iterator->impl()->m_dfaNodeId; +} + +DFA NFAToDFA::convert(NFA& nfa) +{ + NFANodeClosures nfaNodeClosures; + resolveEpsilonClosures(nfa, nfaNodeClosures); + + DFA dfa; + + NodeIdSet initialSet({ nfa.root() }); + extendSetWithClosure(nfaNodeClosures, nfa.root(), initialSet); + + UniqueNodeIdSetTable uniqueNodeIdSetTable; + + NodeIdSetToUniqueNodeIdSetSource initialNodeIdSetToUniqueNodeIdSetSource(dfa, nfa, initialSet); + auto addResult = uniqueNodeIdSetTable.add<NodeIdSetToUniqueNodeIdSetTranslator>(initialNodeIdSetToUniqueNodeIdSetSource); + + UniqueNodeQueue unprocessedNodes; + unprocessedNodes.append(addResult.iterator->impl()); + + PreallocatedNFANodeRangeList combinedRangeList; + do { + UniqueNodeIdSetImpl* uniqueNodeIdSetImpl = unprocessedNodes.takeLast(); + createCombinedTransition(combinedRangeList, *uniqueNodeIdSetImpl, nfa, nfaNodeClosures); + + unsigned transitionsStart = dfa.transitionRanges.size(); + for (const NFANodeRange& range : combinedRangeList) { + unsigned targetNodeId = getOrCreateDFANode(range.data, nfa, dfa, uniqueNodeIdSetTable, unprocessedNodes); + dfa.transitionRanges.append({ range.first, range.last }); + dfa.transitionDestinations.append(targetNodeId); + } + unsigned transitionsEnd = dfa.transitionRanges.size(); + unsigned transitionsLength = transitionsEnd - transitionsStart; + + unsigned dfaNodeId = uniqueNodeIdSetImpl->m_dfaNodeId; + DFANode& dfaSourceNode = dfa.nodes[dfaNodeId]; + dfaSourceNode.setTransitions(transitionsStart, static_cast<uint8_t>(transitionsLength)); + } while (!unprocessedNodes.isEmpty()); + + dfa.shrinkToFit(); + return dfa; +} + +} // namespace ContentExtensions + +} // namespace WebCore + +#endif // ENABLE(CONTENT_EXTENSIONS) diff --git a/Source/WebCore/contentextensions/NFAToDFA.h b/Source/WebCore/contentextensions/NFAToDFA.h new file mode 100644 index 000000000..8bee48823 --- /dev/null +++ b/Source/WebCore/contentextensions/NFAToDFA.h @@ -0,0 +1,47 @@ +/* + * Copyright (C) 2014, 2015 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +#if ENABLE(CONTENT_EXTENSIONS) + +#include "DFA.h" + +namespace WebCore { + +namespace ContentExtensions { + +struct NFA; + +// NFAToDFA provides a way to build a DFA corresponding to a NFA. +class NFAToDFA { +public: + WEBCORE_EXPORT static DFA convert(NFA&); +}; + +} // namespace ContentExtensions +} // namespace WebCore + +#endif // ENABLE(CONTENT_EXTENSIONS) diff --git a/Source/WebCore/contentextensions/Term.h b/Source/WebCore/contentextensions/Term.h new file mode 100644 index 000000000..0d51833f5 --- /dev/null +++ b/Source/WebCore/contentextensions/Term.h @@ -0,0 +1,687 @@ +/* + * Copyright (C) 2014-2015 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +#if ENABLE(CONTENT_EXTENSIONS) + +#include "ContentExtensionsDebugging.h" +#include "NFA.h" +#include <unicode/utypes.h> +#include <wtf/ASCIICType.h> +#include <wtf/HashMap.h> +#include <wtf/Vector.h> +#include <wtf/text/StringBuilder.h> +#include <wtf/text/WTFString.h> + +namespace WebCore { + +namespace ContentExtensions { + +enum class AtomQuantifier : uint8_t { + One, + ZeroOrOne, + ZeroOrMore, + OneOrMore +}; + +class Term { +public: + Term(); + Term(char character, bool isCaseSensitive); + + enum UniversalTransitionTag { UniversalTransition }; + explicit Term(UniversalTransitionTag); + + enum CharacterSetTermTag { CharacterSetTerm }; + explicit Term(CharacterSetTermTag, bool isInverted); + + enum GroupTermTag { GroupTerm }; + explicit Term(GroupTermTag); + + enum EndOfLineAssertionTermTag { EndOfLineAssertionTerm }; + explicit Term(EndOfLineAssertionTermTag); + + Term(const Term& other); + Term(Term&& other); + + enum EmptyValueTag { EmptyValue }; + Term(EmptyValueTag); + + ~Term(); + + bool isValid() const; + + // CharacterSet terms only. + void addCharacter(UChar character, bool isCaseSensitive); + + // Group terms only. + void extendGroupSubpattern(const Term&); + + void quantify(const AtomQuantifier&); + + ImmutableCharNFANodeBuilder generateGraph(NFA&, ImmutableCharNFANodeBuilder& source, const ActionList& finalActions) const; + void generateGraph(NFA&, ImmutableCharNFANodeBuilder& source, uint32_t destination) const; + + bool isEndOfLineAssertion() const; + + bool matchesAtLeastOneCharacter() const; + + // Matches any string, the empty string included. + // This is very conservative. Patterns matching any string can return false here. + bool isKnownToMatchAnyString() const; + + // Return true if the term can only match input of a single fixed length. + bool hasFixedLength() const; + + Term& operator=(const Term& other); + Term& operator=(Term&& other); + + bool operator==(const Term& other) const; + + unsigned hash() const; + + bool isEmptyValue() const; + +#if CONTENT_EXTENSIONS_STATE_MACHINE_DEBUGGING + String toString() const; +#endif + +#if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING + size_t memoryUsed() const; +#endif + +private: + // This is exact for character sets but conservative for groups. + // The return value can be false for a group equivalent to a universal transition. + bool isUniversalTransition() const; + + void generateSubgraphForAtom(NFA&, ImmutableCharNFANodeBuilder& source, const ImmutableCharNFANodeBuilder& destination) const; + void generateSubgraphForAtom(NFA&, ImmutableCharNFANodeBuilder& source, uint32_t destination) const; + + void destroy(); + + enum class TermType : uint8_t { + Empty, + CharacterSet, + Group + }; + + TermType m_termType { TermType::Empty }; + AtomQuantifier m_quantifier { AtomQuantifier::One }; + + class CharacterSet { + public: + void set(UChar character) + { + RELEASE_ASSERT(character < 128); + m_characters[character / 64] |= (uint64_t(1) << (character % 64)); + } + + bool get(UChar character) const + { + RELEASE_ASSERT(character < 128); + return m_characters[character / 64] & (uint64_t(1) << (character % 64)); + } + + void invert() + { + ASSERT(!m_inverted); + m_inverted = true; + } + + bool inverted() const { return m_inverted; } + + unsigned bitCount() const + { + return WTF::bitCount(m_characters[0]) + WTF::bitCount(m_characters[1]); + } + + bool operator==(const CharacterSet& other) const + { + return other.m_inverted == m_inverted + && other.m_characters[0] == m_characters[0] + && other.m_characters[1] == m_characters[1]; + } + + unsigned hash() const + { + return WTF::pairIntHash(WTF::pairIntHash(WTF::intHash(m_characters[0]), WTF::intHash(m_characters[1])), m_inverted); + } + private: + bool m_inverted { false }; + uint64_t m_characters[2] { 0, 0 }; + }; + + struct Group { + Vector<Term> terms; + + bool operator==(const Group& other) const + { + return other.terms == terms; + } + + unsigned hash() const + { + unsigned hash = 6421749; + for (const Term& term : terms) { + unsigned termHash = term.hash(); + hash = (hash << 16) ^ ((termHash << 11) ^ hash); + hash += hash >> 11; + } + return hash; + } + }; + + union AtomData { + AtomData() + : invalidTerm(0) + { + } + ~AtomData() + { + } + + char invalidTerm; + CharacterSet characterSet; + Group group; + } m_atomData; +}; + +#if CONTENT_EXTENSIONS_STATE_MACHINE_DEBUGGING +inline String quantifierToString(AtomQuantifier quantifier) +{ + switch (quantifier) { + case AtomQuantifier::One: + return ""; + case AtomQuantifier::ZeroOrOne: + return "?"; + case AtomQuantifier::ZeroOrMore: + return "*"; + case AtomQuantifier::OneOrMore: + return "+"; + } +} + +inline String Term::toString() const +{ + switch (m_termType) { + case TermType::Empty: + return "(Empty)"; + case TermType::CharacterSet: { + StringBuilder builder; + builder.append('['); + for (UChar c = 0; c < 128; c++) { + if (m_atomData.characterSet.get(c)) { + if (isASCIIPrintable(c) && !isASCIISpace(c)) + builder.append(c); + else { + builder.append('\\'); + builder.append('u'); + builder.appendNumber(c); + } + } + } + builder.append(']'); + builder.append(quantifierToString(m_quantifier)); + return builder.toString(); + } + case TermType::Group: { + StringBuilder builder; + builder.append('('); + for (const Term& term : m_atomData.group.terms) + builder.append(term.toString()); + builder.append(')'); + builder.append(quantifierToString(m_quantifier)); + return builder.toString(); + } + } +} +#endif + +inline Term::Term() +{ +} + +inline Term::Term(char character, bool isCaseSensitive) + : m_termType(TermType::CharacterSet) +{ + new (NotNull, &m_atomData.characterSet) CharacterSet(); + addCharacter(character, isCaseSensitive); +} + +inline Term::Term(UniversalTransitionTag) + : m_termType(TermType::CharacterSet) +{ + new (NotNull, &m_atomData.characterSet) CharacterSet(); + for (UChar i = 1; i < 128; ++i) + m_atomData.characterSet.set(i); +} + +inline Term::Term(CharacterSetTermTag, bool isInverted) + : m_termType(TermType::CharacterSet) +{ + new (NotNull, &m_atomData.characterSet) CharacterSet(); + if (isInverted) + m_atomData.characterSet.invert(); +} + +inline Term::Term(GroupTermTag) + : m_termType(TermType::Group) +{ + new (NotNull, &m_atomData.group) Group(); +} + +inline Term::Term(EndOfLineAssertionTermTag) + : Term(CharacterSetTerm, false) +{ + m_atomData.characterSet.set(0); +} + +inline Term::Term(const Term& other) + : m_termType(other.m_termType) + , m_quantifier(other.m_quantifier) +{ + switch (m_termType) { + case TermType::Empty: + break; + case TermType::CharacterSet: + new (NotNull, &m_atomData.characterSet) CharacterSet(other.m_atomData.characterSet); + break; + case TermType::Group: + new (NotNull, &m_atomData.group) Group(other.m_atomData.group); + break; + } +} + +inline Term::Term(Term&& other) + : m_termType(WTFMove(other.m_termType)) + , m_quantifier(WTFMove(other.m_quantifier)) +{ + switch (m_termType) { + case TermType::Empty: + break; + case TermType::CharacterSet: + new (NotNull, &m_atomData.characterSet) CharacterSet(WTFMove(other.m_atomData.characterSet)); + break; + case TermType::Group: + new (NotNull, &m_atomData.group) Group(WTFMove(other.m_atomData.group)); + break; + } + other.destroy(); +} + +inline Term::Term(EmptyValueTag) + : m_termType(TermType::Empty) +{ +} + +inline Term::~Term() +{ + destroy(); +} + +inline bool Term::isValid() const +{ + return m_termType != TermType::Empty; +} + +inline void Term::addCharacter(UChar character, bool isCaseSensitive) +{ + ASSERT(isASCII(character)); + + ASSERT_WITH_SECURITY_IMPLICATION(m_termType == TermType::CharacterSet); + if (m_termType != TermType::CharacterSet) + return; + + if (isCaseSensitive || !isASCIIAlpha(character)) + m_atomData.characterSet.set(character); + else { + m_atomData.characterSet.set(toASCIIUpper(character)); + m_atomData.characterSet.set(toASCIILower(character)); + } +} + +inline void Term::extendGroupSubpattern(const Term& term) +{ + ASSERT_WITH_SECURITY_IMPLICATION(m_termType == TermType::Group); + if (m_termType != TermType::Group) + return; + m_atomData.group.terms.append(term); +} + +inline void Term::quantify(const AtomQuantifier& quantifier) +{ + ASSERT_WITH_MESSAGE(m_quantifier == AtomQuantifier::One, "Transition to quantified term should only happen once."); + m_quantifier = quantifier; +} + +inline ImmutableCharNFANodeBuilder Term::generateGraph(NFA& nfa, ImmutableCharNFANodeBuilder& source, const ActionList& finalActions) const +{ + ImmutableCharNFANodeBuilder newEnd(nfa); + generateGraph(nfa, source, newEnd.nodeId()); + newEnd.setActions(finalActions.begin(), finalActions.end()); + return newEnd; +} + +inline void Term::generateGraph(NFA& nfa, ImmutableCharNFANodeBuilder& source, uint32_t destination) const +{ + ASSERT(isValid()); + + switch (m_quantifier) { + case AtomQuantifier::One: { + generateSubgraphForAtom(nfa, source, destination); + break; + } + case AtomQuantifier::ZeroOrOne: { + generateSubgraphForAtom(nfa, source, destination); + source.addEpsilonTransition(destination); + break; + } + case AtomQuantifier::ZeroOrMore: { + ImmutableCharNFANodeBuilder repeatStart(nfa); + source.addEpsilonTransition(repeatStart); + + ImmutableCharNFANodeBuilder repeatEnd(nfa); + generateSubgraphForAtom(nfa, repeatStart, repeatEnd); + repeatEnd.addEpsilonTransition(repeatStart); + + repeatEnd.addEpsilonTransition(destination); + source.addEpsilonTransition(destination); + break; + } + case AtomQuantifier::OneOrMore: { + ImmutableCharNFANodeBuilder repeatStart(nfa); + source.addEpsilonTransition(repeatStart); + + ImmutableCharNFANodeBuilder repeatEnd(nfa); + generateSubgraphForAtom(nfa, repeatStart, repeatEnd); + repeatEnd.addEpsilonTransition(repeatStart); + + repeatEnd.addEpsilonTransition(destination); + break; + } + } +} + +inline bool Term::isEndOfLineAssertion() const +{ + return m_termType == TermType::CharacterSet && m_atomData.characterSet.bitCount() == 1 && m_atomData.characterSet.get(0); +} + +inline bool Term::matchesAtLeastOneCharacter() const +{ + ASSERT(isValid()); + + if (m_quantifier == AtomQuantifier::ZeroOrOne || m_quantifier == AtomQuantifier::ZeroOrMore) + return false; + if (isEndOfLineAssertion()) + return false; + + if (m_termType == TermType::Group) { + for (const Term& term : m_atomData.group.terms) { + if (term.matchesAtLeastOneCharacter()) + return true; + } + return false; + } + return true; +} + +inline bool Term::isKnownToMatchAnyString() const +{ + ASSERT(isValid()); + + switch (m_termType) { + case TermType::Empty: + ASSERT_NOT_REACHED(); + break; + case TermType::CharacterSet: + // ".*" is the only simple term matching any string. + return isUniversalTransition() && m_quantifier == AtomQuantifier::ZeroOrMore; + break; + case TermType::Group: { + // There are infinitely many ways to match anything with groups, we just handle simple cases + if (m_atomData.group.terms.size() != 1) + return false; + + const Term& firstTermInGroup = m_atomData.group.terms.first(); + // -(.*) with any quantifier. + if (firstTermInGroup.isKnownToMatchAnyString()) + return true; + + if (firstTermInGroup.isUniversalTransition()) { + // -(.)*, (.+)*, (.?)* etc. + if (m_quantifier == AtomQuantifier::ZeroOrMore) + return true; + + // -(.+)?. + if (m_quantifier == AtomQuantifier::ZeroOrOne && firstTermInGroup.m_quantifier == AtomQuantifier::OneOrMore) + return true; + + // -(.?)+. + if (m_quantifier == AtomQuantifier::OneOrMore && firstTermInGroup.m_quantifier == AtomQuantifier::ZeroOrOne) + return true; + } + break; + } + } + return false; +} + +inline bool Term::hasFixedLength() const +{ + ASSERT(isValid()); + + switch (m_termType) { + case TermType::Empty: + ASSERT_NOT_REACHED(); + break; + case TermType::CharacterSet: + return m_quantifier == AtomQuantifier::One; + case TermType::Group: { + if (m_quantifier != AtomQuantifier::One) + return false; + for (const Term& term : m_atomData.group.terms) { + if (!term.hasFixedLength()) + return false; + } + return true; + } + } + return false; +} + +inline Term& Term::operator=(const Term& other) +{ + destroy(); + new (NotNull, this) Term(other); + return *this; +} + +inline Term& Term::operator=(Term&& other) +{ + destroy(); + new (NotNull, this) Term(WTFMove(other)); + return *this; +} + +inline bool Term::operator==(const Term& other) const +{ + if (other.m_termType != m_termType || other.m_quantifier != m_quantifier) + return false; + + switch (m_termType) { + case TermType::Empty: + return true; + case TermType::CharacterSet: + return m_atomData.characterSet == other.m_atomData.characterSet; + case TermType::Group: + return m_atomData.group == other.m_atomData.group; + } + ASSERT_NOT_REACHED(); + return false; +} + +inline unsigned Term::hash() const +{ + unsigned primary = static_cast<unsigned>(m_termType) << 16 | static_cast<unsigned>(m_quantifier); + unsigned secondary = 0; + switch (m_termType) { + case TermType::Empty: + secondary = 52184393; + break; + case TermType::CharacterSet: + secondary = m_atomData.characterSet.hash(); + break; + case TermType::Group: + secondary = m_atomData.group.hash(); + break; + } + return WTF::pairIntHash(primary, secondary); +} + +inline bool Term::isEmptyValue() const +{ + return m_termType == TermType::Empty; +} + +inline bool Term::isUniversalTransition() const +{ + ASSERT(isValid()); + + switch (m_termType) { + case TermType::Empty: + ASSERT_NOT_REACHED(); + break; + case TermType::CharacterSet: + return (m_atomData.characterSet.inverted() && !m_atomData.characterSet.bitCount()) + || (!m_atomData.characterSet.inverted() && m_atomData.characterSet.bitCount() == 127 && !m_atomData.characterSet.get(0)); + case TermType::Group: + return m_atomData.group.terms.size() == 1 && m_atomData.group.terms.first().isUniversalTransition(); + } + return false; +} + +inline void Term::generateSubgraphForAtom(NFA& nfa, ImmutableCharNFANodeBuilder& source, const ImmutableCharNFANodeBuilder& destination) const +{ + generateSubgraphForAtom(nfa, source, destination.nodeId()); +} + +inline void Term::generateSubgraphForAtom(NFA& nfa, ImmutableCharNFANodeBuilder& source, uint32_t destination) const +{ + switch (m_termType) { + case TermType::Empty: + ASSERT_NOT_REACHED(); + source.addEpsilonTransition(destination); + break; + case TermType::CharacterSet: { + if (!m_atomData.characterSet.inverted()) { + UChar i = 0; + while (true) { + while (i < 128 && !m_atomData.characterSet.get(i)) + ++i; + if (i == 128) + break; + + UChar start = i; + ++i; + while (i < 128 && m_atomData.characterSet.get(i)) + ++i; + source.addTransition(start, i - 1, destination); + } + } else { + UChar i = 1; + while (true) { + while (i < 128 && m_atomData.characterSet.get(i)) + ++i; + if (i == 128) + break; + + UChar start = i; + ++i; + while (i < 128 && !m_atomData.characterSet.get(i)) + ++i; + source.addTransition(start, i - 1, destination); + } + } + break; + } + case TermType::Group: { + if (m_atomData.group.terms.isEmpty()) { + // FIXME: any kind of empty term could be avoided in the parser. This case should turned into an assertion. + source.addEpsilonTransition(destination); + return; + } + + if (m_atomData.group.terms.size() == 1) { + m_atomData.group.terms.first().generateGraph(nfa, source, destination); + return; + } + + ImmutableCharNFANodeBuilder lastTarget = m_atomData.group.terms.first().generateGraph(nfa, source, ActionList()); + for (unsigned i = 1; i < m_atomData.group.terms.size() - 1; ++i) { + const Term& currentTerm = m_atomData.group.terms[i]; + ImmutableCharNFANodeBuilder newNode = currentTerm.generateGraph(nfa, lastTarget, ActionList()); + lastTarget = WTFMove(newNode); + } + const Term& lastTerm = m_atomData.group.terms.last(); + lastTerm.generateGraph(nfa, lastTarget, destination); + break; + } + } +} + +inline void Term::destroy() +{ + switch (m_termType) { + case TermType::Empty: + break; + case TermType::CharacterSet: + m_atomData.characterSet.~CharacterSet(); + break; + case TermType::Group: + m_atomData.group.~Group(); + break; + } + m_termType = TermType::Empty; +} + +#if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING +inline size_t Term::memoryUsed() const +{ + size_t extraMemory = 0; + if (m_termType == TermType::Group) { + for (const Term& term : m_atomData.group.terms) + extraMemory += term.memoryUsed(); + } + return sizeof(Term) + extraMemory; +} +#endif + +} // namespace ContentExtensions +} // namespace WebCore + +#endif // ENABLE(CONTENT_EXTENSIONS) diff --git a/Source/WebCore/contentextensions/URLFilterParser.cpp b/Source/WebCore/contentextensions/URLFilterParser.cpp new file mode 100644 index 000000000..3588c6517 --- /dev/null +++ b/Source/WebCore/contentextensions/URLFilterParser.cpp @@ -0,0 +1,397 @@ +/* + * Copyright (C) 2014, 2015 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "URLFilterParser.h" + +#if ENABLE(CONTENT_EXTENSIONS) + +#include "CombinedURLFilters.h" +#include "Term.h" +#include <JavaScriptCore/YarrParser.h> +#include <wtf/Deque.h> +#include <wtf/text/CString.h> + +namespace WebCore { + +namespace ContentExtensions { + +class PatternParser { +public: + PatternParser(bool patternIsCaseSensitive) + : m_patternIsCaseSensitive(patternIsCaseSensitive) + , m_parseStatus(URLFilterParser::Ok) + { + } + + void finalize(uint64_t patternId, CombinedURLFilters& combinedURLFilters) + { + if (hasError()) + return; + + sinkFloatingTermIfNecessary(); + + simplifySunkTerms(); + + // Check to see if there are any terms without ? or *. + bool matchesEverything = true; + for (const auto& term : m_sunkTerms) { + if (term.matchesAtLeastOneCharacter()) { + matchesEverything = false; + break; + } + } + if (matchesEverything) { + fail(URLFilterParser::MatchesEverything); + return; + } + + combinedURLFilters.addPattern(patternId, m_sunkTerms); + } + + URLFilterParser::ParseStatus parseStatus() const + { + return m_parseStatus; + } + + void atomPatternCharacter(UChar character) + { + if (hasError()) + return; + + if (!isASCII(character)) { + fail(URLFilterParser::NonASCII); + return; + } + + sinkFloatingTermIfNecessary(); + ASSERT(!m_floatingTerm.isValid()); + + char asciiChararacter = static_cast<char>(character); + m_floatingTerm = Term(asciiChararacter, m_patternIsCaseSensitive); + } + + void atomBuiltInCharacterClass(JSC::Yarr::BuiltInCharacterClassID builtInCharacterClassID, bool inverted) + { + if (hasError()) + return; + + sinkFloatingTermIfNecessary(); + ASSERT(!m_floatingTerm.isValid()); + + if (builtInCharacterClassID == JSC::Yarr::NewlineClassID && inverted) + m_floatingTerm = Term(Term::UniversalTransition); + else + fail(URLFilterParser::UnsupportedCharacterClass); + } + + void quantifyAtom(unsigned minimum, unsigned maximum, bool) + { + if (hasError()) + return; + + ASSERT(m_floatingTerm.isValid()); + + if (!minimum && maximum == 1) + m_floatingTerm.quantify(AtomQuantifier::ZeroOrOne); + else if (!minimum && maximum == JSC::Yarr::quantifyInfinite) + m_floatingTerm.quantify(AtomQuantifier::ZeroOrMore); + else if (minimum == 1 && maximum == JSC::Yarr::quantifyInfinite) + m_floatingTerm.quantify(AtomQuantifier::OneOrMore); + else + fail(URLFilterParser::InvalidQuantifier); + } + + void atomBackReference(unsigned) + { + fail(URLFilterParser::BackReference); + } + + void assertionBOL() + { + if (hasError()) + return; + + if (m_floatingTerm.isValid() || !m_sunkTerms.isEmpty() || !m_openGroups.isEmpty()) { + fail(URLFilterParser::MisplacedStartOfLine); + return; + } + + m_hasBeginningOfLineAssertion = true; + } + + void assertionEOL() + { + if (hasError()) + return; + + sinkFloatingTermIfNecessary(); + ASSERT(!m_floatingTerm.isValid()); + + m_floatingTerm = Term(Term::EndOfLineAssertionTerm); + } + + void assertionWordBoundary(bool) + { + fail(URLFilterParser::WordBoundary); + } + + void atomCharacterClassBegin(bool inverted = false) + { + if (hasError()) + return; + + sinkFloatingTermIfNecessary(); + ASSERT(!m_floatingTerm.isValid()); + + m_floatingTerm = Term(Term::CharacterSetTerm, inverted); + } + + void atomCharacterClassAtom(UChar character) + { + if (hasError()) + return; + + ASSERT(isASCII(character)); + + m_floatingTerm.addCharacter(character, m_patternIsCaseSensitive); + } + + void atomCharacterClassRange(UChar a, UChar b) + { + if (hasError()) + return; + + ASSERT(a); + ASSERT(b); + ASSERT(isASCII(a)); + ASSERT(isASCII(b)); + + for (unsigned i = a; i <= b; ++i) + m_floatingTerm.addCharacter(static_cast<UChar>(i), m_patternIsCaseSensitive); + } + + void atomCharacterClassEnd() + { + // Nothing to do here. The character set atom may have a quantifier, we sink the atom lazily. + } + + void atomCharacterClassBuiltIn(JSC::Yarr::BuiltInCharacterClassID, bool) + { + fail(URLFilterParser::AtomCharacter); + } + + void atomParenthesesSubpatternBegin(bool = true) + { + if (hasError()) + return; + + sinkFloatingTermIfNecessary(); + + m_openGroups.append(Term(Term::GroupTerm)); + } + + void atomParentheticalAssertionBegin(bool = false) + { + fail(URLFilterParser::Group); + } + + void atomParenthesesEnd() + { + if (hasError()) + return; + + sinkFloatingTermIfNecessary(); + ASSERT(!m_floatingTerm.isValid()); + + m_floatingTerm = m_openGroups.takeLast(); + } + + void disjunction() + { + fail(URLFilterParser::Disjunction); + } + +private: + bool hasError() const + { + return m_parseStatus != URLFilterParser::Ok; + } + + void fail(URLFilterParser::ParseStatus reason) + { + if (hasError()) + return; + + m_parseStatus = reason; + } + + void sinkFloatingTermIfNecessary() + { + if (!m_floatingTerm.isValid()) + return; + + if (m_hasProcessedEndOfLineAssertion) { + fail(URLFilterParser::MisplacedEndOfLine); + m_floatingTerm = Term(); + return; + } + + if (m_floatingTerm.isEndOfLineAssertion()) + m_hasProcessedEndOfLineAssertion = true; + + if (!m_openGroups.isEmpty()) { + m_openGroups.last().extendGroupSubpattern(m_floatingTerm); + m_floatingTerm = Term(); + return; + } + + m_sunkTerms.append(m_floatingTerm); + m_floatingTerm = Term(); + } + + void simplifySunkTerms() + { + ASSERT(!m_floatingTerm.isValid()); + + if (m_sunkTerms.isEmpty()) + return; + + Term canonicalDotStar(Term::UniversalTransition); + canonicalDotStar.quantify(AtomQuantifier::ZeroOrMore); + + // Replace every ".*"-like terms by our canonical version. Remove any duplicate ".*". + { + unsigned termIndex = 0; + bool isAfterDotStar = false; + while (termIndex < m_sunkTerms.size()) { + if (isAfterDotStar && m_sunkTerms[termIndex].isKnownToMatchAnyString()) { + m_sunkTerms.remove(termIndex); + continue; + } + isAfterDotStar = false; + + if (m_sunkTerms[termIndex].isKnownToMatchAnyString()) { + m_sunkTerms[termIndex] = canonicalDotStar; + isAfterDotStar = true; + } + ++termIndex; + } + } + + // Add our ".*" in front if needed. + if (!m_hasBeginningOfLineAssertion && !m_sunkTerms.first().isKnownToMatchAnyString()) + m_sunkTerms.insert(0, canonicalDotStar); + + // Remove trailing ".*$". + if (m_sunkTerms.size() > 2 && m_sunkTerms.last().isEndOfLineAssertion() && m_sunkTerms[m_sunkTerms.size() - 2].isKnownToMatchAnyString()) + m_sunkTerms.shrink(m_sunkTerms.size() - 2); + + // Remove irrelevant terms that can match empty. For example in "foob?", matching "b" is irrelevant. + if (m_sunkTerms.last().isEndOfLineAssertion()) + return; + while (!m_sunkTerms.isEmpty() && !m_sunkTerms.last().matchesAtLeastOneCharacter()) + m_sunkTerms.removeLast(); + } + + bool m_patternIsCaseSensitive; + + Deque<Term> m_openGroups; + Vector<Term> m_sunkTerms; + Term m_floatingTerm; + bool m_hasBeginningOfLineAssertion { false }; + bool m_hasProcessedEndOfLineAssertion { false }; + + URLFilterParser::ParseStatus m_parseStatus; +}; + +URLFilterParser::URLFilterParser(CombinedURLFilters& combinedURLFilters) + : m_combinedURLFilters(combinedURLFilters) +{ +} + +URLFilterParser::~URLFilterParser() +{ +} + +URLFilterParser::ParseStatus URLFilterParser::addPattern(const String& pattern, bool patternIsCaseSensitive, uint64_t patternId) +{ + if (!pattern.containsOnlyASCII()) + return NonASCII; + if (pattern.isEmpty()) + return EmptyPattern; + + ParseStatus status = Ok; + PatternParser patternParser(patternIsCaseSensitive); + String error = String(JSC::Yarr::parse(patternParser, pattern, false, 0)); + if (error.isNull()) + patternParser.finalize(patternId, m_combinedURLFilters); + else + status = YarrError; + + if (status == Ok) + status = patternParser.parseStatus(); + + return status; +} + +String URLFilterParser::statusString(ParseStatus status) +{ + switch (status) { + case Ok: + return "Ok"; + case MatchesEverything: + return "Matches everything."; + case NonASCII: + return "Only ASCII characters are supported in pattern."; + case UnsupportedCharacterClass: + return "Character class is not supported."; + case BackReference: + return "Patterns cannot contain backreferences."; + case MisplacedStartOfLine: + return "Start of line assertion can only appear as the first term in a filter."; + case WordBoundary: + return "Word boundaries assertions are not supported yet."; + case AtomCharacter: + return "Builtins character class atoms are not supported yet."; + case Group: + return "Groups are not supported yet."; + case Disjunction: + return "Disjunctions are not supported yet."; + case MisplacedEndOfLine: + return "The end of line assertion must be the last term in an expression."; + case EmptyPattern: + return "Empty pattern."; + case YarrError: + return "Internal error in YARR."; + case InvalidQuantifier: + return "Arbitrary atom repetitions are not supported."; + } +} + +} // namespace ContentExtensions +} // namespace WebCore + +#endif // ENABLE(CONTENT_EXTENSIONS) diff --git a/Source/WebCore/contentextensions/URLFilterParser.h b/Source/WebCore/contentextensions/URLFilterParser.h new file mode 100644 index 000000000..be19b9f70 --- /dev/null +++ b/Source/WebCore/contentextensions/URLFilterParser.h @@ -0,0 +1,68 @@ +/* + * Copyright (C) 2014, 2015 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +#if ENABLE(CONTENT_EXTENSIONS) + +#include <wtf/text/WTFString.h> + +namespace WebCore { + +namespace ContentExtensions { + +class CombinedURLFilters; + +class WEBCORE_EXPORT URLFilterParser { +public: + enum ParseStatus { + Ok, + MatchesEverything, + NonASCII, + UnsupportedCharacterClass, + BackReference, + MisplacedStartOfLine, + WordBoundary, + AtomCharacter, + Group, + Disjunction, + MisplacedEndOfLine, + EmptyPattern, + YarrError, + InvalidQuantifier, + }; + static String statusString(ParseStatus); + explicit URLFilterParser(CombinedURLFilters&); + ~URLFilterParser(); + ParseStatus addPattern(const String& pattern, bool patternIsCaseSensitive, uint64_t patternId); + +private: + CombinedURLFilters& m_combinedURLFilters; +}; + +} // namespace ContentExtensions +} // namespace WebCore + +#endif // ENABLE(CONTENT_EXTENSIONS) |