summaryrefslogtreecommitdiff
path: root/Source/WebCore/contentextensions
diff options
context:
space:
mode:
authorLorry Tar Creator <lorry-tar-importer@lorry>2017-06-27 06:07:23 +0000
committerLorry Tar Creator <lorry-tar-importer@lorry>2017-06-27 06:07:23 +0000
commit1bf1084f2b10c3b47fd1a588d85d21ed0eb41d0c (patch)
tree46dcd36c86e7fbc6e5df36deb463b33e9967a6f7 /Source/WebCore/contentextensions
parent32761a6cee1d0dee366b885b7b9c777e67885688 (diff)
downloadWebKitGtk-tarball-master.tar.gz
Diffstat (limited to 'Source/WebCore/contentextensions')
-rw-r--r--Source/WebCore/contentextensions/CombinedFiltersAlphabet.cpp82
-rw-r--r--Source/WebCore/contentextensions/CombinedFiltersAlphabet.h63
-rw-r--r--Source/WebCore/contentextensions/CombinedURLFilters.cpp513
-rw-r--r--Source/WebCore/contentextensions/CombinedURLFilters.h69
-rw-r--r--Source/WebCore/contentextensions/CompiledContentExtension.cpp42
-rw-r--r--Source/WebCore/contentextensions/CompiledContentExtension.h54
-rw-r--r--Source/WebCore/contentextensions/ContentExtension.cpp156
-rw-r--r--Source/WebCore/contentextensions/ContentExtension.h75
-rw-r--r--Source/WebCore/contentextensions/ContentExtensionActions.h60
-rw-r--r--Source/WebCore/contentextensions/ContentExtensionCompiler.cpp483
-rw-r--r--Source/WebCore/contentextensions/ContentExtensionCompiler.h55
-rw-r--r--Source/WebCore/contentextensions/ContentExtensionError.cpp101
-rw-r--r--Source/WebCore/contentextensions/ContentExtensionError.h79
-rw-r--r--Source/WebCore/contentextensions/ContentExtensionParser.cpp335
-rw-r--r--Source/WebCore/contentextensions/ContentExtensionParser.h45
-rw-r--r--Source/WebCore/contentextensions/ContentExtensionRule.cpp124
-rw-r--r--Source/WebCore/contentextensions/ContentExtensionRule.h191
-rw-r--r--Source/WebCore/contentextensions/ContentExtensionStyleSheet.cpp70
-rw-r--r--Source/WebCore/contentextensions/ContentExtensionStyleSheet.h64
-rw-r--r--Source/WebCore/contentextensions/ContentExtensionsBackend.cpp250
-rw-r--r--Source/WebCore/contentextensions/ContentExtensionsBackend.h78
-rw-r--r--Source/WebCore/contentextensions/ContentExtensionsDebugging.h51
-rw-r--r--Source/WebCore/contentextensions/DFA.cpp180
-rw-r--r--Source/WebCore/contentextensions/DFA.h87
-rw-r--r--Source/WebCore/contentextensions/DFABytecode.h134
-rw-r--r--Source/WebCore/contentextensions/DFABytecodeCompiler.cpp495
-rw-r--r--Source/WebCore/contentextensions/DFABytecodeCompiler.h120
-rw-r--r--Source/WebCore/contentextensions/DFABytecodeInterpreter.cpp362
-rw-r--r--Source/WebCore/contentextensions/DFABytecodeInterpreter.h69
-rw-r--r--Source/WebCore/contentextensions/DFACombiner.cpp227
-rw-r--r--Source/WebCore/contentextensions/DFACombiner.h55
-rw-r--r--Source/WebCore/contentextensions/DFAMinimizer.cpp509
-rw-r--r--Source/WebCore/contentextensions/DFAMinimizer.h43
-rw-r--r--Source/WebCore/contentextensions/DFANode.cpp147
-rw-r--r--Source/WebCore/contentextensions/DFANode.h176
-rw-r--r--Source/WebCore/contentextensions/HashableActionList.h93
-rw-r--r--Source/WebCore/contentextensions/ImmutableNFA.h182
-rw-r--r--Source/WebCore/contentextensions/ImmutableNFANodeBuilder.h229
-rw-r--r--Source/WebCore/contentextensions/MutableRange.h96
-rw-r--r--Source/WebCore/contentextensions/MutableRangeList.h266
-rw-r--r--Source/WebCore/contentextensions/NFA.cpp126
-rw-r--r--Source/WebCore/contentextensions/NFA.h50
-rw-r--r--Source/WebCore/contentextensions/NFANode.h56
-rw-r--r--Source/WebCore/contentextensions/NFAToDFA.cpp398
-rw-r--r--Source/WebCore/contentextensions/NFAToDFA.h47
-rw-r--r--Source/WebCore/contentextensions/Term.h687
-rw-r--r--Source/WebCore/contentextensions/URLFilterParser.cpp397
-rw-r--r--Source/WebCore/contentextensions/URLFilterParser.h68
48 files changed, 8339 insertions, 0 deletions
diff --git a/Source/WebCore/contentextensions/CombinedFiltersAlphabet.cpp b/Source/WebCore/contentextensions/CombinedFiltersAlphabet.cpp
new file mode 100644
index 000000000..c2851844f
--- /dev/null
+++ b/Source/WebCore/contentextensions/CombinedFiltersAlphabet.cpp
@@ -0,0 +1,82 @@
+/*
+ * Copyright (C) 2015 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "CombinedFiltersAlphabet.h"
+
+#if ENABLE(CONTENT_EXTENSIONS)
+
+namespace WebCore {
+
+namespace ContentExtensions {
+
+struct TermCreatorInput {
+ const Term& term;
+ Vector<std::unique_ptr<Term>>& internedTermsStorage;
+};
+
+struct TermCreatorTranslator {
+ static unsigned hash(const TermCreatorInput& input)
+ {
+ return input.term.hash();
+ }
+
+ static inline bool equal(const Term* term, const TermCreatorInput& input)
+ {
+ return *term == input.term;
+ }
+
+ static void translate(const Term*& location, const TermCreatorInput& input, unsigned)
+ {
+ std::unique_ptr<Term> newUniqueTerm(new Term(input.term));
+ location = newUniqueTerm.get();
+ input.internedTermsStorage.append(WTFMove(newUniqueTerm));
+ }
+};
+
+const Term* CombinedFiltersAlphabet::interned(const Term& term)
+{
+ TermCreatorInput input { term, m_internedTermsStorage };
+ auto addResult = m_uniqueTerms.add<TermCreatorTranslator>(input);
+ return *addResult.iterator;
+}
+
+#if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING
+size_t CombinedFiltersAlphabet::memoryUsed() const
+{
+ size_t termsSize = 0;
+ for (const auto& termPointer : m_internedTermsStorage)
+ termsSize += termPointer->memoryUsed();
+ return sizeof(CombinedFiltersAlphabet)
+ + termsSize
+ + m_uniqueTerms.capacity() * sizeof(Term*)
+ + m_internedTermsStorage.capacity() * sizeof(std::unique_ptr<Term>);
+}
+#endif
+
+} // namespace ContentExtensions
+} // namespace WebCore
+
+#endif // ENABLE(CONTENT_EXTENSIONS)
diff --git a/Source/WebCore/contentextensions/CombinedFiltersAlphabet.h b/Source/WebCore/contentextensions/CombinedFiltersAlphabet.h
new file mode 100644
index 000000000..c7cf72fce
--- /dev/null
+++ b/Source/WebCore/contentextensions/CombinedFiltersAlphabet.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (C) 2015 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#pragma once
+
+#if ENABLE(CONTENT_EXTENSIONS)
+
+#include "ContentExtensionsDebugging.h"
+#include "Term.h"
+#include <wtf/HashSet.h>
+#include <wtf/Vector.h>
+
+namespace WebCore {
+
+namespace ContentExtensions {
+
+class CombinedFiltersAlphabet {
+public:
+ const Term* interned(const Term&);
+#if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING
+ size_t memoryUsed() const;
+#endif
+
+private:
+ struct TermPointerHash {
+ static unsigned hash(const Term* key) { return key->hash(); }
+ static inline bool equal(const Term* a, const Term* b)
+ {
+ return *a == *b;
+ }
+ static const bool safeToCompareToEmptyOrDeleted = false;
+ };
+
+ HashSet<const Term*, TermPointerHash> m_uniqueTerms;
+ Vector<std::unique_ptr<Term>> m_internedTermsStorage;
+};
+
+} // namespace ContentExtensions
+} // namespace WebCore
+
+#endif // ENABLE(CONTENT_EXTENSIONS)
diff --git a/Source/WebCore/contentextensions/CombinedURLFilters.cpp b/Source/WebCore/contentextensions/CombinedURLFilters.cpp
new file mode 100644
index 000000000..c3950957e
--- /dev/null
+++ b/Source/WebCore/contentextensions/CombinedURLFilters.cpp
@@ -0,0 +1,513 @@
+/*
+ * Copyright (C) 2015 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "CombinedURLFilters.h"
+
+#if ENABLE(CONTENT_EXTENSIONS)
+
+#include "HashableActionList.h"
+#include "Term.h"
+#include <wtf/DataLog.h>
+#include <wtf/Vector.h>
+#include <wtf/text/CString.h>
+
+namespace WebCore {
+
+namespace ContentExtensions {
+
+struct PrefixTreeEdge {
+ const Term* term;
+ std::unique_ptr<PrefixTreeVertex> child;
+};
+
+typedef Vector<PrefixTreeEdge, 0, WTF::CrashOnOverflow, 1> PrefixTreeEdges;
+
+struct PrefixTreeVertex {
+ PrefixTreeEdges edges;
+};
+
+struct ReverseSuffixTreeVertex;
+struct ReverseSuffixTreeEdge {
+ const Term* term;
+ std::unique_ptr<ReverseSuffixTreeVertex> child;
+};
+typedef Vector<ReverseSuffixTreeEdge, 0, WTF::CrashOnOverflow, 1> ReverseSuffixTreeEdges;
+
+struct ReverseSuffixTreeVertex {
+ ReverseSuffixTreeEdges edges;
+ uint32_t nodeId;
+};
+typedef HashMap<HashableActionList, ReverseSuffixTreeVertex, HashableActionListHash, HashableActionListHashTraits> ReverseSuffixTreeRoots;
+
+#if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING
+static size_t recursiveMemoryUsed(const PrefixTreeVertex& vertex)
+{
+ size_t size = sizeof(PrefixTreeVertex)
+ + vertex.edges.capacity() * sizeof(PrefixTreeEdge);
+ for (const auto& edge : vertex.edges) {
+ ASSERT(edge.child);
+ size += recursiveMemoryUsed(*edge.child.get());
+ }
+ return size;
+}
+
+size_t CombinedURLFilters::memoryUsed() const
+{
+ ASSERT(m_prefixTreeRoot);
+
+ size_t actionsSize = 0;
+ for (const auto& slot : m_actions)
+ actionsSize += slot.value.capacity() * sizeof(uint64_t);
+
+ return sizeof(CombinedURLFilters)
+ + m_alphabet.memoryUsed()
+ + recursiveMemoryUsed(*m_prefixTreeRoot.get())
+ + sizeof(HashMap<PrefixTreeVertex*, ActionList>)
+ + m_actions.capacity() * (sizeof(PrefixTreeVertex*) + sizeof(ActionList))
+ + actionsSize;
+}
+#endif
+
+#if CONTENT_EXTENSIONS_STATE_MACHINE_DEBUGGING
+static String prefixTreeVertexToString(const PrefixTreeVertex& vertex, const HashMap<const PrefixTreeVertex*, ActionList>& actions, unsigned depth)
+{
+ StringBuilder builder;
+ while (depth--)
+ builder.appendLiteral(" ");
+ builder.appendLiteral("vertex actions: ");
+
+ auto actionsSlot = actions.find(&vertex);
+ if (actionsSlot != actions.end()) {
+ for (uint64_t action : actionsSlot->value) {
+ builder.appendNumber(action);
+ builder.append(',');
+ }
+ }
+ builder.append('\n');
+ return builder.toString();
+}
+
+static void recursivePrint(const PrefixTreeVertex& vertex, const HashMap<const PrefixTreeVertex*, ActionList>& actions, unsigned depth)
+{
+ dataLogF("%s", prefixTreeVertexToString(vertex, actions, depth).utf8().data());
+ for (const auto& edge : vertex.edges) {
+ StringBuilder builder;
+ for (unsigned i = 0; i < depth * 2; ++i)
+ builder.append(' ');
+ builder.appendLiteral("vertex edge: ");
+ builder.append(edge.term->toString());
+ builder.append('\n');
+ dataLogF("%s", builder.toString().utf8().data());
+ ASSERT(edge.child);
+ recursivePrint(*edge.child.get(), actions, depth + 1);
+ }
+}
+
+void CombinedURLFilters::print() const
+{
+ recursivePrint(*m_prefixTreeRoot.get(), m_actions, 0);
+}
+#endif
+
+CombinedURLFilters::CombinedURLFilters()
+ : m_prefixTreeRoot(std::make_unique<PrefixTreeVertex>())
+{
+}
+
+CombinedURLFilters::~CombinedURLFilters()
+{
+}
+
+bool CombinedURLFilters::isEmpty() const
+{
+ return m_prefixTreeRoot->edges.isEmpty();
+}
+
+void CombinedURLFilters::addDomain(uint64_t actionId, const String& domain)
+{
+ unsigned domainLength = domain.length();
+ if (domainLength && domain[0] == '*') {
+ // If domain starts with a '*' then it means match domain and its subdomains, like (^|.)domain$
+ // This way a domain of "*webkit.org" will match "bugs.webkit.org" and "webkit.org".
+ Vector<Term> prependDot;
+ Vector<Term> prependBeginningOfLine;
+ prependDot.reserveInitialCapacity(domainLength + 2);
+ prependBeginningOfLine.reserveInitialCapacity(domainLength); // This is just no .* at the beginning.
+
+ Term canonicalDotStar(Term::UniversalTransition);
+ canonicalDotStar.quantify(AtomQuantifier::ZeroOrMore);
+ prependDot.uncheckedAppend(canonicalDotStar);
+ prependDot.uncheckedAppend(Term('.', true));
+
+ for (unsigned i = 1; i < domainLength; i++) {
+ ASSERT(isASCII(domain[i]));
+ ASSERT(!isASCIIUpper(domain[i]));
+ prependDot.uncheckedAppend(Term(domain[i], true));
+ prependBeginningOfLine.uncheckedAppend(Term(domain[i], true));
+ }
+ prependDot.uncheckedAppend(Term::EndOfLineAssertionTerm);
+ prependBeginningOfLine.uncheckedAppend(Term::EndOfLineAssertionTerm);
+
+ addPattern(actionId, prependDot);
+ addPattern(actionId, prependBeginningOfLine);
+ } else {
+ // This is like adding ^domain$, but interpreting domain as a series of characters, not a regular expression.
+ // "webkit.org" will match "webkit.org" but not "bugs.webkit.org".
+ Vector<Term> prependBeginningOfLine;
+ prependBeginningOfLine.reserveInitialCapacity(domainLength + 1); // This is just no .* at the beginning.
+ for (unsigned i = 0; i < domainLength; i++) {
+ ASSERT(isASCII(domain[i]));
+ ASSERT(!isASCIIUpper(domain[i]));
+ prependBeginningOfLine.uncheckedAppend(Term(domain[i], true));
+ }
+ prependBeginningOfLine.uncheckedAppend(Term::EndOfLineAssertionTerm);
+ addPattern(actionId, prependBeginningOfLine);
+ }
+}
+
+void CombinedURLFilters::addPattern(uint64_t actionId, const Vector<Term>& pattern)
+{
+ ASSERT_WITH_MESSAGE(!pattern.isEmpty(), "The parser should have excluded empty patterns before reaching CombinedURLFilters.");
+
+ if (pattern.isEmpty())
+ return;
+
+ // Extend the prefix tree with the new pattern.
+ PrefixTreeVertex* lastPrefixTree = m_prefixTreeRoot.get();
+
+ for (const Term& term : pattern) {
+ size_t nextEntryIndex = WTF::notFound;
+ for (size_t i = 0; i < lastPrefixTree->edges.size(); ++i) {
+ if (*lastPrefixTree->edges[i].term == term) {
+ nextEntryIndex = i;
+ break;
+ }
+ }
+ if (nextEntryIndex != WTF::notFound)
+ lastPrefixTree = lastPrefixTree->edges[nextEntryIndex].child.get();
+ else {
+ lastPrefixTree->edges.append(PrefixTreeEdge({m_alphabet.interned(term), std::make_unique<PrefixTreeVertex>()}));
+ lastPrefixTree = lastPrefixTree->edges.last().child.get();
+ }
+ }
+
+ auto addResult = m_actions.add(lastPrefixTree, ActionList());
+ ActionList& actions = addResult.iterator->value;
+ if (actions.find(actionId) == WTF::notFound)
+ actions.append(actionId);
+}
+
+struct ActiveSubtree {
+ ActiveSubtree(PrefixTreeVertex& vertex, ImmutableCharNFANodeBuilder&& nfaNode, unsigned edgeIndex)
+ : vertex(vertex)
+ , nfaNode(WTFMove(nfaNode))
+ , edgeIndex(edgeIndex)
+ {
+ }
+ PrefixTreeVertex& vertex;
+ ImmutableCharNFANodeBuilder nfaNode;
+ unsigned edgeIndex;
+};
+
+static void generateInfixUnsuitableForReverseSuffixTree(NFA& nfa, Vector<ActiveSubtree>& stack, const HashMap<const PrefixTreeVertex*, ActionList>& actions)
+{
+ // To avoid conflicts, we use the reverse suffix tree for subtrees that do not merge
+ // in the prefix tree.
+ //
+ // We only unify the suffixes to the actions on the leaf.
+ // If there are actions inside the tree, we generate the part of the subtree up to the action.
+ //
+ // If we accidentally insert a node with action inside the reverse-suffix-tree, we would create
+ // new actions on unrelated pattern when unifying their suffixes.
+ for (unsigned i = stack.size() - 1; i--;) {
+ ActiveSubtree& activeSubtree = stack[i];
+ if (activeSubtree.nfaNode.isValid())
+ return;
+
+ RELEASE_ASSERT_WITH_MESSAGE(i > 0, "The bottom of the stack must be the root of our fixed-length subtree. It should have it the isValid() case above.");
+
+ auto actionsIterator = actions.find(&activeSubtree.vertex);
+ bool hasActionInsideTree = actionsIterator != actions.end();
+
+ // Stricto sensu, we should count the number of exit edges with fixed length.
+ // That is costly and unlikely to matter in practice.
+ bool hasSingleOutcome = activeSubtree.vertex.edges.size() == 1;
+
+ if (hasActionInsideTree || !hasSingleOutcome) {
+ // Go back to the end of the subtree that has already been generated.
+ // From there, generate everything up to the vertex we found.
+ unsigned end = i;
+ unsigned beginning = end;
+
+ ActiveSubtree* sourceActiveSubtree = nullptr;
+ while (beginning--) {
+ ActiveSubtree& activeSubtree = stack[beginning];
+ if (activeSubtree.nfaNode.isValid()) {
+ sourceActiveSubtree = &activeSubtree;
+ break;
+ }
+ }
+ ASSERT_WITH_MESSAGE(sourceActiveSubtree, "The root should always have a valid generator.");
+
+ for (unsigned stackIndex = beginning + 1; stackIndex <= end; ++stackIndex) {
+ ImmutableCharNFANodeBuilder& sourceNode = sourceActiveSubtree->nfaNode;
+ ASSERT(sourceNode.isValid());
+ auto& edge = sourceActiveSubtree->vertex.edges[sourceActiveSubtree->edgeIndex];
+
+ ActiveSubtree& destinationActiveSubtree = stack[stackIndex];
+ destinationActiveSubtree.nfaNode = edge.term->generateGraph(nfa, sourceNode, actions.get(&destinationActiveSubtree.vertex));
+
+ sourceActiveSubtree = &destinationActiveSubtree;
+ }
+
+ return;
+ }
+ }
+}
+
+static void generateSuffixWithReverseSuffixTree(NFA& nfa, Vector<ActiveSubtree>& stack, const HashMap<const PrefixTreeVertex*, ActionList>& actions, ReverseSuffixTreeRoots& reverseSuffixTreeRoots)
+{
+ ActiveSubtree& leafSubtree = stack.last();
+ ASSERT_WITH_MESSAGE(!leafSubtree.nfaNode.isValid(), "The leaf should never be generated by the code above, it should always be inserted into the prefix tree.");
+
+ ActionList actionList = actions.get(&leafSubtree.vertex);
+ ASSERT_WITH_MESSAGE(!actionList.isEmpty(), "Our prefix tree should always have actions on the leaves by construction.");
+
+ HashableActionList hashableActionList(actionList);
+ auto rootAddResult = reverseSuffixTreeRoots.add(hashableActionList, ReverseSuffixTreeVertex());
+ if (rootAddResult.isNewEntry) {
+ ImmutableCharNFANodeBuilder newNode(nfa);
+ newNode.setActions(actionList.begin(), actionList.end());
+ rootAddResult.iterator->value.nodeId = newNode.nodeId();
+ }
+
+ ReverseSuffixTreeVertex* activeReverseSuffixTreeVertex = &rootAddResult.iterator->value;
+ uint32_t destinationNodeId = rootAddResult.iterator->value.nodeId;
+
+ unsigned stackPosition = stack.size() - 2;
+ while (true) {
+ ActiveSubtree& source = stack[stackPosition];
+ auto& edge = source.vertex.edges[source.edgeIndex];
+
+ // This is the end condition: when we meet a node that has already been generated,
+ // we just need to connect our backward tree to the forward tree.
+ //
+ // We *must not* add this last node to the reverse-suffix tree. That node can have
+ // transitions back to earlier part of the prefix tree. If the prefix tree "caches"
+ // such node, it would create new transitions that did not exist in the source language.
+ if (source.nfaNode.isValid()) {
+ stack.shrink(stackPosition + 1);
+ edge.term->generateGraph(nfa, source.nfaNode, destinationNodeId);
+ return;
+ }
+ --stackPosition;
+
+ ASSERT_WITH_MESSAGE(!actions.contains(&source.vertex), "Any node with final actions should have been created before hitting the reverse suffix-tree.");
+
+ ReverseSuffixTreeEdge* existingEdge = nullptr;
+ for (ReverseSuffixTreeEdge& potentialExistingEdge : activeReverseSuffixTreeVertex->edges) {
+ if (edge.term == potentialExistingEdge.term) {
+ existingEdge = &potentialExistingEdge;
+ break;
+ }
+ }
+
+ if (existingEdge)
+ activeReverseSuffixTreeVertex = existingEdge->child.get();
+ else {
+ ImmutableCharNFANodeBuilder newNode(nfa);
+ edge.term->generateGraph(nfa, newNode, destinationNodeId);
+ std::unique_ptr<ReverseSuffixTreeVertex> newVertex(new ReverseSuffixTreeVertex());
+ newVertex->nodeId = newNode.nodeId();
+
+ ReverseSuffixTreeVertex* newVertexAddress = newVertex.get();
+ activeReverseSuffixTreeVertex->edges.append(ReverseSuffixTreeEdge({ edge.term, WTFMove(newVertex) }));
+ activeReverseSuffixTreeVertex = newVertexAddress;
+ }
+ destinationNodeId = activeReverseSuffixTreeVertex->nodeId;
+
+ ASSERT(source.vertex.edges.size() == 1);
+ source.vertex.edges.clear();
+ }
+
+ RELEASE_ASSERT_NOT_REACHED();
+}
+
+static void clearReverseSuffixTree(ReverseSuffixTreeRoots& reverseSuffixTreeRoots)
+{
+ // We cannot rely on the destructor being called in order from top to bottom as we may overflow
+ // the stack. Instead, we go depth first in the reverse-suffix-tree.
+
+ for (auto& slot : reverseSuffixTreeRoots) {
+ Vector<ReverseSuffixTreeVertex*, 128> stack;
+ stack.append(&slot.value);
+
+ while (true) {
+ ReverseSuffixTreeVertex* top = stack.last();
+ if (top->edges.isEmpty()) {
+ stack.removeLast();
+ if (stack.isEmpty())
+ break;
+ stack.last()->edges.removeLast();
+ } else
+ stack.append(top->edges.last().child.get());
+ }
+ }
+ reverseSuffixTreeRoots.clear();
+}
+
+static void generateNFAForSubtree(NFA& nfa, ImmutableCharNFANodeBuilder&& subtreeRoot, PrefixTreeVertex& root, const HashMap<const PrefixTreeVertex*, ActionList>& actions, size_t maxNFASize)
+{
+ // This recurses the subtree of the prefix tree.
+ // For each edge that has fixed length (no quantifiers like ?, *, or +) it generates the nfa graph,
+ // recurses into children, and deletes any processed leaf nodes.
+
+ ReverseSuffixTreeRoots reverseSuffixTreeRoots;
+ Vector<ActiveSubtree> stack;
+ if (!root.edges.isEmpty())
+ stack.append(ActiveSubtree(root, WTFMove(subtreeRoot), 0));
+
+ bool nfaTooBig = false;
+
+ // Generate graphs for each subtree that does not contain any quantifiers.
+ while (!stack.isEmpty()) {
+ PrefixTreeVertex& vertex = stack.last().vertex;
+ const unsigned edgeIndex = stack.last().edgeIndex;
+
+ if (edgeIndex < vertex.edges.size()) {
+ auto& edge = vertex.edges[edgeIndex];
+
+ // Clean up any processed leaves and return early if we are past the maxNFASize.
+ if (nfaTooBig) {
+ stack.last().edgeIndex = stack.last().vertex.edges.size();
+ continue;
+ }
+
+ // Quantified edges in the subtree will be a part of another NFA.
+ if (!edge.term->hasFixedLength()) {
+ stack.last().edgeIndex++;
+ continue;
+ }
+
+ ASSERT(edge.child.get());
+ ImmutableCharNFANodeBuilder emptyBuilder;
+ stack.append(ActiveSubtree(*edge.child.get(), WTFMove(emptyBuilder), 0));
+ } else {
+ bool isLeaf = vertex.edges.isEmpty();
+
+ ASSERT(edgeIndex == vertex.edges.size());
+ vertex.edges.removeAllMatching([](PrefixTreeEdge& edge)
+ {
+ return !edge.term;
+ });
+
+ if (isLeaf) {
+ generateInfixUnsuitableForReverseSuffixTree(nfa, stack, actions);
+ generateSuffixWithReverseSuffixTree(nfa, stack, actions, reverseSuffixTreeRoots);
+
+ // Only stop generating an NFA at a leaf to ensure we have a correct NFA. We could go slightly over the maxNFASize.
+ if (nfa.nodes.size() > maxNFASize)
+ nfaTooBig = true;
+ } else
+ stack.removeLast();
+
+ if (!stack.isEmpty()) {
+ auto& activeSubtree = stack.last();
+ auto& edge = activeSubtree.vertex.edges[stack.last().edgeIndex];
+ if (edge.child->edges.isEmpty())
+ edge.term = nullptr; // Mark this leaf for deleting.
+ activeSubtree.edgeIndex++;
+ }
+ }
+ }
+ clearReverseSuffixTree(reverseSuffixTreeRoots);
+}
+
+void CombinedURLFilters::processNFAs(size_t maxNFASize, std::function<void(NFA&&)> handler)
+{
+#if CONTENT_EXTENSIONS_STATE_MACHINE_DEBUGGING
+ print();
+#endif
+ while (true) {
+ // Traverse out to a leaf.
+ Vector<PrefixTreeVertex*, 128> stack;
+ PrefixTreeVertex* vertex = m_prefixTreeRoot.get();
+ while (true) {
+ ASSERT(vertex);
+ stack.append(vertex);
+ if (vertex->edges.isEmpty())
+ break;
+ vertex = vertex->edges.last().child.get();
+ }
+ if (stack.size() == 1)
+ break; // We're done once we have processed and removed all the edges in the prefix tree.
+
+ // Find the prefix root for this NFA. This is the vertex after the last term with a quantifier if there is one,
+ // or the root if there are no quantifiers left.
+ while (stack.size() > 1) {
+ if (!stack[stack.size() - 2]->edges.last().term->hasFixedLength())
+ break;
+ stack.removeLast();
+ }
+ ASSERT_WITH_MESSAGE(!stack.isEmpty(), "At least the root should be in the stack");
+
+ // Make an NFA with the subtrees for whom this is also the last quantifier (or who also have no quantifier).
+ NFA nfa;
+ {
+ // Put the prefix into the NFA.
+ ImmutableCharNFANodeBuilder lastNode(nfa);
+ for (unsigned i = 0; i < stack.size() - 1; ++i) {
+ const PrefixTreeEdge& edge = stack[i]->edges.last();
+ ImmutableCharNFANodeBuilder newNode = edge.term->generateGraph(nfa, lastNode, m_actions.get(edge.child.get()));
+ lastNode = WTFMove(newNode);
+ }
+
+ // Put the non-quantified vertices in the subtree into the NFA and delete them.
+ ASSERT(stack.last());
+ generateNFAForSubtree(nfa, WTFMove(lastNode), *stack.last(), m_actions, maxNFASize);
+ }
+ nfa.finalize();
+
+ handler(WTFMove(nfa));
+
+ // Clean up any processed leaf nodes.
+ while (true) {
+ if (stack.size() > 1) {
+ if (stack[stack.size() - 1]->edges.isEmpty()) {
+ stack[stack.size() - 2]->edges.removeLast();
+ stack.removeLast();
+ } else
+ break; // Vertex is not a leaf.
+ } else
+ break; // Leave the empty root.
+ }
+ }
+}
+
+} // namespace ContentExtensions
+} // namespace WebCore
+
+#endif // ENABLE(CONTENT_EXTENSIONS)
diff --git a/Source/WebCore/contentextensions/CombinedURLFilters.h b/Source/WebCore/contentextensions/CombinedURLFilters.h
new file mode 100644
index 000000000..643ed889d
--- /dev/null
+++ b/Source/WebCore/contentextensions/CombinedURLFilters.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (C) 2015 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#pragma once
+
+#if ENABLE(CONTENT_EXTENSIONS)
+
+#include "CombinedFiltersAlphabet.h"
+#include "ContentExtensionsDebugging.h"
+#include "NFA.h"
+#include <wtf/Forward.h>
+#include <wtf/Vector.h>
+
+namespace WebCore {
+
+namespace ContentExtensions {
+
+struct PrefixTreeVertex;
+
+class WEBCORE_EXPORT CombinedURLFilters {
+public:
+ CombinedURLFilters();
+ ~CombinedURLFilters();
+
+ void addPattern(uint64_t actionId, const Vector<Term>& pattern);
+ void addDomain(uint64_t actionId, const String& domain);
+
+ void processNFAs(size_t maxNFASize, std::function<void(NFA&&)> handler);
+ bool isEmpty() const;
+
+#if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING
+ size_t memoryUsed() const;
+#endif
+#if CONTENT_EXTENSIONS_STATE_MACHINE_DEBUGGING
+ void print() const;
+#endif
+
+private:
+ CombinedFiltersAlphabet m_alphabet;
+ std::unique_ptr<PrefixTreeVertex> m_prefixTreeRoot;
+ HashMap<const PrefixTreeVertex*, ActionList> m_actions;
+};
+
+} // namespace ContentExtensions
+} // namespace WebCore
+
+#endif // ENABLE(CONTENT_EXTENSIONS)
diff --git a/Source/WebCore/contentextensions/CompiledContentExtension.cpp b/Source/WebCore/contentextensions/CompiledContentExtension.cpp
new file mode 100644
index 000000000..e577a9127
--- /dev/null
+++ b/Source/WebCore/contentextensions/CompiledContentExtension.cpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) 2015 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "CompiledContentExtension.h"
+#include "DFABytecodeInterpreter.h"
+
+#if ENABLE(CONTENT_EXTENSIONS)
+
+namespace WebCore {
+namespace ContentExtensions {
+
+CompiledContentExtension::~CompiledContentExtension()
+{
+}
+
+} // namespace ContentExtensions
+} // namespace WebCore
+
+#endif // ENABLE(CONTENT_EXTENSIONS)
diff --git a/Source/WebCore/contentextensions/CompiledContentExtension.h b/Source/WebCore/contentextensions/CompiledContentExtension.h
new file mode 100644
index 000000000..47b3d6269
--- /dev/null
+++ b/Source/WebCore/contentextensions/CompiledContentExtension.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (C) 2015 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#pragma once
+
+#if ENABLE(CONTENT_EXTENSIONS)
+
+#include "ContentExtensionRule.h"
+#include "DFABytecode.h"
+#include <wtf/ThreadSafeRefCounted.h>
+
+namespace WebCore {
+namespace ContentExtensions {
+
+class WEBCORE_EXPORT CompiledContentExtension : public ThreadSafeRefCounted<CompiledContentExtension> {
+public:
+ virtual ~CompiledContentExtension();
+
+ virtual const DFABytecode* filtersWithoutDomainsBytecode() const = 0;
+ virtual unsigned filtersWithoutDomainsBytecodeLength() const = 0;
+ virtual const DFABytecode* filtersWithDomainsBytecode() const = 0;
+ virtual unsigned filtersWithDomainsBytecodeLength() const = 0;
+ virtual const DFABytecode* domainFiltersBytecode() const = 0;
+ virtual unsigned domainFiltersBytecodeLength() const = 0;
+ virtual const SerializedActionByte* actions() const = 0;
+ virtual unsigned actionsLength() const = 0;
+};
+
+} // namespace ContentExtensions
+} // namespace WebCore
+
+#endif // ENABLE(CONTENT_EXTENSIONS)
diff --git a/Source/WebCore/contentextensions/ContentExtension.cpp b/Source/WebCore/contentextensions/ContentExtension.cpp
new file mode 100644
index 000000000..dacbefe2a
--- /dev/null
+++ b/Source/WebCore/contentextensions/ContentExtension.cpp
@@ -0,0 +1,156 @@
+/*
+ * Copyright (C) 2015 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "ContentExtension.h"
+
+#include "CompiledContentExtension.h"
+#include "ContentExtensionsBackend.h"
+#include "StyleSheetContents.h"
+#include <wtf/text/StringBuilder.h>
+
+#if ENABLE(CONTENT_EXTENSIONS)
+
+namespace WebCore {
+namespace ContentExtensions {
+
+RefPtr<ContentExtension> ContentExtension::create(const String& identifier, Ref<CompiledContentExtension>&& compiledExtension)
+{
+ return adoptRef(*new ContentExtension(identifier, WTFMove(compiledExtension)));
+}
+
+ContentExtension::ContentExtension(const String& identifier, Ref<CompiledContentExtension>&& compiledExtension)
+ : m_identifier(identifier)
+ , m_compiledExtension(WTFMove(compiledExtension))
+{
+ DFABytecodeInterpreter withoutDomains(m_compiledExtension->filtersWithoutDomainsBytecode(), m_compiledExtension->filtersWithoutDomainsBytecodeLength());
+ DFABytecodeInterpreter withDomains(m_compiledExtension->filtersWithDomainsBytecode(), m_compiledExtension->filtersWithDomainsBytecodeLength());
+ for (uint64_t action : withoutDomains.actionsMatchingEverything()) {
+ ASSERT(static_cast<uint32_t>(action) == action);
+ m_universalActionsWithoutDomains.append(static_cast<uint32_t>(action));
+ }
+ for (uint64_t action : withDomains.actionsMatchingEverything()) {
+ ASSERT((action & ~IfDomainFlag) == static_cast<uint32_t>(action));
+ m_universalActionsWithDomains.append(action);
+ }
+
+ compileGlobalDisplayNoneStyleSheet();
+ m_universalActionsWithoutDomains.shrinkToFit();
+ m_universalActionsWithDomains.shrinkToFit();
+}
+
+uint32_t ContentExtension::findFirstIgnorePreviousRules() const
+{
+ auto* actions = m_compiledExtension->actions();
+ uint32_t actionsLength = m_compiledExtension->actionsLength();
+ uint32_t currentActionIndex = 0;
+ while (currentActionIndex < actionsLength) {
+ if (Action::deserializeType(actions, actionsLength, currentActionIndex) == ActionType::IgnorePreviousRules)
+ return currentActionIndex;
+ currentActionIndex += Action::serializedLength(actions, actionsLength, currentActionIndex);
+ }
+ return std::numeric_limits<uint32_t>::max();
+}
+
+StyleSheetContents* ContentExtension::globalDisplayNoneStyleSheet()
+{
+ return m_globalDisplayNoneStyleSheet.get();
+}
+
+void ContentExtension::compileGlobalDisplayNoneStyleSheet()
+{
+ uint32_t firstIgnorePreviousRules = findFirstIgnorePreviousRules();
+
+ auto* actions = m_compiledExtension->actions();
+ uint32_t actionsLength = m_compiledExtension->actionsLength();
+
+ auto inGlobalDisplayNoneStyleSheet = [&](const uint32_t location)
+ {
+ return location < firstIgnorePreviousRules && Action::deserializeType(actions, actionsLength, location) == ActionType::CSSDisplayNoneSelector;
+ };
+
+ StringBuilder css;
+ for (uint32_t universalActionLocation : m_universalActionsWithoutDomains) {
+ if (inGlobalDisplayNoneStyleSheet(universalActionLocation)) {
+ if (!css.isEmpty())
+ css.append(',');
+ Action action = Action::deserialize(actions, actionsLength, universalActionLocation);
+ ASSERT(action.type() == ActionType::CSSDisplayNoneSelector);
+ css.append(action.stringArgument());
+ }
+ }
+ if (css.isEmpty())
+ return;
+ css.append("{");
+ css.append(ContentExtensionsBackend::displayNoneCSSRule());
+ css.append("}");
+
+ m_globalDisplayNoneStyleSheet = StyleSheetContents::create();
+ m_globalDisplayNoneStyleSheet->setIsUserStyleSheet(true);
+ if (!m_globalDisplayNoneStyleSheet->parseString(css.toString()))
+ m_globalDisplayNoneStyleSheet = nullptr;
+
+ // These actions don't need to be applied individually any more. They will all be applied to every page as a precompiled style sheet.
+ m_universalActionsWithoutDomains.removeAllMatching(inGlobalDisplayNoneStyleSheet);
+}
+
+void ContentExtension::populateDomainCacheIfNeeded(const String& domain)
+{
+ if (m_cachedDomain != domain) {
+ DFABytecodeInterpreter interpreter(m_compiledExtension->domainFiltersBytecode(), m_compiledExtension->domainFiltersBytecodeLength());
+ const uint16_t allLoadTypesAndResourceTypes = LoadTypeMask | ResourceTypeMask;
+ auto domainActions = interpreter.interpret(domain.utf8(), allLoadTypesAndResourceTypes);
+
+ m_cachedDomainActions.clear();
+ for (uint64_t action : domainActions)
+ m_cachedDomainActions.add(action);
+
+ m_cachedUniversalDomainActions.clear();
+ for (uint64_t action : m_universalActionsWithDomains) {
+ ASSERT_WITH_MESSAGE((action & ~IfDomainFlag) == static_cast<uint32_t>(action), "Universal actions with domains should not have flags.");
+ if (!!(action & IfDomainFlag) == m_cachedDomainActions.contains(action))
+ m_cachedUniversalDomainActions.append(static_cast<uint32_t>(action));
+ }
+ m_cachedUniversalDomainActions.shrinkToFit();
+ m_cachedDomain = domain;
+ }
+}
+
+const DFABytecodeInterpreter::Actions& ContentExtension::cachedDomainActions(const String& domain)
+{
+ populateDomainCacheIfNeeded(domain);
+ return m_cachedDomainActions;
+}
+
+const Vector<uint32_t>& ContentExtension::universalActionsWithDomains(const String& domain)
+{
+ populateDomainCacheIfNeeded(domain);
+ return m_cachedUniversalDomainActions;
+}
+
+} // namespace ContentExtensions
+} // namespace WebCore
+
+#endif // ENABLE(CONTENT_EXTENSIONS)
diff --git a/Source/WebCore/contentextensions/ContentExtension.h b/Source/WebCore/contentextensions/ContentExtension.h
new file mode 100644
index 000000000..5e7c11b2a
--- /dev/null
+++ b/Source/WebCore/contentextensions/ContentExtension.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (C) 2015 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#pragma once
+
+#if ENABLE(CONTENT_EXTENSIONS)
+
+#include "DFABytecodeInterpreter.h"
+#include "StyleSheetContents.h"
+#include <wtf/Ref.h>
+#include <wtf/RefCounted.h>
+#include <wtf/RefPtr.h>
+
+namespace WebCore {
+
+namespace ContentExtensions {
+
+class CompiledContentExtension;
+
+class ContentExtension : public RefCounted<ContentExtension> {
+public:
+ static RefPtr<ContentExtension> create(const String& identifier, Ref<CompiledContentExtension>&&);
+
+ const String& identifier() const { return m_identifier; }
+ const CompiledContentExtension& compiledExtension() const { return m_compiledExtension.get(); }
+ StyleSheetContents* globalDisplayNoneStyleSheet();
+ const DFABytecodeInterpreter::Actions& cachedDomainActions(const String& domain);
+ const Vector<uint32_t>& universalActionsWithoutDomains() { return m_universalActionsWithoutDomains; }
+ const Vector<uint32_t>& universalActionsWithDomains(const String& domain);
+
+private:
+ ContentExtension(const String& identifier, Ref<CompiledContentExtension>&&);
+ uint32_t findFirstIgnorePreviousRules() const;
+
+ String m_identifier;
+ Ref<CompiledContentExtension> m_compiledExtension;
+
+ RefPtr<StyleSheetContents> m_globalDisplayNoneStyleSheet;
+ void compileGlobalDisplayNoneStyleSheet();
+
+ String m_cachedDomain;
+ void populateDomainCacheIfNeeded(const String& domain);
+ DFABytecodeInterpreter::Actions m_cachedDomainActions;
+ Vector<uint32_t> m_cachedUniversalDomainActions;
+
+ Vector<uint32_t> m_universalActionsWithoutDomains;
+ Vector<uint64_t> m_universalActionsWithDomains;
+};
+
+} // namespace ContentExtensions
+} // namespace WebCore
+
+#endif // ENABLE(CONTENT_EXTENSIONS)
diff --git a/Source/WebCore/contentextensions/ContentExtensionActions.h b/Source/WebCore/contentextensions/ContentExtensionActions.h
new file mode 100644
index 000000000..992b6bbf7
--- /dev/null
+++ b/Source/WebCore/contentextensions/ContentExtensionActions.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (C) 2015 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#pragma once
+
+#if ENABLE(CONTENT_EXTENSIONS)
+
+namespace WebCore {
+
+class ResourceRequest;
+
+namespace ContentExtensions {
+
+typedef uint8_t SerializedActionByte;
+
+enum class ActionType : uint8_t {
+ BlockLoad,
+ BlockCookies,
+ CSSDisplayNoneSelector,
+ CSSDisplayNoneStyleSheet,
+ IgnorePreviousRules,
+ MakeHTTPS,
+ InvalidAction,
+};
+
+struct BlockedStatus {
+ bool blockedLoad { false };
+ bool blockedCookies { false };
+ bool madeHTTPS { false };
+};
+
+void applyBlockedStatusToRequest(const BlockedStatus&, ResourceRequest&);
+
+} // namespace ContentExtensions
+
+} // namespace WebCore
+
+#endif // ENABLE(CONTENT_EXTENSIONS)
diff --git a/Source/WebCore/contentextensions/ContentExtensionCompiler.cpp b/Source/WebCore/contentextensions/ContentExtensionCompiler.cpp
new file mode 100644
index 000000000..90bfed2db
--- /dev/null
+++ b/Source/WebCore/contentextensions/ContentExtensionCompiler.cpp
@@ -0,0 +1,483 @@
+/*
+ * Copyright (C) 2015 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "ContentExtensionCompiler.h"
+
+#if ENABLE(CONTENT_EXTENSIONS)
+
+#include "CombinedURLFilters.h"
+#include "CompiledContentExtension.h"
+#include "ContentExtensionActions.h"
+#include "ContentExtensionError.h"
+#include "ContentExtensionParser.h"
+#include "ContentExtensionRule.h"
+#include "ContentExtensionsDebugging.h"
+#include "DFABytecodeCompiler.h"
+#include "DFACombiner.h"
+#include "NFA.h"
+#include "NFAToDFA.h"
+#include "URLFilterParser.h"
+#include <wtf/CurrentTime.h>
+#include <wtf/DataLog.h>
+#include <wtf/text/CString.h>
+#include <wtf/text/StringBuilder.h>
+
+namespace WebCore {
+namespace ContentExtensions {
+
+static void serializeSelector(Vector<SerializedActionByte>& actions, const String& selector)
+{
+ // Append action type (1 byte).
+ actions.append(static_cast<SerializedActionByte>(ActionType::CSSDisplayNoneSelector));
+ // Append Selector length (4 bytes).
+ unsigned selectorLength = selector.length();
+ actions.resize(actions.size() + sizeof(unsigned));
+ *reinterpret_cast<unsigned*>(&actions[actions.size() - sizeof(unsigned)]) = selectorLength;
+ bool wideCharacters = !selector.is8Bit();
+ actions.append(wideCharacters);
+ // Append Selector.
+ if (wideCharacters) {
+ unsigned startIndex = actions.size();
+ actions.resize(actions.size() + sizeof(UChar) * selectorLength);
+ for (unsigned i = 0; i < selectorLength; ++i)
+ *reinterpret_cast<UChar*>(&actions[startIndex + i * sizeof(UChar)]) = selector[i];
+ } else {
+ for (unsigned i = 0; i < selectorLength; ++i)
+ actions.append(selector[i]);
+ }
+}
+
+struct PendingDisplayNoneActions {
+ Vector<String> selectors;
+ Vector<unsigned> clientLocations;
+};
+typedef HashMap<Trigger, PendingDisplayNoneActions, TriggerHash, TriggerHashTraits> PendingDisplayNoneActionsMap;
+
+static void resolvePendingDisplayNoneActions(Vector<SerializedActionByte>& actions, Vector<unsigned>& actionLocations, PendingDisplayNoneActionsMap& pendingDisplayNoneActionsMap)
+{
+ for (auto& slot : pendingDisplayNoneActionsMap) {
+ PendingDisplayNoneActions& pendingActions = slot.value;
+
+ StringBuilder combinedSelectors;
+ for (unsigned i = 0; i < pendingActions.selectors.size(); ++i) {
+ if (i)
+ combinedSelectors.append(',');
+ combinedSelectors.append(pendingActions.selectors[i]);
+ }
+
+ unsigned actionLocation = actions.size();
+ serializeSelector(actions, combinedSelectors.toString());
+ for (unsigned clientLocation : pendingActions.clientLocations)
+ actionLocations[clientLocation] = actionLocation;
+ }
+ pendingDisplayNoneActionsMap.clear();
+}
+
+static Vector<unsigned> serializeActions(const Vector<ContentExtensionRule>& ruleList, Vector<SerializedActionByte>& actions)
+{
+ ASSERT(!actions.size());
+
+ Vector<unsigned> actionLocations;
+
+ // Order only matters because of IgnorePreviousRules. All other identical actions can be combined between each IgnorePreviousRules
+ // and CSSDisplayNone strings can be combined if their triggers are identical.
+ typedef HashMap<uint32_t, uint32_t, DefaultHash<uint32_t>::Hash, WTF::UnsignedWithZeroKeyHashTraits<uint32_t>> ActionMap;
+ ActionMap blockLoadActionsMap;
+ ActionMap blockCookiesActionsMap;
+ PendingDisplayNoneActionsMap cssDisplayNoneActionsMap;
+ ActionMap ignorePreviousRuleActionsMap;
+ ActionMap makeHTTPSActionsMap;
+
+ for (unsigned ruleIndex = 0; ruleIndex < ruleList.size(); ++ruleIndex) {
+ const ContentExtensionRule& rule = ruleList[ruleIndex];
+ ActionType actionType = rule.action().type();
+
+ if (actionType == ActionType::IgnorePreviousRules) {
+ resolvePendingDisplayNoneActions(actions, actionLocations, cssDisplayNoneActionsMap);
+
+ blockLoadActionsMap.clear();
+ blockCookiesActionsMap.clear();
+ cssDisplayNoneActionsMap.clear();
+ makeHTTPSActionsMap.clear();
+ } else
+ ignorePreviousRuleActionsMap.clear();
+
+ // Anything with domain is just pushed.
+ // We could try to merge domains but that case is not common in practice.
+ if (!rule.trigger().domains.isEmpty()) {
+ actionLocations.append(actions.size());
+
+ if (actionType == ActionType::CSSDisplayNoneSelector)
+ serializeSelector(actions, rule.action().stringArgument());
+ else
+ actions.append(static_cast<SerializedActionByte>(actionType));
+ continue;
+ }
+
+ ResourceFlags flags = rule.trigger().flags;
+ unsigned actionLocation = std::numeric_limits<unsigned>::max();
+
+ auto findOrMakeActionLocation = [&] (ActionMap& map)
+ {
+ const auto existingAction = map.find(flags);
+ if (existingAction == map.end()) {
+ actionLocation = actions.size();
+ actions.append(static_cast<SerializedActionByte>(actionType));
+ map.set(flags, actionLocation);
+ } else
+ actionLocation = existingAction->value;
+ };
+
+ switch (actionType) {
+ case ActionType::CSSDisplayNoneStyleSheet:
+ case ActionType::InvalidAction:
+ RELEASE_ASSERT_NOT_REACHED();
+
+ case ActionType::CSSDisplayNoneSelector: {
+ const auto addResult = cssDisplayNoneActionsMap.add(rule.trigger(), PendingDisplayNoneActions());
+ PendingDisplayNoneActions& pendingDisplayNoneActions = addResult.iterator->value;
+ pendingDisplayNoneActions.selectors.append(rule.action().stringArgument());
+ pendingDisplayNoneActions.clientLocations.append(actionLocations.size());
+
+ actionLocation = std::numeric_limits<unsigned>::max();
+ break;
+ }
+ case ActionType::IgnorePreviousRules:
+ findOrMakeActionLocation(ignorePreviousRuleActionsMap);
+ break;
+ case ActionType::BlockLoad:
+ findOrMakeActionLocation(blockLoadActionsMap);
+ break;
+ case ActionType::BlockCookies:
+ findOrMakeActionLocation(blockCookiesActionsMap);
+ break;
+ case ActionType::MakeHTTPS:
+ findOrMakeActionLocation(makeHTTPSActionsMap);
+ break;
+ }
+
+ actionLocations.append(actionLocation);
+ }
+ resolvePendingDisplayNoneActions(actions, actionLocations, cssDisplayNoneActionsMap);
+ return actionLocations;
+}
+
+typedef HashSet<uint64_t, DefaultHash<uint64_t>::Hash, WTF::UnsignedWithZeroKeyHashTraits<uint64_t>> UniversalActionSet;
+
+static void addUniversalActionsToDFA(DFA& dfa, const UniversalActionSet& universalActions)
+{
+ if (universalActions.isEmpty())
+ return;
+
+ DFANode& root = dfa.nodes[dfa.root];
+ ASSERT(!root.actionsLength());
+ unsigned actionsStart = dfa.actions.size();
+ dfa.actions.reserveCapacity(dfa.actions.size() + universalActions.size());
+ for (uint64_t action : universalActions)
+ dfa.actions.uncheckedAppend(action);
+ unsigned actionsEnd = dfa.actions.size();
+
+ unsigned actionsLength = actionsEnd - actionsStart;
+ RELEASE_ASSERT_WITH_MESSAGE(actionsLength < std::numeric_limits<uint16_t>::max(), "Too many uncombined actions that match everything");
+ root.setActions(actionsStart, static_cast<uint16_t>(actionsLength));
+}
+
+std::error_code compileRuleList(ContentExtensionCompilationClient& client, String&& ruleList)
+{
+ Vector<ContentExtensionRule> parsedRuleList;
+ auto parserError = parseRuleList(ruleList, parsedRuleList);
+ ruleList = String();
+ if (parserError)
+ return parserError;
+
+#if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING
+ double patternPartitioningStart = monotonicallyIncreasingTime();
+#endif
+
+ Vector<SerializedActionByte> actions;
+ Vector<unsigned> actionLocations = serializeActions(parsedRuleList, actions);
+ client.writeActions(WTFMove(actions));
+ LOG_LARGE_STRUCTURES(actions, actions.capacity() * sizeof(SerializedActionByte));
+ actions.clear();
+
+ UniversalActionSet universalActionsWithoutDomains;
+ UniversalActionSet universalActionsWithDomains;
+
+ // FIXME: These don't all need to be in memory at the same time.
+ CombinedURLFilters filtersWithoutDomains;
+ CombinedURLFilters filtersWithDomains;
+ CombinedURLFilters domainFilters;
+ URLFilterParser filtersWithoutDomainParser(filtersWithoutDomains);
+ URLFilterParser filtersWithDomainParser(filtersWithDomains);
+
+ for (unsigned ruleIndex = 0; ruleIndex < parsedRuleList.size(); ++ruleIndex) {
+ const ContentExtensionRule& contentExtensionRule = parsedRuleList[ruleIndex];
+ const Trigger& trigger = contentExtensionRule.trigger();
+ ASSERT(trigger.urlFilter.length());
+
+ // High bits are used for flags. This should match how they are used in DFABytecodeCompiler::compileNode.
+ ASSERT(!trigger.flags || ActionFlagMask & (static_cast<uint64_t>(trigger.flags) << 32));
+ ASSERT(!(~ActionFlagMask & (static_cast<uint64_t>(trigger.flags) << 32)));
+ uint64_t actionLocationAndFlags = (static_cast<uint64_t>(trigger.flags) << 32) | static_cast<uint64_t>(actionLocations[ruleIndex]);
+ URLFilterParser::ParseStatus status = URLFilterParser::Ok;
+ if (trigger.domains.isEmpty()) {
+ ASSERT(trigger.domainCondition == Trigger::DomainCondition::None);
+ status = filtersWithoutDomainParser.addPattern(trigger.urlFilter, trigger.urlFilterIsCaseSensitive, actionLocationAndFlags);
+ if (status == URLFilterParser::MatchesEverything) {
+ universalActionsWithoutDomains.add(actionLocationAndFlags);
+ status = URLFilterParser::Ok;
+ }
+ if (status != URLFilterParser::Ok) {
+ dataLogF("Error while parsing %s: %s\n", trigger.urlFilter.utf8().data(), URLFilterParser::statusString(status).utf8().data());
+ return ContentExtensionError::JSONInvalidRegex;
+ }
+ } else {
+ if (trigger.domainCondition == Trigger::DomainCondition::IfDomain)
+ actionLocationAndFlags |= IfDomainFlag;
+ else {
+ ASSERT(trigger.domainCondition == Trigger::DomainCondition::UnlessDomain);
+ ASSERT(!(actionLocationAndFlags & IfDomainFlag));
+ }
+
+ status = filtersWithDomainParser.addPattern(trigger.urlFilter, trigger.urlFilterIsCaseSensitive, actionLocationAndFlags);
+ if (status == URLFilterParser::MatchesEverything) {
+ universalActionsWithDomains.add(actionLocationAndFlags);
+ status = URLFilterParser::Ok;
+ }
+ if (status != URLFilterParser::Ok) {
+ dataLogF("Error while parsing %s: %s\n", trigger.urlFilter.utf8().data(), URLFilterParser::statusString(status).utf8().data());
+ return ContentExtensionError::JSONInvalidRegex;
+ }
+ for (const String& domain : trigger.domains)
+ domainFilters.addDomain(actionLocationAndFlags, domain);
+ }
+ ASSERT(status == URLFilterParser::Ok);
+ }
+ LOG_LARGE_STRUCTURES(parsedRuleList, parsedRuleList.capacity() * sizeof(ContentExtensionRule)); // Doesn't include strings.
+ LOG_LARGE_STRUCTURES(actionLocations, actionLocations.capacity() * sizeof(unsigned));
+ parsedRuleList.clear();
+ actionLocations.clear();
+
+#if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING
+ double patternPartitioningEnd = monotonicallyIncreasingTime();
+ dataLogF(" Time spent partitioning the rules into groups: %f\n", (patternPartitioningEnd - patternPartitioningStart));
+#endif
+
+ LOG_LARGE_STRUCTURES(filtersWithoutDomains, filtersWithoutDomains.memoryUsed());
+ LOG_LARGE_STRUCTURES(filtersWithDomains, filtersWithDomains.memoryUsed());
+ LOG_LARGE_STRUCTURES(domainFilters, domainFilters.memoryUsed());
+
+#if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING
+ unsigned machinesWithoutDomainsCount = 0;
+ unsigned totalBytecodeSizeForMachinesWithoutDomains = 0;
+ unsigned machinesWithDomainsCount = 0;
+ unsigned totalBytecodeSizeForMachinesWithDomains = 0;
+ double totalNFAToByteCodeBuildTimeStart = monotonicallyIncreasingTime();
+#endif
+
+ // Smaller maxNFASizes risk high compiling and interpreting times from having too many DFAs,
+ // larger maxNFASizes use too much memory when compiling.
+ const unsigned maxNFASize = 75000;
+
+ bool firstNFAWithoutDomainsSeen = false;
+
+ auto lowerFiltersWithoutDomainsDFAToBytecode = [&](DFA&& dfa)
+ {
+#if CONTENT_EXTENSIONS_STATE_MACHINE_DEBUGGING
+ dataLogF("filtersWithoutDomains DFA\n");
+ dfa.debugPrintDot();
+#endif
+ ASSERT_WITH_MESSAGE(!dfa.nodes[dfa.root].hasActions(), "All actions on the DFA root should come from regular expressions that match everything.");
+
+ if (!firstNFAWithoutDomainsSeen) {
+ // Put all the universal actions on the first DFA.
+ addUniversalActionsToDFA(dfa, universalActionsWithoutDomains);
+ }
+
+ Vector<DFABytecode> bytecode;
+ DFABytecodeCompiler compiler(dfa, bytecode);
+ compiler.compile();
+ LOG_LARGE_STRUCTURES(bytecode, bytecode.capacity() * sizeof(uint8_t));
+#if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING
+ ++machinesWithoutDomainsCount;
+ totalBytecodeSizeForMachinesWithoutDomains += bytecode.size();
+#endif
+ client.writeFiltersWithoutDomainsBytecode(WTFMove(bytecode));
+
+ firstNFAWithoutDomainsSeen = true;
+ };
+
+ const unsigned smallDFASize = 100;
+ DFACombiner smallFiltersWithoutDomainsDFACombiner;
+ filtersWithoutDomains.processNFAs(maxNFASize, [&](NFA&& nfa) {
+#if CONTENT_EXTENSIONS_STATE_MACHINE_DEBUGGING
+ dataLogF("filtersWithoutDomains NFA\n");
+ nfa.debugPrintDot();
+#endif
+
+ LOG_LARGE_STRUCTURES(nfa, nfa.memoryUsed());
+ DFA dfa = NFAToDFA::convert(nfa);
+ LOG_LARGE_STRUCTURES(dfa, dfa.memoryUsed());
+
+ if (dfa.graphSize() < smallDFASize)
+ smallFiltersWithoutDomainsDFACombiner.addDFA(WTFMove(dfa));
+ else {
+ dfa.minimize();
+ lowerFiltersWithoutDomainsDFAToBytecode(WTFMove(dfa));
+ }
+ });
+
+
+ smallFiltersWithoutDomainsDFACombiner.combineDFAs(smallDFASize, [&](DFA&& dfa) {
+ LOG_LARGE_STRUCTURES(dfa, dfa.memoryUsed());
+ lowerFiltersWithoutDomainsDFAToBytecode(WTFMove(dfa));
+ });
+
+ ASSERT(filtersWithoutDomains.isEmpty());
+
+ if (!firstNFAWithoutDomainsSeen) {
+ // Our bytecode interpreter expects to have at least one DFA, so if we haven't seen any
+ // create a dummy one and add any universal actions.
+
+ DFA dummyDFA = DFA::empty();
+ addUniversalActionsToDFA(dummyDFA, universalActionsWithoutDomains);
+
+ Vector<DFABytecode> bytecode;
+ DFABytecodeCompiler compiler(dummyDFA, bytecode);
+ compiler.compile();
+ LOG_LARGE_STRUCTURES(bytecode, bytecode.capacity() * sizeof(uint8_t));
+ client.writeFiltersWithoutDomainsBytecode(WTFMove(bytecode));
+ }
+ LOG_LARGE_STRUCTURES(universalActionsWithoutDomains, universalActionsWithoutDomains.capacity() * sizeof(unsigned));
+ universalActionsWithoutDomains.clear();
+
+ bool firstNFAWithDomainsSeen = false;
+ auto lowerFiltersWithDomainsDFAToBytecode = [&](DFA&& dfa)
+ {
+ if (!firstNFAWithDomainsSeen) {
+ // Put all the universal actions on the first DFA.
+ addUniversalActionsToDFA(dfa, universalActionsWithDomains);
+ }
+
+ Vector<DFABytecode> bytecode;
+ DFABytecodeCompiler compiler(dfa, bytecode);
+ compiler.compile();
+ LOG_LARGE_STRUCTURES(bytecode, bytecode.capacity() * sizeof(uint8_t));
+#if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING
+ ++machinesWithDomainsCount;
+ totalBytecodeSizeForMachinesWithDomains += bytecode.size();
+#endif
+ client.writeFiltersWithDomainsBytecode(WTFMove(bytecode));
+
+ firstNFAWithDomainsSeen = true;
+ };
+
+ DFACombiner smallFiltersWithDomainsDFACombiner;
+ filtersWithDomains.processNFAs(maxNFASize, [&](NFA&& nfa) {
+#if CONTENT_EXTENSIONS_STATE_MACHINE_DEBUGGING
+ dataLogF("filtersWithDomains NFA\n");
+ nfa.debugPrintDot();
+#endif
+ LOG_LARGE_STRUCTURES(nfa, nfa.memoryUsed());
+ DFA dfa = NFAToDFA::convert(nfa);
+#if CONTENT_EXTENSIONS_STATE_MACHINE_DEBUGGING
+ dataLogF("filtersWithDomains PRE MINIMIZING DFA\n");
+ dfa.debugPrintDot();
+#endif
+ LOG_LARGE_STRUCTURES(dfa, dfa.memoryUsed());
+
+ ASSERT_WITH_MESSAGE(!dfa.nodes[dfa.root].hasActions(), "Filters with domains that match everything are not allowed right now.");
+
+ if (dfa.graphSize() < smallDFASize)
+ smallFiltersWithDomainsDFACombiner.addDFA(WTFMove(dfa));
+ else {
+ dfa.minimize();
+ lowerFiltersWithDomainsDFAToBytecode(WTFMove(dfa));
+ }
+ });
+ smallFiltersWithDomainsDFACombiner.combineDFAs(smallDFASize, [&](DFA&& dfa) {
+ LOG_LARGE_STRUCTURES(dfa, dfa.memoryUsed());
+ lowerFiltersWithDomainsDFAToBytecode(WTFMove(dfa));
+ });
+ ASSERT(filtersWithDomains.isEmpty());
+
+ if (!firstNFAWithDomainsSeen) {
+ // Our bytecode interpreter expects to have at least one DFA, so if we haven't seen any
+ // create a dummy one and add any universal actions.
+
+ DFA dummyDFA = DFA::empty();
+ addUniversalActionsToDFA(dummyDFA, universalActionsWithDomains);
+
+ Vector<DFABytecode> bytecode;
+ DFABytecodeCompiler compiler(dummyDFA, bytecode);
+ compiler.compile();
+ LOG_LARGE_STRUCTURES(bytecode, bytecode.capacity() * sizeof(uint8_t));
+ client.writeFiltersWithDomainsBytecode(WTFMove(bytecode));
+ }
+ LOG_LARGE_STRUCTURES(universalActionsWithDomains, universalActionsWithDomains.capacity() * sizeof(unsigned));
+ universalActionsWithDomains.clear();
+
+ domainFilters.processNFAs(maxNFASize, [&](NFA&& nfa) {
+#if CONTENT_EXTENSIONS_STATE_MACHINE_DEBUGGING
+ dataLogF("domainFilters NFA\n");
+ nfa.debugPrintDot();
+#endif
+ LOG_LARGE_STRUCTURES(nfa, nfa.memoryUsed());
+ DFA dfa = NFAToDFA::convert(nfa);
+#if CONTENT_EXTENSIONS_STATE_MACHINE_DEBUGGING
+ dataLogF("domainFilters DFA\n");
+ dfa.debugPrintDot();
+#endif
+ LOG_LARGE_STRUCTURES(dfa, dfa.memoryUsed());
+ // Minimizing this DFA would not be effective because all actions are unique
+ // and because of the tree-like structure of this DFA.
+ ASSERT_WITH_MESSAGE(!dfa.nodes[dfa.root].hasActions(), "There should not be any domains that match everything.");
+
+ Vector<DFABytecode> bytecode;
+ DFABytecodeCompiler compiler(dfa, bytecode);
+ compiler.compile();
+ LOG_LARGE_STRUCTURES(bytecode, bytecode.capacity() * sizeof(uint8_t));
+ client.writeDomainFiltersBytecode(WTFMove(bytecode));
+ });
+ ASSERT(domainFilters.isEmpty());
+
+#if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING
+ double totalNFAToByteCodeBuildTimeEnd = monotonicallyIncreasingTime();
+ dataLogF(" Time spent building and compiling the DFAs: %f\n", (totalNFAToByteCodeBuildTimeEnd - totalNFAToByteCodeBuildTimeStart));
+
+ dataLogF(" Number of machines without domain filters: %d (total bytecode size = %d)\n", machinesWithoutDomainsCount, totalBytecodeSizeForMachinesWithoutDomains);
+ dataLogF(" Number of machines with domain filters: %d (total bytecode size = %d)\n", machinesWithDomainsCount, totalBytecodeSizeForMachinesWithDomains);
+#endif
+
+ client.finalize();
+
+ return { };
+}
+
+} // namespace ContentExtensions
+} // namespace WebCore
+
+#endif // ENABLE(CONTENT_EXTENSIONS)
diff --git a/Source/WebCore/contentextensions/ContentExtensionCompiler.h b/Source/WebCore/contentextensions/ContentExtensionCompiler.h
new file mode 100644
index 000000000..3928f9544
--- /dev/null
+++ b/Source/WebCore/contentextensions/ContentExtensionCompiler.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) 2015 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#pragma once
+
+#if ENABLE(CONTENT_EXTENSIONS)
+
+#include "CompiledContentExtension.h"
+#include <system_error>
+#include <wtf/Forward.h>
+#include <wtf/Ref.h>
+
+namespace WebCore {
+namespace ContentExtensions {
+
+class ContentExtensionCompilationClient {
+public:
+ virtual ~ContentExtensionCompilationClient() { }
+
+ // Functions should be called in this order. All except writeActions and finalize can be called multiple times, though.
+ virtual void writeActions(Vector<SerializedActionByte>&&) = 0;
+ virtual void writeFiltersWithoutDomainsBytecode(Vector<DFABytecode>&&) = 0;
+ virtual void writeFiltersWithDomainsBytecode(Vector<DFABytecode>&&) = 0;
+ virtual void writeDomainFiltersBytecode(Vector<DFABytecode>&&) = 0;
+ virtual void finalize() = 0;
+};
+
+WEBCORE_EXPORT std::error_code compileRuleList(ContentExtensionCompilationClient&, String&&);
+
+} // namespace ContentExtensions
+} // namespace WebCore
+
+#endif // ENABLE(CONTENT_EXTENSIONS)
diff --git a/Source/WebCore/contentextensions/ContentExtensionError.cpp b/Source/WebCore/contentextensions/ContentExtensionError.cpp
new file mode 100644
index 000000000..52e9cf2fd
--- /dev/null
+++ b/Source/WebCore/contentextensions/ContentExtensionError.cpp
@@ -0,0 +1,101 @@
+/*
+ * Copyright (C) 2015 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "ContentExtensionError.h"
+
+#if ENABLE(CONTENT_EXTENSIONS)
+
+#include <string>
+#include <wtf/NeverDestroyed.h>
+
+namespace WebCore {
+namespace ContentExtensions {
+
+const char* WebKitContentBlockerDomain = "WebKitContentBlockerDomain";
+
+const std::error_category& contentExtensionErrorCategory()
+{
+ class ContentExtensionErrorCategory : public std::error_category {
+ const char* name() const noexcept override
+ {
+ return "content extension";
+ }
+
+ std::string message(int errorCode) const override
+ {
+ switch (static_cast<ContentExtensionError>(errorCode)) {
+ case ContentExtensionError::JSONInvalid:
+ return "Failed to parse the JSON String.";
+ case ContentExtensionError::JSONTopLevelStructureNotAnObject:
+ return "Invalid input, the top level structure is not an object.";
+ case ContentExtensionError::JSONTopLevelStructureNotAnArray:
+ return "Invalid input, the top level structure is not an array.";
+ case ContentExtensionError::JSONInvalidObjectInTopLevelArray:
+ return "Invalid object in the top level array.";
+ case ContentExtensionError::JSONInvalidRule:
+ return "Invalid rule.";
+ case ContentExtensionError::JSONContainsNoRules:
+ return "Empty extension.";
+ case ContentExtensionError::JSONInvalidTrigger:
+ return "Invalid trigger object.";
+ case ContentExtensionError::JSONInvalidURLFilterInTrigger:
+ return "Invalid url-filter object.";
+ case ContentExtensionError::JSONInvalidTriggerFlagsArray:
+ return "Invalid trigger flags array.";
+ case ContentExtensionError::JSONInvalidObjectInTriggerFlagsArray:
+ return "Invalid object in the trigger flags array.";
+ case ContentExtensionError::JSONInvalidStringInTriggerFlagsArray:
+ return "Invalid string in the trigger flags array.";
+ case ContentExtensionError::JSONInvalidAction:
+ return "Invalid action object.";
+ case ContentExtensionError::JSONInvalidActionType:
+ return "Invalid action type.";
+ case ContentExtensionError::JSONInvalidCSSDisplayNoneActionType:
+ return "Invalid css-display-none action type. Requires a selector.";
+ case ContentExtensionError::JSONInvalidRegex:
+ return "Invalid or unsupported regular expression.";
+ case ContentExtensionError::JSONInvalidDomainList:
+ return "Invalid domain list.";
+ case ContentExtensionError::JSONTooManyRules:
+ return "Too many rules in JSON array.";
+ case ContentExtensionError::JSONDomainNotLowerCaseASCII:
+ return "Domains must be lower case ASCII. Use punycode to encode non-ASCII characters.";
+ case ContentExtensionError::JSONUnlessAndIfDomain:
+ return "A trigger cannot have both unless- and if-domain.";
+ }
+
+ return std::string();
+ }
+ };
+
+ static NeverDestroyed<ContentExtensionErrorCategory> contentExtensionErrorCategory;
+ return contentExtensionErrorCategory;
+}
+
+} // namespace ContentExtensions
+} // namespace WebCore
+
+#endif // ENABLE(CONTENT_EXTENSIONS)
diff --git a/Source/WebCore/contentextensions/ContentExtensionError.h b/Source/WebCore/contentextensions/ContentExtensionError.h
new file mode 100644
index 000000000..35462bacc
--- /dev/null
+++ b/Source/WebCore/contentextensions/ContentExtensionError.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (C) 2015 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#pragma once
+
+#if ENABLE(CONTENT_EXTENSIONS)
+
+#include "PlatformExportMacros.h"
+#include <system_error>
+
+namespace WebCore {
+namespace ContentExtensions {
+
+enum class ContentExtensionError {
+ // JSON parser error
+ JSONInvalid = 1,
+
+ // JSON semantics error
+ JSONTopLevelStructureNotAnObject,
+ JSONTopLevelStructureNotAnArray,
+ JSONInvalidObjectInTopLevelArray,
+ JSONInvalidRule,
+ JSONContainsNoRules,
+
+ JSONInvalidTrigger,
+ JSONInvalidURLFilterInTrigger,
+ JSONInvalidTriggerFlagsArray,
+ JSONInvalidObjectInTriggerFlagsArray,
+ JSONInvalidStringInTriggerFlagsArray,
+ JSONInvalidDomainList,
+ JSONDomainNotLowerCaseASCII,
+ JSONUnlessAndIfDomain,
+ JSONTooManyRules,
+
+ JSONInvalidAction,
+ JSONInvalidActionType,
+ JSONInvalidCSSDisplayNoneActionType,
+ JSONInvalidRegex,
+};
+
+extern const char* WebKitContentBlockerDomain;
+
+WEBCORE_EXPORT const std::error_category& contentExtensionErrorCategory();
+
+inline std::error_code make_error_code(ContentExtensionError error)
+{
+ return { static_cast<int>(error), contentExtensionErrorCategory() };
+}
+
+} // namespace ContentExtensions
+} // namespace WebCore
+
+namespace std {
+ template<> struct is_error_code_enum<WebCore::ContentExtensions::ContentExtensionError> : public true_type { };
+}
+
+#endif // ENABLE(CONTENT_EXTENSIONS)
diff --git a/Source/WebCore/contentextensions/ContentExtensionParser.cpp b/Source/WebCore/contentextensions/ContentExtensionParser.cpp
new file mode 100644
index 000000000..c0b3e3bd2
--- /dev/null
+++ b/Source/WebCore/contentextensions/ContentExtensionParser.cpp
@@ -0,0 +1,335 @@
+/*
+ * Copyright (C) 2014 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "ContentExtensionParser.h"
+
+#if ENABLE(CONTENT_EXTENSIONS)
+
+#include "CSSParser.h"
+#include "CSSParserMode.h"
+#include "CSSSelectorList.h"
+#include "ContentExtensionError.h"
+#include "ContentExtensionRule.h"
+#include "ContentExtensionsBackend.h"
+#include "ContentExtensionsDebugging.h"
+#include <JavaScriptCore/JSCInlines.h>
+#include <JavaScriptCore/JSGlobalObject.h>
+#include <JavaScriptCore/JSONObject.h>
+#include <JavaScriptCore/VM.h>
+#include <wtf/CurrentTime.h>
+#include <wtf/text/WTFString.h>
+
+using namespace JSC;
+
+namespace WebCore {
+
+namespace ContentExtensions {
+
+static bool containsOnlyASCIIWithNoUppercase(const String& domain)
+{
+ for (unsigned i = 0; i < domain.length(); ++i) {
+ UChar c = domain.at(i);
+ if (!isASCII(c) || isASCIIUpper(c))
+ return false;
+ }
+ return true;
+}
+
+static std::error_code getDomainList(ExecState& exec, const JSObject* arrayObject, Vector<String>& vector)
+{
+ VM& vm = exec.vm();
+ auto scope = DECLARE_THROW_SCOPE(vm);
+
+ ASSERT(vector.isEmpty());
+ if (!arrayObject || !isJSArray(arrayObject))
+ return ContentExtensionError::JSONInvalidDomainList;
+ const JSArray* array = jsCast<const JSArray*>(arrayObject);
+
+ unsigned length = array->length();
+ for (unsigned i = 0; i < length; ++i) {
+ const JSValue value = array->getIndex(&exec, i);
+ if (scope.exception() || !value.isString())
+ return ContentExtensionError::JSONInvalidDomainList;
+
+ // Domains should be punycode encoded lower case.
+ const String& domain = asString(value)->value(&exec);
+ if (domain.isEmpty())
+ return ContentExtensionError::JSONInvalidDomainList;
+ if (!containsOnlyASCIIWithNoUppercase(domain))
+ return ContentExtensionError::JSONDomainNotLowerCaseASCII;
+ vector.append(domain);
+ }
+ return { };
+}
+
+static std::error_code getTypeFlags(ExecState& exec, const JSValue& typeValue, ResourceFlags& flags, uint16_t (*stringToType)(const String&))
+{
+ VM& vm = exec.vm();
+ auto scope = DECLARE_THROW_SCOPE(vm);
+
+ if (!typeValue.isObject())
+ return { };
+
+ const JSObject* object = typeValue.toObject(&exec);
+ ASSERT(!scope.exception());
+ if (!isJSArray(object))
+ return ContentExtensionError::JSONInvalidTriggerFlagsArray;
+
+ const JSArray* array = jsCast<const JSArray*>(object);
+
+ unsigned length = array->length();
+ for (unsigned i = 0; i < length; ++i) {
+ const JSValue value = array->getIndex(&exec, i);
+ if (scope.exception() || !value)
+ return ContentExtensionError::JSONInvalidObjectInTriggerFlagsArray;
+
+ String name = value.toWTFString(&exec);
+ uint16_t type = stringToType(name);
+ if (!type)
+ return ContentExtensionError::JSONInvalidStringInTriggerFlagsArray;
+
+ flags |= type;
+ }
+
+ return { };
+}
+
+static std::error_code loadTrigger(ExecState& exec, const JSObject& ruleObject, Trigger& trigger)
+{
+ VM& vm = exec.vm();
+ auto scope = DECLARE_THROW_SCOPE(vm);
+
+ const JSValue triggerObject = ruleObject.get(&exec, Identifier::fromString(&exec, "trigger"));
+ if (!triggerObject || scope.exception() || !triggerObject.isObject())
+ return ContentExtensionError::JSONInvalidTrigger;
+
+ const JSValue urlFilterObject = triggerObject.get(&exec, Identifier::fromString(&exec, "url-filter"));
+ if (!urlFilterObject || scope.exception() || !urlFilterObject.isString())
+ return ContentExtensionError::JSONInvalidURLFilterInTrigger;
+
+ String urlFilter = asString(urlFilterObject)->value(&exec);
+ if (urlFilter.isEmpty())
+ return ContentExtensionError::JSONInvalidURLFilterInTrigger;
+
+ trigger.urlFilter = urlFilter;
+
+ const JSValue urlFilterCaseValue = triggerObject.get(&exec, Identifier::fromString(&exec, "url-filter-is-case-sensitive"));
+ if (urlFilterCaseValue && !scope.exception() && urlFilterCaseValue.isBoolean())
+ trigger.urlFilterIsCaseSensitive = urlFilterCaseValue.toBoolean(&exec);
+
+ const JSValue resourceTypeValue = triggerObject.get(&exec, Identifier::fromString(&exec, "resource-type"));
+ if (!scope.exception() && resourceTypeValue.isObject()) {
+ auto typeFlagsError = getTypeFlags(exec, resourceTypeValue, trigger.flags, readResourceType);
+ if (typeFlagsError)
+ return typeFlagsError;
+ } else if (!resourceTypeValue.isUndefined())
+ return ContentExtensionError::JSONInvalidTriggerFlagsArray;
+
+ const JSValue loadTypeValue = triggerObject.get(&exec, Identifier::fromString(&exec, "load-type"));
+ if (!scope.exception() && loadTypeValue.isObject()) {
+ auto typeFlagsError = getTypeFlags(exec, loadTypeValue, trigger.flags, readLoadType);
+ if (typeFlagsError)
+ return typeFlagsError;
+ } else if (!loadTypeValue.isUndefined())
+ return ContentExtensionError::JSONInvalidTriggerFlagsArray;
+
+ const JSValue ifDomain = triggerObject.get(&exec, Identifier::fromString(&exec, "if-domain"));
+ if (!scope.exception() && ifDomain.isObject()) {
+ auto ifDomainError = getDomainList(exec, asObject(ifDomain), trigger.domains);
+ if (ifDomainError)
+ return ifDomainError;
+ if (trigger.domains.isEmpty())
+ return ContentExtensionError::JSONInvalidDomainList;
+ ASSERT(trigger.domainCondition == Trigger::DomainCondition::None);
+ trigger.domainCondition = Trigger::DomainCondition::IfDomain;
+ } else if (!ifDomain.isUndefined())
+ return ContentExtensionError::JSONInvalidDomainList;
+
+ const JSValue unlessDomain = triggerObject.get(&exec, Identifier::fromString(&exec, "unless-domain"));
+ if (!scope.exception() && unlessDomain.isObject()) {
+ if (trigger.domainCondition != Trigger::DomainCondition::None)
+ return ContentExtensionError::JSONUnlessAndIfDomain;
+ auto unlessDomainError = getDomainList(exec, asObject(unlessDomain), trigger.domains);
+ if (unlessDomainError)
+ return unlessDomainError;
+ if (trigger.domains.isEmpty())
+ return ContentExtensionError::JSONInvalidDomainList;
+ trigger.domainCondition = Trigger::DomainCondition::UnlessDomain;
+ } else if (!unlessDomain.isUndefined())
+ return ContentExtensionError::JSONInvalidDomainList;
+
+ return { };
+}
+
+static bool isValidSelector(const String& selector)
+{
+ CSSParserContext context(HTMLQuirksMode);
+ CSSParser parser(context);
+ CSSSelectorList selectorList;
+ parser.parseSelector(selector, selectorList);
+ return selectorList.isValid();
+}
+
+static std::error_code loadAction(ExecState& exec, const JSObject& ruleObject, Action& action, bool& validSelector)
+{
+ VM& vm = exec.vm();
+ auto scope = DECLARE_THROW_SCOPE(vm);
+
+ validSelector = true;
+ const JSValue actionObject = ruleObject.get(&exec, Identifier::fromString(&exec, "action"));
+ if (!actionObject || scope.exception() || !actionObject.isObject())
+ return ContentExtensionError::JSONInvalidAction;
+
+ const JSValue typeObject = actionObject.get(&exec, Identifier::fromString(&exec, "type"));
+ if (!typeObject || scope.exception() || !typeObject.isString())
+ return ContentExtensionError::JSONInvalidActionType;
+
+ String actionType = asString(typeObject)->value(&exec);
+
+ if (actionType == "block")
+ action = ActionType::BlockLoad;
+ else if (actionType == "ignore-previous-rules")
+ action = ActionType::IgnorePreviousRules;
+ else if (actionType == "block-cookies")
+ action = ActionType::BlockCookies;
+ else if (actionType == "css-display-none") {
+ JSValue selector = actionObject.get(&exec, Identifier::fromString(&exec, "selector"));
+ if (!selector || scope.exception() || !selector.isString())
+ return ContentExtensionError::JSONInvalidCSSDisplayNoneActionType;
+
+ String selectorString = asString(selector)->value(&exec);
+ if (!isValidSelector(selectorString)) {
+ // Skip rules with invalid selectors to be backwards-compatible.
+ validSelector = false;
+ return { };
+ }
+ action = Action(ActionType::CSSDisplayNoneSelector, selectorString);
+ } else if (actionType == "make-https") {
+ action = ActionType::MakeHTTPS;
+ } else
+ return ContentExtensionError::JSONInvalidActionType;
+
+ return { };
+}
+
+static std::error_code loadRule(ExecState& exec, const JSObject& ruleObject, Vector<ContentExtensionRule>& ruleList)
+{
+ Trigger trigger;
+ auto triggerError = loadTrigger(exec, ruleObject, trigger);
+ if (triggerError)
+ return triggerError;
+
+ Action action;
+ bool validSelector;
+ auto actionError = loadAction(exec, ruleObject, action, validSelector);
+ if (actionError)
+ return actionError;
+
+ if (validSelector)
+ ruleList.append(ContentExtensionRule(trigger, action));
+ return { };
+}
+
+static std::error_code loadEncodedRules(ExecState& exec, const String& rules, Vector<ContentExtensionRule>& ruleList)
+{
+ VM& vm = exec.vm();
+ auto scope = DECLARE_THROW_SCOPE(vm);
+
+ // FIXME: JSONParse should require callbacks instead of an ExecState.
+ const JSValue decodedRules = JSONParse(&exec, rules);
+
+ if (scope.exception() || !decodedRules)
+ return ContentExtensionError::JSONInvalid;
+
+ if (!decodedRules.isObject())
+ return ContentExtensionError::JSONTopLevelStructureNotAnObject;
+
+ const JSObject* topLevelObject = decodedRules.toObject(&exec);
+ if (!topLevelObject || scope.exception())
+ return ContentExtensionError::JSONTopLevelStructureNotAnObject;
+
+ if (!isJSArray(topLevelObject))
+ return ContentExtensionError::JSONTopLevelStructureNotAnArray;
+
+ const JSArray* topLevelArray = jsCast<const JSArray*>(topLevelObject);
+
+ Vector<ContentExtensionRule> localRuleList;
+
+ unsigned length = topLevelArray->length();
+ const unsigned maxRuleCount = 50000;
+ if (length > maxRuleCount)
+ return ContentExtensionError::JSONTooManyRules;
+ for (unsigned i = 0; i < length; ++i) {
+ const JSValue value = topLevelArray->getIndex(&exec, i);
+ if (scope.exception() || !value)
+ return ContentExtensionError::JSONInvalidObjectInTopLevelArray;
+
+ const JSObject* ruleObject = value.toObject(&exec);
+ if (!ruleObject || scope.exception())
+ return ContentExtensionError::JSONInvalidRule;
+
+ auto error = loadRule(exec, *ruleObject, localRuleList);
+ if (error)
+ return error;
+ }
+
+ ruleList = WTFMove(localRuleList);
+ return { };
+}
+
+std::error_code parseRuleList(const String& rules, Vector<ContentExtensionRule>& ruleList)
+{
+#if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING
+ double loadExtensionStartTime = monotonicallyIncreasingTime();
+#endif
+ RefPtr<VM> vm = VM::create();
+
+ JSLockHolder locker(vm.get());
+ JSGlobalObject* globalObject = JSGlobalObject::create(*vm, JSGlobalObject::createStructure(*vm, jsNull()));
+
+ ExecState* exec = globalObject->globalExec();
+ auto error = loadEncodedRules(*exec, rules, ruleList);
+
+ vm = nullptr;
+
+ if (error)
+ return error;
+
+ if (ruleList.isEmpty())
+ return ContentExtensionError::JSONContainsNoRules;
+
+#if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING
+ double loadExtensionEndTime = monotonicallyIncreasingTime();
+ dataLogF("Time spent loading extension %f\n", (loadExtensionEndTime - loadExtensionStartTime));
+#endif
+
+ return { };
+}
+
+} // namespace ContentExtensions
+} // namespace WebCore
+
+#endif // ENABLE(CONTENT_EXTENSIONS)
diff --git a/Source/WebCore/contentextensions/ContentExtensionParser.h b/Source/WebCore/contentextensions/ContentExtensionParser.h
new file mode 100644
index 000000000..9fbca15e5
--- /dev/null
+++ b/Source/WebCore/contentextensions/ContentExtensionParser.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (C) 2014 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#pragma once
+
+#if ENABLE(CONTENT_EXTENSIONS)
+
+#include <system_error>
+#include <wtf/Forward.h>
+#include <wtf/Vector.h>
+
+namespace WebCore {
+
+namespace ContentExtensions {
+
+class ContentExtensionRule;
+
+std::error_code parseRuleList(const String& rules, Vector<ContentExtensionRule>&);
+
+} // namespace ContentExtensions
+} // namespace WebCore
+
+#endif // ENABLE(CONTENT_EXTENSIONS)
diff --git a/Source/WebCore/contentextensions/ContentExtensionRule.cpp b/Source/WebCore/contentextensions/ContentExtensionRule.cpp
new file mode 100644
index 000000000..2dc73275c
--- /dev/null
+++ b/Source/WebCore/contentextensions/ContentExtensionRule.cpp
@@ -0,0 +1,124 @@
+/*
+ * Copyright (C) 2014, 2015 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "ContentExtensionRule.h"
+
+#if ENABLE(CONTENT_EXTENSIONS)
+
+namespace WebCore {
+
+namespace ContentExtensions {
+
+ContentExtensionRule::ContentExtensionRule(const Trigger& trigger, const Action& action)
+ : m_trigger(trigger)
+ , m_action(action)
+{
+ ASSERT(!m_trigger.urlFilter.isEmpty());
+}
+
+Action Action::deserialize(const SerializedActionByte* actions, const uint32_t actionsLength, uint32_t location)
+{
+ RELEASE_ASSERT(location < actionsLength);
+ switch (static_cast<ActionType>(actions[location])) {
+ case ActionType::BlockCookies:
+ return Action(ActionType::BlockCookies, location);
+ case ActionType::BlockLoad:
+ return Action(ActionType::BlockLoad, location);
+ case ActionType::IgnorePreviousRules:
+ return Action(ActionType::IgnorePreviousRules, location);
+ case ActionType::MakeHTTPS:
+ return Action(ActionType::MakeHTTPS, location);
+ case ActionType::CSSDisplayNoneSelector: {
+ uint32_t headerLength = sizeof(ActionType) + sizeof(uint32_t) + sizeof(bool);
+ uint32_t stringStartIndex = location + headerLength;
+ RELEASE_ASSERT(actionsLength >= stringStartIndex);
+ uint32_t selectorLength = *reinterpret_cast<const unsigned*>(&actions[location + sizeof(ActionType)]);
+ bool wideCharacters = actions[location + sizeof(ActionType) + sizeof(uint32_t)];
+
+ if (wideCharacters) {
+ RELEASE_ASSERT(actionsLength >= stringStartIndex + selectorLength * sizeof(UChar));
+ return Action(ActionType::CSSDisplayNoneSelector, String(reinterpret_cast<const UChar*>(&actions[stringStartIndex]), selectorLength), location);
+ }
+ RELEASE_ASSERT(actionsLength >= stringStartIndex + selectorLength * sizeof(LChar));
+ return Action(ActionType::CSSDisplayNoneSelector, String(reinterpret_cast<const LChar*>(&actions[stringStartIndex]), selectorLength), location);
+ }
+ case ActionType::CSSDisplayNoneStyleSheet:
+ case ActionType::InvalidAction:
+ default:
+ RELEASE_ASSERT_NOT_REACHED();
+ }
+}
+
+ActionType Action::deserializeType(const SerializedActionByte* actions, const uint32_t actionsLength, uint32_t location)
+{
+ RELEASE_ASSERT(location < actionsLength);
+ ActionType type = static_cast<ActionType>(actions[location]);
+ switch (type) {
+ case ActionType::BlockCookies:
+ case ActionType::BlockLoad:
+ case ActionType::IgnorePreviousRules:
+ case ActionType::CSSDisplayNoneSelector:
+ case ActionType::MakeHTTPS:
+ return type;
+ case ActionType::CSSDisplayNoneStyleSheet:
+ case ActionType::InvalidAction:
+ default:
+ RELEASE_ASSERT_NOT_REACHED();
+ }
+}
+
+uint32_t Action::serializedLength(const SerializedActionByte* actions, const uint32_t actionsLength, uint32_t location)
+{
+ RELEASE_ASSERT(location < actionsLength);
+ switch (static_cast<ActionType>(actions[location])) {
+ case ActionType::BlockCookies:
+ case ActionType::BlockLoad:
+ case ActionType::IgnorePreviousRules:
+ case ActionType::MakeHTTPS:
+ return sizeof(ActionType);
+ case ActionType::CSSDisplayNoneSelector: {
+ uint32_t headerLength = sizeof(ActionType) + sizeof(uint32_t) + sizeof(bool);
+ uint32_t stringStartIndex = location + headerLength;
+ RELEASE_ASSERT(actionsLength >= stringStartIndex);
+ uint32_t selectorLength = *reinterpret_cast<const unsigned*>(&actions[location + sizeof(ActionType)]);
+ bool wideCharacters = actions[location + sizeof(ActionType) + sizeof(uint32_t)];
+
+ if (wideCharacters)
+ return headerLength + selectorLength * sizeof(UChar);
+ return headerLength + selectorLength * sizeof(LChar);
+ }
+ case ActionType::CSSDisplayNoneStyleSheet:
+ case ActionType::InvalidAction:
+ default:
+ RELEASE_ASSERT_NOT_REACHED();
+ }
+}
+
+} // namespace ContentExtensions
+
+} // namespace WebCore
+
+#endif // ENABLE(CONTENT_EXTENSIONS)
diff --git a/Source/WebCore/contentextensions/ContentExtensionRule.h b/Source/WebCore/contentextensions/ContentExtensionRule.h
new file mode 100644
index 000000000..8983a8a30
--- /dev/null
+++ b/Source/WebCore/contentextensions/ContentExtensionRule.h
@@ -0,0 +1,191 @@
+/*
+ * Copyright (C) 2014, 2015 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#pragma once
+
+#if ENABLE(CONTENT_EXTENSIONS)
+
+#include "ContentExtensionActions.h"
+#include "ResourceLoadInfo.h"
+#include <wtf/text/WTFString.h>
+
+namespace WebCore {
+
+namespace ContentExtensions {
+
+// A ContentExtensionRule is the smallest unit in a ContentExtension.
+//
+// It is composed of a trigger and an action. The trigger defines on what kind of content this extension should apply.
+// The action defines what to perform on that content.
+
+struct Trigger {
+ String urlFilter;
+ bool urlFilterIsCaseSensitive { false };
+ ResourceFlags flags { 0 };
+ Vector<String> domains;
+ enum class DomainCondition {
+ None,
+ IfDomain,
+ UnlessDomain,
+ } domainCondition { DomainCondition::None };
+
+ ~Trigger()
+ {
+ ASSERT(domains.isEmpty() == (domainCondition == DomainCondition::None));
+ }
+
+ bool isEmpty() const
+ {
+ return urlFilter.isEmpty()
+ && !urlFilterIsCaseSensitive
+ && !flags
+ && domains.isEmpty()
+ && domainCondition == DomainCondition::None;
+ }
+
+ bool operator==(const Trigger& other) const
+ {
+ return urlFilter == other.urlFilter
+ && urlFilterIsCaseSensitive == other.urlFilterIsCaseSensitive
+ && flags == other.flags
+ && domains == other.domains
+ && domainCondition == other.domainCondition;
+ }
+};
+
+struct TriggerHash {
+ static unsigned hash(const Trigger& trigger)
+ {
+ unsigned hash = trigger.urlFilterIsCaseSensitive ? 10619863 : 40960001;
+ if (!trigger.urlFilter.isNull())
+ hash ^= StringHash::hash(trigger.urlFilter);
+ hash = WTF::pairIntHash(hash, DefaultHash<ResourceFlags>::Hash::hash(trigger.flags));
+
+ for (const String& domain : trigger.domains)
+ hash ^= StringHash::hash(domain);
+
+ if (trigger.domainCondition == Trigger::DomainCondition::IfDomain)
+ hash |= 1 << 16;
+ else if (trigger.domainCondition == Trigger::DomainCondition::IfDomain)
+ hash |= 1 << 31;
+ return hash;
+ }
+
+ static bool equal(const Trigger& a, const Trigger& b)
+ {
+ return a == b;
+ }
+
+ static const bool safeToCompareToEmptyOrDeleted = false;
+};
+
+struct TriggerHashTraits : public WTF::CustomHashTraits<Trigger> {
+ static const bool emptyValueIsZero = false;
+ static const bool hasIsEmptyValueFunction = true;
+
+ static void constructDeletedValue(Trigger& trigger)
+ {
+ new (NotNull, std::addressof(trigger.urlFilter)) String(WTF::HashTableDeletedValue);
+ }
+
+ static bool isDeletedValue(const Trigger& trigger)
+ {
+ return trigger.urlFilter.isHashTableDeletedValue();
+ }
+
+ static Trigger emptyValue()
+ {
+ return Trigger();
+ }
+
+ static bool isEmptyValue(const Trigger& trigger)
+ {
+ return trigger.isEmpty();
+ }
+};
+
+struct Action {
+ Action()
+ : m_type(ActionType::InvalidAction)
+ , m_actionID(std::numeric_limits<uint32_t>::max())
+ {
+ }
+
+ Action(ActionType type, const String& stringArgument, uint32_t actionID = std::numeric_limits<uint32_t>::max())
+ : m_type(type)
+ , m_actionID(actionID)
+ , m_stringArgument(stringArgument)
+ {
+ ASSERT(type == ActionType::CSSDisplayNoneSelector || type == ActionType::CSSDisplayNoneStyleSheet);
+ }
+
+ Action(ActionType type, uint32_t actionID = std::numeric_limits<uint32_t>::max())
+ : m_type(type)
+ , m_actionID(actionID)
+ {
+ ASSERT(type != ActionType::CSSDisplayNoneSelector && type != ActionType::CSSDisplayNoneStyleSheet);
+ }
+
+ bool operator==(const Action& other) const
+ {
+ return m_type == other.m_type
+ && m_extensionIdentifier == other.m_extensionIdentifier
+ && m_actionID == other.m_actionID
+ && m_stringArgument == other.m_stringArgument;
+ }
+
+ static Action deserialize(const SerializedActionByte* actions, const uint32_t actionsLength, uint32_t location);
+ static ActionType deserializeType(const SerializedActionByte* actions, const uint32_t actionsLength, uint32_t location);
+ static uint32_t serializedLength(const SerializedActionByte* actions, const uint32_t actionsLength, uint32_t location);
+
+ void setExtensionIdentifier(const String& extensionIdentifier) { m_extensionIdentifier = extensionIdentifier; }
+ const String& extensionIdentifier() const { return m_extensionIdentifier; }
+ ActionType type() const { return m_type; }
+ uint32_t actionID() const { return m_actionID; }
+ const String& stringArgument() const { return m_stringArgument; }
+
+private:
+ String m_extensionIdentifier;
+ ActionType m_type;
+ uint32_t m_actionID;
+ String m_stringArgument;
+};
+
+class ContentExtensionRule {
+public:
+ ContentExtensionRule(const Trigger&, const Action&);
+
+ const Trigger& trigger() const { return m_trigger; }
+ const Action& action() const { return m_action; }
+
+private:
+ Trigger m_trigger;
+ Action m_action;
+};
+
+} // namespace ContentExtensions
+} // namespace WebCore
+
+#endif // ENABLE(CONTENT_EXTENSIONS)
diff --git a/Source/WebCore/contentextensions/ContentExtensionStyleSheet.cpp b/Source/WebCore/contentextensions/ContentExtensionStyleSheet.cpp
new file mode 100644
index 000000000..e24a3669d
--- /dev/null
+++ b/Source/WebCore/contentextensions/ContentExtensionStyleSheet.cpp
@@ -0,0 +1,70 @@
+/*
+ * Copyright (C) 2015-2016 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "ContentExtensionStyleSheet.h"
+
+#if ENABLE(CONTENT_EXTENSIONS)
+
+#include "CSSStyleSheet.h"
+#include "ContentExtensionsBackend.h"
+#include "Document.h"
+#include "StyleSheetContents.h"
+#include <wtf/text/StringBuilder.h>
+
+namespace WebCore {
+namespace ContentExtensions {
+
+ContentExtensionStyleSheet::ContentExtensionStyleSheet(Document& document)
+ : m_styleSheet(CSSStyleSheet::create(StyleSheetContents::create(), document))
+{
+ m_styleSheet->contents().setIsUserStyleSheet(true);
+}
+
+ContentExtensionStyleSheet::~ContentExtensionStyleSheet()
+{
+ m_styleSheet->clearOwnerNode();
+}
+
+bool ContentExtensionStyleSheet::addDisplayNoneSelector(const String& selector, uint32_t selectorID)
+{
+ ASSERT(selectorID != std::numeric_limits<uint32_t>::max());
+
+ if (!m_addedSelectorIDs.add(selectorID).isNewEntry)
+ return false;
+
+ StringBuilder css;
+ css.append(selector);
+ css.append('{');
+ css.append(ContentExtensionsBackend::displayNoneCSSRule());
+ css.append('}');
+ m_styleSheet->contents().parseString(css.toString());
+ return true;
+}
+
+} // namespace ContentExtensions
+} // namespace WebCore
+
+#endif // ENABLE(CONTENT_EXTENSIONS)
diff --git a/Source/WebCore/contentextensions/ContentExtensionStyleSheet.h b/Source/WebCore/contentextensions/ContentExtensionStyleSheet.h
new file mode 100644
index 000000000..307736235
--- /dev/null
+++ b/Source/WebCore/contentextensions/ContentExtensionStyleSheet.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (C) 2015-2016 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#pragma once
+
+#if ENABLE(CONTENT_EXTENSIONS)
+
+#include "CSSStyleSheet.h"
+#include <wtf/HashSet.h>
+#include <wtf/Ref.h>
+#include <wtf/RefCounted.h>
+#include <wtf/text/WTFString.h>
+
+namespace WebCore {
+
+class Document;
+
+namespace ContentExtensions {
+
+class ContentExtensionStyleSheet : public RefCounted<ContentExtensionStyleSheet> {
+public:
+ static Ref<ContentExtensionStyleSheet> create(Document& document)
+ {
+ return adoptRef(*new ContentExtensionStyleSheet(document));
+ }
+ virtual ~ContentExtensionStyleSheet();
+
+ bool addDisplayNoneSelector(const String& selector, uint32_t selectorID);
+
+ CSSStyleSheet& styleSheet() { return m_styleSheet.get(); }
+
+private:
+ ContentExtensionStyleSheet(Document&);
+
+ Ref<CSSStyleSheet> m_styleSheet;
+ HashSet<uint32_t, DefaultHash<uint32_t>::Hash, WTF::UnsignedWithZeroKeyHashTraits<uint32_t>> m_addedSelectorIDs;
+};
+
+} // namespace ContentExtensions
+} // namespace WebCore
+
+#endif // ENABLE(CONTENT_EXTENSIONS)
diff --git a/Source/WebCore/contentextensions/ContentExtensionsBackend.cpp b/Source/WebCore/contentextensions/ContentExtensionsBackend.cpp
new file mode 100644
index 000000000..83ea96e4e
--- /dev/null
+++ b/Source/WebCore/contentextensions/ContentExtensionsBackend.cpp
@@ -0,0 +1,250 @@
+/*
+ * Copyright (C) 2014 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "ContentExtensionsBackend.h"
+
+#if ENABLE(CONTENT_EXTENSIONS)
+
+#include "CompiledContentExtension.h"
+#include "ContentExtension.h"
+#include "ContentExtensionsDebugging.h"
+#include "DFABytecodeInterpreter.h"
+#include "Document.h"
+#include "DocumentLoader.h"
+#include "ExtensionStyleSheets.h"
+#include "Frame.h"
+#include "FrameLoaderClient.h"
+#include "MainFrame.h"
+#include "ResourceLoadInfo.h"
+#include "URL.h"
+#include "UserContentController.h"
+#include <wtf/NeverDestroyed.h>
+#include <wtf/text/CString.h>
+
+namespace WebCore {
+
+namespace ContentExtensions {
+
+void ContentExtensionsBackend::addContentExtension(const String& identifier, RefPtr<CompiledContentExtension> compiledContentExtension)
+{
+ ASSERT(!identifier.isEmpty());
+ if (identifier.isEmpty())
+ return;
+
+ if (!compiledContentExtension) {
+ removeContentExtension(identifier);
+ return;
+ }
+
+ RefPtr<ContentExtension> extension = ContentExtension::create(identifier, adoptRef(*compiledContentExtension.leakRef()));
+ m_contentExtensions.set(identifier, WTFMove(extension));
+}
+
+void ContentExtensionsBackend::removeContentExtension(const String& identifier)
+{
+ m_contentExtensions.remove(identifier);
+}
+
+void ContentExtensionsBackend::removeAllContentExtensions()
+{
+ m_contentExtensions.clear();
+}
+
+Vector<Action> ContentExtensionsBackend::actionsForResourceLoad(const ResourceLoadInfo& resourceLoadInfo) const
+{
+#if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING
+ double addedTimeStart = monotonicallyIncreasingTime();
+#endif
+ if (resourceLoadInfo.resourceURL.protocolIsData())
+ return Vector<Action>();
+
+ const String& urlString = resourceLoadInfo.resourceURL.string();
+ ASSERT_WITH_MESSAGE(urlString.containsOnlyASCII(), "A decoded URL should only contain ASCII characters. The matching algorithm assumes the input is ASCII.");
+ const CString& urlCString = urlString.utf8();
+
+ Vector<Action> finalActions;
+ ResourceFlags flags = resourceLoadInfo.getResourceFlags();
+ for (auto& contentExtension : m_contentExtensions.values()) {
+ RELEASE_ASSERT(contentExtension);
+ const CompiledContentExtension& compiledExtension = contentExtension->compiledExtension();
+
+ DFABytecodeInterpreter withoutDomainsInterpreter(compiledExtension.filtersWithoutDomainsBytecode(), compiledExtension.filtersWithoutDomainsBytecodeLength());
+ DFABytecodeInterpreter::Actions withoutDomainsActions = withoutDomainsInterpreter.interpret(urlCString, flags);
+
+ String domain = resourceLoadInfo.mainDocumentURL.host();
+ DFABytecodeInterpreter withDomainsInterpreter(compiledExtension.filtersWithDomainsBytecode(), compiledExtension.filtersWithDomainsBytecodeLength());
+ DFABytecodeInterpreter::Actions withDomainsActions = withDomainsInterpreter.interpretWithDomains(urlCString, flags, contentExtension->cachedDomainActions(domain));
+
+ const SerializedActionByte* actions = compiledExtension.actions();
+ const unsigned actionsLength = compiledExtension.actionsLength();
+
+ bool sawIgnorePreviousRules = false;
+ const Vector<uint32_t>& universalWithDomains = contentExtension->universalActionsWithDomains(domain);
+ const Vector<uint32_t>& universalWithoutDomains = contentExtension->universalActionsWithoutDomains();
+ if (!withoutDomainsActions.isEmpty() || !withDomainsActions.isEmpty() || !universalWithDomains.isEmpty() || !universalWithoutDomains.isEmpty()) {
+ Vector<uint32_t> actionLocations;
+ actionLocations.reserveInitialCapacity(withoutDomainsActions.size() + withDomainsActions.size() + universalWithoutDomains.size() + universalWithDomains.size());
+ for (uint64_t actionLocation : withoutDomainsActions)
+ actionLocations.uncheckedAppend(static_cast<uint32_t>(actionLocation));
+ for (uint64_t actionLocation : withDomainsActions)
+ actionLocations.uncheckedAppend(static_cast<uint32_t>(actionLocation));
+ for (uint32_t actionLocation : universalWithoutDomains)
+ actionLocations.uncheckedAppend(actionLocation);
+ for (uint32_t actionLocation : universalWithDomains)
+ actionLocations.uncheckedAppend(actionLocation);
+ std::sort(actionLocations.begin(), actionLocations.end());
+
+ // Add actions in reverse order to properly deal with IgnorePreviousRules.
+ for (unsigned i = actionLocations.size(); i; i--) {
+ Action action = Action::deserialize(actions, actionsLength, actionLocations[i - 1]);
+ action.setExtensionIdentifier(contentExtension->identifier());
+ if (action.type() == ActionType::IgnorePreviousRules) {
+ sawIgnorePreviousRules = true;
+ break;
+ }
+ finalActions.append(action);
+ }
+ }
+ if (!sawIgnorePreviousRules) {
+ finalActions.append(Action(ActionType::CSSDisplayNoneStyleSheet, contentExtension->identifier()));
+ finalActions.last().setExtensionIdentifier(contentExtension->identifier());
+ }
+ }
+#if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING
+ double addedTimeEnd = monotonicallyIncreasingTime();
+ dataLogF("Time added: %f microseconds %s \n", (addedTimeEnd - addedTimeStart) * 1.0e6, resourceLoadInfo.resourceURL.string().utf8().data());
+#endif
+ return finalActions;
+}
+
+StyleSheetContents* ContentExtensionsBackend::globalDisplayNoneStyleSheet(const String& identifier) const
+{
+ const auto& contentExtension = m_contentExtensions.get(identifier);
+ return contentExtension ? contentExtension->globalDisplayNoneStyleSheet() : nullptr;
+}
+
+BlockedStatus ContentExtensionsBackend::processContentExtensionRulesForLoad(const URL& url, ResourceType resourceType, DocumentLoader& initiatingDocumentLoader)
+{
+ if (m_contentExtensions.isEmpty())
+ return { };
+
+ Document* currentDocument = nullptr;
+ URL mainDocumentURL;
+
+ if (Frame* frame = initiatingDocumentLoader.frame()) {
+ currentDocument = frame->document();
+
+ if (initiatingDocumentLoader.isLoadingMainResource()
+ && frame->isMainFrame()
+ && resourceType == ResourceType::Document)
+ mainDocumentURL = url;
+ else if (Document* mainDocument = frame->mainFrame().document())
+ mainDocumentURL = mainDocument->url();
+ }
+
+ ResourceLoadInfo resourceLoadInfo = { url, mainDocumentURL, resourceType };
+ Vector<ContentExtensions::Action> actions = actionsForResourceLoad(resourceLoadInfo);
+
+ bool willBlockLoad = false;
+ bool willBlockCookies = false;
+ bool willMakeHTTPS = false;
+ for (const auto& action : actions) {
+ switch (action.type()) {
+ case ContentExtensions::ActionType::BlockLoad:
+ willBlockLoad = true;
+ break;
+ case ContentExtensions::ActionType::BlockCookies:
+ willBlockCookies = true;
+ break;
+ case ContentExtensions::ActionType::CSSDisplayNoneSelector:
+ if (resourceType == ResourceType::Document)
+ initiatingDocumentLoader.addPendingContentExtensionDisplayNoneSelector(action.extensionIdentifier(), action.stringArgument(), action.actionID());
+ else if (currentDocument)
+ currentDocument->extensionStyleSheets().addDisplayNoneSelector(action.extensionIdentifier(), action.stringArgument(), action.actionID());
+ break;
+ case ContentExtensions::ActionType::CSSDisplayNoneStyleSheet: {
+ StyleSheetContents* styleSheetContents = globalDisplayNoneStyleSheet(action.stringArgument());
+ if (styleSheetContents) {
+ if (resourceType == ResourceType::Document)
+ initiatingDocumentLoader.addPendingContentExtensionSheet(action.stringArgument(), *styleSheetContents);
+ else if (currentDocument)
+ currentDocument->extensionStyleSheets().maybeAddContentExtensionSheet(action.stringArgument(), *styleSheetContents);
+ }
+ break;
+ }
+ case ContentExtensions::ActionType::MakeHTTPS: {
+ if ((url.protocolIs("http") || url.protocolIs("ws"))
+ && (!url.port() || isDefaultPortForProtocol(url.port().value(), url.protocol())))
+ willMakeHTTPS = true;
+ break;
+ }
+ case ContentExtensions::ActionType::IgnorePreviousRules:
+ case ContentExtensions::ActionType::InvalidAction:
+ RELEASE_ASSERT_NOT_REACHED();
+ }
+ }
+
+ if (currentDocument) {
+ if (willMakeHTTPS) {
+ ASSERT(url.protocolIs("http") || url.protocolIs("ws"));
+ String newProtocol = url.protocolIs("http") ? ASCIILiteral("https") : ASCIILiteral("wss");
+ currentDocument->addConsoleMessage(MessageSource::ContentBlocker, MessageLevel::Info, makeString("Content blocker promoted URL from ", url.string(), " to ", newProtocol));
+ }
+ if (willBlockLoad)
+ currentDocument->addConsoleMessage(MessageSource::ContentBlocker, MessageLevel::Info, makeString("Content blocker prevented frame displaying ", mainDocumentURL.string(), " from loading a resource from ", url.string()));
+ }
+ return { willBlockLoad, willBlockCookies, willMakeHTTPS };
+}
+
+const String& ContentExtensionsBackend::displayNoneCSSRule()
+{
+ static NeverDestroyed<const String> rule(ASCIILiteral("display:none !important;"));
+ return rule;
+}
+
+void applyBlockedStatusToRequest(const BlockedStatus& status, ResourceRequest& request)
+{
+ if (status.blockedCookies)
+ request.setAllowCookies(false);
+
+ if (status.madeHTTPS) {
+ const URL& originalURL = request.url();
+ ASSERT(originalURL.protocolIs("http"));
+ ASSERT(!originalURL.port() || isDefaultPortForProtocol(originalURL.port().value(), originalURL.protocol()));
+
+ URL newURL = originalURL;
+ newURL.setProtocol("https");
+ if (originalURL.port())
+ newURL.setPort(defaultPortForProtocol("https").value());
+ request.setURL(newURL);
+ }
+}
+
+} // namespace ContentExtensions
+
+} // namespace WebCore
+
+#endif // ENABLE(CONTENT_EXTENSIONS)
diff --git a/Source/WebCore/contentextensions/ContentExtensionsBackend.h b/Source/WebCore/contentextensions/ContentExtensionsBackend.h
new file mode 100644
index 000000000..f15ff8c43
--- /dev/null
+++ b/Source/WebCore/contentextensions/ContentExtensionsBackend.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (C) 2014, 2015 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#pragma once
+
+#if ENABLE(CONTENT_EXTENSIONS)
+
+#include "ContentExtension.h"
+#include "ContentExtensionRule.h"
+#include <wtf/HashMap.h>
+#include <wtf/text/StringHash.h>
+#include <wtf/text/WTFString.h>
+
+namespace WebCore {
+
+class DocumentLoader;
+class ResourceRequest;
+class URL;
+
+struct ResourceLoadInfo;
+
+namespace ContentExtensions {
+
+class CompiledContentExtension;
+
+// The ContentExtensionsBackend is the internal model of all the content extensions.
+//
+// It provides two services:
+// 1) It stores the rules for each content extension.
+// 2) It provides APIs for the WebCore interfaces to use those rules efficiently.
+class ContentExtensionsBackend {
+public:
+ // - Rule management interface. This can be used by upper layer.
+
+ // Set a list of rules for a given name. If there were existing rules for the name, they are overriden.
+ // The identifier cannot be empty.
+ WEBCORE_EXPORT void addContentExtension(const String& identifier, RefPtr<CompiledContentExtension>);
+ WEBCORE_EXPORT void removeContentExtension(const String& identifier);
+ WEBCORE_EXPORT void removeAllContentExtensions();
+
+ // - Internal WebCore Interface.
+ WEBCORE_EXPORT Vector<Action> actionsForResourceLoad(const ResourceLoadInfo&) const;
+ WEBCORE_EXPORT StyleSheetContents* globalDisplayNoneStyleSheet(const String& identifier) const;
+
+ BlockedStatus processContentExtensionRulesForLoad(const URL&, ResourceType, DocumentLoader& initiatingDocumentLoader);
+
+ static const String& displayNoneCSSRule();
+
+private:
+ HashMap<String, RefPtr<ContentExtension>> m_contentExtensions;
+};
+
+} // namespace ContentExtensions
+} // namespace WebCore
+
+#endif // ENABLE(CONTENT_EXTENSIONS)
diff --git a/Source/WebCore/contentextensions/ContentExtensionsDebugging.h b/Source/WebCore/contentextensions/ContentExtensionsDebugging.h
new file mode 100644
index 000000000..2100c26a1
--- /dev/null
+++ b/Source/WebCore/contentextensions/ContentExtensionsDebugging.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (C) 2014, 2015 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#pragma once
+
+#if ENABLE(CONTENT_EXTENSIONS)
+
+#include <wtf/Vector.h>
+
+#define CONTENT_EXTENSIONS_STATE_MACHINE_DEBUGGING 0
+
+#define CONTENT_EXTENSIONS_PERFORMANCE_REPORTING 0
+
+#define CONTENT_EXTENSIONS_MEMORY_REPORTING 0
+#define CONTENT_EXTENSIONS_PAGE_SIZE 16384
+
+#if CONTENT_EXTENSIONS_STATE_MACHINE_DEBUGGING
+typedef WTF::CrashOnOverflow ContentExtensionsOverflowHandler;
+#else
+typedef UnsafeVectorOverflow ContentExtensionsOverflowHandler;
+#endif
+
+#if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING
+#define LOG_LARGE_STRUCTURES(name, size) if (size > 1000000) { dataLogF("NAME: %s SIZE %d\n", #name, (int)(size)); };
+#else
+#define LOG_LARGE_STRUCTURES(name, size)
+#endif
+
+#endif // ENABLE(CONTENT_EXTENSIONS)
diff --git a/Source/WebCore/contentextensions/DFA.cpp b/Source/WebCore/contentextensions/DFA.cpp
new file mode 100644
index 000000000..40dee4a32
--- /dev/null
+++ b/Source/WebCore/contentextensions/DFA.cpp
@@ -0,0 +1,180 @@
+/*
+ * Copyright (C) 2014, 2015 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "DFA.h"
+
+#if ENABLE(CONTENT_EXTENSIONS)
+
+#include "DFAMinimizer.h"
+#include <wtf/DataLog.h>
+
+namespace WebCore {
+
+namespace ContentExtensions {
+
+DFA DFA::empty()
+{
+ DFA newDFA;
+ newDFA.nodes.append(DFANode());
+ return newDFA;
+}
+
+size_t DFA::memoryUsed() const
+{
+ return sizeof(DFA)
+ + actions.capacity() * sizeof(uint64_t)
+ + transitionRanges.capacity() * sizeof(CharRange)
+ + transitionDestinations.capacity() * sizeof(uint32_t)
+ + nodes.capacity() * sizeof(DFANode);
+}
+
+void DFA::shrinkToFit()
+{
+ nodes.shrinkToFit();
+ actions.shrinkToFit();
+ transitionRanges.shrinkToFit();
+ transitionDestinations.shrinkToFit();
+}
+
+void DFA::minimize()
+{
+ DFAMinimizer::minimize(*this);
+}
+
+unsigned DFA::graphSize() const
+{
+ unsigned count = 0;
+ for (const DFANode& node : nodes) {
+ if (!node.isKilled())
+ ++count;
+ }
+ return count;
+}
+
+#if CONTENT_EXTENSIONS_STATE_MACHINE_DEBUGGING
+static void printRange(bool firstRange, char rangeStart, char rangeEnd)
+{
+ if (!firstRange)
+ dataLogF(", ");
+ if (rangeStart == rangeEnd) {
+ if (rangeStart == '"' || rangeStart == '\\')
+ dataLogF("\\%c", rangeStart);
+ else if (rangeStart >= '!' && rangeStart <= '~')
+ dataLogF("%c", rangeStart);
+ else
+ dataLogF("\\\\%d", rangeStart);
+ } else
+ dataLogF("\\\\%d-\\\\%d", rangeStart, rangeEnd);
+}
+
+static void printTransitions(const DFA& dfa, unsigned sourceNodeId)
+{
+ const DFANode& sourceNode = dfa.nodes[sourceNodeId];
+ auto transitions = sourceNode.transitions(dfa);
+
+ if (transitions.begin() == transitions.end())
+ return;
+
+ HashMap<unsigned, Vector<uint16_t>, DefaultHash<unsigned>::Hash, WTF::UnsignedWithZeroKeyHashTraits<unsigned>> transitionsPerTarget;
+
+ // First, we build the list of transitions coming to each target node.
+ for (const auto& transition : transitions) {
+ unsigned target = transition.target();
+ transitionsPerTarget.add(target, Vector<uint16_t>());
+
+ for (unsigned offset = 0; offset < transition.range().size(); ++offset)
+ transitionsPerTarget.find(target)->value.append(transition.first() + offset);
+ }
+
+ // Then we go over each one an display the ranges one by one.
+ for (const auto& transitionPerTarget : transitionsPerTarget) {
+ dataLogF(" %d -> %d [label=\"", sourceNodeId, transitionPerTarget.key);
+
+ Vector<uint16_t> incommingCharacters = transitionPerTarget.value;
+ std::sort(incommingCharacters.begin(), incommingCharacters.end());
+
+ char rangeStart = incommingCharacters.first();
+ char rangeEnd = rangeStart;
+ bool first = true;
+ for (unsigned sortedTransitionIndex = 1; sortedTransitionIndex < incommingCharacters.size(); ++sortedTransitionIndex) {
+ char nextChar = incommingCharacters[sortedTransitionIndex];
+ if (nextChar == rangeEnd+1) {
+ rangeEnd = nextChar;
+ continue;
+ }
+ printRange(first, rangeStart, rangeEnd);
+ rangeStart = rangeEnd = nextChar;
+ first = false;
+ }
+ printRange(first, rangeStart, rangeEnd);
+
+ dataLogF("\"];\n");
+ }
+}
+
+void DFA::debugPrintDot() const
+{
+ dataLogF("digraph DFA_Transitions {\n");
+ dataLogF(" rankdir=LR;\n");
+ dataLogF(" node [shape=circle];\n");
+ dataLogF(" {\n");
+ for (unsigned i = 0; i < nodes.size(); ++i) {
+ if (nodes[i].isKilled())
+ continue;
+
+ dataLogF(" %d [label=<Node %d", i, i);
+ const Vector<uint64_t>& actions = nodes[i].actions(*this);
+ if (!actions.isEmpty()) {
+ dataLogF("<BR/>Actions: ");
+ for (unsigned actionIndex = 0; actionIndex < actions.size(); ++actionIndex) {
+ if (actionIndex)
+ dataLogF(", ");
+ dataLogF("%llu", actions[actionIndex]);
+ }
+ }
+ dataLogF(">]");
+
+ if (!actions.isEmpty())
+ dataLogF(" [shape=doublecircle]");
+
+ dataLogF(";\n");
+ }
+ dataLogF(" }\n");
+
+ dataLogF(" {\n");
+ for (unsigned i = 0; i < nodes.size(); ++i)
+ printTransitions(*this, i);
+
+ dataLogF(" }\n");
+ dataLogF("}\n");
+}
+#endif
+
+} // namespace ContentExtensions
+
+} // namespace WebCore
+
+#endif // ENABLE(CONTENT_EXTENSIONS)
diff --git a/Source/WebCore/contentextensions/DFA.h b/Source/WebCore/contentextensions/DFA.h
new file mode 100644
index 000000000..289ae592c
--- /dev/null
+++ b/Source/WebCore/contentextensions/DFA.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (C) 2014, 2015 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#pragma once
+
+#if ENABLE(CONTENT_EXTENSIONS)
+
+#include "ContentExtensionsDebugging.h"
+#include "DFANode.h"
+#include "PlatformExportMacros.h"
+#include <wtf/Vector.h>
+
+namespace WebCore {
+
+namespace ContentExtensions {
+
+// The DFA abstract a partial DFA graph in a compact form.
+struct WEBCORE_EXPORT DFA {
+ static DFA empty();
+
+ void shrinkToFit();
+ void minimize();
+ unsigned graphSize() const;
+ size_t memoryUsed() const;
+
+#if CONTENT_EXTENSIONS_STATE_MACHINE_DEBUGGING
+ void debugPrintDot() const;
+#endif
+
+ Vector<DFANode, 0, ContentExtensionsOverflowHandler> nodes;
+ Vector<uint64_t, 0, ContentExtensionsOverflowHandler> actions;
+ Vector<CharRange, 0, ContentExtensionsOverflowHandler> transitionRanges;
+ Vector<uint32_t, 0, ContentExtensionsOverflowHandler> transitionDestinations;
+ unsigned root { 0 };
+};
+
+inline const CharRange& DFANode::ConstRangeIterator::range() const
+{
+ return dfa.transitionRanges[position];
+}
+
+inline uint32_t DFANode::ConstRangeIterator::target() const
+{
+ return dfa.transitionDestinations[position];
+}
+
+inline const CharRange& DFANode::RangeIterator::range() const
+{
+ return dfa.transitionRanges[position];
+}
+
+inline uint32_t DFANode::RangeIterator::target() const
+{
+ return dfa.transitionDestinations[position];
+}
+
+inline void DFANode::RangeIterator::resetTarget(uint32_t newTarget)
+{
+ dfa.transitionDestinations[position] = newTarget;
+}
+
+} // namespace ContentExtensions
+} // namespace WebCore
+
+#endif // ENABLE(CONTENT_EXTENSIONS)
diff --git a/Source/WebCore/contentextensions/DFABytecode.h b/Source/WebCore/contentextensions/DFABytecode.h
new file mode 100644
index 000000000..50981c278
--- /dev/null
+++ b/Source/WebCore/contentextensions/DFABytecode.h
@@ -0,0 +1,134 @@
+/*
+ * Copyright (C) 2015 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#pragma once
+
+#if ENABLE(CONTENT_EXTENSIONS)
+
+namespace WebCore {
+
+namespace ContentExtensions {
+
+typedef uint8_t DFABytecode;
+
+// Increment UserContentExtensionStore::CurrentContentExtensionFileVersion
+// when making any non-backwards-compatible changes to the bytecode.
+// FIXME: Changes here should not require changes in WebKit2. Move all versioning to WebCore.
+enum class DFABytecodeInstruction : uint8_t {
+
+ // CheckValue has two arguments:
+ // The value to check (1 byte),
+ // The distance to jump if the values are equal (1-4 bytes, signed).
+ CheckValueCaseInsensitive = 0x0,
+ CheckValueCaseSensitive = 0x1,
+
+ // Jump table if the input value is within a certain range.
+ // The lower value (1 byte).
+ // The higher value (1 byte).
+ // The distance to jump if the value is in the range
+ // for every character in the range (1-4 bytes, signed).
+ JumpTableCaseInsensitive = 0x2,
+ JumpTableCaseSensitive = 0x3,
+
+ // Jump to an offset if the input value is within a certain range.
+ // The lower value (1 byte).
+ // The higher value (1 byte).
+ // The distance to jump if the value is in the range (1-4 bytes, signed).
+ CheckValueRangeCaseInsensitive = 0x4,
+ CheckValueRangeCaseSensitive = 0x5,
+
+ // AppendAction has one argument:
+ // The action to append (4 bytes).
+ AppendAction = 0x6,
+ AppendActionWithIfDomain = 0x7,
+
+ // TestFlagsAndAppendAction has two arguments:
+ // The flags to check before appending (2 bytes).
+ // The action to append (4 bytes).
+ TestFlagsAndAppendAction = 0x8,
+ TestFlagsAndAppendActionWithIfDomain = 0x9,
+
+ // Terminate has no arguments.
+ Terminate = 0xA,
+
+ // Jump has one argument:
+ // The distance to jump (1-4 bytes, signed).
+ Jump = 0xB,
+};
+
+// The last four bits contain the instruction type.
+const uint8_t DFABytecodeInstructionMask = 0x0F;
+const uint8_t DFABytecodeJumpSizeMask = 0xF0;
+
+// DFA bytecode starts with a 4 byte header which contains the size of this DFA.
+typedef uint32_t DFAHeader;
+
+// A DFABytecodeJumpSize is stored in the top four bits of the DFABytecodeInstructions that have a jump.
+enum DFABytecodeJumpSize {
+ Int8 = 0x10,
+ Int16 = 0x20,
+ Int24 = 0x30,
+ Int32 = 0x40,
+};
+const int32_t Int24Max = (1 << 23) - 1;
+const int32_t Int24Min = -(1 << 23);
+
+static inline DFABytecodeJumpSize smallestPossibleJumpSize(int32_t longestPossibleJump)
+{
+ if (longestPossibleJump <= std::numeric_limits<int8_t>::max() && longestPossibleJump >= std::numeric_limits<int8_t>::min())
+ return Int8;
+ if (longestPossibleJump <= std::numeric_limits<int16_t>::max() && longestPossibleJump >= std::numeric_limits<int16_t>::min())
+ return Int16;
+ if (longestPossibleJump <= Int24Max && longestPossibleJump >= Int24Min)
+ return Int24;
+ return Int32;
+}
+
+static inline size_t instructionSizeWithArguments(DFABytecodeInstruction instruction)
+{
+ switch (instruction) {
+ case DFABytecodeInstruction::CheckValueCaseSensitive:
+ case DFABytecodeInstruction::CheckValueCaseInsensitive:
+ case DFABytecodeInstruction::JumpTableCaseInsensitive:
+ case DFABytecodeInstruction::JumpTableCaseSensitive:
+ case DFABytecodeInstruction::CheckValueRangeCaseSensitive:
+ case DFABytecodeInstruction::CheckValueRangeCaseInsensitive:
+ case DFABytecodeInstruction::Jump:
+ RELEASE_ASSERT_NOT_REACHED(); // Variable instruction size.
+ case DFABytecodeInstruction::AppendAction:
+ case DFABytecodeInstruction::AppendActionWithIfDomain:
+ return sizeof(DFABytecodeInstruction) + sizeof(uint32_t);
+ case DFABytecodeInstruction::TestFlagsAndAppendAction:
+ case DFABytecodeInstruction::TestFlagsAndAppendActionWithIfDomain:
+ return sizeof(DFABytecodeInstruction) + sizeof(uint16_t) + sizeof(uint32_t);
+ case DFABytecodeInstruction::Terminate:
+ return sizeof(DFABytecodeInstruction);
+ }
+}
+
+} // namespace ContentExtensions
+} // namespace WebCore
+
+#endif // ENABLE(CONTENT_EXTENSIONS)
diff --git a/Source/WebCore/contentextensions/DFABytecodeCompiler.cpp b/Source/WebCore/contentextensions/DFABytecodeCompiler.cpp
new file mode 100644
index 000000000..c9c5f8b8e
--- /dev/null
+++ b/Source/WebCore/contentextensions/DFABytecodeCompiler.cpp
@@ -0,0 +1,495 @@
+/*
+ * Copyright (C) 2015 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "DFABytecodeCompiler.h"
+
+#if ENABLE(CONTENT_EXTENSIONS)
+
+#include "ContentExtensionRule.h"
+#include "DFA.h"
+#include "DFANode.h"
+
+namespace WebCore {
+
+namespace ContentExtensions {
+
+template <typename IntType>
+inline void append(Vector<DFABytecode>& bytecode, IntType value)
+{
+ bytecode.resize(bytecode.size() + sizeof(IntType));
+ *reinterpret_cast<IntType*>(&bytecode[bytecode.size() - sizeof(IntType)]) = value;
+}
+
+inline void appendZeroes(Vector<DFABytecode>& bytecode, DFABytecodeJumpSize jumpSize)
+{
+ switch (jumpSize) {
+ case DFABytecodeJumpSize::Int8:
+ append<int8_t>(bytecode, 0); // This value will be set when linking.
+ break;
+ case DFABytecodeJumpSize::Int16:
+ append<int16_t>(bytecode, 0); // This value will be set when linking.
+ break;
+ case DFABytecodeJumpSize::Int24:
+ append<uint16_t>(bytecode, 0);
+ append<int8_t>(bytecode, 0); // These values will be set when linking.
+ break;
+ case DFABytecodeJumpSize::Int32:
+ append<int32_t>(bytecode, 0); // This value will be set when linking.
+ break;
+ }
+}
+
+template <typename IntType>
+inline void setBits(Vector<DFABytecode>& bytecode, uint32_t index, IntType value)
+{
+ RELEASE_ASSERT(index + sizeof(IntType) <= bytecode.size());
+ ASSERT_WITH_MESSAGE(!*reinterpret_cast<IntType*>(&bytecode[index]), "Right now we should only be using setBits to overwrite values that were zero as a placeholder.");
+ *reinterpret_cast<IntType*>(&bytecode[index]) = value;
+}
+
+static unsigned appendActionBytecodeSize(uint64_t action)
+{
+ if (action & ActionFlagMask)
+ return sizeof(DFABytecodeInstruction) + sizeof(uint16_t) + sizeof(uint32_t);
+ return sizeof(DFABytecodeInstruction) + sizeof(uint32_t);
+}
+
+void DFABytecodeCompiler::emitAppendAction(uint64_t action)
+{
+ // High bits are used to store flags. See compileRuleList.
+ if (action & ActionFlagMask) {
+ if (action & IfDomainFlag)
+ append<DFABytecodeInstruction>(m_bytecode, DFABytecodeInstruction::TestFlagsAndAppendActionWithIfDomain);
+ else
+ append<DFABytecodeInstruction>(m_bytecode, DFABytecodeInstruction::TestFlagsAndAppendAction);
+ append<uint16_t>(m_bytecode, static_cast<uint16_t>(action >> 32));
+ append<uint32_t>(m_bytecode, static_cast<uint32_t>(action));
+ } else {
+ if (action & IfDomainFlag)
+ append<DFABytecodeInstruction>(m_bytecode, DFABytecodeInstruction::AppendActionWithIfDomain);
+ else
+ append<DFABytecodeInstruction>(m_bytecode, DFABytecodeInstruction::AppendAction);
+ append<uint32_t>(m_bytecode, static_cast<uint32_t>(action));
+ }
+}
+
+int32_t DFABytecodeCompiler::longestPossibleJump(uint32_t instructionLocation, uint32_t sourceNodeIndex, uint32_t destinationNodeIndex)
+{
+ if (m_nodeStartOffsets[destinationNodeIndex] == std::numeric_limits<uint32_t>::max()) {
+ // Jumping to a node that hasn't been compiled yet, we don't know exactly how far forward we will need to jump,
+ // so make sure we have enough room for the worst possible case, the farthest possible jump
+ // which would be the distance if there were no compacted branches between this jump and its destination.
+ ASSERT(instructionLocation >= m_nodeStartOffsets[sourceNodeIndex]);
+ ASSERT(m_maxNodeStartOffsets[destinationNodeIndex] > m_maxNodeStartOffsets[sourceNodeIndex]);
+ ASSERT(m_nodeStartOffsets[sourceNodeIndex] != std::numeric_limits<uint32_t>::max());
+ return m_maxNodeStartOffsets[destinationNodeIndex] - m_maxNodeStartOffsets[sourceNodeIndex] - (m_nodeStartOffsets[sourceNodeIndex] - instructionLocation);
+ }
+
+ // Jumping to an already compiled node, we already know exactly where we will need to jump to.
+ ASSERT(m_nodeStartOffsets[destinationNodeIndex] <= instructionLocation);
+ return m_nodeStartOffsets[destinationNodeIndex] - instructionLocation;
+}
+
+void DFABytecodeCompiler::emitJump(uint32_t sourceNodeIndex, uint32_t destinationNodeIndex)
+{
+ uint32_t instructionLocation = m_bytecode.size();
+ uint32_t jumpLocation = instructionLocation + sizeof(uint8_t);
+ int32_t longestPossibleJumpDistance = longestPossibleJump(instructionLocation, sourceNodeIndex, destinationNodeIndex);
+ DFABytecodeJumpSize jumpSize = smallestPossibleJumpSize(longestPossibleJumpDistance);
+ append<uint8_t>(m_bytecode, static_cast<uint8_t>(DFABytecodeInstruction::Jump) | jumpSize);
+
+ m_linkRecords.append(LinkRecord({jumpSize, longestPossibleJumpDistance, instructionLocation, jumpLocation, destinationNodeIndex}));
+ appendZeroes(m_bytecode, jumpSize);
+}
+
+void DFABytecodeCompiler::emitCheckValue(uint8_t value, uint32_t sourceNodeIndex, uint32_t destinationNodeIndex, bool caseSensitive)
+{
+ uint32_t instructionLocation = m_bytecode.size();
+ uint32_t jumpLocation = instructionLocation + 2 * sizeof(uint8_t);
+ int32_t longestPossibleJumpDistance = longestPossibleJump(instructionLocation, sourceNodeIndex, destinationNodeIndex);
+ DFABytecodeJumpSize jumpSize = smallestPossibleJumpSize(longestPossibleJumpDistance);
+ DFABytecodeInstruction instruction = caseSensitive ? DFABytecodeInstruction::CheckValueCaseSensitive : DFABytecodeInstruction::CheckValueCaseInsensitive;
+ append<uint8_t>(m_bytecode, static_cast<uint8_t>(instruction) | jumpSize);
+ append<uint8_t>(m_bytecode, value);
+ m_linkRecords.append(LinkRecord({jumpSize, longestPossibleJumpDistance, instructionLocation, jumpLocation, destinationNodeIndex}));
+ appendZeroes(m_bytecode, jumpSize);
+}
+
+void DFABytecodeCompiler::emitCheckValueRange(uint8_t lowValue, uint8_t highValue, uint32_t sourceNodeIndex, uint32_t destinationNodeIndex, bool caseSensitive)
+{
+ ASSERT_WITH_MESSAGE(lowValue < highValue, "The instruction semantic impose lowValue is strictly less than highValue.");
+
+ uint32_t instructionLocation = m_bytecode.size();
+ uint32_t jumpLocation = instructionLocation + 3 * sizeof(uint8_t);
+ int32_t longestPossibleJumpDistance = longestPossibleJump(instructionLocation, sourceNodeIndex, destinationNodeIndex);
+ DFABytecodeJumpSize jumpSize = smallestPossibleJumpSize(longestPossibleJumpDistance);
+ DFABytecodeInstruction instruction = caseSensitive ? DFABytecodeInstruction::CheckValueRangeCaseSensitive : DFABytecodeInstruction::CheckValueRangeCaseInsensitive;
+ append<uint8_t>(m_bytecode, static_cast<uint8_t>(instruction) | jumpSize);
+ append<uint8_t>(m_bytecode, lowValue);
+ append<uint8_t>(m_bytecode, highValue);
+ m_linkRecords.append(LinkRecord({jumpSize, longestPossibleJumpDistance, instructionLocation, jumpLocation, destinationNodeIndex}));
+ appendZeroes(m_bytecode, jumpSize);
+}
+
+void DFABytecodeCompiler::emitTerminate()
+{
+ append<DFABytecodeInstruction>(m_bytecode, DFABytecodeInstruction::Terminate);
+}
+
+void DFABytecodeCompiler::compileNode(uint32_t index, bool root)
+{
+ unsigned startSize = m_bytecode.size();
+
+ const DFANode& node = m_dfa.nodes[index];
+ if (node.isKilled()) {
+ ASSERT(m_nodeStartOffsets[index] == std::numeric_limits<uint32_t>::max());
+ return;
+ }
+
+ // Record starting index for linking.
+ if (!root)
+ m_nodeStartOffsets[index] = m_bytecode.size();
+
+ for (uint64_t action : node.actions(m_dfa))
+ emitAppendAction(action);
+
+ // If we jump to the root, we don't want to re-add its actions to a HashSet.
+ // We know we have already added them because the root is always compiled first and we always start interpreting at the beginning.
+ if (root)
+ m_nodeStartOffsets[index] = m_bytecode.size();
+
+ compileNodeTransitions(index);
+
+ ASSERT_UNUSED(startSize, m_bytecode.size() - startSize <= compiledNodeMaxBytecodeSize(index));
+}
+
+unsigned DFABytecodeCompiler::compiledNodeMaxBytecodeSize(uint32_t index)
+{
+ const DFANode& node = m_dfa.nodes[index];
+ if (node.isKilled())
+ return 0;
+ unsigned size = 0;
+ for (uint64_t action : node.actions(m_dfa))
+ size += appendActionBytecodeSize(action);
+ size += nodeTransitionsMaxBytecodeSize(node);
+ return size;
+}
+
+DFABytecodeCompiler::JumpTable DFABytecodeCompiler::extractJumpTable(Vector<DFABytecodeCompiler::Range>& ranges, unsigned firstRange, unsigned lastRange)
+{
+ ASSERT(lastRange > firstRange);
+ ASSERT(lastRange < ranges.size());
+
+ JumpTable jumpTable;
+ jumpTable.min = ranges[firstRange].min;
+ jumpTable.max = ranges[lastRange].max;
+ jumpTable.caseSensitive = ranges[lastRange].caseSensitive;
+
+ unsigned size = lastRange - firstRange + 1;
+ jumpTable.destinations.reserveInitialCapacity(size);
+ for (unsigned i = firstRange; i <= lastRange; ++i) {
+ const Range& range = ranges[i];
+
+ ASSERT(range.caseSensitive == jumpTable.caseSensitive);
+ ASSERT(range.min == range.max);
+ ASSERT(range.min >= jumpTable.min);
+ ASSERT(range.min <= jumpTable.max);
+
+ jumpTable.destinations.uncheckedAppend(range.destination);
+ }
+
+ ranges.remove(firstRange, size);
+
+ return jumpTable;
+}
+
+DFABytecodeCompiler::Transitions DFABytecodeCompiler::transitions(const DFANode& node)
+{
+ Transitions transitions;
+
+ uint32_t destinations[128];
+ memset(destinations, 0xff, sizeof(destinations));
+ const uint32_t noDestination = std::numeric_limits<uint32_t>::max();
+
+ transitions.useFallbackTransition = node.canUseFallbackTransition(m_dfa);
+ if (transitions.useFallbackTransition)
+ transitions.fallbackTransitionTarget = node.bestFallbackTarget(m_dfa);
+
+ for (const auto& transition : node.transitions(m_dfa)) {
+ uint32_t targetNodeIndex = transition.target();
+ if (transitions.useFallbackTransition && transitions.fallbackTransitionTarget == targetNodeIndex)
+ continue;
+
+ for (uint16_t i = transition.range().first; i <= transition.range().last; ++i)
+ destinations[i] = targetNodeIndex;
+ }
+
+ Vector<Range>& ranges = transitions.ranges;
+ uint8_t rangeMin;
+ bool hasRangeMin = false;
+ for (uint8_t i = 0; i < 128; i++) {
+ if (hasRangeMin) {
+ if (destinations[i] != destinations[rangeMin]) {
+
+ // This is the end of a range. Check if it can be case insensitive.
+ uint8_t rangeMax = i - 1;
+ bool caseSensitive = true;
+ if (rangeMin >= 'A' && rangeMax <= 'Z') {
+ caseSensitive = false;
+ for (uint8_t rangeIndex = rangeMin; rangeIndex <= rangeMax; rangeIndex++) {
+ if (destinations[rangeMin] != destinations[toASCIILower(rangeIndex)]) {
+ caseSensitive = true;
+ break;
+ }
+ }
+ }
+
+ if (!caseSensitive) {
+ // If all the lower-case destinations are the same as the upper-case destinations,
+ // then they will be covered by a case-insensitive range and will not need their own range.
+ for (uint8_t rangeIndex = rangeMin; rangeIndex <= rangeMax; rangeIndex++) {
+ ASSERT(destinations[rangeMin] == destinations[toASCIILower(rangeIndex)]);
+ destinations[toASCIILower(rangeIndex)] = noDestination;
+ }
+ ranges.append(Range(toASCIILower(rangeMin), toASCIILower(rangeMax), destinations[rangeMin], caseSensitive));
+ } else
+ ranges.append(Range(rangeMin, rangeMax, destinations[rangeMin], caseSensitive));
+
+ if (destinations[i] == noDestination)
+ hasRangeMin = false;
+ else
+ rangeMin = i;
+ }
+ } else {
+ if (destinations[i] != noDestination) {
+ rangeMin = i;
+ hasRangeMin = true;
+ }
+ }
+ }
+ if (hasRangeMin) {
+ // Ranges are appended after passing the end of them.
+ // If a range goes to 127, we will have an uncommitted rangeMin because the loop does not check 128.
+ // If a range goes to 127, there will never be values higher than it, so checking for case-insensitive ranges would always fail.
+ ranges.append(Range(rangeMin, 127, destinations[rangeMin], true));
+ }
+
+ Vector<JumpTable>& jumpTables = transitions.jumpTables;
+ unsigned rangePosition = 0;
+ unsigned baseRangePosition = std::numeric_limits<unsigned>::max();
+ Range* baseRange = nullptr;
+ while (rangePosition < ranges.size()) {
+ auto& range = ranges[rangePosition];
+ if (baseRange) {
+ if (range.min != range.max
+ || baseRange->caseSensitive != range.caseSensitive
+ || ranges[rangePosition - 1].max + 1 != range.min) {
+ if (rangePosition - baseRangePosition > 1) {
+ jumpTables.append(extractJumpTable(ranges, baseRangePosition, rangePosition - 1));
+ rangePosition = baseRangePosition;
+ }
+ baseRangePosition = std::numeric_limits<unsigned>::max();
+ baseRange = nullptr;
+ }
+ } else {
+ if (range.min == range.max) {
+ baseRangePosition = rangePosition;
+ baseRange = &range;
+ }
+ }
+ ++rangePosition;
+ }
+
+ if (baseRange && ranges.size() - baseRangePosition > 1)
+ jumpTables.append(extractJumpTable(ranges, baseRangePosition, ranges.size() - 1));
+
+ return transitions;
+}
+
+unsigned DFABytecodeCompiler::checkForJumpTableMaxBytecodeSize(const JumpTable& jumpTable)
+{
+ unsigned baselineSize = sizeof(DFABytecodeInstruction::CheckValueRangeCaseInsensitive) + 2 * sizeof(uint8_t);
+ unsigned targetsSize = (jumpTable.max - jumpTable.min + 1) * sizeof(uint32_t);
+ return baselineSize + targetsSize;
+}
+
+unsigned DFABytecodeCompiler::checkForRangeMaxBytecodeSize(const Range& range)
+{
+ if (range.min == range.max)
+ return sizeof(DFABytecodeInstruction::CheckValueCaseInsensitive) + sizeof(uint8_t) + sizeof(uint32_t);
+ return sizeof(DFABytecodeInstruction::CheckValueRangeCaseInsensitive) + 2 * sizeof(uint8_t) + sizeof(uint32_t);
+}
+
+void DFABytecodeCompiler::compileJumpTable(uint32_t nodeIndex, const JumpTable& jumpTable)
+{
+ unsigned startSize = m_bytecode.size();
+ ASSERT_WITH_MESSAGE(jumpTable.max < 128, "The DFA engine only supports the ASCII alphabet.");
+ ASSERT(jumpTable.min <= jumpTable.max);
+
+ uint32_t instructionLocation = m_bytecode.size();
+ DFABytecodeJumpSize jumpSize = Int8;
+ for (uint32_t destinationNodeIndex : jumpTable.destinations) {
+ int32_t longestPossibleJumpDistance = longestPossibleJump(instructionLocation, nodeIndex, destinationNodeIndex);
+ DFABytecodeJumpSize localJumpSize = smallestPossibleJumpSize(longestPossibleJumpDistance);
+ jumpSize = std::max(jumpSize, localJumpSize);
+ }
+
+ DFABytecodeInstruction instruction = jumpTable.caseSensitive ? DFABytecodeInstruction::JumpTableCaseSensitive : DFABytecodeInstruction::JumpTableCaseInsensitive;
+ append<uint8_t>(m_bytecode, static_cast<uint8_t>(instruction) | jumpSize);
+ append<uint8_t>(m_bytecode, jumpTable.min);
+ append<uint8_t>(m_bytecode, jumpTable.max);
+
+ for (uint32_t destinationNodeIndex : jumpTable.destinations) {
+ int32_t longestPossibleJumpDistance = longestPossibleJump(instructionLocation, nodeIndex, destinationNodeIndex);
+ uint32_t jumpLocation = m_bytecode.size();
+ m_linkRecords.append(LinkRecord({jumpSize, longestPossibleJumpDistance, instructionLocation, jumpLocation, destinationNodeIndex}));
+ appendZeroes(m_bytecode, jumpSize);
+ }
+
+ ASSERT_UNUSED(startSize, m_bytecode.size() - startSize <= checkForJumpTableMaxBytecodeSize(jumpTable));
+}
+
+void DFABytecodeCompiler::compileCheckForRange(uint32_t nodeIndex, const Range& range)
+{
+ unsigned startSize = m_bytecode.size();
+ ASSERT_WITH_MESSAGE(range.max < 128, "The DFA engine only supports the ASCII alphabet.");
+ ASSERT(range.min <= range.max);
+
+ if (range.min == range.max)
+ emitCheckValue(range.min, nodeIndex, range.destination, range.caseSensitive);
+ else
+ emitCheckValueRange(range.min, range.max, nodeIndex, range.destination, range.caseSensitive);
+
+ ASSERT_UNUSED(startSize, m_bytecode.size() - startSize <= checkForRangeMaxBytecodeSize(range));
+}
+
+unsigned DFABytecodeCompiler::nodeTransitionsMaxBytecodeSize(const DFANode& node)
+{
+ unsigned size = 0;
+ Transitions nodeTransitions = transitions(node);
+ for (const auto& jumpTable : nodeTransitions.jumpTables)
+ size += checkForJumpTableMaxBytecodeSize(jumpTable);
+ for (const auto& range : nodeTransitions.ranges)
+ size += checkForRangeMaxBytecodeSize(range);
+ if (nodeTransitions.useFallbackTransition)
+ size += sizeof(DFABytecodeInstruction::Jump) + sizeof(uint32_t);
+ else
+ size += instructionSizeWithArguments(DFABytecodeInstruction::Terminate);
+ return size;
+}
+
+void DFABytecodeCompiler::compileNodeTransitions(uint32_t nodeIndex)
+{
+ const DFANode& node = m_dfa.nodes[nodeIndex];
+ unsigned startSize = m_bytecode.size();
+
+ Transitions nodeTransitions = transitions(node);
+ for (const auto& jumpTable : nodeTransitions.jumpTables)
+ compileJumpTable(nodeIndex, jumpTable);
+ for (const auto& range : nodeTransitions.ranges)
+ compileCheckForRange(nodeIndex, range);
+ if (nodeTransitions.useFallbackTransition)
+ emitJump(nodeIndex, nodeTransitions.fallbackTransitionTarget);
+ else
+ emitTerminate();
+
+ ASSERT_UNUSED(startSize, m_bytecode.size() - startSize <= nodeTransitionsMaxBytecodeSize(node));
+}
+
+void DFABytecodeCompiler::compile()
+{
+ uint32_t startLocation = m_bytecode.size();
+ append<DFAHeader>(m_bytecode, 0); // This will be set when we are finished compiling this DFA.
+
+ m_nodeStartOffsets.resize(m_dfa.nodes.size());
+ for (unsigned i = 0; i < m_dfa.nodes.size(); ++i)
+ m_nodeStartOffsets[i] = std::numeric_limits<uint32_t>::max();
+
+ // Populate m_maxNodeStartOffsets with a worst-case index of where the node would be with no branch compaction.
+ // Compacting the branches using 1-4 byte signed jump distances should only make nodes closer together than this.
+ ASSERT(m_maxNodeStartOffsets.isEmpty());
+ m_maxNodeStartOffsets.clear();
+ m_maxNodeStartOffsets.resize(m_dfa.nodes.size());
+ unsigned rootActionsSize = 0;
+ for (uint64_t action : m_dfa.nodes[m_dfa.root].actions(m_dfa))
+ rootActionsSize += appendActionBytecodeSize(action);
+ m_maxNodeStartOffsets[m_dfa.root] = sizeof(DFAHeader) + rootActionsSize;
+ unsigned nextIndex = sizeof(DFAHeader) + compiledNodeMaxBytecodeSize(m_dfa.root);
+ for (uint32_t i = 0; i < m_dfa.nodes.size(); i++) {
+ if (i != m_dfa.root) {
+ m_maxNodeStartOffsets[i] = nextIndex;
+ nextIndex += compiledNodeMaxBytecodeSize(i);
+ }
+ }
+
+ // Make sure the root is always at the beginning of the bytecode.
+ compileNode(m_dfa.root, true);
+ for (uint32_t i = 0; i < m_dfa.nodes.size(); i++) {
+ if (i != m_dfa.root)
+ compileNode(i, false);
+ }
+
+ ASSERT(m_maxNodeStartOffsets.size() == m_nodeStartOffsets.size());
+ for (unsigned i = 0; i < m_dfa.nodes.size(); ++i) {
+ if (m_nodeStartOffsets[i] != std::numeric_limits<uint32_t>::max())
+ ASSERT(m_maxNodeStartOffsets[i] >= m_nodeStartOffsets[i]);
+ }
+
+ // Link.
+ for (const auto& linkRecord : m_linkRecords) {
+ uint32_t destination = m_nodeStartOffsets[linkRecord.destinationNodeIndex];
+ RELEASE_ASSERT(destination < std::numeric_limits<int32_t>::max());
+ int32_t distance = destination - linkRecord.instructionLocation;
+ ASSERT(abs(distance) <= abs(linkRecord.longestPossibleJump));
+
+ switch (linkRecord.jumpSize) {
+ case Int8:
+ RELEASE_ASSERT(distance == static_cast<int8_t>(distance));
+ setBits<int8_t>(m_bytecode, linkRecord.jumpLocation, static_cast<int8_t>(distance));
+ break;
+ case Int16:
+ RELEASE_ASSERT(distance == static_cast<int16_t>(distance));
+ setBits<int16_t>(m_bytecode, linkRecord.jumpLocation, static_cast<int16_t>(distance));
+ break;
+ case Int24:
+ RELEASE_ASSERT(distance >= Int24Min && distance <= Int24Max);
+ setBits<uint16_t>(m_bytecode, linkRecord.jumpLocation, static_cast<uint16_t>(distance));
+ setBits<int8_t>(m_bytecode, linkRecord.jumpLocation + sizeof(int16_t), static_cast<int8_t>(distance >> 16));
+ break;
+ case Int32:
+ setBits<int32_t>(m_bytecode, linkRecord.jumpLocation, distance);
+ break;
+ }
+ }
+
+ setBits<DFAHeader>(m_bytecode, startLocation, m_bytecode.size() - startLocation);
+}
+
+} // namespace ContentExtensions
+
+} // namespace WebCore
+
+#endif // ENABLE(CONTENT_EXTENSIONS)
diff --git a/Source/WebCore/contentextensions/DFABytecodeCompiler.h b/Source/WebCore/contentextensions/DFABytecodeCompiler.h
new file mode 100644
index 000000000..5ef0bd789
--- /dev/null
+++ b/Source/WebCore/contentextensions/DFABytecodeCompiler.h
@@ -0,0 +1,120 @@
+/*
+ * Copyright (C) 2015 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#pragma once
+
+#if ENABLE(CONTENT_EXTENSIONS)
+
+#include "DFABytecode.h"
+#include <wtf/Vector.h>
+
+namespace WebCore {
+
+namespace ContentExtensions {
+
+struct DFA;
+class DFANode;
+
+class WEBCORE_EXPORT DFABytecodeCompiler {
+public:
+ DFABytecodeCompiler(const DFA& dfa, Vector<DFABytecode>& bytecode)
+ : m_bytecode(bytecode)
+ , m_dfa(dfa)
+ {
+ }
+
+ void compile();
+
+private:
+ struct Range {
+ Range(uint8_t min, uint8_t max, uint32_t destination, bool caseSensitive)
+ : min(min)
+ , max(max)
+ , destination(destination)
+ , caseSensitive(caseSensitive)
+ {
+ }
+ uint8_t min;
+ uint8_t max;
+ uint32_t destination;
+ bool caseSensitive;
+ };
+ struct JumpTable {
+ ~JumpTable()
+ {
+ ASSERT(min + destinations.size() == max + 1);
+ ASSERT(min == max || destinations.size() > 1);
+ }
+
+ uint8_t min { 0 };
+ uint8_t max { 0 };
+ bool caseSensitive { true };
+ Vector<uint32_t> destinations;
+ };
+ struct Transitions {
+ Vector<JumpTable> jumpTables;
+ Vector<Range> ranges;
+ bool useFallbackTransition { false };
+ uint32_t fallbackTransitionTarget { std::numeric_limits<uint32_t>::max() };
+ };
+ JumpTable extractJumpTable(Vector<Range>&, unsigned first, unsigned last);
+ Transitions transitions(const DFANode&);
+
+ unsigned compiledNodeMaxBytecodeSize(uint32_t index);
+ void compileNode(uint32_t index, bool root);
+ unsigned nodeTransitionsMaxBytecodeSize(const DFANode&);
+ void compileNodeTransitions(uint32_t nodeIndex);
+ unsigned checkForJumpTableMaxBytecodeSize(const JumpTable&);
+ unsigned checkForRangeMaxBytecodeSize(const Range&);
+ void compileJumpTable(uint32_t nodeIndex, const JumpTable&);
+ void compileCheckForRange(uint32_t nodeIndex, const Range&);
+ int32_t longestPossibleJump(uint32_t jumpLocation, uint32_t sourceNodeIndex, uint32_t destinationNodeIndex);
+
+ void emitAppendAction(uint64_t);
+ void emitJump(uint32_t sourceNodeIndex, uint32_t destinationNodeIndex);
+ void emitCheckValue(uint8_t value, uint32_t sourceNodeIndex, uint32_t destinationNodeIndex, bool caseSensitive);
+ void emitCheckValueRange(uint8_t lowValue, uint8_t highValue, uint32_t sourceNodeIndex, uint32_t destinationNodeIndex, bool caseSensitive);
+ void emitTerminate();
+
+ Vector<DFABytecode>& m_bytecode;
+ const DFA& m_dfa;
+
+ Vector<uint32_t> m_maxNodeStartOffsets;
+ Vector<uint32_t> m_nodeStartOffsets;
+
+ struct LinkRecord {
+ DFABytecodeJumpSize jumpSize;
+ int32_t longestPossibleJump;
+ uint32_t instructionLocation;
+ uint32_t jumpLocation;
+ uint32_t destinationNodeIndex;
+ };
+ Vector<LinkRecord> m_linkRecords;
+};
+
+} // namespace ContentExtensions
+} // namespace WebCore
+
+#endif // ENABLE(CONTENT_EXTENSIONS)
diff --git a/Source/WebCore/contentextensions/DFABytecodeInterpreter.cpp b/Source/WebCore/contentextensions/DFABytecodeInterpreter.cpp
new file mode 100644
index 000000000..a0da084a6
--- /dev/null
+++ b/Source/WebCore/contentextensions/DFABytecodeInterpreter.cpp
@@ -0,0 +1,362 @@
+/*
+ * Copyright (C) 2015 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "DFABytecodeInterpreter.h"
+
+#include "ContentExtensionsDebugging.h"
+#include <wtf/text/CString.h>
+
+#if ENABLE(CONTENT_EXTENSIONS)
+
+namespace WebCore {
+
+namespace ContentExtensions {
+
+template <typename IntType>
+static inline IntType getBits(const DFABytecode* bytecode, uint32_t bytecodeLength, uint32_t index)
+{
+ ASSERT_UNUSED(bytecodeLength, index + sizeof(IntType) <= bytecodeLength);
+ return *reinterpret_cast<const IntType*>(&bytecode[index]);
+}
+
+static inline DFABytecodeInstruction getInstruction(const DFABytecode* bytecode, uint32_t bytecodeLength, uint32_t index)
+{
+ return static_cast<DFABytecodeInstruction>(getBits<uint8_t>(bytecode, bytecodeLength, index) & DFABytecodeInstructionMask);
+}
+
+static inline size_t jumpSizeInBytes(DFABytecodeJumpSize jumpSize)
+{
+ switch (jumpSize) {
+ case Int8:
+ return sizeof(int8_t);
+ case Int16:
+ return sizeof(int16_t);
+ case Int24:
+ return sizeof(uint16_t) + sizeof(int8_t);
+ case Int32:
+ return sizeof(int32_t);
+ default:
+ RELEASE_ASSERT_NOT_REACHED();
+ }
+}
+
+static inline DFABytecodeJumpSize getJumpSize(const DFABytecode* bytecode, uint32_t bytecodeLength, uint32_t index)
+{
+ DFABytecodeJumpSize jumpSize = static_cast<DFABytecodeJumpSize>(getBits<uint8_t>(bytecode, bytecodeLength, index) & DFABytecodeJumpSizeMask);
+ ASSERT(jumpSize == DFABytecodeJumpSize::Int32 || jumpSize == DFABytecodeJumpSize::Int24 || jumpSize == DFABytecodeJumpSize::Int16 || jumpSize == DFABytecodeJumpSize::Int8);
+ return jumpSize;
+}
+
+static inline int32_t getJumpDistance(const DFABytecode* bytecode, uint32_t bytecodeLength, uint32_t index, DFABytecodeJumpSize jumpSize)
+{
+ switch (jumpSize) {
+ case Int8:
+ return getBits<int8_t>(bytecode, bytecodeLength, index);
+ case Int16:
+ return getBits<int16_t>(bytecode, bytecodeLength, index);
+ case Int24:
+ return getBits<uint16_t>(bytecode, bytecodeLength, index) | (static_cast<int32_t>(getBits<int8_t>(bytecode, bytecodeLength, index + sizeof(uint16_t))) << 16);
+ case Int32:
+ return getBits<int32_t>(bytecode, bytecodeLength, index);
+ default:
+ RELEASE_ASSERT_NOT_REACHED();
+ }
+}
+
+static inline bool matchesDomain(uint64_t actionAndFlags, const DFABytecodeInterpreter::Actions& domainActions)
+{
+ bool ifDomain = actionAndFlags & IfDomainFlag;
+ bool domain = domainActions.contains(actionAndFlags);
+ return ifDomain == domain;
+}
+
+void DFABytecodeInterpreter::interpretAppendAction(uint32_t& programCounter, Actions& actions, bool ifDomain)
+{
+ ASSERT(getInstruction(m_bytecode, m_bytecodeLength, programCounter) == DFABytecodeInstruction::AppendAction
+ || getInstruction(m_bytecode, m_bytecodeLength, programCounter) == DFABytecodeInstruction::AppendActionWithIfDomain);
+ uint64_t action = (ifDomain ? IfDomainFlag : 0) | static_cast<uint64_t>(getBits<uint32_t>(m_bytecode, m_bytecodeLength, programCounter + sizeof(DFABytecodeInstruction)));
+ if (!m_domainActions || matchesDomain(action, *m_domainActions))
+ actions.add(action);
+
+ programCounter += instructionSizeWithArguments(DFABytecodeInstruction::AppendAction);
+ ASSERT(instructionSizeWithArguments(DFABytecodeInstruction::AppendAction) == instructionSizeWithArguments(DFABytecodeInstruction::AppendActionWithIfDomain));
+}
+
+void DFABytecodeInterpreter::interpretTestFlagsAndAppendAction(uint32_t& programCounter, uint16_t flags, Actions& actions, bool ifDomain)
+{
+ ASSERT(getInstruction(m_bytecode, m_bytecodeLength, programCounter) == DFABytecodeInstruction::TestFlagsAndAppendAction
+ || getInstruction(m_bytecode, m_bytecodeLength, programCounter) == DFABytecodeInstruction::TestFlagsAndAppendActionWithIfDomain);
+ uint16_t flagsToCheck = getBits<uint16_t>(m_bytecode, m_bytecodeLength, programCounter + sizeof(DFABytecodeInstruction));
+
+ uint16_t loadTypeFlags = flagsToCheck & LoadTypeMask;
+ uint16_t resourceTypeFlags = flagsToCheck & ResourceTypeMask;
+
+ bool loadTypeMatches = loadTypeFlags ? (loadTypeFlags & flags) : true;
+ bool resourceTypeMatches = resourceTypeFlags ? (resourceTypeFlags & flags) : true;
+
+ if (loadTypeMatches && resourceTypeMatches) {
+ uint64_t actionAndFlags = (ifDomain ? IfDomainFlag : 0) | (static_cast<uint64_t>(flagsToCheck) << 32) | static_cast<uint64_t>(getBits<uint32_t>(m_bytecode, m_bytecodeLength, programCounter + sizeof(DFABytecodeInstruction) + sizeof(uint16_t)));
+ if (!m_domainActions || matchesDomain(actionAndFlags, *m_domainActions))
+ actions.add(actionAndFlags);
+ }
+ programCounter += instructionSizeWithArguments(DFABytecodeInstruction::TestFlagsAndAppendAction);
+ ASSERT(instructionSizeWithArguments(DFABytecodeInstruction::TestFlagsAndAppendAction) == instructionSizeWithArguments(DFABytecodeInstruction::TestFlagsAndAppendActionWithIfDomain));
+}
+
+template<bool caseSensitive>
+inline void DFABytecodeInterpreter::interpetJumpTable(const char* url, uint32_t& urlIndex, uint32_t& programCounter, bool& urlIndexIsAfterEndOfString)
+{
+ DFABytecodeJumpSize jumpSize = getJumpSize(m_bytecode, m_bytecodeLength, programCounter);
+
+ char character = caseSensitive ? url[urlIndex] : toASCIILower(url[urlIndex]);
+ uint8_t firstCharacter = getBits<uint8_t>(m_bytecode, m_bytecodeLength, programCounter + sizeof(DFABytecodeInstruction));
+ uint8_t lastCharacter = getBits<uint8_t>(m_bytecode, m_bytecodeLength, programCounter + sizeof(DFABytecodeInstruction) + sizeof(uint8_t));
+ if (character >= firstCharacter && character <= lastCharacter) {
+ uint32_t startOffset = programCounter + sizeof(DFABytecodeInstruction) + 2 * sizeof(uint8_t);
+ uint32_t jumpLocation = startOffset + (character - firstCharacter) * jumpSizeInBytes(jumpSize);
+ programCounter += getJumpDistance(m_bytecode, m_bytecodeLength, jumpLocation, jumpSize);
+ if (!character)
+ urlIndexIsAfterEndOfString = true;
+ urlIndex++; // This represents an edge in the DFA.
+ } else
+ programCounter += sizeof(DFABytecodeInstruction) + 2 * sizeof(uint8_t) + jumpSizeInBytes(jumpSize) * (lastCharacter - firstCharacter + 1);
+}
+
+DFABytecodeInterpreter::Actions DFABytecodeInterpreter::actionsMatchingEverything()
+{
+ Actions actions;
+
+ // DFA header.
+ uint32_t dfaBytecodeLength = getBits<uint32_t>(m_bytecode, m_bytecodeLength, 0);
+ uint32_t programCounter = sizeof(uint32_t);
+
+ while (programCounter < dfaBytecodeLength) {
+ DFABytecodeInstruction instruction = getInstruction(m_bytecode, m_bytecodeLength, programCounter);
+ if (instruction == DFABytecodeInstruction::AppendAction)
+ interpretAppendAction(programCounter, actions, false);
+ else if (instruction == DFABytecodeInstruction::AppendActionWithIfDomain)
+ interpretAppendAction(programCounter, actions, true);
+ else if (instruction == DFABytecodeInstruction::TestFlagsAndAppendAction)
+ programCounter += instructionSizeWithArguments(DFABytecodeInstruction::TestFlagsAndAppendAction);
+ else if (instruction == DFABytecodeInstruction::TestFlagsAndAppendActionWithIfDomain)
+ programCounter += instructionSizeWithArguments(DFABytecodeInstruction::TestFlagsAndAppendActionWithIfDomain);
+ else
+ break;
+ }
+ return actions;
+}
+
+DFABytecodeInterpreter::Actions DFABytecodeInterpreter::interpretWithDomains(const CString& urlCString, uint16_t flags, const DFABytecodeInterpreter::Actions& domainActions)
+{
+ ASSERT(!m_domainActions);
+ m_domainActions = &domainActions;
+ DFABytecodeInterpreter::Actions actions = interpret(urlCString, flags);
+ m_domainActions = nullptr;
+ return actions;
+}
+
+DFABytecodeInterpreter::Actions DFABytecodeInterpreter::interpret(const CString& urlCString, uint16_t flags)
+{
+ const char* url = urlCString.data();
+ ASSERT(url);
+
+ Actions actions;
+
+ uint32_t programCounter = 0;
+ while (programCounter < m_bytecodeLength) {
+
+ // DFA header.
+ uint32_t dfaStart = programCounter;
+ uint32_t dfaBytecodeLength = getBits<uint32_t>(m_bytecode, m_bytecodeLength, programCounter);
+ programCounter += sizeof(uint32_t);
+
+ // Skip the actions without flags on the DFA root. These are accessed via actionsMatchingEverything.
+ if (!dfaStart) {
+ while (programCounter < dfaBytecodeLength) {
+ DFABytecodeInstruction instruction = getInstruction(m_bytecode, m_bytecodeLength, programCounter);
+ if (instruction == DFABytecodeInstruction::AppendAction)
+ programCounter += instructionSizeWithArguments(DFABytecodeInstruction::AppendAction);
+ else if (instruction == DFABytecodeInstruction::AppendActionWithIfDomain)
+ programCounter += instructionSizeWithArguments(DFABytecodeInstruction::AppendActionWithIfDomain);
+ else if (instruction == DFABytecodeInstruction::TestFlagsAndAppendAction)
+ interpretTestFlagsAndAppendAction(programCounter, flags, actions, false);
+ else if (instruction == DFABytecodeInstruction::TestFlagsAndAppendActionWithIfDomain)
+ interpretTestFlagsAndAppendAction(programCounter, flags, actions, true);
+ else
+ break;
+ }
+ if (programCounter >= m_bytecodeLength)
+ return actions;
+ } else {
+ ASSERT_WITH_MESSAGE(getInstruction(m_bytecode, m_bytecodeLength, programCounter) != DFABytecodeInstruction::AppendAction
+ && getInstruction(m_bytecode, m_bytecodeLength, programCounter) != DFABytecodeInstruction::AppendActionWithIfDomain
+ && getInstruction(m_bytecode, m_bytecodeLength, programCounter) != DFABytecodeInstruction::TestFlagsAndAppendAction
+ && getInstruction(m_bytecode, m_bytecodeLength, programCounter) != DFABytecodeInstruction::TestFlagsAndAppendActionWithIfDomain,
+ "Triggers that match everything should only be in the first DFA.");
+ }
+
+ // Interpret the bytecode from this DFA.
+ // This should always terminate if interpreting correctly compiled bytecode.
+ uint32_t urlIndex = 0;
+ bool urlIndexIsAfterEndOfString = false;
+ while (true) {
+ ASSERT(programCounter <= m_bytecodeLength);
+ switch (getInstruction(m_bytecode, m_bytecodeLength, programCounter)) {
+
+ case DFABytecodeInstruction::Terminate:
+ goto nextDFA;
+
+ case DFABytecodeInstruction::CheckValueCaseSensitive: {
+ if (urlIndexIsAfterEndOfString)
+ goto nextDFA;
+
+ // Check to see if the next character in the url is the value stored with the bytecode.
+ char character = url[urlIndex];
+ DFABytecodeJumpSize jumpSize = getJumpSize(m_bytecode, m_bytecodeLength, programCounter);
+ if (character == getBits<uint8_t>(m_bytecode, m_bytecodeLength, programCounter + sizeof(DFABytecodeInstruction))) {
+ uint32_t jumpLocation = programCounter + sizeof(DFABytecodeInstruction) + sizeof(uint8_t);
+ programCounter += getJumpDistance(m_bytecode, m_bytecodeLength, jumpLocation, jumpSize);
+ if (!character)
+ urlIndexIsAfterEndOfString = true;
+ urlIndex++; // This represents an edge in the DFA.
+ } else
+ programCounter += sizeof(DFABytecodeInstruction) + sizeof(uint8_t) + jumpSizeInBytes(jumpSize);
+ break;
+ }
+
+ case DFABytecodeInstruction::CheckValueCaseInsensitive: {
+ if (urlIndexIsAfterEndOfString)
+ goto nextDFA;
+
+ // Check to see if the next character in the url is the value stored with the bytecode.
+ char character = toASCIILower(url[urlIndex]);
+ DFABytecodeJumpSize jumpSize = getJumpSize(m_bytecode, m_bytecodeLength, programCounter);
+ if (character == getBits<uint8_t>(m_bytecode, m_bytecodeLength, programCounter + sizeof(DFABytecodeInstruction))) {
+ uint32_t jumpLocation = programCounter + sizeof(DFABytecodeInstruction) + sizeof(uint8_t);
+ programCounter += getJumpDistance(m_bytecode, m_bytecodeLength, jumpLocation, jumpSize);
+ if (!character)
+ urlIndexIsAfterEndOfString = true;
+ urlIndex++; // This represents an edge in the DFA.
+ } else
+ programCounter += sizeof(DFABytecodeInstruction) + sizeof(uint8_t) + jumpSizeInBytes(jumpSize);
+ break;
+ }
+
+ case DFABytecodeInstruction::JumpTableCaseInsensitive:
+ if (urlIndexIsAfterEndOfString)
+ goto nextDFA;
+
+ interpetJumpTable<false>(url, urlIndex, programCounter, urlIndexIsAfterEndOfString);
+ break;
+ case DFABytecodeInstruction::JumpTableCaseSensitive:
+ if (urlIndexIsAfterEndOfString)
+ goto nextDFA;
+
+ interpetJumpTable<true>(url, urlIndex, programCounter, urlIndexIsAfterEndOfString);
+ break;
+
+ case DFABytecodeInstruction::CheckValueRangeCaseSensitive: {
+ if (urlIndexIsAfterEndOfString)
+ goto nextDFA;
+
+ char character = url[urlIndex];
+ DFABytecodeJumpSize jumpSize = getJumpSize(m_bytecode, m_bytecodeLength, programCounter);
+ if (character >= getBits<uint8_t>(m_bytecode, m_bytecodeLength, programCounter + sizeof(DFABytecodeInstruction))
+ && character <= getBits<uint8_t>(m_bytecode, m_bytecodeLength, programCounter + sizeof(DFABytecodeInstruction) + sizeof(uint8_t))) {
+ uint32_t jumpLocation = programCounter + sizeof(DFABytecodeInstruction) + 2 * sizeof(uint8_t);
+ programCounter += getJumpDistance(m_bytecode, m_bytecodeLength, jumpLocation, jumpSize);
+ if (!character)
+ urlIndexIsAfterEndOfString = true;
+ urlIndex++; // This represents an edge in the DFA.
+ } else
+ programCounter += sizeof(DFABytecodeInstruction) + 2 * sizeof(uint8_t) + jumpSizeInBytes(jumpSize);
+ break;
+ }
+
+ case DFABytecodeInstruction::CheckValueRangeCaseInsensitive: {
+ if (urlIndexIsAfterEndOfString)
+ goto nextDFA;
+
+ char character = toASCIILower(url[urlIndex]);
+ DFABytecodeJumpSize jumpSize = getJumpSize(m_bytecode, m_bytecodeLength, programCounter);
+ if (character >= getBits<uint8_t>(m_bytecode, m_bytecodeLength, programCounter + sizeof(DFABytecodeInstruction))
+ && character <= getBits<uint8_t>(m_bytecode, m_bytecodeLength, programCounter + sizeof(DFABytecodeInstruction) + sizeof(uint8_t))) {
+ uint32_t jumpLocation = programCounter + sizeof(DFABytecodeInstruction) + 2 * sizeof(uint8_t);
+ programCounter += getJumpDistance(m_bytecode, m_bytecodeLength, jumpLocation, jumpSize);
+ if (!character)
+ urlIndexIsAfterEndOfString = true;
+ urlIndex++; // This represents an edge in the DFA.
+ } else
+ programCounter += sizeof(DFABytecodeInstruction) + 2 * sizeof(uint8_t) + jumpSizeInBytes(jumpSize);
+ break;
+ }
+
+ case DFABytecodeInstruction::Jump: {
+ if (!url[urlIndex] || urlIndexIsAfterEndOfString)
+ goto nextDFA;
+
+ uint32_t jumpLocation = programCounter + sizeof(DFABytecodeInstruction);
+ DFABytecodeJumpSize jumpSize = getJumpSize(m_bytecode, m_bytecodeLength, programCounter);
+ programCounter += getJumpDistance(m_bytecode, m_bytecodeLength, jumpLocation, jumpSize);
+ urlIndex++; // This represents an edge in the DFA.
+ break;
+ }
+
+ case DFABytecodeInstruction::AppendAction:
+ interpretAppendAction(programCounter, actions, false);
+ break;
+
+ case DFABytecodeInstruction::AppendActionWithIfDomain:
+ interpretAppendAction(programCounter, actions, true);
+ break;
+
+ case DFABytecodeInstruction::TestFlagsAndAppendAction:
+ interpretTestFlagsAndAppendAction(programCounter, flags, actions, false);
+ break;
+
+ case DFABytecodeInstruction::TestFlagsAndAppendActionWithIfDomain:
+ interpretTestFlagsAndAppendAction(programCounter, flags, actions, true);
+ break;
+
+ default:
+ RELEASE_ASSERT_NOT_REACHED(); // Invalid bytecode.
+ }
+ // We should always terminate before or at a null character at the end of a String.
+ ASSERT(urlIndex <= urlCString.length() || (urlIndexIsAfterEndOfString && urlIndex <= urlCString.length() + 1));
+ }
+ RELEASE_ASSERT_NOT_REACHED(); // The while loop can only be exited using goto nextDFA.
+ nextDFA:
+ ASSERT(dfaBytecodeLength);
+ programCounter = dfaStart + dfaBytecodeLength;
+ }
+ return actions;
+}
+
+} // namespace ContentExtensions
+
+} // namespace WebCore
+
+#endif // ENABLE(CONTENT_EXTENSIONS)
diff --git a/Source/WebCore/contentextensions/DFABytecodeInterpreter.h b/Source/WebCore/contentextensions/DFABytecodeInterpreter.h
new file mode 100644
index 000000000..4c69e2660
--- /dev/null
+++ b/Source/WebCore/contentextensions/DFABytecodeInterpreter.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (C) 2015 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#pragma once
+
+#if ENABLE(CONTENT_EXTENSIONS)
+
+#include "ContentExtensionRule.h"
+#include "ContentExtensionsDebugging.h"
+#include "DFABytecode.h"
+#include <wtf/DataLog.h>
+#include <wtf/HashSet.h>
+
+namespace WebCore {
+
+namespace ContentExtensions {
+
+class WEBCORE_EXPORT DFABytecodeInterpreter {
+public:
+ DFABytecodeInterpreter(const DFABytecode* bytecode, unsigned bytecodeLength)
+ : m_bytecode(bytecode)
+ , m_bytecodeLength(bytecodeLength)
+ {
+ }
+
+ typedef HashSet<uint64_t, DefaultHash<uint64_t>::Hash, WTF::UnsignedWithZeroKeyHashTraits<uint64_t>> Actions;
+
+ Actions interpret(const CString&, uint16_t flags);
+ Actions interpretWithDomains(const CString&, uint16_t flags, const DFABytecodeInterpreter::Actions& domainActions);
+ Actions actionsMatchingEverything();
+
+private:
+ void interpretAppendAction(unsigned& programCounter, Actions&, bool ifDomain);
+ void interpretTestFlagsAndAppendAction(unsigned& programCounter, uint16_t flags, Actions&, bool ifDomain);
+
+ template<bool caseSensitive>
+ void interpetJumpTable(const char* url, uint32_t& urlIndex, uint32_t& programCounter, bool& urlIndexIsAfterEndOfString);
+
+ const DFABytecode* m_bytecode;
+ const unsigned m_bytecodeLength;
+ const DFABytecodeInterpreter::Actions* m_domainActions { nullptr };
+};
+
+} // namespace ContentExtensions
+} // namespace WebCore
+
+#endif // ENABLE(CONTENT_EXTENSIONS)
diff --git a/Source/WebCore/contentextensions/DFACombiner.cpp b/Source/WebCore/contentextensions/DFACombiner.cpp
new file mode 100644
index 000000000..513567540
--- /dev/null
+++ b/Source/WebCore/contentextensions/DFACombiner.cpp
@@ -0,0 +1,227 @@
+/*
+ * Copyright (C) 2015 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "DFACombiner.h"
+
+#if ENABLE(CONTENT_EXTENSIONS)
+
+#include "MutableRangeList.h"
+#include <wtf/HashMap.h>
+#include <wtf/HashSet.h>
+
+namespace WebCore {
+
+namespace ContentExtensions {
+
+class DFAMerger {
+ typedef MutableRangeList<char, uint64_t, 128> CombinedTransitionsMutableRangeList;
+
+ enum class WhichDFA {
+ A,
+ B
+ };
+
+ template<WhichDFA whichDFA>
+ struct TargetConverter {
+ uint64_t convert(uint32_t target)
+ {
+ uint64_t value = 0xffffffffffffffff;
+ extend(value, target);
+ return value;
+ }
+
+ void extend(uint64_t& destination, uint32_t target)
+ {
+ setHalfSignature(destination, target);
+ }
+ private:
+ void setHalfSignature(uint64_t& signature, uint32_t value)
+ {
+ unsigned shiftAmount = (whichDFA == WhichDFA::A) ? 32 : 0;
+ uint64_t mask = static_cast<uint64_t>(0xffffffff) << (32 - shiftAmount);
+ signature = (signature & mask) | static_cast<uint64_t>(value) << shiftAmount;
+ }
+ };
+
+public:
+ DFAMerger(const DFA& a, const DFA& b)
+ : m_dfaA(a)
+ , m_dfaB(b)
+ {
+ }
+
+ DFA merge()
+ {
+ if (!m_nodeMapping.isEmpty())
+ return m_output;
+
+ uint64_t rootSignature = signatureForIndices(m_dfaA.root, m_dfaB.root);
+ getOrCreateCombinedNode(rootSignature);
+
+ CombinedTransitionsMutableRangeList combinedTransitions;
+ while (!m_unprocessedNodes.isEmpty()) {
+ combinedTransitions.clear();
+
+ uint64_t unprocessedNode = m_unprocessedNodes.takeLast();
+
+ uint32_t indexA = extractIndexA(unprocessedNode);
+ if (indexA != invalidNodeIndex) {
+ const DFANode& nodeA = m_dfaA.nodes[indexA];
+ auto transitionsA = nodeA.transitions(m_dfaA);
+ TargetConverter<WhichDFA::A> converterA;
+ combinedTransitions.extend(transitionsA.begin(), transitionsA.end(), converterA);
+ }
+
+ uint32_t indexB = extractIndexB(unprocessedNode);
+ if (indexB != invalidNodeIndex) {
+ const DFANode& nodeB = m_dfaB.nodes[indexB];
+ auto transitionsB = nodeB.transitions(m_dfaB);
+ TargetConverter<WhichDFA::B> converterB;
+ combinedTransitions.extend(transitionsB.begin(), transitionsB.end(), converterB);
+ }
+
+ unsigned transitionsStart = m_output.transitionRanges.size();
+ for (const auto& range : combinedTransitions) {
+ unsigned targetNodeId = getOrCreateCombinedNode(range.data);
+ m_output.transitionRanges.append({ range.first, range.last });
+ m_output.transitionDestinations.append(targetNodeId);
+ }
+ unsigned transitionsEnd = m_output.transitionRanges.size();
+ unsigned transitionsLength = transitionsEnd - transitionsStart;
+
+ uint32_t sourceNodeId = m_nodeMapping.get(unprocessedNode);
+ DFANode& dfaSourceNode = m_output.nodes[sourceNodeId];
+ dfaSourceNode.setTransitions(transitionsStart, static_cast<uint8_t>(transitionsLength));
+ }
+ return m_output;
+ }
+
+private:
+ uint32_t invalidNodeIndex = 0xffffffff;
+
+ static uint64_t signatureForIndices(uint32_t aIndex, uint32_t bIndex)
+ {
+ return static_cast<uint64_t>(aIndex) << 32 | bIndex;
+ }
+
+ static uint32_t extractIndexA(uint64_t signature)
+ {
+ return static_cast<uint32_t>(signature >> 32);
+ }
+
+ static uint32_t extractIndexB(uint64_t signature)
+ {
+ return static_cast<uint32_t>(signature);
+ }
+
+ uint32_t getOrCreateCombinedNode(uint64_t newNodeSignature)
+ {
+ auto addResult = m_nodeMapping.add(newNodeSignature, invalidNodeIndex);
+ if (!addResult.isNewEntry)
+ return addResult.iterator->value;
+
+ m_output.nodes.append(DFANode());
+ uint32_t newNodeIndex = m_output.nodes.size() - 1;
+ addResult.iterator->value = newNodeIndex;
+ m_unprocessedNodes.append(newNodeSignature);
+
+ uint32_t indexA = extractIndexA(newNodeSignature);
+ uint32_t indexB = extractIndexB(newNodeSignature);
+
+ HashSet<uint64_t, DefaultHash<uint64_t>::Hash, WTF::UnsignedWithZeroKeyHashTraits<uint64_t>> actions;
+ if (indexA != invalidNodeIndex) {
+ const DFANode& node = m_dfaA.nodes[indexA];
+ uint32_t actionsStart = node.actionsStart();
+ uint32_t actionsEnd = actionsStart + node.actionsLength();
+ for (uint32_t i = actionsStart; i < actionsEnd; ++i)
+ actions.add(m_dfaA.actions[i]);
+ }
+ if (indexB != invalidNodeIndex) {
+ const DFANode& node = m_dfaB.nodes[indexB];
+ uint32_t actionsStart = node.actionsStart();
+ uint32_t actionsEnd = actionsStart + node.actionsLength();
+ for (uint32_t i = actionsStart; i < actionsEnd; ++i)
+ actions.add(m_dfaB.actions[i]);
+ }
+
+ uint32_t actionsStart = m_output.actions.size();
+ for (uint64_t action : actions)
+ m_output.actions.append(action);
+ uint32_t actionsEnd = m_output.actions.size();
+ uint16_t actionsLength = static_cast<uint16_t>(actionsEnd - actionsStart);
+
+ m_output.nodes.last().setActions(actionsStart, actionsLength);
+ return newNodeIndex;
+ }
+
+ const DFA& m_dfaA;
+ const DFA& m_dfaB;
+ DFA m_output;
+ HashMap<uint64_t, uint32_t, DefaultHash<uint64_t>::Hash, WTF::UnsignedWithZeroKeyHashTraits<uint64_t>> m_nodeMapping;
+ Vector<uint64_t, 0, ContentExtensionsOverflowHandler> m_unprocessedNodes;
+};
+
+void DFACombiner::combineDFAs(unsigned minimumSize, std::function<void(DFA&&)> handler)
+{
+ if (m_dfas.isEmpty())
+ return;
+
+ for (unsigned i = m_dfas.size(); i--;) {
+ if (m_dfas[i].graphSize() > minimumSize) {
+ handler(WTFMove(m_dfas[i]));
+ m_dfas.remove(i);
+ }
+ }
+
+ while (!m_dfas.isEmpty()) {
+ if (m_dfas.size() == 1) {
+ handler(WTFMove(m_dfas.first()));
+ return;
+ }
+
+ DFA a = m_dfas.takeLast();
+ DFA b = m_dfas.takeLast();
+ DFAMerger dfaMerger(a, b);
+ DFA c = dfaMerger.merge();
+
+ if (c.graphSize() > minimumSize || m_dfas.isEmpty()) {
+ // Minimizing is somewhat expensive. We only do it in bulk when we reach the threshold
+ // to reduce the load.
+ c.minimize();
+ }
+
+ if (c.graphSize() > minimumSize)
+ handler(WTFMove(c));
+ else
+ m_dfas.append(c);
+ }
+}
+
+}
+
+} // namespace WebCore
+
+#endif
diff --git a/Source/WebCore/contentextensions/DFACombiner.h b/Source/WebCore/contentextensions/DFACombiner.h
new file mode 100644
index 000000000..090946b12
--- /dev/null
+++ b/Source/WebCore/contentextensions/DFACombiner.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) 2015 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#pragma once
+
+#if ENABLE(CONTENT_EXTENSIONS)
+
+#include "DFA.h"
+#include <wtf/Vector.h>
+
+namespace WebCore {
+
+namespace ContentExtensions {
+
+class WEBCORE_EXPORT DFACombiner {
+public:
+ void addDFA(DFA&&);
+ void combineDFAs(unsigned minimumSize, std::function<void(DFA&&)> handler);
+
+private:
+ Vector<DFA> m_dfas;
+};
+
+inline void DFACombiner::addDFA(DFA&& dfa)
+{
+ dfa.minimize();
+ m_dfas.append(WTFMove(dfa));
+}
+
+} // namespace ContentExtensions
+} // namespace WebCore
+
+#endif // ENABLE(CONTENT_EXTENSIONS)
diff --git a/Source/WebCore/contentextensions/DFAMinimizer.cpp b/Source/WebCore/contentextensions/DFAMinimizer.cpp
new file mode 100644
index 000000000..4408e84bf
--- /dev/null
+++ b/Source/WebCore/contentextensions/DFAMinimizer.cpp
@@ -0,0 +1,509 @@
+/*
+ * Copyright (C) 2015 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "DFAMinimizer.h"
+
+#if ENABLE(CONTENT_EXTENSIONS)
+
+#include "DFA.h"
+#include "DFANode.h"
+#include "MutableRangeList.h"
+#include <wtf/HashMap.h>
+#include <wtf/Hasher.h>
+#include <wtf/Vector.h>
+
+namespace WebCore {
+namespace ContentExtensions {
+
+namespace {
+
+template<typename VectorType, typename Iterable, typename Function>
+static inline void iterateIntersections(const VectorType& singularTransitionsFirsts, const Iterable& iterableTransitionList, const Function& intersectionHandler)
+{
+ ASSERT(!singularTransitionsFirsts.isEmpty());
+ auto otherIterator = iterableTransitionList.begin();
+ auto otherEnd = iterableTransitionList.end();
+
+ if (otherIterator == otherEnd)
+ return;
+
+ unsigned singularTransitionsLength = singularTransitionsFirsts.size();
+ unsigned singularTransitionsFirstsIndex = 0;
+ for (; otherIterator != otherEnd; ++otherIterator) {
+ auto firstCharacter = otherIterator.first();
+ while (singularTransitionsFirstsIndex < singularTransitionsLength
+ && singularTransitionsFirsts[singularTransitionsFirstsIndex] != firstCharacter)
+ ++singularTransitionsFirstsIndex;
+
+ intersectionHandler(singularTransitionsFirstsIndex, otherIterator);
+ ++singularTransitionsFirstsIndex;
+
+ auto lastCharacter = otherIterator.last();
+ while (singularTransitionsFirstsIndex < singularTransitionsLength
+ && singularTransitionsFirsts[singularTransitionsFirstsIndex] <= lastCharacter) {
+ intersectionHandler(singularTransitionsFirstsIndex, otherIterator);
+ ++singularTransitionsFirstsIndex;
+ }
+ }
+}
+
+class Partition {
+public:
+ void initialize(unsigned size)
+ {
+ if (!size)
+ return;
+
+ m_sets.reserveInitialCapacity(size);
+ m_partitionedElements.resize(size);
+ m_elementPositionInPartitionedNodes.resize(size);
+ m_elementToSetMap.resize(size);
+
+ for (unsigned i = 0; i < size; ++i) {
+ m_partitionedElements[i] = i;
+ m_elementPositionInPartitionedNodes[i] = i;
+ m_elementToSetMap[i] = 0;
+ }
+ m_sets.uncheckedAppend(SetDescriptor { 0, size, 0 });
+ }
+
+ void reserveUninitializedCapacity(unsigned elementCount)
+ {
+ m_partitionedElements.resize(elementCount);
+ m_elementPositionInPartitionedNodes.resize(elementCount);
+ m_elementToSetMap.resize(elementCount);
+ }
+
+ void addSetUnchecked(unsigned start, unsigned size)
+ {
+ m_sets.append(SetDescriptor { start, size, 0 });
+ }
+
+ void setElementUnchecked(unsigned elementIndex, unsigned positionInPartition, unsigned setIndex)
+ {
+ ASSERT(setIndex < m_sets.size());
+ m_partitionedElements[positionInPartition] = elementIndex;
+ m_elementPositionInPartitionedNodes[elementIndex] = positionInPartition;
+ m_elementToSetMap[elementIndex] = setIndex;
+ }
+
+ unsigned startOffsetOfSet(unsigned setIndex) const
+ {
+ return m_sets[setIndex].start;
+ }
+
+ ALWAYS_INLINE void markElementInCurrentGeneration(unsigned elementIndex)
+ {
+ // Swap the node with the first unmarked node.
+ unsigned setIndex = m_elementToSetMap[elementIndex];
+ SetDescriptor& setDescriptor = m_sets[setIndex];
+
+ unsigned elementPositionInPartition = m_elementPositionInPartitionedNodes[elementIndex];
+ ASSERT(elementPositionInPartition >= setDescriptor.start);
+ ASSERT(elementPositionInPartition < setDescriptor.end());
+
+ unsigned firstUnmarkedElementPositionInPartition = setDescriptor.indexAfterMarkedElements();
+ ASSERT(firstUnmarkedElementPositionInPartition >= setDescriptor.start && firstUnmarkedElementPositionInPartition < setDescriptor.end());
+ ASSERT(firstUnmarkedElementPositionInPartition >= firstUnmarkedElementPositionInPartition);
+
+ // Swap the nodes in the set.
+ unsigned firstUnmarkedElement = m_partitionedElements[firstUnmarkedElementPositionInPartition];
+ m_partitionedElements[firstUnmarkedElementPositionInPartition] = elementIndex;
+ m_partitionedElements[elementPositionInPartition] = firstUnmarkedElement;
+
+ // Update their index.
+ m_elementPositionInPartitionedNodes[elementIndex] = firstUnmarkedElementPositionInPartition;
+ m_elementPositionInPartitionedNodes[firstUnmarkedElement] = elementPositionInPartition;
+
+ if (!setDescriptor.markedCount) {
+ ASSERT(!m_setsMarkedInCurrentGeneration.contains(setIndex));
+ m_setsMarkedInCurrentGeneration.append(setIndex);
+ }
+ ++setDescriptor.markedCount;
+ }
+
+ // The function passed as argument MUST not modify the partition.
+ template<typename Function>
+ void refineGeneration(const Function& function)
+ {
+ for (unsigned setIndex : m_setsMarkedInCurrentGeneration) {
+ SetDescriptor& setDescriptor = m_sets[setIndex];
+ if (setDescriptor.markedCount == setDescriptor.size) {
+ // Everything is marked, there is nothing to refine.
+ setDescriptor.markedCount = 0;
+ continue;
+ }
+
+ SetDescriptor newSet;
+ bool newSetIsMarkedSet = setDescriptor.markedCount * 2 <= setDescriptor.size;
+ if (newSetIsMarkedSet) {
+ // Less than half of the nodes have been marked.
+ newSet = { setDescriptor.start, setDescriptor.markedCount, 0 };
+ setDescriptor.start = setDescriptor.start + setDescriptor.markedCount;
+ } else
+ newSet = { setDescriptor.start + setDescriptor.markedCount, setDescriptor.size - setDescriptor.markedCount, 0 };
+ setDescriptor.size -= newSet.size;
+ setDescriptor.markedCount = 0;
+
+ unsigned newSetIndex = m_sets.size();
+ m_sets.append(newSet);
+
+ for (unsigned i = newSet.start; i < newSet.end(); ++i)
+ m_elementToSetMap[m_partitionedElements[i]] = newSetIndex;
+
+ function(newSetIndex);
+ }
+ m_setsMarkedInCurrentGeneration.clear();
+ }
+
+ // Call Function() on every node of a given subset.
+ template<typename Function>
+ void iterateSet(unsigned setIndex, const Function& function)
+ {
+ SetDescriptor& setDescriptor = m_sets[setIndex];
+ for (unsigned i = setDescriptor.start; i < setDescriptor.end(); ++i)
+ function(m_partitionedElements[i]);
+ }
+
+ // Index of the set containing the Node.
+ unsigned setIndex(unsigned elementIndex) const
+ {
+ return m_elementToSetMap[elementIndex];
+ }
+
+ // NodeIndex of the first element in the set.
+ unsigned firstElementInSet(unsigned setIndex) const
+ {
+ return m_partitionedElements[m_sets[setIndex].start];
+ }
+
+ unsigned size() const
+ {
+ return m_sets.size();
+ }
+
+private:
+ struct SetDescriptor {
+ unsigned start;
+ unsigned size;
+ unsigned markedCount;
+
+ unsigned indexAfterMarkedElements() const { return start + markedCount; }
+ unsigned end() const { return start + size; }
+ };
+
+ // List of sets.
+ Vector<SetDescriptor, 0, ContentExtensionsOverflowHandler> m_sets;
+
+ // All the element indices such that two elements of the same set never have a element of a different set between them.
+ Vector<unsigned, 0, ContentExtensionsOverflowHandler> m_partitionedElements;
+
+ // Map elementIndex->position in the partitionedElements.
+ Vector<unsigned, 0, ContentExtensionsOverflowHandler> m_elementPositionInPartitionedNodes;
+
+ // Map elementIndex->SetIndex.
+ Vector<unsigned, 0, ContentExtensionsOverflowHandler> m_elementToSetMap;
+
+ // List of sets with any marked node. Each set can appear at most once.
+ // FIXME: find a good inline size for this.
+ Vector<unsigned, 128, ContentExtensionsOverflowHandler> m_setsMarkedInCurrentGeneration;
+};
+
+class FullGraphPartition {
+ typedef MutableRangeList<char, uint32_t, 128> SingularTransitionsMutableRangeList;
+public:
+ FullGraphPartition(const DFA& dfa)
+ {
+ m_nodePartition.initialize(dfa.nodes.size());
+
+ SingularTransitionsMutableRangeList singularTransitions;
+ CounterConverter counterConverter;
+ for (const DFANode& node : dfa.nodes) {
+ if (node.isKilled())
+ continue;
+ auto transitions = node.transitions(dfa);
+ singularTransitions.extend(transitions.begin(), transitions.end(), counterConverter);
+ }
+
+ // Count the number of transition for each singular range. This will give us the bucket size
+ // for the transition partition, where transitions are partitioned by "symbol".
+ unsigned rangeIndexAccumulator = 0;
+ for (const auto& transition : singularTransitions) {
+ m_transitionPartition.addSetUnchecked(rangeIndexAccumulator, transition.data);
+ rangeIndexAccumulator += transition.data;
+ }
+
+ // Count the number of incoming transitions per node.
+ m_flattenedTransitionsStartOffsetPerNode.resize(dfa.nodes.size());
+ memset(m_flattenedTransitionsStartOffsetPerNode.data(), 0, m_flattenedTransitionsStartOffsetPerNode.size() * sizeof(unsigned));
+
+ Vector<char, 0, ContentExtensionsOverflowHandler> singularTransitionsFirsts;
+ singularTransitionsFirsts.reserveInitialCapacity(singularTransitions.m_ranges.size());
+ for (const auto& transition : singularTransitions)
+ singularTransitionsFirsts.uncheckedAppend(transition.first);
+
+ for (const DFANode& node : dfa.nodes) {
+ if (node.isKilled())
+ continue;
+ auto transitions = node.transitions(dfa);
+ iterateIntersections(singularTransitionsFirsts, transitions, [&](unsigned, const DFANode::ConstRangeIterator& origin) {
+ uint32_t targetNodeIndex = origin.target();
+ ++m_flattenedTransitionsStartOffsetPerNode[targetNodeIndex];
+ });
+ }
+
+ // Accumulate the offsets. This gives us the start position of each bucket.
+ unsigned transitionAccumulator = 0;
+ for (unsigned i = 0; i < m_flattenedTransitionsStartOffsetPerNode.size(); ++i) {
+ unsigned transitionsCountForNode = m_flattenedTransitionsStartOffsetPerNode[i];
+ m_flattenedTransitionsStartOffsetPerNode[i] = transitionAccumulator;
+ transitionAccumulator += transitionsCountForNode;
+ }
+ unsigned flattenedTransitionsSize = transitionAccumulator;
+ ASSERT_WITH_MESSAGE(flattenedTransitionsSize == rangeIndexAccumulator, "The number of transitions should be the same, regardless of how they are arranged in buckets.");
+
+ m_transitionPartition.reserveUninitializedCapacity(flattenedTransitionsSize);
+
+ // Next, let's fill the transition table and set up the size of each group at the same time.
+ m_flattenedTransitionsSizePerNode.resize(dfa.nodes.size());
+ for (unsigned& counter : m_flattenedTransitionsSizePerNode)
+ counter = 0;
+ m_flattenedTransitions.resize(flattenedTransitionsSize);
+
+ Vector<uint32_t> transitionPerRangeOffset(m_transitionPartition.size());
+ memset(transitionPerRangeOffset.data(), 0, transitionPerRangeOffset.size() * sizeof(uint32_t));
+
+ for (unsigned i = 0; i < dfa.nodes.size(); ++i) {
+ const DFANode& node = dfa.nodes[i];
+ if (node.isKilled())
+ continue;
+
+ auto transitions = node.transitions(dfa);
+ iterateIntersections(singularTransitionsFirsts, transitions, [&](unsigned singularTransitonIndex, const DFANode::ConstRangeIterator& origin) {
+ uint32_t targetNodeIndex = origin.target();
+
+ unsigned start = m_flattenedTransitionsStartOffsetPerNode[targetNodeIndex];
+ unsigned offset = m_flattenedTransitionsSizePerNode[targetNodeIndex];
+ unsigned positionInFlattenedTransitions = start + offset;
+ m_flattenedTransitions[positionInFlattenedTransitions] = Transition({ i });
+
+ uint32_t& inRangeOffset = transitionPerRangeOffset[singularTransitonIndex];
+ unsigned positionInTransitionPartition = m_transitionPartition.startOffsetOfSet(singularTransitonIndex) + inRangeOffset;
+ ++inRangeOffset;
+
+ m_transitionPartition.setElementUnchecked(positionInFlattenedTransitions, positionInTransitionPartition, singularTransitonIndex);
+
+ ++m_flattenedTransitionsSizePerNode[targetNodeIndex];
+ });
+ }
+ }
+
+ void markNode(unsigned nodeIndex)
+ {
+ m_nodePartition.markElementInCurrentGeneration(nodeIndex);
+ }
+
+ void refinePartitions()
+ {
+ m_nodePartition.refineGeneration([&](unsigned smallestSetIndex) {
+ m_nodePartition.iterateSet(smallestSetIndex, [&](unsigned nodeIndex) {
+ unsigned incomingTransitionsStartForNode = m_flattenedTransitionsStartOffsetPerNode[nodeIndex];
+ unsigned incomingTransitionsSizeForNode = m_flattenedTransitionsSizePerNode[nodeIndex];
+
+ for (unsigned i = 0; i < incomingTransitionsSizeForNode; ++i)
+ m_transitionPartition.markElementInCurrentGeneration(incomingTransitionsStartForNode + i);
+ });
+
+ // We only need to split the transitions, we handle the new sets through the main loop.
+ m_transitionPartition.refineGeneration([](unsigned) { });
+ });
+ }
+
+ void splitByUniqueTransitions()
+ {
+ for (; m_nextTransitionSetToProcess < m_transitionPartition.size(); ++m_nextTransitionSetToProcess) {
+ // We use the known splitters to refine the set.
+ m_transitionPartition.iterateSet(m_nextTransitionSetToProcess, [&](unsigned transitionIndex) {
+ unsigned sourceNodeIndex = m_flattenedTransitions[transitionIndex].source;
+ m_nodePartition.markElementInCurrentGeneration(sourceNodeIndex);
+ });
+
+ refinePartitions();
+ }
+ }
+
+ unsigned nodeReplacement(unsigned nodeIndex)
+ {
+ unsigned setIndex = m_nodePartition.setIndex(nodeIndex);
+ return m_nodePartition.firstElementInSet(setIndex);
+ }
+
+private:
+ struct Transition {
+ unsigned source;
+ };
+
+ struct CounterConverter {
+ uint32_t convert(uint32_t)
+ {
+ return 1;
+ }
+
+ void extend(uint32_t& destination, uint32_t)
+ {
+ ++destination;
+ }
+ };
+
+ Vector<unsigned, 0, ContentExtensionsOverflowHandler> m_flattenedTransitionsStartOffsetPerNode;
+ Vector<unsigned, 0, ContentExtensionsOverflowHandler> m_flattenedTransitionsSizePerNode;
+ Vector<Transition, 0, ContentExtensionsOverflowHandler> m_flattenedTransitions;
+
+ Partition m_nodePartition;
+ Partition m_transitionPartition;
+
+ unsigned m_nextTransitionSetToProcess { 0 };
+};
+
+struct ActionKey {
+ enum DeletedValueTag { DeletedValue };
+ explicit ActionKey(DeletedValueTag) { state = Deleted; }
+
+ enum EmptyValueTag { EmptyValue };
+ explicit ActionKey(EmptyValueTag) { state = Empty; }
+
+ explicit ActionKey(const DFA* dfa, uint32_t actionsStart, uint16_t actionsLength)
+ : dfa(dfa)
+ , actionsStart(actionsStart)
+ , actionsLength(actionsLength)
+ , state(Valid)
+ {
+ StringHasher hasher;
+ hasher.addCharactersAssumingAligned(reinterpret_cast<const UChar*>(&dfa->actions[actionsStart]), actionsLength * sizeof(uint64_t) / sizeof(UChar));
+ hash = hasher.hash();
+ }
+
+ bool isEmptyValue() const { return state == Empty; }
+ bool isDeletedValue() const { return state == Deleted; }
+
+ unsigned hash;
+
+ const DFA* dfa;
+ uint32_t actionsStart;
+ uint16_t actionsLength;
+
+ enum {
+ Valid,
+ Empty,
+ Deleted
+ } state;
+};
+
+struct ActionKeyHash {
+ static unsigned hash(const ActionKey& actionKey)
+ {
+ return actionKey.hash;
+ }
+
+ static bool equal(const ActionKey& a, const ActionKey& b)
+ {
+ if (a.state != b.state
+ || a.dfa != b.dfa
+ || a.actionsLength != b.actionsLength)
+ return false;
+ for (uint16_t i = 0; i < a.actionsLength; ++i) {
+ if (a.dfa->actions[a.actionsStart + i] != a.dfa->actions[b.actionsStart + i])
+ return false;
+ }
+ return true;
+ }
+ static const bool safeToCompareToEmptyOrDeleted = false;
+};
+
+struct ActionKeyHashTraits : public WTF::CustomHashTraits<ActionKey> {
+ static const bool emptyValueIsZero = true;
+};
+
+} // anonymous namespace.
+
+void DFAMinimizer::minimize(DFA& dfa)
+{
+ FullGraphPartition fullGraphPartition(dfa);
+
+ // Unlike traditional minimization final states can be differentiated by their action.
+ // Instead of creating a single set for the final state, we partition by actions from
+ // the start.
+ HashMap<ActionKey, Vector<unsigned>, ActionKeyHash, ActionKeyHashTraits> finalStates;
+ for (unsigned i = 0; i < dfa.nodes.size(); ++i) {
+ const DFANode& node = dfa.nodes[i];
+ if (node.hasActions()) {
+ // FIXME: Sort the actions in the dfa to make nodes that have the same actions in different order equal.
+ auto addResult = finalStates.add(ActionKey(&dfa, node.actionsStart(), node.actionsLength()), Vector<unsigned>());
+ addResult.iterator->value.append(i);
+ }
+ }
+
+ for (const auto& slot : finalStates) {
+ for (unsigned finalStateIndex : slot.value)
+ fullGraphPartition.markNode(finalStateIndex);
+ fullGraphPartition.refinePartitions();
+ }
+
+ // Use every splitter to refine the node partitions.
+ fullGraphPartition.splitByUniqueTransitions();
+
+ Vector<unsigned> relocationVector;
+ relocationVector.reserveInitialCapacity(dfa.nodes.size());
+ for (unsigned i = 0; i < dfa.nodes.size(); ++i)
+ relocationVector.uncheckedAppend(i);
+
+ // Update all the transitions.
+ for (unsigned i = 0; i < dfa.nodes.size(); ++i) {
+ unsigned replacement = fullGraphPartition.nodeReplacement(i);
+ if (i != replacement) {
+ relocationVector[i] = replacement;
+ dfa.nodes[i].kill(dfa);
+ }
+ }
+
+ dfa.root = relocationVector[dfa.root];
+ for (DFANode& node : dfa.nodes) {
+ if (node.isKilled())
+ continue;
+
+ for (auto& transition : node.transitions(dfa)) {
+ uint32_t target = transition.target();
+ uint32_t relocatedTarget = relocationVector[target];
+ if (target != relocatedTarget)
+ transition.resetTarget(relocatedTarget);
+ }
+ }
+}
+
+} // namespace ContentExtensions
+} // namespace WebCore
+
+#endif // ENABLE(CONTENT_EXTENSIONS)
diff --git a/Source/WebCore/contentextensions/DFAMinimizer.h b/Source/WebCore/contentextensions/DFAMinimizer.h
new file mode 100644
index 000000000..9e0aafc95
--- /dev/null
+++ b/Source/WebCore/contentextensions/DFAMinimizer.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (C) 2015 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#pragma once
+
+#if ENABLE(CONTENT_EXTENSIONS)
+
+namespace WebCore {
+namespace ContentExtensions {
+
+struct DFA;
+
+class DFAMinimizer {
+public:
+ WEBCORE_EXPORT static void minimize(DFA&);
+};
+
+} // namespace ContentExtensions
+} // namespace WebCore
+
+#endif // ENABLE(CONTENT_EXTENSIONS)
diff --git a/Source/WebCore/contentextensions/DFANode.cpp b/Source/WebCore/contentextensions/DFANode.cpp
new file mode 100644
index 000000000..762a13558
--- /dev/null
+++ b/Source/WebCore/contentextensions/DFANode.cpp
@@ -0,0 +1,147 @@
+/*
+ * Copyright (C) 2014, 2015 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "DFANode.h"
+
+#include "DFA.h"
+
+#if ENABLE(CONTENT_EXTENSIONS)
+
+namespace WebCore {
+
+namespace ContentExtensions {
+
+Vector<uint64_t> DFANode::actions(const DFA& dfa) const
+{
+ // FIXME: Use iterators instead of copying the Vector elements.
+ Vector<uint64_t> vector;
+ vector.reserveInitialCapacity(m_actionsLength);
+ for (uint32_t i = m_actionsStart; i < m_actionsStart + m_actionsLength; ++i)
+ vector.uncheckedAppend(dfa.actions[i]);
+ return vector;
+}
+
+bool DFANode::containsTransition(char transition, const DFA& dfa) const
+{
+ // Called from DFAMinimizer, this loops though a maximum of 128 transitions, so it's not too slow.
+ ASSERT(m_transitionsLength <= 128);
+ for (unsigned i = m_transitionsStart; i < m_transitionsStart + m_transitionsLength; ++i) {
+ if (dfa.transitionRanges[i].first <= transition
+ && dfa.transitionRanges[i].last >= transition)
+ return true;
+ }
+ return false;
+}
+
+void DFANode::kill(DFA& dfa)
+{
+ ASSERT(m_flags != IsKilled);
+ m_flags = IsKilled; // Killed nodes don't have any other flags.
+
+ // Invalidate the now-unused memory in the DFA to make finding bugs easier.
+ for (unsigned i = m_transitionsStart; i < m_transitionsStart + m_transitionsLength; ++i) {
+ dfa.transitionRanges[i] = { -1, -1 };
+ dfa.transitionDestinations[i] = std::numeric_limits<uint32_t>::max();
+ }
+ for (unsigned i = m_actionsStart; i < m_actionsStart + m_actionsLength; ++i)
+ dfa.actions[i] = std::numeric_limits<uint64_t>::max();
+
+ m_actionsStart = 0;
+ m_actionsLength = 0;
+ m_transitionsStart = 0;
+ m_transitionsLength = 0;
+};
+
+bool DFANode::canUseFallbackTransition(const DFA& dfa) const
+{
+ // Transitions can contain '\0' if the expression has a end-of-line marker.
+ // Fallback transitions cover 1-127. We have to be careful with the first.
+
+ IterableConstRange iterableTransitions = transitions(dfa);
+ auto iterator = iterableTransitions.begin();
+ auto end = iterableTransitions.end();
+ if (iterator == end)
+ return false;
+
+ char lastSeenCharacter = 0;
+ if (!iterator.first()) {
+ lastSeenCharacter = iterator.last();
+ if (lastSeenCharacter == 127)
+ return true;
+ ++iterator;
+ }
+
+ for (;iterator != end; ++iterator) {
+ ASSERT(iterator.first() > lastSeenCharacter);
+ if (iterator.first() != lastSeenCharacter + 1)
+ return false;
+
+ if (iterator.range().last == 127)
+ return true;
+ lastSeenCharacter = iterator.last();
+ }
+ return false;
+}
+
+uint32_t DFANode::bestFallbackTarget(const DFA& dfa) const
+{
+ ASSERT(canUseFallbackTransition(dfa));
+
+ HashMap<uint32_t, unsigned, DefaultHash<uint32_t>::Hash, WTF::UnsignedWithZeroKeyHashTraits<uint32_t>> histogram;
+
+ IterableConstRange iterableTransitions = transitions(dfa);
+ auto iterator = iterableTransitions.begin();
+ auto end = iterableTransitions.end();
+ ASSERT_WITH_MESSAGE(iterator != end, "An empty range list cannot use a fallback transition.");
+
+ if (!iterator.first() && !iterator.last())
+ ++iterator;
+ ASSERT_WITH_MESSAGE(iterator != end, "An empty range list matching only zero cannot use a fallback transition.");
+
+ uint32_t bestTarget = iterator.target();
+ unsigned bestTargetScore = !iterator.range().first ? iterator.range().size() - 1 : iterator.range().size();
+ histogram.add(bestTarget, bestTargetScore);
+ ++iterator;
+
+ for (;iterator != end; ++iterator) {
+ unsigned rangeSize = iterator.range().size();
+ uint32_t target = iterator.target();
+ auto addResult = histogram.add(target, rangeSize);
+ if (!addResult.isNewEntry)
+ addResult.iterator->value += rangeSize;
+ if (addResult.iterator->value > bestTargetScore) {
+ bestTargetScore = addResult.iterator->value;
+ bestTarget = target;
+ }
+ }
+ return bestTarget;
+}
+
+} // namespace ContentExtensions
+
+} // namespace WebCore
+
+#endif // ENABLE(CONTENT_EXTENSIONS)
diff --git a/Source/WebCore/contentextensions/DFANode.h b/Source/WebCore/contentextensions/DFANode.h
new file mode 100644
index 000000000..342ef9167
--- /dev/null
+++ b/Source/WebCore/contentextensions/DFANode.h
@@ -0,0 +1,176 @@
+/*
+ * Copyright (C) 2014, 2015 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#pragma once
+
+#if ENABLE(CONTENT_EXTENSIONS)
+
+#include "ContentExtensionsDebugging.h"
+#include <wtf/HashMap.h>
+#include <wtf/Vector.h>
+
+namespace WebCore {
+
+namespace ContentExtensions {
+
+struct DFA;
+
+struct CharRange {
+ char first;
+ char last;
+ unsigned size() const { return last - first + 1; }
+};
+
+// A DFANode abstract the transition table out of a DFA state. If a state is accepting, the DFANode also have
+// the actions for that state.
+class DFANode {
+public:
+ struct ConstRangeIterator {
+ const DFA& dfa;
+ uint32_t position;
+
+ const ConstRangeIterator& operator*() const { return *this; }
+
+ bool operator==(const ConstRangeIterator& other) const
+ {
+ ASSERT(&dfa == &other.dfa);
+ return position == other.position;
+ }
+ bool operator!=(const ConstRangeIterator& other) const { return !(*this == other); }
+
+ ConstRangeIterator& operator++()
+ {
+ ++position;
+ return *this;
+ }
+
+ const CharRange& range() const;
+ uint32_t target() const;
+
+ char first() const { return range().first; }
+ char last() const { return range().last; }
+ uint32_t data() const { return target(); }
+ };
+
+ struct IterableConstRange {
+ const DFA& dfa;
+ uint32_t rangesStart;
+ uint32_t rangesEnd;
+
+ ConstRangeIterator begin() const { return { dfa, rangesStart }; }
+ ConstRangeIterator end() const { return { dfa, rangesEnd }; }
+ };
+
+ IterableConstRange transitions(const DFA& dfa) const
+ {
+ return IterableConstRange { dfa, m_transitionsStart, m_transitionsStart + m_transitionsLength };
+ }
+
+ struct RangeIterator {
+ DFA& dfa;
+ uint32_t position;
+
+ RangeIterator& operator*() { return *this; }
+
+ bool operator==(const RangeIterator& other) const
+ {
+ ASSERT(&dfa == &other.dfa);
+ return position == other.position;
+ }
+ bool operator!=(const RangeIterator& other) const { return !(*this == other); }
+
+ RangeIterator& operator++()
+ {
+ ++position;
+ return *this;
+ }
+
+ const CharRange& range() const;
+ uint32_t target() const;
+ void resetTarget(uint32_t);
+
+ char first() const { return range().first; }
+ char last() const { return range().last; }
+ uint32_t data() const { return target(); }
+ };
+
+ struct IterableRange {
+ DFA& dfa;
+ uint32_t rangesStart;
+ uint32_t rangesEnd;
+
+ RangeIterator begin() const { return { dfa, rangesStart }; }
+ RangeIterator end() const { return { dfa, rangesEnd }; }
+ };
+
+ IterableRange transitions(DFA& dfa)
+ {
+ return IterableRange { dfa, m_transitionsStart, m_transitionsStart + m_transitionsLength };
+ }
+
+ // FIXME: Stop minimizing killed nodes and add ASSERT(!isKilled()) in many functions here.
+ void kill(DFA&);
+ Vector<uint64_t> actions(const DFA&) const;
+ bool containsTransition(char, const DFA&) const;
+
+ bool isKilled() const { return m_flags & IsKilled; }
+ bool hasActions() const { return !!m_actionsLength; }
+ uint16_t actionsLength() const { return m_actionsLength; }
+ uint32_t actionsStart() const { return m_actionsStart; }
+
+ bool canUseFallbackTransition(const DFA&) const;
+ uint32_t bestFallbackTarget(const DFA&) const;
+
+ void setActions(uint32_t start, uint16_t length)
+ {
+ ASSERT(!m_actionsStart);
+ ASSERT(!m_actionsLength);
+ m_actionsStart = start;
+ m_actionsLength = length;
+ }
+ void setTransitions(uint32_t start, uint16_t length)
+ {
+ ASSERT(!m_transitionsStart);
+ ASSERT(!m_transitionsLength);
+ m_transitionsStart = start;
+ m_transitionsLength = length;
+ }
+
+private:
+ uint32_t m_actionsStart { 0 };
+ uint32_t m_transitionsStart { 0 };
+ uint16_t m_actionsLength { 0 };
+ uint8_t m_transitionsLength { 0 };
+
+ uint8_t m_flags { 0 };
+ const uint8_t IsKilled = 0x01;
+};
+
+COMPILE_ASSERT(sizeof(DFANode) <= 16, Keep the DFANodes small);
+
+} // namespace ContentExtensions
+} // namespace WebCore
+
+#endif // ENABLE(CONTENT_EXTENSIONS)
diff --git a/Source/WebCore/contentextensions/HashableActionList.h b/Source/WebCore/contentextensions/HashableActionList.h
new file mode 100644
index 000000000..1039c1da5
--- /dev/null
+++ b/Source/WebCore/contentextensions/HashableActionList.h
@@ -0,0 +1,93 @@
+/*
+ * Copyright (C) 2015 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#pragma once
+
+#include <wtf/Hasher.h>
+#include <wtf/Vector.h>
+
+namespace WebCore {
+
+namespace ContentExtensions {
+
+struct HashableActionList {
+ enum DeletedValueTag { DeletedValue };
+ explicit HashableActionList(DeletedValueTag) { state = Deleted; }
+
+ enum EmptyValueTag { EmptyValue };
+ explicit HashableActionList(EmptyValueTag) { state = Empty; }
+
+ template<typename AnyVectorType>
+ explicit HashableActionList(const AnyVectorType& otherActions)
+ : actions(otherActions)
+ , state(Valid)
+ {
+ std::sort(actions.begin(), actions.end());
+ StringHasher hasher;
+ hasher.addCharactersAssumingAligned(reinterpret_cast<const UChar*>(actions.data()), actions.size() * sizeof(uint64_t) / sizeof(UChar));
+ hash = hasher.hash();
+ }
+
+ bool isEmptyValue() const { return state == Empty; }
+ bool isDeletedValue() const { return state == Deleted; }
+
+ bool operator==(const HashableActionList other) const
+ {
+ return state == other.state && actions == other.actions;
+ }
+
+ bool operator!=(const HashableActionList other) const
+ {
+ return !(*this == other);
+ }
+
+ Vector<uint64_t> actions;
+ unsigned hash;
+ enum {
+ Valid,
+ Empty,
+ Deleted
+ } state;
+};
+
+struct HashableActionListHash {
+ static unsigned hash(const HashableActionList& actionKey)
+ {
+ return actionKey.hash;
+ }
+
+ static bool equal(const HashableActionList& a, const HashableActionList& b)
+ {
+ return a == b;
+ }
+ static const bool safeToCompareToEmptyOrDeleted = false;
+};
+
+struct HashableActionListHashTraits : public WTF::CustomHashTraits<HashableActionList> {
+ static const bool emptyValueIsZero = false;
+};
+
+} // namespace ContentExtensions
+} // namespace WebCore
diff --git a/Source/WebCore/contentextensions/ImmutableNFA.h b/Source/WebCore/contentextensions/ImmutableNFA.h
new file mode 100644
index 000000000..f5d21aeba
--- /dev/null
+++ b/Source/WebCore/contentextensions/ImmutableNFA.h
@@ -0,0 +1,182 @@
+/*
+ * Copyright (C) 2015 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#pragma once
+
+#if ENABLE(CONTENT_EXTENSIONS)
+
+#include <wtf/Vector.h>
+
+namespace WebCore {
+
+namespace ContentExtensions {
+
+template <typename CharacterType>
+struct ImmutableRange {
+ uint32_t targetStart;
+ uint32_t targetEnd;
+ CharacterType first;
+ CharacterType last;
+};
+
+struct ImmutableNFANode {
+ uint32_t rangesStart { 0 };
+ uint32_t rangesEnd { 0 };
+ uint32_t epsilonTransitionTargetsStart { 0 };
+ uint32_t epsilonTransitionTargetsEnd { 0 };
+ uint32_t actionStart { 0 };
+ uint32_t actionEnd { 0 };
+};
+
+template <typename CharacterType, typename ActionType>
+struct ImmutableNFA {
+ Vector<ImmutableNFANode, 0, ContentExtensionsOverflowHandler> nodes;
+ Vector<ImmutableRange<CharacterType>, 0, ContentExtensionsOverflowHandler> transitions;
+ Vector<uint32_t, 0, ContentExtensionsOverflowHandler> targets;
+ Vector<uint32_t, 0, ContentExtensionsOverflowHandler> epsilonTransitionsTargets;
+ Vector<ActionType, 0, ContentExtensionsOverflowHandler> actions;
+
+ struct ConstTargetIterator {
+ const ImmutableNFA& immutableNFA;
+ uint32_t position;
+
+ const uint32_t& operator*() const { return immutableNFA.targets[position]; }
+ const uint32_t* operator->() const { return &immutableNFA.targets[position]; }
+
+ bool operator==(const ConstTargetIterator& other) const
+ {
+ ASSERT(&immutableNFA == &other.immutableNFA);
+ return position == other.position;
+ }
+ bool operator!=(const ConstTargetIterator& other) const { return !(*this == other); }
+
+ ConstTargetIterator& operator++()
+ {
+ ++position;
+ return *this;
+ }
+ };
+
+ struct IterableConstTargets {
+ const ImmutableNFA& immutableNFA;
+ uint32_t targetStart;
+ uint32_t targetEnd;
+
+ ConstTargetIterator begin() const { return { immutableNFA, targetStart }; }
+ ConstTargetIterator end() const { return { immutableNFA, targetEnd }; }
+ };
+
+ struct ConstRangeIterator {
+ const ImmutableNFA& immutableNFA;
+ uint32_t position;
+
+ bool operator==(const ConstRangeIterator& other) const
+ {
+ ASSERT(&immutableNFA == &other.immutableNFA);
+ return position == other.position;
+ }
+ bool operator!=(const ConstRangeIterator& other) const { return !(*this == other); }
+
+ ConstRangeIterator& operator++()
+ {
+ ++position;
+ return *this;
+ }
+
+ CharacterType first() const
+ {
+ return range().first;
+ }
+
+ CharacterType last() const
+ {
+ return range().last;
+ }
+
+ IterableConstTargets data() const
+ {
+ const ImmutableRange<CharacterType>& range = this->range();
+ return { immutableNFA, range.targetStart, range.targetEnd };
+ };
+
+ private:
+ const ImmutableRange<CharacterType>& range() const
+ {
+ return immutableNFA.transitions[position];
+ }
+ };
+
+ struct IterableConstRange {
+ const ImmutableNFA& immutableNFA;
+ uint32_t rangesStart;
+ uint32_t rangesEnd;
+
+ ConstRangeIterator begin() const { return { immutableNFA, rangesStart }; }
+ ConstRangeIterator end() const { return { immutableNFA, rangesEnd }; }
+
+#if CONTENT_EXTENSIONS_STATE_MACHINE_DEBUGGING
+ void debugPrint() const
+ {
+ for (const auto& range : *this)
+ WTFLogAlways(" %d-%d", range.first, range.last);
+ }
+#endif
+ };
+
+ IterableConstRange transitionsForNode(uint32_t nodeId) const
+ {
+ const ImmutableNFANode& node = nodes[nodeId];
+ return { *this, node.rangesStart, node.rangesEnd };
+ };
+
+ uint32_t root() const
+ {
+ RELEASE_ASSERT(!nodes.isEmpty());
+ return 0;
+ }
+
+ void finalize()
+ {
+ nodes.shrinkToFit();
+ transitions.shrinkToFit();
+ targets.shrinkToFit();
+ epsilonTransitionsTargets.shrinkToFit();
+ actions.shrinkToFit();
+ }
+
+ size_t memoryUsed() const
+ {
+ return nodes.capacity() * sizeof(ImmutableNFANode)
+ + transitions.capacity() * sizeof(ImmutableRange<CharacterType>)
+ + targets.capacity() * sizeof(uint32_t)
+ + epsilonTransitionsTargets.capacity() * sizeof(uint32_t)
+ + actions.capacity() * sizeof(ActionType);
+ }
+};
+
+} // namespace ContentExtensions
+} // namespace WebCore
+
+#endif // ENABLE(CONTENT_EXTENSIONS)
diff --git a/Source/WebCore/contentextensions/ImmutableNFANodeBuilder.h b/Source/WebCore/contentextensions/ImmutableNFANodeBuilder.h
new file mode 100644
index 000000000..e7cd31cf7
--- /dev/null
+++ b/Source/WebCore/contentextensions/ImmutableNFANodeBuilder.h
@@ -0,0 +1,229 @@
+/*
+ * Copyright (C) 2015 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#pragma once
+
+#include "ImmutableNFA.h"
+#include "MutableRangeList.h"
+#include <wtf/HashSet.h>
+
+#if ENABLE(CONTENT_EXTENSIONS)
+
+namespace WebCore {
+
+namespace ContentExtensions {
+
+// A ImmutableNFANodeBuilder let you build a NFA node by adding states and linking with other nodes.
+// Whe a builder is destructed, all its properties are finalized into the NFA. Using the NA with a live
+// builder results in undefined behaviors.
+template <typename CharacterType, typename ActionType>
+class ImmutableNFANodeBuilder {
+ typedef ImmutableNFA<CharacterType, ActionType> TypedImmutableNFA;
+ typedef HashSet<uint32_t, DefaultHash<uint32_t>::Hash, WTF::UnsignedWithZeroKeyHashTraits<uint32_t>> TargetSet;
+public:
+ ImmutableNFANodeBuilder() { }
+
+ ImmutableNFANodeBuilder(TypedImmutableNFA& immutableNFA)
+ : m_immutableNFA(&immutableNFA)
+ , m_finalized(false)
+ {
+ m_nodeId = m_immutableNFA->nodes.size();
+ m_immutableNFA->nodes.append(ImmutableNFANode());
+ }
+
+ ImmutableNFANodeBuilder(ImmutableNFANodeBuilder&& other)
+ : m_immutableNFA(other.m_immutableNFA)
+ , m_ranges(WTFMove(other.m_ranges))
+ , m_epsilonTransitionTargets(WTFMove(other.m_epsilonTransitionTargets))
+ , m_actions(WTFMove(other.m_actions))
+ , m_nodeId(other.m_nodeId)
+ , m_finalized(other.m_finalized)
+ {
+ other.m_immutableNFA = nullptr;
+ other.m_finalized = true;
+ }
+
+ ~ImmutableNFANodeBuilder()
+ {
+ if (!m_finalized)
+ finalize();
+ }
+
+ bool isValid() const
+ {
+ return !!m_immutableNFA;
+ }
+
+ uint32_t nodeId() const
+ {
+ ASSERT(isValid());
+ return m_nodeId;
+ }
+
+ struct TrivialRange {
+ CharacterType first;
+ CharacterType last;
+ };
+
+ struct FakeRangeIterator {
+ CharacterType first() const { return range.first; }
+ CharacterType last() const { return range.last; }
+ uint32_t data() const { return targetId; }
+ bool operator==(const FakeRangeIterator& other)
+ {
+ return this->isEnd == other.isEnd;
+ }
+ bool operator!=(const FakeRangeIterator& other) { return !(*this == other); }
+ FakeRangeIterator operator++()
+ {
+ isEnd = true;
+ return *this;
+ }
+
+ TrivialRange range;
+ uint32_t targetId;
+ bool isEnd;
+ };
+
+ void addTransition(CharacterType first, CharacterType last, uint32_t targetNodeId)
+ {
+ ASSERT(!m_finalized);
+ ASSERT(m_immutableNFA);
+
+ struct Converter {
+ TargetSet convert(uint32_t target)
+ {
+ return TargetSet({ target });
+ }
+ void extend(TargetSet& existingTargets, uint32_t target)
+ {
+ existingTargets.add(target);
+ }
+ };
+
+ Converter converter;
+ m_ranges.extend(FakeRangeIterator { { first, last }, targetNodeId, false }, FakeRangeIterator { { 0, 0 }, targetNodeId, true }, converter);
+ }
+
+ void addEpsilonTransition(const ImmutableNFANodeBuilder& target)
+ {
+ ASSERT(m_immutableNFA == target.m_immutableNFA);
+ addEpsilonTransition(target.m_nodeId);
+ }
+
+ void addEpsilonTransition(uint32_t targetNodeId)
+ {
+ ASSERT(!m_finalized);
+ ASSERT(m_immutableNFA);
+ m_epsilonTransitionTargets.add(targetNodeId);
+ }
+
+ template<typename ActionIterator>
+ void setActions(ActionIterator begin, ActionIterator end)
+ {
+ ASSERT(!m_finalized);
+ ASSERT(m_immutableNFA);
+
+ m_actions.add(begin, end);
+ }
+
+ ImmutableNFANodeBuilder& operator=(ImmutableNFANodeBuilder&& other)
+ {
+ if (!m_finalized)
+ finalize();
+
+ m_immutableNFA = other.m_immutableNFA;
+ m_ranges = WTFMove(other.m_ranges);
+ m_epsilonTransitionTargets = WTFMove(other.m_epsilonTransitionTargets);
+ m_actions = WTFMove(other.m_actions);
+ m_nodeId = other.m_nodeId;
+ m_finalized = other.m_finalized;
+
+ other.m_immutableNFA = nullptr;
+ other.m_finalized = true;
+ return *this;
+ }
+
+private:
+ void finalize()
+ {
+ ASSERT_WITH_MESSAGE(!m_finalized, "The API contract is that the builder can only be finalized once.");
+ m_finalized = true;
+ ImmutableNFANode& immutableNFANode = m_immutableNFA->nodes[m_nodeId];
+ sinkActions(immutableNFANode);
+ sinkEpsilonTransitions(immutableNFANode);
+ sinkTransitions(immutableNFANode);
+ }
+
+ void sinkActions(ImmutableNFANode& immutableNFANode)
+ {
+ unsigned actionStart = m_immutableNFA->actions.size();
+ for (const ActionType& action : m_actions)
+ m_immutableNFA->actions.append(action);
+ unsigned actionEnd = m_immutableNFA->actions.size();
+ immutableNFANode.actionStart = actionStart;
+ immutableNFANode.actionEnd = actionEnd;
+ }
+
+ void sinkTransitions(ImmutableNFANode& immutableNFANode)
+ {
+ unsigned transitionsStart = m_immutableNFA->transitions.size();
+ for (const auto& range : m_ranges) {
+ unsigned targetsStart = m_immutableNFA->targets.size();
+ for (uint32_t target : range.data)
+ m_immutableNFA->targets.append(target);
+ unsigned targetsEnd = m_immutableNFA->targets.size();
+
+ m_immutableNFA->transitions.append(ImmutableRange<CharacterType> { targetsStart, targetsEnd, range.first, range.last });
+ }
+ unsigned transitionsEnd = m_immutableNFA->transitions.size();
+
+ immutableNFANode.rangesStart = transitionsStart;
+ immutableNFANode.rangesEnd = transitionsEnd;
+ }
+
+ void sinkEpsilonTransitions(ImmutableNFANode& immutableNFANode)
+ {
+ unsigned start = m_immutableNFA->epsilonTransitionsTargets.size();
+ for (uint32_t target : m_epsilonTransitionTargets)
+ m_immutableNFA->epsilonTransitionsTargets.append(target);
+ unsigned end = m_immutableNFA->epsilonTransitionsTargets.size();
+
+ immutableNFANode.epsilonTransitionTargetsStart = start;
+ immutableNFANode.epsilonTransitionTargetsEnd = end;
+ }
+
+ TypedImmutableNFA* m_immutableNFA { nullptr };
+ MutableRangeList<CharacterType, TargetSet> m_ranges;
+ TargetSet m_epsilonTransitionTargets;
+ HashSet<ActionType, WTF::IntHash<ActionType>, WTF::UnsignedWithZeroKeyHashTraits<ActionType>> m_actions;
+ uint32_t m_nodeId;
+ bool m_finalized { true };
+};
+
+} // namespace ContentExtensions
+} // namespace WebCore
+
+#endif // ENABLE(CONTENT_EXTENSIONS)
diff --git a/Source/WebCore/contentextensions/MutableRange.h b/Source/WebCore/contentextensions/MutableRange.h
new file mode 100644
index 000000000..2802881ff
--- /dev/null
+++ b/Source/WebCore/contentextensions/MutableRange.h
@@ -0,0 +1,96 @@
+/*
+ * Copyright (C) 2015 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#pragma once
+
+#if ENABLE(CONTENT_EXTENSIONS)
+
+namespace WebCore {
+
+namespace ContentExtensions {
+
+template <typename CharacterType, typename DataType>
+class MutableRange {
+ typedef MutableRange<CharacterType, DataType> TypedMutableRange;
+public:
+ MutableRange(uint32_t nextRangeIndex, CharacterType first, CharacterType last)
+ : nextRangeIndex(nextRangeIndex)
+ , first(first)
+ , last(last)
+ {
+ ASSERT(first <= last);
+ }
+
+ MutableRange(const DataType& data, uint32_t nextRangeIndex, CharacterType first, CharacterType last)
+ : data(data)
+ , nextRangeIndex(nextRangeIndex)
+ , first(first)
+ , last(last)
+ {
+ ASSERT(first <= last);
+ }
+
+ MutableRange(DataType&& data, uint32_t nextRangeIndex, CharacterType first, CharacterType last)
+ : data(WTFMove(data))
+ , nextRangeIndex(nextRangeIndex)
+ , first(first)
+ , last(last)
+ {
+ ASSERT(first <= last);
+ }
+
+ MutableRange(MutableRange&& other)
+ : data(WTFMove(other.data))
+ , nextRangeIndex(other.nextRangeIndex)
+ , first(other.first)
+ , last(other.last)
+ {
+ ASSERT(first <= last);
+ }
+
+ TypedMutableRange& operator=(TypedMutableRange&& other)
+ {
+ data = WTFMove(other.data);
+ nextRangeIndex = WTFMove(other.nextRangeIndex);
+ first = WTFMove(other.first);
+ last = WTFMove(other.last);
+ return *this;
+ }
+
+ DataType data;
+
+ // We use a funny convention: if there are no nextRange, the nextRangeIndex is zero.
+ // This is faster to check than a special value in many cases.
+ // We can use zero because ranges can only form a chain, and the first range is always zero by convention.
+ // When we insert something in from of the first range, we swap the values.
+ uint32_t nextRangeIndex;
+ CharacterType first;
+ CharacterType last;
+};
+
+} // namespace ContentExtensions
+} // namespace WebCore
+
+#endif // ENABLE(CONTENT_EXTENSIONS)
diff --git a/Source/WebCore/contentextensions/MutableRangeList.h b/Source/WebCore/contentextensions/MutableRangeList.h
new file mode 100644
index 000000000..9b20687b8
--- /dev/null
+++ b/Source/WebCore/contentextensions/MutableRangeList.h
@@ -0,0 +1,266 @@
+/*
+ * Copyright (C) 2015 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#pragma once
+
+#include "MutableRange.h"
+
+#if ENABLE(CONTENT_EXTENSIONS)
+
+namespace WebCore {
+
+namespace ContentExtensions {
+
+// A range list keeps ranges sorted. Ranges are not sorted in the vector, but
+template <typename CharacterType, typename DataType, unsigned inlineCapacity = 0>
+class MutableRangeList {
+ typedef MutableRange<CharacterType, DataType> TypedMutableRange;
+public:
+ class ConstIterator {
+ public:
+ const MutableRangeList& rangeList;
+ uint32_t index;
+ bool atEnd;
+
+ const TypedMutableRange& operator*() const { return rangeList.m_ranges[index]; }
+ const TypedMutableRange* operator->() const { return &rangeList.m_ranges[index]; }
+
+ CharacterType first() const { return rangeList.m_ranges[index].first; }
+ CharacterType last() const { return rangeList.m_ranges[index].last; }
+ CharacterType data() const { return rangeList.m_ranges[index].data; }
+
+ bool operator==(const ConstIterator& other) const
+ {
+ ASSERT(&rangeList == &other.rangeList);
+ if (atEnd || other.atEnd)
+ return atEnd == other.atEnd;
+ return index == other.index;
+ }
+ bool operator!=(const ConstIterator& other) const { return !(*this == other); }
+
+ ConstIterator& operator++()
+ {
+ ASSERT(!atEnd);
+ index = rangeList.m_ranges[index].nextRangeIndex;
+ if (!index)
+ atEnd = true;
+ return *this;
+ }
+ };
+
+ ConstIterator begin() const { return ConstIterator { *this, 0, m_ranges.isEmpty() }; }
+ ConstIterator end() const { return ConstIterator { *this, 0, true }; }
+
+ uint32_t appendRange(uint32_t lastRangeIndex, CharacterType first, CharacterType last, const DataType& data)
+ {
+ uint32_t newRangeIndex = m_ranges.size();
+ m_ranges.append(TypedMutableRange(data, 0, first, last));
+ if (!newRangeIndex)
+ return 0;
+
+ ASSERT(!m_ranges[lastRangeIndex].nextRangeIndex);
+ ASSERT(m_ranges[lastRangeIndex].last < first);
+
+ m_ranges[lastRangeIndex].nextRangeIndex = newRangeIndex;
+ return newRangeIndex;
+ }
+
+ template <typename RangeIterator, typename DataConverter>
+ void extend(RangeIterator otherIterator, RangeIterator otherEnd, DataConverter dataConverter)
+ {
+ if (otherIterator == otherEnd)
+ return;
+
+ if (m_ranges.isEmpty()) {
+ initializeFrom(otherIterator, otherEnd, dataConverter);
+ return;
+ }
+
+ bool reachedSelfEnd = false;
+ uint32_t lastSelfRangeIndex = 0;
+ uint32_t selfRangeIndex = 0;
+
+ auto otherRangeOffset = otherIterator.first(); // To get the right type :)
+ otherRangeOffset = 0;
+
+ do {
+ TypedMutableRange* activeSelfRange = &m_ranges[selfRangeIndex];
+
+ // First, we move forward until we find something interesting.
+ if (activeSelfRange->last < otherIterator.first() + otherRangeOffset) {
+ lastSelfRangeIndex = selfRangeIndex;
+ selfRangeIndex = activeSelfRange->nextRangeIndex;
+ reachedSelfEnd = !selfRangeIndex;
+ continue;
+ }
+ if (otherIterator.last() < activeSelfRange->first) {
+ insertBetween(lastSelfRangeIndex, selfRangeIndex, otherIterator.first() + otherRangeOffset, otherIterator.last(), dataConverter.convert(otherIterator.data()));
+
+ ++otherIterator;
+ otherRangeOffset = 0;
+ continue;
+ }
+
+ // If we reached here, we have:
+ // 1) activeRangeA->last >= activeRangeB->first.
+ // 2) activeRangeA->first <= activeRangeB->last.
+ // But we don't know how they collide.
+
+ // Do we have a part on the left? Create a new range for it.
+ if (activeSelfRange->first < otherIterator.first() + otherRangeOffset) {
+ DataType copiedData = activeSelfRange->data;
+ CharacterType newRangeFirstCharacter = activeSelfRange->first;
+ activeSelfRange->first = otherIterator.first() + otherRangeOffset;
+ insertBetween(lastSelfRangeIndex, selfRangeIndex, newRangeFirstCharacter, otherIterator.first() + otherRangeOffset - 1, WTFMove(copiedData));
+ activeSelfRange = &m_ranges[selfRangeIndex];
+ } else if (otherIterator.first() + otherRangeOffset < activeSelfRange->first) {
+ insertBetween(lastSelfRangeIndex, selfRangeIndex, otherIterator.first() + otherRangeOffset, activeSelfRange->first - 1, dataConverter.convert(otherIterator.data()));
+
+ activeSelfRange = &m_ranges[selfRangeIndex];
+ ASSERT_WITH_MESSAGE(otherRangeOffset < activeSelfRange->first - otherIterator.first(), "The offset must move forward or we could get stuck on this operation.");
+ otherRangeOffset = activeSelfRange->first - otherIterator.first();
+ }
+
+ // Here, we know both ranges start at the same point, we need to create the part that intersect
+ // and merge the data.
+
+ if (activeSelfRange->last == otherIterator.last()) {
+ // If they finish together, things are really easy: we just add B to A.
+ dataConverter.extend(activeSelfRange->data, otherIterator.data());
+
+ lastSelfRangeIndex = selfRangeIndex;
+ selfRangeIndex = activeSelfRange->nextRangeIndex;
+ reachedSelfEnd = !selfRangeIndex;
+
+ ++otherIterator;
+ otherRangeOffset = 0;
+ continue;
+ }
+
+ if (activeSelfRange->last > otherIterator.last()) {
+ // If A is bigger than B, we add a merged version and move A to the right.
+
+ CharacterType combinedPartStart = activeSelfRange->first;
+ activeSelfRange->first = otherIterator.last() + 1;
+
+ DataType combinedData = activeSelfRange->data;
+ dataConverter.extend(combinedData, otherIterator.data());
+ insertBetween(lastSelfRangeIndex, selfRangeIndex, combinedPartStart, otherIterator.last(), WTFMove(combinedData));
+
+ ++otherIterator;
+ otherRangeOffset = 0;
+ continue;
+ }
+
+ // If we reached here, B ends after A. We merge the intersection and move on.
+ ASSERT(otherIterator.last() > activeSelfRange->last);
+ dataConverter.extend(activeSelfRange->data, otherIterator.data());
+
+ otherRangeOffset = activeSelfRange->last - otherIterator.first() + 1;
+ lastSelfRangeIndex = selfRangeIndex;
+ selfRangeIndex = activeSelfRange->nextRangeIndex;
+ reachedSelfEnd = !selfRangeIndex;
+ } while (!reachedSelfEnd && otherIterator != otherEnd);
+
+ while (otherIterator != otherEnd) {
+ lastSelfRangeIndex = appendRange(lastSelfRangeIndex, otherIterator.first() + otherRangeOffset, otherIterator.last(), dataConverter.convert(otherIterator.data()));
+ otherRangeOffset = 0;
+ ++otherIterator;
+ }
+ }
+
+ unsigned size() const
+ {
+ return m_ranges.size();
+ }
+
+ bool isEmpty() const
+ {
+ return m_ranges.isEmpty();
+ }
+
+ void clear()
+ {
+ m_ranges.clear();
+ }
+
+#if CONTENT_EXTENSIONS_STATE_MACHINE_DEBUGGING
+ void debugPrint() const
+ {
+ for (const TypedMutableRange& range : *this)
+ WTFLogAlways(" %d-%d", range.first, range.last);
+ }
+#endif
+
+ Vector<MutableRange<CharacterType, DataType>, inlineCapacity, ContentExtensionsOverflowHandler> m_ranges;
+private:
+ void insertBetween(uint32_t& leftRangeIndex, uint32_t& rightRangeIndex, CharacterType first, CharacterType last, DataType&& data)
+ {
+ ASSERT(m_ranges[rightRangeIndex].first > last);
+
+ if (!rightRangeIndex) {
+ // This is a special case. We always keep the first range as the first element in the vector.
+ uint32_t movedRangeIndex = m_ranges.size();
+ m_ranges.append(WTFMove(m_ranges.first()));
+ m_ranges[0] = TypedMutableRange(WTFMove(data), movedRangeIndex, first, last);
+ leftRangeIndex = 0;
+ rightRangeIndex = movedRangeIndex;
+ return;
+ }
+
+ ASSERT(m_ranges[leftRangeIndex].nextRangeIndex == rightRangeIndex);
+ ASSERT(m_ranges[leftRangeIndex].last < first);
+
+ uint32_t newRangeIndex = m_ranges.size();
+ m_ranges.append(TypedMutableRange(WTFMove(data), rightRangeIndex, first, last));
+ m_ranges[leftRangeIndex].nextRangeIndex = newRangeIndex;
+ leftRangeIndex = newRangeIndex;
+ }
+
+ template <typename RangeIterator, typename DataConverter>
+ void initializeFrom(RangeIterator otherIterator, RangeIterator otherEnd, DataConverter dataConverter)
+ {
+ ASSERT_WITH_MESSAGE(otherIterator != otherEnd, "We should never do anything when extending with a null range.");
+ ASSERT_WITH_MESSAGE(m_ranges.isEmpty(), "This code does not handle splitting, it can only be used on empty RangeList.");
+
+ uint32_t loopCounter = 0;
+ do {
+ m_ranges.append(TypedMutableRange(dataConverter.convert(otherIterator.data()),
+ loopCounter + 1,
+ otherIterator.first(),
+ otherIterator.last()));
+ ++loopCounter;
+ ++otherIterator;
+ } while (otherIterator != otherEnd);
+
+ if (!m_ranges.isEmpty())
+ m_ranges.last().nextRangeIndex = 0;
+ }
+};
+
+} // namespace ContentExtensions
+} // namespace WebCore
+
+#endif // ENABLE(CONTENT_EXTENSIONS)
diff --git a/Source/WebCore/contentextensions/NFA.cpp b/Source/WebCore/contentextensions/NFA.cpp
new file mode 100644
index 000000000..bc9921a79
--- /dev/null
+++ b/Source/WebCore/contentextensions/NFA.cpp
@@ -0,0 +1,126 @@
+/*
+ * Copyright (C) 2014, 2015 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "NFA.h"
+
+#if ENABLE(CONTENT_EXTENSIONS)
+
+#include <wtf/ASCIICType.h>
+#include <wtf/DataLog.h>
+
+namespace WebCore {
+
+namespace ContentExtensions {
+
+#if CONTENT_EXTENSIONS_STATE_MACHINE_DEBUGGING
+void NFA::debugPrintDot() const
+{
+ dataLogF("digraph NFA_Transitions {\n");
+ dataLogF(" rankdir=LR;\n");
+ dataLogF(" node [shape=circle];\n");
+ dataLogF(" {\n");
+
+ for (unsigned i = 0; i < nodes.size(); ++i) {
+ dataLogF(" %d [label=<Node %d", i, i);
+
+ const auto& node = nodes[i];
+
+ if (node.actionStart != node.actionEnd) {
+ dataLogF("<BR/>(Final: ");
+ bool isFirst = true;
+ for (unsigned actionIndex = node.actionStart; actionIndex < node.actionEnd; ++actionIndex) {
+ if (!isFirst)
+ dataLogF(", ");
+ dataLogF("%llu", actions[actionIndex]);
+ isFirst = false;
+ }
+ dataLogF(")");
+ }
+ dataLogF(">]");
+
+ if (node.actionStart != node.actionEnd)
+ dataLogF(" [shape=doublecircle]");
+ dataLogF(";\n");
+ }
+ dataLogF(" }\n");
+
+ dataLogF(" {\n");
+ for (unsigned i = 0; i < nodes.size(); ++i) {
+ const auto& node = nodes[i];
+
+ HashMap<uint32_t, Vector<uint32_t>, DefaultHash<unsigned>::Hash, WTF::UnsignedWithZeroKeyHashTraits<unsigned>> transitionsPerTarget;
+
+ for (uint32_t transitionIndex = node.rangesStart; transitionIndex < node.rangesEnd; ++transitionIndex) {
+ const ImmutableCharRange& range = transitions[transitionIndex];
+ for (uint32_t targetIndex = range.targetStart; targetIndex < range.targetEnd; ++targetIndex) {
+ uint32_t target = targets[targetIndex];
+ auto addResult = transitionsPerTarget.add(target, Vector<uint32_t>());
+ addResult.iterator->value.append(transitionIndex);
+ }
+ }
+
+ for (const auto& slot : transitionsPerTarget) {
+ dataLogF(" %d -> %d [label=\"", i, slot.key);
+
+ bool isFirst = true;
+ for (uint32_t rangeIndex : slot.value) {
+ if (!isFirst)
+ dataLogF(", ");
+ else
+ isFirst = false;
+
+ const ImmutableCharRange& range = transitions[rangeIndex];
+ if (range.first == range.last) {
+ if (isASCIIPrintable(range.first))
+ dataLogF("%c", range.first);
+ else
+ dataLogF("%d", range.first);
+ } else {
+ if (isASCIIPrintable(range.first) && isASCIIPrintable(range.last))
+ dataLogF("%c-%c", range.first, range.last);
+ else
+ dataLogF("%d-%d", range.first, range.last);
+ }
+ }
+ dataLogF("\"]\n");
+ }
+
+ for (uint32_t epsilonTargetIndex = node.epsilonTransitionTargetsStart; epsilonTargetIndex < node.epsilonTransitionTargetsEnd; ++epsilonTargetIndex) {
+ uint32_t target = epsilonTransitionsTargets[epsilonTargetIndex];
+ dataLogF(" %d -> %d [label=\"É›\"]\n", i, target);
+ }
+ }
+
+ dataLogF(" }\n");
+ dataLogF("}\n");
+}
+#endif
+
+} // namespace ContentExtensions
+
+} // namespace WebCore
+
+#endif // ENABLE(CONTENT_EXTENSIONS)
diff --git a/Source/WebCore/contentextensions/NFA.h b/Source/WebCore/contentextensions/NFA.h
new file mode 100644
index 000000000..c86f92b52
--- /dev/null
+++ b/Source/WebCore/contentextensions/NFA.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (C) 2014 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#pragma once
+
+#if ENABLE(CONTENT_EXTENSIONS)
+
+#include "ContentExtensionsDebugging.h"
+#include "ImmutableNFANodeBuilder.h"
+#include "NFANode.h"
+
+namespace WebCore {
+
+namespace ContentExtensions {
+
+typedef ImmutableRange<char> ImmutableCharRange;
+typedef ImmutableNFANodeBuilder<char, uint64_t> ImmutableCharNFANodeBuilder;
+
+struct NFA : public ImmutableNFA<char, uint64_t> {
+#if CONTENT_EXTENSIONS_STATE_MACHINE_DEBUGGING
+ void debugPrintDot() const;
+#endif
+};
+
+} // namespace ContentExtensions
+} // namespace WebCore
+
+#endif // ENABLE(CONTENT_EXTENSIONS)
diff --git a/Source/WebCore/contentextensions/NFANode.h b/Source/WebCore/contentextensions/NFANode.h
new file mode 100644
index 000000000..b70349b3f
--- /dev/null
+++ b/Source/WebCore/contentextensions/NFANode.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (C) 2014, 2015 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#pragma once
+
+#if ENABLE(CONTENT_EXTENSIONS)
+
+#include "ContentExtensionsDebugging.h"
+#include <wtf/HashMap.h>
+#include <wtf/HashSet.h>
+#include <wtf/Vector.h>
+
+namespace WebCore {
+
+namespace ContentExtensions {
+
+// A NFANode abstract the transition table out of a NFA state.
+
+typedef Vector<uint64_t, 0, WTF::CrashOnOverflow, 1> ActionList;
+typedef HashSet<unsigned, DefaultHash<unsigned>::Hash, WTF::UnsignedWithZeroKeyHashTraits<unsigned>> NFANodeIndexSet;
+typedef HashMap<uint16_t, NFANodeIndexSet, DefaultHash<uint16_t>::Hash, WTF::UnsignedWithZeroKeyHashTraits<uint16_t>> NFANodeTransitions;
+
+class NFANode {
+public:
+ NFANodeTransitions transitions;
+ NFANodeIndexSet transitionsOnAnyCharacter;
+
+ ActionList finalRuleIds;
+};
+
+} // namespace ContentExtensions
+} // namespace WebCore
+
+#endif // ENABLE(CONTENT_EXTENSIONS)
diff --git a/Source/WebCore/contentextensions/NFAToDFA.cpp b/Source/WebCore/contentextensions/NFAToDFA.cpp
new file mode 100644
index 000000000..843df3008
--- /dev/null
+++ b/Source/WebCore/contentextensions/NFAToDFA.cpp
@@ -0,0 +1,398 @@
+/*
+ * Copyright (C) 2014, 2015 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "NFAToDFA.h"
+
+#if ENABLE(CONTENT_EXTENSIONS)
+
+#include "ContentExtensionsDebugging.h"
+#include "DFANode.h"
+#include "ImmutableNFA.h"
+#include "MutableRangeList.h"
+#include "NFA.h"
+#include <wtf/DataLog.h>
+#include <wtf/HashMap.h>
+#include <wtf/HashSet.h>
+
+namespace WebCore {
+
+namespace ContentExtensions {
+
+typedef MutableRange<char, NFANodeIndexSet> NFANodeRange;
+typedef MutableRangeList<char, NFANodeIndexSet> NFANodeRangeList;
+typedef MutableRangeList<char, NFANodeIndexSet, 128> PreallocatedNFANodeRangeList;
+typedef Vector<uint32_t, 0, ContentExtensionsOverflowHandler> UniqueNodeList;
+typedef Vector<UniqueNodeList, 0, ContentExtensionsOverflowHandler> NFANodeClosures;
+
+// FIXME: set a better initial size.
+// FIXME: include the hash inside NodeIdSet.
+typedef NFANodeIndexSet NodeIdSet;
+
+static inline void epsilonClosureExcludingSelf(NFA& nfa, unsigned nodeId, UniqueNodeList& output)
+{
+ NodeIdSet closure({ nodeId });
+ Vector<unsigned, 64, ContentExtensionsOverflowHandler> unprocessedNodes({ nodeId });
+
+ do {
+ unsigned unprocessedNodeId = unprocessedNodes.takeLast();
+ const auto& node = nfa.nodes[unprocessedNodeId];
+
+ for (uint32_t epsilonTargetIndex = node.epsilonTransitionTargetsStart; epsilonTargetIndex < node.epsilonTransitionTargetsEnd; ++epsilonTargetIndex) {
+ uint32_t targetNodeId = nfa.epsilonTransitionsTargets[epsilonTargetIndex];
+ auto addResult = closure.add(targetNodeId);
+ if (addResult.isNewEntry) {
+ unprocessedNodes.append(targetNodeId);
+ output.append(targetNodeId);
+ }
+ }
+ } while (!unprocessedNodes.isEmpty());
+
+ output.shrinkToFit();
+}
+
+static void resolveEpsilonClosures(NFA& nfa, NFANodeClosures& nfaNodeClosures)
+{
+ unsigned nfaGraphSize = nfa.nodes.size();
+ nfaNodeClosures.resize(nfaGraphSize);
+
+ for (unsigned nodeId = 0; nodeId < nfaGraphSize; ++nodeId)
+ epsilonClosureExcludingSelf(nfa, nodeId, nfaNodeClosures[nodeId]);
+
+ // Every nodes still point to that table, but we won't use it ever again.
+ // Clear it to get back the memory. That's not pretty but memory is important here, we have both
+ // graphs existing at the same time.
+ nfa.epsilonTransitionsTargets.clear();
+}
+
+static ALWAYS_INLINE void extendSetWithClosure(const NFANodeClosures& nfaNodeClosures, unsigned nodeId, NodeIdSet& set)
+{
+ ASSERT(set.contains(nodeId));
+ const UniqueNodeList& nodeClosure = nfaNodeClosures[nodeId];
+ if (!nodeClosure.isEmpty())
+ set.add(nodeClosure.begin(), nodeClosure.end());
+}
+
+struct UniqueNodeIdSetImpl {
+ unsigned* buffer()
+ {
+ return m_buffer;
+ }
+
+ const unsigned* buffer() const
+ {
+ return m_buffer;
+ }
+
+ unsigned m_size;
+ unsigned m_hash;
+ unsigned m_dfaNodeId;
+private:
+ unsigned m_buffer[1];
+};
+
+typedef Vector<UniqueNodeIdSetImpl*, 128, ContentExtensionsOverflowHandler> UniqueNodeQueue;
+
+class UniqueNodeIdSet {
+public:
+ UniqueNodeIdSet() { }
+ enum EmptyValueTag { EmptyValue };
+ enum DeletedValueTag { DeletedValue };
+
+ UniqueNodeIdSet(EmptyValueTag) { }
+ UniqueNodeIdSet(DeletedValueTag)
+ : m_uniqueNodeIdSetBuffer(reinterpret_cast<UniqueNodeIdSetImpl*>(-1))
+ {
+ }
+
+ UniqueNodeIdSet(const NodeIdSet& nodeIdSet, unsigned hash, unsigned dfaNodeId)
+ {
+ ASSERT(nodeIdSet.size());
+
+ unsigned size = nodeIdSet.size();
+ size_t byteSize = sizeof(UniqueNodeIdSetImpl) + (size - 1) * sizeof(unsigned);
+ m_uniqueNodeIdSetBuffer = static_cast<UniqueNodeIdSetImpl*>(fastMalloc(byteSize));
+
+ m_uniqueNodeIdSetBuffer->m_size = size;
+ m_uniqueNodeIdSetBuffer->m_hash = hash;
+ m_uniqueNodeIdSetBuffer->m_dfaNodeId = dfaNodeId;
+
+ unsigned* buffer = m_uniqueNodeIdSetBuffer->buffer();
+ for (unsigned nodeId : nodeIdSet) {
+ *buffer = nodeId;
+ ++buffer;
+ }
+ }
+
+ UniqueNodeIdSet(UniqueNodeIdSet&& other)
+ : m_uniqueNodeIdSetBuffer(other.m_uniqueNodeIdSetBuffer)
+ {
+ other.m_uniqueNodeIdSetBuffer = nullptr;
+ }
+
+ UniqueNodeIdSet& operator=(UniqueNodeIdSet&& other)
+ {
+ m_uniqueNodeIdSetBuffer = other.m_uniqueNodeIdSetBuffer;
+ other.m_uniqueNodeIdSetBuffer = nullptr;
+ return *this;
+ }
+
+ ~UniqueNodeIdSet()
+ {
+ fastFree(m_uniqueNodeIdSetBuffer);
+ }
+
+ bool operator==(const UniqueNodeIdSet& other) const
+ {
+ return m_uniqueNodeIdSetBuffer == other.m_uniqueNodeIdSetBuffer;
+ }
+
+ bool operator==(const NodeIdSet& other) const
+ {
+ if (m_uniqueNodeIdSetBuffer->m_size != static_cast<unsigned>(other.size()))
+ return false;
+ unsigned* buffer = m_uniqueNodeIdSetBuffer->buffer();
+ for (unsigned i = 0; i < m_uniqueNodeIdSetBuffer->m_size; ++i) {
+ if (!other.contains(buffer[i]))
+ return false;
+ }
+ return true;
+ }
+
+ UniqueNodeIdSetImpl* impl() const { return m_uniqueNodeIdSetBuffer; }
+
+ unsigned hash() const { return m_uniqueNodeIdSetBuffer->m_hash; }
+ bool isEmptyValue() const { return !m_uniqueNodeIdSetBuffer; }
+ bool isDeletedValue() const { return m_uniqueNodeIdSetBuffer == reinterpret_cast<UniqueNodeIdSetImpl*>(-1); }
+
+private:
+ UniqueNodeIdSetImpl* m_uniqueNodeIdSetBuffer = nullptr;
+};
+
+struct UniqueNodeIdSetHash {
+ static unsigned hash(const UniqueNodeIdSet& p)
+ {
+ return p.hash();
+ }
+
+ static bool equal(const UniqueNodeIdSet& a, const UniqueNodeIdSet& b)
+ {
+ return a == b;
+ }
+ // It would be fine to compare empty or deleted here, but not for the HashTranslator.
+ static const bool safeToCompareToEmptyOrDeleted = false;
+};
+
+struct UniqueNodeIdSetHashHashTraits : public WTF::CustomHashTraits<UniqueNodeIdSet> {
+ static const bool emptyValueIsZero = true;
+
+ // FIXME: Get a good size.
+ static const int minimumTableSize = 128;
+};
+
+typedef HashSet<std::unique_ptr<UniqueNodeIdSet>, UniqueNodeIdSetHash, UniqueNodeIdSetHashHashTraits> UniqueNodeIdSetTable;
+
+struct NodeIdSetToUniqueNodeIdSetSource {
+ NodeIdSetToUniqueNodeIdSetSource(DFA& dfa, const NFA& nfa, const NodeIdSet& nodeIdSet)
+ : dfa(dfa)
+ , nfa(nfa)
+ , nodeIdSet(nodeIdSet)
+ {
+ // The hashing operation must be independant of the nodeId.
+ unsigned hash = 4207445155;
+ for (unsigned nodeId : nodeIdSet)
+ hash += nodeId;
+ this->hash = DefaultHash<unsigned>::Hash::hash(hash);
+ }
+ DFA& dfa;
+ const NFA& nfa;
+ const NodeIdSet& nodeIdSet;
+ unsigned hash;
+};
+
+struct NodeIdSetToUniqueNodeIdSetTranslator {
+ static unsigned hash(const NodeIdSetToUniqueNodeIdSetSource& source)
+ {
+ return source.hash;
+ }
+
+ static inline bool equal(const UniqueNodeIdSet& a, const NodeIdSetToUniqueNodeIdSetSource& b)
+ {
+ return a == b.nodeIdSet;
+ }
+
+ static void translate(UniqueNodeIdSet& location, const NodeIdSetToUniqueNodeIdSetSource& source, unsigned hash)
+ {
+ DFANode newDFANode;
+
+ HashSet<uint64_t, DefaultHash<uint64_t>::Hash, WTF::UnsignedWithZeroKeyHashTraits<uint64_t>> actions;
+
+ for (unsigned nfaNodeId : source.nodeIdSet) {
+ const auto& nfaNode = source.nfa.nodes[nfaNodeId];
+ for (unsigned actionIndex = nfaNode.actionStart; actionIndex < nfaNode.actionEnd; ++actionIndex)
+ actions.add(source.nfa.actions[actionIndex]);
+ }
+
+ unsigned actionsStart = source.dfa.actions.size();
+ for (uint64_t action : actions)
+ source.dfa.actions.append(action);
+ unsigned actionsEnd = source.dfa.actions.size();
+ unsigned actionsLength = actionsEnd - actionsStart;
+ RELEASE_ASSERT_WITH_MESSAGE(actionsLength <= std::numeric_limits<uint16_t>::max(), "Too many actions for the current DFANode size.");
+ newDFANode.setActions(actionsStart, static_cast<uint16_t>(actionsLength));
+
+ unsigned dfaNodeId = source.dfa.nodes.size();
+ source.dfa.nodes.append(newDFANode);
+ new (NotNull, &location) UniqueNodeIdSet(source.nodeIdSet, hash, dfaNodeId);
+
+ ASSERT(location.impl());
+ }
+};
+
+class SetTransitions {
+public:
+ NodeIdSet& operator[](unsigned index)
+ {
+ ASSERT(index < size());
+ return m_targets[index];
+ }
+
+ unsigned size() const
+ {
+ return WTF_ARRAY_LENGTH(m_targets);
+ }
+
+ NodeIdSet* begin()
+ {
+ return m_targets;
+ }
+
+ NodeIdSet* end()
+ {
+ return m_targets + size();
+ }
+
+private:
+ NodeIdSet m_targets[128];
+};
+
+struct DataConverterWithEpsilonClosure {
+ const NFANodeClosures& nfaNodeclosures;
+
+ template<typename Iterable>
+ NFANodeIndexSet convert(const Iterable& iterable)
+ {
+ NFANodeIndexSet result;
+ for (unsigned nodeId : iterable) {
+ result.add(nodeId);
+ const UniqueNodeList& nodeClosure = nfaNodeclosures[nodeId];
+ result.add(nodeClosure.begin(), nodeClosure.end());
+ }
+ return result;
+ }
+
+ template<typename Iterable>
+ void extend(NFANodeIndexSet& destination, const Iterable& iterable)
+ {
+ for (unsigned nodeId : iterable) {
+ auto addResult = destination.add(nodeId);
+ if (addResult.isNewEntry) {
+ const UniqueNodeList& nodeClosure = nfaNodeclosures[nodeId];
+ destination.add(nodeClosure.begin(), nodeClosure.end());
+ }
+ }
+ }
+};
+
+static inline void createCombinedTransition(PreallocatedNFANodeRangeList& combinedRangeList, const UniqueNodeIdSetImpl& sourceNodeSet, const NFA& immutableNFA, const NFANodeClosures& nfaNodeclosures)
+{
+ combinedRangeList.clear();
+
+ const unsigned* buffer = sourceNodeSet.buffer();
+
+ DataConverterWithEpsilonClosure converter { nfaNodeclosures };
+ for (unsigned i = 0; i < sourceNodeSet.m_size; ++i) {
+ unsigned nodeId = buffer[i];
+ auto transitions = immutableNFA.transitionsForNode(nodeId);
+ combinedRangeList.extend(transitions.begin(), transitions.end(), converter);
+ }
+}
+
+static ALWAYS_INLINE unsigned getOrCreateDFANode(const NodeIdSet& nfaNodeSet, const NFA& nfa, DFA& dfa, UniqueNodeIdSetTable& uniqueNodeIdSetTable, UniqueNodeQueue& unprocessedNodes)
+{
+ NodeIdSetToUniqueNodeIdSetSource nodeIdSetToUniqueNodeIdSetSource(dfa, nfa, nfaNodeSet);
+ auto uniqueNodeIdAddResult = uniqueNodeIdSetTable.add<NodeIdSetToUniqueNodeIdSetTranslator>(nodeIdSetToUniqueNodeIdSetSource);
+ if (uniqueNodeIdAddResult.isNewEntry)
+ unprocessedNodes.append(uniqueNodeIdAddResult.iterator->impl());
+
+ return uniqueNodeIdAddResult.iterator->impl()->m_dfaNodeId;
+}
+
+DFA NFAToDFA::convert(NFA& nfa)
+{
+ NFANodeClosures nfaNodeClosures;
+ resolveEpsilonClosures(nfa, nfaNodeClosures);
+
+ DFA dfa;
+
+ NodeIdSet initialSet({ nfa.root() });
+ extendSetWithClosure(nfaNodeClosures, nfa.root(), initialSet);
+
+ UniqueNodeIdSetTable uniqueNodeIdSetTable;
+
+ NodeIdSetToUniqueNodeIdSetSource initialNodeIdSetToUniqueNodeIdSetSource(dfa, nfa, initialSet);
+ auto addResult = uniqueNodeIdSetTable.add<NodeIdSetToUniqueNodeIdSetTranslator>(initialNodeIdSetToUniqueNodeIdSetSource);
+
+ UniqueNodeQueue unprocessedNodes;
+ unprocessedNodes.append(addResult.iterator->impl());
+
+ PreallocatedNFANodeRangeList combinedRangeList;
+ do {
+ UniqueNodeIdSetImpl* uniqueNodeIdSetImpl = unprocessedNodes.takeLast();
+ createCombinedTransition(combinedRangeList, *uniqueNodeIdSetImpl, nfa, nfaNodeClosures);
+
+ unsigned transitionsStart = dfa.transitionRanges.size();
+ for (const NFANodeRange& range : combinedRangeList) {
+ unsigned targetNodeId = getOrCreateDFANode(range.data, nfa, dfa, uniqueNodeIdSetTable, unprocessedNodes);
+ dfa.transitionRanges.append({ range.first, range.last });
+ dfa.transitionDestinations.append(targetNodeId);
+ }
+ unsigned transitionsEnd = dfa.transitionRanges.size();
+ unsigned transitionsLength = transitionsEnd - transitionsStart;
+
+ unsigned dfaNodeId = uniqueNodeIdSetImpl->m_dfaNodeId;
+ DFANode& dfaSourceNode = dfa.nodes[dfaNodeId];
+ dfaSourceNode.setTransitions(transitionsStart, static_cast<uint8_t>(transitionsLength));
+ } while (!unprocessedNodes.isEmpty());
+
+ dfa.shrinkToFit();
+ return dfa;
+}
+
+} // namespace ContentExtensions
+
+} // namespace WebCore
+
+#endif // ENABLE(CONTENT_EXTENSIONS)
diff --git a/Source/WebCore/contentextensions/NFAToDFA.h b/Source/WebCore/contentextensions/NFAToDFA.h
new file mode 100644
index 000000000..8bee48823
--- /dev/null
+++ b/Source/WebCore/contentextensions/NFAToDFA.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2014, 2015 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#pragma once
+
+#if ENABLE(CONTENT_EXTENSIONS)
+
+#include "DFA.h"
+
+namespace WebCore {
+
+namespace ContentExtensions {
+
+struct NFA;
+
+// NFAToDFA provides a way to build a DFA corresponding to a NFA.
+class NFAToDFA {
+public:
+ WEBCORE_EXPORT static DFA convert(NFA&);
+};
+
+} // namespace ContentExtensions
+} // namespace WebCore
+
+#endif // ENABLE(CONTENT_EXTENSIONS)
diff --git a/Source/WebCore/contentextensions/Term.h b/Source/WebCore/contentextensions/Term.h
new file mode 100644
index 000000000..0d51833f5
--- /dev/null
+++ b/Source/WebCore/contentextensions/Term.h
@@ -0,0 +1,687 @@
+/*
+ * Copyright (C) 2014-2015 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#pragma once
+
+#if ENABLE(CONTENT_EXTENSIONS)
+
+#include "ContentExtensionsDebugging.h"
+#include "NFA.h"
+#include <unicode/utypes.h>
+#include <wtf/ASCIICType.h>
+#include <wtf/HashMap.h>
+#include <wtf/Vector.h>
+#include <wtf/text/StringBuilder.h>
+#include <wtf/text/WTFString.h>
+
+namespace WebCore {
+
+namespace ContentExtensions {
+
+enum class AtomQuantifier : uint8_t {
+ One,
+ ZeroOrOne,
+ ZeroOrMore,
+ OneOrMore
+};
+
+class Term {
+public:
+ Term();
+ Term(char character, bool isCaseSensitive);
+
+ enum UniversalTransitionTag { UniversalTransition };
+ explicit Term(UniversalTransitionTag);
+
+ enum CharacterSetTermTag { CharacterSetTerm };
+ explicit Term(CharacterSetTermTag, bool isInverted);
+
+ enum GroupTermTag { GroupTerm };
+ explicit Term(GroupTermTag);
+
+ enum EndOfLineAssertionTermTag { EndOfLineAssertionTerm };
+ explicit Term(EndOfLineAssertionTermTag);
+
+ Term(const Term& other);
+ Term(Term&& other);
+
+ enum EmptyValueTag { EmptyValue };
+ Term(EmptyValueTag);
+
+ ~Term();
+
+ bool isValid() const;
+
+ // CharacterSet terms only.
+ void addCharacter(UChar character, bool isCaseSensitive);
+
+ // Group terms only.
+ void extendGroupSubpattern(const Term&);
+
+ void quantify(const AtomQuantifier&);
+
+ ImmutableCharNFANodeBuilder generateGraph(NFA&, ImmutableCharNFANodeBuilder& source, const ActionList& finalActions) const;
+ void generateGraph(NFA&, ImmutableCharNFANodeBuilder& source, uint32_t destination) const;
+
+ bool isEndOfLineAssertion() const;
+
+ bool matchesAtLeastOneCharacter() const;
+
+ // Matches any string, the empty string included.
+ // This is very conservative. Patterns matching any string can return false here.
+ bool isKnownToMatchAnyString() const;
+
+ // Return true if the term can only match input of a single fixed length.
+ bool hasFixedLength() const;
+
+ Term& operator=(const Term& other);
+ Term& operator=(Term&& other);
+
+ bool operator==(const Term& other) const;
+
+ unsigned hash() const;
+
+ bool isEmptyValue() const;
+
+#if CONTENT_EXTENSIONS_STATE_MACHINE_DEBUGGING
+ String toString() const;
+#endif
+
+#if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING
+ size_t memoryUsed() const;
+#endif
+
+private:
+ // This is exact for character sets but conservative for groups.
+ // The return value can be false for a group equivalent to a universal transition.
+ bool isUniversalTransition() const;
+
+ void generateSubgraphForAtom(NFA&, ImmutableCharNFANodeBuilder& source, const ImmutableCharNFANodeBuilder& destination) const;
+ void generateSubgraphForAtom(NFA&, ImmutableCharNFANodeBuilder& source, uint32_t destination) const;
+
+ void destroy();
+
+ enum class TermType : uint8_t {
+ Empty,
+ CharacterSet,
+ Group
+ };
+
+ TermType m_termType { TermType::Empty };
+ AtomQuantifier m_quantifier { AtomQuantifier::One };
+
+ class CharacterSet {
+ public:
+ void set(UChar character)
+ {
+ RELEASE_ASSERT(character < 128);
+ m_characters[character / 64] |= (uint64_t(1) << (character % 64));
+ }
+
+ bool get(UChar character) const
+ {
+ RELEASE_ASSERT(character < 128);
+ return m_characters[character / 64] & (uint64_t(1) << (character % 64));
+ }
+
+ void invert()
+ {
+ ASSERT(!m_inverted);
+ m_inverted = true;
+ }
+
+ bool inverted() const { return m_inverted; }
+
+ unsigned bitCount() const
+ {
+ return WTF::bitCount(m_characters[0]) + WTF::bitCount(m_characters[1]);
+ }
+
+ bool operator==(const CharacterSet& other) const
+ {
+ return other.m_inverted == m_inverted
+ && other.m_characters[0] == m_characters[0]
+ && other.m_characters[1] == m_characters[1];
+ }
+
+ unsigned hash() const
+ {
+ return WTF::pairIntHash(WTF::pairIntHash(WTF::intHash(m_characters[0]), WTF::intHash(m_characters[1])), m_inverted);
+ }
+ private:
+ bool m_inverted { false };
+ uint64_t m_characters[2] { 0, 0 };
+ };
+
+ struct Group {
+ Vector<Term> terms;
+
+ bool operator==(const Group& other) const
+ {
+ return other.terms == terms;
+ }
+
+ unsigned hash() const
+ {
+ unsigned hash = 6421749;
+ for (const Term& term : terms) {
+ unsigned termHash = term.hash();
+ hash = (hash << 16) ^ ((termHash << 11) ^ hash);
+ hash += hash >> 11;
+ }
+ return hash;
+ }
+ };
+
+ union AtomData {
+ AtomData()
+ : invalidTerm(0)
+ {
+ }
+ ~AtomData()
+ {
+ }
+
+ char invalidTerm;
+ CharacterSet characterSet;
+ Group group;
+ } m_atomData;
+};
+
+#if CONTENT_EXTENSIONS_STATE_MACHINE_DEBUGGING
+inline String quantifierToString(AtomQuantifier quantifier)
+{
+ switch (quantifier) {
+ case AtomQuantifier::One:
+ return "";
+ case AtomQuantifier::ZeroOrOne:
+ return "?";
+ case AtomQuantifier::ZeroOrMore:
+ return "*";
+ case AtomQuantifier::OneOrMore:
+ return "+";
+ }
+}
+
+inline String Term::toString() const
+{
+ switch (m_termType) {
+ case TermType::Empty:
+ return "(Empty)";
+ case TermType::CharacterSet: {
+ StringBuilder builder;
+ builder.append('[');
+ for (UChar c = 0; c < 128; c++) {
+ if (m_atomData.characterSet.get(c)) {
+ if (isASCIIPrintable(c) && !isASCIISpace(c))
+ builder.append(c);
+ else {
+ builder.append('\\');
+ builder.append('u');
+ builder.appendNumber(c);
+ }
+ }
+ }
+ builder.append(']');
+ builder.append(quantifierToString(m_quantifier));
+ return builder.toString();
+ }
+ case TermType::Group: {
+ StringBuilder builder;
+ builder.append('(');
+ for (const Term& term : m_atomData.group.terms)
+ builder.append(term.toString());
+ builder.append(')');
+ builder.append(quantifierToString(m_quantifier));
+ return builder.toString();
+ }
+ }
+}
+#endif
+
+inline Term::Term()
+{
+}
+
+inline Term::Term(char character, bool isCaseSensitive)
+ : m_termType(TermType::CharacterSet)
+{
+ new (NotNull, &m_atomData.characterSet) CharacterSet();
+ addCharacter(character, isCaseSensitive);
+}
+
+inline Term::Term(UniversalTransitionTag)
+ : m_termType(TermType::CharacterSet)
+{
+ new (NotNull, &m_atomData.characterSet) CharacterSet();
+ for (UChar i = 1; i < 128; ++i)
+ m_atomData.characterSet.set(i);
+}
+
+inline Term::Term(CharacterSetTermTag, bool isInverted)
+ : m_termType(TermType::CharacterSet)
+{
+ new (NotNull, &m_atomData.characterSet) CharacterSet();
+ if (isInverted)
+ m_atomData.characterSet.invert();
+}
+
+inline Term::Term(GroupTermTag)
+ : m_termType(TermType::Group)
+{
+ new (NotNull, &m_atomData.group) Group();
+}
+
+inline Term::Term(EndOfLineAssertionTermTag)
+ : Term(CharacterSetTerm, false)
+{
+ m_atomData.characterSet.set(0);
+}
+
+inline Term::Term(const Term& other)
+ : m_termType(other.m_termType)
+ , m_quantifier(other.m_quantifier)
+{
+ switch (m_termType) {
+ case TermType::Empty:
+ break;
+ case TermType::CharacterSet:
+ new (NotNull, &m_atomData.characterSet) CharacterSet(other.m_atomData.characterSet);
+ break;
+ case TermType::Group:
+ new (NotNull, &m_atomData.group) Group(other.m_atomData.group);
+ break;
+ }
+}
+
+inline Term::Term(Term&& other)
+ : m_termType(WTFMove(other.m_termType))
+ , m_quantifier(WTFMove(other.m_quantifier))
+{
+ switch (m_termType) {
+ case TermType::Empty:
+ break;
+ case TermType::CharacterSet:
+ new (NotNull, &m_atomData.characterSet) CharacterSet(WTFMove(other.m_atomData.characterSet));
+ break;
+ case TermType::Group:
+ new (NotNull, &m_atomData.group) Group(WTFMove(other.m_atomData.group));
+ break;
+ }
+ other.destroy();
+}
+
+inline Term::Term(EmptyValueTag)
+ : m_termType(TermType::Empty)
+{
+}
+
+inline Term::~Term()
+{
+ destroy();
+}
+
+inline bool Term::isValid() const
+{
+ return m_termType != TermType::Empty;
+}
+
+inline void Term::addCharacter(UChar character, bool isCaseSensitive)
+{
+ ASSERT(isASCII(character));
+
+ ASSERT_WITH_SECURITY_IMPLICATION(m_termType == TermType::CharacterSet);
+ if (m_termType != TermType::CharacterSet)
+ return;
+
+ if (isCaseSensitive || !isASCIIAlpha(character))
+ m_atomData.characterSet.set(character);
+ else {
+ m_atomData.characterSet.set(toASCIIUpper(character));
+ m_atomData.characterSet.set(toASCIILower(character));
+ }
+}
+
+inline void Term::extendGroupSubpattern(const Term& term)
+{
+ ASSERT_WITH_SECURITY_IMPLICATION(m_termType == TermType::Group);
+ if (m_termType != TermType::Group)
+ return;
+ m_atomData.group.terms.append(term);
+}
+
+inline void Term::quantify(const AtomQuantifier& quantifier)
+{
+ ASSERT_WITH_MESSAGE(m_quantifier == AtomQuantifier::One, "Transition to quantified term should only happen once.");
+ m_quantifier = quantifier;
+}
+
+inline ImmutableCharNFANodeBuilder Term::generateGraph(NFA& nfa, ImmutableCharNFANodeBuilder& source, const ActionList& finalActions) const
+{
+ ImmutableCharNFANodeBuilder newEnd(nfa);
+ generateGraph(nfa, source, newEnd.nodeId());
+ newEnd.setActions(finalActions.begin(), finalActions.end());
+ return newEnd;
+}
+
+inline void Term::generateGraph(NFA& nfa, ImmutableCharNFANodeBuilder& source, uint32_t destination) const
+{
+ ASSERT(isValid());
+
+ switch (m_quantifier) {
+ case AtomQuantifier::One: {
+ generateSubgraphForAtom(nfa, source, destination);
+ break;
+ }
+ case AtomQuantifier::ZeroOrOne: {
+ generateSubgraphForAtom(nfa, source, destination);
+ source.addEpsilonTransition(destination);
+ break;
+ }
+ case AtomQuantifier::ZeroOrMore: {
+ ImmutableCharNFANodeBuilder repeatStart(nfa);
+ source.addEpsilonTransition(repeatStart);
+
+ ImmutableCharNFANodeBuilder repeatEnd(nfa);
+ generateSubgraphForAtom(nfa, repeatStart, repeatEnd);
+ repeatEnd.addEpsilonTransition(repeatStart);
+
+ repeatEnd.addEpsilonTransition(destination);
+ source.addEpsilonTransition(destination);
+ break;
+ }
+ case AtomQuantifier::OneOrMore: {
+ ImmutableCharNFANodeBuilder repeatStart(nfa);
+ source.addEpsilonTransition(repeatStart);
+
+ ImmutableCharNFANodeBuilder repeatEnd(nfa);
+ generateSubgraphForAtom(nfa, repeatStart, repeatEnd);
+ repeatEnd.addEpsilonTransition(repeatStart);
+
+ repeatEnd.addEpsilonTransition(destination);
+ break;
+ }
+ }
+}
+
+inline bool Term::isEndOfLineAssertion() const
+{
+ return m_termType == TermType::CharacterSet && m_atomData.characterSet.bitCount() == 1 && m_atomData.characterSet.get(0);
+}
+
+inline bool Term::matchesAtLeastOneCharacter() const
+{
+ ASSERT(isValid());
+
+ if (m_quantifier == AtomQuantifier::ZeroOrOne || m_quantifier == AtomQuantifier::ZeroOrMore)
+ return false;
+ if (isEndOfLineAssertion())
+ return false;
+
+ if (m_termType == TermType::Group) {
+ for (const Term& term : m_atomData.group.terms) {
+ if (term.matchesAtLeastOneCharacter())
+ return true;
+ }
+ return false;
+ }
+ return true;
+}
+
+inline bool Term::isKnownToMatchAnyString() const
+{
+ ASSERT(isValid());
+
+ switch (m_termType) {
+ case TermType::Empty:
+ ASSERT_NOT_REACHED();
+ break;
+ case TermType::CharacterSet:
+ // ".*" is the only simple term matching any string.
+ return isUniversalTransition() && m_quantifier == AtomQuantifier::ZeroOrMore;
+ break;
+ case TermType::Group: {
+ // There are infinitely many ways to match anything with groups, we just handle simple cases
+ if (m_atomData.group.terms.size() != 1)
+ return false;
+
+ const Term& firstTermInGroup = m_atomData.group.terms.first();
+ // -(.*) with any quantifier.
+ if (firstTermInGroup.isKnownToMatchAnyString())
+ return true;
+
+ if (firstTermInGroup.isUniversalTransition()) {
+ // -(.)*, (.+)*, (.?)* etc.
+ if (m_quantifier == AtomQuantifier::ZeroOrMore)
+ return true;
+
+ // -(.+)?.
+ if (m_quantifier == AtomQuantifier::ZeroOrOne && firstTermInGroup.m_quantifier == AtomQuantifier::OneOrMore)
+ return true;
+
+ // -(.?)+.
+ if (m_quantifier == AtomQuantifier::OneOrMore && firstTermInGroup.m_quantifier == AtomQuantifier::ZeroOrOne)
+ return true;
+ }
+ break;
+ }
+ }
+ return false;
+}
+
+inline bool Term::hasFixedLength() const
+{
+ ASSERT(isValid());
+
+ switch (m_termType) {
+ case TermType::Empty:
+ ASSERT_NOT_REACHED();
+ break;
+ case TermType::CharacterSet:
+ return m_quantifier == AtomQuantifier::One;
+ case TermType::Group: {
+ if (m_quantifier != AtomQuantifier::One)
+ return false;
+ for (const Term& term : m_atomData.group.terms) {
+ if (!term.hasFixedLength())
+ return false;
+ }
+ return true;
+ }
+ }
+ return false;
+}
+
+inline Term& Term::operator=(const Term& other)
+{
+ destroy();
+ new (NotNull, this) Term(other);
+ return *this;
+}
+
+inline Term& Term::operator=(Term&& other)
+{
+ destroy();
+ new (NotNull, this) Term(WTFMove(other));
+ return *this;
+}
+
+inline bool Term::operator==(const Term& other) const
+{
+ if (other.m_termType != m_termType || other.m_quantifier != m_quantifier)
+ return false;
+
+ switch (m_termType) {
+ case TermType::Empty:
+ return true;
+ case TermType::CharacterSet:
+ return m_atomData.characterSet == other.m_atomData.characterSet;
+ case TermType::Group:
+ return m_atomData.group == other.m_atomData.group;
+ }
+ ASSERT_NOT_REACHED();
+ return false;
+}
+
+inline unsigned Term::hash() const
+{
+ unsigned primary = static_cast<unsigned>(m_termType) << 16 | static_cast<unsigned>(m_quantifier);
+ unsigned secondary = 0;
+ switch (m_termType) {
+ case TermType::Empty:
+ secondary = 52184393;
+ break;
+ case TermType::CharacterSet:
+ secondary = m_atomData.characterSet.hash();
+ break;
+ case TermType::Group:
+ secondary = m_atomData.group.hash();
+ break;
+ }
+ return WTF::pairIntHash(primary, secondary);
+}
+
+inline bool Term::isEmptyValue() const
+{
+ return m_termType == TermType::Empty;
+}
+
+inline bool Term::isUniversalTransition() const
+{
+ ASSERT(isValid());
+
+ switch (m_termType) {
+ case TermType::Empty:
+ ASSERT_NOT_REACHED();
+ break;
+ case TermType::CharacterSet:
+ return (m_atomData.characterSet.inverted() && !m_atomData.characterSet.bitCount())
+ || (!m_atomData.characterSet.inverted() && m_atomData.characterSet.bitCount() == 127 && !m_atomData.characterSet.get(0));
+ case TermType::Group:
+ return m_atomData.group.terms.size() == 1 && m_atomData.group.terms.first().isUniversalTransition();
+ }
+ return false;
+}
+
+inline void Term::generateSubgraphForAtom(NFA& nfa, ImmutableCharNFANodeBuilder& source, const ImmutableCharNFANodeBuilder& destination) const
+{
+ generateSubgraphForAtom(nfa, source, destination.nodeId());
+}
+
+inline void Term::generateSubgraphForAtom(NFA& nfa, ImmutableCharNFANodeBuilder& source, uint32_t destination) const
+{
+ switch (m_termType) {
+ case TermType::Empty:
+ ASSERT_NOT_REACHED();
+ source.addEpsilonTransition(destination);
+ break;
+ case TermType::CharacterSet: {
+ if (!m_atomData.characterSet.inverted()) {
+ UChar i = 0;
+ while (true) {
+ while (i < 128 && !m_atomData.characterSet.get(i))
+ ++i;
+ if (i == 128)
+ break;
+
+ UChar start = i;
+ ++i;
+ while (i < 128 && m_atomData.characterSet.get(i))
+ ++i;
+ source.addTransition(start, i - 1, destination);
+ }
+ } else {
+ UChar i = 1;
+ while (true) {
+ while (i < 128 && m_atomData.characterSet.get(i))
+ ++i;
+ if (i == 128)
+ break;
+
+ UChar start = i;
+ ++i;
+ while (i < 128 && !m_atomData.characterSet.get(i))
+ ++i;
+ source.addTransition(start, i - 1, destination);
+ }
+ }
+ break;
+ }
+ case TermType::Group: {
+ if (m_atomData.group.terms.isEmpty()) {
+ // FIXME: any kind of empty term could be avoided in the parser. This case should turned into an assertion.
+ source.addEpsilonTransition(destination);
+ return;
+ }
+
+ if (m_atomData.group.terms.size() == 1) {
+ m_atomData.group.terms.first().generateGraph(nfa, source, destination);
+ return;
+ }
+
+ ImmutableCharNFANodeBuilder lastTarget = m_atomData.group.terms.first().generateGraph(nfa, source, ActionList());
+ for (unsigned i = 1; i < m_atomData.group.terms.size() - 1; ++i) {
+ const Term& currentTerm = m_atomData.group.terms[i];
+ ImmutableCharNFANodeBuilder newNode = currentTerm.generateGraph(nfa, lastTarget, ActionList());
+ lastTarget = WTFMove(newNode);
+ }
+ const Term& lastTerm = m_atomData.group.terms.last();
+ lastTerm.generateGraph(nfa, lastTarget, destination);
+ break;
+ }
+ }
+}
+
+inline void Term::destroy()
+{
+ switch (m_termType) {
+ case TermType::Empty:
+ break;
+ case TermType::CharacterSet:
+ m_atomData.characterSet.~CharacterSet();
+ break;
+ case TermType::Group:
+ m_atomData.group.~Group();
+ break;
+ }
+ m_termType = TermType::Empty;
+}
+
+#if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING
+inline size_t Term::memoryUsed() const
+{
+ size_t extraMemory = 0;
+ if (m_termType == TermType::Group) {
+ for (const Term& term : m_atomData.group.terms)
+ extraMemory += term.memoryUsed();
+ }
+ return sizeof(Term) + extraMemory;
+}
+#endif
+
+} // namespace ContentExtensions
+} // namespace WebCore
+
+#endif // ENABLE(CONTENT_EXTENSIONS)
diff --git a/Source/WebCore/contentextensions/URLFilterParser.cpp b/Source/WebCore/contentextensions/URLFilterParser.cpp
new file mode 100644
index 000000000..3588c6517
--- /dev/null
+++ b/Source/WebCore/contentextensions/URLFilterParser.cpp
@@ -0,0 +1,397 @@
+/*
+ * Copyright (C) 2014, 2015 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "URLFilterParser.h"
+
+#if ENABLE(CONTENT_EXTENSIONS)
+
+#include "CombinedURLFilters.h"
+#include "Term.h"
+#include <JavaScriptCore/YarrParser.h>
+#include <wtf/Deque.h>
+#include <wtf/text/CString.h>
+
+namespace WebCore {
+
+namespace ContentExtensions {
+
+class PatternParser {
+public:
+ PatternParser(bool patternIsCaseSensitive)
+ : m_patternIsCaseSensitive(patternIsCaseSensitive)
+ , m_parseStatus(URLFilterParser::Ok)
+ {
+ }
+
+ void finalize(uint64_t patternId, CombinedURLFilters& combinedURLFilters)
+ {
+ if (hasError())
+ return;
+
+ sinkFloatingTermIfNecessary();
+
+ simplifySunkTerms();
+
+ // Check to see if there are any terms without ? or *.
+ bool matchesEverything = true;
+ for (const auto& term : m_sunkTerms) {
+ if (term.matchesAtLeastOneCharacter()) {
+ matchesEverything = false;
+ break;
+ }
+ }
+ if (matchesEverything) {
+ fail(URLFilterParser::MatchesEverything);
+ return;
+ }
+
+ combinedURLFilters.addPattern(patternId, m_sunkTerms);
+ }
+
+ URLFilterParser::ParseStatus parseStatus() const
+ {
+ return m_parseStatus;
+ }
+
+ void atomPatternCharacter(UChar character)
+ {
+ if (hasError())
+ return;
+
+ if (!isASCII(character)) {
+ fail(URLFilterParser::NonASCII);
+ return;
+ }
+
+ sinkFloatingTermIfNecessary();
+ ASSERT(!m_floatingTerm.isValid());
+
+ char asciiChararacter = static_cast<char>(character);
+ m_floatingTerm = Term(asciiChararacter, m_patternIsCaseSensitive);
+ }
+
+ void atomBuiltInCharacterClass(JSC::Yarr::BuiltInCharacterClassID builtInCharacterClassID, bool inverted)
+ {
+ if (hasError())
+ return;
+
+ sinkFloatingTermIfNecessary();
+ ASSERT(!m_floatingTerm.isValid());
+
+ if (builtInCharacterClassID == JSC::Yarr::NewlineClassID && inverted)
+ m_floatingTerm = Term(Term::UniversalTransition);
+ else
+ fail(URLFilterParser::UnsupportedCharacterClass);
+ }
+
+ void quantifyAtom(unsigned minimum, unsigned maximum, bool)
+ {
+ if (hasError())
+ return;
+
+ ASSERT(m_floatingTerm.isValid());
+
+ if (!minimum && maximum == 1)
+ m_floatingTerm.quantify(AtomQuantifier::ZeroOrOne);
+ else if (!minimum && maximum == JSC::Yarr::quantifyInfinite)
+ m_floatingTerm.quantify(AtomQuantifier::ZeroOrMore);
+ else if (minimum == 1 && maximum == JSC::Yarr::quantifyInfinite)
+ m_floatingTerm.quantify(AtomQuantifier::OneOrMore);
+ else
+ fail(URLFilterParser::InvalidQuantifier);
+ }
+
+ void atomBackReference(unsigned)
+ {
+ fail(URLFilterParser::BackReference);
+ }
+
+ void assertionBOL()
+ {
+ if (hasError())
+ return;
+
+ if (m_floatingTerm.isValid() || !m_sunkTerms.isEmpty() || !m_openGroups.isEmpty()) {
+ fail(URLFilterParser::MisplacedStartOfLine);
+ return;
+ }
+
+ m_hasBeginningOfLineAssertion = true;
+ }
+
+ void assertionEOL()
+ {
+ if (hasError())
+ return;
+
+ sinkFloatingTermIfNecessary();
+ ASSERT(!m_floatingTerm.isValid());
+
+ m_floatingTerm = Term(Term::EndOfLineAssertionTerm);
+ }
+
+ void assertionWordBoundary(bool)
+ {
+ fail(URLFilterParser::WordBoundary);
+ }
+
+ void atomCharacterClassBegin(bool inverted = false)
+ {
+ if (hasError())
+ return;
+
+ sinkFloatingTermIfNecessary();
+ ASSERT(!m_floatingTerm.isValid());
+
+ m_floatingTerm = Term(Term::CharacterSetTerm, inverted);
+ }
+
+ void atomCharacterClassAtom(UChar character)
+ {
+ if (hasError())
+ return;
+
+ ASSERT(isASCII(character));
+
+ m_floatingTerm.addCharacter(character, m_patternIsCaseSensitive);
+ }
+
+ void atomCharacterClassRange(UChar a, UChar b)
+ {
+ if (hasError())
+ return;
+
+ ASSERT(a);
+ ASSERT(b);
+ ASSERT(isASCII(a));
+ ASSERT(isASCII(b));
+
+ for (unsigned i = a; i <= b; ++i)
+ m_floatingTerm.addCharacter(static_cast<UChar>(i), m_patternIsCaseSensitive);
+ }
+
+ void atomCharacterClassEnd()
+ {
+ // Nothing to do here. The character set atom may have a quantifier, we sink the atom lazily.
+ }
+
+ void atomCharacterClassBuiltIn(JSC::Yarr::BuiltInCharacterClassID, bool)
+ {
+ fail(URLFilterParser::AtomCharacter);
+ }
+
+ void atomParenthesesSubpatternBegin(bool = true)
+ {
+ if (hasError())
+ return;
+
+ sinkFloatingTermIfNecessary();
+
+ m_openGroups.append(Term(Term::GroupTerm));
+ }
+
+ void atomParentheticalAssertionBegin(bool = false)
+ {
+ fail(URLFilterParser::Group);
+ }
+
+ void atomParenthesesEnd()
+ {
+ if (hasError())
+ return;
+
+ sinkFloatingTermIfNecessary();
+ ASSERT(!m_floatingTerm.isValid());
+
+ m_floatingTerm = m_openGroups.takeLast();
+ }
+
+ void disjunction()
+ {
+ fail(URLFilterParser::Disjunction);
+ }
+
+private:
+ bool hasError() const
+ {
+ return m_parseStatus != URLFilterParser::Ok;
+ }
+
+ void fail(URLFilterParser::ParseStatus reason)
+ {
+ if (hasError())
+ return;
+
+ m_parseStatus = reason;
+ }
+
+ void sinkFloatingTermIfNecessary()
+ {
+ if (!m_floatingTerm.isValid())
+ return;
+
+ if (m_hasProcessedEndOfLineAssertion) {
+ fail(URLFilterParser::MisplacedEndOfLine);
+ m_floatingTerm = Term();
+ return;
+ }
+
+ if (m_floatingTerm.isEndOfLineAssertion())
+ m_hasProcessedEndOfLineAssertion = true;
+
+ if (!m_openGroups.isEmpty()) {
+ m_openGroups.last().extendGroupSubpattern(m_floatingTerm);
+ m_floatingTerm = Term();
+ return;
+ }
+
+ m_sunkTerms.append(m_floatingTerm);
+ m_floatingTerm = Term();
+ }
+
+ void simplifySunkTerms()
+ {
+ ASSERT(!m_floatingTerm.isValid());
+
+ if (m_sunkTerms.isEmpty())
+ return;
+
+ Term canonicalDotStar(Term::UniversalTransition);
+ canonicalDotStar.quantify(AtomQuantifier::ZeroOrMore);
+
+ // Replace every ".*"-like terms by our canonical version. Remove any duplicate ".*".
+ {
+ unsigned termIndex = 0;
+ bool isAfterDotStar = false;
+ while (termIndex < m_sunkTerms.size()) {
+ if (isAfterDotStar && m_sunkTerms[termIndex].isKnownToMatchAnyString()) {
+ m_sunkTerms.remove(termIndex);
+ continue;
+ }
+ isAfterDotStar = false;
+
+ if (m_sunkTerms[termIndex].isKnownToMatchAnyString()) {
+ m_sunkTerms[termIndex] = canonicalDotStar;
+ isAfterDotStar = true;
+ }
+ ++termIndex;
+ }
+ }
+
+ // Add our ".*" in front if needed.
+ if (!m_hasBeginningOfLineAssertion && !m_sunkTerms.first().isKnownToMatchAnyString())
+ m_sunkTerms.insert(0, canonicalDotStar);
+
+ // Remove trailing ".*$".
+ if (m_sunkTerms.size() > 2 && m_sunkTerms.last().isEndOfLineAssertion() && m_sunkTerms[m_sunkTerms.size() - 2].isKnownToMatchAnyString())
+ m_sunkTerms.shrink(m_sunkTerms.size() - 2);
+
+ // Remove irrelevant terms that can match empty. For example in "foob?", matching "b" is irrelevant.
+ if (m_sunkTerms.last().isEndOfLineAssertion())
+ return;
+ while (!m_sunkTerms.isEmpty() && !m_sunkTerms.last().matchesAtLeastOneCharacter())
+ m_sunkTerms.removeLast();
+ }
+
+ bool m_patternIsCaseSensitive;
+
+ Deque<Term> m_openGroups;
+ Vector<Term> m_sunkTerms;
+ Term m_floatingTerm;
+ bool m_hasBeginningOfLineAssertion { false };
+ bool m_hasProcessedEndOfLineAssertion { false };
+
+ URLFilterParser::ParseStatus m_parseStatus;
+};
+
+URLFilterParser::URLFilterParser(CombinedURLFilters& combinedURLFilters)
+ : m_combinedURLFilters(combinedURLFilters)
+{
+}
+
+URLFilterParser::~URLFilterParser()
+{
+}
+
+URLFilterParser::ParseStatus URLFilterParser::addPattern(const String& pattern, bool patternIsCaseSensitive, uint64_t patternId)
+{
+ if (!pattern.containsOnlyASCII())
+ return NonASCII;
+ if (pattern.isEmpty())
+ return EmptyPattern;
+
+ ParseStatus status = Ok;
+ PatternParser patternParser(patternIsCaseSensitive);
+ String error = String(JSC::Yarr::parse(patternParser, pattern, false, 0));
+ if (error.isNull())
+ patternParser.finalize(patternId, m_combinedURLFilters);
+ else
+ status = YarrError;
+
+ if (status == Ok)
+ status = patternParser.parseStatus();
+
+ return status;
+}
+
+String URLFilterParser::statusString(ParseStatus status)
+{
+ switch (status) {
+ case Ok:
+ return "Ok";
+ case MatchesEverything:
+ return "Matches everything.";
+ case NonASCII:
+ return "Only ASCII characters are supported in pattern.";
+ case UnsupportedCharacterClass:
+ return "Character class is not supported.";
+ case BackReference:
+ return "Patterns cannot contain backreferences.";
+ case MisplacedStartOfLine:
+ return "Start of line assertion can only appear as the first term in a filter.";
+ case WordBoundary:
+ return "Word boundaries assertions are not supported yet.";
+ case AtomCharacter:
+ return "Builtins character class atoms are not supported yet.";
+ case Group:
+ return "Groups are not supported yet.";
+ case Disjunction:
+ return "Disjunctions are not supported yet.";
+ case MisplacedEndOfLine:
+ return "The end of line assertion must be the last term in an expression.";
+ case EmptyPattern:
+ return "Empty pattern.";
+ case YarrError:
+ return "Internal error in YARR.";
+ case InvalidQuantifier:
+ return "Arbitrary atom repetitions are not supported.";
+ }
+}
+
+} // namespace ContentExtensions
+} // namespace WebCore
+
+#endif // ENABLE(CONTENT_EXTENSIONS)
diff --git a/Source/WebCore/contentextensions/URLFilterParser.h b/Source/WebCore/contentextensions/URLFilterParser.h
new file mode 100644
index 000000000..be19b9f70
--- /dev/null
+++ b/Source/WebCore/contentextensions/URLFilterParser.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (C) 2014, 2015 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#pragma once
+
+#if ENABLE(CONTENT_EXTENSIONS)
+
+#include <wtf/text/WTFString.h>
+
+namespace WebCore {
+
+namespace ContentExtensions {
+
+class CombinedURLFilters;
+
+class WEBCORE_EXPORT URLFilterParser {
+public:
+ enum ParseStatus {
+ Ok,
+ MatchesEverything,
+ NonASCII,
+ UnsupportedCharacterClass,
+ BackReference,
+ MisplacedStartOfLine,
+ WordBoundary,
+ AtomCharacter,
+ Group,
+ Disjunction,
+ MisplacedEndOfLine,
+ EmptyPattern,
+ YarrError,
+ InvalidQuantifier,
+ };
+ static String statusString(ParseStatus);
+ explicit URLFilterParser(CombinedURLFilters&);
+ ~URLFilterParser();
+ ParseStatus addPattern(const String& pattern, bool patternIsCaseSensitive, uint64_t patternId);
+
+private:
+ CombinedURLFilters& m_combinedURLFilters;
+};
+
+} // namespace ContentExtensions
+} // namespace WebCore
+
+#endif // ENABLE(CONTENT_EXTENSIONS)