webkitgtk-2.16.5HEAD webkitgtk-2.16.5 master

author: Lorry Tar Creator <lorry-tar-importer@lorry> 2017-06-27 06:07:23 +0000
committer: Lorry Tar Creator <lorry-tar-importer@lorry> 2017-06-27 06:07:23 +0000
commit: 1bf1084f2b10c3b47fd1a588d85d21ed0eb41d0c (patch)
tree: 46dcd36c86e7fbc6e5df36deb463b33e9967a6f7 /Source/WebCore/contentextensions/Term.h
parent: 32761a6cee1d0dee366b885b7b9c777e67885688 (diff)
download: WebKitGtk-tarball-master.tar.gz
1 files changed, 687 insertions, 0 deletions
diff --git a/Source/WebCore/contentextensions/Term.h b/Source/WebCore/contentextensions/Term.h
new file mode 100644
index 000000000..0d51833f5
--- /dev/null
+++ b/Source/WebCore/contentextensions/Term.h
@@ -0,0 +1,687 @@
+/*
+ * Copyright (C) 2014-2015 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#pragma once
+
+#if ENABLE(CONTENT_EXTENSIONS)
+
+#include "ContentExtensionsDebugging.h"
+#include "NFA.h"
+#include <unicode/utypes.h>
+#include <wtf/ASCIICType.h>
+#include <wtf/HashMap.h>
+#include <wtf/Vector.h>
+#include <wtf/text/StringBuilder.h>
+#include <wtf/text/WTFString.h>
+
+namespace WebCore {
+
+namespace ContentExtensions {
+
+enum class AtomQuantifier : uint8_t {
+    One,
+    ZeroOrOne,
+    ZeroOrMore,
+    OneOrMore
+};
+
+class Term {
+public:
+    Term();
+    Term(char character, bool isCaseSensitive);
+
+    enum UniversalTransitionTag { UniversalTransition };
+    explicit Term(UniversalTransitionTag);
+
+    enum CharacterSetTermTag { CharacterSetTerm };
+    explicit Term(CharacterSetTermTag, bool isInverted);
+
+    enum GroupTermTag { GroupTerm };
+    explicit Term(GroupTermTag);
+
+    enum EndOfLineAssertionTermTag { EndOfLineAssertionTerm };
+    explicit Term(EndOfLineAssertionTermTag);
+
+    Term(const Term& other);
+    Term(Term&& other);
+
+    enum EmptyValueTag { EmptyValue };
+    Term(EmptyValueTag);
+
+    ~Term();
+
+    bool isValid() const;
+
+    // CharacterSet terms only.
+    void addCharacter(UChar character, bool isCaseSensitive);
+
+    // Group terms only.
+    void extendGroupSubpattern(const Term&);
+
+    void quantify(const AtomQuantifier&);
+
+    ImmutableCharNFANodeBuilder generateGraph(NFA&, ImmutableCharNFANodeBuilder& source, const ActionList& finalActions) const;
+    void generateGraph(NFA&, ImmutableCharNFANodeBuilder& source, uint32_t destination) const;
+
+    bool isEndOfLineAssertion() const;
+
+    bool matchesAtLeastOneCharacter() const;
+
+    // Matches any string, the empty string included.
+    // This is very conservative. Patterns matching any string can return false here.
+    bool isKnownToMatchAnyString() const;
+
+    // Return true if the term can only match input of a single fixed length.
+    bool hasFixedLength() const;
+
+    Term& operator=(const Term& other);
+    Term& operator=(Term&& other);
+
+    bool operator==(const Term& other) const;
+
+    unsigned hash() const;
+
+    bool isEmptyValue() const;
+
+#if CONTENT_EXTENSIONS_STATE_MACHINE_DEBUGGING
+    String toString() const;
+#endif
+
+#if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING
+    size_t memoryUsed() const;
+#endif
+    
+private:
+    // This is exact for character sets but conservative for groups.
+    // The return value can be false for a group equivalent to a universal transition.
+    bool isUniversalTransition() const;
+
+    void generateSubgraphForAtom(NFA&, ImmutableCharNFANodeBuilder& source, const ImmutableCharNFANodeBuilder& destination) const;
+    void generateSubgraphForAtom(NFA&, ImmutableCharNFANodeBuilder& source, uint32_t destination) const;
+
+    void destroy();
+
+    enum class TermType : uint8_t {
+        Empty,
+        CharacterSet,
+        Group
+    };
+
+    TermType m_termType { TermType::Empty };
+    AtomQuantifier m_quantifier { AtomQuantifier::One };
+
+    class CharacterSet {
+    public:
+        void set(UChar character)
+        {
+            RELEASE_ASSERT(character < 128);
+            m_characters[character / 64] |= (uint64_t(1) << (character % 64));
+        }
+        
+        bool get(UChar character) const
+        {
+            RELEASE_ASSERT(character < 128);
+            return m_characters[character / 64] & (uint64_t(1) << (character % 64));
+        }
+        
+        void invert()
+        {
+            ASSERT(!m_inverted);
+            m_inverted = true;
+        }
+        
+        bool inverted() const { return m_inverted; }
+        
+        unsigned bitCount() const
+        {
+            return WTF::bitCount(m_characters[0]) + WTF::bitCount(m_characters[1]);
+        }
+        
+        bool operator==(const CharacterSet& other) const
+        {
+            return other.m_inverted == m_inverted
+                && other.m_characters[0] == m_characters[0]
+                && other.m_characters[1] == m_characters[1];
+        }
+
+        unsigned hash() const
+        {
+            return WTF::pairIntHash(WTF::pairIntHash(WTF::intHash(m_characters[0]), WTF::intHash(m_characters[1])), m_inverted);
+        }
+    private:
+        bool m_inverted { false };
+        uint64_t m_characters[2] { 0, 0 };
+    };
+
+    struct Group {
+        Vector<Term> terms;
+
+        bool operator==(const Group& other) const
+        {
+            return other.terms == terms;
+        }
+
+        unsigned hash() const
+        {
+            unsigned hash = 6421749;
+            for (const Term& term : terms) {
+                unsigned termHash = term.hash();
+                hash = (hash << 16) ^ ((termHash << 11) ^ hash);
+                hash += hash >> 11;
+            }
+            return hash;
+        }
+    };
+
+    union AtomData {
+        AtomData()
+            : invalidTerm(0)
+        {
+        }
+        ~AtomData()
+        {
+        }
+
+        char invalidTerm;
+        CharacterSet characterSet;
+        Group group;
+    } m_atomData;
+};
+
+#if CONTENT_EXTENSIONS_STATE_MACHINE_DEBUGGING
+inline String quantifierToString(AtomQuantifier quantifier)
+{
+    switch (quantifier) {
+    case AtomQuantifier::One:
+        return "";
+    case AtomQuantifier::ZeroOrOne:
+        return "?";
+    case AtomQuantifier::ZeroOrMore:
+        return "*";
+    case AtomQuantifier::OneOrMore:
+        return "+";
+    }
+}
+    
+inline String Term::toString() const
+{
+    switch (m_termType) {
+    case TermType::Empty:
+        return "(Empty)";
+    case TermType::CharacterSet: {
+        StringBuilder builder;
+        builder.append('[');
+        for (UChar c = 0; c < 128; c++) {
+            if (m_atomData.characterSet.get(c)) {
+                if (isASCIIPrintable(c) && !isASCIISpace(c))
+                    builder.append(c);
+                else {
+                    builder.append('\\');
+                    builder.append('u');
+                    builder.appendNumber(c);
+                }
+            }
+        }
+        builder.append(']');
+        builder.append(quantifierToString(m_quantifier));
+        return builder.toString();
+    }
+    case TermType::Group: {
+        StringBuilder builder;
+        builder.append('(');
+        for (const Term& term : m_atomData.group.terms)
+            builder.append(term.toString());
+        builder.append(')');
+        builder.append(quantifierToString(m_quantifier));
+        return builder.toString();
+    }
+    }
+}
+#endif
+
+inline Term::Term()
+{
+}
+
+inline Term::Term(char character, bool isCaseSensitive)
+    : m_termType(TermType::CharacterSet)
+{
+    new (NotNull, &m_atomData.characterSet) CharacterSet();
+    addCharacter(character, isCaseSensitive);
+}
+
+inline Term::Term(UniversalTransitionTag)
+    : m_termType(TermType::CharacterSet)
+{
+    new (NotNull, &m_atomData.characterSet) CharacterSet();
+    for (UChar i = 1; i < 128; ++i)
+        m_atomData.characterSet.set(i);
+}
+
+inline Term::Term(CharacterSetTermTag, bool isInverted)
+    : m_termType(TermType::CharacterSet)
+{
+    new (NotNull, &m_atomData.characterSet) CharacterSet();
+    if (isInverted)
+        m_atomData.characterSet.invert();
+}
+
+inline Term::Term(GroupTermTag)
+    : m_termType(TermType::Group)
+{
+    new (NotNull, &m_atomData.group) Group();
+}
+
+inline Term::Term(EndOfLineAssertionTermTag)
+    : Term(CharacterSetTerm, false)
+{
+    m_atomData.characterSet.set(0);
+}
+
+inline Term::Term(const Term& other)
+    : m_termType(other.m_termType)
+    , m_quantifier(other.m_quantifier)
+{
+    switch (m_termType) {
+    case TermType::Empty:
+        break;
+    case TermType::CharacterSet:
+        new (NotNull, &m_atomData.characterSet) CharacterSet(other.m_atomData.characterSet);
+        break;
+    case TermType::Group:
+        new (NotNull, &m_atomData.group) Group(other.m_atomData.group);
+        break;
+    }
+}
+
+inline Term::Term(Term&& other)
+    : m_termType(WTFMove(other.m_termType))
+    , m_quantifier(WTFMove(other.m_quantifier))
+{
+    switch (m_termType) {
+    case TermType::Empty:
+        break;
+    case TermType::CharacterSet:
+        new (NotNull, &m_atomData.characterSet) CharacterSet(WTFMove(other.m_atomData.characterSet));
+        break;
+    case TermType::Group:
+        new (NotNull, &m_atomData.group) Group(WTFMove(other.m_atomData.group));
+        break;
+    }
+    other.destroy();
+}
+
+inline Term::Term(EmptyValueTag)
+    : m_termType(TermType::Empty)
+{
+}
+
+inline Term::~Term()
+{
+    destroy();
+}
+
+inline bool Term::isValid() const
+{
+    return m_termType != TermType::Empty;
+}
+
+inline void Term::addCharacter(UChar character, bool isCaseSensitive)
+{
+    ASSERT(isASCII(character));
+
+    ASSERT_WITH_SECURITY_IMPLICATION(m_termType == TermType::CharacterSet);
+    if (m_termType != TermType::CharacterSet)
+        return;
+
+    if (isCaseSensitive || !isASCIIAlpha(character))
+        m_atomData.characterSet.set(character);
+    else {
+        m_atomData.characterSet.set(toASCIIUpper(character));
+        m_atomData.characterSet.set(toASCIILower(character));
+    }
+}
+
+inline void Term::extendGroupSubpattern(const Term& term)
+{
+    ASSERT_WITH_SECURITY_IMPLICATION(m_termType == TermType::Group);
+    if (m_termType != TermType::Group)
+        return;
+    m_atomData.group.terms.append(term);
+}
+
+inline void Term::quantify(const AtomQuantifier& quantifier)
+{
+    ASSERT_WITH_MESSAGE(m_quantifier == AtomQuantifier::One, "Transition to quantified term should only happen once.");
+    m_quantifier = quantifier;
+}
+
+inline ImmutableCharNFANodeBuilder Term::generateGraph(NFA& nfa, ImmutableCharNFANodeBuilder& source, const ActionList& finalActions) const
+{
+    ImmutableCharNFANodeBuilder newEnd(nfa);
+    generateGraph(nfa, source, newEnd.nodeId());
+    newEnd.setActions(finalActions.begin(), finalActions.end());
+    return newEnd;
+}
+
+inline void Term::generateGraph(NFA& nfa, ImmutableCharNFANodeBuilder& source, uint32_t destination) const
+{
+    ASSERT(isValid());
+
+    switch (m_quantifier) {
+    case AtomQuantifier::One: {
+        generateSubgraphForAtom(nfa, source, destination);
+        break;
+    }
+    case AtomQuantifier::ZeroOrOne: {
+        generateSubgraphForAtom(nfa, source, destination);
+        source.addEpsilonTransition(destination);
+        break;
+    }
+    case AtomQuantifier::ZeroOrMore: {
+        ImmutableCharNFANodeBuilder repeatStart(nfa);
+        source.addEpsilonTransition(repeatStart);
+
+        ImmutableCharNFANodeBuilder repeatEnd(nfa);
+        generateSubgraphForAtom(nfa, repeatStart, repeatEnd);
+        repeatEnd.addEpsilonTransition(repeatStart);
+
+        repeatEnd.addEpsilonTransition(destination);
+        source.addEpsilonTransition(destination);
+        break;
+    }
+    case AtomQuantifier::OneOrMore: {
+        ImmutableCharNFANodeBuilder repeatStart(nfa);
+        source.addEpsilonTransition(repeatStart);
+
+        ImmutableCharNFANodeBuilder repeatEnd(nfa);
+        generateSubgraphForAtom(nfa, repeatStart, repeatEnd);
+        repeatEnd.addEpsilonTransition(repeatStart);
+
+        repeatEnd.addEpsilonTransition(destination);
+        break;
+    }
+    }
+}
+
+inline bool Term::isEndOfLineAssertion() const
+{
+    return m_termType == TermType::CharacterSet && m_atomData.characterSet.bitCount() == 1 && m_atomData.characterSet.get(0);
+}
+
+inline bool Term::matchesAtLeastOneCharacter() const
+{
+    ASSERT(isValid());
+
+    if (m_quantifier == AtomQuantifier::ZeroOrOne || m_quantifier == AtomQuantifier::ZeroOrMore)
+        return false;
+    if (isEndOfLineAssertion())
+        return false;
+
+    if (m_termType == TermType::Group) {
+        for (const Term& term : m_atomData.group.terms) {
+            if (term.matchesAtLeastOneCharacter())
+                return true;
+        }
+        return false;
+    }
+    return true;
+}
+
+inline bool Term::isKnownToMatchAnyString() const
+{
+    ASSERT(isValid());
+
+    switch (m_termType) {
+    case TermType::Empty:
+        ASSERT_NOT_REACHED();
+        break;
+    case TermType::CharacterSet:
+        // ".*" is the only simple term matching any string.
+        return isUniversalTransition() && m_quantifier == AtomQuantifier::ZeroOrMore;
+        break;
+    case TermType::Group: {
+        // There are infinitely many ways to match anything with groups, we just handle simple cases
+        if (m_atomData.group.terms.size() != 1)
+            return false;
+
+        const Term& firstTermInGroup = m_atomData.group.terms.first();
+        // -(.*) with any quantifier.
+        if (firstTermInGroup.isKnownToMatchAnyString())
+            return true;
+
+        if (firstTermInGroup.isUniversalTransition()) {
+            // -(.)*, (.+)*, (.?)* etc.
+            if (m_quantifier == AtomQuantifier::ZeroOrMore)
+                return true;
+
+            // -(.+)?.
+            if (m_quantifier == AtomQuantifier::ZeroOrOne && firstTermInGroup.m_quantifier == AtomQuantifier::OneOrMore)
+                return true;
+
+            // -(.?)+.
+            if (m_quantifier == AtomQuantifier::OneOrMore && firstTermInGroup.m_quantifier == AtomQuantifier::ZeroOrOne)
+                return true;
+        }
+        break;
+    }
+    }
+    return false;
+}
+
+inline bool Term::hasFixedLength() const
+{
+    ASSERT(isValid());
+
+    switch (m_termType) {
+    case TermType::Empty:
+        ASSERT_NOT_REACHED();
+        break;
+    case TermType::CharacterSet:
+        return m_quantifier == AtomQuantifier::One;
+    case TermType::Group: {
+        if (m_quantifier != AtomQuantifier::One)
+            return false;
+        for (const Term& term : m_atomData.group.terms) {
+            if (!term.hasFixedLength())
+                return false;
+        }
+        return true;
+    }
+    }
+    return false;
+}
+
+inline Term& Term::operator=(const Term& other)
+{
+    destroy();
+    new (NotNull, this) Term(other);
+    return *this;
+}
+
+inline Term& Term::operator=(Term&& other)
+{
+    destroy();
+    new (NotNull, this) Term(WTFMove(other));
+    return *this;
+}
+
+inline bool Term::operator==(const Term& other) const
+{
+    if (other.m_termType != m_termType || other.m_quantifier != m_quantifier)
+        return false;
+
+    switch (m_termType) {
+    case TermType::Empty:
+        return true;
+    case TermType::CharacterSet:
+        return m_atomData.characterSet == other.m_atomData.characterSet;
+    case TermType::Group:
+        return m_atomData.group == other.m_atomData.group;
+    }
+    ASSERT_NOT_REACHED();
+    return false;
+}
+
+inline unsigned Term::hash() const
+{
+    unsigned primary = static_cast<unsigned>(m_termType) << 16 | static_cast<unsigned>(m_quantifier);
+    unsigned secondary = 0;
+    switch (m_termType) {
+    case TermType::Empty:
+        secondary = 52184393;
+        break;
+    case TermType::CharacterSet:
+        secondary = m_atomData.characterSet.hash();
+        break;
+    case TermType::Group:
+        secondary = m_atomData.group.hash();
+        break;
+    }
+    return WTF::pairIntHash(primary, secondary);
+}
+
+inline bool Term::isEmptyValue() const
+{
+    return m_termType == TermType::Empty;
+}
+
+inline bool Term::isUniversalTransition() const
+{
+    ASSERT(isValid());
+
+    switch (m_termType) {
+    case TermType::Empty:
+        ASSERT_NOT_REACHED();
+        break;
+    case TermType::CharacterSet:
+        return (m_atomData.characterSet.inverted() && !m_atomData.characterSet.bitCount())
+            || (!m_atomData.characterSet.inverted() && m_atomData.characterSet.bitCount() == 127 && !m_atomData.characterSet.get(0));
+    case TermType::Group:
+        return m_atomData.group.terms.size() == 1 && m_atomData.group.terms.first().isUniversalTransition();
+    }
+    return false;
+}
+
+inline void Term::generateSubgraphForAtom(NFA& nfa, ImmutableCharNFANodeBuilder& source, const ImmutableCharNFANodeBuilder& destination) const
+{
+    generateSubgraphForAtom(nfa, source, destination.nodeId());
+}
+
+inline void Term::generateSubgraphForAtom(NFA& nfa, ImmutableCharNFANodeBuilder& source, uint32_t destination) const
+{
+    switch (m_termType) {
+    case TermType::Empty:
+        ASSERT_NOT_REACHED();
+        source.addEpsilonTransition(destination);
+        break;
+    case TermType::CharacterSet: {
+        if (!m_atomData.characterSet.inverted()) {
+            UChar i = 0;
+            while (true) {
+                while (i < 128 && !m_atomData.characterSet.get(i))
+                    ++i;
+                if (i == 128)
+                    break;
+
+                UChar start = i;
+                ++i;
+                while (i < 128 && m_atomData.characterSet.get(i))
+                    ++i;
+                source.addTransition(start, i - 1, destination);
+            }
+        } else {
+            UChar i = 1;
+            while (true) {
+                while (i < 128 && m_atomData.characterSet.get(i))
+                    ++i;
+                if (i == 128)
+                    break;
+
+                UChar start = i;
+                ++i;
+                while (i < 128 && !m_atomData.characterSet.get(i))
+                    ++i;
+                source.addTransition(start, i - 1, destination);
+            }
+        }
+        break;
+    }
+    case TermType::Group: {
+        if (m_atomData.group.terms.isEmpty()) {
+            // FIXME: any kind of empty term could be avoided in the parser. This case should turned into an assertion.
+            source.addEpsilonTransition(destination);
+            return;
+        }
+
+        if (m_atomData.group.terms.size() == 1) {
+            m_atomData.group.terms.first().generateGraph(nfa, source, destination);
+            return;
+        }
+
+        ImmutableCharNFANodeBuilder lastTarget = m_atomData.group.terms.first().generateGraph(nfa, source, ActionList());
+        for (unsigned i = 1; i < m_atomData.group.terms.size() - 1; ++i) {
+            const Term& currentTerm = m_atomData.group.terms[i];
+            ImmutableCharNFANodeBuilder newNode = currentTerm.generateGraph(nfa, lastTarget, ActionList());
+            lastTarget = WTFMove(newNode);
+        }
+        const Term& lastTerm = m_atomData.group.terms.last();
+        lastTerm.generateGraph(nfa, lastTarget, destination);
+        break;
+    }
+    }
+}
+
+inline void Term::destroy()
+{
+    switch (m_termType) {
+    case TermType::Empty:
+        break;
+    case TermType::CharacterSet:
+        m_atomData.characterSet.~CharacterSet();
+        break;
+    case TermType::Group:
+        m_atomData.group.~Group();
+        break;
+    }
+    m_termType = TermType::Empty;
+}
+
+#if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING
+inline size_t Term::memoryUsed() const
+{
+    size_t extraMemory = 0;
+    if (m_termType == TermType::Group) {
+        for (const Term& term : m_atomData.group.terms)
+            extraMemory += term.memoryUsed();
+    }
+    return sizeof(Term) + extraMemory;
+}
+#endif
+
+} // namespace ContentExtensions
+} // namespace WebCore
+
+#endif // ENABLE(CONTENT_EXTENSIONS)
author	Lorry Tar Creator <lorry-tar-importer@lorry>	2017-06-27 06:07:23 +0000
committer	Lorry Tar Creator <lorry-tar-importer@lorry>	2017-06-27 06:07:23 +0000
commit	1bf1084f2b10c3b47fd1a588d85d21ed0eb41d0c (patch)
tree	46dcd36c86e7fbc6e5df36deb463b33e9967a6f7 /Source/WebCore/contentextensions/Term.h
parent	32761a6cee1d0dee366b885b7b9c777e67885688 (diff)
download	WebKitGtk-tarball-master.tar.gz