diff options
author | Lorry Tar Creator <lorry-tar-importer@lorry> | 2017-06-27 06:07:23 +0000 |
---|---|---|
committer | Lorry Tar Creator <lorry-tar-importer@lorry> | 2017-06-27 06:07:23 +0000 |
commit | 1bf1084f2b10c3b47fd1a588d85d21ed0eb41d0c (patch) | |
tree | 46dcd36c86e7fbc6e5df36deb463b33e9967a6f7 /Source/WebCore/html/parser/HTMLToken.h | |
parent | 32761a6cee1d0dee366b885b7b9c777e67885688 (diff) | |
download | WebKitGtk-tarball-master.tar.gz |
webkitgtk-2.16.5HEADwebkitgtk-2.16.5master
Diffstat (limited to 'Source/WebCore/html/parser/HTMLToken.h')
-rw-r--r-- | Source/WebCore/html/parser/HTMLToken.h | 679 |
1 files changed, 340 insertions, 339 deletions
diff --git a/Source/WebCore/html/parser/HTMLToken.h b/Source/WebCore/html/parser/HTMLToken.h index 722ed9080..f71b21d27 100644 --- a/Source/WebCore/html/parser/HTMLToken.h +++ b/Source/WebCore/html/parser/HTMLToken.h @@ -1,5 +1,6 @@ /* * Copyright (C) 2013 Google, Inc. All Rights Reserved. + * Copyright (C) 2015 Apple Inc. All Rights Reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -23,47 +24,23 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#ifndef HTMLToken_h -#define HTMLToken_h +#pragma once #include "Attribute.h" -#include "HTMLToken.h" -#include <wtf/OwnPtr.h> -#include <wtf/PassOwnPtr.h> -#include <wtf/RefCounted.h> -#include <wtf/RefPtr.h> namespace WebCore { -class DoctypeData { - WTF_MAKE_NONCOPYABLE(DoctypeData); +struct DoctypeData { + WTF_MAKE_FAST_ALLOCATED; public: - DoctypeData() - : m_hasPublicIdentifier(false) - , m_hasSystemIdentifier(false) - , m_forceQuirks(false) - { - } - - // FIXME: This should use String instead of Vector<UChar>. - bool m_hasPublicIdentifier; - bool m_hasSystemIdentifier; - WTF::Vector<UChar> m_publicIdentifier; - WTF::Vector<UChar> m_systemIdentifier; - bool m_forceQuirks; + bool hasPublicIdentifier { false }; + bool hasSystemIdentifier { false }; + Vector<UChar> publicIdentifier; + Vector<UChar> systemIdentifier; + bool forceQuirks { false }; }; -static inline Attribute* findAttributeInVector(Vector<Attribute>& attributes, const QualifiedName& name) -{ - for (unsigned i = 0; i < attributes.size(); ++i) { - if (attributes.at(i).name().matches(name)) - return &attributes.at(i); - } - return 0; -} - class HTMLToken { - WTF_MAKE_NONCOPYABLE(HTMLToken); WTF_MAKE_FAST_ALLOCATED; public: enum Type { @@ -76,378 +53,402 @@ public: EndOfFile, }; - class Attribute { - public: - class Range { - public: - int start; - int end; - }; - - Range nameRange; - Range valueRange; + struct Attribute { Vector<UChar, 32> name; Vector<UChar, 32> value; + + // Used by HTMLSourceTracker. + unsigned startOffset; + unsigned endOffset; }; typedef Vector<Attribute, 10> AttributeList; typedef Vector<UChar, 256> DataVector; - HTMLToken() { clear(); } + HTMLToken(); - void clear() - { - m_type = Uninitialized; - m_range.start = 0; - m_range.end = 0; - m_baseOffset = 0; - m_data.clear(); - m_orAllData = 0; - } + void clear(); - bool isUninitialized() { return m_type == Uninitialized; } - Type type() const { return m_type; } + Type type() const; - void makeEndOfFile() - { - ASSERT(m_type == Uninitialized); - m_type = EndOfFile; - } + // EndOfFile - /* Range and offset methods exposed for HTMLSourceTracker and HTMLViewSourceParser */ - int startIndex() const { return m_range.start; } - int endIndex() const { return m_range.end; } + void makeEndOfFile(); - void setBaseOffset(int offset) - { - m_baseOffset = offset; - } + // StartTag, EndTag, DOCTYPE. - void end(int endOffset) - { - m_range.end = endOffset - m_baseOffset; - } + const DataVector& name() const; - const DataVector& data() const - { - ASSERT(m_type == Character || m_type == Comment || m_type == StartTag || m_type == EndTag); - return m_data; - } + void appendToName(UChar); - bool isAll8BitData() const - { - return (m_orAllData <= 0xff); - } + // DOCTYPE. - const DataVector& name() const - { - ASSERT(m_type == StartTag || m_type == EndTag || m_type == DOCTYPE); - return m_data; - } + void beginDOCTYPE(); + void beginDOCTYPE(UChar); - void appendToName(UChar character) - { - ASSERT(m_type == StartTag || m_type == EndTag || m_type == DOCTYPE); - ASSERT(character); - m_data.append(character); - m_orAllData |= character; - } + void setForceQuirks(); - /* DOCTYPE Tokens */ + void setPublicIdentifierToEmptyString(); + void setSystemIdentifierToEmptyString(); - bool forceQuirks() const - { - ASSERT(m_type == DOCTYPE); - return m_doctypeData->m_forceQuirks; - } + void appendToPublicIdentifier(UChar); + void appendToSystemIdentifier(UChar); - void setForceQuirks() - { - ASSERT(m_type == DOCTYPE); - m_doctypeData->m_forceQuirks = true; - } + std::unique_ptr<DoctypeData> releaseDoctypeData(); - void beginDOCTYPE() - { - ASSERT(m_type == Uninitialized); - m_type = DOCTYPE; - m_doctypeData = std::make_unique<DoctypeData>(); - } + // StartTag, EndTag. - void beginDOCTYPE(UChar character) - { - ASSERT(character); - beginDOCTYPE(); - m_data.append(character); - m_orAllData |= character; - } + bool selfClosing() const; + const AttributeList& attributes() const; - // FIXME: Distinguish between a missing public identifer and an empty one. - const WTF::Vector<UChar>& publicIdentifier() const - { - ASSERT(m_type == DOCTYPE); - return m_doctypeData->m_publicIdentifier; - } + void beginStartTag(UChar); - // FIXME: Distinguish between a missing system identifer and an empty one. - const WTF::Vector<UChar>& systemIdentifier() const - { - ASSERT(m_type == DOCTYPE); - return m_doctypeData->m_systemIdentifier; - } + void beginEndTag(LChar); + void beginEndTag(const Vector<LChar, 32>&); - void setPublicIdentifierToEmptyString() - { - ASSERT(m_type == DOCTYPE); - m_doctypeData->m_hasPublicIdentifier = true; - m_doctypeData->m_publicIdentifier.clear(); - } + void beginAttribute(unsigned offset); + void appendToAttributeName(UChar); + void appendToAttributeValue(UChar); + void endAttribute(unsigned offset); - void setSystemIdentifierToEmptyString() - { - ASSERT(m_type == DOCTYPE); - m_doctypeData->m_hasSystemIdentifier = true; - m_doctypeData->m_systemIdentifier.clear(); - } + void setSelfClosing(); - void appendToPublicIdentifier(UChar character) - { - ASSERT(character); - ASSERT(m_type == DOCTYPE); - ASSERT(m_doctypeData->m_hasPublicIdentifier); - m_doctypeData->m_publicIdentifier.append(character); - } + // Used by HTMLTokenizer on behalf of HTMLSourceTracker. + void setAttributeBaseOffset(unsigned attributeBaseOffset) { m_attributeBaseOffset = attributeBaseOffset; } - void appendToSystemIdentifier(UChar character) - { - ASSERT(character); - ASSERT(m_type == DOCTYPE); - ASSERT(m_doctypeData->m_hasSystemIdentifier); - m_doctypeData->m_systemIdentifier.append(character); - } +public: + // Used by the XSSAuditor to nuke XSS-laden attributes. + void eraseValueOfAttribute(unsigned index); + void appendToAttributeValue(unsigned index, StringView value); - std::unique_ptr<DoctypeData> releaseDoctypeData() - { - return std::move(m_doctypeData); - } + // Character. - /* Start/End Tag Tokens */ + // Starting a character token works slightly differently than starting + // other types of tokens because we want to save a per-character branch. + // There is no beginCharacters, and appending a character sets the type. - bool selfClosing() const - { - ASSERT(m_type == StartTag || m_type == EndTag); - return m_selfClosing; - } + const DataVector& characters() const; + bool charactersIsAll8BitData() const; - void setSelfClosing() - { - ASSERT(m_type == StartTag || m_type == EndTag); - m_selfClosing = true; - } + void appendToCharacter(LChar); + void appendToCharacter(UChar); + void appendToCharacter(const Vector<LChar, 32>&); - void beginStartTag(UChar character) - { - ASSERT(character); - ASSERT(m_type == Uninitialized); - m_type = StartTag; - m_selfClosing = false; - m_currentAttribute = 0; - m_attributes.clear(); - - m_data.append(character); - m_orAllData |= character; - } + // Comment. - void beginEndTag(LChar character) - { - ASSERT(m_type == Uninitialized); - m_type = EndTag; - m_selfClosing = false; - m_currentAttribute = 0; - m_attributes.clear(); + const DataVector& comment() const; + bool commentIsAll8BitData() const; - m_data.append(character); - } + void beginComment(); + void appendToComment(UChar); - void beginEndTag(const Vector<LChar, 32>& characters) - { - ASSERT(m_type == Uninitialized); - m_type = EndTag; - m_selfClosing = false; - m_currentAttribute = 0; - m_attributes.clear(); +private: + Type m_type; - m_data.appendVector(characters); - } + DataVector m_data; + UChar m_data8BitCheck; + + // For StartTag and EndTag + bool m_selfClosing; + AttributeList m_attributes; + Attribute* m_currentAttribute; + + // For DOCTYPE + std::unique_ptr<DoctypeData> m_doctypeData; + + unsigned m_attributeBaseOffset { 0 }; // Changes across document.write() boundaries. +}; + +const HTMLToken::Attribute* findAttribute(const Vector<HTMLToken::Attribute>&, StringView name); + +inline HTMLToken::HTMLToken() + : m_type(Uninitialized) + , m_data8BitCheck(0) +{ +} + +inline void HTMLToken::clear() +{ + m_type = Uninitialized; + m_data.clear(); + m_data8BitCheck = 0; +} + +inline HTMLToken::Type HTMLToken::type() const +{ + return m_type; +} + +inline void HTMLToken::makeEndOfFile() +{ + ASSERT(m_type == Uninitialized); + m_type = EndOfFile; +} + +inline const HTMLToken::DataVector& HTMLToken::name() const +{ + ASSERT(m_type == StartTag || m_type == EndTag || m_type == DOCTYPE); + return m_data; +} + +inline void HTMLToken::appendToName(UChar character) +{ + ASSERT(m_type == StartTag || m_type == EndTag || m_type == DOCTYPE); + ASSERT(character); + m_data.append(character); + m_data8BitCheck |= character; +} + +inline void HTMLToken::setForceQuirks() +{ + ASSERT(m_type == DOCTYPE); + m_doctypeData->forceQuirks = true; +} + +inline void HTMLToken::beginDOCTYPE() +{ + ASSERT(m_type == Uninitialized); + m_type = DOCTYPE; + m_doctypeData = std::make_unique<DoctypeData>(); +} + +inline void HTMLToken::beginDOCTYPE(UChar character) +{ + ASSERT(character); + beginDOCTYPE(); + m_data.append(character); + m_data8BitCheck |= character; +} + +inline void HTMLToken::setPublicIdentifierToEmptyString() +{ + ASSERT(m_type == DOCTYPE); + m_doctypeData->hasPublicIdentifier = true; + m_doctypeData->publicIdentifier.clear(); +} + +inline void HTMLToken::setSystemIdentifierToEmptyString() +{ + ASSERT(m_type == DOCTYPE); + m_doctypeData->hasSystemIdentifier = true; + m_doctypeData->systemIdentifier.clear(); +} + +inline void HTMLToken::appendToPublicIdentifier(UChar character) +{ + ASSERT(character); + ASSERT(m_type == DOCTYPE); + ASSERT(m_doctypeData->hasPublicIdentifier); + m_doctypeData->publicIdentifier.append(character); +} + +inline void HTMLToken::appendToSystemIdentifier(UChar character) +{ + ASSERT(character); + ASSERT(m_type == DOCTYPE); + ASSERT(m_doctypeData->hasSystemIdentifier); + m_doctypeData->systemIdentifier.append(character); +} + +inline std::unique_ptr<DoctypeData> HTMLToken::releaseDoctypeData() +{ + return WTFMove(m_doctypeData); +} + +inline bool HTMLToken::selfClosing() const +{ + ASSERT(m_type == StartTag || m_type == EndTag); + return m_selfClosing; +} + +inline void HTMLToken::setSelfClosing() +{ + ASSERT(m_type == StartTag || m_type == EndTag); + m_selfClosing = true; +} - void addNewAttribute() - { - ASSERT(m_type == StartTag || m_type == EndTag); - m_attributes.grow(m_attributes.size() + 1); - m_currentAttribute = &m_attributes.last(); -#ifndef NDEBUG - m_currentAttribute->nameRange.start = 0; - m_currentAttribute->nameRange.end = 0; - m_currentAttribute->valueRange.start = 0; - m_currentAttribute->valueRange.end = 0; +inline void HTMLToken::beginStartTag(UChar character) +{ + ASSERT(character); + ASSERT(m_type == Uninitialized); + m_type = StartTag; + m_selfClosing = false; + m_attributes.clear(); + +#if !ASSERT_DISABLED + m_currentAttribute = nullptr; #endif - } - void beginAttributeName(int offset) - { - m_currentAttribute->nameRange.start = offset - m_baseOffset; - } + m_data.append(character); + m_data8BitCheck = character; +} - void endAttributeName(int offset) - { - int index = offset - m_baseOffset; - m_currentAttribute->nameRange.end = index; - m_currentAttribute->valueRange.start = index; - m_currentAttribute->valueRange.end = index; - } +inline void HTMLToken::beginEndTag(LChar character) +{ + ASSERT(m_type == Uninitialized); + m_type = EndTag; + m_selfClosing = false; + m_attributes.clear(); - void beginAttributeValue(int offset) - { - m_currentAttribute->valueRange.start = offset - m_baseOffset; -#ifndef NDEBUG - m_currentAttribute->valueRange.end = 0; +#if !ASSERT_DISABLED + m_currentAttribute = nullptr; #endif - } - void endAttributeValue(int offset) - { - m_currentAttribute->valueRange.end = offset - m_baseOffset; - } + m_data.append(character); +} - void appendToAttributeName(UChar character) - { - ASSERT(character); - ASSERT(m_type == StartTag || m_type == EndTag); - // FIXME: We should be able to add the following ASSERT once we fix - // https://bugs.webkit.org/show_bug.cgi?id=62971 - // ASSERT(m_currentAttribute->nameRange.start); - m_currentAttribute->name.append(character); - } +inline void HTMLToken::beginEndTag(const Vector<LChar, 32>& characters) +{ + ASSERT(m_type == Uninitialized); + m_type = EndTag; + m_selfClosing = false; + m_attributes.clear(); - void appendToAttributeValue(UChar character) - { - ASSERT(character); - ASSERT(m_type == StartTag || m_type == EndTag); - ASSERT(m_currentAttribute->valueRange.start); - m_currentAttribute->value.append(character); - } +#if !ASSERT_DISABLED + m_currentAttribute = nullptr; +#endif - void appendToAttributeValue(size_t i, const String& value) - { - ASSERT(!value.isEmpty()); - ASSERT(m_type == StartTag || m_type == EndTag); - append(m_attributes[i].value, value); - } + m_data.appendVector(characters); +} - const AttributeList& attributes() const - { - ASSERT(m_type == StartTag || m_type == EndTag); - return m_attributes; - } +inline void HTMLToken::beginAttribute(unsigned offset) +{ + ASSERT(m_type == StartTag || m_type == EndTag); + ASSERT(offset); - const Attribute* getAttributeItem(const QualifiedName& name) const - { - for (unsigned i = 0; i < m_attributes.size(); ++i) { - if (AtomicString(m_attributes.at(i).name) == name.localName()) - return &m_attributes.at(i); - } - return 0; - } + m_attributes.grow(m_attributes.size() + 1); + m_currentAttribute = &m_attributes.last(); - // Used by the XSSAuditor to nuke XSS-laden attributes. - void eraseValueOfAttribute(size_t i) - { - ASSERT(m_type == StartTag || m_type == EndTag); - m_attributes[i].value.clear(); - } + m_currentAttribute->startOffset = offset - m_attributeBaseOffset; +} - /* Character Tokens */ +inline void HTMLToken::endAttribute(unsigned offset) +{ + ASSERT(offset); + ASSERT(m_currentAttribute); + m_currentAttribute->endOffset = offset - m_attributeBaseOffset; +#if !ASSERT_DISABLED + m_currentAttribute = nullptr; +#endif +} - // Starting a character token works slightly differently than starting - // other types of tokens because we want to save a per-character branch. - void ensureIsCharacterToken() - { - ASSERT(m_type == Uninitialized || m_type == Character); - m_type = Character; - } +inline void HTMLToken::appendToAttributeName(UChar character) +{ + ASSERT(character); + ASSERT(m_type == StartTag || m_type == EndTag); + ASSERT(m_currentAttribute); + m_currentAttribute->name.append(character); +} - const DataVector& characters() const - { - ASSERT(m_type == Character); - return m_data; - } +inline void HTMLToken::appendToAttributeValue(UChar character) +{ + ASSERT(character); + ASSERT(m_type == StartTag || m_type == EndTag); + ASSERT(m_currentAttribute); + m_currentAttribute->value.append(character); +} - void appendToCharacter(char character) - { - ASSERT(m_type == Character); - m_data.append(character); - } +inline void HTMLToken::appendToAttributeValue(unsigned i, StringView value) +{ + ASSERT(!value.isEmpty()); + ASSERT(m_type == StartTag || m_type == EndTag); + append(m_attributes[i].value, value); +} - void appendToCharacter(UChar character) - { - ASSERT(m_type == Character); - m_data.append(character); - m_orAllData |= character; - } +inline const HTMLToken::AttributeList& HTMLToken::attributes() const +{ + ASSERT(m_type == StartTag || m_type == EndTag); + return m_attributes; +} - void appendToCharacter(const Vector<LChar, 32>& characters) - { - ASSERT(m_type == Character); - m_data.appendVector(characters); - } +// Used by the XSSAuditor to nuke XSS-laden attributes. +inline void HTMLToken::eraseValueOfAttribute(unsigned i) +{ + ASSERT(m_type == StartTag || m_type == EndTag); + ASSERT(i < m_attributes.size()); + m_attributes[i].value.clear(); +} - /* Comment Tokens */ +inline const HTMLToken::DataVector& HTMLToken::characters() const +{ + ASSERT(m_type == Character); + return m_data; +} - const DataVector& comment() const - { - ASSERT(m_type == Comment); - return m_data; - } +inline bool HTMLToken::charactersIsAll8BitData() const +{ + ASSERT(m_type == Character); + return m_data8BitCheck <= 0xFF; +} - void beginComment() - { - ASSERT(m_type == Uninitialized); - m_type = Comment; - } +inline void HTMLToken::appendToCharacter(LChar character) +{ + ASSERT(m_type == Uninitialized || m_type == Character); + m_type = Character; + m_data.append(character); +} - void appendToComment(UChar character) - { - ASSERT(character); - ASSERT(m_type == Comment); - m_data.append(character); - m_orAllData |= character; - } +inline void HTMLToken::appendToCharacter(UChar character) +{ + ASSERT(m_type == Uninitialized || m_type == Character); + m_type = Character; + m_data.append(character); + m_data8BitCheck |= character; +} - void eraseCharacters() - { - ASSERT(m_type == Character); - m_data.clear(); - m_orAllData = 0; - } +inline void HTMLToken::appendToCharacter(const Vector<LChar, 32>& characters) +{ + ASSERT(m_type == Uninitialized || m_type == Character); + m_type = Character; + m_data.appendVector(characters); +} -private: - Type m_type; - Attribute::Range m_range; // Always starts at zero. - int m_baseOffset; - DataVector m_data; - UChar m_orAllData; +inline const HTMLToken::DataVector& HTMLToken::comment() const +{ + ASSERT(m_type == Comment); + return m_data; +} - // For StartTag and EndTag - bool m_selfClosing; - AttributeList m_attributes; +inline bool HTMLToken::commentIsAll8BitData() const +{ + ASSERT(m_type == Comment); + return m_data8BitCheck <= 0xFF; +} - // A pointer into m_attributes used during lexing. - Attribute* m_currentAttribute; +inline void HTMLToken::beginComment() +{ + ASSERT(m_type == Uninitialized); + m_type = Comment; +} - // For DOCTYPE - std::unique_ptr<DoctypeData> m_doctypeData; -}; +inline void HTMLToken::appendToComment(UChar character) +{ + ASSERT(character); + ASSERT(m_type == Comment); + m_data.append(character); + m_data8BitCheck |= character; +} +inline bool nameMatches(const HTMLToken::Attribute& attribute, StringView name) +{ + unsigned size = name.length(); + if (attribute.name.size() != size) + return false; + for (unsigned i = 0; i < size; ++i) { + // FIXME: The one caller that uses this probably wants to ignore letter case. + if (attribute.name[i] != name[i]) + return false; + } + return true; } -#endif +inline const HTMLToken::Attribute* findAttribute(const HTMLToken::AttributeList& attributes, StringView name) +{ + for (auto& attribute : attributes) { + if (nameMatches(attribute, name)) + return &attribute; + } + return nullptr; +} + +} // namespace WebCore |