summary refs log tree commit diff
path: root/Source/WebCore/html/parser/HTMLToken.h
diff options
context:
space:
mode:
Diffstat (limited to 'Source/WebCore/html/parser/HTMLToken.h')
-rw-r--r-- Source/WebCore/html/parser/HTMLToken.h 679
1 files changed, 340 insertions, 339 deletions
diff --git a/Source/WebCore/html/parser/HTMLToken.h b/Source/WebCore/html/parser/HTMLToken.h
index 722ed9080..f71b21d27 100644
--- a/Source/WebCore/html/parser/HTMLToken.h
+++ b/Source/WebCore/html/parser/HTMLToken.h
@@ -1,5 +1,6 @@
/*
* Copyright (C) 2013 Google, Inc. All Rights Reserved.
+ * Copyright (C) 2015 Apple Inc. All Rights Reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -23,47 +24,23 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-#ifndef HTMLToken_h
-#define HTMLToken_h
+#pragma once
#include "Attribute.h"
-#include "HTMLToken.h"
-#include <wtf/OwnPtr.h>
-#include <wtf/PassOwnPtr.h>
-#include <wtf/RefCounted.h>
-#include <wtf/RefPtr.h>
namespace WebCore {
-class DoctypeData {
- WTF_MAKE_NONCOPYABLE(DoctypeData);
+struct DoctypeData {
+ WTF_MAKE_FAST_ALLOCATED;
public:
- DoctypeData()
- : m_hasPublicIdentifier(false)
- , m_hasSystemIdentifier(false)
- , m_forceQuirks(false)
- {
- }
-
- // FIXME: This should use String instead of Vector<UChar>.
- bool m_hasPublicIdentifier;
- bool m_hasSystemIdentifier;
- WTF::Vector<UChar> m_publicIdentifier;
- WTF::Vector<UChar> m_systemIdentifier;
- bool m_forceQuirks;
+ bool hasPublicIdentifier { false };
+ bool hasSystemIdentifier { false };
+ Vector<UChar> publicIdentifier;
+ Vector<UChar> systemIdentifier;
+ bool forceQuirks { false };
};
-static inline Attribute* findAttributeInVector(Vector<Attribute>& attributes, const QualifiedName& name)
-{
- for (unsigned i = 0; i < attributes.size(); ++i) {
- if (attributes.at(i).name().matches(name))
- return &attributes.at(i);
- }
- return 0;
-}
-
class HTMLToken {
- WTF_MAKE_NONCOPYABLE(HTMLToken);
WTF_MAKE_FAST_ALLOCATED;
public:
enum Type {
@@ -76,378 +53,402 @@ public:
EndOfFile,
};
- class Attribute {
- public:
- class Range {
- public:
- int start;
- int end;
- };
-
- Range nameRange;
- Range valueRange;
+ struct Attribute {
Vector<UChar, 32> name;
Vector<UChar, 32> value;
+
+ // Used by HTMLSourceTracker.
+ unsigned startOffset;
+ unsigned endOffset;
};
typedef Vector<Attribute, 10> AttributeList;
typedef Vector<UChar, 256> DataVector;
- HTMLToken() { clear(); }
+ HTMLToken();
- void clear()
- {
- m_type = Uninitialized;
- m_range.start = 0;
- m_range.end = 0;
- m_baseOffset = 0;
- m_data.clear();
- m_orAllData = 0;
- }
+ void clear();
- bool isUninitialized() { return m_type == Uninitialized; }
- Type type() const { return m_type; }
+ Type type() const;
- void makeEndOfFile()
- {
- ASSERT(m_type == Uninitialized);
- m_type = EndOfFile;
- }
+ // EndOfFile
- /* Range and offset methods exposed for HTMLSourceTracker and HTMLViewSourceParser */
- int startIndex() const { return m_range.start; }
- int endIndex() const { return m_range.end; }
+ void makeEndOfFile();
- void setBaseOffset(int offset)
- {
- m_baseOffset = offset;
- }
+ // StartTag, EndTag, DOCTYPE.
- void end(int endOffset)
- {
- m_range.end = endOffset - m_baseOffset;
- }
+ const DataVector& name() const;
- const DataVector& data() const
- {
- ASSERT(m_type == Character || m_type == Comment || m_type == StartTag || m_type == EndTag);
- return m_data;
- }
+ void appendToName(UChar);
- bool isAll8BitData() const
- {
- return (m_orAllData <= 0xff);
- }
+ // DOCTYPE.
- const DataVector& name() const
- {
- ASSERT(m_type == StartTag || m_type == EndTag || m_type == DOCTYPE);
- return m_data;
- }
+ void beginDOCTYPE();
+ void beginDOCTYPE(UChar);
- void appendToName(UChar character)
- {
- ASSERT(m_type == StartTag || m_type == EndTag || m_type == DOCTYPE);
- ASSERT(character);
- m_data.append(character);
- m_orAllData |= character;
- }
+ void setForceQuirks();
- /* DOCTYPE Tokens */
+ void setPublicIdentifierToEmptyString();
+ void setSystemIdentifierToEmptyString();
- bool forceQuirks() const
- {
- ASSERT(m_type == DOCTYPE);
- return m_doctypeData->m_forceQuirks;
- }
+ void appendToPublicIdentifier(UChar);
+ void appendToSystemIdentifier(UChar);
- void setForceQuirks()
- {
- ASSERT(m_type == DOCTYPE);
- m_doctypeData->m_forceQuirks = true;
- }
+ std::unique_ptr<DoctypeData> releaseDoctypeData();
- void beginDOCTYPE()
- {
- ASSERT(m_type == Uninitialized);
- m_type = DOCTYPE;
- m_doctypeData = std::make_unique<DoctypeData>();
- }
+ // StartTag, EndTag.
- void beginDOCTYPE(UChar character)
- {
- ASSERT(character);
- beginDOCTYPE();
- m_data.append(character);
- m_orAllData |= character;
- }
+ bool selfClosing() const;
+ const AttributeList& attributes() const;
- // FIXME: Distinguish between a missing public identifer and an empty one.
- const WTF::Vector<UChar>& publicIdentifier() const
- {
- ASSERT(m_type == DOCTYPE);
- return m_doctypeData->m_publicIdentifier;
- }
+ void beginStartTag(UChar);
- // FIXME: Distinguish between a missing system identifer and an empty one.
- const WTF::Vector<UChar>& systemIdentifier() const
- {
- ASSERT(m_type == DOCTYPE);
- return m_doctypeData->m_systemIdentifier;
- }
+ void beginEndTag(LChar);
+ void beginEndTag(const Vector<LChar, 32>&);
- void setPublicIdentifierToEmptyString()
- {
- ASSERT(m_type == DOCTYPE);
- m_doctypeData->m_hasPublicIdentifier = true;
- m_doctypeData->m_publicIdentifier.clear();
- }
+ void beginAttribute(unsigned offset);
+ void appendToAttributeName(UChar);
+ void appendToAttributeValue(UChar);
+ void endAttribute(unsigned offset);
- void setSystemIdentifierToEmptyString()
- {
- ASSERT(m_type == DOCTYPE);
- m_doctypeData->m_hasSystemIdentifier = true;
- m_doctypeData->m_systemIdentifier.clear();
- }
+ void setSelfClosing();
- void appendToPublicIdentifier(UChar character)
- {
- ASSERT(character);
- ASSERT(m_type == DOCTYPE);
- ASSERT(m_doctypeData->m_hasPublicIdentifier);
- m_doctypeData->m_publicIdentifier.append(character);
- }
+ // Used by HTMLTokenizer on behalf of HTMLSourceTracker.
+ void setAttributeBaseOffset(unsigned attributeBaseOffset) { m_attributeBaseOffset = attributeBaseOffset; }
- void appendToSystemIdentifier(UChar character)
- {
- ASSERT(character);
- ASSERT(m_type == DOCTYPE);
- ASSERT(m_doctypeData->m_hasSystemIdentifier);
- m_doctypeData->m_systemIdentifier.append(character);
- }
+public:
+ // Used by the XSSAuditor to nuke XSS-laden attributes.
+ void eraseValueOfAttribute(unsigned index);
+ void appendToAttributeValue(unsigned index, StringView value);
- std::unique_ptr<DoctypeData> releaseDoctypeData()
- {
- return std::move(m_doctypeData);
- }
+ // Character.
- /* Start/End Tag Tokens */
+ // Starting a character token works slightly differently than starting
+ // other types of tokens because we want to save a per-character branch.
+ // There is no beginCharacters, and appending a character sets the type.
- bool selfClosing() const
- {
- ASSERT(m_type == StartTag || m_type == EndTag);
- return m_selfClosing;
- }
+ const DataVector& characters() const;
+ bool charactersIsAll8BitData() const;
- void setSelfClosing()
- {
- ASSERT(m_type == StartTag || m_type == EndTag);
- m_selfClosing = true;
- }
+ void appendToCharacter(LChar);
+ void appendToCharacter(UChar);
+ void appendToCharacter(const Vector<LChar, 32>&);
- void beginStartTag(UChar character)
- {
- ASSERT(character);
- ASSERT(m_type == Uninitialized);
- m_type = StartTag;
- m_selfClosing = false;
- m_currentAttribute = 0;
- m_attributes.clear();
-
- m_data.append(character);
- m_orAllData |= character;
- }
+ // Comment.
- void beginEndTag(LChar character)
- {
- ASSERT(m_type == Uninitialized);
- m_type = EndTag;
- m_selfClosing = false;
- m_currentAttribute = 0;
- m_attributes.clear();
+ const DataVector& comment() const;
+ bool commentIsAll8BitData() const;
- m_data.append(character);
- }
+ void beginComment();
+ void appendToComment(UChar);
- void beginEndTag(const Vector<LChar, 32>& characters)
- {
- ASSERT(m_type == Uninitialized);
- m_type = EndTag;
- m_selfClosing = false;
- m_currentAttribute = 0;
- m_attributes.clear();
+private:
+ Type m_type;
- m_data.appendVector(characters);
- }
+ DataVector m_data;
+ UChar m_data8BitCheck;
+
+ // For StartTag and EndTag
+ bool m_selfClosing;
+ AttributeList m_attributes;
+ Attribute* m_currentAttribute;
+
+ // For DOCTYPE
+ std::unique_ptr<DoctypeData> m_doctypeData;
+
+ unsigned m_attributeBaseOffset { 0 }; // Changes across document.write() boundaries.
+};
+
+const HTMLToken::Attribute* findAttribute(const HTMLToken::AttributeList&, StringView name); // Returns the first attribute whose name equals |name| exactly, or nullptr. The parameter type now matches the inline definition later in this header.
+
+inline HTMLToken::HTMLToken() // Builds an empty token of type Uninitialized with every scalar member in a defined state.
+    : m_type(Uninitialized), m_data8BitCheck(0)
+    , m_selfClosing(false), m_currentAttribute(nullptr) // Previously indeterminate until a beginStartTag()/beginEndTag() call.
+{
+}
+
+inline void HTMLToken::clear() // Resets the token for reuse: type back to Uninitialized and the shared data buffer emptied.
+{
+ m_type = Uninitialized;
+ m_data.clear(); // Attributes, m_selfClosing and DOCTYPE data are not touched here; the begin*() functions reset what they use.
+ m_data8BitCheck = 0; // Restart the OR-accumulator that the *IsAll8BitData() checks read.
+}
+
+inline HTMLToken::Type HTMLToken::type() const
+{
+ return m_type;
+}
+
+inline void HTMLToken::makeEndOfFile()
+{
+ ASSERT(m_type == Uninitialized);
+ m_type = EndOfFile;
+}
+
+inline const HTMLToken::DataVector& HTMLToken::name() const
+{
+ ASSERT(m_type == StartTag || m_type == EndTag || m_type == DOCTYPE);
+ return m_data;
+}
+
+inline void HTMLToken::appendToName(UChar character) // Appends one non-null character to the tag or DOCTYPE name held in m_data.
+{
+ ASSERT(m_type == StartTag || m_type == EndTag || m_type == DOCTYPE);
+ ASSERT(character);
+ m_data.append(character);
+ m_data8BitCheck |= character; // OR-accumulate so any code unit above 0xFF leaves a high bit set.
+}
+
+inline void HTMLToken::setForceQuirks()
+{
+ ASSERT(m_type == DOCTYPE);
+ m_doctypeData->forceQuirks = true;
+}
+
+inline void HTMLToken::beginDOCTYPE() // Turns an Uninitialized token into a DOCTYPE token with no name characters yet.
+{
+ ASSERT(m_type == Uninitialized);
+ m_type = DOCTYPE;
+ m_doctypeData = std::make_unique<DoctypeData>(); // Fresh DoctypeData: both identifiers absent and forceQuirks false.
+}
+
+inline void HTMLToken::beginDOCTYPE(UChar character)
+{
+ ASSERT(character);
+ beginDOCTYPE();
+ m_data.append(character);
+ m_data8BitCheck |= character;
+}
+
+inline void HTMLToken::setPublicIdentifierToEmptyString()
+{
+ ASSERT(m_type == DOCTYPE);
+ m_doctypeData->hasPublicIdentifier = true;
+ m_doctypeData->publicIdentifier.clear();
+}
+
+inline void HTMLToken::setSystemIdentifierToEmptyString()
+{
+ ASSERT(m_type == DOCTYPE);
+ m_doctypeData->hasSystemIdentifier = true;
+ m_doctypeData->systemIdentifier.clear();
+}
+
+inline void HTMLToken::appendToPublicIdentifier(UChar character) // Appends one non-null character to the DOCTYPE public identifier.
+{
+ ASSERT(character);
+ ASSERT(m_type == DOCTYPE);
+ ASSERT(m_doctypeData->hasPublicIdentifier); // setPublicIdentifierToEmptyString() is what sets this flag, so it must run first.
+ m_doctypeData->publicIdentifier.append(character);
+}
+
+inline void HTMLToken::appendToSystemIdentifier(UChar character)
+{
+ ASSERT(character);
+ ASSERT(m_type == DOCTYPE);
+ ASSERT(m_doctypeData->hasSystemIdentifier);
+ m_doctypeData->systemIdentifier.append(character);
+}
+
+inline std::unique_ptr<DoctypeData> HTMLToken::releaseDoctypeData() // Hands ownership of the DOCTYPE data to the caller.
+{
+ return WTFMove(m_doctypeData); // The token no longer owns the data after this; beginDOCTYPE() allocates a new one.
+}
+
+inline bool HTMLToken::selfClosing() const
+{
+ ASSERT(m_type == StartTag || m_type == EndTag);
+ return m_selfClosing;
+}
+
+inline void HTMLToken::setSelfClosing()
+{
+ ASSERT(m_type == StartTag || m_type == EndTag);
+ m_selfClosing = true;
+}
- void addNewAttribute()
- {
- ASSERT(m_type == StartTag || m_type == EndTag);
- m_attributes.grow(m_attributes.size() + 1);
- m_currentAttribute = &m_attributes.last();
-#ifndef NDEBUG
- m_currentAttribute->nameRange.start = 0;
- m_currentAttribute->nameRange.end = 0;
- m_currentAttribute->valueRange.start = 0;
- m_currentAttribute->valueRange.end = 0;
+inline void HTMLToken::beginStartTag(UChar character)
+{
+ ASSERT(character);
+ ASSERT(m_type == Uninitialized);
+ m_type = StartTag;
+ m_selfClosing = false;
+ m_attributes.clear();
+
+#if !ASSERT_DISABLED
+ m_currentAttribute = nullptr;
#endif
- }
- void beginAttributeName(int offset)
- {
- m_currentAttribute->nameRange.start = offset - m_baseOffset;
- }
+ m_data.append(character);
+ m_data8BitCheck = character;
+}
- void endAttributeName(int offset)
- {
- int index = offset - m_baseOffset;
- m_currentAttribute->nameRange.end = index;
- m_currentAttribute->valueRange.start = index;
- m_currentAttribute->valueRange.end = index;
- }
+inline void HTMLToken::beginEndTag(LChar character)
+{
+ ASSERT(m_type == Uninitialized);
+ m_type = EndTag;
+ m_selfClosing = false;
+ m_attributes.clear();
- void beginAttributeValue(int offset)
- {
- m_currentAttribute->valueRange.start = offset - m_baseOffset;
-#ifndef NDEBUG
- m_currentAttribute->valueRange.end = 0;
+#if !ASSERT_DISABLED
+ m_currentAttribute = nullptr;
#endif
- }
- void endAttributeValue(int offset)
- {
- m_currentAttribute->valueRange.end = offset - m_baseOffset;
- }
+ m_data.append(character);
+}
- void appendToAttributeName(UChar character)
- {
- ASSERT(character);
- ASSERT(m_type == StartTag || m_type == EndTag);
- // FIXME: We should be able to add the following ASSERT once we fix
- // https://bugs.webkit.org/show_bug.cgi?id=62971
- // ASSERT(m_currentAttribute->nameRange.start);
- m_currentAttribute->name.append(character);
- }
+inline void HTMLToken::beginEndTag(const Vector<LChar, 32>& characters)
+{
+ ASSERT(m_type == Uninitialized);
+ m_type = EndTag;
+ m_selfClosing = false;
+ m_attributes.clear();
- void appendToAttributeValue(UChar character)
- {
- ASSERT(character);
- ASSERT(m_type == StartTag || m_type == EndTag);
- ASSERT(m_currentAttribute->valueRange.start);
- m_currentAttribute->value.append(character);
- }
+#if !ASSERT_DISABLED
+ m_currentAttribute = nullptr;
+#endif
- void appendToAttributeValue(size_t i, const String& value)
- {
- ASSERT(!value.isEmpty());
- ASSERT(m_type == StartTag || m_type == EndTag);
- append(m_attributes[i].value, value);
- }
+ m_data.appendVector(characters);
+}
- const AttributeList& attributes() const
- {
- ASSERT(m_type == StartTag || m_type == EndTag);
- return m_attributes;
- }
+inline void HTMLToken::beginAttribute(unsigned offset)
+{
+ ASSERT(m_type == StartTag || m_type == EndTag);
+ ASSERT(offset);
- const Attribute* getAttributeItem(const QualifiedName& name) const
- {
- for (unsigned i = 0; i < m_attributes.size(); ++i) {
- if (AtomicString(m_attributes.at(i).name) == name.localName())
- return &m_attributes.at(i);
- }
- return 0;
- }
+ m_attributes.grow(m_attributes.size() + 1);
+ m_currentAttribute = &m_attributes.last();
- // Used by the XSSAuditor to nuke XSS-laden attributes.
- void eraseValueOfAttribute(size_t i)
- {
- ASSERT(m_type == StartTag || m_type == EndTag);
- m_attributes[i].value.clear();
- }
+ m_currentAttribute->startOffset = offset - m_attributeBaseOffset;
+}
- /* Character Tokens */
+inline void HTMLToken::endAttribute(unsigned offset) // Records where the attribute currently being built ends.
+{
+ ASSERT(offset);
+ ASSERT(m_currentAttribute);
+ m_currentAttribute->endOffset = offset - m_attributeBaseOffset; // Stored relative to the base offset supplied through setAttributeBaseOffset().
+#if !ASSERT_DISABLED
+ m_currentAttribute = nullptr; // Cleared only in assert-enabled builds, so the ASSERT(m_currentAttribute) checks catch appends outside an attribute.
+#endif
+}
- // Starting a character token works slightly differently than starting
- // other types of tokens because we want to save a per-character branch.
- void ensureIsCharacterToken()
- {
- ASSERT(m_type == Uninitialized || m_type == Character);
- m_type = Character;
- }
+inline void HTMLToken::appendToAttributeName(UChar character)
+{
+ ASSERT(character);
+ ASSERT(m_type == StartTag || m_type == EndTag);
+ ASSERT(m_currentAttribute);
+ m_currentAttribute->name.append(character);
+}
- const DataVector& characters() const
- {
- ASSERT(m_type == Character);
- return m_data;
- }
+inline void HTMLToken::appendToAttributeValue(UChar character)
+{
+ ASSERT(character);
+ ASSERT(m_type == StartTag || m_type == EndTag);
+ ASSERT(m_currentAttribute);
+ m_currentAttribute->value.append(character);
+}
- void appendToCharacter(char character)
- {
- ASSERT(m_type == Character);
- m_data.append(character);
- }
+inline void HTMLToken::appendToAttributeValue(unsigned i, StringView value)
+{
+ ASSERT(!value.isEmpty());
+ ASSERT(m_type == StartTag || m_type == EndTag);
+ append(m_attributes[i].value, value);
+}
- void appendToCharacter(UChar character)
- {
- ASSERT(m_type == Character);
- m_data.append(character);
- m_orAllData |= character;
- }
+inline const HTMLToken::AttributeList& HTMLToken::attributes() const
+{
+ ASSERT(m_type == StartTag || m_type == EndTag);
+ return m_attributes;
+}
- void appendToCharacter(const Vector<LChar, 32>& characters)
- {
- ASSERT(m_type == Character);
- m_data.appendVector(characters);
- }
+// Used by the XSSAuditor to nuke XSS-laden attributes: empties the value of the attribute at index i but keeps the attribute and its name.
+inline void HTMLToken::eraseValueOfAttribute(unsigned i)
+{
+ ASSERT(m_type == StartTag || m_type == EndTag);
+ ASSERT(i < m_attributes.size());
+ m_attributes[i].value.clear();
+}
- /* Comment Tokens */
+inline const HTMLToken::DataVector& HTMLToken::characters() const
+{
+ ASSERT(m_type == Character);
+ return m_data;
+}
- const DataVector& comment() const
- {
- ASSERT(m_type == Comment);
- return m_data;
- }
+inline bool HTMLToken::charactersIsAll8BitData() const // True when no UChar appended to this Character token exceeded 0xFF.
+{
+ ASSERT(m_type == Character);
+ return m_data8BitCheck <= 0xFF; // m_data8BitCheck is the bitwise OR of the UChars appended since the last clear().
+}
- void beginComment()
- {
- ASSERT(m_type == Uninitialized);
- m_type = Comment;
- }
+inline void HTMLToken::appendToCharacter(LChar character) // Appends one character; there is no beginCharacters(), so this also sets the token type.
+{
+ ASSERT(m_type == Uninitialized || m_type == Character);
+ m_type = Character; // Assigned unconditionally instead of branching on the current type for every character.
+ m_data.append(character); // m_data8BitCheck is not updated, unlike the UChar overload; presumably because an LChar always fits in 8 bits.
+}
- void appendToComment(UChar character)
- {
- ASSERT(character);
- ASSERT(m_type == Comment);
- m_data.append(character);
- m_orAllData |= character;
- }
+inline void HTMLToken::appendToCharacter(UChar character)
+{
+ ASSERT(m_type == Uninitialized || m_type == Character);
+ m_type = Character;
+ m_data.append(character);
+ m_data8BitCheck |= character;
+}
- void eraseCharacters()
- {
- ASSERT(m_type == Character);
- m_data.clear();
- m_orAllData = 0;
- }
+inline void HTMLToken::appendToCharacter(const Vector<LChar, 32>& characters)
+{
+ ASSERT(m_type == Uninitialized || m_type == Character);
+ m_type = Character;
+ m_data.appendVector(characters);
+}
-private:
- Type m_type;
- Attribute::Range m_range; // Always starts at zero.
- int m_baseOffset;
- DataVector m_data;
- UChar m_orAllData;
+inline const HTMLToken::DataVector& HTMLToken::comment() const
+{
+ ASSERT(m_type == Comment);
+ return m_data;
+}
- // For StartTag and EndTag
- bool m_selfClosing;
- AttributeList m_attributes;
+inline bool HTMLToken::commentIsAll8BitData() const
+{
+ ASSERT(m_type == Comment);
+ return m_data8BitCheck <= 0xFF;
+}
- // A pointer into m_attributes used during lexing.
- Attribute* m_currentAttribute;
+inline void HTMLToken::beginComment()
+{
+ ASSERT(m_type == Uninitialized);
+ m_type = Comment;
+}
- // For DOCTYPE
- std::unique_ptr<DoctypeData> m_doctypeData;
-};
+inline void HTMLToken::appendToComment(UChar character)
+{
+ ASSERT(character);
+ ASSERT(m_type == Comment);
+ m_data.append(character);
+ m_data8BitCheck |= character;
+}
+inline bool nameMatches(const HTMLToken::Attribute& attribute, StringView name)
+{
+    // Exact comparison of the attribute's name against |name|: the lengths
+    // must agree and every code unit must be equal. No case folding is done.
+    // FIXME: The one caller that uses this probably wants to ignore letter case.
+    const unsigned length = name.length();
+    bool matches = attribute.name.size() == length;
+    // Stop at the first differing code unit.
+    for (unsigned index = 0; matches && index < length; ++index)
+        matches = attribute.name[index] == name[index];
+    return matches;
}
-#endif
+inline const HTMLToken::Attribute* findAttribute(const HTMLToken::AttributeList& attributes, StringView name)
+{
+    // Linear scan in list order; yields the first attribute whose name equals |name| exactly, or nullptr.
+    const HTMLToken::Attribute* match = nullptr;
+    for (unsigned index = 0; !match && index < attributes.size(); ++index)
+        match = nameMatches(attributes[index], name) ? &attributes[index] : nullptr;
+    return match;
+}
+
+} // namespace WebCore