summaryrefslogtreecommitdiff
path: root/Source/WebCore/html/parser/HTMLTokenizer.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'Source/WebCore/html/parser/HTMLTokenizer.cpp')
-rw-r--r--Source/WebCore/html/parser/HTMLTokenizer.cpp2235
1 files changed, 1035 insertions, 1200 deletions
diff --git a/Source/WebCore/html/parser/HTMLTokenizer.cpp b/Source/WebCore/html/parser/HTMLTokenizer.cpp
index 2abefaf68..985c618dd 100644
--- a/Source/WebCore/html/parser/HTMLTokenizer.cpp
+++ b/Source/WebCore/html/parser/HTMLTokenizer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2008 Apple Inc. All Rights Reserved.
+ * Copyright (C) 2008-2016 Apple Inc. All Rights Reserved.
* Copyright (C) 2009 Torch Mobile, Inc. http://www.torchmobile.com/
* Copyright (C) 2010 Google, Inc. All Rights Reserved.
*
@@ -29,13 +29,9 @@
#include "HTMLTokenizer.h"
#include "HTMLEntityParser.h"
-#include "HTMLTreeBuilder.h"
+#include "HTMLNames.h"
#include "MarkupTokenizerInlines.h"
-#include "NotImplemented.h"
-#include <wtf/ASCIICType.h>
-#include <wtf/CurrentTime.h>
-#include <wtf/text/CString.h>
-#include <wtf/unicode/Unicode.h>
+#include <wtf/text/StringBuilder.h>
using namespace WTF;
@@ -43,81 +39,95 @@ namespace WebCore {
using namespace HTMLNames;
-// This has to go in a .cpp file, as the linker doesn't like it being included more than once.
-// We don't have an HTMLToken.cpp though, so this is the next best place.
-QualifiedName AtomicHTMLToken::nameForAttribute(const HTMLToken::Attribute& attribute) const
+static inline LChar convertASCIIAlphaToLower(UChar character)
{
- return QualifiedName(nullAtom, AtomicString(attribute.name), nullAtom);
+ ASSERT(isASCIIAlpha(character));
+ return toASCIILowerUnchecked(character);
}
-bool AtomicHTMLToken::usesName() const
+static inline bool vectorEqualsString(const Vector<LChar, 32>& vector, const char* string)
{
- return m_type == HTMLToken::StartTag || m_type == HTMLToken::EndTag || m_type == HTMLToken::DOCTYPE;
+ unsigned size = vector.size();
+ for (unsigned i = 0; i < size; ++i) {
+ if (!string[i] || vector[i] != string[i])
+ return false;
+ }
+ return !string[size];
}
-bool AtomicHTMLToken::usesAttributes() const
+inline bool HTMLTokenizer::inEndTagBufferingState() const
{
- return m_type == HTMLToken::StartTag || m_type == HTMLToken::EndTag;
+ switch (m_state) {
+ case RCDATAEndTagOpenState:
+ case RCDATAEndTagNameState:
+ case RAWTEXTEndTagOpenState:
+ case RAWTEXTEndTagNameState:
+ case ScriptDataEndTagOpenState:
+ case ScriptDataEndTagNameState:
+ case ScriptDataEscapedEndTagOpenState:
+ case ScriptDataEscapedEndTagNameState:
+ return true;
+ default:
+ return false;
+ }
}
-static inline UChar toLowerCase(UChar cc)
+HTMLTokenizer::HTMLTokenizer(const HTMLParserOptions& options)
+ : m_preprocessor(*this)
+ , m_options(options)
{
- ASSERT(isASCIIUpper(cc));
- const int lowerCaseOffset = 0x20;
- return cc + lowerCaseOffset;
}
-static inline bool vectorEqualsString(const Vector<LChar, 32>& vector, const String& string)
+inline void HTMLTokenizer::bufferASCIICharacter(UChar character)
{
- if (vector.size() != string.length())
- return false;
-
- if (!string.length())
- return true;
+ ASSERT(character != kEndOfFileMarker);
+ ASSERT(isASCII(character));
+ LChar narrowedCharacter = character;
+ m_token.appendToCharacter(narrowedCharacter);
+}
- return equal(string.impl(), vector.data(), vector.size());
+inline void HTMLTokenizer::bufferCharacter(UChar character)
+{
+ ASSERT(character != kEndOfFileMarker);
+ m_token.appendToCharacter(character);
}
-static inline bool isEndTagBufferingState(HTMLTokenizer::State state)
+inline bool HTMLTokenizer::emitAndResumeInDataState(SegmentedString& source)
{
- switch (state) {
- case HTMLTokenizer::RCDATAEndTagOpenState:
- case HTMLTokenizer::RCDATAEndTagNameState:
- case HTMLTokenizer::RAWTEXTEndTagOpenState:
- case HTMLTokenizer::RAWTEXTEndTagNameState:
- case HTMLTokenizer::ScriptDataEndTagOpenState:
- case HTMLTokenizer::ScriptDataEndTagNameState:
- case HTMLTokenizer::ScriptDataEscapedEndTagOpenState:
- case HTMLTokenizer::ScriptDataEscapedEndTagNameState:
- return true;
- default:
- return false;
- }
+ saveEndTagNameIfNeeded();
+ m_state = DataState;
+ source.advancePastNonNewline();
+ return true;
}
-#define HTML_BEGIN_STATE(stateName) BEGIN_STATE(HTMLTokenizer, stateName)
-#define HTML_RECONSUME_IN(stateName) RECONSUME_IN(HTMLTokenizer, stateName)
-#define HTML_ADVANCE_TO(stateName) ADVANCE_TO(HTMLTokenizer, stateName)
-#define HTML_SWITCH_TO(stateName) SWITCH_TO(HTMLTokenizer, stateName)
+inline bool HTMLTokenizer::emitAndReconsumeInDataState()
+{
+ saveEndTagNameIfNeeded();
+ m_state = DataState;
+ return true;
+}
-HTMLTokenizer::HTMLTokenizer(const HTMLParserOptions& options)
- : m_inputStreamPreprocessor(this)
- , m_options(options)
+inline bool HTMLTokenizer::emitEndOfFile(SegmentedString& source)
{
- reset();
+ m_state = DataState;
+ if (haveBufferedCharacterToken())
+ return true;
+ source.advance();
+ m_token.clear();
+ m_token.makeEndOfFile();
+ return true;
}
-HTMLTokenizer::~HTMLTokenizer()
+inline void HTMLTokenizer::saveEndTagNameIfNeeded()
{
+ ASSERT(m_token.type() != HTMLToken::Uninitialized);
+ if (m_token.type() == HTMLToken::StartTag)
+ m_appropriateEndTagName = m_token.name();
}
-void HTMLTokenizer::reset()
+inline bool HTMLTokenizer::haveBufferedCharacterToken() const
{
- m_state = HTMLTokenizer::DataState;
- m_token = 0;
- m_forceNullCharacterReplacement = false;
- m_shouldAllowCDATA = false;
- m_additionalAllowedCharacter = '\0';
+ return m_token.type() == HTMLToken::Character;
}
inline bool HTMLTokenizer::processEntity(SegmentedString& source)
@@ -129,7 +139,7 @@ inline bool HTMLTokenizer::processEntity(SegmentedString& source)
return false;
if (!success) {
ASSERT(decodedEntity.isEmpty());
- bufferCharacter('&');
+ bufferASCIICharacter('&');
} else {
for (unsigned i = 0; i < decodedEntity.length(); ++i)
bufferCharacter(decodedEntity[i]);
@@ -137,1426 +147,1246 @@ inline bool HTMLTokenizer::processEntity(SegmentedString& source)
return true;
}
-bool HTMLTokenizer::flushBufferedEndTag(SegmentedString& source)
+void HTMLTokenizer::flushBufferedEndTag()
{
- ASSERT(m_token->type() == HTMLToken::Character || m_token->type() == HTMLToken::Uninitialized);
- source.advanceAndUpdateLineNumber();
- if (m_token->type() == HTMLToken::Character)
- return true;
- m_token->beginEndTag(m_bufferedEndTagName);
+ m_token.beginEndTag(m_bufferedEndTagName);
m_bufferedEndTagName.clear();
m_appropriateEndTagName.clear();
m_temporaryBuffer.clear();
+}
+
+bool HTMLTokenizer::commitToPartialEndTag(SegmentedString& source, UChar character, State state)
+{
+ ASSERT(source.currentCharacter() == character);
+ appendToTemporaryBuffer(character);
+ source.advancePastNonNewline();
+
+ if (haveBufferedCharacterToken()) {
+ // Emit the buffered character token.
+ // The next call to processToken will flush the buffered end tag and continue parsing it.
+ m_state = state;
+ return true;
+ }
+
+ flushBufferedEndTag();
return false;
}
-#define FLUSH_AND_ADVANCE_TO(stateName) \
- do { \
- m_state = HTMLTokenizer::stateName; \
- if (flushBufferedEndTag(source)) \
- return true; \
- if (source.isEmpty() \
- || !m_inputStreamPreprocessor.peek(source)) \
- return haveBufferedCharacterToken(); \
- cc = m_inputStreamPreprocessor.nextInputCharacter(); \
- goto stateName; \
- } while (false)
-
-bool HTMLTokenizer::flushEmitAndResumeIn(SegmentedString& source, HTMLTokenizer::State state)
+bool HTMLTokenizer::commitToCompleteEndTag(SegmentedString& source)
{
- m_state = state;
- flushBufferedEndTag(source);
+ ASSERT(source.currentCharacter() == '>');
+ appendToTemporaryBuffer('>');
+ source.advancePastNonNewline();
+
+ m_state = DataState;
+
+ if (haveBufferedCharacterToken()) {
+ // Emit the character token we already have.
+ // The next call to processToken will flush the buffered end tag and emit it.
+ return true;
+ }
+
+ flushBufferedEndTag();
return true;
}
-bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)
+bool HTMLTokenizer::processToken(SegmentedString& source)
{
- // If we have a token in progress, then we're supposed to be called back
- // with the same token so we can finish it.
- ASSERT(!m_token || m_token == &token || token.type() == HTMLToken::Uninitialized);
- m_token = &token;
-
- if (!m_bufferedEndTagName.isEmpty() && !isEndTagBufferingState(m_state)) {
- // FIXME: This should call flushBufferedEndTag().
- // We started an end tag during our last iteration.
- m_token->beginEndTag(m_bufferedEndTagName);
- m_bufferedEndTagName.clear();
- m_appropriateEndTagName.clear();
- m_temporaryBuffer.clear();
- if (m_state == HTMLTokenizer::DataState) {
- // We're back in the data state, so we must be done with the tag.
+ if (!m_bufferedEndTagName.isEmpty() && !inEndTagBufferingState()) {
+ // We are back here after emitting a character token that came just before an end tag.
+ // To continue parsing the end tag we need to move the buffered tag name into the token.
+ flushBufferedEndTag();
+
+ // If we are in the data state, the end tag is already complete and we should emit it
+ // now, otherwise, we want to resume parsing the partial end tag.
+ if (m_state == DataState)
return true;
- }
}
- if (source.isEmpty() || !m_inputStreamPreprocessor.peek(source))
+ if (!m_preprocessor.peek(source, isNullCharacterSkippingState(m_state)))
return haveBufferedCharacterToken();
- UChar cc = m_inputStreamPreprocessor.nextInputCharacter();
+ UChar character = m_preprocessor.nextInputCharacter();
- // Source: http://www.whatwg.org/specs/web-apps/current-work/#tokenisation0
+ // https://html.spec.whatwg.org/#tokenization
switch (m_state) {
- HTML_BEGIN_STATE(DataState) {
- if (cc == '&')
- HTML_ADVANCE_TO(CharacterReferenceInDataState);
- else if (cc == '<') {
- if (m_token->type() == HTMLToken::Character) {
- // We have a bunch of character tokens queued up that we
- // are emitting lazily here.
- return true;
- }
- HTML_ADVANCE_TO(TagOpenState);
- } else if (cc == kEndOfFileMarker)
- return emitEndOfFile(source);
- else {
- bufferCharacter(cc);
- HTML_ADVANCE_TO(DataState);
- }
- }
- END_STATE()
-
- HTML_BEGIN_STATE(CharacterReferenceInDataState) {
- if (!processEntity(source))
- return haveBufferedCharacterToken();
- HTML_SWITCH_TO(DataState);
- }
- END_STATE()
- HTML_BEGIN_STATE(RCDATAState) {
- if (cc == '&')
- HTML_ADVANCE_TO(CharacterReferenceInRCDATAState);
- else if (cc == '<')
- HTML_ADVANCE_TO(RCDATALessThanSignState);
- else if (cc == kEndOfFileMarker)
- return emitEndOfFile(source);
- else {
- bufferCharacter(cc);
- HTML_ADVANCE_TO(RCDATAState);
+ BEGIN_STATE(DataState)
+ if (character == '&')
+ ADVANCE_PAST_NON_NEWLINE_TO(CharacterReferenceInDataState);
+ if (character == '<') {
+ if (haveBufferedCharacterToken())
+ RETURN_IN_CURRENT_STATE(true);
+ ADVANCE_PAST_NON_NEWLINE_TO(TagOpenState);
}
- }
+ if (character == kEndOfFileMarker)
+ return emitEndOfFile(source);
+ bufferCharacter(character);
+ ADVANCE_TO(DataState);
END_STATE()
- HTML_BEGIN_STATE(CharacterReferenceInRCDATAState) {
+ BEGIN_STATE(CharacterReferenceInDataState)
if (!processEntity(source))
- return haveBufferedCharacterToken();
- HTML_SWITCH_TO(RCDATAState);
- }
+ RETURN_IN_CURRENT_STATE(haveBufferedCharacterToken());
+ SWITCH_TO(DataState);
END_STATE()
- HTML_BEGIN_STATE(RAWTEXTState) {
- if (cc == '<')
- HTML_ADVANCE_TO(RAWTEXTLessThanSignState);
- else if (cc == kEndOfFileMarker)
- return emitEndOfFile(source);
- else {
- bufferCharacter(cc);
- HTML_ADVANCE_TO(RAWTEXTState);
- }
- }
+ BEGIN_STATE(RCDATAState)
+ if (character == '&')
+ ADVANCE_PAST_NON_NEWLINE_TO(CharacterReferenceInRCDATAState);
+ if (character == '<')
+ ADVANCE_PAST_NON_NEWLINE_TO(RCDATALessThanSignState);
+ if (character == kEndOfFileMarker)
+ RECONSUME_IN(DataState);
+ bufferCharacter(character);
+ ADVANCE_TO(RCDATAState);
END_STATE()
- HTML_BEGIN_STATE(ScriptDataState) {
- if (cc == '<')
- HTML_ADVANCE_TO(ScriptDataLessThanSignState);
- else if (cc == kEndOfFileMarker)
- return emitEndOfFile(source);
- else {
- bufferCharacter(cc);
- HTML_ADVANCE_TO(ScriptDataState);
+ BEGIN_STATE(CharacterReferenceInRCDATAState)
+ if (!processEntity(source))
+ RETURN_IN_CURRENT_STATE(haveBufferedCharacterToken());
+ SWITCH_TO(RCDATAState);
+ END_STATE()
+
+ BEGIN_STATE(RAWTEXTState)
+ if (character == '<')
+ ADVANCE_PAST_NON_NEWLINE_TO(RAWTEXTLessThanSignState);
+ if (character == kEndOfFileMarker)
+ RECONSUME_IN(DataState);
+ bufferCharacter(character);
+ ADVANCE_TO(RAWTEXTState);
+ END_STATE()
+
+ BEGIN_STATE(ScriptDataState)
+ if (character == '<')
+ ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataLessThanSignState);
+ if (character == kEndOfFileMarker)
+ RECONSUME_IN(DataState);
+ bufferCharacter(character);
+ ADVANCE_TO(ScriptDataState);
+ END_STATE()
+
+ BEGIN_STATE(PLAINTEXTState)
+ if (character == kEndOfFileMarker)
+ RECONSUME_IN(DataState);
+ bufferCharacter(character);
+ ADVANCE_TO(PLAINTEXTState);
+ END_STATE()
+
+ BEGIN_STATE(TagOpenState)
+ if (character == '!')
+ ADVANCE_PAST_NON_NEWLINE_TO(MarkupDeclarationOpenState);
+ if (character == '/')
+ ADVANCE_PAST_NON_NEWLINE_TO(EndTagOpenState);
+ if (isASCIIAlpha(character)) {
+ m_token.beginStartTag(convertASCIIAlphaToLower(character));
+ ADVANCE_PAST_NON_NEWLINE_TO(TagNameState);
}
- }
- END_STATE()
-
- HTML_BEGIN_STATE(PLAINTEXTState) {
- if (cc == kEndOfFileMarker)
- return emitEndOfFile(source);
- bufferCharacter(cc);
- HTML_ADVANCE_TO(PLAINTEXTState);
- }
- END_STATE()
-
- HTML_BEGIN_STATE(TagOpenState) {
- if (cc == '!')
- HTML_ADVANCE_TO(MarkupDeclarationOpenState);
- else if (cc == '/')
- HTML_ADVANCE_TO(EndTagOpenState);
- else if (isASCIIUpper(cc)) {
- m_token->beginStartTag(toLowerCase(cc));
- HTML_ADVANCE_TO(TagNameState);
- } else if (isASCIILower(cc)) {
- m_token->beginStartTag(cc);
- HTML_ADVANCE_TO(TagNameState);
- } else if (cc == '?') {
+ if (character == '?') {
parseError();
// The spec consumes the current character before switching
// to the bogus comment state, but it's easier to implement
// if we reconsume the current character.
- HTML_RECONSUME_IN(BogusCommentState);
- } else {
- parseError();
- bufferCharacter('<');
- HTML_RECONSUME_IN(DataState);
+ RECONSUME_IN(BogusCommentState);
}
- }
+ parseError();
+ bufferASCIICharacter('<');
+ RECONSUME_IN(DataState);
END_STATE()
- HTML_BEGIN_STATE(EndTagOpenState) {
- if (isASCIIUpper(cc)) {
- m_token->beginEndTag(static_cast<LChar>(toLowerCase(cc)));
+ BEGIN_STATE(EndTagOpenState)
+ if (isASCIIAlpha(character)) {
+ m_token.beginEndTag(convertASCIIAlphaToLower(character));
m_appropriateEndTagName.clear();
- HTML_ADVANCE_TO(TagNameState);
- } else if (isASCIILower(cc)) {
- m_token->beginEndTag(static_cast<LChar>(cc));
- m_appropriateEndTagName.clear();
- HTML_ADVANCE_TO(TagNameState);
- } else if (cc == '>') {
- parseError();
- HTML_ADVANCE_TO(DataState);
- } else if (cc == kEndOfFileMarker) {
+ ADVANCE_PAST_NON_NEWLINE_TO(TagNameState);
+ }
+ if (character == '>') {
parseError();
- bufferCharacter('<');
- bufferCharacter('/');
- HTML_RECONSUME_IN(DataState);
- } else {
+ ADVANCE_PAST_NON_NEWLINE_TO(DataState);
+ }
+ if (character == kEndOfFileMarker) {
parseError();
- HTML_RECONSUME_IN(BogusCommentState);
+ bufferASCIICharacter('<');
+ bufferASCIICharacter('/');
+ RECONSUME_IN(DataState);
}
- }
+ parseError();
+ RECONSUME_IN(BogusCommentState);
END_STATE()
- HTML_BEGIN_STATE(TagNameState) {
- if (isTokenizerWhitespace(cc))
- HTML_ADVANCE_TO(BeforeAttributeNameState);
- else if (cc == '/')
- HTML_ADVANCE_TO(SelfClosingStartTagState);
- else if (cc == '>')
- return emitAndResumeIn(source, HTMLTokenizer::DataState);
- else if (m_options.usePreHTML5ParserQuirks && cc == '<')
- return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
- else if (isASCIIUpper(cc)) {
- m_token->appendToName(toLowerCase(cc));
- HTML_ADVANCE_TO(TagNameState);
- } else if (cc == kEndOfFileMarker) {
- parseError();
- HTML_RECONSUME_IN(DataState);
- } else {
- m_token->appendToName(cc);
- HTML_ADVANCE_TO(TagNameState);
+ BEGIN_STATE(TagNameState)
+ if (isTokenizerWhitespace(character))
+ ADVANCE_TO(BeforeAttributeNameState);
+ if (character == '/')
+ ADVANCE_PAST_NON_NEWLINE_TO(SelfClosingStartTagState);
+ if (character == '>')
+ return emitAndResumeInDataState(source);
+ if (m_options.usePreHTML5ParserQuirks && character == '<')
+ return emitAndReconsumeInDataState();
+ if (character == kEndOfFileMarker) {
+ parseError();
+ RECONSUME_IN(DataState);
}
- }
+ m_token.appendToName(toASCIILower(character));
+ ADVANCE_PAST_NON_NEWLINE_TO(TagNameState);
END_STATE()
- HTML_BEGIN_STATE(RCDATALessThanSignState) {
- if (cc == '/') {
+ BEGIN_STATE(RCDATALessThanSignState)
+ if (character == '/') {
m_temporaryBuffer.clear();
ASSERT(m_bufferedEndTagName.isEmpty());
- HTML_ADVANCE_TO(RCDATAEndTagOpenState);
- } else {
- bufferCharacter('<');
- HTML_RECONSUME_IN(RCDATAState);
+ ADVANCE_PAST_NON_NEWLINE_TO(RCDATAEndTagOpenState);
}
- }
+ bufferASCIICharacter('<');
+ RECONSUME_IN(RCDATAState);
END_STATE()
- HTML_BEGIN_STATE(RCDATAEndTagOpenState) {
- if (isASCIIUpper(cc)) {
- m_temporaryBuffer.append(static_cast<LChar>(cc));
- addToPossibleEndTag(static_cast<LChar>(toLowerCase(cc)));
- HTML_ADVANCE_TO(RCDATAEndTagNameState);
- } else if (isASCIILower(cc)) {
- m_temporaryBuffer.append(static_cast<LChar>(cc));
- addToPossibleEndTag(static_cast<LChar>(cc));
- HTML_ADVANCE_TO(RCDATAEndTagNameState);
- } else {
- bufferCharacter('<');
- bufferCharacter('/');
- HTML_RECONSUME_IN(RCDATAState);
+ BEGIN_STATE(RCDATAEndTagOpenState)
+ if (isASCIIAlpha(character)) {
+ appendToTemporaryBuffer(character);
+ appendToPossibleEndTag(convertASCIIAlphaToLower(character));
+ ADVANCE_PAST_NON_NEWLINE_TO(RCDATAEndTagNameState);
}
- }
+ bufferASCIICharacter('<');
+ bufferASCIICharacter('/');
+ RECONSUME_IN(RCDATAState);
END_STATE()
- HTML_BEGIN_STATE(RCDATAEndTagNameState) {
- if (isASCIIUpper(cc)) {
- m_temporaryBuffer.append(static_cast<LChar>(cc));
- addToPossibleEndTag(static_cast<LChar>(toLowerCase(cc)));
- HTML_ADVANCE_TO(RCDATAEndTagNameState);
- } else if (isASCIILower(cc)) {
- m_temporaryBuffer.append(static_cast<LChar>(cc));
- addToPossibleEndTag(static_cast<LChar>(cc));
- HTML_ADVANCE_TO(RCDATAEndTagNameState);
- } else {
- if (isTokenizerWhitespace(cc)) {
- if (isAppropriateEndTag()) {
- m_temporaryBuffer.append(static_cast<LChar>(cc));
- FLUSH_AND_ADVANCE_TO(BeforeAttributeNameState);
- }
- } else if (cc == '/') {
- if (isAppropriateEndTag()) {
- m_temporaryBuffer.append(static_cast<LChar>(cc));
- FLUSH_AND_ADVANCE_TO(SelfClosingStartTagState);
- }
- } else if (cc == '>') {
- if (isAppropriateEndTag()) {
- m_temporaryBuffer.append(static_cast<LChar>(cc));
- return flushEmitAndResumeIn(source, HTMLTokenizer::DataState);
- }
+ BEGIN_STATE(RCDATAEndTagNameState)
+ if (isASCIIAlpha(character)) {
+ appendToTemporaryBuffer(character);
+ appendToPossibleEndTag(convertASCIIAlphaToLower(character));
+ ADVANCE_PAST_NON_NEWLINE_TO(RCDATAEndTagNameState);
+ }
+ if (isTokenizerWhitespace(character)) {
+ if (isAppropriateEndTag()) {
+ if (commitToPartialEndTag(source, character, BeforeAttributeNameState))
+ return true;
+ SWITCH_TO(BeforeAttributeNameState);
}
- bufferCharacter('<');
- bufferCharacter('/');
- m_token->appendToCharacter(m_temporaryBuffer);
- m_bufferedEndTagName.clear();
- m_temporaryBuffer.clear();
- HTML_RECONSUME_IN(RCDATAState);
+ } else if (character == '/') {
+ if (isAppropriateEndTag()) {
+ if (commitToPartialEndTag(source, '/', SelfClosingStartTagState))
+ return true;
+ SWITCH_TO(SelfClosingStartTagState);
+ }
+ } else if (character == '>') {
+ if (isAppropriateEndTag())
+ return commitToCompleteEndTag(source);
}
- }
+ bufferASCIICharacter('<');
+ bufferASCIICharacter('/');
+ m_token.appendToCharacter(m_temporaryBuffer);
+ m_bufferedEndTagName.clear();
+ m_temporaryBuffer.clear();
+ RECONSUME_IN(RCDATAState);
END_STATE()
- HTML_BEGIN_STATE(RAWTEXTLessThanSignState) {
- if (cc == '/') {
+ BEGIN_STATE(RAWTEXTLessThanSignState)
+ if (character == '/') {
m_temporaryBuffer.clear();
ASSERT(m_bufferedEndTagName.isEmpty());
- HTML_ADVANCE_TO(RAWTEXTEndTagOpenState);
- } else {
- bufferCharacter('<');
- HTML_RECONSUME_IN(RAWTEXTState);
+ ADVANCE_PAST_NON_NEWLINE_TO(RAWTEXTEndTagOpenState);
}
- }
+ bufferASCIICharacter('<');
+ RECONSUME_IN(RAWTEXTState);
END_STATE()
- HTML_BEGIN_STATE(RAWTEXTEndTagOpenState) {
- if (isASCIIUpper(cc)) {
- m_temporaryBuffer.append(static_cast<LChar>(cc));
- addToPossibleEndTag(static_cast<LChar>(toLowerCase(cc)));
- HTML_ADVANCE_TO(RAWTEXTEndTagNameState);
- } else if (isASCIILower(cc)) {
- m_temporaryBuffer.append(static_cast<LChar>(cc));
- addToPossibleEndTag(static_cast<LChar>(cc));
- HTML_ADVANCE_TO(RAWTEXTEndTagNameState);
- } else {
- bufferCharacter('<');
- bufferCharacter('/');
- HTML_RECONSUME_IN(RAWTEXTState);
+ BEGIN_STATE(RAWTEXTEndTagOpenState)
+ if (isASCIIAlpha(character)) {
+ appendToTemporaryBuffer(character);
+ appendToPossibleEndTag(convertASCIIAlphaToLower(character));
+ ADVANCE_PAST_NON_NEWLINE_TO(RAWTEXTEndTagNameState);
}
- }
+ bufferASCIICharacter('<');
+ bufferASCIICharacter('/');
+ RECONSUME_IN(RAWTEXTState);
END_STATE()
- HTML_BEGIN_STATE(RAWTEXTEndTagNameState) {
- if (isASCIIUpper(cc)) {
- m_temporaryBuffer.append(static_cast<LChar>(cc));
- addToPossibleEndTag(static_cast<LChar>(toLowerCase(cc)));
- HTML_ADVANCE_TO(RAWTEXTEndTagNameState);
- } else if (isASCIILower(cc)) {
- m_temporaryBuffer.append(static_cast<LChar>(cc));
- addToPossibleEndTag(static_cast<LChar>(cc));
- HTML_ADVANCE_TO(RAWTEXTEndTagNameState);
- } else {
- if (isTokenizerWhitespace(cc)) {
- if (isAppropriateEndTag()) {
- m_temporaryBuffer.append(static_cast<LChar>(cc));
- FLUSH_AND_ADVANCE_TO(BeforeAttributeNameState);
- }
- } else if (cc == '/') {
- if (isAppropriateEndTag()) {
- m_temporaryBuffer.append(static_cast<LChar>(cc));
- FLUSH_AND_ADVANCE_TO(SelfClosingStartTagState);
- }
- } else if (cc == '>') {
- if (isAppropriateEndTag()) {
- m_temporaryBuffer.append(static_cast<LChar>(cc));
- return flushEmitAndResumeIn(source, HTMLTokenizer::DataState);
- }
+ BEGIN_STATE(RAWTEXTEndTagNameState)
+ if (isASCIIAlpha(character)) {
+ appendToTemporaryBuffer(character);
+ appendToPossibleEndTag(convertASCIIAlphaToLower(character));
+ ADVANCE_PAST_NON_NEWLINE_TO(RAWTEXTEndTagNameState);
+ }
+ if (isTokenizerWhitespace(character)) {
+ if (isAppropriateEndTag()) {
+ if (commitToPartialEndTag(source, character, BeforeAttributeNameState))
+ return true;
+ SWITCH_TO(BeforeAttributeNameState);
}
- bufferCharacter('<');
- bufferCharacter('/');
- m_token->appendToCharacter(m_temporaryBuffer);
- m_bufferedEndTagName.clear();
- m_temporaryBuffer.clear();
- HTML_RECONSUME_IN(RAWTEXTState);
+ } else if (character == '/') {
+ if (isAppropriateEndTag()) {
+ if (commitToPartialEndTag(source, '/', SelfClosingStartTagState))
+ return true;
+ SWITCH_TO(SelfClosingStartTagState);
+ }
+ } else if (character == '>') {
+ if (isAppropriateEndTag())
+ return commitToCompleteEndTag(source);
}
- }
+ bufferASCIICharacter('<');
+ bufferASCIICharacter('/');
+ m_token.appendToCharacter(m_temporaryBuffer);
+ m_bufferedEndTagName.clear();
+ m_temporaryBuffer.clear();
+ RECONSUME_IN(RAWTEXTState);
END_STATE()
- HTML_BEGIN_STATE(ScriptDataLessThanSignState) {
- if (cc == '/') {
+ BEGIN_STATE(ScriptDataLessThanSignState)
+ if (character == '/') {
m_temporaryBuffer.clear();
ASSERT(m_bufferedEndTagName.isEmpty());
- HTML_ADVANCE_TO(ScriptDataEndTagOpenState);
- } else if (cc == '!') {
- bufferCharacter('<');
- bufferCharacter('!');
- HTML_ADVANCE_TO(ScriptDataEscapeStartState);
- } else {
- bufferCharacter('<');
- HTML_RECONSUME_IN(ScriptDataState);
+ ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataEndTagOpenState);
}
- }
+ if (character == '!') {
+ bufferASCIICharacter('<');
+ bufferASCIICharacter('!');
+ ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataEscapeStartState);
+ }
+ bufferASCIICharacter('<');
+ RECONSUME_IN(ScriptDataState);
END_STATE()
- HTML_BEGIN_STATE(ScriptDataEndTagOpenState) {
- if (isASCIIUpper(cc)) {
- m_temporaryBuffer.append(static_cast<LChar>(cc));
- addToPossibleEndTag(static_cast<LChar>(toLowerCase(cc)));
- HTML_ADVANCE_TO(ScriptDataEndTagNameState);
- } else if (isASCIILower(cc)) {
- m_temporaryBuffer.append(static_cast<LChar>(cc));
- addToPossibleEndTag(static_cast<LChar>(cc));
- HTML_ADVANCE_TO(ScriptDataEndTagNameState);
- } else {
- bufferCharacter('<');
- bufferCharacter('/');
- HTML_RECONSUME_IN(ScriptDataState);
+ BEGIN_STATE(ScriptDataEndTagOpenState)
+ if (isASCIIAlpha(character)) {
+ appendToTemporaryBuffer(character);
+ appendToPossibleEndTag(convertASCIIAlphaToLower(character));
+ ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataEndTagNameState);
}
- }
+ bufferASCIICharacter('<');
+ bufferASCIICharacter('/');
+ RECONSUME_IN(ScriptDataState);
END_STATE()
- HTML_BEGIN_STATE(ScriptDataEndTagNameState) {
- if (isASCIIUpper(cc)) {
- m_temporaryBuffer.append(static_cast<LChar>(cc));
- addToPossibleEndTag(static_cast<LChar>(toLowerCase(cc)));
- HTML_ADVANCE_TO(ScriptDataEndTagNameState);
- } else if (isASCIILower(cc)) {
- m_temporaryBuffer.append(static_cast<LChar>(cc));
- addToPossibleEndTag(static_cast<LChar>(cc));
- HTML_ADVANCE_TO(ScriptDataEndTagNameState);
- } else {
- if (isTokenizerWhitespace(cc)) {
- if (isAppropriateEndTag()) {
- m_temporaryBuffer.append(static_cast<LChar>(cc));
- FLUSH_AND_ADVANCE_TO(BeforeAttributeNameState);
- }
- } else if (cc == '/') {
- if (isAppropriateEndTag()) {
- m_temporaryBuffer.append(static_cast<LChar>(cc));
- FLUSH_AND_ADVANCE_TO(SelfClosingStartTagState);
- }
- } else if (cc == '>') {
- if (isAppropriateEndTag()) {
- m_temporaryBuffer.append(static_cast<LChar>(cc));
- return flushEmitAndResumeIn(source, HTMLTokenizer::DataState);
- }
+ BEGIN_STATE(ScriptDataEndTagNameState)
+ if (isASCIIAlpha(character)) {
+ appendToTemporaryBuffer(character);
+ appendToPossibleEndTag(convertASCIIAlphaToLower(character));
+ ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataEndTagNameState);
+ }
+ if (isTokenizerWhitespace(character)) {
+ if (isAppropriateEndTag()) {
+ if (commitToPartialEndTag(source, character, BeforeAttributeNameState))
+ return true;
+ SWITCH_TO(BeforeAttributeNameState);
}
- bufferCharacter('<');
- bufferCharacter('/');
- m_token->appendToCharacter(m_temporaryBuffer);
- m_bufferedEndTagName.clear();
- m_temporaryBuffer.clear();
- HTML_RECONSUME_IN(ScriptDataState);
+ } else if (character == '/') {
+ if (isAppropriateEndTag()) {
+ if (commitToPartialEndTag(source, '/', SelfClosingStartTagState))
+ return true;
+ SWITCH_TO(SelfClosingStartTagState);
+ }
+ } else if (character == '>') {
+ if (isAppropriateEndTag())
+ return commitToCompleteEndTag(source);
}
- }
+ bufferASCIICharacter('<');
+ bufferASCIICharacter('/');
+ m_token.appendToCharacter(m_temporaryBuffer);
+ m_bufferedEndTagName.clear();
+ m_temporaryBuffer.clear();
+ RECONSUME_IN(ScriptDataState);
END_STATE()
- HTML_BEGIN_STATE(ScriptDataEscapeStartState) {
- if (cc == '-') {
- bufferCharacter(cc);
- HTML_ADVANCE_TO(ScriptDataEscapeStartDashState);
+ BEGIN_STATE(ScriptDataEscapeStartState)
+ if (character == '-') {
+ bufferASCIICharacter('-');
+ ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataEscapeStartDashState);
} else
- HTML_RECONSUME_IN(ScriptDataState);
- }
+ RECONSUME_IN(ScriptDataState);
END_STATE()
- HTML_BEGIN_STATE(ScriptDataEscapeStartDashState) {
- if (cc == '-') {
- bufferCharacter(cc);
- HTML_ADVANCE_TO(ScriptDataEscapedDashDashState);
+ BEGIN_STATE(ScriptDataEscapeStartDashState)
+ if (character == '-') {
+ bufferASCIICharacter('-');
+ ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataEscapedDashDashState);
} else
- HTML_RECONSUME_IN(ScriptDataState);
- }
+ RECONSUME_IN(ScriptDataState);
END_STATE()
- HTML_BEGIN_STATE(ScriptDataEscapedState) {
- if (cc == '-') {
- bufferCharacter(cc);
- HTML_ADVANCE_TO(ScriptDataEscapedDashState);
- } else if (cc == '<')
- HTML_ADVANCE_TO(ScriptDataEscapedLessThanSignState);
- else if (cc == kEndOfFileMarker) {
+ BEGIN_STATE(ScriptDataEscapedState)
+ if (character == '-') {
+ bufferASCIICharacter('-');
+ ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataEscapedDashState);
+ }
+ if (character == '<')
+ ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataEscapedLessThanSignState);
+ if (character == kEndOfFileMarker) {
parseError();
- HTML_RECONSUME_IN(DataState);
- } else {
- bufferCharacter(cc);
- HTML_ADVANCE_TO(ScriptDataEscapedState);
+ RECONSUME_IN(DataState);
}
- }
+ bufferCharacter(character);
+ ADVANCE_TO(ScriptDataEscapedState);
END_STATE()
- HTML_BEGIN_STATE(ScriptDataEscapedDashState) {
- if (cc == '-') {
- bufferCharacter(cc);
- HTML_ADVANCE_TO(ScriptDataEscapedDashDashState);
- } else if (cc == '<')
- HTML_ADVANCE_TO(ScriptDataEscapedLessThanSignState);
- else if (cc == kEndOfFileMarker) {
+ BEGIN_STATE(ScriptDataEscapedDashState)
+ if (character == '-') {
+ bufferASCIICharacter('-');
+ ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataEscapedDashDashState);
+ }
+ if (character == '<')
+ ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataEscapedLessThanSignState);
+ if (character == kEndOfFileMarker) {
parseError();
- HTML_RECONSUME_IN(DataState);
- } else {
- bufferCharacter(cc);
- HTML_ADVANCE_TO(ScriptDataEscapedState);
+ RECONSUME_IN(DataState);
}
- }
+ bufferCharacter(character);
+ ADVANCE_TO(ScriptDataEscapedState);
END_STATE()
- HTML_BEGIN_STATE(ScriptDataEscapedDashDashState) {
- if (cc == '-') {
- bufferCharacter(cc);
- HTML_ADVANCE_TO(ScriptDataEscapedDashDashState);
- } else if (cc == '<')
- HTML_ADVANCE_TO(ScriptDataEscapedLessThanSignState);
- else if (cc == '>') {
- bufferCharacter(cc);
- HTML_ADVANCE_TO(ScriptDataState);
- } else if (cc == kEndOfFileMarker) {
+ BEGIN_STATE(ScriptDataEscapedDashDashState)
+ if (character == '-') {
+ bufferASCIICharacter('-');
+ ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataEscapedDashDashState);
+ }
+ if (character == '<')
+ ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataEscapedLessThanSignState);
+ if (character == '>') {
+ bufferASCIICharacter('>');
+ ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataState);
+ }
+ if (character == kEndOfFileMarker) {
parseError();
- HTML_RECONSUME_IN(DataState);
- } else {
- bufferCharacter(cc);
- HTML_ADVANCE_TO(ScriptDataEscapedState);
+ RECONSUME_IN(DataState);
}
- }
+ bufferCharacter(character);
+ ADVANCE_TO(ScriptDataEscapedState);
END_STATE()
- HTML_BEGIN_STATE(ScriptDataEscapedLessThanSignState) {
- if (cc == '/') {
+ BEGIN_STATE(ScriptDataEscapedLessThanSignState)
+ if (character == '/') {
m_temporaryBuffer.clear();
ASSERT(m_bufferedEndTagName.isEmpty());
- HTML_ADVANCE_TO(ScriptDataEscapedEndTagOpenState);
- } else if (isASCIIUpper(cc)) {
- bufferCharacter('<');
- bufferCharacter(cc);
- m_temporaryBuffer.clear();
- m_temporaryBuffer.append(toLowerCase(cc));
- HTML_ADVANCE_TO(ScriptDataDoubleEscapeStartState);
- } else if (isASCIILower(cc)) {
- bufferCharacter('<');
- bufferCharacter(cc);
+ ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataEscapedEndTagOpenState);
+ }
+ if (isASCIIAlpha(character)) {
+ bufferASCIICharacter('<');
+ bufferASCIICharacter(character);
m_temporaryBuffer.clear();
- m_temporaryBuffer.append(static_cast<LChar>(cc));
- HTML_ADVANCE_TO(ScriptDataDoubleEscapeStartState);
- } else {
- bufferCharacter('<');
- HTML_RECONSUME_IN(ScriptDataEscapedState);
+ appendToTemporaryBuffer(convertASCIIAlphaToLower(character));
+ ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataDoubleEscapeStartState);
}
- }
+ bufferASCIICharacter('<');
+ RECONSUME_IN(ScriptDataEscapedState);
END_STATE()
- HTML_BEGIN_STATE(ScriptDataEscapedEndTagOpenState) {
- if (isASCIIUpper(cc)) {
- m_temporaryBuffer.append(static_cast<LChar>(cc));
- addToPossibleEndTag(static_cast<LChar>(toLowerCase(cc)));
- HTML_ADVANCE_TO(ScriptDataEscapedEndTagNameState);
- } else if (isASCIILower(cc)) {
- m_temporaryBuffer.append(static_cast<LChar>(cc));
- addToPossibleEndTag(static_cast<LChar>(cc));
- HTML_ADVANCE_TO(ScriptDataEscapedEndTagNameState);
- } else {
- bufferCharacter('<');
- bufferCharacter('/');
- HTML_RECONSUME_IN(ScriptDataEscapedState);
+ BEGIN_STATE(ScriptDataEscapedEndTagOpenState)
+ if (isASCIIAlpha(character)) {
+ appendToTemporaryBuffer(character);
+ appendToPossibleEndTag(convertASCIIAlphaToLower(character));
+ ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataEscapedEndTagNameState);
}
- }
+ bufferASCIICharacter('<');
+ bufferASCIICharacter('/');
+ RECONSUME_IN(ScriptDataEscapedState);
END_STATE()
- HTML_BEGIN_STATE(ScriptDataEscapedEndTagNameState) {
- if (isASCIIUpper(cc)) {
- m_temporaryBuffer.append(static_cast<LChar>(cc));
- addToPossibleEndTag(static_cast<LChar>(toLowerCase(cc)));
- HTML_ADVANCE_TO(ScriptDataEscapedEndTagNameState);
- } else if (isASCIILower(cc)) {
- m_temporaryBuffer.append(static_cast<LChar>(cc));
- addToPossibleEndTag(static_cast<LChar>(cc));
- HTML_ADVANCE_TO(ScriptDataEscapedEndTagNameState);
- } else {
- if (isTokenizerWhitespace(cc)) {
- if (isAppropriateEndTag()) {
- m_temporaryBuffer.append(static_cast<LChar>(cc));
- FLUSH_AND_ADVANCE_TO(BeforeAttributeNameState);
- }
- } else if (cc == '/') {
- if (isAppropriateEndTag()) {
- m_temporaryBuffer.append(static_cast<LChar>(cc));
- FLUSH_AND_ADVANCE_TO(SelfClosingStartTagState);
- }
- } else if (cc == '>') {
- if (isAppropriateEndTag()) {
- m_temporaryBuffer.append(static_cast<LChar>(cc));
- return flushEmitAndResumeIn(source, HTMLTokenizer::DataState);
- }
+ BEGIN_STATE(ScriptDataEscapedEndTagNameState)
+ if (isASCIIAlpha(character)) {
+ appendToTemporaryBuffer(character);
+ appendToPossibleEndTag(convertASCIIAlphaToLower(character));
+ ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataEscapedEndTagNameState);
+ }
+ if (isTokenizerWhitespace(character)) {
+ if (isAppropriateEndTag()) {
+ if (commitToPartialEndTag(source, character, BeforeAttributeNameState))
+ return true;
+ SWITCH_TO(BeforeAttributeNameState);
}
- bufferCharacter('<');
- bufferCharacter('/');
- m_token->appendToCharacter(m_temporaryBuffer);
- m_bufferedEndTagName.clear();
- m_temporaryBuffer.clear();
- HTML_RECONSUME_IN(ScriptDataEscapedState);
+ } else if (character == '/') {
+ if (isAppropriateEndTag()) {
+ if (commitToPartialEndTag(source, '/', SelfClosingStartTagState))
+ return true;
+ SWITCH_TO(SelfClosingStartTagState);
+ }
+ } else if (character == '>') {
+ if (isAppropriateEndTag())
+ return commitToCompleteEndTag(source);
}
- }
+ bufferASCIICharacter('<');
+ bufferASCIICharacter('/');
+ m_token.appendToCharacter(m_temporaryBuffer);
+ m_bufferedEndTagName.clear();
+ m_temporaryBuffer.clear();
+ RECONSUME_IN(ScriptDataEscapedState);
END_STATE()
- HTML_BEGIN_STATE(ScriptDataDoubleEscapeStartState) {
- if (isTokenizerWhitespace(cc) || cc == '/' || cc == '>') {
- bufferCharacter(cc);
- if (temporaryBufferIs(scriptTag.localName()))
- HTML_ADVANCE_TO(ScriptDataDoubleEscapedState);
+ BEGIN_STATE(ScriptDataDoubleEscapeStartState)
+ if (isTokenizerWhitespace(character) || character == '/' || character == '>') {
+ bufferASCIICharacter(character);
+ if (temporaryBufferIs("script"))
+ ADVANCE_TO(ScriptDataDoubleEscapedState);
else
- HTML_ADVANCE_TO(ScriptDataEscapedState);
- } else if (isASCIIUpper(cc)) {
- bufferCharacter(cc);
- m_temporaryBuffer.append(toLowerCase(cc));
- HTML_ADVANCE_TO(ScriptDataDoubleEscapeStartState);
- } else if (isASCIILower(cc)) {
- bufferCharacter(cc);
- m_temporaryBuffer.append(static_cast<LChar>(cc));
- HTML_ADVANCE_TO(ScriptDataDoubleEscapeStartState);
- } else
- HTML_RECONSUME_IN(ScriptDataEscapedState);
- }
+ ADVANCE_TO(ScriptDataEscapedState);
+ }
+ if (isASCIIAlpha(character)) {
+ bufferASCIICharacter(character);
+ appendToTemporaryBuffer(convertASCIIAlphaToLower(character));
+ ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataDoubleEscapeStartState);
+ }
+ RECONSUME_IN(ScriptDataEscapedState);
END_STATE()
- HTML_BEGIN_STATE(ScriptDataDoubleEscapedState) {
- if (cc == '-') {
- bufferCharacter(cc);
- HTML_ADVANCE_TO(ScriptDataDoubleEscapedDashState);
- } else if (cc == '<') {
- bufferCharacter(cc);
- HTML_ADVANCE_TO(ScriptDataDoubleEscapedLessThanSignState);
- } else if (cc == kEndOfFileMarker) {
+ BEGIN_STATE(ScriptDataDoubleEscapedState)
+ if (character == '-') {
+ bufferASCIICharacter('-');
+ ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataDoubleEscapedDashState);
+ }
+ if (character == '<') {
+ bufferASCIICharacter('<');
+ ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataDoubleEscapedLessThanSignState);
+ }
+ if (character == kEndOfFileMarker) {
parseError();
- HTML_RECONSUME_IN(DataState);
- } else {
- bufferCharacter(cc);
- HTML_ADVANCE_TO(ScriptDataDoubleEscapedState);
+ RECONSUME_IN(DataState);
}
- }
+ bufferCharacter(character);
+ ADVANCE_TO(ScriptDataDoubleEscapedState);
END_STATE()
- HTML_BEGIN_STATE(ScriptDataDoubleEscapedDashState) {
- if (cc == '-') {
- bufferCharacter(cc);
- HTML_ADVANCE_TO(ScriptDataDoubleEscapedDashDashState);
- } else if (cc == '<') {
- bufferCharacter(cc);
- HTML_ADVANCE_TO(ScriptDataDoubleEscapedLessThanSignState);
- } else if (cc == kEndOfFileMarker) {
+ BEGIN_STATE(ScriptDataDoubleEscapedDashState)
+ if (character == '-') {
+ bufferASCIICharacter('-');
+ ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataDoubleEscapedDashDashState);
+ }
+ if (character == '<') {
+ bufferASCIICharacter('<');
+ ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataDoubleEscapedLessThanSignState);
+ }
+ if (character == kEndOfFileMarker) {
parseError();
- HTML_RECONSUME_IN(DataState);
- } else {
- bufferCharacter(cc);
- HTML_ADVANCE_TO(ScriptDataDoubleEscapedState);
+ RECONSUME_IN(DataState);
}
- }
+ bufferCharacter(character);
+ ADVANCE_TO(ScriptDataDoubleEscapedState);
END_STATE()
- HTML_BEGIN_STATE(ScriptDataDoubleEscapedDashDashState) {
- if (cc == '-') {
- bufferCharacter(cc);
- HTML_ADVANCE_TO(ScriptDataDoubleEscapedDashDashState);
- } else if (cc == '<') {
- bufferCharacter(cc);
- HTML_ADVANCE_TO(ScriptDataDoubleEscapedLessThanSignState);
- } else if (cc == '>') {
- bufferCharacter(cc);
- HTML_ADVANCE_TO(ScriptDataState);
- } else if (cc == kEndOfFileMarker) {
+ BEGIN_STATE(ScriptDataDoubleEscapedDashDashState)
+ if (character == '-') {
+ bufferASCIICharacter('-');
+ ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataDoubleEscapedDashDashState);
+ }
+ if (character == '<') {
+ bufferASCIICharacter('<');
+ ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataDoubleEscapedLessThanSignState);
+ }
+ if (character == '>') {
+ bufferASCIICharacter('>');
+ ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataState);
+ }
+ if (character == kEndOfFileMarker) {
parseError();
- HTML_RECONSUME_IN(DataState);
- } else {
- bufferCharacter(cc);
- HTML_ADVANCE_TO(ScriptDataDoubleEscapedState);
+ RECONSUME_IN(DataState);
}
- }
+ bufferCharacter(character);
+ ADVANCE_TO(ScriptDataDoubleEscapedState);
END_STATE()
- HTML_BEGIN_STATE(ScriptDataDoubleEscapedLessThanSignState) {
- if (cc == '/') {
- bufferCharacter(cc);
+ BEGIN_STATE(ScriptDataDoubleEscapedLessThanSignState)
+ if (character == '/') {
+ bufferASCIICharacter('/');
m_temporaryBuffer.clear();
- HTML_ADVANCE_TO(ScriptDataDoubleEscapeEndState);
- } else
- HTML_RECONSUME_IN(ScriptDataDoubleEscapedState);
- }
+ ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataDoubleEscapeEndState);
+ }
+ RECONSUME_IN(ScriptDataDoubleEscapedState);
END_STATE()
- HTML_BEGIN_STATE(ScriptDataDoubleEscapeEndState) {
- if (isTokenizerWhitespace(cc) || cc == '/' || cc == '>') {
- bufferCharacter(cc);
- if (temporaryBufferIs(scriptTag.localName()))
- HTML_ADVANCE_TO(ScriptDataEscapedState);
+ BEGIN_STATE(ScriptDataDoubleEscapeEndState)
+ if (isTokenizerWhitespace(character) || character == '/' || character == '>') {
+ bufferASCIICharacter(character);
+ if (temporaryBufferIs("script"))
+ ADVANCE_TO(ScriptDataEscapedState);
else
- HTML_ADVANCE_TO(ScriptDataDoubleEscapedState);
- } else if (isASCIIUpper(cc)) {
- bufferCharacter(cc);
- m_temporaryBuffer.append(toLowerCase(cc));
- HTML_ADVANCE_TO(ScriptDataDoubleEscapeEndState);
- } else if (isASCIILower(cc)) {
- bufferCharacter(cc);
- m_temporaryBuffer.append(static_cast<LChar>(cc));
- HTML_ADVANCE_TO(ScriptDataDoubleEscapeEndState);
- } else
- HTML_RECONSUME_IN(ScriptDataDoubleEscapedState);
- }
- END_STATE()
-
- HTML_BEGIN_STATE(BeforeAttributeNameState) {
- if (isTokenizerWhitespace(cc))
- HTML_ADVANCE_TO(BeforeAttributeNameState);
- else if (cc == '/')
- HTML_ADVANCE_TO(SelfClosingStartTagState);
- else if (cc == '>')
- return emitAndResumeIn(source, HTMLTokenizer::DataState);
- else if (m_options.usePreHTML5ParserQuirks && cc == '<')
- return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
- else if (isASCIIUpper(cc)) {
- m_token->addNewAttribute();
- m_token->beginAttributeName(source.numberOfCharactersConsumed());
- m_token->appendToAttributeName(toLowerCase(cc));
- HTML_ADVANCE_TO(AttributeNameState);
- } else if (cc == kEndOfFileMarker) {
- parseError();
- HTML_RECONSUME_IN(DataState);
- } else {
- if (cc == '"' || cc == '\'' || cc == '<' || cc == '=')
- parseError();
- m_token->addNewAttribute();
- m_token->beginAttributeName(source.numberOfCharactersConsumed());
- m_token->appendToAttributeName(cc);
- HTML_ADVANCE_TO(AttributeNameState);
+ ADVANCE_TO(ScriptDataDoubleEscapedState);
}
- }
- END_STATE()
-
- HTML_BEGIN_STATE(AttributeNameState) {
- if (isTokenizerWhitespace(cc)) {
- m_token->endAttributeName(source.numberOfCharactersConsumed());
- HTML_ADVANCE_TO(AfterAttributeNameState);
- } else if (cc == '/') {
- m_token->endAttributeName(source.numberOfCharactersConsumed());
- HTML_ADVANCE_TO(SelfClosingStartTagState);
- } else if (cc == '=') {
- m_token->endAttributeName(source.numberOfCharactersConsumed());
- HTML_ADVANCE_TO(BeforeAttributeValueState);
- } else if (cc == '>') {
- m_token->endAttributeName(source.numberOfCharactersConsumed());
- return emitAndResumeIn(source, HTMLTokenizer::DataState);
- } else if (m_options.usePreHTML5ParserQuirks && cc == '<') {
- m_token->endAttributeName(source.numberOfCharactersConsumed());
- return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
- } else if (isASCIIUpper(cc)) {
- m_token->appendToAttributeName(toLowerCase(cc));
- HTML_ADVANCE_TO(AttributeNameState);
- } else if (cc == kEndOfFileMarker) {
- parseError();
- m_token->endAttributeName(source.numberOfCharactersConsumed());
- HTML_RECONSUME_IN(DataState);
- } else {
- if (cc == '"' || cc == '\'' || cc == '<' || cc == '=')
- parseError();
- m_token->appendToAttributeName(cc);
- HTML_ADVANCE_TO(AttributeNameState);
+ if (isASCIIAlpha(character)) {
+ bufferASCIICharacter(character);
+ appendToTemporaryBuffer(convertASCIIAlphaToLower(character));
+ ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataDoubleEscapeEndState);
}
- }
+ RECONSUME_IN(ScriptDataDoubleEscapedState);
END_STATE()
- HTML_BEGIN_STATE(AfterAttributeNameState) {
- if (isTokenizerWhitespace(cc))
- HTML_ADVANCE_TO(AfterAttributeNameState);
- else if (cc == '/')
- HTML_ADVANCE_TO(SelfClosingStartTagState);
- else if (cc == '=')
- HTML_ADVANCE_TO(BeforeAttributeValueState);
- else if (cc == '>')
- return emitAndResumeIn(source, HTMLTokenizer::DataState);
- else if (m_options.usePreHTML5ParserQuirks && cc == '<')
- return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
- else if (isASCIIUpper(cc)) {
- m_token->addNewAttribute();
- m_token->beginAttributeName(source.numberOfCharactersConsumed());
- m_token->appendToAttributeName(toLowerCase(cc));
- HTML_ADVANCE_TO(AttributeNameState);
- } else if (cc == kEndOfFileMarker) {
- parseError();
- HTML_RECONSUME_IN(DataState);
- } else {
- if (cc == '"' || cc == '\'' || cc == '<')
- parseError();
- m_token->addNewAttribute();
- m_token->beginAttributeName(source.numberOfCharactersConsumed());
- m_token->appendToAttributeName(cc);
- HTML_ADVANCE_TO(AttributeNameState);
+ BEGIN_STATE(BeforeAttributeNameState)
+ if (isTokenizerWhitespace(character))
+ ADVANCE_TO(BeforeAttributeNameState);
+ if (character == '/')
+ ADVANCE_PAST_NON_NEWLINE_TO(SelfClosingStartTagState);
+ if (character == '>')
+ return emitAndResumeInDataState(source);
+ if (m_options.usePreHTML5ParserQuirks && character == '<')
+ return emitAndReconsumeInDataState();
+ if (character == kEndOfFileMarker) {
+ parseError();
+ RECONSUME_IN(DataState);
}
- }
+ if (character == '"' || character == '\'' || character == '<' || character == '=')
+ parseError();
+ m_token.beginAttribute(source.numberOfCharactersConsumed());
+ m_token.appendToAttributeName(toASCIILower(character));
+ ADVANCE_PAST_NON_NEWLINE_TO(AttributeNameState);
+ END_STATE()
+
+ BEGIN_STATE(AttributeNameState)
+ if (isTokenizerWhitespace(character))
+ ADVANCE_TO(AfterAttributeNameState);
+ if (character == '/')
+ ADVANCE_PAST_NON_NEWLINE_TO(SelfClosingStartTagState);
+ if (character == '=')
+ ADVANCE_PAST_NON_NEWLINE_TO(BeforeAttributeValueState);
+ if (character == '>')
+ return emitAndResumeInDataState(source);
+ if (m_options.usePreHTML5ParserQuirks && character == '<')
+ return emitAndReconsumeInDataState();
+ if (character == kEndOfFileMarker) {
+ parseError();
+ RECONSUME_IN(DataState);
+ }
+ if (character == '"' || character == '\'' || character == '<' || character == '=')
+ parseError();
+ m_token.appendToAttributeName(toASCIILower(character));
+ ADVANCE_PAST_NON_NEWLINE_TO(AttributeNameState);
+ END_STATE()
+
+ BEGIN_STATE(AfterAttributeNameState)
+ if (isTokenizerWhitespace(character))
+ ADVANCE_TO(AfterAttributeNameState);
+ if (character == '/')
+ ADVANCE_PAST_NON_NEWLINE_TO(SelfClosingStartTagState);
+ if (character == '=')
+ ADVANCE_PAST_NON_NEWLINE_TO(BeforeAttributeValueState);
+ if (character == '>')
+ return emitAndResumeInDataState(source);
+ if (m_options.usePreHTML5ParserQuirks && character == '<')
+ return emitAndReconsumeInDataState();
+ if (character == kEndOfFileMarker) {
+ parseError();
+ RECONSUME_IN(DataState);
+ }
+ if (character == '"' || character == '\'' || character == '<')
+ parseError();
+ m_token.beginAttribute(source.numberOfCharactersConsumed());
+ m_token.appendToAttributeName(toASCIILower(character));
+ ADVANCE_PAST_NON_NEWLINE_TO(AttributeNameState);
END_STATE()
- HTML_BEGIN_STATE(BeforeAttributeValueState) {
- if (isTokenizerWhitespace(cc))
- HTML_ADVANCE_TO(BeforeAttributeValueState);
- else if (cc == '"') {
- m_token->beginAttributeValue(source.numberOfCharactersConsumed() + 1);
- HTML_ADVANCE_TO(AttributeValueDoubleQuotedState);
- } else if (cc == '&') {
- m_token->beginAttributeValue(source.numberOfCharactersConsumed());
- HTML_RECONSUME_IN(AttributeValueUnquotedState);
- } else if (cc == '\'') {
- m_token->beginAttributeValue(source.numberOfCharactersConsumed() + 1);
- HTML_ADVANCE_TO(AttributeValueSingleQuotedState);
- } else if (cc == '>') {
- parseError();
- return emitAndResumeIn(source, HTMLTokenizer::DataState);
- } else if (cc == kEndOfFileMarker) {
- parseError();
- HTML_RECONSUME_IN(DataState);
- } else {
- if (cc == '<' || cc == '=' || cc == '`')
- parseError();
- m_token->beginAttributeValue(source.numberOfCharactersConsumed());
- m_token->appendToAttributeValue(cc);
- HTML_ADVANCE_TO(AttributeValueUnquotedState);
+ BEGIN_STATE(BeforeAttributeValueState)
+ if (isTokenizerWhitespace(character))
+ ADVANCE_TO(BeforeAttributeValueState);
+ if (character == '"')
+ ADVANCE_PAST_NON_NEWLINE_TO(AttributeValueDoubleQuotedState);
+ if (character == '&')
+ RECONSUME_IN(AttributeValueUnquotedState);
+ if (character == '\'')
+ ADVANCE_PAST_NON_NEWLINE_TO(AttributeValueSingleQuotedState);
+ if (character == '>') {
+ parseError();
+ return emitAndResumeInDataState(source);
}
- }
+ if (character == kEndOfFileMarker) {
+ parseError();
+ RECONSUME_IN(DataState);
+ }
+ if (character == '<' || character == '=' || character == '`')
+ parseError();
+ m_token.appendToAttributeValue(character);
+ ADVANCE_PAST_NON_NEWLINE_TO(AttributeValueUnquotedState);
END_STATE()
- HTML_BEGIN_STATE(AttributeValueDoubleQuotedState) {
- if (cc == '"') {
- m_token->endAttributeValue(source.numberOfCharactersConsumed());
- HTML_ADVANCE_TO(AfterAttributeValueQuotedState);
- } else if (cc == '&') {
+ BEGIN_STATE(AttributeValueDoubleQuotedState)
+ if (character == '"') {
+ m_token.endAttribute(source.numberOfCharactersConsumed());
+ ADVANCE_PAST_NON_NEWLINE_TO(AfterAttributeValueQuotedState);
+ }
+ if (character == '&') {
m_additionalAllowedCharacter = '"';
- HTML_ADVANCE_TO(CharacterReferenceInAttributeValueState);
- } else if (cc == kEndOfFileMarker) {
+ ADVANCE_PAST_NON_NEWLINE_TO(CharacterReferenceInAttributeValueState);
+ }
+ if (character == kEndOfFileMarker) {
parseError();
- m_token->endAttributeValue(source.numberOfCharactersConsumed());
- HTML_RECONSUME_IN(DataState);
- } else {
- m_token->appendToAttributeValue(cc);
- HTML_ADVANCE_TO(AttributeValueDoubleQuotedState);
+ m_token.endAttribute(source.numberOfCharactersConsumed());
+ RECONSUME_IN(DataState);
}
- }
+ m_token.appendToAttributeValue(character);
+ ADVANCE_TO(AttributeValueDoubleQuotedState);
END_STATE()
- HTML_BEGIN_STATE(AttributeValueSingleQuotedState) {
- if (cc == '\'') {
- m_token->endAttributeValue(source.numberOfCharactersConsumed());
- HTML_ADVANCE_TO(AfterAttributeValueQuotedState);
- } else if (cc == '&') {
+ BEGIN_STATE(AttributeValueSingleQuotedState)
+ if (character == '\'') {
+ m_token.endAttribute(source.numberOfCharactersConsumed());
+ ADVANCE_PAST_NON_NEWLINE_TO(AfterAttributeValueQuotedState);
+ }
+ if (character == '&') {
m_additionalAllowedCharacter = '\'';
- HTML_ADVANCE_TO(CharacterReferenceInAttributeValueState);
- } else if (cc == kEndOfFileMarker) {
+ ADVANCE_PAST_NON_NEWLINE_TO(CharacterReferenceInAttributeValueState);
+ }
+ if (character == kEndOfFileMarker) {
parseError();
- m_token->endAttributeValue(source.numberOfCharactersConsumed());
- HTML_RECONSUME_IN(DataState);
- } else {
- m_token->appendToAttributeValue(cc);
- HTML_ADVANCE_TO(AttributeValueSingleQuotedState);
+ m_token.endAttribute(source.numberOfCharactersConsumed());
+ RECONSUME_IN(DataState);
}
- }
+ m_token.appendToAttributeValue(character);
+ ADVANCE_TO(AttributeValueSingleQuotedState);
END_STATE()
- HTML_BEGIN_STATE(AttributeValueUnquotedState) {
- if (isTokenizerWhitespace(cc)) {
- m_token->endAttributeValue(source.numberOfCharactersConsumed());
- HTML_ADVANCE_TO(BeforeAttributeNameState);
- } else if (cc == '&') {
+ BEGIN_STATE(AttributeValueUnquotedState)
+ if (isTokenizerWhitespace(character)) {
+ m_token.endAttribute(source.numberOfCharactersConsumed());
+ ADVANCE_TO(BeforeAttributeNameState);
+ }
+ if (character == '&') {
m_additionalAllowedCharacter = '>';
- HTML_ADVANCE_TO(CharacterReferenceInAttributeValueState);
- } else if (cc == '>') {
- m_token->endAttributeValue(source.numberOfCharactersConsumed());
- return emitAndResumeIn(source, HTMLTokenizer::DataState);
- } else if (cc == kEndOfFileMarker) {
- parseError();
- m_token->endAttributeValue(source.numberOfCharactersConsumed());
- HTML_RECONSUME_IN(DataState);
- } else {
- if (cc == '"' || cc == '\'' || cc == '<' || cc == '=' || cc == '`')
- parseError();
- m_token->appendToAttributeValue(cc);
- HTML_ADVANCE_TO(AttributeValueUnquotedState);
+ ADVANCE_PAST_NON_NEWLINE_TO(CharacterReferenceInAttributeValueState);
}
- }
+ if (character == '>') {
+ m_token.endAttribute(source.numberOfCharactersConsumed());
+ return emitAndResumeInDataState(source);
+ }
+ if (character == kEndOfFileMarker) {
+ parseError();
+ m_token.endAttribute(source.numberOfCharactersConsumed());
+ RECONSUME_IN(DataState);
+ }
+ if (character == '"' || character == '\'' || character == '<' || character == '=' || character == '`')
+ parseError();
+ m_token.appendToAttributeValue(character);
+ ADVANCE_PAST_NON_NEWLINE_TO(AttributeValueUnquotedState);
END_STATE()
- HTML_BEGIN_STATE(CharacterReferenceInAttributeValueState) {
+ BEGIN_STATE(CharacterReferenceInAttributeValueState)
bool notEnoughCharacters = false;
StringBuilder decodedEntity;
bool success = consumeHTMLEntity(source, decodedEntity, notEnoughCharacters, m_additionalAllowedCharacter);
if (notEnoughCharacters)
- return haveBufferedCharacterToken();
+ RETURN_IN_CURRENT_STATE(haveBufferedCharacterToken());
if (!success) {
ASSERT(decodedEntity.isEmpty());
- m_token->appendToAttributeValue('&');
+ m_token.appendToAttributeValue('&');
} else {
for (unsigned i = 0; i < decodedEntity.length(); ++i)
- m_token->appendToAttributeValue(decodedEntity[i]);
+ m_token.appendToAttributeValue(decodedEntity[i]);
}
// We're supposed to switch back to the attribute value state that
// we were in when we were switched into this state. Rather than
// keeping track of this explicitly, we observe that the previous
// state can be determined by m_additionalAllowedCharacter.
if (m_additionalAllowedCharacter == '"')
- HTML_SWITCH_TO(AttributeValueDoubleQuotedState);
- else if (m_additionalAllowedCharacter == '\'')
- HTML_SWITCH_TO(AttributeValueSingleQuotedState);
- else if (m_additionalAllowedCharacter == '>')
- HTML_SWITCH_TO(AttributeValueUnquotedState);
- else
- ASSERT_NOT_REACHED();
- }
- END_STATE()
-
- HTML_BEGIN_STATE(AfterAttributeValueQuotedState) {
- if (isTokenizerWhitespace(cc))
- HTML_ADVANCE_TO(BeforeAttributeNameState);
- else if (cc == '/')
- HTML_ADVANCE_TO(SelfClosingStartTagState);
- else if (cc == '>')
- return emitAndResumeIn(source, HTMLTokenizer::DataState);
- else if (m_options.usePreHTML5ParserQuirks && cc == '<')
- return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
- else if (cc == kEndOfFileMarker) {
- parseError();
- HTML_RECONSUME_IN(DataState);
- } else {
- parseError();
- HTML_RECONSUME_IN(BeforeAttributeNameState);
+ SWITCH_TO(AttributeValueDoubleQuotedState);
+ if (m_additionalAllowedCharacter == '\'')
+ SWITCH_TO(AttributeValueSingleQuotedState);
+ ASSERT(m_additionalAllowedCharacter == '>');
+ SWITCH_TO(AttributeValueUnquotedState);
+ END_STATE()
+
+ BEGIN_STATE(AfterAttributeValueQuotedState)
+ if (isTokenizerWhitespace(character))
+ ADVANCE_TO(BeforeAttributeNameState);
+ if (character == '/')
+ ADVANCE_PAST_NON_NEWLINE_TO(SelfClosingStartTagState);
+ if (character == '>')
+ return emitAndResumeInDataState(source);
+ if (m_options.usePreHTML5ParserQuirks && character == '<')
+ return emitAndReconsumeInDataState();
+ if (character == kEndOfFileMarker) {
+ parseError();
+ RECONSUME_IN(DataState);
}
- }
+ parseError();
+ RECONSUME_IN(BeforeAttributeNameState);
END_STATE()
- HTML_BEGIN_STATE(SelfClosingStartTagState) {
- if (cc == '>') {
- m_token->setSelfClosing();
- return emitAndResumeIn(source, HTMLTokenizer::DataState);
- } else if (cc == kEndOfFileMarker) {
- parseError();
- HTML_RECONSUME_IN(DataState);
- } else {
+ BEGIN_STATE(SelfClosingStartTagState)
+ if (character == '>') {
+ m_token.setSelfClosing();
+ return emitAndResumeInDataState(source);
+ }
+ if (character == kEndOfFileMarker) {
parseError();
- HTML_RECONSUME_IN(BeforeAttributeNameState);
+ RECONSUME_IN(DataState);
}
- }
+ parseError();
+ RECONSUME_IN(BeforeAttributeNameState);
END_STATE()
- HTML_BEGIN_STATE(BogusCommentState) {
- m_token->beginComment();
- HTML_RECONSUME_IN(ContinueBogusCommentState);
- }
+ BEGIN_STATE(BogusCommentState)
+ m_token.beginComment();
+ RECONSUME_IN(ContinueBogusCommentState);
END_STATE()
- HTML_BEGIN_STATE(ContinueBogusCommentState) {
- if (cc == '>')
- return emitAndResumeIn(source, HTMLTokenizer::DataState);
- else if (cc == kEndOfFileMarker)
- return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
- else {
- m_token->appendToComment(cc);
- HTML_ADVANCE_TO(ContinueBogusCommentState);
- }
- }
+ BEGIN_STATE(ContinueBogusCommentState)
+ if (character == '>')
+ return emitAndResumeInDataState(source);
+ if (character == kEndOfFileMarker)
+ return emitAndReconsumeInDataState();
+ m_token.appendToComment(character);
+ ADVANCE_TO(ContinueBogusCommentState);
END_STATE()
- HTML_BEGIN_STATE(MarkupDeclarationOpenState) {
- DEFINE_STATIC_LOCAL(String, dashDashString, (ASCIILiteral("--")));
- DEFINE_STATIC_LOCAL(String, doctypeString, (ASCIILiteral("doctype")));
- DEFINE_STATIC_LOCAL(String, cdataString, (ASCIILiteral("[CDATA[")));
- if (cc == '-') {
- SegmentedString::LookAheadResult result = source.lookAhead(dashDashString);
- if (result == SegmentedString::DidMatch) {
- source.advanceAndASSERT('-');
- source.advanceAndASSERT('-');
- m_token->beginComment();
- HTML_SWITCH_TO(CommentStartState);
- } else if (result == SegmentedString::NotEnoughCharacters)
- return haveBufferedCharacterToken();
- } else if (cc == 'D' || cc == 'd') {
- SegmentedString::LookAheadResult result = source.lookAheadIgnoringCase(doctypeString);
+ BEGIN_STATE(MarkupDeclarationOpenState)
+ if (character == '-') {
+ auto result = source.advancePast("--");
if (result == SegmentedString::DidMatch) {
- advanceStringAndASSERTIgnoringCase(source, "doctype");
- HTML_SWITCH_TO(DOCTYPEState);
- } else if (result == SegmentedString::NotEnoughCharacters)
- return haveBufferedCharacterToken();
- } else if (cc == '[' && shouldAllowCDATA()) {
- SegmentedString::LookAheadResult result = source.lookAhead(cdataString);
- if (result == SegmentedString::DidMatch) {
- advanceStringAndASSERT(source, "[CDATA[");
- HTML_SWITCH_TO(CDATASectionState);
- } else if (result == SegmentedString::NotEnoughCharacters)
- return haveBufferedCharacterToken();
+ m_token.beginComment();
+ SWITCH_TO(CommentStartState);
+ }
+ if (result == SegmentedString::NotEnoughCharacters)
+ RETURN_IN_CURRENT_STATE(haveBufferedCharacterToken());
+ } else if (isASCIIAlphaCaselessEqual(character, 'd')) {
+ auto result = source.advancePastLettersIgnoringASCIICase("doctype");
+ if (result == SegmentedString::DidMatch)
+ SWITCH_TO(DOCTYPEState);
+ if (result == SegmentedString::NotEnoughCharacters)
+ RETURN_IN_CURRENT_STATE(haveBufferedCharacterToken());
+ } else if (character == '[' && shouldAllowCDATA()) {
+ auto result = source.advancePast("[CDATA[");
+ if (result == SegmentedString::DidMatch)
+ SWITCH_TO(CDATASectionState);
+ if (result == SegmentedString::NotEnoughCharacters)
+ RETURN_IN_CURRENT_STATE(haveBufferedCharacterToken());
}
parseError();
- HTML_RECONSUME_IN(BogusCommentState);
- }
+ RECONSUME_IN(BogusCommentState);
END_STATE()
- HTML_BEGIN_STATE(CommentStartState) {
- if (cc == '-')
- HTML_ADVANCE_TO(CommentStartDashState);
- else if (cc == '>') {
+ BEGIN_STATE(CommentStartState)
+ if (character == '-')
+ ADVANCE_PAST_NON_NEWLINE_TO(CommentStartDashState);
+ if (character == '>') {
parseError();
- return emitAndResumeIn(source, HTMLTokenizer::DataState);
- } else if (cc == kEndOfFileMarker) {
+ return emitAndResumeInDataState(source);
+ }
+ if (character == kEndOfFileMarker) {
parseError();
- return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
- } else {
- m_token->appendToComment(cc);
- HTML_ADVANCE_TO(CommentState);
+ return emitAndReconsumeInDataState();
}
- }
+ m_token.appendToComment(character);
+ ADVANCE_TO(CommentState);
END_STATE()
- HTML_BEGIN_STATE(CommentStartDashState) {
- if (cc == '-')
- HTML_ADVANCE_TO(CommentEndState);
- else if (cc == '>') {
+ BEGIN_STATE(CommentStartDashState)
+ if (character == '-')
+ ADVANCE_PAST_NON_NEWLINE_TO(CommentEndState);
+ if (character == '>') {
parseError();
- return emitAndResumeIn(source, HTMLTokenizer::DataState);
- } else if (cc == kEndOfFileMarker) {
+ return emitAndResumeInDataState(source);
+ }
+ if (character == kEndOfFileMarker) {
parseError();
- return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
- } else {
- m_token->appendToComment('-');
- m_token->appendToComment(cc);
- HTML_ADVANCE_TO(CommentState);
+ return emitAndReconsumeInDataState();
}
- }
+ m_token.appendToComment('-');
+ m_token.appendToComment(character);
+ ADVANCE_TO(CommentState);
END_STATE()
- HTML_BEGIN_STATE(CommentState) {
- if (cc == '-')
- HTML_ADVANCE_TO(CommentEndDashState);
- else if (cc == kEndOfFileMarker) {
+ BEGIN_STATE(CommentState)
+ if (character == '-')
+ ADVANCE_PAST_NON_NEWLINE_TO(CommentEndDashState);
+ if (character == kEndOfFileMarker) {
parseError();
- return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
- } else {
- m_token->appendToComment(cc);
- HTML_ADVANCE_TO(CommentState);
+ return emitAndReconsumeInDataState();
}
- }
+ m_token.appendToComment(character);
+ ADVANCE_TO(CommentState);
END_STATE()
- HTML_BEGIN_STATE(CommentEndDashState) {
- if (cc == '-')
- HTML_ADVANCE_TO(CommentEndState);
- else if (cc == kEndOfFileMarker) {
+ BEGIN_STATE(CommentEndDashState)
+ if (character == '-')
+ ADVANCE_PAST_NON_NEWLINE_TO(CommentEndState);
+ if (character == kEndOfFileMarker) {
parseError();
- return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
- } else {
- m_token->appendToComment('-');
- m_token->appendToComment(cc);
- HTML_ADVANCE_TO(CommentState);
+ return emitAndReconsumeInDataState();
}
- }
+ m_token.appendToComment('-');
+ m_token.appendToComment(character);
+ ADVANCE_TO(CommentState);
END_STATE()
- HTML_BEGIN_STATE(CommentEndState) {
- if (cc == '>')
- return emitAndResumeIn(source, HTMLTokenizer::DataState);
- else if (cc == '!') {
- parseError();
- HTML_ADVANCE_TO(CommentEndBangState);
- } else if (cc == '-') {
+ BEGIN_STATE(CommentEndState)
+ if (character == '>')
+ return emitAndResumeInDataState(source);
+ if (character == '!') {
parseError();
- m_token->appendToComment('-');
- HTML_ADVANCE_TO(CommentEndState);
- } else if (cc == kEndOfFileMarker) {
+ ADVANCE_PAST_NON_NEWLINE_TO(CommentEndBangState);
+ }
+ if (character == '-') {
parseError();
- return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
- } else {
+ m_token.appendToComment('-');
+ ADVANCE_PAST_NON_NEWLINE_TO(CommentEndState);
+ }
+ if (character == kEndOfFileMarker) {
parseError();
- m_token->appendToComment('-');
- m_token->appendToComment('-');
- m_token->appendToComment(cc);
- HTML_ADVANCE_TO(CommentState);
+ return emitAndReconsumeInDataState();
}
- }
- END_STATE()
-
- HTML_BEGIN_STATE(CommentEndBangState) {
- if (cc == '-') {
- m_token->appendToComment('-');
- m_token->appendToComment('-');
- m_token->appendToComment('!');
- HTML_ADVANCE_TO(CommentEndDashState);
- } else if (cc == '>')
- return emitAndResumeIn(source, HTMLTokenizer::DataState);
- else if (cc == kEndOfFileMarker) {
+ parseError();
+ m_token.appendToComment('-');
+ m_token.appendToComment('-');
+ m_token.appendToComment(character);
+ ADVANCE_TO(CommentState);
+ END_STATE()
+
+ BEGIN_STATE(CommentEndBangState)
+ if (character == '-') {
+ m_token.appendToComment('-');
+ m_token.appendToComment('-');
+ m_token.appendToComment('!');
+ ADVANCE_PAST_NON_NEWLINE_TO(CommentEndDashState);
+ }
+ if (character == '>')
+ return emitAndResumeInDataState(source);
+ if (character == kEndOfFileMarker) {
parseError();
- return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
- } else {
- m_token->appendToComment('-');
- m_token->appendToComment('-');
- m_token->appendToComment('!');
- m_token->appendToComment(cc);
- HTML_ADVANCE_TO(CommentState);
+ return emitAndReconsumeInDataState();
}
- }
+ m_token.appendToComment('-');
+ m_token.appendToComment('-');
+ m_token.appendToComment('!');
+ m_token.appendToComment(character);
+ ADVANCE_TO(CommentState);
END_STATE()
- HTML_BEGIN_STATE(DOCTYPEState) {
- if (isTokenizerWhitespace(cc))
- HTML_ADVANCE_TO(BeforeDOCTYPENameState);
- else if (cc == kEndOfFileMarker) {
+ BEGIN_STATE(DOCTYPEState)
+ if (isTokenizerWhitespace(character))
+ ADVANCE_TO(BeforeDOCTYPENameState);
+ if (character == kEndOfFileMarker) {
parseError();
- m_token->beginDOCTYPE();
- m_token->setForceQuirks();
- return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
- } else {
- parseError();
- HTML_RECONSUME_IN(BeforeDOCTYPENameState);
+ m_token.beginDOCTYPE();
+ m_token.setForceQuirks();
+ return emitAndReconsumeInDataState();
}
- }
+ parseError();
+ RECONSUME_IN(BeforeDOCTYPENameState);
END_STATE()
- HTML_BEGIN_STATE(BeforeDOCTYPENameState) {
- if (isTokenizerWhitespace(cc))
- HTML_ADVANCE_TO(BeforeDOCTYPENameState);
- else if (isASCIIUpper(cc)) {
- m_token->beginDOCTYPE(toLowerCase(cc));
- HTML_ADVANCE_TO(DOCTYPENameState);
- } else if (cc == '>') {
+ BEGIN_STATE(BeforeDOCTYPENameState)
+ if (isTokenizerWhitespace(character))
+ ADVANCE_TO(BeforeDOCTYPENameState);
+ if (character == '>') {
parseError();
- m_token->beginDOCTYPE();
- m_token->setForceQuirks();
- return emitAndResumeIn(source, HTMLTokenizer::DataState);
- } else if (cc == kEndOfFileMarker) {
+ m_token.beginDOCTYPE();
+ m_token.setForceQuirks();
+ return emitAndResumeInDataState(source);
+ }
+ if (character == kEndOfFileMarker) {
parseError();
- m_token->beginDOCTYPE();
- m_token->setForceQuirks();
- return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
- } else {
- m_token->beginDOCTYPE(cc);
- HTML_ADVANCE_TO(DOCTYPENameState);
+ m_token.beginDOCTYPE();
+ m_token.setForceQuirks();
+ return emitAndReconsumeInDataState();
}
- }
+ m_token.beginDOCTYPE(toASCIILower(character));
+ ADVANCE_PAST_NON_NEWLINE_TO(DOCTYPENameState);
END_STATE()
- HTML_BEGIN_STATE(DOCTYPENameState) {
- if (isTokenizerWhitespace(cc))
- HTML_ADVANCE_TO(AfterDOCTYPENameState);
- else if (cc == '>')
- return emitAndResumeIn(source, HTMLTokenizer::DataState);
- else if (isASCIIUpper(cc)) {
- m_token->appendToName(toLowerCase(cc));
- HTML_ADVANCE_TO(DOCTYPENameState);
- } else if (cc == kEndOfFileMarker) {
+ BEGIN_STATE(DOCTYPENameState)
+ if (isTokenizerWhitespace(character))
+ ADVANCE_TO(AfterDOCTYPENameState);
+ if (character == '>')
+ return emitAndResumeInDataState(source);
+ if (character == kEndOfFileMarker) {
parseError();
- m_token->setForceQuirks();
- return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
- } else {
- m_token->appendToName(cc);
- HTML_ADVANCE_TO(DOCTYPENameState);
+ m_token.setForceQuirks();
+ return emitAndReconsumeInDataState();
}
- }
+ m_token.appendToName(toASCIILower(character));
+ ADVANCE_PAST_NON_NEWLINE_TO(DOCTYPENameState);
END_STATE()
- HTML_BEGIN_STATE(AfterDOCTYPENameState) {
- if (isTokenizerWhitespace(cc))
- HTML_ADVANCE_TO(AfterDOCTYPENameState);
- if (cc == '>')
- return emitAndResumeIn(source, HTMLTokenizer::DataState);
- else if (cc == kEndOfFileMarker) {
+ BEGIN_STATE(AfterDOCTYPENameState)
+ if (isTokenizerWhitespace(character))
+ ADVANCE_TO(AfterDOCTYPENameState);
+ if (character == '>')
+ return emitAndResumeInDataState(source);
+ if (character == kEndOfFileMarker) {
parseError();
- m_token->setForceQuirks();
- return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
- } else {
- DEFINE_STATIC_LOCAL(String, publicString, (ASCIILiteral("public")));
- DEFINE_STATIC_LOCAL(String, systemString, (ASCIILiteral("system")));
- if (cc == 'P' || cc == 'p') {
- SegmentedString::LookAheadResult result = source.lookAheadIgnoringCase(publicString);
- if (result == SegmentedString::DidMatch) {
- advanceStringAndASSERTIgnoringCase(source, "public");
- HTML_SWITCH_TO(AfterDOCTYPEPublicKeywordState);
- } else if (result == SegmentedString::NotEnoughCharacters)
- return haveBufferedCharacterToken();
- } else if (cc == 'S' || cc == 's') {
- SegmentedString::LookAheadResult result = source.lookAheadIgnoringCase(systemString);
- if (result == SegmentedString::DidMatch) {
- advanceStringAndASSERTIgnoringCase(source, "system");
- HTML_SWITCH_TO(AfterDOCTYPESystemKeywordState);
- } else if (result == SegmentedString::NotEnoughCharacters)
- return haveBufferedCharacterToken();
- }
- parseError();
- m_token->setForceQuirks();
- HTML_ADVANCE_TO(BogusDOCTYPEState);
+ m_token.setForceQuirks();
+ return emitAndReconsumeInDataState();
}
- }
+ if (isASCIIAlphaCaselessEqual(character, 'p')) {
+ auto result = source.advancePastLettersIgnoringASCIICase("public");
+ if (result == SegmentedString::DidMatch)
+ SWITCH_TO(AfterDOCTYPEPublicKeywordState);
+ if (result == SegmentedString::NotEnoughCharacters)
+ RETURN_IN_CURRENT_STATE(haveBufferedCharacterToken());
+ } else if (isASCIIAlphaCaselessEqual(character, 's')) {
+ auto result = source.advancePastLettersIgnoringASCIICase("system");
+ if (result == SegmentedString::DidMatch)
+ SWITCH_TO(AfterDOCTYPESystemKeywordState);
+ if (result == SegmentedString::NotEnoughCharacters)
+ RETURN_IN_CURRENT_STATE(haveBufferedCharacterToken());
+ }
+ parseError();
+ m_token.setForceQuirks();
+ ADVANCE_PAST_NON_NEWLINE_TO(BogusDOCTYPEState);
END_STATE()
- HTML_BEGIN_STATE(AfterDOCTYPEPublicKeywordState) {
- if (isTokenizerWhitespace(cc))
- HTML_ADVANCE_TO(BeforeDOCTYPEPublicIdentifierState);
- else if (cc == '"') {
- parseError();
- m_token->setPublicIdentifierToEmptyString();
- HTML_ADVANCE_TO(DOCTYPEPublicIdentifierDoubleQuotedState);
- } else if (cc == '\'') {
+ BEGIN_STATE(AfterDOCTYPEPublicKeywordState)
+ if (isTokenizerWhitespace(character))
+ ADVANCE_TO(BeforeDOCTYPEPublicIdentifierState);
+ if (character == '"') {
parseError();
- m_token->setPublicIdentifierToEmptyString();
- HTML_ADVANCE_TO(DOCTYPEPublicIdentifierSingleQuotedState);
- } else if (cc == '>') {
+ m_token.setPublicIdentifierToEmptyString();
+ ADVANCE_PAST_NON_NEWLINE_TO(DOCTYPEPublicIdentifierDoubleQuotedState);
+ }
+ if (character == '\'') {
parseError();
- m_token->setForceQuirks();
- return emitAndResumeIn(source, HTMLTokenizer::DataState);
- } else if (cc == kEndOfFileMarker) {
+ m_token.setPublicIdentifierToEmptyString();
+ ADVANCE_PAST_NON_NEWLINE_TO(DOCTYPEPublicIdentifierSingleQuotedState);
+ }
+ if (character == '>') {
parseError();
- m_token->setForceQuirks();
- return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
- } else {
+ m_token.setForceQuirks();
+ return emitAndResumeInDataState(source);
+ }
+ if (character == kEndOfFileMarker) {
parseError();
- m_token->setForceQuirks();
- HTML_ADVANCE_TO(BogusDOCTYPEState);
+ m_token.setForceQuirks();
+ return emitAndReconsumeInDataState();
}
- }
+ parseError();
+ m_token.setForceQuirks();
+ ADVANCE_PAST_NON_NEWLINE_TO(BogusDOCTYPEState);
END_STATE()
- HTML_BEGIN_STATE(BeforeDOCTYPEPublicIdentifierState) {
- if (isTokenizerWhitespace(cc))
- HTML_ADVANCE_TO(BeforeDOCTYPEPublicIdentifierState);
- else if (cc == '"') {
- m_token->setPublicIdentifierToEmptyString();
- HTML_ADVANCE_TO(DOCTYPEPublicIdentifierDoubleQuotedState);
- } else if (cc == '\'') {
- m_token->setPublicIdentifierToEmptyString();
- HTML_ADVANCE_TO(DOCTYPEPublicIdentifierSingleQuotedState);
- } else if (cc == '>') {
- parseError();
- m_token->setForceQuirks();
- return emitAndResumeIn(source, HTMLTokenizer::DataState);
- } else if (cc == kEndOfFileMarker) {
+ BEGIN_STATE(BeforeDOCTYPEPublicIdentifierState)
+ if (isTokenizerWhitespace(character))
+ ADVANCE_TO(BeforeDOCTYPEPublicIdentifierState);
+ if (character == '"') {
+ m_token.setPublicIdentifierToEmptyString();
+ ADVANCE_PAST_NON_NEWLINE_TO(DOCTYPEPublicIdentifierDoubleQuotedState);
+ }
+ if (character == '\'') {
+ m_token.setPublicIdentifierToEmptyString();
+ ADVANCE_PAST_NON_NEWLINE_TO(DOCTYPEPublicIdentifierSingleQuotedState);
+ }
+ if (character == '>') {
parseError();
- m_token->setForceQuirks();
- return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
- } else {
+ m_token.setForceQuirks();
+ return emitAndResumeInDataState(source);
+ }
+ if (character == kEndOfFileMarker) {
parseError();
- m_token->setForceQuirks();
- HTML_ADVANCE_TO(BogusDOCTYPEState);
+ m_token.setForceQuirks();
+ return emitAndReconsumeInDataState();
}
- }
+ parseError();
+ m_token.setForceQuirks();
+ ADVANCE_PAST_NON_NEWLINE_TO(BogusDOCTYPEState);
END_STATE()
- HTML_BEGIN_STATE(DOCTYPEPublicIdentifierDoubleQuotedState) {
- if (cc == '"')
- HTML_ADVANCE_TO(AfterDOCTYPEPublicIdentifierState);
- else if (cc == '>') {
+ BEGIN_STATE(DOCTYPEPublicIdentifierDoubleQuotedState)
+ if (character == '"')
+ ADVANCE_PAST_NON_NEWLINE_TO(AfterDOCTYPEPublicIdentifierState);
+ if (character == '>') {
parseError();
- m_token->setForceQuirks();
- return emitAndResumeIn(source, HTMLTokenizer::DataState);
- } else if (cc == kEndOfFileMarker) {
+ m_token.setForceQuirks();
+ return emitAndResumeInDataState(source);
+ }
+ if (character == kEndOfFileMarker) {
parseError();
- m_token->setForceQuirks();
- return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
- } else {
- m_token->appendToPublicIdentifier(cc);
- HTML_ADVANCE_TO(DOCTYPEPublicIdentifierDoubleQuotedState);
+ m_token.setForceQuirks();
+ return emitAndReconsumeInDataState();
}
- }
+ m_token.appendToPublicIdentifier(character);
+ ADVANCE_TO(DOCTYPEPublicIdentifierDoubleQuotedState);
END_STATE()
- HTML_BEGIN_STATE(DOCTYPEPublicIdentifierSingleQuotedState) {
- if (cc == '\'')
- HTML_ADVANCE_TO(AfterDOCTYPEPublicIdentifierState);
- else if (cc == '>') {
+ BEGIN_STATE(DOCTYPEPublicIdentifierSingleQuotedState)
+ if (character == '\'')
+ ADVANCE_PAST_NON_NEWLINE_TO(AfterDOCTYPEPublicIdentifierState);
+ if (character == '>') {
parseError();
- m_token->setForceQuirks();
- return emitAndResumeIn(source, HTMLTokenizer::DataState);
- } else if (cc == kEndOfFileMarker) {
+ m_token.setForceQuirks();
+ return emitAndResumeInDataState(source);
+ }
+ if (character == kEndOfFileMarker) {
parseError();
- m_token->setForceQuirks();
- return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
- } else {
- m_token->appendToPublicIdentifier(cc);
- HTML_ADVANCE_TO(DOCTYPEPublicIdentifierSingleQuotedState);
+ m_token.setForceQuirks();
+ return emitAndReconsumeInDataState();
}
- }
+ m_token.appendToPublicIdentifier(character);
+ ADVANCE_TO(DOCTYPEPublicIdentifierSingleQuotedState);
END_STATE()
- HTML_BEGIN_STATE(AfterDOCTYPEPublicIdentifierState) {
- if (isTokenizerWhitespace(cc))
- HTML_ADVANCE_TO(BetweenDOCTYPEPublicAndSystemIdentifiersState);
- else if (cc == '>')
- return emitAndResumeIn(source, HTMLTokenizer::DataState);
- else if (cc == '"') {
- parseError();
- m_token->setSystemIdentifierToEmptyString();
- HTML_ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
- } else if (cc == '\'') {
+ BEGIN_STATE(AfterDOCTYPEPublicIdentifierState)
+ if (isTokenizerWhitespace(character))
+ ADVANCE_TO(BetweenDOCTYPEPublicAndSystemIdentifiersState);
+ if (character == '>')
+ return emitAndResumeInDataState(source);
+ if (character == '"') {
parseError();
- m_token->setSystemIdentifierToEmptyString();
- HTML_ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState);
- } else if (cc == kEndOfFileMarker) {
+ m_token.setSystemIdentifierToEmptyString();
+ ADVANCE_PAST_NON_NEWLINE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
+ }
+ if (character == '\'') {
parseError();
- m_token->setForceQuirks();
- return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
- } else {
+ m_token.setSystemIdentifierToEmptyString();
+ ADVANCE_PAST_NON_NEWLINE_TO(DOCTYPESystemIdentifierSingleQuotedState);
+ }
+ if (character == kEndOfFileMarker) {
parseError();
- m_token->setForceQuirks();
- HTML_ADVANCE_TO(BogusDOCTYPEState);
+ m_token.setForceQuirks();
+ return emitAndReconsumeInDataState();
}
- }
- END_STATE()
-
- HTML_BEGIN_STATE(BetweenDOCTYPEPublicAndSystemIdentifiersState) {
- if (isTokenizerWhitespace(cc))
- HTML_ADVANCE_TO(BetweenDOCTYPEPublicAndSystemIdentifiersState);
- else if (cc == '>')
- return emitAndResumeIn(source, HTMLTokenizer::DataState);
- else if (cc == '"') {
- m_token->setSystemIdentifierToEmptyString();
- HTML_ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
- } else if (cc == '\'') {
- m_token->setSystemIdentifierToEmptyString();
- HTML_ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState);
- } else if (cc == kEndOfFileMarker) {
- parseError();
- m_token->setForceQuirks();
- return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
- } else {
+ parseError();
+ m_token.setForceQuirks();
+ ADVANCE_PAST_NON_NEWLINE_TO(BogusDOCTYPEState);
+ END_STATE()
+
+ BEGIN_STATE(BetweenDOCTYPEPublicAndSystemIdentifiersState)
+ if (isTokenizerWhitespace(character))
+ ADVANCE_TO(BetweenDOCTYPEPublicAndSystemIdentifiersState);
+ if (character == '>')
+ return emitAndResumeInDataState(source);
+ if (character == '"') {
+ m_token.setSystemIdentifierToEmptyString();
+ ADVANCE_PAST_NON_NEWLINE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
+ }
+ if (character == '\'') {
+ m_token.setSystemIdentifierToEmptyString();
+ ADVANCE_PAST_NON_NEWLINE_TO(DOCTYPESystemIdentifierSingleQuotedState);
+ }
+ if (character == kEndOfFileMarker) {
parseError();
- m_token->setForceQuirks();
- HTML_ADVANCE_TO(BogusDOCTYPEState);
+ m_token.setForceQuirks();
+ return emitAndReconsumeInDataState();
}
- }
+ parseError();
+ m_token.setForceQuirks();
+ ADVANCE_PAST_NON_NEWLINE_TO(BogusDOCTYPEState);
END_STATE()
- HTML_BEGIN_STATE(AfterDOCTYPESystemKeywordState) {
- if (isTokenizerWhitespace(cc))
- HTML_ADVANCE_TO(BeforeDOCTYPESystemIdentifierState);
- else if (cc == '"') {
+ BEGIN_STATE(AfterDOCTYPESystemKeywordState)
+ if (isTokenizerWhitespace(character))
+ ADVANCE_TO(BeforeDOCTYPESystemIdentifierState);
+ if (character == '"') {
parseError();
- m_token->setSystemIdentifierToEmptyString();
- HTML_ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
- } else if (cc == '\'') {
- parseError();
- m_token->setSystemIdentifierToEmptyString();
- HTML_ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState);
- } else if (cc == '>') {
+ m_token.setSystemIdentifierToEmptyString();
+ ADVANCE_PAST_NON_NEWLINE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
+ }
+ if (character == '\'') {
parseError();
- m_token->setForceQuirks();
- return emitAndResumeIn(source, HTMLTokenizer::DataState);
- } else if (cc == kEndOfFileMarker) {
+ m_token.setSystemIdentifierToEmptyString();
+ ADVANCE_PAST_NON_NEWLINE_TO(DOCTYPESystemIdentifierSingleQuotedState);
+ }
+ if (character == '>') {
parseError();
- m_token->setForceQuirks();
- return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
- } else {
+ m_token.setForceQuirks();
+ return emitAndResumeInDataState(source);
+ }
+ if (character == kEndOfFileMarker) {
parseError();
- m_token->setForceQuirks();
- HTML_ADVANCE_TO(BogusDOCTYPEState);
+ m_token.setForceQuirks();
+ return emitAndReconsumeInDataState();
}
- }
+ parseError();
+ m_token.setForceQuirks();
+ ADVANCE_PAST_NON_NEWLINE_TO(BogusDOCTYPEState);
END_STATE()
- HTML_BEGIN_STATE(BeforeDOCTYPESystemIdentifierState) {
- if (isTokenizerWhitespace(cc))
- HTML_ADVANCE_TO(BeforeDOCTYPESystemIdentifierState);
- if (cc == '"') {
- m_token->setSystemIdentifierToEmptyString();
- HTML_ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
- } else if (cc == '\'') {
- m_token->setSystemIdentifierToEmptyString();
- HTML_ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState);
- } else if (cc == '>') {
- parseError();
- m_token->setForceQuirks();
- return emitAndResumeIn(source, HTMLTokenizer::DataState);
- } else if (cc == kEndOfFileMarker) {
+ BEGIN_STATE(BeforeDOCTYPESystemIdentifierState)
+ if (isTokenizerWhitespace(character))
+ ADVANCE_TO(BeforeDOCTYPESystemIdentifierState);
+ if (character == '"') {
+ m_token.setSystemIdentifierToEmptyString();
+ ADVANCE_PAST_NON_NEWLINE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
+ }
+ if (character == '\'') {
+ m_token.setSystemIdentifierToEmptyString();
+ ADVANCE_PAST_NON_NEWLINE_TO(DOCTYPESystemIdentifierSingleQuotedState);
+ }
+ if (character == '>') {
parseError();
- m_token->setForceQuirks();
- return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
- } else {
+ m_token.setForceQuirks();
+ return emitAndResumeInDataState(source);
+ }
+ if (character == kEndOfFileMarker) {
parseError();
- m_token->setForceQuirks();
- HTML_ADVANCE_TO(BogusDOCTYPEState);
+ m_token.setForceQuirks();
+ return emitAndReconsumeInDataState();
}
- }
+ parseError();
+ m_token.setForceQuirks();
+ ADVANCE_PAST_NON_NEWLINE_TO(BogusDOCTYPEState);
END_STATE()
- HTML_BEGIN_STATE(DOCTYPESystemIdentifierDoubleQuotedState) {
- if (cc == '"')
- HTML_ADVANCE_TO(AfterDOCTYPESystemIdentifierState);
- else if (cc == '>') {
+ BEGIN_STATE(DOCTYPESystemIdentifierDoubleQuotedState)
+ if (character == '"')
+ ADVANCE_PAST_NON_NEWLINE_TO(AfterDOCTYPESystemIdentifierState);
+ if (character == '>') {
parseError();
- m_token->setForceQuirks();
- return emitAndResumeIn(source, HTMLTokenizer::DataState);
- } else if (cc == kEndOfFileMarker) {
+ m_token.setForceQuirks();
+ return emitAndResumeInDataState(source);
+ }
+ if (character == kEndOfFileMarker) {
parseError();
- m_token->setForceQuirks();
- return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
- } else {
- m_token->appendToSystemIdentifier(cc);
- HTML_ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
+ m_token.setForceQuirks();
+ return emitAndReconsumeInDataState();
}
- }
+ m_token.appendToSystemIdentifier(character);
+ ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
END_STATE()
- HTML_BEGIN_STATE(DOCTYPESystemIdentifierSingleQuotedState) {
- if (cc == '\'')
- HTML_ADVANCE_TO(AfterDOCTYPESystemIdentifierState);
- else if (cc == '>') {
+ BEGIN_STATE(DOCTYPESystemIdentifierSingleQuotedState)
+ if (character == '\'')
+ ADVANCE_PAST_NON_NEWLINE_TO(AfterDOCTYPESystemIdentifierState);
+ if (character == '>') {
parseError();
- m_token->setForceQuirks();
- return emitAndResumeIn(source, HTMLTokenizer::DataState);
- } else if (cc == kEndOfFileMarker) {
+ m_token.setForceQuirks();
+ return emitAndResumeInDataState(source);
+ }
+ if (character == kEndOfFileMarker) {
parseError();
- m_token->setForceQuirks();
- return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
- } else {
- m_token->appendToSystemIdentifier(cc);
- HTML_ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState);
+ m_token.setForceQuirks();
+ return emitAndReconsumeInDataState();
}
- }
+ m_token.appendToSystemIdentifier(character);
+ ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState);
END_STATE()
- HTML_BEGIN_STATE(AfterDOCTYPESystemIdentifierState) {
- if (isTokenizerWhitespace(cc))
- HTML_ADVANCE_TO(AfterDOCTYPESystemIdentifierState);
- else if (cc == '>')
- return emitAndResumeIn(source, HTMLTokenizer::DataState);
- else if (cc == kEndOfFileMarker) {
+ BEGIN_STATE(AfterDOCTYPESystemIdentifierState)
+ if (isTokenizerWhitespace(character))
+ ADVANCE_TO(AfterDOCTYPESystemIdentifierState);
+ if (character == '>')
+ return emitAndResumeInDataState(source);
+ if (character == kEndOfFileMarker) {
parseError();
- m_token->setForceQuirks();
- return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
- } else {
- parseError();
- HTML_ADVANCE_TO(BogusDOCTYPEState);
+ m_token.setForceQuirks();
+ return emitAndReconsumeInDataState();
}
- }
+ parseError();
+ ADVANCE_PAST_NON_NEWLINE_TO(BogusDOCTYPEState);
END_STATE()
- HTML_BEGIN_STATE(BogusDOCTYPEState) {
- if (cc == '>')
- return emitAndResumeIn(source, HTMLTokenizer::DataState);
- else if (cc == kEndOfFileMarker)
- return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
- HTML_ADVANCE_TO(BogusDOCTYPEState);
- }
+ BEGIN_STATE(BogusDOCTYPEState)
+ if (character == '>')
+ return emitAndResumeInDataState(source);
+ if (character == kEndOfFileMarker)
+ return emitAndReconsumeInDataState();
+ ADVANCE_TO(BogusDOCTYPEState);
END_STATE()
- HTML_BEGIN_STATE(CDATASectionState) {
- if (cc == ']')
- HTML_ADVANCE_TO(CDATASectionRightSquareBracketState);
- else if (cc == kEndOfFileMarker)
- HTML_RECONSUME_IN(DataState);
- else {
- bufferCharacter(cc);
- HTML_ADVANCE_TO(CDATASectionState);
- }
- }
+ BEGIN_STATE(CDATASectionState)
+ if (character == ']')
+ ADVANCE_PAST_NON_NEWLINE_TO(CDATASectionRightSquareBracketState);
+ if (character == kEndOfFileMarker)
+ RECONSUME_IN(DataState);
+ bufferCharacter(character);
+ ADVANCE_TO(CDATASectionState);
END_STATE()
- HTML_BEGIN_STATE(CDATASectionRightSquareBracketState) {
- if (cc == ']')
- HTML_ADVANCE_TO(CDATASectionDoubleRightSquareBracketState);
- else {
- bufferCharacter(']');
- HTML_RECONSUME_IN(CDATASectionState);
- }
- }
+ BEGIN_STATE(CDATASectionRightSquareBracketState)
+ if (character == ']')
+ ADVANCE_PAST_NON_NEWLINE_TO(CDATASectionDoubleRightSquareBracketState);
+ bufferASCIICharacter(']');
+ RECONSUME_IN(CDATASectionState);
+ END_STATE()
- HTML_BEGIN_STATE(CDATASectionDoubleRightSquareBracketState) {
- if (cc == '>')
- HTML_ADVANCE_TO(DataState);
- else {
- bufferCharacter(']');
- bufferCharacter(']');
- HTML_RECONSUME_IN(CDATASectionState);
- }
- }
+ BEGIN_STATE(CDATASectionDoubleRightSquareBracketState)
+ if (character == '>')
+ ADVANCE_PAST_NON_NEWLINE_TO(DataState);
+ bufferASCIICharacter(']');
+ bufferASCIICharacter(']');
+ RECONSUME_IN(CDATASectionState);
END_STATE()
}
@@ -1579,39 +1409,45 @@ String HTMLTokenizer::bufferedCharacters() const
void HTMLTokenizer::updateStateFor(const AtomicString& tagName)
{
if (tagName == textareaTag || tagName == titleTag)
- setState(HTMLTokenizer::RCDATAState);
+ m_state = RCDATAState;
else if (tagName == plaintextTag)
- setState(HTMLTokenizer::PLAINTEXTState);
+ m_state = PLAINTEXTState;
else if (tagName == scriptTag)
- setState(HTMLTokenizer::ScriptDataState);
+ m_state = ScriptDataState;
else if (tagName == styleTag
|| tagName == iframeTag
|| tagName == xmpTag
|| (tagName == noembedTag && m_options.pluginsEnabled)
|| tagName == noframesTag
|| (tagName == noscriptTag && m_options.scriptEnabled))
- setState(HTMLTokenizer::RAWTEXTState);
+ m_state = RAWTEXTState;
+}
+
+inline void HTMLTokenizer::appendToTemporaryBuffer(UChar character)
+{
+ ASSERT(isASCII(character));
+ m_temporaryBuffer.append(character);
}
-inline bool HTMLTokenizer::temporaryBufferIs(const String& expectedString)
+inline bool HTMLTokenizer::temporaryBufferIs(const char* expectedString)
{
return vectorEqualsString(m_temporaryBuffer, expectedString);
}
-inline void HTMLTokenizer::addToPossibleEndTag(LChar cc)
+inline void HTMLTokenizer::appendToPossibleEndTag(UChar character)
{
- ASSERT(isEndTagBufferingState(m_state));
- m_bufferedEndTagName.append(cc);
+ ASSERT(isASCII(character));
+ m_bufferedEndTagName.append(character);
}
-inline bool HTMLTokenizer::isAppropriateEndTag()
+inline bool HTMLTokenizer::isAppropriateEndTag() const
{
if (m_bufferedEndTagName.size() != m_appropriateEndTagName.size())
return false;
- size_t numCharacters = m_bufferedEndTagName.size();
+ unsigned size = m_bufferedEndTagName.size();
- for (size_t i = 0; i < numCharacters; i++) {
+ for (unsigned i = 0; i < size; i++) {
if (m_bufferedEndTagName[i] != m_appropriateEndTagName[i])
return false;
}
@@ -1621,7 +1457,6 @@ inline bool HTMLTokenizer::isAppropriateEndTag()
inline void HTMLTokenizer::parseError()
{
- notImplemented();
}
}