diff options
author | Lorry Tar Creator <lorry-tar-importer@lorry> | 2017-06-27 06:07:23 +0000 |
---|---|---|
committer | Lorry Tar Creator <lorry-tar-importer@lorry> | 2017-06-27 06:07:23 +0000 |
commit | 1bf1084f2b10c3b47fd1a588d85d21ed0eb41d0c (patch) | |
tree | 46dcd36c86e7fbc6e5df36deb463b33e9967a6f7 /Source/WebCore/html/parser/HTMLTokenizer.cpp | |
parent | 32761a6cee1d0dee366b885b7b9c777e67885688 (diff) | |
download | WebKitGtk-tarball-master.tar.gz |
webkitgtk-2.16.5HEADwebkitgtk-2.16.5master
Diffstat (limited to 'Source/WebCore/html/parser/HTMLTokenizer.cpp')
-rw-r--r-- | Source/WebCore/html/parser/HTMLTokenizer.cpp | 2235 |
1 files changed, 1035 insertions, 1200 deletions
diff --git a/Source/WebCore/html/parser/HTMLTokenizer.cpp b/Source/WebCore/html/parser/HTMLTokenizer.cpp index 2abefaf68..985c618dd 100644 --- a/Source/WebCore/html/parser/HTMLTokenizer.cpp +++ b/Source/WebCore/html/parser/HTMLTokenizer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2008 Apple Inc. All Rights Reserved. + * Copyright (C) 2008-2016 Apple Inc. All Rights Reserved. * Copyright (C) 2009 Torch Mobile, Inc. http://www.torchmobile.com/ * Copyright (C) 2010 Google, Inc. All Rights Reserved. * @@ -29,13 +29,9 @@ #include "HTMLTokenizer.h" #include "HTMLEntityParser.h" -#include "HTMLTreeBuilder.h" +#include "HTMLNames.h" #include "MarkupTokenizerInlines.h" -#include "NotImplemented.h" -#include <wtf/ASCIICType.h> -#include <wtf/CurrentTime.h> -#include <wtf/text/CString.h> -#include <wtf/unicode/Unicode.h> +#include <wtf/text/StringBuilder.h> using namespace WTF; @@ -43,81 +39,95 @@ namespace WebCore { using namespace HTMLNames; -// This has to go in a .cpp file, as the linker doesn't like it being included more than once. -// We don't have an HTMLToken.cpp though, so this is the next best place. -QualifiedName AtomicHTMLToken::nameForAttribute(const HTMLToken::Attribute& attribute) const +static inline LChar convertASCIIAlphaToLower(UChar character) { - return QualifiedName(nullAtom, AtomicString(attribute.name), nullAtom); + ASSERT(isASCIIAlpha(character)); + return toASCIILowerUnchecked(character); } -bool AtomicHTMLToken::usesName() const +static inline bool vectorEqualsString(const Vector<LChar, 32>& vector, const char* string) { - return m_type == HTMLToken::StartTag || m_type == HTMLToken::EndTag || m_type == HTMLToken::DOCTYPE; + unsigned size = vector.size(); + for (unsigned i = 0; i < size; ++i) { + if (!string[i] || vector[i] != string[i]) + return false; + } + return !string[size]; } -bool AtomicHTMLToken::usesAttributes() const +inline bool HTMLTokenizer::inEndTagBufferingState() const { - return m_type == HTMLToken::StartTag || m_type == HTMLToken::EndTag; + switch (m_state) { + case RCDATAEndTagOpenState: + case RCDATAEndTagNameState: + case RAWTEXTEndTagOpenState: + case RAWTEXTEndTagNameState: + case ScriptDataEndTagOpenState: + case ScriptDataEndTagNameState: + case ScriptDataEscapedEndTagOpenState: + case ScriptDataEscapedEndTagNameState: + return true; + default: + return false; + } } -static inline UChar toLowerCase(UChar cc) +HTMLTokenizer::HTMLTokenizer(const HTMLParserOptions& options) + : m_preprocessor(*this) + , m_options(options) { - ASSERT(isASCIIUpper(cc)); - const int lowerCaseOffset = 0x20; - return cc + lowerCaseOffset; } -static inline bool vectorEqualsString(const Vector<LChar, 32>& vector, const String& string) +inline void HTMLTokenizer::bufferASCIICharacter(UChar character) { - if (vector.size() != string.length()) - return false; - - if (!string.length()) - return true; + ASSERT(character != kEndOfFileMarker); + ASSERT(isASCII(character)); + LChar narrowedCharacter = character; + m_token.appendToCharacter(narrowedCharacter); +} - return equal(string.impl(), vector.data(), vector.size()); +inline void HTMLTokenizer::bufferCharacter(UChar character) +{ + ASSERT(character != kEndOfFileMarker); + m_token.appendToCharacter(character); } -static inline bool isEndTagBufferingState(HTMLTokenizer::State state) +inline bool HTMLTokenizer::emitAndResumeInDataState(SegmentedString& source) { - switch (state) { - case HTMLTokenizer::RCDATAEndTagOpenState: - case HTMLTokenizer::RCDATAEndTagNameState: - case HTMLTokenizer::RAWTEXTEndTagOpenState: - case HTMLTokenizer::RAWTEXTEndTagNameState: - case HTMLTokenizer::ScriptDataEndTagOpenState: - case HTMLTokenizer::ScriptDataEndTagNameState: - case HTMLTokenizer::ScriptDataEscapedEndTagOpenState: - case HTMLTokenizer::ScriptDataEscapedEndTagNameState: - return true; - default: - return false; - } + saveEndTagNameIfNeeded(); + m_state = DataState; + source.advancePastNonNewline(); + return true; } -#define HTML_BEGIN_STATE(stateName) BEGIN_STATE(HTMLTokenizer, stateName) -#define HTML_RECONSUME_IN(stateName) RECONSUME_IN(HTMLTokenizer, stateName) -#define HTML_ADVANCE_TO(stateName) ADVANCE_TO(HTMLTokenizer, stateName) -#define HTML_SWITCH_TO(stateName) SWITCH_TO(HTMLTokenizer, stateName) +inline bool HTMLTokenizer::emitAndReconsumeInDataState() +{ + saveEndTagNameIfNeeded(); + m_state = DataState; + return true; +} -HTMLTokenizer::HTMLTokenizer(const HTMLParserOptions& options) - : m_inputStreamPreprocessor(this) - , m_options(options) +inline bool HTMLTokenizer::emitEndOfFile(SegmentedString& source) { - reset(); + m_state = DataState; + if (haveBufferedCharacterToken()) + return true; + source.advance(); + m_token.clear(); + m_token.makeEndOfFile(); + return true; } -HTMLTokenizer::~HTMLTokenizer() +inline void HTMLTokenizer::saveEndTagNameIfNeeded() { + ASSERT(m_token.type() != HTMLToken::Uninitialized); + if (m_token.type() == HTMLToken::StartTag) + m_appropriateEndTagName = m_token.name(); } -void HTMLTokenizer::reset() +inline bool HTMLTokenizer::haveBufferedCharacterToken() const { - m_state = HTMLTokenizer::DataState; - m_token = 0; - m_forceNullCharacterReplacement = false; - m_shouldAllowCDATA = false; - m_additionalAllowedCharacter = '\0'; + return m_token.type() == HTMLToken::Character; } inline bool HTMLTokenizer::processEntity(SegmentedString& source) @@ -129,7 +139,7 @@ inline bool HTMLTokenizer::processEntity(SegmentedString& source) return false; if (!success) { ASSERT(decodedEntity.isEmpty()); - bufferCharacter('&'); + bufferASCIICharacter('&'); } else { for (unsigned i = 0; i < decodedEntity.length(); ++i) bufferCharacter(decodedEntity[i]); @@ -137,1426 +147,1246 @@ inline bool HTMLTokenizer::processEntity(SegmentedString& source) return true; } -bool HTMLTokenizer::flushBufferedEndTag(SegmentedString& source) +void HTMLTokenizer::flushBufferedEndTag() { - ASSERT(m_token->type() == HTMLToken::Character || m_token->type() == HTMLToken::Uninitialized); - source.advanceAndUpdateLineNumber(); - if (m_token->type() == HTMLToken::Character) - return true; - m_token->beginEndTag(m_bufferedEndTagName); + m_token.beginEndTag(m_bufferedEndTagName); m_bufferedEndTagName.clear(); m_appropriateEndTagName.clear(); m_temporaryBuffer.clear(); +} + +bool HTMLTokenizer::commitToPartialEndTag(SegmentedString& source, UChar character, State state) +{ + ASSERT(source.currentCharacter() == character); + appendToTemporaryBuffer(character); + source.advancePastNonNewline(); + + if (haveBufferedCharacterToken()) { + // Emit the buffered character token. + // The next call to processToken will flush the buffered end tag and continue parsing it. + m_state = state; + return true; + } + + flushBufferedEndTag(); return false; } -#define FLUSH_AND_ADVANCE_TO(stateName) \ - do { \ - m_state = HTMLTokenizer::stateName; \ - if (flushBufferedEndTag(source)) \ - return true; \ - if (source.isEmpty() \ - || !m_inputStreamPreprocessor.peek(source)) \ - return haveBufferedCharacterToken(); \ - cc = m_inputStreamPreprocessor.nextInputCharacter(); \ - goto stateName; \ - } while (false) - -bool HTMLTokenizer::flushEmitAndResumeIn(SegmentedString& source, HTMLTokenizer::State state) +bool HTMLTokenizer::commitToCompleteEndTag(SegmentedString& source) { - m_state = state; - flushBufferedEndTag(source); + ASSERT(source.currentCharacter() == '>'); + appendToTemporaryBuffer('>'); + source.advancePastNonNewline(); + + m_state = DataState; + + if (haveBufferedCharacterToken()) { + // Emit the character token we already have. + // The next call to processToken will flush the buffered end tag and emit it. + return true; + } + + flushBufferedEndTag(); return true; } -bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token) +bool HTMLTokenizer::processToken(SegmentedString& source) { - // If we have a token in progress, then we're supposed to be called back - // with the same token so we can finish it. - ASSERT(!m_token || m_token == &token || token.type() == HTMLToken::Uninitialized); - m_token = &token; - - if (!m_bufferedEndTagName.isEmpty() && !isEndTagBufferingState(m_state)) { - // FIXME: This should call flushBufferedEndTag(). - // We started an end tag during our last iteration. - m_token->beginEndTag(m_bufferedEndTagName); - m_bufferedEndTagName.clear(); - m_appropriateEndTagName.clear(); - m_temporaryBuffer.clear(); - if (m_state == HTMLTokenizer::DataState) { - // We're back in the data state, so we must be done with the tag. + if (!m_bufferedEndTagName.isEmpty() && !inEndTagBufferingState()) { + // We are back here after emitting a character token that came just before an end tag. + // To continue parsing the end tag we need to move the buffered tag name into the token. + flushBufferedEndTag(); + + // If we are in the data state, the end tag is already complete and we should emit it + // now, otherwise, we want to resume parsing the partial end tag. + if (m_state == DataState) return true; - } } - if (source.isEmpty() || !m_inputStreamPreprocessor.peek(source)) + if (!m_preprocessor.peek(source, isNullCharacterSkippingState(m_state))) return haveBufferedCharacterToken(); - UChar cc = m_inputStreamPreprocessor.nextInputCharacter(); + UChar character = m_preprocessor.nextInputCharacter(); - // Source: http://www.whatwg.org/specs/web-apps/current-work/#tokenisation0 + // https://html.spec.whatwg.org/#tokenization switch (m_state) { - HTML_BEGIN_STATE(DataState) { - if (cc == '&') - HTML_ADVANCE_TO(CharacterReferenceInDataState); - else if (cc == '<') { - if (m_token->type() == HTMLToken::Character) { - // We have a bunch of character tokens queued up that we - // are emitting lazily here. - return true; - } - HTML_ADVANCE_TO(TagOpenState); - } else if (cc == kEndOfFileMarker) - return emitEndOfFile(source); - else { - bufferCharacter(cc); - HTML_ADVANCE_TO(DataState); - } - } - END_STATE() - - HTML_BEGIN_STATE(CharacterReferenceInDataState) { - if (!processEntity(source)) - return haveBufferedCharacterToken(); - HTML_SWITCH_TO(DataState); - } - END_STATE() - HTML_BEGIN_STATE(RCDATAState) { - if (cc == '&') - HTML_ADVANCE_TO(CharacterReferenceInRCDATAState); - else if (cc == '<') - HTML_ADVANCE_TO(RCDATALessThanSignState); - else if (cc == kEndOfFileMarker) - return emitEndOfFile(source); - else { - bufferCharacter(cc); - HTML_ADVANCE_TO(RCDATAState); + BEGIN_STATE(DataState) + if (character == '&') + ADVANCE_PAST_NON_NEWLINE_TO(CharacterReferenceInDataState); + if (character == '<') { + if (haveBufferedCharacterToken()) + RETURN_IN_CURRENT_STATE(true); + ADVANCE_PAST_NON_NEWLINE_TO(TagOpenState); } - } + if (character == kEndOfFileMarker) + return emitEndOfFile(source); + bufferCharacter(character); + ADVANCE_TO(DataState); END_STATE() - HTML_BEGIN_STATE(CharacterReferenceInRCDATAState) { + BEGIN_STATE(CharacterReferenceInDataState) if (!processEntity(source)) - return haveBufferedCharacterToken(); - HTML_SWITCH_TO(RCDATAState); - } + RETURN_IN_CURRENT_STATE(haveBufferedCharacterToken()); + SWITCH_TO(DataState); END_STATE() - HTML_BEGIN_STATE(RAWTEXTState) { - if (cc == '<') - HTML_ADVANCE_TO(RAWTEXTLessThanSignState); - else if (cc == kEndOfFileMarker) - return emitEndOfFile(source); - else { - bufferCharacter(cc); - HTML_ADVANCE_TO(RAWTEXTState); - } - } + BEGIN_STATE(RCDATAState) + if (character == '&') + ADVANCE_PAST_NON_NEWLINE_TO(CharacterReferenceInRCDATAState); + if (character == '<') + ADVANCE_PAST_NON_NEWLINE_TO(RCDATALessThanSignState); + if (character == kEndOfFileMarker) + RECONSUME_IN(DataState); + bufferCharacter(character); + ADVANCE_TO(RCDATAState); END_STATE() - HTML_BEGIN_STATE(ScriptDataState) { - if (cc == '<') - HTML_ADVANCE_TO(ScriptDataLessThanSignState); - else if (cc == kEndOfFileMarker) - return emitEndOfFile(source); - else { - bufferCharacter(cc); - HTML_ADVANCE_TO(ScriptDataState); + BEGIN_STATE(CharacterReferenceInRCDATAState) + if (!processEntity(source)) + RETURN_IN_CURRENT_STATE(haveBufferedCharacterToken()); + SWITCH_TO(RCDATAState); + END_STATE() + + BEGIN_STATE(RAWTEXTState) + if (character == '<') + ADVANCE_PAST_NON_NEWLINE_TO(RAWTEXTLessThanSignState); + if (character == kEndOfFileMarker) + RECONSUME_IN(DataState); + bufferCharacter(character); + ADVANCE_TO(RAWTEXTState); + END_STATE() + + BEGIN_STATE(ScriptDataState) + if (character == '<') + ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataLessThanSignState); + if (character == kEndOfFileMarker) + RECONSUME_IN(DataState); + bufferCharacter(character); + ADVANCE_TO(ScriptDataState); + END_STATE() + + BEGIN_STATE(PLAINTEXTState) + if (character == kEndOfFileMarker) + RECONSUME_IN(DataState); + bufferCharacter(character); + ADVANCE_TO(PLAINTEXTState); + END_STATE() + + BEGIN_STATE(TagOpenState) + if (character == '!') + ADVANCE_PAST_NON_NEWLINE_TO(MarkupDeclarationOpenState); + if (character == '/') + ADVANCE_PAST_NON_NEWLINE_TO(EndTagOpenState); + if (isASCIIAlpha(character)) { + m_token.beginStartTag(convertASCIIAlphaToLower(character)); + ADVANCE_PAST_NON_NEWLINE_TO(TagNameState); } - } - END_STATE() - - HTML_BEGIN_STATE(PLAINTEXTState) { - if (cc == kEndOfFileMarker) - return emitEndOfFile(source); - bufferCharacter(cc); - HTML_ADVANCE_TO(PLAINTEXTState); - } - END_STATE() - - HTML_BEGIN_STATE(TagOpenState) { - if (cc == '!') - HTML_ADVANCE_TO(MarkupDeclarationOpenState); - else if (cc == '/') - HTML_ADVANCE_TO(EndTagOpenState); - else if (isASCIIUpper(cc)) { - m_token->beginStartTag(toLowerCase(cc)); - HTML_ADVANCE_TO(TagNameState); - } else if (isASCIILower(cc)) { - m_token->beginStartTag(cc); - HTML_ADVANCE_TO(TagNameState); - } else if (cc == '?') { + if (character == '?') { parseError(); // The spec consumes the current character before switching // to the bogus comment state, but it's easier to implement // if we reconsume the current character. - HTML_RECONSUME_IN(BogusCommentState); - } else { - parseError(); - bufferCharacter('<'); - HTML_RECONSUME_IN(DataState); + RECONSUME_IN(BogusCommentState); } - } + parseError(); + bufferASCIICharacter('<'); + RECONSUME_IN(DataState); END_STATE() - HTML_BEGIN_STATE(EndTagOpenState) { - if (isASCIIUpper(cc)) { - m_token->beginEndTag(static_cast<LChar>(toLowerCase(cc))); + BEGIN_STATE(EndTagOpenState) + if (isASCIIAlpha(character)) { + m_token.beginEndTag(convertASCIIAlphaToLower(character)); m_appropriateEndTagName.clear(); - HTML_ADVANCE_TO(TagNameState); - } else if (isASCIILower(cc)) { - m_token->beginEndTag(static_cast<LChar>(cc)); - m_appropriateEndTagName.clear(); - HTML_ADVANCE_TO(TagNameState); - } else if (cc == '>') { - parseError(); - HTML_ADVANCE_TO(DataState); - } else if (cc == kEndOfFileMarker) { + ADVANCE_PAST_NON_NEWLINE_TO(TagNameState); + } + if (character == '>') { parseError(); - bufferCharacter('<'); - bufferCharacter('/'); - HTML_RECONSUME_IN(DataState); - } else { + ADVANCE_PAST_NON_NEWLINE_TO(DataState); + } + if (character == kEndOfFileMarker) { parseError(); - HTML_RECONSUME_IN(BogusCommentState); + bufferASCIICharacter('<'); + bufferASCIICharacter('/'); + RECONSUME_IN(DataState); } - } + parseError(); + RECONSUME_IN(BogusCommentState); END_STATE() - HTML_BEGIN_STATE(TagNameState) { - if (isTokenizerWhitespace(cc)) - HTML_ADVANCE_TO(BeforeAttributeNameState); - else if (cc == '/') - HTML_ADVANCE_TO(SelfClosingStartTagState); - else if (cc == '>') - return emitAndResumeIn(source, HTMLTokenizer::DataState); - else if (m_options.usePreHTML5ParserQuirks && cc == '<') - return emitAndReconsumeIn(source, HTMLTokenizer::DataState); - else if (isASCIIUpper(cc)) { - m_token->appendToName(toLowerCase(cc)); - HTML_ADVANCE_TO(TagNameState); - } else if (cc == kEndOfFileMarker) { - parseError(); - HTML_RECONSUME_IN(DataState); - } else { - m_token->appendToName(cc); - HTML_ADVANCE_TO(TagNameState); + BEGIN_STATE(TagNameState) + if (isTokenizerWhitespace(character)) + ADVANCE_TO(BeforeAttributeNameState); + if (character == '/') + ADVANCE_PAST_NON_NEWLINE_TO(SelfClosingStartTagState); + if (character == '>') + return emitAndResumeInDataState(source); + if (m_options.usePreHTML5ParserQuirks && character == '<') + return emitAndReconsumeInDataState(); + if (character == kEndOfFileMarker) { + parseError(); + RECONSUME_IN(DataState); } - } + m_token.appendToName(toASCIILower(character)); + ADVANCE_PAST_NON_NEWLINE_TO(TagNameState); END_STATE() - HTML_BEGIN_STATE(RCDATALessThanSignState) { - if (cc == '/') { + BEGIN_STATE(RCDATALessThanSignState) + if (character == '/') { m_temporaryBuffer.clear(); ASSERT(m_bufferedEndTagName.isEmpty()); - HTML_ADVANCE_TO(RCDATAEndTagOpenState); - } else { - bufferCharacter('<'); - HTML_RECONSUME_IN(RCDATAState); + ADVANCE_PAST_NON_NEWLINE_TO(RCDATAEndTagOpenState); } - } + bufferASCIICharacter('<'); + RECONSUME_IN(RCDATAState); END_STATE() - HTML_BEGIN_STATE(RCDATAEndTagOpenState) { - if (isASCIIUpper(cc)) { - m_temporaryBuffer.append(static_cast<LChar>(cc)); - addToPossibleEndTag(static_cast<LChar>(toLowerCase(cc))); - HTML_ADVANCE_TO(RCDATAEndTagNameState); - } else if (isASCIILower(cc)) { - m_temporaryBuffer.append(static_cast<LChar>(cc)); - addToPossibleEndTag(static_cast<LChar>(cc)); - HTML_ADVANCE_TO(RCDATAEndTagNameState); - } else { - bufferCharacter('<'); - bufferCharacter('/'); - HTML_RECONSUME_IN(RCDATAState); + BEGIN_STATE(RCDATAEndTagOpenState) + if (isASCIIAlpha(character)) { + appendToTemporaryBuffer(character); + appendToPossibleEndTag(convertASCIIAlphaToLower(character)); + ADVANCE_PAST_NON_NEWLINE_TO(RCDATAEndTagNameState); } - } + bufferASCIICharacter('<'); + bufferASCIICharacter('/'); + RECONSUME_IN(RCDATAState); END_STATE() - HTML_BEGIN_STATE(RCDATAEndTagNameState) { - if (isASCIIUpper(cc)) { - m_temporaryBuffer.append(static_cast<LChar>(cc)); - addToPossibleEndTag(static_cast<LChar>(toLowerCase(cc))); - HTML_ADVANCE_TO(RCDATAEndTagNameState); - } else if (isASCIILower(cc)) { - m_temporaryBuffer.append(static_cast<LChar>(cc)); - addToPossibleEndTag(static_cast<LChar>(cc)); - HTML_ADVANCE_TO(RCDATAEndTagNameState); - } else { - if (isTokenizerWhitespace(cc)) { - if (isAppropriateEndTag()) { - m_temporaryBuffer.append(static_cast<LChar>(cc)); - FLUSH_AND_ADVANCE_TO(BeforeAttributeNameState); - } - } else if (cc == '/') { - if (isAppropriateEndTag()) { - m_temporaryBuffer.append(static_cast<LChar>(cc)); - FLUSH_AND_ADVANCE_TO(SelfClosingStartTagState); - } - } else if (cc == '>') { - if (isAppropriateEndTag()) { - m_temporaryBuffer.append(static_cast<LChar>(cc)); - return flushEmitAndResumeIn(source, HTMLTokenizer::DataState); - } + BEGIN_STATE(RCDATAEndTagNameState) + if (isASCIIAlpha(character)) { + appendToTemporaryBuffer(character); + appendToPossibleEndTag(convertASCIIAlphaToLower(character)); + ADVANCE_PAST_NON_NEWLINE_TO(RCDATAEndTagNameState); + } + if (isTokenizerWhitespace(character)) { + if (isAppropriateEndTag()) { + if (commitToPartialEndTag(source, character, BeforeAttributeNameState)) + return true; + SWITCH_TO(BeforeAttributeNameState); } - bufferCharacter('<'); - bufferCharacter('/'); - m_token->appendToCharacter(m_temporaryBuffer); - m_bufferedEndTagName.clear(); - m_temporaryBuffer.clear(); - HTML_RECONSUME_IN(RCDATAState); + } else if (character == '/') { + if (isAppropriateEndTag()) { + if (commitToPartialEndTag(source, '/', SelfClosingStartTagState)) + return true; + SWITCH_TO(SelfClosingStartTagState); + } + } else if (character == '>') { + if (isAppropriateEndTag()) + return commitToCompleteEndTag(source); } - } + bufferASCIICharacter('<'); + bufferASCIICharacter('/'); + m_token.appendToCharacter(m_temporaryBuffer); + m_bufferedEndTagName.clear(); + m_temporaryBuffer.clear(); + RECONSUME_IN(RCDATAState); END_STATE() - HTML_BEGIN_STATE(RAWTEXTLessThanSignState) { - if (cc == '/') { + BEGIN_STATE(RAWTEXTLessThanSignState) + if (character == '/') { m_temporaryBuffer.clear(); ASSERT(m_bufferedEndTagName.isEmpty()); - HTML_ADVANCE_TO(RAWTEXTEndTagOpenState); - } else { - bufferCharacter('<'); - HTML_RECONSUME_IN(RAWTEXTState); + ADVANCE_PAST_NON_NEWLINE_TO(RAWTEXTEndTagOpenState); } - } + bufferASCIICharacter('<'); + RECONSUME_IN(RAWTEXTState); END_STATE() - HTML_BEGIN_STATE(RAWTEXTEndTagOpenState) { - if (isASCIIUpper(cc)) { - m_temporaryBuffer.append(static_cast<LChar>(cc)); - addToPossibleEndTag(static_cast<LChar>(toLowerCase(cc))); - HTML_ADVANCE_TO(RAWTEXTEndTagNameState); - } else if (isASCIILower(cc)) { - m_temporaryBuffer.append(static_cast<LChar>(cc)); - addToPossibleEndTag(static_cast<LChar>(cc)); - HTML_ADVANCE_TO(RAWTEXTEndTagNameState); - } else { - bufferCharacter('<'); - bufferCharacter('/'); - HTML_RECONSUME_IN(RAWTEXTState); + BEGIN_STATE(RAWTEXTEndTagOpenState) + if (isASCIIAlpha(character)) { + appendToTemporaryBuffer(character); + appendToPossibleEndTag(convertASCIIAlphaToLower(character)); + ADVANCE_PAST_NON_NEWLINE_TO(RAWTEXTEndTagNameState); } - } + bufferASCIICharacter('<'); + bufferASCIICharacter('/'); + RECONSUME_IN(RAWTEXTState); END_STATE() - HTML_BEGIN_STATE(RAWTEXTEndTagNameState) { - if (isASCIIUpper(cc)) { - m_temporaryBuffer.append(static_cast<LChar>(cc)); - addToPossibleEndTag(static_cast<LChar>(toLowerCase(cc))); - HTML_ADVANCE_TO(RAWTEXTEndTagNameState); - } else if (isASCIILower(cc)) { - m_temporaryBuffer.append(static_cast<LChar>(cc)); - addToPossibleEndTag(static_cast<LChar>(cc)); - HTML_ADVANCE_TO(RAWTEXTEndTagNameState); - } else { - if (isTokenizerWhitespace(cc)) { - if (isAppropriateEndTag()) { - m_temporaryBuffer.append(static_cast<LChar>(cc)); - FLUSH_AND_ADVANCE_TO(BeforeAttributeNameState); - } - } else if (cc == '/') { - if (isAppropriateEndTag()) { - m_temporaryBuffer.append(static_cast<LChar>(cc)); - FLUSH_AND_ADVANCE_TO(SelfClosingStartTagState); - } - } else if (cc == '>') { - if (isAppropriateEndTag()) { - m_temporaryBuffer.append(static_cast<LChar>(cc)); - return flushEmitAndResumeIn(source, HTMLTokenizer::DataState); - } + BEGIN_STATE(RAWTEXTEndTagNameState) + if (isASCIIAlpha(character)) { + appendToTemporaryBuffer(character); + appendToPossibleEndTag(convertASCIIAlphaToLower(character)); + ADVANCE_PAST_NON_NEWLINE_TO(RAWTEXTEndTagNameState); + } + if (isTokenizerWhitespace(character)) { + if (isAppropriateEndTag()) { + if (commitToPartialEndTag(source, character, BeforeAttributeNameState)) + return true; + SWITCH_TO(BeforeAttributeNameState); } - bufferCharacter('<'); - bufferCharacter('/'); - m_token->appendToCharacter(m_temporaryBuffer); - m_bufferedEndTagName.clear(); - m_temporaryBuffer.clear(); - HTML_RECONSUME_IN(RAWTEXTState); + } else if (character == '/') { + if (isAppropriateEndTag()) { + if (commitToPartialEndTag(source, '/', SelfClosingStartTagState)) + return true; + SWITCH_TO(SelfClosingStartTagState); + } + } else if (character == '>') { + if (isAppropriateEndTag()) + return commitToCompleteEndTag(source); } - } + bufferASCIICharacter('<'); + bufferASCIICharacter('/'); + m_token.appendToCharacter(m_temporaryBuffer); + m_bufferedEndTagName.clear(); + m_temporaryBuffer.clear(); + RECONSUME_IN(RAWTEXTState); END_STATE() - HTML_BEGIN_STATE(ScriptDataLessThanSignState) { - if (cc == '/') { + BEGIN_STATE(ScriptDataLessThanSignState) + if (character == '/') { m_temporaryBuffer.clear(); ASSERT(m_bufferedEndTagName.isEmpty()); - HTML_ADVANCE_TO(ScriptDataEndTagOpenState); - } else if (cc == '!') { - bufferCharacter('<'); - bufferCharacter('!'); - HTML_ADVANCE_TO(ScriptDataEscapeStartState); - } else { - bufferCharacter('<'); - HTML_RECONSUME_IN(ScriptDataState); + ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataEndTagOpenState); } - } + if (character == '!') { + bufferASCIICharacter('<'); + bufferASCIICharacter('!'); + ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataEscapeStartState); + } + bufferASCIICharacter('<'); + RECONSUME_IN(ScriptDataState); END_STATE() - HTML_BEGIN_STATE(ScriptDataEndTagOpenState) { - if (isASCIIUpper(cc)) { - m_temporaryBuffer.append(static_cast<LChar>(cc)); - addToPossibleEndTag(static_cast<LChar>(toLowerCase(cc))); - HTML_ADVANCE_TO(ScriptDataEndTagNameState); - } else if (isASCIILower(cc)) { - m_temporaryBuffer.append(static_cast<LChar>(cc)); - addToPossibleEndTag(static_cast<LChar>(cc)); - HTML_ADVANCE_TO(ScriptDataEndTagNameState); - } else { - bufferCharacter('<'); - bufferCharacter('/'); - HTML_RECONSUME_IN(ScriptDataState); + BEGIN_STATE(ScriptDataEndTagOpenState) + if (isASCIIAlpha(character)) { + appendToTemporaryBuffer(character); + appendToPossibleEndTag(convertASCIIAlphaToLower(character)); + ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataEndTagNameState); } - } + bufferASCIICharacter('<'); + bufferASCIICharacter('/'); + RECONSUME_IN(ScriptDataState); END_STATE() - HTML_BEGIN_STATE(ScriptDataEndTagNameState) { - if (isASCIIUpper(cc)) { - m_temporaryBuffer.append(static_cast<LChar>(cc)); - addToPossibleEndTag(static_cast<LChar>(toLowerCase(cc))); - HTML_ADVANCE_TO(ScriptDataEndTagNameState); - } else if (isASCIILower(cc)) { - m_temporaryBuffer.append(static_cast<LChar>(cc)); - addToPossibleEndTag(static_cast<LChar>(cc)); - HTML_ADVANCE_TO(ScriptDataEndTagNameState); - } else { - if (isTokenizerWhitespace(cc)) { - if (isAppropriateEndTag()) { - m_temporaryBuffer.append(static_cast<LChar>(cc)); - FLUSH_AND_ADVANCE_TO(BeforeAttributeNameState); - } - } else if (cc == '/') { - if (isAppropriateEndTag()) { - m_temporaryBuffer.append(static_cast<LChar>(cc)); - FLUSH_AND_ADVANCE_TO(SelfClosingStartTagState); - } - } else if (cc == '>') { - if (isAppropriateEndTag()) { - m_temporaryBuffer.append(static_cast<LChar>(cc)); - return flushEmitAndResumeIn(source, HTMLTokenizer::DataState); - } + BEGIN_STATE(ScriptDataEndTagNameState) + if (isASCIIAlpha(character)) { + appendToTemporaryBuffer(character); + appendToPossibleEndTag(convertASCIIAlphaToLower(character)); + ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataEndTagNameState); + } + if (isTokenizerWhitespace(character)) { + if (isAppropriateEndTag()) { + if (commitToPartialEndTag(source, character, BeforeAttributeNameState)) + return true; + SWITCH_TO(BeforeAttributeNameState); } - bufferCharacter('<'); - bufferCharacter('/'); - m_token->appendToCharacter(m_temporaryBuffer); - m_bufferedEndTagName.clear(); - m_temporaryBuffer.clear(); - HTML_RECONSUME_IN(ScriptDataState); + } else if (character == '/') { + if (isAppropriateEndTag()) { + if (commitToPartialEndTag(source, '/', SelfClosingStartTagState)) + return true; + SWITCH_TO(SelfClosingStartTagState); + } + } else if (character == '>') { + if (isAppropriateEndTag()) + return commitToCompleteEndTag(source); } - } + bufferASCIICharacter('<'); + bufferASCIICharacter('/'); + m_token.appendToCharacter(m_temporaryBuffer); + m_bufferedEndTagName.clear(); + m_temporaryBuffer.clear(); + RECONSUME_IN(ScriptDataState); END_STATE() - HTML_BEGIN_STATE(ScriptDataEscapeStartState) { - if (cc == '-') { - bufferCharacter(cc); - HTML_ADVANCE_TO(ScriptDataEscapeStartDashState); + BEGIN_STATE(ScriptDataEscapeStartState) + if (character == '-') { + bufferASCIICharacter('-'); + ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataEscapeStartDashState); } else - HTML_RECONSUME_IN(ScriptDataState); - } + RECONSUME_IN(ScriptDataState); END_STATE() - HTML_BEGIN_STATE(ScriptDataEscapeStartDashState) { - if (cc == '-') { - bufferCharacter(cc); - HTML_ADVANCE_TO(ScriptDataEscapedDashDashState); + BEGIN_STATE(ScriptDataEscapeStartDashState) + if (character == '-') { + bufferASCIICharacter('-'); + ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataEscapedDashDashState); } else - HTML_RECONSUME_IN(ScriptDataState); - } + RECONSUME_IN(ScriptDataState); END_STATE() - HTML_BEGIN_STATE(ScriptDataEscapedState) { - if (cc == '-') { - bufferCharacter(cc); - HTML_ADVANCE_TO(ScriptDataEscapedDashState); - } else if (cc == '<') - HTML_ADVANCE_TO(ScriptDataEscapedLessThanSignState); - else if (cc == kEndOfFileMarker) { + BEGIN_STATE(ScriptDataEscapedState) + if (character == '-') { + bufferASCIICharacter('-'); + ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataEscapedDashState); + } + if (character == '<') + ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataEscapedLessThanSignState); + if (character == kEndOfFileMarker) { parseError(); - HTML_RECONSUME_IN(DataState); - } else { - bufferCharacter(cc); - HTML_ADVANCE_TO(ScriptDataEscapedState); + RECONSUME_IN(DataState); } - } + bufferCharacter(character); + ADVANCE_TO(ScriptDataEscapedState); END_STATE() - HTML_BEGIN_STATE(ScriptDataEscapedDashState) { - if (cc == '-') { - bufferCharacter(cc); - HTML_ADVANCE_TO(ScriptDataEscapedDashDashState); - } else if (cc == '<') - HTML_ADVANCE_TO(ScriptDataEscapedLessThanSignState); - else if (cc == kEndOfFileMarker) { + BEGIN_STATE(ScriptDataEscapedDashState) + if (character == '-') { + bufferASCIICharacter('-'); + ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataEscapedDashDashState); + } + if (character == '<') + ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataEscapedLessThanSignState); + if (character == kEndOfFileMarker) { parseError(); - HTML_RECONSUME_IN(DataState); - } else { - bufferCharacter(cc); - HTML_ADVANCE_TO(ScriptDataEscapedState); + RECONSUME_IN(DataState); } - } + bufferCharacter(character); + ADVANCE_TO(ScriptDataEscapedState); END_STATE() - HTML_BEGIN_STATE(ScriptDataEscapedDashDashState) { - if (cc == '-') { - bufferCharacter(cc); - HTML_ADVANCE_TO(ScriptDataEscapedDashDashState); - } else if (cc == '<') - HTML_ADVANCE_TO(ScriptDataEscapedLessThanSignState); - else if (cc == '>') { - bufferCharacter(cc); - HTML_ADVANCE_TO(ScriptDataState); - } else if (cc == kEndOfFileMarker) { + BEGIN_STATE(ScriptDataEscapedDashDashState) + if (character == '-') { + bufferASCIICharacter('-'); + ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataEscapedDashDashState); + } + if (character == '<') + ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataEscapedLessThanSignState); + if (character == '>') { + bufferASCIICharacter('>'); + ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataState); + } + if (character == kEndOfFileMarker) { parseError(); - HTML_RECONSUME_IN(DataState); - } else { - bufferCharacter(cc); - HTML_ADVANCE_TO(ScriptDataEscapedState); + RECONSUME_IN(DataState); } - } + bufferCharacter(character); + ADVANCE_TO(ScriptDataEscapedState); END_STATE() - HTML_BEGIN_STATE(ScriptDataEscapedLessThanSignState) { - if (cc == '/') { + BEGIN_STATE(ScriptDataEscapedLessThanSignState) + if (character == '/') { m_temporaryBuffer.clear(); ASSERT(m_bufferedEndTagName.isEmpty()); - HTML_ADVANCE_TO(ScriptDataEscapedEndTagOpenState); - } else if (isASCIIUpper(cc)) { - bufferCharacter('<'); - bufferCharacter(cc); - m_temporaryBuffer.clear(); - m_temporaryBuffer.append(toLowerCase(cc)); - HTML_ADVANCE_TO(ScriptDataDoubleEscapeStartState); - } else if (isASCIILower(cc)) { - bufferCharacter('<'); - bufferCharacter(cc); + ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataEscapedEndTagOpenState); + } + if (isASCIIAlpha(character)) { + bufferASCIICharacter('<'); + bufferASCIICharacter(character); m_temporaryBuffer.clear(); - m_temporaryBuffer.append(static_cast<LChar>(cc)); - HTML_ADVANCE_TO(ScriptDataDoubleEscapeStartState); - } else { - bufferCharacter('<'); - HTML_RECONSUME_IN(ScriptDataEscapedState); + appendToTemporaryBuffer(convertASCIIAlphaToLower(character)); + ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataDoubleEscapeStartState); } - } + bufferASCIICharacter('<'); + RECONSUME_IN(ScriptDataEscapedState); END_STATE() - HTML_BEGIN_STATE(ScriptDataEscapedEndTagOpenState) { - if (isASCIIUpper(cc)) { - m_temporaryBuffer.append(static_cast<LChar>(cc)); - addToPossibleEndTag(static_cast<LChar>(toLowerCase(cc))); - HTML_ADVANCE_TO(ScriptDataEscapedEndTagNameState); - } else if (isASCIILower(cc)) { - m_temporaryBuffer.append(static_cast<LChar>(cc)); - addToPossibleEndTag(static_cast<LChar>(cc)); - HTML_ADVANCE_TO(ScriptDataEscapedEndTagNameState); - } else { - bufferCharacter('<'); - bufferCharacter('/'); - HTML_RECONSUME_IN(ScriptDataEscapedState); + BEGIN_STATE(ScriptDataEscapedEndTagOpenState) + if (isASCIIAlpha(character)) { + appendToTemporaryBuffer(character); + appendToPossibleEndTag(convertASCIIAlphaToLower(character)); + ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataEscapedEndTagNameState); } - } + bufferASCIICharacter('<'); + bufferASCIICharacter('/'); + RECONSUME_IN(ScriptDataEscapedState); END_STATE() - HTML_BEGIN_STATE(ScriptDataEscapedEndTagNameState) { - if (isASCIIUpper(cc)) { - m_temporaryBuffer.append(static_cast<LChar>(cc)); - addToPossibleEndTag(static_cast<LChar>(toLowerCase(cc))); - HTML_ADVANCE_TO(ScriptDataEscapedEndTagNameState); - } else if (isASCIILower(cc)) { - m_temporaryBuffer.append(static_cast<LChar>(cc)); - addToPossibleEndTag(static_cast<LChar>(cc)); - HTML_ADVANCE_TO(ScriptDataEscapedEndTagNameState); - } else { - if (isTokenizerWhitespace(cc)) { - if (isAppropriateEndTag()) { - m_temporaryBuffer.append(static_cast<LChar>(cc)); - FLUSH_AND_ADVANCE_TO(BeforeAttributeNameState); - } - } else if (cc == '/') { - if (isAppropriateEndTag()) { - m_temporaryBuffer.append(static_cast<LChar>(cc)); - FLUSH_AND_ADVANCE_TO(SelfClosingStartTagState); - } - } else if (cc == '>') { - if (isAppropriateEndTag()) { - m_temporaryBuffer.append(static_cast<LChar>(cc)); - return flushEmitAndResumeIn(source, HTMLTokenizer::DataState); - } + BEGIN_STATE(ScriptDataEscapedEndTagNameState) + if (isASCIIAlpha(character)) { + appendToTemporaryBuffer(character); + appendToPossibleEndTag(convertASCIIAlphaToLower(character)); + ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataEscapedEndTagNameState); + } + if (isTokenizerWhitespace(character)) { + if (isAppropriateEndTag()) { + if (commitToPartialEndTag(source, character, BeforeAttributeNameState)) + return true; + SWITCH_TO(BeforeAttributeNameState); } - bufferCharacter('<'); - bufferCharacter('/'); - m_token->appendToCharacter(m_temporaryBuffer); - m_bufferedEndTagName.clear(); - m_temporaryBuffer.clear(); - HTML_RECONSUME_IN(ScriptDataEscapedState); + } else if (character == '/') { + if (isAppropriateEndTag()) { + if (commitToPartialEndTag(source, '/', SelfClosingStartTagState)) + return true; + SWITCH_TO(SelfClosingStartTagState); + } + } else if (character == '>') { + if (isAppropriateEndTag()) + return commitToCompleteEndTag(source); } - } + bufferASCIICharacter('<'); + bufferASCIICharacter('/'); + m_token.appendToCharacter(m_temporaryBuffer); + m_bufferedEndTagName.clear(); + m_temporaryBuffer.clear(); + RECONSUME_IN(ScriptDataEscapedState); END_STATE() - HTML_BEGIN_STATE(ScriptDataDoubleEscapeStartState) { - if (isTokenizerWhitespace(cc) || cc == '/' || cc == '>') { - bufferCharacter(cc); - if (temporaryBufferIs(scriptTag.localName())) - HTML_ADVANCE_TO(ScriptDataDoubleEscapedState); + BEGIN_STATE(ScriptDataDoubleEscapeStartState) + if (isTokenizerWhitespace(character) || character == '/' || character == '>') { + bufferASCIICharacter(character); + if (temporaryBufferIs("script")) + ADVANCE_TO(ScriptDataDoubleEscapedState); else - HTML_ADVANCE_TO(ScriptDataEscapedState); - } else if (isASCIIUpper(cc)) { - bufferCharacter(cc); - m_temporaryBuffer.append(toLowerCase(cc)); - HTML_ADVANCE_TO(ScriptDataDoubleEscapeStartState); - } else if (isASCIILower(cc)) { - bufferCharacter(cc); - m_temporaryBuffer.append(static_cast<LChar>(cc)); - HTML_ADVANCE_TO(ScriptDataDoubleEscapeStartState); - } else - HTML_RECONSUME_IN(ScriptDataEscapedState); - } + ADVANCE_TO(ScriptDataEscapedState); + } + if (isASCIIAlpha(character)) { + bufferASCIICharacter(character); + appendToTemporaryBuffer(convertASCIIAlphaToLower(character)); + ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataDoubleEscapeStartState); + } + RECONSUME_IN(ScriptDataEscapedState); END_STATE() - HTML_BEGIN_STATE(ScriptDataDoubleEscapedState) { - if (cc == '-') { - bufferCharacter(cc); - HTML_ADVANCE_TO(ScriptDataDoubleEscapedDashState); - } else if (cc == '<') { - bufferCharacter(cc); - HTML_ADVANCE_TO(ScriptDataDoubleEscapedLessThanSignState); - } else if (cc == kEndOfFileMarker) { + BEGIN_STATE(ScriptDataDoubleEscapedState) + if (character == '-') { + bufferASCIICharacter('-'); + ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataDoubleEscapedDashState); + } + if (character == '<') { + bufferASCIICharacter('<'); + ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataDoubleEscapedLessThanSignState); + } + if (character == kEndOfFileMarker) { parseError(); - HTML_RECONSUME_IN(DataState); - } else { - bufferCharacter(cc); - HTML_ADVANCE_TO(ScriptDataDoubleEscapedState); + RECONSUME_IN(DataState); } - } + bufferCharacter(character); + ADVANCE_TO(ScriptDataDoubleEscapedState); END_STATE() - HTML_BEGIN_STATE(ScriptDataDoubleEscapedDashState) { - if (cc == '-') { - bufferCharacter(cc); - HTML_ADVANCE_TO(ScriptDataDoubleEscapedDashDashState); - } else if (cc == '<') { - bufferCharacter(cc); - HTML_ADVANCE_TO(ScriptDataDoubleEscapedLessThanSignState); - } else if (cc == kEndOfFileMarker) { + BEGIN_STATE(ScriptDataDoubleEscapedDashState) + if (character == '-') { + bufferASCIICharacter('-'); + ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataDoubleEscapedDashDashState); + } + if (character == '<') { + bufferASCIICharacter('<'); + ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataDoubleEscapedLessThanSignState); + } + if (character == kEndOfFileMarker) { parseError(); - HTML_RECONSUME_IN(DataState); - } else { - bufferCharacter(cc); - HTML_ADVANCE_TO(ScriptDataDoubleEscapedState); + RECONSUME_IN(DataState); } - } + bufferCharacter(character); + ADVANCE_TO(ScriptDataDoubleEscapedState); END_STATE() - HTML_BEGIN_STATE(ScriptDataDoubleEscapedDashDashState) { - if (cc == '-') { - bufferCharacter(cc); - HTML_ADVANCE_TO(ScriptDataDoubleEscapedDashDashState); - } else if (cc == '<') { - bufferCharacter(cc); - HTML_ADVANCE_TO(ScriptDataDoubleEscapedLessThanSignState); - } else if (cc == '>') { - bufferCharacter(cc); - HTML_ADVANCE_TO(ScriptDataState); - } else if (cc == kEndOfFileMarker) { + BEGIN_STATE(ScriptDataDoubleEscapedDashDashState) + if (character == '-') { + bufferASCIICharacter('-'); + ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataDoubleEscapedDashDashState); + } + if (character == '<') { + bufferASCIICharacter('<'); + ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataDoubleEscapedLessThanSignState); + } + if (character == '>') { + bufferASCIICharacter('>'); + ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataState); + } + if (character == kEndOfFileMarker) { parseError(); - HTML_RECONSUME_IN(DataState); - } else { - bufferCharacter(cc); - HTML_ADVANCE_TO(ScriptDataDoubleEscapedState); + RECONSUME_IN(DataState); } - } + bufferCharacter(character); + ADVANCE_TO(ScriptDataDoubleEscapedState); END_STATE() - HTML_BEGIN_STATE(ScriptDataDoubleEscapedLessThanSignState) { - if (cc == '/') { - bufferCharacter(cc); + BEGIN_STATE(ScriptDataDoubleEscapedLessThanSignState) + if (character == '/') { + bufferASCIICharacter('/'); m_temporaryBuffer.clear(); - HTML_ADVANCE_TO(ScriptDataDoubleEscapeEndState); - } else - HTML_RECONSUME_IN(ScriptDataDoubleEscapedState); - } + ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataDoubleEscapeEndState); + } + RECONSUME_IN(ScriptDataDoubleEscapedState); END_STATE() - HTML_BEGIN_STATE(ScriptDataDoubleEscapeEndState) { - if (isTokenizerWhitespace(cc) || cc == '/' || cc == '>') { - bufferCharacter(cc); - if (temporaryBufferIs(scriptTag.localName())) - HTML_ADVANCE_TO(ScriptDataEscapedState); + BEGIN_STATE(ScriptDataDoubleEscapeEndState) + if (isTokenizerWhitespace(character) || character == '/' || character == '>') { + bufferASCIICharacter(character); + if (temporaryBufferIs("script")) + ADVANCE_TO(ScriptDataEscapedState); else - HTML_ADVANCE_TO(ScriptDataDoubleEscapedState); - } else if (isASCIIUpper(cc)) { - bufferCharacter(cc); - m_temporaryBuffer.append(toLowerCase(cc)); - HTML_ADVANCE_TO(ScriptDataDoubleEscapeEndState); - } else if (isASCIILower(cc)) { - bufferCharacter(cc); - m_temporaryBuffer.append(static_cast<LChar>(cc)); - HTML_ADVANCE_TO(ScriptDataDoubleEscapeEndState); - } else - HTML_RECONSUME_IN(ScriptDataDoubleEscapedState); - } - END_STATE() - - HTML_BEGIN_STATE(BeforeAttributeNameState) { - if (isTokenizerWhitespace(cc)) - HTML_ADVANCE_TO(BeforeAttributeNameState); - else if (cc == '/') - HTML_ADVANCE_TO(SelfClosingStartTagState); - else if (cc == '>') - return emitAndResumeIn(source, HTMLTokenizer::DataState); - else if (m_options.usePreHTML5ParserQuirks && cc == '<') - return emitAndReconsumeIn(source, HTMLTokenizer::DataState); - else if (isASCIIUpper(cc)) { - m_token->addNewAttribute(); - m_token->beginAttributeName(source.numberOfCharactersConsumed()); - m_token->appendToAttributeName(toLowerCase(cc)); - HTML_ADVANCE_TO(AttributeNameState); - } else if (cc == kEndOfFileMarker) { - parseError(); - HTML_RECONSUME_IN(DataState); - } else { - if (cc == '"' || cc == '\'' || cc == '<' || cc == '=') - parseError(); - m_token->addNewAttribute(); - m_token->beginAttributeName(source.numberOfCharactersConsumed()); - m_token->appendToAttributeName(cc); - HTML_ADVANCE_TO(AttributeNameState); + ADVANCE_TO(ScriptDataDoubleEscapedState); } - } - END_STATE() - - HTML_BEGIN_STATE(AttributeNameState) { - if (isTokenizerWhitespace(cc)) { - m_token->endAttributeName(source.numberOfCharactersConsumed()); - HTML_ADVANCE_TO(AfterAttributeNameState); - } else if (cc == '/') { - m_token->endAttributeName(source.numberOfCharactersConsumed()); - HTML_ADVANCE_TO(SelfClosingStartTagState); - } else if (cc == '=') { - m_token->endAttributeName(source.numberOfCharactersConsumed()); - HTML_ADVANCE_TO(BeforeAttributeValueState); - } else if (cc == '>') { - m_token->endAttributeName(source.numberOfCharactersConsumed()); - return emitAndResumeIn(source, HTMLTokenizer::DataState); - } else if (m_options.usePreHTML5ParserQuirks && cc == '<') { - m_token->endAttributeName(source.numberOfCharactersConsumed()); - return emitAndReconsumeIn(source, HTMLTokenizer::DataState); - } else if (isASCIIUpper(cc)) { - m_token->appendToAttributeName(toLowerCase(cc)); - HTML_ADVANCE_TO(AttributeNameState); - } else if (cc == kEndOfFileMarker) { - parseError(); - m_token->endAttributeName(source.numberOfCharactersConsumed()); - HTML_RECONSUME_IN(DataState); - } else { - if (cc == '"' || cc == '\'' || cc == '<' || cc == '=') - parseError(); - m_token->appendToAttributeName(cc); - HTML_ADVANCE_TO(AttributeNameState); + if (isASCIIAlpha(character)) { + bufferASCIICharacter(character); + appendToTemporaryBuffer(convertASCIIAlphaToLower(character)); + ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataDoubleEscapeEndState); } - } + RECONSUME_IN(ScriptDataDoubleEscapedState); END_STATE() - HTML_BEGIN_STATE(AfterAttributeNameState) { - if (isTokenizerWhitespace(cc)) - HTML_ADVANCE_TO(AfterAttributeNameState); - else if (cc == '/') - HTML_ADVANCE_TO(SelfClosingStartTagState); - else if (cc == '=') - HTML_ADVANCE_TO(BeforeAttributeValueState); - else if (cc == '>') - return emitAndResumeIn(source, HTMLTokenizer::DataState); - else if (m_options.usePreHTML5ParserQuirks && cc == '<') - return emitAndReconsumeIn(source, HTMLTokenizer::DataState); - else if (isASCIIUpper(cc)) { - m_token->addNewAttribute(); - m_token->beginAttributeName(source.numberOfCharactersConsumed()); - m_token->appendToAttributeName(toLowerCase(cc)); - HTML_ADVANCE_TO(AttributeNameState); - } else if (cc == kEndOfFileMarker) { - parseError(); - HTML_RECONSUME_IN(DataState); - } else { - if (cc == '"' || cc == '\'' || cc == '<') - parseError(); - m_token->addNewAttribute(); - m_token->beginAttributeName(source.numberOfCharactersConsumed()); - m_token->appendToAttributeName(cc); - HTML_ADVANCE_TO(AttributeNameState); + BEGIN_STATE(BeforeAttributeNameState) + if (isTokenizerWhitespace(character)) + ADVANCE_TO(BeforeAttributeNameState); + if (character == '/') + ADVANCE_PAST_NON_NEWLINE_TO(SelfClosingStartTagState); + if (character == '>') + return emitAndResumeInDataState(source); + if (m_options.usePreHTML5ParserQuirks && character == '<') + return emitAndReconsumeInDataState(); + if (character == kEndOfFileMarker) { + parseError(); + RECONSUME_IN(DataState); } - } + if (character == '"' || character == '\'' || character == '<' || character == '=') + parseError(); + m_token.beginAttribute(source.numberOfCharactersConsumed()); + m_token.appendToAttributeName(toASCIILower(character)); + ADVANCE_PAST_NON_NEWLINE_TO(AttributeNameState); + END_STATE() + + BEGIN_STATE(AttributeNameState) + if (isTokenizerWhitespace(character)) + ADVANCE_TO(AfterAttributeNameState); + if (character == '/') + ADVANCE_PAST_NON_NEWLINE_TO(SelfClosingStartTagState); + if (character == '=') + ADVANCE_PAST_NON_NEWLINE_TO(BeforeAttributeValueState); + if (character == '>') + return emitAndResumeInDataState(source); + if (m_options.usePreHTML5ParserQuirks && character == '<') + return emitAndReconsumeInDataState(); + if (character == kEndOfFileMarker) { + parseError(); + RECONSUME_IN(DataState); + } + if (character == '"' || character == '\'' || character == '<' || character == '=') + parseError(); + m_token.appendToAttributeName(toASCIILower(character)); + ADVANCE_PAST_NON_NEWLINE_TO(AttributeNameState); + END_STATE() + + BEGIN_STATE(AfterAttributeNameState) + if (isTokenizerWhitespace(character)) + ADVANCE_TO(AfterAttributeNameState); + if (character == '/') + ADVANCE_PAST_NON_NEWLINE_TO(SelfClosingStartTagState); + if (character == '=') + ADVANCE_PAST_NON_NEWLINE_TO(BeforeAttributeValueState); + if (character == '>') + return emitAndResumeInDataState(source); + if (m_options.usePreHTML5ParserQuirks && character == '<') + return emitAndReconsumeInDataState(); + if (character == kEndOfFileMarker) { + parseError(); + RECONSUME_IN(DataState); + } + if (character == '"' || character == '\'' || character == '<') + parseError(); + m_token.beginAttribute(source.numberOfCharactersConsumed()); + m_token.appendToAttributeName(toASCIILower(character)); + ADVANCE_PAST_NON_NEWLINE_TO(AttributeNameState); END_STATE() - HTML_BEGIN_STATE(BeforeAttributeValueState) { - if (isTokenizerWhitespace(cc)) - HTML_ADVANCE_TO(BeforeAttributeValueState); - else if (cc == '"') { - m_token->beginAttributeValue(source.numberOfCharactersConsumed() + 1); - HTML_ADVANCE_TO(AttributeValueDoubleQuotedState); - } else if (cc == '&') { - m_token->beginAttributeValue(source.numberOfCharactersConsumed()); - HTML_RECONSUME_IN(AttributeValueUnquotedState); - } else if (cc == '\'') { - m_token->beginAttributeValue(source.numberOfCharactersConsumed() + 1); - HTML_ADVANCE_TO(AttributeValueSingleQuotedState); - } else if (cc == '>') { - parseError(); - return emitAndResumeIn(source, HTMLTokenizer::DataState); - } else if (cc == kEndOfFileMarker) { - parseError(); - HTML_RECONSUME_IN(DataState); - } else { - if (cc == '<' || cc == '=' || cc == '`') - parseError(); - m_token->beginAttributeValue(source.numberOfCharactersConsumed()); - m_token->appendToAttributeValue(cc); - HTML_ADVANCE_TO(AttributeValueUnquotedState); + BEGIN_STATE(BeforeAttributeValueState) + if (isTokenizerWhitespace(character)) + ADVANCE_TO(BeforeAttributeValueState); + if (character == '"') + ADVANCE_PAST_NON_NEWLINE_TO(AttributeValueDoubleQuotedState); + if (character == '&') + RECONSUME_IN(AttributeValueUnquotedState); + if (character == '\'') + ADVANCE_PAST_NON_NEWLINE_TO(AttributeValueSingleQuotedState); + if (character == '>') { + parseError(); + return emitAndResumeInDataState(source); } - } + if (character == kEndOfFileMarker) { + parseError(); + RECONSUME_IN(DataState); + } + if (character == '<' || character == '=' || character == '`') + parseError(); + m_token.appendToAttributeValue(character); + ADVANCE_PAST_NON_NEWLINE_TO(AttributeValueUnquotedState); END_STATE() - HTML_BEGIN_STATE(AttributeValueDoubleQuotedState) { - if (cc == '"') { - m_token->endAttributeValue(source.numberOfCharactersConsumed()); - HTML_ADVANCE_TO(AfterAttributeValueQuotedState); - } else if (cc == '&') { + BEGIN_STATE(AttributeValueDoubleQuotedState) + if (character == '"') { + m_token.endAttribute(source.numberOfCharactersConsumed()); + ADVANCE_PAST_NON_NEWLINE_TO(AfterAttributeValueQuotedState); + } + if (character == '&') { m_additionalAllowedCharacter = '"'; - HTML_ADVANCE_TO(CharacterReferenceInAttributeValueState); - } else if (cc == kEndOfFileMarker) { + ADVANCE_PAST_NON_NEWLINE_TO(CharacterReferenceInAttributeValueState); + } + if (character == kEndOfFileMarker) { parseError(); - m_token->endAttributeValue(source.numberOfCharactersConsumed()); - HTML_RECONSUME_IN(DataState); - } else { - m_token->appendToAttributeValue(cc); - HTML_ADVANCE_TO(AttributeValueDoubleQuotedState); + m_token.endAttribute(source.numberOfCharactersConsumed()); + RECONSUME_IN(DataState); } - } + m_token.appendToAttributeValue(character); + ADVANCE_TO(AttributeValueDoubleQuotedState); END_STATE() - HTML_BEGIN_STATE(AttributeValueSingleQuotedState) { - if (cc == '\'') { - m_token->endAttributeValue(source.numberOfCharactersConsumed()); - HTML_ADVANCE_TO(AfterAttributeValueQuotedState); - } else if (cc == '&') { + BEGIN_STATE(AttributeValueSingleQuotedState) + if (character == '\'') { + m_token.endAttribute(source.numberOfCharactersConsumed()); + ADVANCE_PAST_NON_NEWLINE_TO(AfterAttributeValueQuotedState); + } + if (character == '&') { m_additionalAllowedCharacter = '\''; - HTML_ADVANCE_TO(CharacterReferenceInAttributeValueState); - } else if (cc == kEndOfFileMarker) { + ADVANCE_PAST_NON_NEWLINE_TO(CharacterReferenceInAttributeValueState); + } + if (character == kEndOfFileMarker) { parseError(); - m_token->endAttributeValue(source.numberOfCharactersConsumed()); - HTML_RECONSUME_IN(DataState); - } else { - m_token->appendToAttributeValue(cc); - HTML_ADVANCE_TO(AttributeValueSingleQuotedState); + m_token.endAttribute(source.numberOfCharactersConsumed()); + RECONSUME_IN(DataState); } - } + m_token.appendToAttributeValue(character); + ADVANCE_TO(AttributeValueSingleQuotedState); END_STATE() - HTML_BEGIN_STATE(AttributeValueUnquotedState) { - if (isTokenizerWhitespace(cc)) { - m_token->endAttributeValue(source.numberOfCharactersConsumed()); - HTML_ADVANCE_TO(BeforeAttributeNameState); - } else if (cc == '&') { + BEGIN_STATE(AttributeValueUnquotedState) + if (isTokenizerWhitespace(character)) { + m_token.endAttribute(source.numberOfCharactersConsumed()); + ADVANCE_TO(BeforeAttributeNameState); + } + if (character == '&') { m_additionalAllowedCharacter = '>'; - HTML_ADVANCE_TO(CharacterReferenceInAttributeValueState); - } else if (cc == '>') { - m_token->endAttributeValue(source.numberOfCharactersConsumed()); - return emitAndResumeIn(source, HTMLTokenizer::DataState); - } else if (cc == kEndOfFileMarker) { - parseError(); - m_token->endAttributeValue(source.numberOfCharactersConsumed()); - HTML_RECONSUME_IN(DataState); - } else { - if (cc == '"' || cc == '\'' || cc == '<' || cc == '=' || cc == '`') - parseError(); - m_token->appendToAttributeValue(cc); - HTML_ADVANCE_TO(AttributeValueUnquotedState); + ADVANCE_PAST_NON_NEWLINE_TO(CharacterReferenceInAttributeValueState); } - } + if (character == '>') { + m_token.endAttribute(source.numberOfCharactersConsumed()); + return emitAndResumeInDataState(source); + } + if (character == kEndOfFileMarker) { + parseError(); + m_token.endAttribute(source.numberOfCharactersConsumed()); + RECONSUME_IN(DataState); + } + if (character == '"' || character == '\'' || character == '<' || character == '=' || character == '`') + parseError(); + m_token.appendToAttributeValue(character); + ADVANCE_PAST_NON_NEWLINE_TO(AttributeValueUnquotedState); END_STATE() - HTML_BEGIN_STATE(CharacterReferenceInAttributeValueState) { + BEGIN_STATE(CharacterReferenceInAttributeValueState) bool notEnoughCharacters = false; StringBuilder decodedEntity; bool success = consumeHTMLEntity(source, decodedEntity, notEnoughCharacters, m_additionalAllowedCharacter); if (notEnoughCharacters) - return haveBufferedCharacterToken(); + RETURN_IN_CURRENT_STATE(haveBufferedCharacterToken()); if (!success) { ASSERT(decodedEntity.isEmpty()); - m_token->appendToAttributeValue('&'); + m_token.appendToAttributeValue('&'); } else { for (unsigned i = 0; i < decodedEntity.length(); ++i) - m_token->appendToAttributeValue(decodedEntity[i]); + m_token.appendToAttributeValue(decodedEntity[i]); } // We're supposed to switch back to the attribute value state that // we were in when we were switched into this state. Rather than // keeping track of this explictly, we observe that the previous // state can be determined by m_additionalAllowedCharacter. if (m_additionalAllowedCharacter == '"') - HTML_SWITCH_TO(AttributeValueDoubleQuotedState); - else if (m_additionalAllowedCharacter == '\'') - HTML_SWITCH_TO(AttributeValueSingleQuotedState); - else if (m_additionalAllowedCharacter == '>') - HTML_SWITCH_TO(AttributeValueUnquotedState); - else - ASSERT_NOT_REACHED(); - } - END_STATE() - - HTML_BEGIN_STATE(AfterAttributeValueQuotedState) { - if (isTokenizerWhitespace(cc)) - HTML_ADVANCE_TO(BeforeAttributeNameState); - else if (cc == '/') - HTML_ADVANCE_TO(SelfClosingStartTagState); - else if (cc == '>') - return emitAndResumeIn(source, HTMLTokenizer::DataState); - else if (m_options.usePreHTML5ParserQuirks && cc == '<') - return emitAndReconsumeIn(source, HTMLTokenizer::DataState); - else if (cc == kEndOfFileMarker) { - parseError(); - HTML_RECONSUME_IN(DataState); - } else { - parseError(); - HTML_RECONSUME_IN(BeforeAttributeNameState); + SWITCH_TO(AttributeValueDoubleQuotedState); + if (m_additionalAllowedCharacter == '\'') + SWITCH_TO(AttributeValueSingleQuotedState); + ASSERT(m_additionalAllowedCharacter == '>'); + SWITCH_TO(AttributeValueUnquotedState); + END_STATE() + + BEGIN_STATE(AfterAttributeValueQuotedState) + if (isTokenizerWhitespace(character)) + ADVANCE_TO(BeforeAttributeNameState); + if (character == '/') + ADVANCE_PAST_NON_NEWLINE_TO(SelfClosingStartTagState); + if (character == '>') + return emitAndResumeInDataState(source); + if (m_options.usePreHTML5ParserQuirks && character == '<') + return emitAndReconsumeInDataState(); + if (character == kEndOfFileMarker) { + parseError(); + RECONSUME_IN(DataState); } - } + parseError(); + RECONSUME_IN(BeforeAttributeNameState); END_STATE() - HTML_BEGIN_STATE(SelfClosingStartTagState) { - if (cc == '>') { - m_token->setSelfClosing(); - return emitAndResumeIn(source, HTMLTokenizer::DataState); - } else if (cc == kEndOfFileMarker) { - parseError(); - HTML_RECONSUME_IN(DataState); - } else { + BEGIN_STATE(SelfClosingStartTagState) + if (character == '>') { + m_token.setSelfClosing(); + return emitAndResumeInDataState(source); + } + if (character == kEndOfFileMarker) { parseError(); - HTML_RECONSUME_IN(BeforeAttributeNameState); + RECONSUME_IN(DataState); } - } + parseError(); + RECONSUME_IN(BeforeAttributeNameState); END_STATE() - HTML_BEGIN_STATE(BogusCommentState) { - m_token->beginComment(); - HTML_RECONSUME_IN(ContinueBogusCommentState); - } + BEGIN_STATE(BogusCommentState) + m_token.beginComment(); + RECONSUME_IN(ContinueBogusCommentState); END_STATE() - HTML_BEGIN_STATE(ContinueBogusCommentState) { - if (cc == '>') - return emitAndResumeIn(source, HTMLTokenizer::DataState); - else if (cc == kEndOfFileMarker) - return emitAndReconsumeIn(source, HTMLTokenizer::DataState); - else { - m_token->appendToComment(cc); - HTML_ADVANCE_TO(ContinueBogusCommentState); - } - } + BEGIN_STATE(ContinueBogusCommentState) + if (character == '>') + return emitAndResumeInDataState(source); + if (character == kEndOfFileMarker) + return emitAndReconsumeInDataState(); + m_token.appendToComment(character); + ADVANCE_TO(ContinueBogusCommentState); END_STATE() - HTML_BEGIN_STATE(MarkupDeclarationOpenState) { - DEFINE_STATIC_LOCAL(String, dashDashString, (ASCIILiteral("--"))); - DEFINE_STATIC_LOCAL(String, doctypeString, (ASCIILiteral("doctype"))); - DEFINE_STATIC_LOCAL(String, cdataString, (ASCIILiteral("[CDATA["))); - if (cc == '-') { - SegmentedString::LookAheadResult result = source.lookAhead(dashDashString); - if (result == SegmentedString::DidMatch) { - source.advanceAndASSERT('-'); - source.advanceAndASSERT('-'); - m_token->beginComment(); - HTML_SWITCH_TO(CommentStartState); - } else if (result == SegmentedString::NotEnoughCharacters) - return haveBufferedCharacterToken(); - } else if (cc == 'D' || cc == 'd') { - SegmentedString::LookAheadResult result = source.lookAheadIgnoringCase(doctypeString); + BEGIN_STATE(MarkupDeclarationOpenState) + if (character == '-') { + auto result = source.advancePast("--"); if (result == SegmentedString::DidMatch) { - advanceStringAndASSERTIgnoringCase(source, "doctype"); - HTML_SWITCH_TO(DOCTYPEState); - } else if (result == SegmentedString::NotEnoughCharacters) - return haveBufferedCharacterToken(); - } else if (cc == '[' && shouldAllowCDATA()) { - SegmentedString::LookAheadResult result = source.lookAhead(cdataString); - if (result == SegmentedString::DidMatch) { - advanceStringAndASSERT(source, "[CDATA["); - HTML_SWITCH_TO(CDATASectionState); - } else if (result == SegmentedString::NotEnoughCharacters) - return haveBufferedCharacterToken(); + m_token.beginComment(); + SWITCH_TO(CommentStartState); + } + if (result == SegmentedString::NotEnoughCharacters) + RETURN_IN_CURRENT_STATE(haveBufferedCharacterToken()); + } else if (isASCIIAlphaCaselessEqual(character, 'd')) { + auto result = source.advancePastLettersIgnoringASCIICase("doctype"); + if (result == SegmentedString::DidMatch) + SWITCH_TO(DOCTYPEState); + if (result == SegmentedString::NotEnoughCharacters) + RETURN_IN_CURRENT_STATE(haveBufferedCharacterToken()); + } else if (character == '[' && shouldAllowCDATA()) { + auto result = source.advancePast("[CDATA["); + if (result == SegmentedString::DidMatch) + SWITCH_TO(CDATASectionState); + if (result == SegmentedString::NotEnoughCharacters) + RETURN_IN_CURRENT_STATE(haveBufferedCharacterToken()); } parseError(); - HTML_RECONSUME_IN(BogusCommentState); - } + RECONSUME_IN(BogusCommentState); END_STATE() - HTML_BEGIN_STATE(CommentStartState) { - if (cc == '-') - HTML_ADVANCE_TO(CommentStartDashState); - else if (cc == '>') { + BEGIN_STATE(CommentStartState) + if (character == '-') + ADVANCE_PAST_NON_NEWLINE_TO(CommentStartDashState); + if (character == '>') { parseError(); - return emitAndResumeIn(source, HTMLTokenizer::DataState); - } else if (cc == kEndOfFileMarker) { + return emitAndResumeInDataState(source); + } + if (character == kEndOfFileMarker) { parseError(); - return emitAndReconsumeIn(source, HTMLTokenizer::DataState); - } else { - m_token->appendToComment(cc); - HTML_ADVANCE_TO(CommentState); + return emitAndReconsumeInDataState(); } - } + m_token.appendToComment(character); + ADVANCE_TO(CommentState); END_STATE() - HTML_BEGIN_STATE(CommentStartDashState) { - if (cc == '-') - HTML_ADVANCE_TO(CommentEndState); - else if (cc == '>') { + BEGIN_STATE(CommentStartDashState) + if (character == '-') + ADVANCE_PAST_NON_NEWLINE_TO(CommentEndState); + if (character == '>') { parseError(); - return emitAndResumeIn(source, HTMLTokenizer::DataState); - } else if (cc == kEndOfFileMarker) { + return emitAndResumeInDataState(source); + } + if (character == kEndOfFileMarker) { parseError(); - return emitAndReconsumeIn(source, HTMLTokenizer::DataState); - } else { - m_token->appendToComment('-'); - m_token->appendToComment(cc); - HTML_ADVANCE_TO(CommentState); + return emitAndReconsumeInDataState(); } - } + m_token.appendToComment('-'); + m_token.appendToComment(character); + ADVANCE_TO(CommentState); END_STATE() - HTML_BEGIN_STATE(CommentState) { - if (cc == '-') - HTML_ADVANCE_TO(CommentEndDashState); - else if (cc == kEndOfFileMarker) { + BEGIN_STATE(CommentState) + if (character == '-') + ADVANCE_PAST_NON_NEWLINE_TO(CommentEndDashState); + if (character == kEndOfFileMarker) { parseError(); - return emitAndReconsumeIn(source, HTMLTokenizer::DataState); - } else { - m_token->appendToComment(cc); - HTML_ADVANCE_TO(CommentState); + return emitAndReconsumeInDataState(); } - } + m_token.appendToComment(character); + ADVANCE_TO(CommentState); END_STATE() - HTML_BEGIN_STATE(CommentEndDashState) { - if (cc == '-') - HTML_ADVANCE_TO(CommentEndState); - else if (cc == kEndOfFileMarker) { + BEGIN_STATE(CommentEndDashState) + if (character == '-') + ADVANCE_PAST_NON_NEWLINE_TO(CommentEndState); + if (character == kEndOfFileMarker) { parseError(); - return emitAndReconsumeIn(source, HTMLTokenizer::DataState); - } else { - m_token->appendToComment('-'); - m_token->appendToComment(cc); - HTML_ADVANCE_TO(CommentState); + return emitAndReconsumeInDataState(); } - } + m_token.appendToComment('-'); + m_token.appendToComment(character); + ADVANCE_TO(CommentState); END_STATE() - HTML_BEGIN_STATE(CommentEndState) { - if (cc == '>') - return emitAndResumeIn(source, HTMLTokenizer::DataState); - else if (cc == '!') { - parseError(); - HTML_ADVANCE_TO(CommentEndBangState); - } else if (cc == '-') { + BEGIN_STATE(CommentEndState) + if (character == '>') + return emitAndResumeInDataState(source); + if (character == '!') { parseError(); - m_token->appendToComment('-'); - HTML_ADVANCE_TO(CommentEndState); - } else if (cc == kEndOfFileMarker) { + ADVANCE_PAST_NON_NEWLINE_TO(CommentEndBangState); + } + if (character == '-') { parseError(); - return emitAndReconsumeIn(source, HTMLTokenizer::DataState); - } else { + m_token.appendToComment('-'); + ADVANCE_PAST_NON_NEWLINE_TO(CommentEndState); + } + if (character == kEndOfFileMarker) { parseError(); - m_token->appendToComment('-'); - m_token->appendToComment('-'); - m_token->appendToComment(cc); - HTML_ADVANCE_TO(CommentState); + return emitAndReconsumeInDataState(); } - } - END_STATE() - - HTML_BEGIN_STATE(CommentEndBangState) { - if (cc == '-') { - m_token->appendToComment('-'); - m_token->appendToComment('-'); - m_token->appendToComment('!'); - HTML_ADVANCE_TO(CommentEndDashState); - } else if (cc == '>') - return emitAndResumeIn(source, HTMLTokenizer::DataState); - else if (cc == kEndOfFileMarker) { + parseError(); + m_token.appendToComment('-'); + m_token.appendToComment('-'); + m_token.appendToComment(character); + ADVANCE_TO(CommentState); + END_STATE() + + BEGIN_STATE(CommentEndBangState) + if (character == '-') { + m_token.appendToComment('-'); + m_token.appendToComment('-'); + m_token.appendToComment('!'); + ADVANCE_PAST_NON_NEWLINE_TO(CommentEndDashState); + } + if (character == '>') + return emitAndResumeInDataState(source); + if (character == kEndOfFileMarker) { parseError(); - return emitAndReconsumeIn(source, HTMLTokenizer::DataState); - } else { - m_token->appendToComment('-'); - m_token->appendToComment('-'); - m_token->appendToComment('!'); - m_token->appendToComment(cc); - HTML_ADVANCE_TO(CommentState); + return emitAndReconsumeInDataState(); } - } + m_token.appendToComment('-'); + m_token.appendToComment('-'); + m_token.appendToComment('!'); + m_token.appendToComment(character); + ADVANCE_TO(CommentState); END_STATE() - HTML_BEGIN_STATE(DOCTYPEState) { - if (isTokenizerWhitespace(cc)) - HTML_ADVANCE_TO(BeforeDOCTYPENameState); - else if (cc == kEndOfFileMarker) { + BEGIN_STATE(DOCTYPEState) + if (isTokenizerWhitespace(character)) + ADVANCE_TO(BeforeDOCTYPENameState); + if (character == kEndOfFileMarker) { parseError(); - m_token->beginDOCTYPE(); - m_token->setForceQuirks(); - return emitAndReconsumeIn(source, HTMLTokenizer::DataState); - } else { - parseError(); - HTML_RECONSUME_IN(BeforeDOCTYPENameState); + m_token.beginDOCTYPE(); + m_token.setForceQuirks(); + return emitAndReconsumeInDataState(); } - } + parseError(); + RECONSUME_IN(BeforeDOCTYPENameState); END_STATE() - HTML_BEGIN_STATE(BeforeDOCTYPENameState) { - if (isTokenizerWhitespace(cc)) - HTML_ADVANCE_TO(BeforeDOCTYPENameState); - else if (isASCIIUpper(cc)) { - m_token->beginDOCTYPE(toLowerCase(cc)); - HTML_ADVANCE_TO(DOCTYPENameState); - } else if (cc == '>') { + BEGIN_STATE(BeforeDOCTYPENameState) + if (isTokenizerWhitespace(character)) + ADVANCE_TO(BeforeDOCTYPENameState); + if (character == '>') { parseError(); - m_token->beginDOCTYPE(); - m_token->setForceQuirks(); - return emitAndResumeIn(source, HTMLTokenizer::DataState); - } else if (cc == kEndOfFileMarker) { + m_token.beginDOCTYPE(); + m_token.setForceQuirks(); + return emitAndResumeInDataState(source); + } + if (character == kEndOfFileMarker) { parseError(); - m_token->beginDOCTYPE(); - m_token->setForceQuirks(); - return emitAndReconsumeIn(source, HTMLTokenizer::DataState); - } else { - m_token->beginDOCTYPE(cc); - HTML_ADVANCE_TO(DOCTYPENameState); + m_token.beginDOCTYPE(); + m_token.setForceQuirks(); + return emitAndReconsumeInDataState(); } - } + m_token.beginDOCTYPE(toASCIILower(character)); + ADVANCE_PAST_NON_NEWLINE_TO(DOCTYPENameState); END_STATE() - HTML_BEGIN_STATE(DOCTYPENameState) { - if (isTokenizerWhitespace(cc)) - HTML_ADVANCE_TO(AfterDOCTYPENameState); - else if (cc == '>') - return emitAndResumeIn(source, HTMLTokenizer::DataState); - else if (isASCIIUpper(cc)) { - m_token->appendToName(toLowerCase(cc)); - HTML_ADVANCE_TO(DOCTYPENameState); - } else if (cc == kEndOfFileMarker) { + BEGIN_STATE(DOCTYPENameState) + if (isTokenizerWhitespace(character)) + ADVANCE_TO(AfterDOCTYPENameState); + if (character == '>') + return emitAndResumeInDataState(source); + if (character == kEndOfFileMarker) { parseError(); - m_token->setForceQuirks(); - return emitAndReconsumeIn(source, HTMLTokenizer::DataState); - } else { - m_token->appendToName(cc); - HTML_ADVANCE_TO(DOCTYPENameState); + m_token.setForceQuirks(); + return emitAndReconsumeInDataState(); } - } + m_token.appendToName(toASCIILower(character)); + ADVANCE_PAST_NON_NEWLINE_TO(DOCTYPENameState); END_STATE() - HTML_BEGIN_STATE(AfterDOCTYPENameState) { - if (isTokenizerWhitespace(cc)) - HTML_ADVANCE_TO(AfterDOCTYPENameState); - if (cc == '>') - return emitAndResumeIn(source, HTMLTokenizer::DataState); - else if (cc == kEndOfFileMarker) { + BEGIN_STATE(AfterDOCTYPENameState) + if (isTokenizerWhitespace(character)) + ADVANCE_TO(AfterDOCTYPENameState); + if (character == '>') + return emitAndResumeInDataState(source); + if (character == kEndOfFileMarker) { parseError(); - m_token->setForceQuirks(); - return emitAndReconsumeIn(source, HTMLTokenizer::DataState); - } else { - DEFINE_STATIC_LOCAL(String, publicString, (ASCIILiteral("public"))); - DEFINE_STATIC_LOCAL(String, systemString, (ASCIILiteral("system"))); - if (cc == 'P' || cc == 'p') { - SegmentedString::LookAheadResult result = source.lookAheadIgnoringCase(publicString); - if (result == SegmentedString::DidMatch) { - advanceStringAndASSERTIgnoringCase(source, "public"); - HTML_SWITCH_TO(AfterDOCTYPEPublicKeywordState); - } else if (result == SegmentedString::NotEnoughCharacters) - return haveBufferedCharacterToken(); - } else if (cc == 'S' || cc == 's') { - SegmentedString::LookAheadResult result = source.lookAheadIgnoringCase(systemString); - if (result == SegmentedString::DidMatch) { - advanceStringAndASSERTIgnoringCase(source, "system"); - HTML_SWITCH_TO(AfterDOCTYPESystemKeywordState); - } else if (result == SegmentedString::NotEnoughCharacters) - return haveBufferedCharacterToken(); - } - parseError(); - m_token->setForceQuirks(); - HTML_ADVANCE_TO(BogusDOCTYPEState); + m_token.setForceQuirks(); + return emitAndReconsumeInDataState(); } - } + if (isASCIIAlphaCaselessEqual(character, 'p')) { + auto result = source.advancePastLettersIgnoringASCIICase("public"); + if (result == SegmentedString::DidMatch) + SWITCH_TO(AfterDOCTYPEPublicKeywordState); + if (result == SegmentedString::NotEnoughCharacters) + RETURN_IN_CURRENT_STATE(haveBufferedCharacterToken()); + } else if (isASCIIAlphaCaselessEqual(character, 's')) { + auto result = source.advancePastLettersIgnoringASCIICase("system"); + if (result == SegmentedString::DidMatch) + SWITCH_TO(AfterDOCTYPESystemKeywordState); + if (result == SegmentedString::NotEnoughCharacters) + RETURN_IN_CURRENT_STATE(haveBufferedCharacterToken()); + } + parseError(); + m_token.setForceQuirks(); + ADVANCE_PAST_NON_NEWLINE_TO(BogusDOCTYPEState); END_STATE() - HTML_BEGIN_STATE(AfterDOCTYPEPublicKeywordState) { - if (isTokenizerWhitespace(cc)) - HTML_ADVANCE_TO(BeforeDOCTYPEPublicIdentifierState); - else if (cc == '"') { - parseError(); - m_token->setPublicIdentifierToEmptyString(); - HTML_ADVANCE_TO(DOCTYPEPublicIdentifierDoubleQuotedState); - } else if (cc == '\'') { + BEGIN_STATE(AfterDOCTYPEPublicKeywordState) + if (isTokenizerWhitespace(character)) + ADVANCE_TO(BeforeDOCTYPEPublicIdentifierState); + if (character == '"') { parseError(); - m_token->setPublicIdentifierToEmptyString(); - HTML_ADVANCE_TO(DOCTYPEPublicIdentifierSingleQuotedState); - } else if (cc == '>') { + m_token.setPublicIdentifierToEmptyString(); + ADVANCE_PAST_NON_NEWLINE_TO(DOCTYPEPublicIdentifierDoubleQuotedState); + } + if (character == '\'') { parseError(); - m_token->setForceQuirks(); - return emitAndResumeIn(source, HTMLTokenizer::DataState); - } else if (cc == kEndOfFileMarker) { + m_token.setPublicIdentifierToEmptyString(); + ADVANCE_PAST_NON_NEWLINE_TO(DOCTYPEPublicIdentifierSingleQuotedState); + } + if (character == '>') { parseError(); - m_token->setForceQuirks(); - return emitAndReconsumeIn(source, HTMLTokenizer::DataState); - } else { + m_token.setForceQuirks(); + return emitAndResumeInDataState(source); + } + if (character == kEndOfFileMarker) { parseError(); - m_token->setForceQuirks(); - HTML_ADVANCE_TO(BogusDOCTYPEState); + m_token.setForceQuirks(); + return emitAndReconsumeInDataState(); } - } + parseError(); + m_token.setForceQuirks(); + ADVANCE_PAST_NON_NEWLINE_TO(BogusDOCTYPEState); END_STATE() - HTML_BEGIN_STATE(BeforeDOCTYPEPublicIdentifierState) { - if (isTokenizerWhitespace(cc)) - HTML_ADVANCE_TO(BeforeDOCTYPEPublicIdentifierState); - else if (cc == '"') { - m_token->setPublicIdentifierToEmptyString(); - HTML_ADVANCE_TO(DOCTYPEPublicIdentifierDoubleQuotedState); - } else if (cc == '\'') { - m_token->setPublicIdentifierToEmptyString(); - HTML_ADVANCE_TO(DOCTYPEPublicIdentifierSingleQuotedState); - } else if (cc == '>') { - parseError(); - m_token->setForceQuirks(); - return emitAndResumeIn(source, HTMLTokenizer::DataState); - } else if (cc == kEndOfFileMarker) { + BEGIN_STATE(BeforeDOCTYPEPublicIdentifierState) + if (isTokenizerWhitespace(character)) + ADVANCE_TO(BeforeDOCTYPEPublicIdentifierState); + if (character == '"') { + m_token.setPublicIdentifierToEmptyString(); + ADVANCE_PAST_NON_NEWLINE_TO(DOCTYPEPublicIdentifierDoubleQuotedState); + } + if (character == '\'') { + m_token.setPublicIdentifierToEmptyString(); + ADVANCE_PAST_NON_NEWLINE_TO(DOCTYPEPublicIdentifierSingleQuotedState); + } + if (character == '>') { parseError(); - m_token->setForceQuirks(); - return emitAndReconsumeIn(source, HTMLTokenizer::DataState); - } else { + m_token.setForceQuirks(); + return emitAndResumeInDataState(source); + } + if (character == kEndOfFileMarker) { parseError(); - m_token->setForceQuirks(); - HTML_ADVANCE_TO(BogusDOCTYPEState); + m_token.setForceQuirks(); + return emitAndReconsumeInDataState(); } - } + parseError(); + m_token.setForceQuirks(); + ADVANCE_PAST_NON_NEWLINE_TO(BogusDOCTYPEState); END_STATE() - HTML_BEGIN_STATE(DOCTYPEPublicIdentifierDoubleQuotedState) { - if (cc == '"') - HTML_ADVANCE_TO(AfterDOCTYPEPublicIdentifierState); - else if (cc == '>') { + BEGIN_STATE(DOCTYPEPublicIdentifierDoubleQuotedState) + if (character == '"') + ADVANCE_PAST_NON_NEWLINE_TO(AfterDOCTYPEPublicIdentifierState); + if (character == '>') { parseError(); - m_token->setForceQuirks(); - return emitAndResumeIn(source, HTMLTokenizer::DataState); - } else if (cc == kEndOfFileMarker) { + m_token.setForceQuirks(); + return emitAndResumeInDataState(source); + } + if (character == kEndOfFileMarker) { parseError(); - m_token->setForceQuirks(); - return emitAndReconsumeIn(source, HTMLTokenizer::DataState); - } else { - m_token->appendToPublicIdentifier(cc); - HTML_ADVANCE_TO(DOCTYPEPublicIdentifierDoubleQuotedState); + m_token.setForceQuirks(); + return emitAndReconsumeInDataState(); } - } + m_token.appendToPublicIdentifier(character); + ADVANCE_TO(DOCTYPEPublicIdentifierDoubleQuotedState); END_STATE() - HTML_BEGIN_STATE(DOCTYPEPublicIdentifierSingleQuotedState) { - if (cc == '\'') - HTML_ADVANCE_TO(AfterDOCTYPEPublicIdentifierState); - else if (cc == '>') { + BEGIN_STATE(DOCTYPEPublicIdentifierSingleQuotedState) + if (character == '\'') + ADVANCE_PAST_NON_NEWLINE_TO(AfterDOCTYPEPublicIdentifierState); + if (character == '>') { parseError(); - m_token->setForceQuirks(); - return emitAndResumeIn(source, HTMLTokenizer::DataState); - } else if (cc == kEndOfFileMarker) { + m_token.setForceQuirks(); + return emitAndResumeInDataState(source); + } + if (character == kEndOfFileMarker) { parseError(); - m_token->setForceQuirks(); - return emitAndReconsumeIn(source, HTMLTokenizer::DataState); - } else { - m_token->appendToPublicIdentifier(cc); - HTML_ADVANCE_TO(DOCTYPEPublicIdentifierSingleQuotedState); + m_token.setForceQuirks(); + return emitAndReconsumeInDataState(); } - } + m_token.appendToPublicIdentifier(character); + ADVANCE_TO(DOCTYPEPublicIdentifierSingleQuotedState); END_STATE() - HTML_BEGIN_STATE(AfterDOCTYPEPublicIdentifierState) { - if (isTokenizerWhitespace(cc)) - HTML_ADVANCE_TO(BetweenDOCTYPEPublicAndSystemIdentifiersState); - else if (cc == '>') - return emitAndResumeIn(source, HTMLTokenizer::DataState); - else if (cc == '"') { - parseError(); - m_token->setSystemIdentifierToEmptyString(); - HTML_ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState); - } else if (cc == '\'') { + BEGIN_STATE(AfterDOCTYPEPublicIdentifierState) + if (isTokenizerWhitespace(character)) + ADVANCE_TO(BetweenDOCTYPEPublicAndSystemIdentifiersState); + if (character == '>') + return emitAndResumeInDataState(source); + if (character == '"') { parseError(); - m_token->setSystemIdentifierToEmptyString(); - HTML_ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState); - } else if (cc == kEndOfFileMarker) { + m_token.setSystemIdentifierToEmptyString(); + ADVANCE_PAST_NON_NEWLINE_TO(DOCTYPESystemIdentifierDoubleQuotedState); + } + if (character == '\'') { parseError(); - m_token->setForceQuirks(); - return emitAndReconsumeIn(source, HTMLTokenizer::DataState); - } else { + m_token.setSystemIdentifierToEmptyString(); + ADVANCE_PAST_NON_NEWLINE_TO(DOCTYPESystemIdentifierSingleQuotedState); + } + if (character == kEndOfFileMarker) { parseError(); - m_token->setForceQuirks(); - HTML_ADVANCE_TO(BogusDOCTYPEState); + m_token.setForceQuirks(); + return emitAndReconsumeInDataState(); } - } - END_STATE() - - HTML_BEGIN_STATE(BetweenDOCTYPEPublicAndSystemIdentifiersState) { - if (isTokenizerWhitespace(cc)) - HTML_ADVANCE_TO(BetweenDOCTYPEPublicAndSystemIdentifiersState); - else if (cc == '>') - return emitAndResumeIn(source, HTMLTokenizer::DataState); - else if (cc == '"') { - m_token->setSystemIdentifierToEmptyString(); - HTML_ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState); - } else if (cc == '\'') { - m_token->setSystemIdentifierToEmptyString(); - HTML_ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState); - } else if (cc == kEndOfFileMarker) { - parseError(); - m_token->setForceQuirks(); - return emitAndReconsumeIn(source, HTMLTokenizer::DataState); - } else { + parseError(); + m_token.setForceQuirks(); + ADVANCE_PAST_NON_NEWLINE_TO(BogusDOCTYPEState); + END_STATE() + + BEGIN_STATE(BetweenDOCTYPEPublicAndSystemIdentifiersState) + if (isTokenizerWhitespace(character)) + ADVANCE_TO(BetweenDOCTYPEPublicAndSystemIdentifiersState); + if (character == '>') + return emitAndResumeInDataState(source); + if (character == '"') { + m_token.setSystemIdentifierToEmptyString(); + ADVANCE_PAST_NON_NEWLINE_TO(DOCTYPESystemIdentifierDoubleQuotedState); + } + if (character == '\'') { + m_token.setSystemIdentifierToEmptyString(); + ADVANCE_PAST_NON_NEWLINE_TO(DOCTYPESystemIdentifierSingleQuotedState); + } + if (character == kEndOfFileMarker) { parseError(); - m_token->setForceQuirks(); - HTML_ADVANCE_TO(BogusDOCTYPEState); + m_token.setForceQuirks(); + return emitAndReconsumeInDataState(); } - } + parseError(); + m_token.setForceQuirks(); + ADVANCE_PAST_NON_NEWLINE_TO(BogusDOCTYPEState); END_STATE() - HTML_BEGIN_STATE(AfterDOCTYPESystemKeywordState) { - if (isTokenizerWhitespace(cc)) - HTML_ADVANCE_TO(BeforeDOCTYPESystemIdentifierState); - else if (cc == '"') { + BEGIN_STATE(AfterDOCTYPESystemKeywordState) + if (isTokenizerWhitespace(character)) + ADVANCE_TO(BeforeDOCTYPESystemIdentifierState); + if (character == '"') { parseError(); - m_token->setSystemIdentifierToEmptyString(); - HTML_ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState); - } else if (cc == '\'') { - parseError(); - m_token->setSystemIdentifierToEmptyString(); - HTML_ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState); - } else if (cc == '>') { + m_token.setSystemIdentifierToEmptyString(); + ADVANCE_PAST_NON_NEWLINE_TO(DOCTYPESystemIdentifierDoubleQuotedState); + } + if (character == '\'') { parseError(); - m_token->setForceQuirks(); - return emitAndResumeIn(source, HTMLTokenizer::DataState); - } else if (cc == kEndOfFileMarker) { + m_token.setSystemIdentifierToEmptyString(); + ADVANCE_PAST_NON_NEWLINE_TO(DOCTYPESystemIdentifierSingleQuotedState); + } + if (character == '>') { parseError(); - m_token->setForceQuirks(); - return emitAndReconsumeIn(source, HTMLTokenizer::DataState); - } else { + m_token.setForceQuirks(); + return emitAndResumeInDataState(source); + } + if (character == kEndOfFileMarker) { parseError(); - m_token->setForceQuirks(); - HTML_ADVANCE_TO(BogusDOCTYPEState); + m_token.setForceQuirks(); + return emitAndReconsumeInDataState(); } - } + parseError(); + m_token.setForceQuirks(); + ADVANCE_PAST_NON_NEWLINE_TO(BogusDOCTYPEState); END_STATE() - HTML_BEGIN_STATE(BeforeDOCTYPESystemIdentifierState) { - if (isTokenizerWhitespace(cc)) - HTML_ADVANCE_TO(BeforeDOCTYPESystemIdentifierState); - if (cc == '"') { - m_token->setSystemIdentifierToEmptyString(); - HTML_ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState); - } else if (cc == '\'') { - m_token->setSystemIdentifierToEmptyString(); - HTML_ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState); - } else if (cc == '>') { - parseError(); - m_token->setForceQuirks(); - return emitAndResumeIn(source, HTMLTokenizer::DataState); - } else if (cc == kEndOfFileMarker) { + BEGIN_STATE(BeforeDOCTYPESystemIdentifierState) + if (isTokenizerWhitespace(character)) + ADVANCE_TO(BeforeDOCTYPESystemIdentifierState); + if (character == '"') { + m_token.setSystemIdentifierToEmptyString(); + ADVANCE_PAST_NON_NEWLINE_TO(DOCTYPESystemIdentifierDoubleQuotedState); + } + if (character == '\'') { + m_token.setSystemIdentifierToEmptyString(); + ADVANCE_PAST_NON_NEWLINE_TO(DOCTYPESystemIdentifierSingleQuotedState); + } + if (character == '>') { parseError(); - m_token->setForceQuirks(); - return emitAndReconsumeIn(source, HTMLTokenizer::DataState); - } else { + m_token.setForceQuirks(); + return emitAndResumeInDataState(source); + } + if (character == kEndOfFileMarker) { parseError(); - m_token->setForceQuirks(); - HTML_ADVANCE_TO(BogusDOCTYPEState); + m_token.setForceQuirks(); + return emitAndReconsumeInDataState(); } - } + parseError(); + m_token.setForceQuirks(); + ADVANCE_PAST_NON_NEWLINE_TO(BogusDOCTYPEState); END_STATE() - HTML_BEGIN_STATE(DOCTYPESystemIdentifierDoubleQuotedState) { - if (cc == '"') - HTML_ADVANCE_TO(AfterDOCTYPESystemIdentifierState); - else if (cc == '>') { + BEGIN_STATE(DOCTYPESystemIdentifierDoubleQuotedState) + if (character == '"') + ADVANCE_PAST_NON_NEWLINE_TO(AfterDOCTYPESystemIdentifierState); + if (character == '>') { parseError(); - m_token->setForceQuirks(); - return emitAndResumeIn(source, HTMLTokenizer::DataState); - } else if (cc == kEndOfFileMarker) { + m_token.setForceQuirks(); + return emitAndResumeInDataState(source); + } + if (character == kEndOfFileMarker) { parseError(); - m_token->setForceQuirks(); - return emitAndReconsumeIn(source, HTMLTokenizer::DataState); - } else { - m_token->appendToSystemIdentifier(cc); - HTML_ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState); + m_token.setForceQuirks(); + return emitAndReconsumeInDataState(); } - } + m_token.appendToSystemIdentifier(character); + ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState); END_STATE() - HTML_BEGIN_STATE(DOCTYPESystemIdentifierSingleQuotedState) { - if (cc == '\'') - HTML_ADVANCE_TO(AfterDOCTYPESystemIdentifierState); - else if (cc == '>') { + BEGIN_STATE(DOCTYPESystemIdentifierSingleQuotedState) + if (character == '\'') + ADVANCE_PAST_NON_NEWLINE_TO(AfterDOCTYPESystemIdentifierState); + if (character == '>') { parseError(); - m_token->setForceQuirks(); - return emitAndResumeIn(source, HTMLTokenizer::DataState); - } else if (cc == kEndOfFileMarker) { + m_token.setForceQuirks(); + return emitAndResumeInDataState(source); + } + if (character == kEndOfFileMarker) { parseError(); - m_token->setForceQuirks(); - return emitAndReconsumeIn(source, HTMLTokenizer::DataState); - } else { - m_token->appendToSystemIdentifier(cc); - HTML_ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState); + m_token.setForceQuirks(); + return emitAndReconsumeInDataState(); } - } + m_token.appendToSystemIdentifier(character); + ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState); END_STATE() - HTML_BEGIN_STATE(AfterDOCTYPESystemIdentifierState) { - if (isTokenizerWhitespace(cc)) - HTML_ADVANCE_TO(AfterDOCTYPESystemIdentifierState); - else if (cc == '>') - return emitAndResumeIn(source, HTMLTokenizer::DataState); - else if (cc == kEndOfFileMarker) { + BEGIN_STATE(AfterDOCTYPESystemIdentifierState) + if (isTokenizerWhitespace(character)) + ADVANCE_TO(AfterDOCTYPESystemIdentifierState); + if (character == '>') + return emitAndResumeInDataState(source); + if (character == kEndOfFileMarker) { parseError(); - m_token->setForceQuirks(); - return emitAndReconsumeIn(source, HTMLTokenizer::DataState); - } else { - parseError(); - HTML_ADVANCE_TO(BogusDOCTYPEState); + m_token.setForceQuirks(); + return emitAndReconsumeInDataState(); } - } + parseError(); + ADVANCE_PAST_NON_NEWLINE_TO(BogusDOCTYPEState); END_STATE() - HTML_BEGIN_STATE(BogusDOCTYPEState) { - if (cc == '>') - return emitAndResumeIn(source, HTMLTokenizer::DataState); - else if (cc == kEndOfFileMarker) - return emitAndReconsumeIn(source, HTMLTokenizer::DataState); - HTML_ADVANCE_TO(BogusDOCTYPEState); - } + BEGIN_STATE(BogusDOCTYPEState) + if (character == '>') + return emitAndResumeInDataState(source); + if (character == kEndOfFileMarker) + return emitAndReconsumeInDataState(); + ADVANCE_TO(BogusDOCTYPEState); END_STATE() - HTML_BEGIN_STATE(CDATASectionState) { - if (cc == ']') - HTML_ADVANCE_TO(CDATASectionRightSquareBracketState); - else if (cc == kEndOfFileMarker) - HTML_RECONSUME_IN(DataState); - else { - bufferCharacter(cc); - HTML_ADVANCE_TO(CDATASectionState); - } - } + BEGIN_STATE(CDATASectionState) + if (character == ']') + ADVANCE_PAST_NON_NEWLINE_TO(CDATASectionRightSquareBracketState); + if (character == kEndOfFileMarker) + RECONSUME_IN(DataState); + bufferCharacter(character); + ADVANCE_TO(CDATASectionState); END_STATE() - HTML_BEGIN_STATE(CDATASectionRightSquareBracketState) { - if (cc == ']') - HTML_ADVANCE_TO(CDATASectionDoubleRightSquareBracketState); - else { - bufferCharacter(']'); - HTML_RECONSUME_IN(CDATASectionState); - } - } + BEGIN_STATE(CDATASectionRightSquareBracketState) + if (character == ']') + ADVANCE_PAST_NON_NEWLINE_TO(CDATASectionDoubleRightSquareBracketState); + bufferASCIICharacter(']'); + RECONSUME_IN(CDATASectionState); + END_STATE() - HTML_BEGIN_STATE(CDATASectionDoubleRightSquareBracketState) { - if (cc == '>') - HTML_ADVANCE_TO(DataState); - else { - bufferCharacter(']'); - bufferCharacter(']'); - HTML_RECONSUME_IN(CDATASectionState); - } - } + BEGIN_STATE(CDATASectionDoubleRightSquareBracketState) + if (character == '>') + ADVANCE_PAST_NON_NEWLINE_TO(DataState); + bufferASCIICharacter(']'); + bufferASCIICharacter(']'); + RECONSUME_IN(CDATASectionState); END_STATE() } @@ -1579,39 +1409,45 @@ String HTMLTokenizer::bufferedCharacters() const void HTMLTokenizer::updateStateFor(const AtomicString& tagName) { if (tagName == textareaTag || tagName == titleTag) - setState(HTMLTokenizer::RCDATAState); + m_state = RCDATAState; else if (tagName == plaintextTag) - setState(HTMLTokenizer::PLAINTEXTState); + m_state = PLAINTEXTState; else if (tagName == scriptTag) - setState(HTMLTokenizer::ScriptDataState); + m_state = ScriptDataState; else if (tagName == styleTag || tagName == iframeTag || tagName == xmpTag || (tagName == noembedTag && m_options.pluginsEnabled) || tagName == noframesTag || (tagName == noscriptTag && m_options.scriptEnabled)) - setState(HTMLTokenizer::RAWTEXTState); + m_state = RAWTEXTState; +} + +inline void HTMLTokenizer::appendToTemporaryBuffer(UChar character) +{ + ASSERT(isASCII(character)); + m_temporaryBuffer.append(character); } -inline bool HTMLTokenizer::temporaryBufferIs(const String& expectedString) +inline bool HTMLTokenizer::temporaryBufferIs(const char* expectedString) { return vectorEqualsString(m_temporaryBuffer, expectedString); } -inline void HTMLTokenizer::addToPossibleEndTag(LChar cc) +inline void HTMLTokenizer::appendToPossibleEndTag(UChar character) { - ASSERT(isEndTagBufferingState(m_state)); - m_bufferedEndTagName.append(cc); + ASSERT(isASCII(character)); + m_bufferedEndTagName.append(character); } -inline bool HTMLTokenizer::isAppropriateEndTag() +inline bool HTMLTokenizer::isAppropriateEndTag() const { if (m_bufferedEndTagName.size() != m_appropriateEndTagName.size()) return false; - size_t numCharacters = m_bufferedEndTagName.size(); + unsigned size = m_bufferedEndTagName.size(); - for (size_t i = 0; i < numCharacters; i++) { + for (unsigned i = 0; i < size; i++) { if (m_bufferedEndTagName[i] != m_appropriateEndTagName[i]) return false; } @@ -1621,7 +1457,6 @@ inline bool HTMLTokenizer::isAppropriateEndTag() inline void HTMLTokenizer::parseError() { - notImplemented(); } } |