diff options
author | Lorry Tar Creator <lorry-tar-importer@lorry> | 2017-06-27 06:07:23 +0000 |
---|---|---|
committer | Lorry Tar Creator <lorry-tar-importer@lorry> | 2017-06-27 06:07:23 +0000 |
commit | 1bf1084f2b10c3b47fd1a588d85d21ed0eb41d0c (patch) | |
tree | 46dcd36c86e7fbc6e5df36deb463b33e9967a6f7 /Source/WebCore/html/parser/InputStreamPreprocessor.h | |
parent | 32761a6cee1d0dee366b885b7b9c777e67885688 (diff) | |
download | WebKitGtk-tarball-master.tar.gz |
webkitgtk-2.16.5 HEAD webkitgtk-2.16.5 master
Diffstat (limited to 'Source/WebCore/html/parser/InputStreamPreprocessor.h')
-rw-r--r-- | Source/WebCore/html/parser/InputStreamPreprocessor.h | 92 |
1 files changed, 39 insertions, 53 deletions
diff --git a/Source/WebCore/html/parser/InputStreamPreprocessor.h b/Source/WebCore/html/parser/InputStreamPreprocessor.h index ffd639abe..361f65301 100644 --- a/Source/WebCore/html/parser/InputStreamPreprocessor.h +++ b/Source/WebCore/html/parser/InputStreamPreprocessor.h @@ -25,25 +25,20 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#ifndef InputStreamPreprocessor_h -#define InputStreamPreprocessor_h +#pragma once #include "SegmentedString.h" -#include <wtf/Noncopyable.h> +#include <wtf/unicode/CharacterNames.h> namespace WebCore { -const LChar kEndOfFileMarker = 0; - // http://www.whatwg.org/specs/web-apps/current-work/#preprocessing-the-input-stream template <typename Tokenizer> class InputStreamPreprocessor { - WTF_MAKE_NONCOPYABLE(InputStreamPreprocessor); public: - InputStreamPreprocessor(Tokenizer* tokenizer) + explicit InputStreamPreprocessor(Tokenizer& tokenizer) : m_tokenizer(tokenizer) { - reset(); } ALWAYS_INLINE UChar nextInputCharacter() const { return m_nextInputCharacter; } @@ -51,88 +46,79 @@ public: // Returns whether we succeeded in peeking at the next character. // The only way we can fail to peek is if there are no more // characters in |source| (after collapsing \r\n, etc). - ALWAYS_INLINE bool peek(SegmentedString& source) + ALWAYS_INLINE bool peek(SegmentedString& source, bool skipNullCharacters = false) { - m_nextInputCharacter = source.currentChar(); + if (UNLIKELY(source.isEmpty())) + return false; + + m_nextInputCharacter = source.currentCharacter(); // Every branch in this function is expensive, so we have a // fast-reject branch for characters that don't require special // handling. Please run the parser benchmark whenever you touch // this function. It's very hot. 
- static const UChar specialCharacterMask = '\n' | '\r' | '\0'; - if (m_nextInputCharacter & ~specialCharacterMask) { + constexpr UChar specialCharacterMask = '\n' | '\r' | '\0'; + if (LIKELY(m_nextInputCharacter & ~specialCharacterMask)) { m_skipNextNewLine = false; return true; } - return processNextInputCharacter(source); + + return processNextInputCharacter(source, skipNullCharacters); } // Returns whether there are more characters in |source| after advancing. - ALWAYS_INLINE bool advance(SegmentedString& source) + ALWAYS_INLINE bool advance(SegmentedString& source, bool skipNullCharacters = false) { - source.advanceAndUpdateLineNumber(); - if (source.isEmpty()) - return false; - return peek(source); + source.advance(); + return peek(source, skipNullCharacters); } - - bool skipNextNewLine() const { return m_skipNextNewLine; } - - void reset(bool skipNextNewLine = false) + ALWAYS_INLINE bool advancePastNonNewline(SegmentedString& source, bool skipNullCharacters = false) { - m_nextInputCharacter = '\0'; - m_skipNextNewLine = skipNextNewLine; + source.advancePastNonNewline(); + return peek(source, skipNullCharacters); } private: - bool processNextInputCharacter(SegmentedString& source) + bool processNextInputCharacter(SegmentedString& source, bool skipNullCharacters) { ProcessAgain: - ASSERT(m_nextInputCharacter == source.currentChar()); - + ASSERT(m_nextInputCharacter == source.currentCharacter()); if (m_nextInputCharacter == '\n' && m_skipNextNewLine) { m_skipNextNewLine = false; - source.advancePastNewlineAndUpdateLineNumber(); + source.advancePastNewline(); if (source.isEmpty()) return false; - m_nextInputCharacter = source.currentChar(); + m_nextInputCharacter = source.currentCharacter(); } if (m_nextInputCharacter == '\r') { m_nextInputCharacter = '\n'; m_skipNextNewLine = true; - } else { - m_skipNextNewLine = false; - // FIXME: The spec indicates that the surrogate pair range as well as - // a number of specific character values are parse errors and should 
be replaced - // by the replacement character. We suspect this is a problem with the spec as doing - // that filtering breaks surrogate pair handling and causes us not to match Minefield. - if (m_nextInputCharacter == '\0' && !shouldTreatNullAsEndOfFileMarker(source)) { - if (m_tokenizer->shouldSkipNullCharacters()) { - source.advancePastNonNewline(); - if (source.isEmpty()) - return false; - m_nextInputCharacter = source.currentChar(); - goto ProcessAgain; - } - m_nextInputCharacter = 0xFFFD; - } + return true; + } + m_skipNextNewLine = false; + if (m_nextInputCharacter || isAtEndOfFile(source)) + return true; + if (skipNullCharacters && !m_tokenizer.neverSkipNullCharacters()) { + source.advancePastNonNewline(); + if (source.isEmpty()) + return false; + m_nextInputCharacter = source.currentCharacter(); + goto ProcessAgain; } + m_nextInputCharacter = replacementCharacter; return true; } - bool shouldTreatNullAsEndOfFileMarker(SegmentedString& source) const + static bool isAtEndOfFile(SegmentedString& source) { return source.isClosed() && source.length() == 1; } - Tokenizer* m_tokenizer; + Tokenizer& m_tokenizer; // http://www.whatwg.org/specs/web-apps/current-work/#next-input-character - UChar m_nextInputCharacter; - bool m_skipNextNewLine; + UChar m_nextInputCharacter { 0 }; + bool m_skipNextNewLine { false }; }; -} - -#endif // InputStreamPreprocessor_h - +} // namespace WebCore |