summary | refs | log | tree | commit | diff
path: root/Source/WebCore/html/parser/InputStreamPreprocessor.h
diff options
context:
space:
mode:
author Lorry Tar Creator <lorry-tar-importer@lorry> 2017-06-27 06:07:23 +0000
committer Lorry Tar Creator <lorry-tar-importer@lorry> 2017-06-27 06:07:23 +0000
commit 1bf1084f2b10c3b47fd1a588d85d21ed0eb41d0c (patch)
tree 46dcd36c86e7fbc6e5df36deb463b33e9967a6f7 /Source/WebCore/html/parser/InputStreamPreprocessor.h
parent 32761a6cee1d0dee366b885b7b9c777e67885688 (diff)
download WebKitGtk-tarball-master.tar.gz
Diffstat (limited to 'Source/WebCore/html/parser/InputStreamPreprocessor.h')
-rw-r--r-- Source/WebCore/html/parser/InputStreamPreprocessor.h 92
1 files changed, 39 insertions, 53 deletions
diff --git a/Source/WebCore/html/parser/InputStreamPreprocessor.h b/Source/WebCore/html/parser/InputStreamPreprocessor.h
index ffd639abe..361f65301 100644
--- a/Source/WebCore/html/parser/InputStreamPreprocessor.h
+++ b/Source/WebCore/html/parser/InputStreamPreprocessor.h
@@ -25,25 +25,20 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-#ifndef InputStreamPreprocessor_h
-#define InputStreamPreprocessor_h
+#pragma once
#include "SegmentedString.h"
-#include <wtf/Noncopyable.h>
+#include <wtf/unicode/CharacterNames.h>
namespace WebCore {
-const LChar kEndOfFileMarker = 0;
-
// http://www.whatwg.org/specs/web-apps/current-work/#preprocessing-the-input-stream
template <typename Tokenizer>
class InputStreamPreprocessor {
- WTF_MAKE_NONCOPYABLE(InputStreamPreprocessor);
public:
- InputStreamPreprocessor(Tokenizer* tokenizer)
+ explicit InputStreamPreprocessor(Tokenizer& tokenizer)
: m_tokenizer(tokenizer)
{
- reset();
}
ALWAYS_INLINE UChar nextInputCharacter() const { return m_nextInputCharacter; }
@@ -51,88 +46,79 @@ public:
// Returns whether we succeeded in peeking at the next character.
// The only way we can fail to peek is if there are no more
// characters in |source| (after collapsing \r\n, etc).
- ALWAYS_INLINE bool peek(SegmentedString& source)
+ ALWAYS_INLINE bool peek(SegmentedString& source, bool skipNullCharacters = false)
{
- m_nextInputCharacter = source.currentChar();
+ if (UNLIKELY(source.isEmpty()))
+ return false;
+
+ m_nextInputCharacter = source.currentCharacter();
// Every branch in this function is expensive, so we have a
// fast-reject branch for characters that don't require special
// handling. Please run the parser benchmark whenever you touch
// this function. It's very hot.
- static const UChar specialCharacterMask = '\n' | '\r' | '\0';
- if (m_nextInputCharacter & ~specialCharacterMask) {
+ constexpr UChar specialCharacterMask = '\n' | '\r' | '\0';
+ if (LIKELY(m_nextInputCharacter & ~specialCharacterMask)) {
m_skipNextNewLine = false;
return true;
}
- return processNextInputCharacter(source);
+
+ return processNextInputCharacter(source, skipNullCharacters);
}
// Returns whether there are more characters in |source| after advancing.
- ALWAYS_INLINE bool advance(SegmentedString& source)
+ ALWAYS_INLINE bool advance(SegmentedString& source, bool skipNullCharacters = false)
{
- source.advanceAndUpdateLineNumber();
- if (source.isEmpty())
- return false;
- return peek(source);
+ source.advance();
+ return peek(source, skipNullCharacters);
}
-
- bool skipNextNewLine() const { return m_skipNextNewLine; }
-
- void reset(bool skipNextNewLine = false)
+ ALWAYS_INLINE bool advancePastNonNewline(SegmentedString& source, bool skipNullCharacters = false)
{
- m_nextInputCharacter = '\0';
- m_skipNextNewLine = skipNextNewLine;
+ source.advancePastNonNewline();
+ return peek(source, skipNullCharacters);
}
private:
- bool processNextInputCharacter(SegmentedString& source)
+ bool processNextInputCharacter(SegmentedString& source, bool skipNullCharacters)
{
ProcessAgain:
- ASSERT(m_nextInputCharacter == source.currentChar());
-
+ ASSERT(m_nextInputCharacter == source.currentCharacter());
if (m_nextInputCharacter == '\n' && m_skipNextNewLine) {
m_skipNextNewLine = false;
- source.advancePastNewlineAndUpdateLineNumber();
+ source.advancePastNewline();
if (source.isEmpty())
return false;
- m_nextInputCharacter = source.currentChar();
+ m_nextInputCharacter = source.currentCharacter();
}
if (m_nextInputCharacter == '\r') {
m_nextInputCharacter = '\n';
m_skipNextNewLine = true;
- } else {
- m_skipNextNewLine = false;
- // FIXME: The spec indicates that the surrogate pair range as well as
- // a number of specific character values are parse errors and should be replaced
- // by the replacement character. We suspect this is a problem with the spec as doing
- // that filtering breaks surrogate pair handling and causes us not to match Minefield.
- if (m_nextInputCharacter == '\0' && !shouldTreatNullAsEndOfFileMarker(source)) {
- if (m_tokenizer->shouldSkipNullCharacters()) {
- source.advancePastNonNewline();
- if (source.isEmpty())
- return false;
- m_nextInputCharacter = source.currentChar();
- goto ProcessAgain;
- }
- m_nextInputCharacter = 0xFFFD;
- }
+ return true;
+ }
+ m_skipNextNewLine = false;
+ if (m_nextInputCharacter || isAtEndOfFile(source))
+ return true;
+ if (skipNullCharacters && !m_tokenizer.neverSkipNullCharacters()) {
+ source.advancePastNonNewline();
+ if (source.isEmpty())
+ return false;
+ m_nextInputCharacter = source.currentCharacter();
+ goto ProcessAgain;
}
+ m_nextInputCharacter = replacementCharacter;
return true;
}
- bool shouldTreatNullAsEndOfFileMarker(SegmentedString& source) const
+ static bool isAtEndOfFile(SegmentedString& source)
{
return source.isClosed() && source.length() == 1;
}
- Tokenizer* m_tokenizer;
+ Tokenizer& m_tokenizer;
// http://www.whatwg.org/specs/web-apps/current-work/#next-input-character
- UChar m_nextInputCharacter;
- bool m_skipNextNewLine;
+ UChar m_nextInputCharacter { 0 };
+ bool m_skipNextNewLine { false };
};
-}
-
-#endif // InputStreamPreprocessor_h
-
+} // namespace WebCore