webkitgtk-2.16.5HEAD webkitgtk-2.16.5 master

author: Lorry Tar Creator <lorry-tar-importer@lorry> 2017-06-27 06:07:23 +0000
committer: Lorry Tar Creator <lorry-tar-importer@lorry> 2017-06-27 06:07:23 +0000
commit: 1bf1084f2b10c3b47fd1a588d85d21ed0eb41d0c (patch)
tree: 46dcd36c86e7fbc6e5df36deb463b33e9967a6f7 /Source/WebCore/html/parser/InputStreamPreprocessor.h
parent: 32761a6cee1d0dee366b885b7b9c777e67885688 (diff)
download: WebKitGtk-tarball-master.tar.gz
1 files changed, 39 insertions, 53 deletions
diff --git a/Source/WebCore/html/parser/InputStreamPreprocessor.h b/Source/WebCore/html/parser/InputStreamPreprocessor.h
index ffd639abe..361f65301 100644
--- a/Source/WebCore/html/parser/InputStreamPreprocessor.h
+++ b/Source/WebCore/html/parser/InputStreamPreprocessor.h
@@ -25,25 +25,20 @@
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
-#ifndef InputStreamPreprocessor_h
-#define InputStreamPreprocessor_h
+#pragma once
 
 #include "SegmentedString.h"
-#include <wtf/Noncopyable.h>
+#include <wtf/unicode/CharacterNames.h>
 
 namespace WebCore {
 
-const LChar kEndOfFileMarker = 0;
-
 // http://www.whatwg.org/specs/web-apps/current-work/#preprocessing-the-input-stream
 template <typename Tokenizer>
 class InputStreamPreprocessor {
-    WTF_MAKE_NONCOPYABLE(InputStreamPreprocessor);
 public:
-    InputStreamPreprocessor(Tokenizer* tokenizer)
+    explicit InputStreamPreprocessor(Tokenizer& tokenizer)
         : m_tokenizer(tokenizer)
     {
-        reset();
     }
 
     ALWAYS_INLINE UChar nextInputCharacter() const { return m_nextInputCharacter; }
@@ -51,88 +46,79 @@ public:
     // Returns whether we succeeded in peeking at the next character.
     // The only way we can fail to peek is if there are no more
     // characters in |source| (after collapsing \r\n, etc).
-    ALWAYS_INLINE bool peek(SegmentedString& source)
+    ALWAYS_INLINE bool peek(SegmentedString& source, bool skipNullCharacters = false)
     {
-        m_nextInputCharacter = source.currentChar();
+        if (UNLIKELY(source.isEmpty()))
+            return false;
+
+        m_nextInputCharacter = source.currentCharacter();
 
         // Every branch in this function is expensive, so we have a
         // fast-reject branch for characters that don't require special
         // handling. Please run the parser benchmark whenever you touch
         // this function. It's very hot.
-        static const UChar specialCharacterMask = '\n' | '\r' | '\0';
-        if (m_nextInputCharacter & ~specialCharacterMask) {
+        constexpr UChar specialCharacterMask = '\n' | '\r' | '\0';
+        if (LIKELY(m_nextInputCharacter & ~specialCharacterMask)) {
             m_skipNextNewLine = false;
             return true;
         }
-        return processNextInputCharacter(source);
+
+        return processNextInputCharacter(source, skipNullCharacters);
     }
 
     // Returns whether there are more characters in |source| after advancing.
-    ALWAYS_INLINE bool advance(SegmentedString& source)
+    ALWAYS_INLINE bool advance(SegmentedString& source, bool skipNullCharacters = false)
     {
-        source.advanceAndUpdateLineNumber();
-        if (source.isEmpty())
-            return false;
-        return peek(source);
+        source.advance();
+        return peek(source, skipNullCharacters);
     }
-
-    bool skipNextNewLine() const { return m_skipNextNewLine; }
-
-    void reset(bool skipNextNewLine = false)
+    ALWAYS_INLINE bool advancePastNonNewline(SegmentedString& source, bool skipNullCharacters = false)
     {
-        m_nextInputCharacter = '\0';
-        m_skipNextNewLine = skipNextNewLine;
+        source.advancePastNonNewline();
+        return peek(source, skipNullCharacters);
     }
 
 private:
-    bool processNextInputCharacter(SegmentedString& source)
+    bool processNextInputCharacter(SegmentedString& source, bool skipNullCharacters)
     {
     ProcessAgain:
-        ASSERT(m_nextInputCharacter == source.currentChar());
-
+        ASSERT(m_nextInputCharacter == source.currentCharacter());
         if (m_nextInputCharacter == '\n' && m_skipNextNewLine) {
             m_skipNextNewLine = false;
-            source.advancePastNewlineAndUpdateLineNumber();
+            source.advancePastNewline();
             if (source.isEmpty())
                 return false;
-            m_nextInputCharacter = source.currentChar();
+            m_nextInputCharacter = source.currentCharacter();
         }
         if (m_nextInputCharacter == '\r') {
             m_nextInputCharacter = '\n';
             m_skipNextNewLine = true;
-        } else {
-            m_skipNextNewLine = false;
-            // FIXME: The spec indicates that the surrogate pair range as well as
-            // a number of specific character values are parse errors and should be replaced
-            // by the replacement character. We suspect this is a problem with the spec as doing
-            // that filtering breaks surrogate pair handling and causes us not to match Minefield.
-            if (m_nextInputCharacter == '\0' && !shouldTreatNullAsEndOfFileMarker(source)) {
-                if (m_tokenizer->shouldSkipNullCharacters()) {
-                    source.advancePastNonNewline();
-                    if (source.isEmpty())
-                        return false;
-                    m_nextInputCharacter = source.currentChar();
-                    goto ProcessAgain;
-                }
-                m_nextInputCharacter = 0xFFFD;
-            }
+            return true;
+        }
+        m_skipNextNewLine = false;
+        if (m_nextInputCharacter || isAtEndOfFile(source))
+            return true;
+        if (skipNullCharacters && !m_tokenizer.neverSkipNullCharacters()) {
+            source.advancePastNonNewline();
+            if (source.isEmpty())
+                return false;
+            m_nextInputCharacter = source.currentCharacter();
+            goto ProcessAgain;
         }
+        m_nextInputCharacter = replacementCharacter;
         return true;
     }
 
-    bool shouldTreatNullAsEndOfFileMarker(SegmentedString& source) const
+    static bool isAtEndOfFile(SegmentedString& source)
     {
         return source.isClosed() && source.length() == 1;
     }
 
-    Tokenizer* m_tokenizer;
+    Tokenizer& m_tokenizer;
 
     // http://www.whatwg.org/specs/web-apps/current-work/#next-input-character
-    UChar m_nextInputCharacter;
-    bool m_skipNextNewLine;
+    UChar m_nextInputCharacter { 0 };
+    bool m_skipNextNewLine { false };
 };
 
-}
-
-#endif // InputStreamPreprocessor_h
-
+} // namespace WebCore
author	Lorry Tar Creator <lorry-tar-importer@lorry>	2017-06-27 06:07:23 +0000
committer	Lorry Tar Creator <lorry-tar-importer@lorry>	2017-06-27 06:07:23 +0000
commit	1bf1084f2b10c3b47fd1a588d85d21ed0eb41d0c (patch)
tree	46dcd36c86e7fbc6e5df36deb463b33e9967a6f7 /Source/WebCore/html/parser/InputStreamPreprocessor.h
parent	32761a6cee1d0dee366b885b7b9c777e67885688 (diff)
download	WebKitGtk-tarball-master.tar.gz