webkitgtk-2.16.5HEAD webkitgtk-2.16.5 master

author: Lorry Tar Creator <lorry-tar-importer@lorry> 2017-06-27 06:07:23 +0000
committer: Lorry Tar Creator <lorry-tar-importer@lorry> 2017-06-27 06:07:23 +0000
commit: 1bf1084f2b10c3b47fd1a588d85d21ed0eb41d0c (patch)
tree: 46dcd36c86e7fbc6e5df36deb463b33e9967a6f7 /Source/WebCore/html/parser/HTMLMetaCharsetParser.cpp
parent: 32761a6cee1d0dee366b885b7b9c777e67885688 (diff)
download: WebKitGtk-tarball-master.tar.gz
1 files changed, 38 insertions, 57 deletions
diff --git a/Source/WebCore/html/parser/HTMLMetaCharsetParser.cpp b/Source/WebCore/html/parser/HTMLMetaCharsetParser.cpp
index 11e14a46e..8a8468915 100644
--- a/Source/WebCore/html/parser/HTMLMetaCharsetParser.cpp
+++ b/Source/WebCore/html/parser/HTMLMetaCharsetParser.cpp
@@ -1,5 +1,6 @@
 /*
  * Copyright (C) 2010 Google Inc. All Rights Reserved.
+ * Copyright (C) 2015-2016 Apple Inc. All Rights Reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -28,41 +29,26 @@
 
 #include "HTMLNames.h"
 #include "HTMLParserIdioms.h"
-#include "HTMLTokenizer.h"
-#include "TextCodec.h"
 #include "TextEncodingRegistry.h"
 
-using namespace WTF;
-
 namespace WebCore {
 
 using namespace HTMLNames;
 
 HTMLMetaCharsetParser::HTMLMetaCharsetParser()
-    : m_tokenizer(std::make_unique<HTMLTokenizer>(HTMLParserOptions()))
-    , m_assumedCodec(newTextCodec(Latin1Encoding()))
-    , m_inHeadSection(true)
-    , m_doneChecking(false)
-{
-}
-
-HTMLMetaCharsetParser::~HTMLMetaCharsetParser()
+    : m_codec(newTextCodec(Latin1Encoding()))
 {
 }
 
-static const char charsetString[] = "charset";
-static const size_t charsetLength = sizeof("charset") - 1;
-
-String HTMLMetaCharsetParser::extractCharset(const String& value)
+static StringView extractCharset(const String& value)
 {
-    size_t pos = 0;
     unsigned length = value.length();
-
-    while (pos < length) {
-        pos = value.find(charsetString, pos, false);
+    for (size_t pos = 0; pos < length; ) {
+        pos = value.find("charset", pos, false);
         if (pos == notFound)
             break;
 
+        static const size_t charsetLength = sizeof("charset") - 1;
         pos += charsetLength;
 
         // Skip whitespace.
@@ -77,12 +63,10 @@ String HTMLMetaCharsetParser::extractCharset(const String& value)
         while (pos < length && value[pos] <= ' ')
             ++pos;
 
-        char quoteMark = 0;
-        if (pos < length && (value[pos] == '"' || value[pos] == '\'')) {
-            quoteMark = static_cast<char>(value[pos++]);
-            ASSERT(!(quoteMark & 0x80));
-        }
-            
+        UChar quoteMark = 0;
+        if (pos < length && (value[pos] == '"' || value[pos] == '\''))
+            quoteMark = value[pos++];
+
         if (pos == length)
             break;
 
@@ -93,19 +77,17 @@ String HTMLMetaCharsetParser::extractCharset(const String& value)
         if (quoteMark && (end == length))
             break; // Close quote not found.
 
-        return value.substring(pos, end - pos);
+        return StringView(value).substring(pos, end - pos);
     }
-
-    return "";
+    return StringView();
 }
 
-bool HTMLMetaCharsetParser::processMeta()
+bool HTMLMetaCharsetParser::processMeta(HTMLToken& token)
 {
-    const HTMLToken::AttributeList& tokenAttributes = m_token.attributes();
     AttributeList attributes;
-    for (HTMLToken::AttributeList::const_iterator iter = tokenAttributes.begin(); iter != tokenAttributes.end(); ++iter) {
-        String attributeName = StringImpl::create8BitIfPossible(iter->name);
-        String attributeValue = StringImpl::create8BitIfPossible(iter->value);
+    for (auto& attribute : token.attributes()) {
+        String attributeName = StringImpl::create8BitIfPossible(attribute.name);
+        String attributeValue = StringImpl::create8BitIfPossible(attribute.value);
         attributes.append(std::make_pair(attributeName, attributeValue));
     }
 
@@ -116,15 +98,15 @@ bool HTMLMetaCharsetParser::processMeta()
 TextEncoding HTMLMetaCharsetParser::encodingFromMetaAttributes(const AttributeList& attributes)
 {
     bool gotPragma = false;
-    Mode mode = None;
-    String charset;
+    enum { None, Charset, Pragma } mode = None;
+    StringView charset;
 
-    for (AttributeList::const_iterator iter = attributes.begin(); iter != attributes.end(); ++iter) {
-        const AtomicString& attributeName = iter->first;
-        const String& attributeValue = iter->second;
+    for (auto& attribute : attributes) {
+        const String& attributeName = attribute.first;
+        const String& attributeValue = attribute.second;
 
         if (attributeName == http_equivAttr) {
-            if (equalIgnoringCase(attributeValue, "content-type"))
+            if (equalLettersIgnoringASCIICase(attributeValue, "content-type"))
                 gotPragma = true;
         } else if (charset.isEmpty()) {
             if (attributeName == charsetAttr) {
@@ -139,13 +121,11 @@ TextEncoding HTMLMetaCharsetParser::encodingFromMetaAttributes(const AttributeLi
     }
 
     if (mode == Charset || (mode == Pragma && gotPragma))
-        return TextEncoding(stripLeadingAndTrailingHTMLSpaces(charset));
+        return TextEncoding(stripLeadingAndTrailingHTMLSpaces(charset.toStringWithoutCopying()));
 
     return TextEncoding();
 }
 
-static const int bytesToCheckUnconditionally = 1024; // That many input bytes will be checked for meta charset even if <head> section is over.
-
 bool HTMLMetaCharsetParser::checkForMetaCharset(const char* data, size_t length)
 {
     if (m_doneChecking)
@@ -156,30 +136,32 @@ bool HTMLMetaCharsetParser::checkForMetaCharset(const char* data, size_t length)
     // We still don't have an encoding, and are in the head.
     // The following tags are allowed in <head>:
     // SCRIPT|STYLE|META|LINK|OBJECT|TITLE|BASE
-
+    //
     // We stop scanning when a tag that is not permitted in <head>
     // is seen, rather when </head> is seen, because that more closely
     // matches behavior in other browsers; more details in
     // <http://bugs.webkit.org/show_bug.cgi?id=3590>.
-
+    //
     // Additionally, we ignore things that looks like tags in <title>, <script>
     // and <noscript>; see <http://bugs.webkit.org/show_bug.cgi?id=4560>,
     // <http://bugs.webkit.org/show_bug.cgi?id=12165> and
     // <http://bugs.webkit.org/show_bug.cgi?id=12389>.
-
+    //
     // Since many sites have charset declarations after <body> or other tags
     // that are disallowed in <head>, we don't bail out until we've checked at
     // least bytesToCheckUnconditionally bytes of input.
 
-    m_input.append(SegmentedString(m_assumedCodec->decode(data, length)));
+    constexpr int bytesToCheckUnconditionally = 1024;
+
+    m_input.append(m_codec->decode(data, length));
 
-    while (m_tokenizer->nextToken(m_input, m_token)) {
-        bool end = m_token.type() == HTMLToken::EndTag;
-        if (end || m_token.type() == HTMLToken::StartTag) {
-            AtomicString tagName(m_token.name());
-            if (!end) {
-                m_tokenizer->updateStateFor(tagName);
-                if (tagName == metaTag && processMeta()) {
+    while (auto token = m_tokenizer.nextToken(m_input)) {
+        bool isEnd = token->type() == HTMLToken::EndTag;
+        if (isEnd || token->type() == HTMLToken::StartTag) {
+            AtomicString tagName(token->name());
+            if (!isEnd) {
+                m_tokenizer.updateStateFor(tagName);
+                if (tagName == metaTag && processMeta(*token)) {
                     m_doneChecking = true;
                     return true;
                 }
@@ -189,7 +171,8 @@ bool HTMLMetaCharsetParser::checkForMetaCharset(const char* data, size_t length)
                 && tagName != styleTag && tagName != linkTag
                 && tagName != metaTag && tagName != objectTag
                 && tagName != titleTag && tagName != baseTag
-                && (end || tagName != htmlTag) && (end || tagName != headTag)) {
+                && (isEnd || tagName != htmlTag)
+                && (isEnd || tagName != headTag)) {
                 m_inHeadSection = false;
             }
         }
@@ -198,8 +181,6 @@ bool HTMLMetaCharsetParser::checkForMetaCharset(const char* data, size_t length)
             m_doneChecking = true;
             return true;
         }
-
-        m_token.clear();
     }
 
     return false;
author	Lorry Tar Creator <lorry-tar-importer@lorry>	2017-06-27 06:07:23 +0000
committer	Lorry Tar Creator <lorry-tar-importer@lorry>	2017-06-27 06:07:23 +0000
commit	1bf1084f2b10c3b47fd1a588d85d21ed0eb41d0c (patch)
tree	46dcd36c86e7fbc6e5df36deb463b33e9967a6f7 /Source/WebCore/html/parser/HTMLMetaCharsetParser.cpp
parent	32761a6cee1d0dee366b885b7b9c777e67885688 (diff)
download	WebKitGtk-tarball-master.tar.gz