Imported WebKit commit bb52bf3c0119e8a128cd93afe5572413a8617de9 (http://svn.webkit.org/repository/webkit/trunk@108790)

author: Simon Hausmann <simon.hausmann@nokia.com> 2012-02-24 16:36:50 +0100
committer: Simon Hausmann <simon.hausmann@nokia.com> 2012-02-24 16:36:50 +0100
commit: ad0d549d4cc13433f77c1ac8f0ab379c83d93f28 (patch)
tree: b34b0daceb7c8e7fdde4b4ec43650ab7caadb0a9 /Source/WebCore/html/parser
parent: 03e12282df9aa1e1fb05a8b90f1cfc2e08764cec (diff)
download: qtwebkit-ad0d549d4cc13433f77c1ac8f0ab379c83d93f28.tar.gz
5 files changed, 76 insertions, 109 deletions
diff --git a/Source/WebCore/html/parser/HTMLElementStack.cpp b/Source/WebCore/html/parser/HTMLElementStack.cpp
index 98885743e..9cc3a0316 100644
--- a/Source/WebCore/html/parser/HTMLElementStack.cpp
+++ b/Source/WebCore/html/parser/HTMLElementStack.cpp
@@ -53,7 +53,6 @@ inline bool isNumberedHeaderElement(ContainerNode* node)
 inline bool isRootNode(ContainerNode* node)
 {
     return node->nodeType() == Node::DOCUMENT_FRAGMENT_NODE
-        || node->nodeType() == Node::SHADOW_ROOT_NODE
         || node->hasTagName(htmlTag);
 }
 
@@ -310,7 +309,7 @@ void HTMLElementStack::popUntilForeignContentScopeMarker()
     
 void HTMLElementStack::pushRootNode(PassRefPtr<ContainerNode> rootNode)
 {
-    ASSERT(rootNode->nodeType() == Node::DOCUMENT_FRAGMENT_NODE || rootNode->nodeType() == Node::SHADOW_ROOT_NODE);
+    ASSERT(rootNode->nodeType() == Node::DOCUMENT_FRAGMENT_NODE);
     pushRootNodeCommon(rootNode);
 }
 
diff --git a/Source/WebCore/html/parser/HTMLElementStack.h b/Source/WebCore/html/parser/HTMLElementStack.h
index f604f82c7..5697e80c8 100644
--- a/Source/WebCore/html/parser/HTMLElementStack.h
+++ b/Source/WebCore/html/parser/HTMLElementStack.h
@@ -180,8 +180,7 @@ inline bool isInHTMLNamespace(Node* node)
     // A DocumentFragment takes the place of the document element when parsing
     // fragments and should be considered in the HTML namespace.
     return node->namespaceURI() == HTMLNames::xhtmlNamespaceURI
-        || node->nodeType() == Node::DOCUMENT_FRAGMENT_NODE
-        || node->nodeType() == Node::SHADOW_ROOT_NODE; // FIXME: Does this also apply to ShadowRoot?
+        || node->nodeType() == Node::DOCUMENT_FRAGMENT_NODE; // FIXME: Does this also apply to ShadowRoot?
 }
 
 
diff --git a/Source/WebCore/html/parser/HTMLTreeBuilder.cpp b/Source/WebCore/html/parser/HTMLTreeBuilder.cpp
index 9d839120d..3b5414913 100644
--- a/Source/WebCore/html/parser/HTMLTreeBuilder.cpp
+++ b/Source/WebCore/html/parser/HTMLTreeBuilder.cpp
@@ -586,10 +586,8 @@ void HTMLTreeBuilder::processIsindexStartTagForInBody(AtomicHTMLToken& token)
     notImplemented(); // Acknowledge self-closing flag
     processFakeStartTag(formTag);
     RefPtr<Attribute> actionAttribute = token.getAttributeItem(actionAttr);
-    if (actionAttribute) {
-        ASSERT(m_tree.currentElement()->hasTagName(formTag));
-        m_tree.currentElement()->setAttribute(actionAttr, actionAttribute->value());
-    }
+    if (actionAttribute)
+        m_tree.form()->setAttribute(actionAttr, actionAttribute->value());
     processFakeStartTag(hrTag);
     processFakeStartTag(labelTag);
     RefPtr<Attribute> promptAttribute = token.getAttributeItem(promptAttr);
diff --git a/Source/WebCore/html/parser/XSSAuditor.cpp b/Source/WebCore/html/parser/XSSAuditor.cpp
index 138a67118..8fde1a5d7 100644
--- a/Source/WebCore/html/parser/XSSAuditor.cpp
+++ b/Source/WebCore/html/parser/XSSAuditor.cpp
@@ -36,6 +36,7 @@
 #include "FrameLoaderClient.h"
 #include "HTMLDocumentParser.h"
 #include "HTMLNames.h"
+#include "HTMLTokenizer.h"
 #include "HTMLParamElement.h"
 #include "HTMLParserIdioms.h"
 #include "SecurityOrigin.h"
@@ -54,7 +55,7 @@ static bool isNonCanonicalCharacter(UChar c)
     // We remove all non-ASCII characters, including non-printable ASCII characters.
     //
     // Note, we don't remove backslashes like PHP stripslashes(), which among other things converts "\\0" to the \0 character.
-    // Instead, we remove backslashes and zeros (since the string "\\0" =(remove backslashes)=> "0"). However, this has the 
+    // Instead, we remove backslashes and zeros (since the string "\\0" =(remove backslashes)=> "0"). However, this has the
     // adverse effect that we remove any legitimate zeros from a string.
     //
     // For instance: new String("http://localhost:8000") => new String("http://localhost:8").
@@ -71,7 +72,7 @@ static bool isRequiredForInjection(UChar c)
     return (c == '\'' || c == '"' || c == '<' || c == '>');
 }
 
-static bool isTerminatingCharacter(UChar c) 
+static bool isTerminatingCharacter(UChar c)
 {
     return (c == '&' || c == '/' || c == '"' || c == '\'' || c == '<');
 }
@@ -81,26 +82,21 @@ static bool isHTMLQuote(UChar c)
     return (c == '"' || c == '\'');
 }
 
-static bool isHTMLNewline(UChar c)
+static bool isJSNewline(UChar c)
 {
-    return (c == '\n' || c == '\r');
+    // Per ecma-262 section 7.3 Line Terminators.
+    return (c == '\n' || c == '\r' || c == 0x2028 || c == 0x2029);
 }
 
-static bool startsHTMLEndTagAt(const String& string, size_t start)
-{
-    return (start + 1 < string.length() && string[start] == '<' && string[start+1] == '/');
-}    
-
-
 static bool startsHTMLCommentAt(const String& string, size_t start)
 {
     return (start + 3 < string.length() && string[start] == '<' && string[start+1] == '!' && string[start+2] == '-' && string[start+3] == '-');
-}    
+}
 
 static bool startsSingleLineCommentAt(const String& string, size_t start)
 {
     return (start + 1 < string.length() && string[start] == '/' && string[start+1] == '/');
-}    
+}
 
 static bool startsMultiLineCommentAt(const String& string, size_t start)
 {
@@ -137,23 +133,6 @@ static bool isDangerousHTTPEquiv(const String& value)
     return equalIgnoringCase(equiv, "refresh") || equalIgnoringCase(equiv, "set-cookie");
 }
 
-static bool containsJavaScriptURL(const Vector<UChar, 32>& value)
-{
-    static const char javaScriptScheme[] = "javascript:";
-    static const size_t lengthOfJavaScriptScheme = sizeof(javaScriptScheme) - 1;
-
-    size_t i;
-    for (i = 0; i < value.size(); ++i) {
-        if (!isHTMLSpace(value[i]))
-            break;
-    }
-
-    if (value.size() - i < lengthOfJavaScriptScheme)
-        return false;
-
-    return equalIgnoringCase(value.data() + i, javaScriptScheme, lengthOfJavaScriptScheme);
-}
-
 static inline String decode16BitUnicodeEscapeSequences(const String& string)
 {
     // Note, the encoding is ignored since each %u-escape sequence represents a UTF-16 code unit.
@@ -176,7 +155,6 @@ static String fullyDecodeString(const String& string, const TextResourceDecoder*
         oldWorkingStringLength = workingString.length();
         workingString = decode16BitUnicodeEscapeSequences(decodeStandardURLEscapeSequences(workingString, encoding));
     } while (workingString.length() < oldWorkingStringLength);
-    ASSERT(!workingString.isEmpty());
     workingString.replace('+', ' ');
     workingString = canonicalize(workingString);
     return workingString;
@@ -187,6 +165,8 @@ XSSAuditor::XSSAuditor(HTMLDocumentParser* parser)
     , m_isEnabled(false)
     , m_xssProtection(XSSProtectionEnabled)
     , m_state(Uninitialized)
+    , m_shouldAllowCDATA(false)
+    , m_scriptTagNestingLevel(0)
     , m_notifiedClient(false)
 {
     ASSERT(m_parser);
@@ -204,7 +184,7 @@ void XSSAuditor::init()
     const int suffixTreeDepth = 5;
 
     ASSERT(m_state == Uninitialized);
-    m_state = Initial;
+    m_state = Initialized;
 
     if (!m_isEnabled)
         return;
@@ -257,28 +237,21 @@ void XSSAuditor::init()
 
 void XSSAuditor::filterToken(HTMLToken& token)
 {
-    if (m_state == Uninitialized) {
+    if (m_state == Uninitialized)
         init();
-        ASSERT(m_state == Initial);
-    }
-
+   
+    ASSERT(m_state == Initialized);
     if (!m_isEnabled || m_xssProtection == XSSProtectionDisabled)
         return;
 
     bool didBlockScript = false;
-
-    switch (m_state) {
-    case Uninitialized:
-        ASSERT_NOT_REACHED();
-        break;
-    case Initial:
-        didBlockScript = filterTokenInitial(token);
-        break;
-    case AfterScriptStartTag:
-        didBlockScript = filterTokenAfterScriptStartTag(token);
-        ASSERT(m_state == Initial);
-        m_cachedSnippet = String();
-        break;
+    if (token.type() == HTMLTokenTypes::StartTag)
+        didBlockScript = filterStartToken(token);
+    else if (m_scriptTagNestingLevel) {
+        if (token.type() == HTMLTokenTypes::Character)
+            didBlockScript = filterCharacterToken(token);
+        else if (token.type() == HTMLTokenTypes::EndTag)
+            filterEndToken(token);
     }
 
     if (didBlockScript) {
@@ -300,18 +273,15 @@ void XSSAuditor::filterToken(HTMLToken& token)
     }
 }
 
-bool XSSAuditor::filterTokenInitial(HTMLToken& token)
+bool XSSAuditor::filterStartToken(HTMLToken& token)
 {
-    ASSERT(m_state == Initial);
-
-    if (token.type() != HTMLTokenTypes::StartTag)
-        return false;
-
     bool didBlockScript = eraseDangerousAttributesIfInjected(token);
 
-    if (hasName(token, scriptTag))
+    if (hasName(token, scriptTag)) {
         didBlockScript |= filterScriptToken(token);
-    else if (hasName(token, objectTag))
+        ASSERT(m_shouldAllowCDATA || !m_scriptTagNestingLevel);
+        m_scriptTagNestingLevel++;
+    } else if (hasName(token, objectTag))
         didBlockScript |= filterObjectToken(token);
     else if (hasName(token, paramTag))
         didBlockScript |= filterParamToken(token);
@@ -331,16 +301,18 @@ bool XSSAuditor::filterTokenInitial(HTMLToken& token)
     return didBlockScript;
 }
 
-bool XSSAuditor::filterTokenAfterScriptStartTag(HTMLToken& token)
+void XSSAuditor::filterEndToken(HTMLToken& token)
 {
-    ASSERT(m_state == AfterScriptStartTag);
-    m_state = Initial;
-
-    if (token.type() != HTMLTokenTypes::Character) {
-        ASSERT(token.type() == HTMLTokenTypes::EndTag || token.type() == HTMLTokenTypes::EndOfFile);
-        return false;
+    ASSERT(m_scriptTagNestingLevel);
+    if (hasName(token, scriptTag)) {
+        m_scriptTagNestingLevel--;
+        ASSERT(m_shouldAllowCDATA || !m_scriptTagNestingLevel);
     }
+}
 
+bool XSSAuditor::filterCharacterToken(HTMLToken& token)
+{
+    ASSERT(m_scriptTagNestingLevel);
     TextResourceDecoder* decoder = m_parser->document()->decoder();
     if (isContainedInRequest(fullyDecodeString(m_cachedSnippet, decoder))) {
         int start = 0;
@@ -357,21 +329,19 @@ bool XSSAuditor::filterTokenAfterScriptStartTag(HTMLToken& token)
 
 bool XSSAuditor::filterScriptToken(HTMLToken& token)
 {
-    ASSERT(m_state == Initial);
     ASSERT(token.type() == HTMLTokenTypes::StartTag);
     ASSERT(hasName(token, scriptTag));
 
     if (eraseAttributeIfInjected(token, srcAttr, blankURL().string(), SrcLikeAttribute))
         return true;
 
-    m_state = AfterScriptStartTag;
     m_cachedSnippet = m_parser->sourceForToken(token);
+    m_shouldAllowCDATA = m_parser->tokenizer()->shouldAllowCDATA();
     return false;
 }
 
 bool XSSAuditor::filterObjectToken(HTMLToken& token)
 {
-    ASSERT(m_state == Initial);
     ASSERT(token.type() == HTMLTokenTypes::StartTag);
     ASSERT(hasName(token, objectTag));
 
@@ -386,7 +356,6 @@ bool XSSAuditor::filterObjectToken(HTMLToken& token)
 
 bool XSSAuditor::filterParamToken(HTMLToken& token)
 {
-    ASSERT(m_state == Initial);
     ASSERT(token.type() == HTMLTokenTypes::StartTag);
     ASSERT(hasName(token, paramTag));
 
@@ -405,7 +374,6 @@ bool XSSAuditor::filterParamToken(HTMLToken& token)
 
 bool XSSAuditor::filterEmbedToken(HTMLToken& token)
 {
-    ASSERT(m_state == Initial);
     ASSERT(token.type() == HTMLTokenTypes::StartTag);
     ASSERT(hasName(token, embedTag));
 
@@ -420,7 +388,6 @@ bool XSSAuditor::filterEmbedToken(HTMLToken& token)
 
 bool XSSAuditor::filterAppletToken(HTMLToken& token)
 {
-    ASSERT(m_state == Initial);
     ASSERT(token.type() == HTMLTokenTypes::StartTag);
     ASSERT(hasName(token, appletTag));
 
@@ -434,7 +401,6 @@ bool XSSAuditor::filterAppletToken(HTMLToken& token)
 
 bool XSSAuditor::filterIframeToken(HTMLToken& token)
 {
-    ASSERT(m_state == Initial);
     ASSERT(token.type() == HTMLTokenTypes::StartTag);
     ASSERT(hasName(token, iframeTag));
 
@@ -443,7 +409,6 @@ bool XSSAuditor::filterIframeToken(HTMLToken& token)
 
 bool XSSAuditor::filterMetaToken(HTMLToken& token)
 {
-    ASSERT(m_state == Initial);
     ASSERT(token.type() == HTMLTokenTypes::StartTag);
     ASSERT(hasName(token, metaTag));
 
@@ -452,7 +417,6 @@ bool XSSAuditor::filterMetaToken(HTMLToken& token)
 
 bool XSSAuditor::filterBaseToken(HTMLToken& token)
 {
-    ASSERT(m_state == Initial);
     ASSERT(token.type() == HTMLTokenTypes::StartTag);
     ASSERT(hasName(token, baseTag));
 
@@ -461,7 +425,6 @@ bool XSSAuditor::filterBaseToken(HTMLToken& token)
 
 bool XSSAuditor::filterFormToken(HTMLToken& token)
 {
-    ASSERT(m_state == Initial);
     ASSERT(token.type() == HTMLTokenTypes::StartTag);
     ASSERT(hasName(token, formTag));
 
@@ -476,7 +439,7 @@ bool XSSAuditor::eraseDangerousAttributesIfInjected(HTMLToken& token)
     for (size_t i = 0; i < token.attributes().size(); ++i) {
         const HTMLToken::Attribute& attribute = token.attributes().at(i);
         bool isInlineEventHandler = isNameOfInlineEventHandler(attribute.m_name);
-        bool valueContainsJavaScriptURL = isInlineEventHandler ? false : containsJavaScriptURL(attribute.m_value);
+        bool valueContainsJavaScriptURL = !isInlineEventHandler && protocolIsJavaScript(stripLeadingAndTrailingHTMLSpaces(String(attribute.m_value.data(), attribute.m_value.size())));
         if (!isInlineEventHandler && !valueContainsJavaScriptURL)
             continue;
         // Beware of trailing characters which came from the page itself, not the 
@@ -540,8 +503,6 @@ String XSSAuditor::snippetForRange(const HTMLToken& token, int start, int end)
 
 String XSSAuditor::decodedSnippetForAttribute(const HTMLToken& token, const HTMLToken::Attribute& attribute, AttributeKind treatment)
 {
-    const size_t kMaximumSnippetLength = 100;
-
     // The range doesn't inlcude the character which terminates the value. So,
     // for an input of |name="value"|, the snippet is |name="value|. For an
     // unquoted input of |name=value |, the snippet is |name=value|.
@@ -549,7 +510,7 @@ String XSSAuditor::decodedSnippetForAttribute(const HTMLToken& token, const HTML
     int start = attribute.m_nameRange.m_start - token.startIndex();
     int end = attribute.m_valueRange.m_end - token.startIndex();
     String decodedSnippet = fullyDecodeString(snippetForRange(token, start, end), m_parser->document()->decoder());
-    decodedSnippet.truncate(kMaximumSnippetLength);
+    decodedSnippet.truncate(kMaximumFragmentLengthTarget);
     if (treatment == SrcLikeAttribute) {
         int slashCount;
         size_t currentLength;
@@ -592,8 +553,6 @@ bool XSSAuditor::isSameOriginResource(const String& url)
 
 String XSSAuditor::snippetForJavaScript(const String& string)
 {
-    const size_t kMaximumFragmentLengthTarget = 100;
-
     size_t startPosition = 0;
     size_t endPosition = string.length();
     size_t foundPosition = notFound;
@@ -602,8 +561,17 @@ String XSSAuditor::snippetForJavaScript(const String& string)
     while (startPosition < endPosition) {
         while (startPosition < endPosition && isHTMLSpace(string[startPosition]))
             startPosition++;
+
+        // Under SVG/XML rules, only HTML comment syntax matters and the parser returns
+        // these as separate comment tokens. Having consumed whitespace, we need not look
+        // further for these.
+        if (m_shouldAllowCDATA)
+            break;
+
+        // Under HTML rules, both the HTML and JS comment syntax matter, and the HTML
+        // comment ends at the end of the line, not with -->.
         if (startsHTMLCommentAt(string, startPosition) || startsSingleLineCommentAt(string, startPosition)) {
-            while (startPosition < endPosition && !isHTMLNewline(string[startPosition]))
+            while (startPosition < endPosition && !isJSNewline(string[startPosition]))
                 startPosition++;
         } else if (startsMultiLineCommentAt(string, startPosition)) {
             if ((foundPosition = string.find("*/", startPosition)) != notFound)
@@ -614,29 +582,29 @@ String XSSAuditor::snippetForJavaScript(const String& string)
             break;
     }
 
-    // Stop at next comment, or at a closing script tag (which may have been included with
-    // the code fragment because of buffering in the HTMLSourceTracker), or when we exceed
-    // the maximum length target. After hitting the length target, we can only stop at a
-    // point where we know we are not in the middle of a %-escape sequence. For the sake of
-    // simplicity, approximate stopping at a close script tag by stopping at any close tag,
-    // and approximate not stopping inside a (possibly multiply encoded) %-esacpe sequence
-    // by breaking on whitespace only. We should have enough text in these cases to avoid
-    // false positives.
+    // Stop at next comment (using the same rules as above for SVG/XML vs HTML), or when
+    // we exceed the maximum length target. After hitting the length target, we can only
+    // stop at a point where we know we are not in the middle of a %-escape sequence. For
+    // the sake of simplicity, approximate not stopping inside a (possibly multiply encoded)
+    // %-escape sequence by breaking on whitespace only. We should have enough text in
+    // these cases to avoid false positives.
     for (foundPosition = startPosition; foundPosition < endPosition; foundPosition++) {
-        if (startsSingleLineCommentAt(string, foundPosition) || startsMultiLineCommentAt(string, foundPosition) || startsHTMLEndTagAt(string, foundPosition)) {
-            endPosition = foundPosition + 2;
-            break;
-        }
-        if (startsHTMLCommentAt(string, foundPosition)) {
-            endPosition = foundPosition + 4;
-            break;
+        if (!m_shouldAllowCDATA) {
+            if (startsSingleLineCommentAt(string, foundPosition) || startsMultiLineCommentAt(string, foundPosition)) {
+                endPosition = foundPosition + 2;
+                break;
+            }
+            if (startsHTMLCommentAt(string, foundPosition)) {
+                endPosition = foundPosition + 4;
+                break;
+            }
         }
         if (foundPosition > startPosition + kMaximumFragmentLengthTarget && isHTMLSpace(string[foundPosition])) {
             endPosition = foundPosition;
             break;
         }
     }
-    
+
     return string.substring(startPosition, endPosition - startPosition);
 }
 
diff --git a/Source/WebCore/html/parser/XSSAuditor.h b/Source/WebCore/html/parser/XSSAuditor.h
index 89b7b8c65..713cd0b6f 100644
--- a/Source/WebCore/html/parser/XSSAuditor.h
+++ b/Source/WebCore/html/parser/XSSAuditor.h
@@ -42,10 +42,11 @@ public:
     void filterToken(HTMLToken&);
 
 private:
+    static const size_t kMaximumFragmentLengthTarget = 100;
+
     enum State {
         Uninitialized,
-        Initial,
-        AfterScriptStartTag,
+        Initialized
     };
 
     enum AttributeKind {
@@ -55,9 +56,9 @@ private:
 
     void init();
 
-    bool filterTokenInitial(HTMLToken&);
-    bool filterTokenAfterScriptStartTag(HTMLToken&);
-
+    bool filterStartToken(HTMLToken&);
+    void filterEndToken(HTMLToken&);
+    bool filterCharacterToken(HTMLToken&);
     bool filterScriptToken(HTMLToken&);
     bool filterObjectToken(HTMLToken&);
     bool filterParamToken(HTMLToken&);
@@ -88,6 +89,8 @@ private:
 
     State m_state;
     String m_cachedSnippet;
+    bool m_shouldAllowCDATA;
+    unsigned m_scriptTagNestingLevel;
     bool m_notifiedClient;
 };
author	Simon Hausmann <simon.hausmann@nokia.com>	2012-02-24 16:36:50 +0100
committer	Simon Hausmann <simon.hausmann@nokia.com>	2012-02-24 16:36:50 +0100
commit	ad0d549d4cc13433f77c1ac8f0ab379c83d93f28 (patch)
tree	b34b0daceb7c8e7fdde4b4ec43650ab7caadb0a9 /Source/WebCore/html/parser
parent	03e12282df9aa1e1fb05a8b90f1cfc2e08764cec (diff)
download	qtwebkit-ad0d549d4cc13433f77c1ac8f0ab379c83d93f28.tar.gz