summaryrefslogtreecommitdiff
path: root/Source/WebCore/html/parser
diff options
context:
space:
mode:
authorSimon Hausmann <simon.hausmann@nokia.com>2012-02-24 16:36:50 +0100
committerSimon Hausmann <simon.hausmann@nokia.com>2012-02-24 16:36:50 +0100
commitad0d549d4cc13433f77c1ac8f0ab379c83d93f28 (patch)
treeb34b0daceb7c8e7fdde4b4ec43650ab7caadb0a9 /Source/WebCore/html/parser
parent03e12282df9aa1e1fb05a8b90f1cfc2e08764cec (diff)
downloadqtwebkit-ad0d549d4cc13433f77c1ac8f0ab379c83d93f28.tar.gz
Imported WebKit commit bb52bf3c0119e8a128cd93afe5572413a8617de9 (http://svn.webkit.org/repository/webkit/trunk@108790)
Diffstat (limited to 'Source/WebCore/html/parser')
-rw-r--r--Source/WebCore/html/parser/HTMLElementStack.cpp3
-rw-r--r--Source/WebCore/html/parser/HTMLElementStack.h3
-rw-r--r--Source/WebCore/html/parser/HTMLTreeBuilder.cpp6
-rw-r--r--Source/WebCore/html/parser/XSSAuditor.cpp160
-rw-r--r--Source/WebCore/html/parser/XSSAuditor.h13
5 files changed, 76 insertions, 109 deletions
diff --git a/Source/WebCore/html/parser/HTMLElementStack.cpp b/Source/WebCore/html/parser/HTMLElementStack.cpp
index 98885743e..9cc3a0316 100644
--- a/Source/WebCore/html/parser/HTMLElementStack.cpp
+++ b/Source/WebCore/html/parser/HTMLElementStack.cpp
@@ -53,7 +53,6 @@ inline bool isNumberedHeaderElement(ContainerNode* node)
inline bool isRootNode(ContainerNode* node)
{
return node->nodeType() == Node::DOCUMENT_FRAGMENT_NODE
- || node->nodeType() == Node::SHADOW_ROOT_NODE
|| node->hasTagName(htmlTag);
}
@@ -310,7 +309,7 @@ void HTMLElementStack::popUntilForeignContentScopeMarker()
void HTMLElementStack::pushRootNode(PassRefPtr<ContainerNode> rootNode)
{
- ASSERT(rootNode->nodeType() == Node::DOCUMENT_FRAGMENT_NODE || rootNode->nodeType() == Node::SHADOW_ROOT_NODE);
+ ASSERT(rootNode->nodeType() == Node::DOCUMENT_FRAGMENT_NODE);
pushRootNodeCommon(rootNode);
}
diff --git a/Source/WebCore/html/parser/HTMLElementStack.h b/Source/WebCore/html/parser/HTMLElementStack.h
index f604f82c7..5697e80c8 100644
--- a/Source/WebCore/html/parser/HTMLElementStack.h
+++ b/Source/WebCore/html/parser/HTMLElementStack.h
@@ -180,8 +180,7 @@ inline bool isInHTMLNamespace(Node* node)
// A DocumentFragment takes the place of the document element when parsing
// fragments and should be considered in the HTML namespace.
return node->namespaceURI() == HTMLNames::xhtmlNamespaceURI
- || node->nodeType() == Node::DOCUMENT_FRAGMENT_NODE
- || node->nodeType() == Node::SHADOW_ROOT_NODE; // FIXME: Does this also apply to ShadowRoot?
+ || node->nodeType() == Node::DOCUMENT_FRAGMENT_NODE; // FIXME: Does this also apply to ShadowRoot?
}
diff --git a/Source/WebCore/html/parser/HTMLTreeBuilder.cpp b/Source/WebCore/html/parser/HTMLTreeBuilder.cpp
index 9d839120d..3b5414913 100644
--- a/Source/WebCore/html/parser/HTMLTreeBuilder.cpp
+++ b/Source/WebCore/html/parser/HTMLTreeBuilder.cpp
@@ -586,10 +586,8 @@ void HTMLTreeBuilder::processIsindexStartTagForInBody(AtomicHTMLToken& token)
notImplemented(); // Acknowledge self-closing flag
processFakeStartTag(formTag);
RefPtr<Attribute> actionAttribute = token.getAttributeItem(actionAttr);
- if (actionAttribute) {
- ASSERT(m_tree.currentElement()->hasTagName(formTag));
- m_tree.currentElement()->setAttribute(actionAttr, actionAttribute->value());
- }
+ if (actionAttribute)
+ m_tree.form()->setAttribute(actionAttr, actionAttribute->value());
processFakeStartTag(hrTag);
processFakeStartTag(labelTag);
RefPtr<Attribute> promptAttribute = token.getAttributeItem(promptAttr);
diff --git a/Source/WebCore/html/parser/XSSAuditor.cpp b/Source/WebCore/html/parser/XSSAuditor.cpp
index 138a67118..8fde1a5d7 100644
--- a/Source/WebCore/html/parser/XSSAuditor.cpp
+++ b/Source/WebCore/html/parser/XSSAuditor.cpp
@@ -36,6 +36,7 @@
#include "FrameLoaderClient.h"
#include "HTMLDocumentParser.h"
#include "HTMLNames.h"
+#include "HTMLTokenizer.h"
#include "HTMLParamElement.h"
#include "HTMLParserIdioms.h"
#include "SecurityOrigin.h"
@@ -54,7 +55,7 @@ static bool isNonCanonicalCharacter(UChar c)
// We remove all non-ASCII characters, including non-printable ASCII characters.
//
// Note, we don't remove backslashes like PHP stripslashes(), which among other things converts "\\0" to the \0 character.
- // Instead, we remove backslashes and zeros (since the string "\\0" =(remove backslashes)=> "0"). However, this has the
+ // Instead, we remove backslashes and zeros (since the string "\\0" =(remove backslashes)=> "0"). However, this has the
// adverse effect that we remove any legitimate zeros from a string.
//
// For instance: new String("http://localhost:8000") => new String("http://localhost:8").
@@ -71,7 +72,7 @@ static bool isRequiredForInjection(UChar c)
return (c == '\'' || c == '"' || c == '<' || c == '>');
}
-static bool isTerminatingCharacter(UChar c)
+static bool isTerminatingCharacter(UChar c)
{
return (c == '&' || c == '/' || c == '"' || c == '\'' || c == '<');
}
@@ -81,26 +82,21 @@ static bool isHTMLQuote(UChar c)
return (c == '"' || c == '\'');
}
-static bool isHTMLNewline(UChar c)
+static bool isJSNewline(UChar c)
{
- return (c == '\n' || c == '\r');
+ // Per ecma-262 section 7.3 Line Terminators.
+ return (c == '\n' || c == '\r' || c == 0x2028 || c == 0x2029);
}
-static bool startsHTMLEndTagAt(const String& string, size_t start)
-{
- return (start + 1 < string.length() && string[start] == '<' && string[start+1] == '/');
-}
-
-
static bool startsHTMLCommentAt(const String& string, size_t start)
{
return (start + 3 < string.length() && string[start] == '<' && string[start+1] == '!' && string[start+2] == '-' && string[start+3] == '-');
-}
+}
static bool startsSingleLineCommentAt(const String& string, size_t start)
{
return (start + 1 < string.length() && string[start] == '/' && string[start+1] == '/');
-}
+}
static bool startsMultiLineCommentAt(const String& string, size_t start)
{
@@ -137,23 +133,6 @@ static bool isDangerousHTTPEquiv(const String& value)
return equalIgnoringCase(equiv, "refresh") || equalIgnoringCase(equiv, "set-cookie");
}
-static bool containsJavaScriptURL(const Vector<UChar, 32>& value)
-{
- static const char javaScriptScheme[] = "javascript:";
- static const size_t lengthOfJavaScriptScheme = sizeof(javaScriptScheme) - 1;
-
- size_t i;
- for (i = 0; i < value.size(); ++i) {
- if (!isHTMLSpace(value[i]))
- break;
- }
-
- if (value.size() - i < lengthOfJavaScriptScheme)
- return false;
-
- return equalIgnoringCase(value.data() + i, javaScriptScheme, lengthOfJavaScriptScheme);
-}
-
static inline String decode16BitUnicodeEscapeSequences(const String& string)
{
// Note, the encoding is ignored since each %u-escape sequence represents a UTF-16 code unit.
@@ -176,7 +155,6 @@ static String fullyDecodeString(const String& string, const TextResourceDecoder*
oldWorkingStringLength = workingString.length();
workingString = decode16BitUnicodeEscapeSequences(decodeStandardURLEscapeSequences(workingString, encoding));
} while (workingString.length() < oldWorkingStringLength);
- ASSERT(!workingString.isEmpty());
workingString.replace('+', ' ');
workingString = canonicalize(workingString);
return workingString;
@@ -187,6 +165,8 @@ XSSAuditor::XSSAuditor(HTMLDocumentParser* parser)
, m_isEnabled(false)
, m_xssProtection(XSSProtectionEnabled)
, m_state(Uninitialized)
+ , m_shouldAllowCDATA(false)
+ , m_scriptTagNestingLevel(0)
, m_notifiedClient(false)
{
ASSERT(m_parser);
@@ -204,7 +184,7 @@ void XSSAuditor::init()
const int suffixTreeDepth = 5;
ASSERT(m_state == Uninitialized);
- m_state = Initial;
+ m_state = Initialized;
if (!m_isEnabled)
return;
@@ -257,28 +237,21 @@ void XSSAuditor::init()
void XSSAuditor::filterToken(HTMLToken& token)
{
- if (m_state == Uninitialized) {
+ if (m_state == Uninitialized)
init();
- ASSERT(m_state == Initial);
- }
-
+
+ ASSERT(m_state == Initialized);
if (!m_isEnabled || m_xssProtection == XSSProtectionDisabled)
return;
bool didBlockScript = false;
-
- switch (m_state) {
- case Uninitialized:
- ASSERT_NOT_REACHED();
- break;
- case Initial:
- didBlockScript = filterTokenInitial(token);
- break;
- case AfterScriptStartTag:
- didBlockScript = filterTokenAfterScriptStartTag(token);
- ASSERT(m_state == Initial);
- m_cachedSnippet = String();
- break;
+ if (token.type() == HTMLTokenTypes::StartTag)
+ didBlockScript = filterStartToken(token);
+ else if (m_scriptTagNestingLevel) {
+ if (token.type() == HTMLTokenTypes::Character)
+ didBlockScript = filterCharacterToken(token);
+ else if (token.type() == HTMLTokenTypes::EndTag)
+ filterEndToken(token);
}
if (didBlockScript) {
@@ -300,18 +273,15 @@ void XSSAuditor::filterToken(HTMLToken& token)
}
}
-bool XSSAuditor::filterTokenInitial(HTMLToken& token)
+bool XSSAuditor::filterStartToken(HTMLToken& token)
{
- ASSERT(m_state == Initial);
-
- if (token.type() != HTMLTokenTypes::StartTag)
- return false;
-
bool didBlockScript = eraseDangerousAttributesIfInjected(token);
- if (hasName(token, scriptTag))
+ if (hasName(token, scriptTag)) {
didBlockScript |= filterScriptToken(token);
- else if (hasName(token, objectTag))
+ ASSERT(m_shouldAllowCDATA || !m_scriptTagNestingLevel);
+ m_scriptTagNestingLevel++;
+ } else if (hasName(token, objectTag))
didBlockScript |= filterObjectToken(token);
else if (hasName(token, paramTag))
didBlockScript |= filterParamToken(token);
@@ -331,16 +301,18 @@ bool XSSAuditor::filterTokenInitial(HTMLToken& token)
return didBlockScript;
}
-bool XSSAuditor::filterTokenAfterScriptStartTag(HTMLToken& token)
+void XSSAuditor::filterEndToken(HTMLToken& token)
{
- ASSERT(m_state == AfterScriptStartTag);
- m_state = Initial;
-
- if (token.type() != HTMLTokenTypes::Character) {
- ASSERT(token.type() == HTMLTokenTypes::EndTag || token.type() == HTMLTokenTypes::EndOfFile);
- return false;
+ ASSERT(m_scriptTagNestingLevel);
+ if (hasName(token, scriptTag)) {
+ m_scriptTagNestingLevel--;
+ ASSERT(m_shouldAllowCDATA || !m_scriptTagNestingLevel);
}
+}
+bool XSSAuditor::filterCharacterToken(HTMLToken& token)
+{
+ ASSERT(m_scriptTagNestingLevel);
TextResourceDecoder* decoder = m_parser->document()->decoder();
if (isContainedInRequest(fullyDecodeString(m_cachedSnippet, decoder))) {
int start = 0;
@@ -357,21 +329,19 @@ bool XSSAuditor::filterTokenAfterScriptStartTag(HTMLToken& token)
bool XSSAuditor::filterScriptToken(HTMLToken& token)
{
- ASSERT(m_state == Initial);
ASSERT(token.type() == HTMLTokenTypes::StartTag);
ASSERT(hasName(token, scriptTag));
if (eraseAttributeIfInjected(token, srcAttr, blankURL().string(), SrcLikeAttribute))
return true;
- m_state = AfterScriptStartTag;
m_cachedSnippet = m_parser->sourceForToken(token);
+ m_shouldAllowCDATA = m_parser->tokenizer()->shouldAllowCDATA();
return false;
}
bool XSSAuditor::filterObjectToken(HTMLToken& token)
{
- ASSERT(m_state == Initial);
ASSERT(token.type() == HTMLTokenTypes::StartTag);
ASSERT(hasName(token, objectTag));
@@ -386,7 +356,6 @@ bool XSSAuditor::filterObjectToken(HTMLToken& token)
bool XSSAuditor::filterParamToken(HTMLToken& token)
{
- ASSERT(m_state == Initial);
ASSERT(token.type() == HTMLTokenTypes::StartTag);
ASSERT(hasName(token, paramTag));
@@ -405,7 +374,6 @@ bool XSSAuditor::filterParamToken(HTMLToken& token)
bool XSSAuditor::filterEmbedToken(HTMLToken& token)
{
- ASSERT(m_state == Initial);
ASSERT(token.type() == HTMLTokenTypes::StartTag);
ASSERT(hasName(token, embedTag));
@@ -420,7 +388,6 @@ bool XSSAuditor::filterEmbedToken(HTMLToken& token)
bool XSSAuditor::filterAppletToken(HTMLToken& token)
{
- ASSERT(m_state == Initial);
ASSERT(token.type() == HTMLTokenTypes::StartTag);
ASSERT(hasName(token, appletTag));
@@ -434,7 +401,6 @@ bool XSSAuditor::filterAppletToken(HTMLToken& token)
bool XSSAuditor::filterIframeToken(HTMLToken& token)
{
- ASSERT(m_state == Initial);
ASSERT(token.type() == HTMLTokenTypes::StartTag);
ASSERT(hasName(token, iframeTag));
@@ -443,7 +409,6 @@ bool XSSAuditor::filterIframeToken(HTMLToken& token)
bool XSSAuditor::filterMetaToken(HTMLToken& token)
{
- ASSERT(m_state == Initial);
ASSERT(token.type() == HTMLTokenTypes::StartTag);
ASSERT(hasName(token, metaTag));
@@ -452,7 +417,6 @@ bool XSSAuditor::filterMetaToken(HTMLToken& token)
bool XSSAuditor::filterBaseToken(HTMLToken& token)
{
- ASSERT(m_state == Initial);
ASSERT(token.type() == HTMLTokenTypes::StartTag);
ASSERT(hasName(token, baseTag));
@@ -461,7 +425,6 @@ bool XSSAuditor::filterBaseToken(HTMLToken& token)
bool XSSAuditor::filterFormToken(HTMLToken& token)
{
- ASSERT(m_state == Initial);
ASSERT(token.type() == HTMLTokenTypes::StartTag);
ASSERT(hasName(token, formTag));
@@ -476,7 +439,7 @@ bool XSSAuditor::eraseDangerousAttributesIfInjected(HTMLToken& token)
for (size_t i = 0; i < token.attributes().size(); ++i) {
const HTMLToken::Attribute& attribute = token.attributes().at(i);
bool isInlineEventHandler = isNameOfInlineEventHandler(attribute.m_name);
- bool valueContainsJavaScriptURL = isInlineEventHandler ? false : containsJavaScriptURL(attribute.m_value);
+ bool valueContainsJavaScriptURL = !isInlineEventHandler && protocolIsJavaScript(stripLeadingAndTrailingHTMLSpaces(String(attribute.m_value.data(), attribute.m_value.size())));
if (!isInlineEventHandler && !valueContainsJavaScriptURL)
continue;
// Beware of trailing characters which came from the page itself, not the
@@ -540,8 +503,6 @@ String XSSAuditor::snippetForRange(const HTMLToken& token, int start, int end)
String XSSAuditor::decodedSnippetForAttribute(const HTMLToken& token, const HTMLToken::Attribute& attribute, AttributeKind treatment)
{
- const size_t kMaximumSnippetLength = 100;
-
// The range doesn't inlcude the character which terminates the value. So,
// for an input of |name="value"|, the snippet is |name="value|. For an
// unquoted input of |name=value |, the snippet is |name=value|.
@@ -549,7 +510,7 @@ String XSSAuditor::decodedSnippetForAttribute(const HTMLToken& token, const HTML
int start = attribute.m_nameRange.m_start - token.startIndex();
int end = attribute.m_valueRange.m_end - token.startIndex();
String decodedSnippet = fullyDecodeString(snippetForRange(token, start, end), m_parser->document()->decoder());
- decodedSnippet.truncate(kMaximumSnippetLength);
+ decodedSnippet.truncate(kMaximumFragmentLengthTarget);
if (treatment == SrcLikeAttribute) {
int slashCount;
size_t currentLength;
@@ -592,8 +553,6 @@ bool XSSAuditor::isSameOriginResource(const String& url)
String XSSAuditor::snippetForJavaScript(const String& string)
{
- const size_t kMaximumFragmentLengthTarget = 100;
-
size_t startPosition = 0;
size_t endPosition = string.length();
size_t foundPosition = notFound;
@@ -602,8 +561,17 @@ String XSSAuditor::snippetForJavaScript(const String& string)
while (startPosition < endPosition) {
while (startPosition < endPosition && isHTMLSpace(string[startPosition]))
startPosition++;
+
+ // Under SVG/XML rules, only HTML comment syntax matters and the parser returns
+ // these as separate comment tokens. Having consumed whitespace, we need not look
+ // further for these.
+ if (m_shouldAllowCDATA)
+ break;
+
+ // Under HTML rules, both the HTML and JS comment syntax matter, and the HTML
+ // comment ends at the end of the line, not with -->.
if (startsHTMLCommentAt(string, startPosition) || startsSingleLineCommentAt(string, startPosition)) {
- while (startPosition < endPosition && !isHTMLNewline(string[startPosition]))
+ while (startPosition < endPosition && !isJSNewline(string[startPosition]))
startPosition++;
} else if (startsMultiLineCommentAt(string, startPosition)) {
if ((foundPosition = string.find("*/", startPosition)) != notFound)
@@ -614,29 +582,29 @@ String XSSAuditor::snippetForJavaScript(const String& string)
break;
}
- // Stop at next comment, or at a closing script tag (which may have been included with
- // the code fragment because of buffering in the HTMLSourceTracker), or when we exceed
- // the maximum length target. After hitting the length target, we can only stop at a
- // point where we know we are not in the middle of a %-escape sequence. For the sake of
- // simplicity, approximate stopping at a close script tag by stopping at any close tag,
- // and approximate not stopping inside a (possibly multiply encoded) %-esacpe sequence
- // by breaking on whitespace only. We should have enough text in these cases to avoid
- // false positives.
+ // Stop at next comment (using the same rules as above for SVG/XML vs HTML), or when
+ // we exceed the maximum length target. After hitting the length target, we can only
+ // stop at a point where we know we are not in the middle of a %-escape sequence. For
+ // the sake of simplicity, approximate not stopping inside a (possibly multiply encoded)
+ // %-escape sequence by breaking on whitespace only. We should have enough text in
+ // these cases to avoid false positives.
for (foundPosition = startPosition; foundPosition < endPosition; foundPosition++) {
- if (startsSingleLineCommentAt(string, foundPosition) || startsMultiLineCommentAt(string, foundPosition) || startsHTMLEndTagAt(string, foundPosition)) {
- endPosition = foundPosition + 2;
- break;
- }
- if (startsHTMLCommentAt(string, foundPosition)) {
- endPosition = foundPosition + 4;
- break;
+ if (!m_shouldAllowCDATA) {
+ if (startsSingleLineCommentAt(string, foundPosition) || startsMultiLineCommentAt(string, foundPosition)) {
+ endPosition = foundPosition + 2;
+ break;
+ }
+ if (startsHTMLCommentAt(string, foundPosition)) {
+ endPosition = foundPosition + 4;
+ break;
+ }
}
if (foundPosition > startPosition + kMaximumFragmentLengthTarget && isHTMLSpace(string[foundPosition])) {
endPosition = foundPosition;
break;
}
}
-
+
return string.substring(startPosition, endPosition - startPosition);
}
diff --git a/Source/WebCore/html/parser/XSSAuditor.h b/Source/WebCore/html/parser/XSSAuditor.h
index 89b7b8c65..713cd0b6f 100644
--- a/Source/WebCore/html/parser/XSSAuditor.h
+++ b/Source/WebCore/html/parser/XSSAuditor.h
@@ -42,10 +42,11 @@ public:
void filterToken(HTMLToken&);
private:
+ static const size_t kMaximumFragmentLengthTarget = 100;
+
enum State {
Uninitialized,
- Initial,
- AfterScriptStartTag,
+ Initialized
};
enum AttributeKind {
@@ -55,9 +56,9 @@ private:
void init();
- bool filterTokenInitial(HTMLToken&);
- bool filterTokenAfterScriptStartTag(HTMLToken&);
-
+ bool filterStartToken(HTMLToken&);
+ void filterEndToken(HTMLToken&);
+ bool filterCharacterToken(HTMLToken&);
bool filterScriptToken(HTMLToken&);
bool filterObjectToken(HTMLToken&);
bool filterParamToken(HTMLToken&);
@@ -88,6 +89,8 @@ private:
State m_state;
String m_cachedSnippet;
+ bool m_shouldAllowCDATA;
+ unsigned m_scriptTagNestingLevel;
bool m_notifiedClient;
};