diff options
Diffstat (limited to 'Source/WebCore/html/parser/XSSAuditor.cpp')
-rw-r--r-- | Source/WebCore/html/parser/XSSAuditor.cpp | 374 |
1 files changed, 195 insertions, 179 deletions
diff --git a/Source/WebCore/html/parser/XSSAuditor.cpp b/Source/WebCore/html/parser/XSSAuditor.cpp index 1bf5a096e..f314dd438 100644 --- a/Source/WebCore/html/parser/XSSAuditor.cpp +++ b/Source/WebCore/html/parser/XSSAuditor.cpp @@ -27,7 +27,6 @@ #include "config.h" #include "XSSAuditor.h" -#include "ContentSecurityPolicy.h" #include "DecodeEscapeSequences.h" #include "Document.h" #include "DocumentLoader.h" @@ -37,14 +36,13 @@ #include "HTMLNames.h" #include "HTMLParamElement.h" #include "HTMLParserIdioms.h" +#include "SVGNames.h" #include "Settings.h" #include "TextResourceDecoder.h" #include "XLinkNames.h" +#include <wtf/ASCIICType.h> #include <wtf/MainThread.h> - -#if ENABLE(SVG) -#include "SVGNames.h" -#endif +#include <wtf/NeverDestroyed.h> namespace WebCore { @@ -57,14 +55,11 @@ static bool isNonCanonicalCharacter(UChar c) // Note, we don't remove backslashes like PHP stripslashes(), which among other things converts "\\0" to the \0 character. // Instead, we remove backslashes and zeros (since the string "\\0" =(remove backslashes)=> "0"). However, this has the // adverse effect that we remove any legitimate zeros from a string. + // We also remove forward-slash, because it is common for some servers to collapse successive path components, eg, + // a//b becomes a/b. // - // For instance: new String("http://localhost:8000") => new String("http://localhost:8"). - return (c == '\\' || c == '0' || c == '\0' || c >= 127); -} - -static String canonicalize(const String& string) -{ - return string.removeCharacters(&isNonCanonicalCharacter); + // For instance: new String("http://localhost:8000") => new String("http:localhost:8"). + return (c == '\\' || c == '0' || c == '\0' || c == '/' || c >= 127); } static bool isRequiredForInjection(UChar c) @@ -90,17 +85,28 @@ static bool isJSNewline(UChar c) static bool startsHTMLCommentAt(const String& string, size_t start) { - return (start + 3 < string.length() && string[start] == '<' && string[start+1] == '!' && string[start+2] == '-' && string[start+3] == '-'); + return (start + 3 < string.length() && string[start] == '<' && string[start + 1] == '!' && string[start + 2] == '-' && string[start + 3] == '-'); } static bool startsSingleLineCommentAt(const String& string, size_t start) { - return (start + 1 < string.length() && string[start] == '/' && string[start+1] == '/'); + return (start + 1 < string.length() && string[start] == '/' && string[start + 1] == '/'); } static bool startsMultiLineCommentAt(const String& string, size_t start) { - return (start + 1 < string.length() && string[start] == '/' && string[start+1] == '*'); + return (start + 1 < string.length() && string[start] == '/' && string[start + 1] == '*'); +} + +static bool startsOpeningScriptTagAt(const String& string, size_t start) +{ + return start + 6 < string.length() && string[start] == '<' + && WTF::toASCIILowerUnchecked(string[start + 1]) == 's' + && WTF::toASCIILowerUnchecked(string[start + 2]) == 'c' + && WTF::toASCIILowerUnchecked(string[start + 3]) == 'r' + && WTF::toASCIILowerUnchecked(string[start + 4]) == 'i' + && WTF::toASCIILowerUnchecked(string[start + 5]) == 'p' + && WTF::toASCIILowerUnchecked(string[start + 6]) == 't'; } // If other files need this, we should move this to HTMLParserIdioms.h @@ -140,7 +146,7 @@ static bool isNameOfInlineEventHandler(const Vector<UChar, 32>& name) static bool isDangerousHTTPEquiv(const String& value) { String equiv = value.stripWhiteSpace(); - return equalIgnoringCase(equiv, "refresh") || equalIgnoringCase(equiv, "set-cookie"); + return equalLettersIgnoringASCIICase(equiv, "refresh") || equalLettersIgnoringASCIICase(equiv, "set-cookie"); } static inline String decode16BitUnicodeEscapeSequences(const String& string) @@ -165,36 +171,79 @@ static String fullyDecodeString(const String& string, const TextEncoding& encodi workingString = decode16BitUnicodeEscapeSequences(decodeStandardURLEscapeSequences(workingString, encoding)); } while (workingString.length() < oldWorkingStringLength); workingString.replace('+', ' '); - workingString = canonicalize(workingString); return workingString; } -static ContentSecurityPolicy::ReflectedXSSDisposition combineXSSProtectionHeaderAndCSP(ContentSecurityPolicy::ReflectedXSSDisposition xssProtection, ContentSecurityPolicy::ReflectedXSSDisposition reflectedXSS) -{ - ContentSecurityPolicy::ReflectedXSSDisposition result = std::max(xssProtection, reflectedXSS); - - if (result == ContentSecurityPolicy::ReflectedXSSInvalid || result == ContentSecurityPolicy::FilterReflectedXSS || result == ContentSecurityPolicy::ReflectedXSSUnset) - return ContentSecurityPolicy::FilterReflectedXSS; +static void truncateForSrcLikeAttribute(String& decodedSnippet) +{ + // In HTTP URLs, characters following the first ?, #, or third slash may come from + // the page itself and can be merely ignored by an attacker's server when a remote + // script or script-like resource is requested. In data URLs, the payload starts at + // the first comma, and the first /*, //, or <!-- may introduce a comment. Also + // data URLs may use the same string literal tricks as with script content itself. + // In either case, content following this may come from the page and may be ignored + // when the script is executed. Also, any of these characters may now be represented + // by the (enlarged) set of HTML5 entities. + // For simplicity, we don't differentiate based on URL scheme, and stop at the first + // & (since it might be part of an entity for any of the subsequent punctuation) + // the first # or ?, the third slash, or the first slash, <, ', or " once a comma + // is seen. + int slashCount = 0; + bool commaSeen = false; + for (size_t currentLength = 0; currentLength < decodedSnippet.length(); ++currentLength) { + UChar currentChar = decodedSnippet[currentLength]; + if (currentChar == '&' + || currentChar == '?' + || currentChar == '#' + || ((currentChar == '/' || currentChar == '\\') && (commaSeen || ++slashCount > 2)) + || (currentChar == '<' && commaSeen) + || (currentChar == '\'' && commaSeen) + || (currentChar == '"' && commaSeen)) { + decodedSnippet.truncate(currentLength); + return; + } + if (currentChar == ',') + commaSeen = true; + } +} - return result; +static void truncateForScriptLikeAttribute(String& decodedSnippet) +{ + // Beware of trailing characters which came from the page itself, not the + // injected vector. Excluding the terminating character covers common cases + // where the page immediately ends the attribute, but doesn't cover more + // complex cases where there is other page data following the injection. + // Generally, these won't parse as JavaScript, so the injected vector + // typically excludes them from consideration via a single-line comment or + // by enclosing them in a string literal terminated later by the page's own + // closing punctuation. Since the snippet has not been parsed, the vector + // may also try to introduce these via entities. As a result, we'd like to + // stop before the first "//", the first <!--, the first entity, or the first + // quote not immediately following the first equals sign (taking whitespace + // into consideration). To keep things simpler, we don't try to distinguish + // between entity-introducing ampersands vs. other uses, nor do we bother to + // check for a second slash for a comment, nor do we bother to check for + // !-- following a less-than sign. We stop instead on any ampersand + // slash, or less-than sign. + size_t position = 0; + if ((position = decodedSnippet.find('=')) != notFound + && (position = decodedSnippet.find(isNotHTMLSpace, position + 1)) != notFound + && (position = decodedSnippet.find(isTerminatingCharacter, isHTMLQuote(decodedSnippet[position]) ? position + 1 : position)) != notFound) { + decodedSnippet.truncate(position); + } } static bool isSemicolonSeparatedAttribute(const HTMLToken::Attribute& attribute) { -#if ENABLE(SVG) return threadSafeMatch(attribute.name, SVGNames::valuesAttr); -#else - UNUSED_PARAM(attribute); - return false; -#endif } static bool semicolonSeparatedValueContainsJavaScriptURL(const String& value) { Vector<String> valueList; value.split(';', valueList); - for (size_t i = 0; i < valueList.size(); ++i) { - if (protocolIsJavaScript(valueList[i])) + for (auto& value : valueList) { + if (protocolIsJavaScript(value)) return true; } return false; @@ -202,8 +251,7 @@ static bool semicolonSeparatedValueContainsJavaScriptURL(const String& value) XSSAuditor::XSSAuditor() : m_isEnabled(false) - , m_xssProtection(ContentSecurityPolicy::FilterReflectedXSS) - , m_didSendValidCSPHeader(false) + , m_xssProtection(XSSProtectionDisposition::Enabled) , m_didSendValidXSSProtectionHeader(false) , m_state(Uninitialized) , m_scriptTagNestingLevel(0) @@ -240,7 +288,7 @@ void XSSAuditor::init(Document* document, XSSAuditorDelegate* auditorDelegate) if (!m_isEnabled) return; - m_documentURL = document->url().copy(); + m_documentURL = document->url().isolatedCopy(); // In theory, the Document could have detached from the Frame after the // XSSAuditor was constructed. @@ -263,45 +311,42 @@ void XSSAuditor::init(Document* document, XSSAuditorDelegate* auditorDelegate) if (document->decoder()) m_encoding = document->decoder()->encoding(); - m_decodedURL = fullyDecodeString(m_documentURL.string(), m_encoding); + m_decodedURL = canonicalize(m_documentURL.string(), TruncationStyle::None); if (m_decodedURL.find(isRequiredForInjection) == notFound) m_decodedURL = String(); String httpBodyAsString; if (DocumentLoader* documentLoader = document->frame()->loader().documentLoader()) { - DEFINE_STATIC_LOCAL(String, XSSProtectionHeader, (ASCIILiteral("X-XSS-Protection"))); + static NeverDestroyed<String> XSSProtectionHeader(ASCIILiteral("X-XSS-Protection")); String headerValue = documentLoader->response().httpHeaderField(XSSProtectionHeader); String errorDetails; unsigned errorPosition = 0; - String reportURL; - URL xssProtectionReportURL; - - // Process the X-XSS-Protection header, then mix in the CSP header's value. - ContentSecurityPolicy::ReflectedXSSDisposition xssProtectionHeader = parseXSSProtectionHeader(headerValue, errorDetails, errorPosition, reportURL); - m_didSendValidXSSProtectionHeader = xssProtectionHeader != ContentSecurityPolicy::ReflectedXSSUnset && xssProtectionHeader != ContentSecurityPolicy::ReflectedXSSInvalid; - if ((xssProtectionHeader == ContentSecurityPolicy::FilterReflectedXSS || xssProtectionHeader == ContentSecurityPolicy::BlockReflectedXSS) && !reportURL.isEmpty()) { - xssProtectionReportURL = document->completeURL(reportURL); - if (MixedContentChecker::isMixedContent(document->securityOrigin(), xssProtectionReportURL)) { + String parsedReportURL; + URL reportURL; + m_xssProtection = parseXSSProtectionHeader(headerValue, errorDetails, errorPosition, parsedReportURL); + m_didSendValidXSSProtectionHeader = !headerValue.isNull() && m_xssProtection != XSSProtectionDisposition::Invalid; + + if ((m_xssProtection == XSSProtectionDisposition::Enabled || m_xssProtection == XSSProtectionDisposition::BlockEnabled) && !parsedReportURL.isEmpty()) { + reportURL = document->completeURL(parsedReportURL); + if (MixedContentChecker::isMixedContent(document->securityOrigin(), reportURL)) { errorDetails = "insecure reporting URL for secure page"; - xssProtectionHeader = ContentSecurityPolicy::ReflectedXSSInvalid; - xssProtectionReportURL = URL(); + m_xssProtection = XSSProtectionDisposition::Invalid; + reportURL = URL(); + m_didSendValidXSSProtectionHeader = false; } } - if (xssProtectionHeader == ContentSecurityPolicy::ReflectedXSSInvalid) - document->addConsoleMessage(SecurityMessageSource, ErrorMessageLevel, "Error parsing header X-XSS-Protection: " + headerValue + ": " + errorDetails + " at character position " + String::format("%u", errorPosition) + ". The default protections will be applied."); - - ContentSecurityPolicy::ReflectedXSSDisposition cspHeader = document->contentSecurityPolicy()->reflectedXSSDisposition(); - m_didSendValidCSPHeader = cspHeader != ContentSecurityPolicy::ReflectedXSSUnset && cspHeader != ContentSecurityPolicy::ReflectedXSSInvalid; + if (m_xssProtection == XSSProtectionDisposition::Invalid) { + document->addConsoleMessage(MessageSource::Security, MessageLevel::Error, "Error parsing header X-XSS-Protection: " + headerValue + ": " + errorDetails + " at character position " + String::format("%u", errorPosition) + ". The default protections will be applied."); + m_xssProtection = XSSProtectionDisposition::Enabled; + } - m_xssProtection = combineXSSProtectionHeaderAndCSP(xssProtectionHeader, cspHeader); - // FIXME: Combine the two report URLs in some reasonable way. if (auditorDelegate) - auditorDelegate->setReportURL(xssProtectionReportURL.copy()); + auditorDelegate->setReportURL(reportURL.isolatedCopy()); FormData* httpBody = documentLoader->originalRequest().httpBody(); if (httpBody && !httpBody->isEmpty()) { httpBodyAsString = httpBody->flattenToString(); if (!httpBodyAsString.isEmpty()) { - m_decodedHTTPBody = fullyDecodeString(httpBodyAsString, m_encoding); + m_decodedHTTPBody = canonicalize(httpBodyAsString, TruncationStyle::None); if (m_decodedHTTPBody.find(isRequiredForInjection) == notFound) m_decodedHTTPBody = String(); if (m_decodedHTTPBody.length() >= minimumLengthForSuffixTree) @@ -319,7 +364,7 @@ void XSSAuditor::init(Document* document, XSSAuditorDelegate* auditorDelegate) std::unique_ptr<XSSInfo> XSSAuditor::filterToken(const FilterTokenRequest& request) { ASSERT(m_state == Initialized); - if (!m_isEnabled || m_xssProtection == ContentSecurityPolicy::AllowReflectedXSS) + if (!m_isEnabled || m_xssProtection == XSSProtectionDisposition::Disabled) return nullptr; bool didBlockScript = false; @@ -335,8 +380,8 @@ std::unique_ptr<XSSInfo> XSSAuditor::filterToken(const FilterTokenRequest& reque if (!didBlockScript) return nullptr; - bool didBlockEntirePage = (m_xssProtection == ContentSecurityPolicy::BlockReflectedXSS); - return std::make_unique<XSSInfo>(didBlockEntirePage, m_didSendValidXSSProtectionHeader, m_didSendValidCSPHeader); + bool didBlockEntirePage = m_xssProtection == XSSProtectionDisposition::BlockEnabled; + return std::make_unique<XSSInfo>(m_documentURL, didBlockEntirePage, m_didSendValidXSSProtectionHeader); } bool XSSAuditor::filterStartToken(const FilterTokenRequest& request) @@ -355,8 +400,8 @@ bool XSSAuditor::filterStartToken(const FilterTokenRequest& request) didBlockScript |= filterEmbedToken(request); else if (hasName(request.token, appletTag)) didBlockScript |= filterAppletToken(request); - else if (hasName(request.token, iframeTag)) - didBlockScript |= filterIframeToken(request); + else if (hasName(request.token, iframeTag) || hasName(request.token, frameTag)) + didBlockScript |= filterFrameToken(request); else if (hasName(request.token, metaTag)) didBlockScript |= filterMetaToken(request); else if (hasName(request.token, baseTag)) @@ -383,9 +428,10 @@ void XSSAuditor::filterEndToken(const FilterTokenRequest& request) bool XSSAuditor::filterCharacterToken(const FilterTokenRequest& request) { ASSERT(m_scriptTagNestingLevel); - if (isContainedInRequest(m_cachedDecodedSnippet) && isContainedInRequest(decodedSnippetForJavaScript(request))) { - request.token.eraseCharacters(); - request.token.appendToCharacter(' '); // Technically, character tokens can't be empty. + if (m_wasScriptTagFoundInRequest && isContainedInRequest(canonicalizedSnippetForJavaScript(request))) { + request.token.clear(); + LChar space = ' '; + request.token.appendToCharacter(space); // Technically, character tokens can't be empty. return true; } return false; @@ -396,12 +442,12 @@ bool XSSAuditor::filterScriptToken(const FilterTokenRequest& request) ASSERT(request.token.type() == HTMLToken::StartTag); ASSERT(hasName(request.token, scriptTag)); - m_cachedDecodedSnippet = decodedSnippetForName(request); + m_wasScriptTagFoundInRequest = isContainedInRequest(canonicalizedSnippetForTagName(request)); bool didBlockScript = false; - if (isContainedInRequest(decodedSnippetForName(request))) { - didBlockScript |= eraseAttributeIfInjected(request, srcAttr, blankURL().string(), SrcLikeAttribute); - didBlockScript |= eraseAttributeIfInjected(request, XLinkNames::hrefAttr, blankURL().string(), SrcLikeAttribute); + if (m_wasScriptTagFoundInRequest) { + didBlockScript |= eraseAttributeIfInjected(request, srcAttr, blankURL().string(), TruncationStyle::SrcLikeAttribute); + didBlockScript |= eraseAttributeIfInjected(request, XLinkNames::hrefAttr, blankURL().string(), TruncationStyle::SrcLikeAttribute); } return didBlockScript; @@ -413,8 +459,8 @@ bool XSSAuditor::filterObjectToken(const FilterTokenRequest& request) ASSERT(hasName(request.token, objectTag)); bool didBlockScript = false; - if (isContainedInRequest(decodedSnippetForName(request))) { - didBlockScript |= eraseAttributeIfInjected(request, dataAttr, blankURL().string(), SrcLikeAttribute); + if (isContainedInRequest(canonicalizedSnippetForTagName(request))) { + didBlockScript |= eraseAttributeIfInjected(request, dataAttr, blankURL().string(), TruncationStyle::SrcLikeAttribute); didBlockScript |= eraseAttributeIfInjected(request, typeAttr); didBlockScript |= eraseAttributeIfInjected(request, classidAttr); } @@ -434,7 +480,7 @@ bool XSSAuditor::filterParamToken(const FilterTokenRequest& request) if (!HTMLParamElement::isURLParameter(String(nameAttribute.value))) return false; - return eraseAttributeIfInjected(request, valueAttr, blankURL().string(), SrcLikeAttribute); + return eraseAttributeIfInjected(request, valueAttr, blankURL().string(), TruncationStyle::SrcLikeAttribute); } bool XSSAuditor::filterEmbedToken(const FilterTokenRequest& request) @@ -443,9 +489,9 @@ bool XSSAuditor::filterEmbedToken(const FilterTokenRequest& request) ASSERT(hasName(request.token, embedTag)); bool didBlockScript = false; - if (isContainedInRequest(decodedSnippetForName(request))) { - didBlockScript |= eraseAttributeIfInjected(request, codeAttr, String(), SrcLikeAttribute); - didBlockScript |= eraseAttributeIfInjected(request, srcAttr, blankURL().string(), SrcLikeAttribute); + if (isContainedInRequest(canonicalizedSnippetForTagName(request))) { + didBlockScript |= eraseAttributeIfInjected(request, codeAttr, String(), TruncationStyle::SrcLikeAttribute); + didBlockScript |= eraseAttributeIfInjected(request, srcAttr, blankURL().string(), TruncationStyle::SrcLikeAttribute); didBlockScript |= eraseAttributeIfInjected(request, typeAttr); } return didBlockScript; @@ -457,21 +503,21 @@ bool XSSAuditor::filterAppletToken(const FilterTokenRequest& request) ASSERT(hasName(request.token, appletTag)); bool didBlockScript = false; - if (isContainedInRequest(decodedSnippetForName(request))) { - didBlockScript |= eraseAttributeIfInjected(request, codeAttr, String(), SrcLikeAttribute); + if (isContainedInRequest(canonicalizedSnippetForTagName(request))) { + didBlockScript |= eraseAttributeIfInjected(request, codeAttr, String(), TruncationStyle::SrcLikeAttribute); didBlockScript |= eraseAttributeIfInjected(request, objectAttr); } return didBlockScript; } -bool XSSAuditor::filterIframeToken(const FilterTokenRequest& request) +bool XSSAuditor::filterFrameToken(const FilterTokenRequest& request) { ASSERT(request.token.type() == HTMLToken::StartTag); - ASSERT(hasName(request.token, iframeTag)); + ASSERT(hasName(request.token, iframeTag) || hasName(request.token, frameTag)); - bool didBlockScript = eraseAttributeIfInjected(request, srcdocAttr, String(), ScriptLikeAttribute); - if (isContainedInRequest(decodedSnippetForName(request))) - didBlockScript |= eraseAttributeIfInjected(request, srcAttr, String(), SrcLikeAttribute); + bool didBlockScript = eraseAttributeIfInjected(request, srcdocAttr, String(), TruncationStyle::ScriptLikeAttribute); + if (isContainedInRequest(canonicalizedSnippetForTagName(request))) + didBlockScript |= eraseAttributeIfInjected(request, srcAttr, String(), TruncationStyle::SrcLikeAttribute); return didBlockScript; } @@ -505,7 +551,7 @@ bool XSSAuditor::filterInputToken(const FilterTokenRequest& request) ASSERT(request.token.type() == HTMLToken::StartTag); ASSERT(hasName(request.token, inputTag)); - return eraseAttributeIfInjected(request, formactionAttr, blankURL().string(), SrcLikeAttribute); + return eraseAttributeIfInjected(request, formactionAttr, blankURL().string(), TruncationStyle::SrcLikeAttribute); } bool XSSAuditor::filterButtonToken(const FilterTokenRequest& request) @@ -513,12 +559,12 @@ bool XSSAuditor::filterButtonToken(const FilterTokenRequest& request) ASSERT(request.token.type() == HTMLToken::StartTag); ASSERT(hasName(request.token, buttonTag)); - return eraseAttributeIfInjected(request, formactionAttr, blankURL().string(), SrcLikeAttribute); + return eraseAttributeIfInjected(request, formactionAttr, blankURL().string(), TruncationStyle::SrcLikeAttribute); } bool XSSAuditor::eraseDangerousAttributesIfInjected(const FilterTokenRequest& request) { - DEFINE_STATIC_LOCAL(String, safeJavaScriptURL, (ASCIILiteral("javascript:void(0)"))); + static NeverDestroyed<String> safeJavaScriptURL(ASCIILiteral("javascript:void(0)")); bool didBlockScript = false; for (size_t i = 0; i < request.token.attributes().size(); ++i) { @@ -529,106 +575,75 @@ bool XSSAuditor::eraseDangerousAttributesIfInjected(const FilterTokenRequest& re bool valueContainsJavaScriptURL = (!isInlineEventHandler && protocolIsJavaScript(strippedValue)) || (isSemicolonSeparatedAttribute(attribute) && semicolonSeparatedValueContainsJavaScriptURL(strippedValue)); if (!isInlineEventHandler && !valueContainsJavaScriptURL) continue; - if (!isContainedInRequest(decodedSnippetForAttribute(request, attribute, ScriptLikeAttribute))) + if (!isContainedInRequest(canonicalize(snippetFromAttribute(request, attribute), TruncationStyle::ScriptLikeAttribute))) continue; request.token.eraseValueOfAttribute(i); if (valueContainsJavaScriptURL) - request.token.appendToAttributeValue(i, safeJavaScriptURL); + request.token.appendToAttributeValue(i, safeJavaScriptURL.get()); didBlockScript = true; } return didBlockScript; } -bool XSSAuditor::eraseAttributeIfInjected(const FilterTokenRequest& request, const QualifiedName& attributeName, const String& replacementValue, AttributeKind treatment) +bool XSSAuditor::eraseAttributeIfInjected(const FilterTokenRequest& request, const QualifiedName& attributeName, const String& replacementValue, TruncationStyle truncationStyle) { size_t indexOfAttribute = 0; - if (findAttributeWithName(request.token, attributeName, indexOfAttribute)) { - const HTMLToken::Attribute& attribute = request.token.attributes().at(indexOfAttribute); - if (isContainedInRequest(decodedSnippetForAttribute(request, attribute, treatment))) { - if (threadSafeMatch(attributeName, srcAttr) && isLikelySafeResource(String(attribute.value))) - return false; - if (threadSafeMatch(attributeName, http_equivAttr) && !isDangerousHTTPEquiv(String(attribute.value))) - return false; - request.token.eraseValueOfAttribute(indexOfAttribute); - if (!replacementValue.isEmpty()) - request.token.appendToAttributeValue(indexOfAttribute, replacementValue); - return true; - } + if (!findAttributeWithName(request.token, attributeName, indexOfAttribute)) + return false; + + const HTMLToken::Attribute& attribute = request.token.attributes().at(indexOfAttribute); + if (!isContainedInRequest(canonicalize(snippetFromAttribute(request, attribute), truncationStyle))) + return false; + + if (threadSafeMatch(attributeName, srcAttr)) { + if (isLikelySafeResource(String(attribute.value))) + return false; + } else if (threadSafeMatch(attributeName, http_equivAttr)) { + if (!isDangerousHTTPEquiv(String(attribute.value))) + return false; } - return false; + + request.token.eraseValueOfAttribute(indexOfAttribute); + if (!replacementValue.isEmpty()) + request.token.appendToAttributeValue(indexOfAttribute, replacementValue); + return true; } -String XSSAuditor::decodedSnippetForName(const FilterTokenRequest& request) +String XSSAuditor::canonicalizedSnippetForTagName(const FilterTokenRequest& request) { // Grab a fixed number of characters equal to the length of the token's name plus one (to account for the "<"). - return fullyDecodeString(request.sourceTracker.sourceForToken(request.token), m_encoding).substring(0, request.token.name().size() + 1); + return canonicalize(request.sourceTracker.source(request.token).substring(0, request.token.name().size() + 1), TruncationStyle::None); } -String XSSAuditor::decodedSnippetForAttribute(const FilterTokenRequest& request, const HTMLToken::Attribute& attribute, AttributeKind treatment) +String XSSAuditor::snippetFromAttribute(const FilterTokenRequest& request, const HTMLToken::Attribute& attribute) { - // The range doesn't inlcude the character which terminates the value. So, + // The range doesn't include the character which terminates the value. So, // for an input of |name="value"|, the snippet is |name="value|. For an // unquoted input of |name=value |, the snippet is |name=value|. // FIXME: We should grab one character before the name also. - int start = attribute.nameRange.start - request.token.startIndex(); - int end = attribute.valueRange.end - request.token.startIndex(); - String decodedSnippet = fullyDecodeString(request.sourceTracker.sourceForToken(request.token).substring(start, end - start), m_encoding); - decodedSnippet.truncate(kMaximumFragmentLengthTarget); - if (treatment == SrcLikeAttribute) { - int slashCount = 0; - bool commaSeen = false; - // In HTTP URLs, characters following the first ?, #, or third slash may come from - // the page itself and can be merely ignored by an attacker's server when a remote - // script or script-like resource is requested. In DATA URLS, the payload starts at - // the first comma, and the the first /*, //, or <!-- may introduce a comment. Characters - // following this may come from the page itself and may be ignored when the script is - // executed. For simplicity, we don't differentiate based on URL scheme, and stop at - // the first # or ?, the third slash, or the first slash or < once a comma is seen. - for (size_t currentLength = 0; currentLength < decodedSnippet.length(); ++currentLength) { - UChar currentChar = decodedSnippet[currentLength]; - if (currentChar == '?' - || currentChar == '#' - || ((currentChar == '/' || currentChar == '\\') && (commaSeen || ++slashCount > 2)) - || (currentChar == '<' && commaSeen)) { - decodedSnippet.truncate(currentLength); - break; - } - if (currentChar == ',') - commaSeen = true; - } - } else if (treatment == ScriptLikeAttribute) { - // Beware of trailing characters which came from the page itself, not the - // injected vector. Excluding the terminating character covers common cases - // where the page immediately ends the attribute, but doesn't cover more - // complex cases where there is other page data following the injection. - // Generally, these won't parse as javascript, so the injected vector - // typically excludes them from consideration via a single-line comment or - // by enclosing them in a string literal terminated later by the page's own - // closing punctuation. Since the snippet has not been parsed, the vector - // may also try to introduce these via entities. As a result, we'd like to - // stop before the first "//", the first <!--, the first entity, or the first - // quote not immediately following the first equals sign (taking whitespace - // into consideration). To keep things simpler, we don't try to distinguish - // between entity-introducing amperands vs. other uses, nor do we bother to - // check for a second slash for a comment, nor do we bother to check for - // !-- following a less-than sign. We stop instead on any ampersand - // slash, or less-than sign. - size_t position = 0; - if ((position = decodedSnippet.find("=")) != notFound - && (position = decodedSnippet.find(isNotHTMLSpace, position + 1)) != notFound - && (position = decodedSnippet.find(isTerminatingCharacter, isHTMLQuote(decodedSnippet[position]) ? position + 1 : position)) != notFound) { - decodedSnippet.truncate(position); - } + return request.sourceTracker.source(request.token, attribute.startOffset, attribute.endOffset); +} + +String XSSAuditor::canonicalize(const String& snippet, TruncationStyle truncationStyle) +{ + String decodedSnippet = fullyDecodeString(snippet, m_encoding); + if (truncationStyle != TruncationStyle::None) { + decodedSnippet.truncate(kMaximumFragmentLengthTarget); + if (truncationStyle == TruncationStyle::SrcLikeAttribute) + truncateForSrcLikeAttribute(decodedSnippet); + else if (truncationStyle == TruncationStyle::ScriptLikeAttribute) + truncateForScriptLikeAttribute(decodedSnippet); } - return decodedSnippet; + return decodedSnippet.removeCharacters(&isNonCanonicalCharacter); } -String XSSAuditor::decodedSnippetForJavaScript(const FilterTokenRequest& request) +String XSSAuditor::canonicalizedSnippetForJavaScript(const FilterTokenRequest& request) { - String string = request.sourceTracker.sourceForToken(request.token); + String string = request.sourceTracker.source(request.token); size_t startPosition = 0; size_t endPosition = string.length(); size_t foundPosition = notFound; + size_t lastNonSpacePosition = notFound; // Skip over initial comments to find start of code. while (startPosition < endPosition) { @@ -657,30 +672,39 @@ String XSSAuditor::decodedSnippetForJavaScript(const FilterTokenRequest& request String result; while (startPosition < endPosition && !result.length()) { - // Stop at next comment (using the same rules as above for SVG/XML vs HTML), when we - // encounter a comma, or when we exceed the maximum length target. The comma rule - // covers a common parameter concatenation case performed by some webservers. - // After hitting the length target, we can only stop at a point where we know we are - // not in the middle of a %-escape sequence. For the sake of simplicity, approximate - // not stopping inside a (possibly multiply encoded) %-esacpe sequence by breaking on - // whitespace only. We should have enough text in these cases to avoid false positives. + // Stop at next comment (using the same rules as above for SVG/XML vs HTML), when we encounter a comma, + // when we hit an opening <script> tag, or when we exceed the maximum length target. The comma rule + // covers a common parameter concatenation case performed by some web servers. + lastNonSpacePosition = notFound; for (foundPosition = startPosition; foundPosition < endPosition; foundPosition++) { if (!request.shouldAllowCDATA) { - if (startsSingleLineCommentAt(string, foundPosition) || startsMultiLineCommentAt(string, foundPosition)) { - foundPosition += 2; - break; - } - if (startsHTMLCommentAt(string, foundPosition)) { - foundPosition += 4; + if (startsSingleLineCommentAt(string, foundPosition) + || startsMultiLineCommentAt(string, foundPosition) + || startsHTMLCommentAt(string, foundPosition)) { break; } } - if (string[foundPosition] == ',' || (foundPosition > startPosition + kMaximumFragmentLengthTarget && isHTMLSpace(string[foundPosition]))) { + if (string[foundPosition] == ',') + break; + + if (lastNonSpacePosition != notFound && startsOpeningScriptTagAt(string, foundPosition)) { + foundPosition = lastNonSpacePosition + 1; break; } + if (foundPosition > startPosition + kMaximumFragmentLengthTarget) { + // After hitting the length target, we can only stop at a point where we know we are + // not in the middle of a %-escape sequence. For the sake of simplicity, approximate + // not stopping inside a (possibly multiply encoded) %-escape sequence by breaking on + // whitespace only. We should have enough text in these cases to avoid false positives. + if (isHTMLSpace(string[foundPosition])) + break; + } + + if (!isHTMLSpace(string[foundPosition])) + lastNonSpacePosition = foundPosition; } - result = fullyDecodeString(string.substring(startPosition, foundPosition - startPosition), m_encoding); + result = canonicalize(string.substring(startPosition, foundPosition - startPosition), TruncationStyle::None); startPosition = foundPosition + 1; } return result; @@ -718,12 +742,4 @@ bool XSSAuditor::isLikelySafeResource(const String& url) return (m_documentURL.host() == resourceURL.host() && resourceURL.query().isEmpty()); } -bool XSSAuditor::isSafeToSendToAnotherThread() const -{ - return m_documentURL.isSafeToSendToAnotherThread() - && m_decodedURL.isSafeToSendToAnotherThread() - && m_decodedHTTPBody.isSafeToSendToAnotherThread() - && m_cachedDecodedSnippet.isSafeToSendToAnotherThread(); -} - } // namespace WebCore |