diff options
Diffstat (limited to 'Source/WebCore/html/parser/HTMLParserIdioms.cpp')
-rw-r--r-- | Source/WebCore/html/parser/HTMLParserIdioms.cpp | 450 |
1 files changed, 245 insertions, 205 deletions
diff --git a/Source/WebCore/html/parser/HTMLParserIdioms.cpp b/Source/WebCore/html/parser/HTMLParserIdioms.cpp index 43dd13ce4..e20276d6d 100644 --- a/Source/WebCore/html/parser/HTMLParserIdioms.cpp +++ b/Source/WebCore/html/parser/HTMLParserIdioms.cpp @@ -26,10 +26,12 @@ #include "HTMLParserIdioms.h" #include "Decimal.h" +#include "QualifiedName.h" #include "URL.h" #include <limits> #include <wtf/MathExtras.h> -#include <wtf/text/AtomicString.h> +#include <wtf/NeverDestroyed.h> +#include <wtf/dtoa.h> #include <wtf/text/StringBuilder.h> namespace WebCore { @@ -71,7 +73,7 @@ String stripLeadingAndTrailingHTMLSpaces(const String& string) if (string.is8Bit()) return stripLeadingAndTrailingHTMLSpaces(string, string.characters8(), length); - return stripLeadingAndTrailingHTMLSpaces(string, string.deprecatedCharacters(), length); + return stripLeadingAndTrailingHTMLSpaces(string, string.characters16(), length); } String serializeForNumberType(const Decimal& number) @@ -152,276 +154,314 @@ double parseToDoubleForNumberType(const String& string) } template <typename CharacterType> -static bool parseHTMLIntegerInternal(const CharacterType* position, const CharacterType* end, int& value) +static std::optional<int> parseHTMLIntegerInternal(const CharacterType* position, const CharacterType* end) { - // Step 3 - int sign = 1; - - // Step 4 - while (position < end) { - if (!isHTMLSpace(*position)) - break; + while (position < end && isHTMLSpace(*position)) ++position; - } - // Step 5 if (position == end) - return false; - ASSERT_WITH_SECURITY_IMPLICATION(position < end); + return std::nullopt; - // Step 6 + bool isNegative = false; if (*position == '-') { - sign = -1; + isNegative = true; ++position; } else if (*position == '+') ++position; - if (position == end) - return false; - ASSERT_WITH_SECURITY_IMPLICATION(position < end); - // Step 7 - if (!isASCIIDigit(*position)) - return false; + if (position == end || !isASCIIDigit(*position)) + return std::nullopt; - // Step 8 - StringBuilder digits; - while (position < end) { - if (!isASCIIDigit(*position)) - break; - digits.append(*position++); - } + constexpr int intMax = std::numeric_limits<int>::max(); + constexpr int base = 10; + constexpr int maxMultiplier = intMax / base; - // Step 9 - bool ok; - if (digits.is8Bit()) - value = sign * charactersToIntStrict(digits.characters8(), digits.length(), &ok); - else - value = sign * charactersToIntStrict(digits.characters16(), digits.length(), &ok); - return ok; + unsigned result = 0; + do { + int digitValue = *position - '0'; + + if (result > maxMultiplier || (result == maxMultiplier && digitValue > (intMax % base) + isNegative)) + return std::nullopt; + + result = base * result + digitValue; + ++position; + } while (position < end && isASCIIDigit(*position)); + + return isNegative ? -result : result; } -// http://www.whatwg.org/specs/web-apps/current-work/#rules-for-parsing-integers -bool parseHTMLInteger(const String& input, int& value) +// https://html.spec.whatwg.org/multipage/infrastructure.html#rules-for-parsing-integers +std::optional<int> parseHTMLInteger(StringView input) { - // Step 1 - // Step 2 unsigned length = input.length(); - if (!length || input.is8Bit()) { - const LChar* start = input.characters8(); - return parseHTMLIntegerInternal(start, start + length, value); + if (!length) + return std::nullopt; + + if (LIKELY(input.is8Bit())) { + auto* start = input.characters8(); + return parseHTMLIntegerInternal(start, start + length); } - const UChar* start = input.characters16(); - return parseHTMLIntegerInternal(start, start + length, value); + auto* start = input.characters16(); + return parseHTMLIntegerInternal(start, start + length); +} + +// https://html.spec.whatwg.org/multipage/infrastructure.html#rules-for-parsing-non-negative-integers +std::optional<unsigned> parseHTMLNonNegativeInteger(StringView input) +{ + std::optional<int> signedValue = parseHTMLInteger(input); + if (!signedValue || signedValue.value() < 0) + return std::nullopt; + + return static_cast<unsigned>(signedValue.value()); } template <typename CharacterType> -static bool parseHTMLNonNegativeIntegerInternal(const CharacterType* position, const CharacterType* end, unsigned& value) +static std::optional<int> parseValidHTMLNonNegativeIntegerInternal(const CharacterType* position, const CharacterType* end) { - // Step 3 - while (position < end) { - if (!isHTMLSpace(*position)) - break; - ++position; + // A string is a valid non-negative integer if it consists of one or more ASCII digits. + for (auto* c = position; c < end; ++c) { + if (!isASCIIDigit(*c)) + return std::nullopt; } - // Step 4 - if (position == end) - return false; - ASSERT_WITH_SECURITY_IMPLICATION(position < end); - - // Step 5 - if (*position == '+') - ++position; + std::optional<int> signedValue = parseHTMLIntegerInternal(position, end); + if (!signedValue || signedValue.value() < 0) + return std::nullopt; - // Step 6 - if (position == end) - return false; - ASSERT_WITH_SECURITY_IMPLICATION(position < end); + return signedValue; +} - // Step 7 - if (!isASCIIDigit(*position)) - return false; +// https://html.spec.whatwg.org/#valid-non-negative-integer +std::optional<int> parseValidHTMLNonNegativeInteger(StringView input) +{ + if (input.isEmpty()) + return std::nullopt; - // Step 8 - StringBuilder digits; - while (position < end) { - if (!isASCIIDigit(*position)) - break; - digits.append(*position++); + if (LIKELY(input.is8Bit())) { + auto* start = input.characters8(); + return parseValidHTMLNonNegativeIntegerInternal(start, start + input.length()); } - // Step 9 - bool ok; - if (digits.is8Bit()) - value = charactersToUIntStrict(digits.characters8(), digits.length(), &ok); - else - value = charactersToUIntStrict(digits.characters16(), digits.length(), &ok); - return ok; + auto* start = input.characters16(); + return parseValidHTMLNonNegativeIntegerInternal(start, start + input.length()); } +template <typename CharacterType> +static std::optional<double> parseValidHTMLFloatingPointNumberInternal(const CharacterType* position, size_t length) +{ + ASSERT(length > 0); + + // parseDouble() allows the string to start with a '+' or to end with a '.' but those + // are not valid floating point numbers as per HTML. + if (*position == '+' || *(position + length - 1) == '.') + return std::nullopt; -// http://www.whatwg.org/specs/web-apps/current-work/#rules-for-parsing-non-negative-integers -bool parseHTMLNonNegativeInteger(const String& input, unsigned& value) + size_t parsedLength = 0; + double number = parseDouble(position, length, parsedLength); + return parsedLength == length && std::isfinite(number) ? number : std::optional<double>(); +} + +// https://html.spec.whatwg.org/#valid-floating-point-number +std::optional<double> parseValidHTMLFloatingPointNumber(StringView input) { - // Step 1 - // Step 2 - unsigned length = input.length(); - if (length && input.is8Bit()) { - const LChar* start = input.characters8(); - return parseHTMLNonNegativeIntegerInternal(start, start + length, value); + if (input.isEmpty()) + return std::nullopt; + + if (LIKELY(input.is8Bit())) { + auto* start = input.characters8(); + return parseValidHTMLFloatingPointNumberInternal(start, input.length()); } - - const UChar* start = input.deprecatedCharacters(); - return parseHTMLNonNegativeIntegerInternal(start, start + length, value); + + auto* start = input.characters16(); + return parseValidHTMLFloatingPointNumberInternal(start, input.length()); } -static bool threadSafeEqual(const StringImpl* a, const StringImpl* b) +static inline bool isHTMLSpaceOrDelimiter(UChar character) { - if (a == b) - return true; - if (a->hash() != b->hash()) - return false; - return equalNonNull(a, b); + return isHTMLSpace(character) || character == ',' || character == ';'; } -bool threadSafeMatch(const QualifiedName& a, const QualifiedName& b) +static inline bool isNumberStart(UChar character) { - return threadSafeEqual(a.localName().impl(), b.localName().impl()); + return isASCIIDigit(character) || character == '.' || character == '-'; } -struct ImageWithScale { - unsigned imageURLStart; - unsigned imageURLLength; - float scaleFactor; +// https://html.spec.whatwg.org/multipage/infrastructure.html#rules-for-parsing-floating-point-number-values +template <typename CharacterType> +static Vector<double> parseHTMLListOfOfFloatingPointNumberValuesInternal(const CharacterType* position, const CharacterType* end) +{ + Vector<double> numbers; + + // This skips past any leading delimiters. + while (position < end && isHTMLSpaceOrDelimiter(*position)) + ++position; + + while (position < end) { + // This skips past leading garbage. + while (position < end && !(isHTMLSpaceOrDelimiter(*position) || isNumberStart(*position))) + ++position; + + const CharacterType* numberStart = position; + while (position < end && !isHTMLSpaceOrDelimiter(*position)) + ++position; + + size_t parsedLength = 0; + double number = parseDouble(numberStart, position - numberStart, parsedLength); + numbers.append(parsedLength > 0 && std::isfinite(number) ? number : 0); - ImageWithScale() - : imageURLStart(0) - , imageURLLength(0) - , scaleFactor(1) - { + // This skips past the delimiter. + while (position < end && isHTMLSpaceOrDelimiter(*position)) + ++position; } - bool hasImageURL() const - { - return imageURLLength; + return numbers; +} + +Vector<double> parseHTMLListOfOfFloatingPointNumberValues(StringView input) +{ + if (LIKELY(input.is8Bit())) { + auto* start = input.characters8(); + return parseHTMLListOfOfFloatingPointNumberValuesInternal(start, start + input.length()); } -}; -typedef Vector<ImageWithScale> ImageCandidates; -static inline bool compareByScaleFactor(const ImageWithScale& first, const ImageWithScale& second) + auto* start = input.characters16(); + return parseHTMLListOfOfFloatingPointNumberValuesInternal(start, start + input.length()); +} + +static bool threadSafeEqual(const StringImpl& a, const StringImpl& b) { - return first.scaleFactor < second.scaleFactor; + if (&a == &b) + return true; + if (a.hash() != b.hash()) + return false; + return equal(a, b); } -static inline bool isHTMLSpaceOrComma(UChar character) +bool threadSafeMatch(const QualifiedName& a, const QualifiedName& b) { - return isHTMLSpace(character) || character == ','; + return threadSafeEqual(*a.localName().impl(), *b.localName().impl()); } -// See the specifications for more details about the algorithm to follow. -// http://www.w3.org/TR/2013/WD-html-srcset-20130228/#processing-the-image-candidates. -static void parseImagesWithScaleFromSrcsetAttribute(const String& srcsetAttribute, ImageCandidates& imageCandidates) +String parseCORSSettingsAttribute(const AtomicString& value) { - ASSERT(imageCandidates.isEmpty()); + if (value.isNull()) + return String(); + if (equalIgnoringASCIICase(value, "use-credentials")) + return ASCIILiteral("use-credentials"); + return ASCIILiteral("anonymous"); +} - size_t imageCandidateStart = 0; - unsigned srcsetAttributeLength = srcsetAttribute.length(); +// https://html.spec.whatwg.org/multipage/semantics.html#attr-meta-http-equiv-refresh +template <typename CharacterType> +static bool parseHTTPRefreshInternal(const CharacterType* position, const CharacterType* end, double& parsedDelay, String& parsedURL) +{ + while (position < end && isHTMLSpace(*position)) + ++position; - while (imageCandidateStart < srcsetAttributeLength) { - float imageScaleFactor = 1; - size_t separator; + const CharacterType* numberStart = position; + while (position < end && isASCIIDigit(*position)) + ++position; - // 4. Splitting loop: Skip whitespace. - size_t imageURLStart = srcsetAttribute.find(isNotHTMLSpace, imageCandidateStart); - if (imageURLStart == notFound) - break; - // If The current candidate is either totally empty or only contains space, skipping. - if (srcsetAttribute[imageURLStart] == ',') { - imageCandidateStart = imageURLStart + 1; - continue; + std::optional<unsigned> number = parseHTMLNonNegativeInteger(StringView(numberStart, position - numberStart)); + if (!number) + return false; + + while (position < end && (isASCIIDigit(*position) || *position == '.')) + ++position; + + if (position == end) { + parsedDelay = number.value(); + return true; + } + + if (*position != ';' && *position != ',' && !isHTMLSpace(*position)) + return false; + + parsedDelay = number.value(); + + while (position < end && isHTMLSpace(*position)) + ++position; + + if (position < end && (*position == ';' || *position == ',')) + ++position; + + while (position < end && isHTMLSpace(*position)) + ++position; + + if (position == end) + return true; + + if (*position == 'U' || *position == 'u') { + StringView url(position, end - position); + + ++position; + + if (position < end && (*position == 'R' || *position == 'r')) + ++position; + else { + parsedURL = url.toString(); + return true; } - // 5. Collect a sequence of characters that are not space characters, and let that be url. - size_t imageURLEnd = srcsetAttribute.find(isHTMLSpace, imageURLStart + 1); - if (imageURLEnd == notFound) { - imageURLEnd = srcsetAttributeLength; - separator = srcsetAttributeLength; - } else if (srcsetAttribute[imageURLEnd - 1] == ',') { - --imageURLEnd; - separator = imageURLEnd; - } else { - // 7. Collect a sequence of characters that are not "," (U+002C) characters, and let that be descriptors. - size_t imageScaleStart = srcsetAttribute.find(isNotHTMLSpace, imageURLEnd + 1); - if (imageScaleStart == notFound) - separator = srcsetAttributeLength; - else if (srcsetAttribute[imageScaleStart] == ',') - separator = imageScaleStart; - else { - // This part differs from the spec as the current implementation only supports pixel density descriptors for now. - size_t imageScaleEnd = srcsetAttribute.find(isHTMLSpaceOrComma, imageScaleStart + 1); - imageScaleEnd = (imageScaleEnd == notFound) ? srcsetAttributeLength : imageScaleEnd; - size_t commaPosition = imageScaleEnd; - // Make sure there are no other descriptors. - while ((commaPosition < srcsetAttributeLength - 1) && isHTMLSpace(srcsetAttribute[commaPosition])) - ++commaPosition; - // If the first not html space character after the scale modifier is not a comma, - // the current candidate is an invalid input. - if ((commaPosition < srcsetAttributeLength - 1) && srcsetAttribute[commaPosition] != ',') { - // Find the nearest comma and skip the input. - commaPosition = srcsetAttribute.find(',', commaPosition + 1); - if (commaPosition == notFound) - break; - imageCandidateStart = commaPosition + 1; - continue; - } - separator = commaPosition; - if (srcsetAttribute[imageScaleEnd - 1] != 'x') { - imageCandidateStart = separator + 1; - continue; - } - bool validScaleFactor = false; - size_t scaleFactorLengthWithoutUnit = imageScaleEnd - imageScaleStart - 1; - imageScaleFactor = charactersToFloat(srcsetAttribute.deprecatedCharacters() + imageScaleStart, scaleFactorLengthWithoutUnit, &validScaleFactor); - - if (!validScaleFactor) { - imageCandidateStart = separator + 1; - continue; - } - } + + if (position < end && (*position == 'L' || *position == 'l')) + ++position; + else { + parsedURL = url.toString(); + return true; } - ImageWithScale image; - image.imageURLStart = imageURLStart; - image.imageURLLength = imageURLEnd - imageURLStart; - image.scaleFactor = imageScaleFactor; - - imageCandidates.append(image); - // 11. Return to the step labeled splitting loop. - imageCandidateStart = separator + 1; - } -} -String bestFitSourceForImageAttributes(float deviceScaleFactor, const String& srcAttribute, const String& srcsetAttribute) -{ - ImageCandidates imageCandidates; + while (position < end && isHTMLSpace(*position)) + ++position; - parseImagesWithScaleFromSrcsetAttribute(srcsetAttribute, imageCandidates); + if (position < end && *position == '=') + ++position; + else { + parsedURL = url.toString(); + return true; + } - if (!srcAttribute.isEmpty()) { - ImageWithScale srcPlaceholderImage; - imageCandidates.append(srcPlaceholderImage); + while (position < end && isHTMLSpace(*position)) + ++position; } - if (imageCandidates.isEmpty()) - return String(); + CharacterType quote; + if (position < end && (*position == '\'' || *position == '"')) { + quote = *position; + ++position; + } else + quote = '\0'; - std::stable_sort(imageCandidates.begin(), imageCandidates.end(), compareByScaleFactor); + StringView url(position, end - position); - for (size_t i = 0; i < imageCandidates.size() - 1; ++i) { - if (imageCandidates[i].scaleFactor >= deviceScaleFactor) - return imageCandidates[i].hasImageURL() ? srcsetAttribute.substringSharingImpl(imageCandidates[i].imageURLStart, imageCandidates[i].imageURLLength) : srcAttribute; + if (quote != '\0') { + size_t index = url.find(quote); + if (index != notFound) + url = url.substring(0, index); } - const ImageWithScale& lastCandidate = imageCandidates.last(); - return lastCandidate.hasImageURL() ? srcsetAttribute.substringSharingImpl(lastCandidate.imageURLStart, lastCandidate.imageURLLength) : srcAttribute; + + parsedURL = url.toString(); + return true; +} + +bool parseMetaHTTPEquivRefresh(const StringView& input, double& delay, String& url) +{ + if (LIKELY(input.is8Bit())) { + auto* start = input.characters8(); + return parseHTTPRefreshInternal(start, start + input.length(), delay, url); + } + + auto* start = input.characters16(); + return parseHTTPRefreshInternal(start, start + input.length(), delay, url); +} + +// https://html.spec.whatwg.org/#rules-for-parsing-a-hash-name-reference +AtomicString parseHTMLHashNameReference(StringView usemap) +{ + size_t numberSignIndex = usemap.find('#'); + if (numberSignIndex == notFound) + return nullAtom; + return usemap.substring(numberSignIndex + 1).toAtomicString(); } } |