1 file changed, 245 insertions, 205 deletions
diff --git a/Source/WebCore/html/parser/HTMLParserIdioms.cpp b/Source/WebCore/html/parser/HTMLParserIdioms.cpp
index 43dd13ce4..e20276d6d 100644
--- a/Source/WebCore/html/parser/HTMLParserIdioms.cpp
+++ b/Source/WebCore/html/parser/HTMLParserIdioms.cpp
@@ -26,10 +26,12 @@
 #include "HTMLParserIdioms.h"
 
 #include "Decimal.h"
+#include "QualifiedName.h"
 #include "URL.h"
 #include <limits>
 #include <wtf/MathExtras.h>
-#include <wtf/text/AtomicString.h>
+#include <wtf/NeverDestroyed.h>
+#include <wtf/dtoa.h>
 #include <wtf/text/StringBuilder.h>
 
 namespace WebCore {
@@ -71,7 +73,7 @@ String stripLeadingAndTrailingHTMLSpaces(const String& string)
     if (string.is8Bit())
         return stripLeadingAndTrailingHTMLSpaces(string, string.characters8(), length);
 
-    return stripLeadingAndTrailingHTMLSpaces(string, string.deprecatedCharacters(), length);
+    return stripLeadingAndTrailingHTMLSpaces(string, string.characters16(), length);
 }
 
 String serializeForNumberType(const Decimal& number)
@@ -152,276 +154,314 @@ double parseToDoubleForNumberType(const String& string)
 }
 
 template <typename CharacterType>
-static bool parseHTMLIntegerInternal(const CharacterType* position, const CharacterType* end, int& value)
+static std::optional<int> parseHTMLIntegerInternal(const CharacterType* position, const CharacterType* end)
 {
-    // Step 3
-    int sign = 1;
-
-    // Step 4
-    while (position < end) {
-        if (!isHTMLSpace(*position))
-            break;
+    while (position < end && isHTMLSpace(*position))
         ++position;
-    }
 
-    // Step 5
     if (position == end)
-        return false;
-    ASSERT_WITH_SECURITY_IMPLICATION(position < end);
+        return std::nullopt;
 
-    // Step 6
+    bool isNegative = false;
     if (*position == '-') {
-        sign = -1;
+        isNegative = true;
         ++position;
     } else if (*position == '+')
         ++position;
-    if (position == end)
-        return false;
-    ASSERT_WITH_SECURITY_IMPLICATION(position < end);
 
-    // Step 7
-    if (!isASCIIDigit(*position))
-        return false;
+    if (position == end || !isASCIIDigit(*position))
+        return std::nullopt;
 
-    // Step 8
-    StringBuilder digits;
-    while (position < end) {
-        if (!isASCIIDigit(*position))
-            break;
-        digits.append(*position++);
-    }
+    constexpr int intMax = std::numeric_limits<int>::max();
+    constexpr int base = 10;
+    constexpr int maxMultiplier = intMax / base;
 
-    // Step 9
-    bool ok;
-    if (digits.is8Bit())
-        value = sign * charactersToIntStrict(digits.characters8(), digits.length(), &ok);
-    else
-        value = sign * charactersToIntStrict(digits.characters16(), digits.length(), &ok);
-    return ok;
+    unsigned result = 0;
+    do {
+        int digitValue = *position - '0';
+
+        if (result > maxMultiplier || (result == maxMultiplier && digitValue > (intMax % base) + isNegative))
+            return std::nullopt;
+
+        result = base * result + digitValue;
+        ++position;
+    } while (position < end && isASCIIDigit(*position));
+
+    return isNegative ? -result : result;
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/#rules-for-parsing-integers
-bool parseHTMLInteger(const String& input, int& value)
+// https://html.spec.whatwg.org/multipage/infrastructure.html#rules-for-parsing-integers
+std::optional<int> parseHTMLInteger(StringView input)
 {
-    // Step 1
-    // Step 2
     unsigned length = input.length();
-    if (!length || input.is8Bit()) {
-        const LChar* start = input.characters8();
-        return parseHTMLIntegerInternal(start, start + length, value);
+    if (!length)
+        return std::nullopt;
+
+    if (LIKELY(input.is8Bit())) {
+        auto* start = input.characters8();
+        return parseHTMLIntegerInternal(start, start + length);
     }
 
-    const UChar* start = input.characters16();
-    return parseHTMLIntegerInternal(start, start + length, value);
+    auto* start = input.characters16();
+    return parseHTMLIntegerInternal(start, start + length);
+}
+
+// https://html.spec.whatwg.org/multipage/infrastructure.html#rules-for-parsing-non-negative-integers
+std::optional<unsigned> parseHTMLNonNegativeInteger(StringView input)
+{
+    std::optional<int> signedValue = parseHTMLInteger(input);
+    if (!signedValue || signedValue.value() < 0)
+        return std::nullopt;
+
+    return static_cast<unsigned>(signedValue.value());
 }
 
 template <typename CharacterType>
-static bool parseHTMLNonNegativeIntegerInternal(const CharacterType* position, const CharacterType* end, unsigned& value)
+static std::optional<int> parseValidHTMLNonNegativeIntegerInternal(const CharacterType* position, const CharacterType* end)
 {
-    // Step 3
-    while (position < end) {
-        if (!isHTMLSpace(*position))
-            break;
-        ++position;
+    // A string is a valid non-negative integer if it consists of one or more ASCII digits.
+    for (auto* c = position; c < end; ++c) {
+        if (!isASCIIDigit(*c))
+            return std::nullopt;
     }
 
-    // Step 4
-    if (position == end)
-        return false;
-    ASSERT_WITH_SECURITY_IMPLICATION(position < end);
-
-    // Step 5
-    if (*position == '+')
-        ++position;
+    std::optional<int> signedValue = parseHTMLIntegerInternal(position, end);
+    if (!signedValue || signedValue.value() < 0)
+        return std::nullopt;
 
-    // Step 6
-    if (position == end)
-        return false;
-    ASSERT_WITH_SECURITY_IMPLICATION(position < end);
+    return signedValue;
+}
 
-    // Step 7
-    if (!isASCIIDigit(*position))
-        return false;
+// https://html.spec.whatwg.org/#valid-non-negative-integer
+std::optional<int> parseValidHTMLNonNegativeInteger(StringView input)
+{
+    if (input.isEmpty())
+        return std::nullopt;
 
-    // Step 8
-    StringBuilder digits;
-    while (position < end) {
-        if (!isASCIIDigit(*position))
-            break;
-        digits.append(*position++);
+    if (LIKELY(input.is8Bit())) {
+        auto* start = input.characters8();
+        return parseValidHTMLNonNegativeIntegerInternal(start, start + input.length());
     }
 
-    // Step 9
-    bool ok;
-    if (digits.is8Bit())
-        value = charactersToUIntStrict(digits.characters8(), digits.length(), &ok);
-    else
-        value = charactersToUIntStrict(digits.characters16(), digits.length(), &ok);
-    return ok;
+    auto* start = input.characters16();
+    return parseValidHTMLNonNegativeIntegerInternal(start, start + input.length());
 }
 
+template <typename CharacterType>
+static std::optional<double> parseValidHTMLFloatingPointNumberInternal(const CharacterType* position, size_t length)
+{
+    ASSERT(length > 0);
+
+    // parseDouble() allows the string to start with a '+' or to end with a '.' but those
+    // are not valid floating point numbers as per HTML.
+    if (*position == '+' || *(position + length - 1) == '.')
+        return std::nullopt;
 
-// http://www.whatwg.org/specs/web-apps/current-work/#rules-for-parsing-non-negative-integers
-bool parseHTMLNonNegativeInteger(const String& input, unsigned& value)
+    size_t parsedLength = 0;
+    double number = parseDouble(position, length, parsedLength);
+    return parsedLength == length && std::isfinite(number) ? number : std::optional<double>();
+}
+
+// https://html.spec.whatwg.org/#valid-floating-point-number
+std::optional<double> parseValidHTMLFloatingPointNumber(StringView input)
 {
-    // Step 1
-    // Step 2
-    unsigned length = input.length();
-    if (length && input.is8Bit()) {
-        const LChar* start = input.characters8();
-        return parseHTMLNonNegativeIntegerInternal(start, start + length, value);
+    if (input.isEmpty())
+        return std::nullopt;
+
+    if (LIKELY(input.is8Bit())) {
+        auto* start = input.characters8();
+        return parseValidHTMLFloatingPointNumberInternal(start, input.length());
     }
-    
-    const UChar* start = input.deprecatedCharacters();
-    return parseHTMLNonNegativeIntegerInternal(start, start + length, value);
+
+    auto* start = input.characters16();
+    return parseValidHTMLFloatingPointNumberInternal(start, input.length());
 }
 
-static bool threadSafeEqual(const StringImpl* a, const StringImpl* b)
+static inline bool isHTMLSpaceOrDelimiter(UChar character)
 {
-    if (a == b)
-        return true;
-    if (a->hash() != b->hash())
-        return false;
-    return equalNonNull(a, b);
+    return isHTMLSpace(character) || character == ',' || character == ';';
 }
 
-bool threadSafeMatch(const QualifiedName& a, const QualifiedName& b)
+static inline bool isNumberStart(UChar character)
 {
-    return threadSafeEqual(a.localName().impl(), b.localName().impl());
+    return isASCIIDigit(character) || character == '.' || character == '-';
 }
 
-struct ImageWithScale {
-    unsigned imageURLStart;
-    unsigned imageURLLength;
-    float scaleFactor;
+// https://html.spec.whatwg.org/#rules-for-parsing-a-list-of-floating-point-numbers
+template <typename CharacterType>
+static Vector<double> parseHTMLListOfOfFloatingPointNumberValuesInternal(const CharacterType* position, const CharacterType* end)
+{
+    Vector<double> numbers;
+
+    // This skips past any leading delimiters.
+    while (position < end && isHTMLSpaceOrDelimiter(*position))
+        ++position;
+
+    while (position < end) {
+        // This skips past leading garbage.
+        while (position < end && !(isHTMLSpaceOrDelimiter(*position) || isNumberStart(*position)))
+            ++position;
+
+        const CharacterType* numberStart = position;
+        while (position < end && !isHTMLSpaceOrDelimiter(*position))
+            ++position;
+
+        size_t parsedLength = 0;
+        double number = parseDouble(numberStart, position - numberStart, parsedLength);
+        numbers.append(parsedLength > 0 && std::isfinite(number) ? number : 0);
 
-    ImageWithScale()
-        : imageURLStart(0)
-        , imageURLLength(0)
-        , scaleFactor(1)
-    { 
+        // This skips past the delimiter.
+        while (position < end && isHTMLSpaceOrDelimiter(*position))
+            ++position;
     }
 
-    bool hasImageURL() const
-    {
-        return imageURLLength;
+    return numbers;
+}
+
+Vector<double> parseHTMLListOfOfFloatingPointNumberValues(StringView input)
+{
+    if (LIKELY(input.is8Bit())) {
+        auto* start = input.characters8();
+        return parseHTMLListOfOfFloatingPointNumberValuesInternal(start, start + input.length());
     }
-};
-typedef Vector<ImageWithScale> ImageCandidates;
 
-static inline bool compareByScaleFactor(const ImageWithScale& first, const ImageWithScale& second)
+    auto* start = input.characters16();
+    return parseHTMLListOfOfFloatingPointNumberValuesInternal(start, start + input.length());
+}
+
+static bool threadSafeEqual(const StringImpl& a, const StringImpl& b)
 {
-    return first.scaleFactor < second.scaleFactor;
+    if (&a == &b)
+        return true;
+    if (a.hash() != b.hash())
+        return false;
+    return equal(a, b);
 }
 
-static inline bool isHTMLSpaceOrComma(UChar character)
+bool threadSafeMatch(const QualifiedName& a, const QualifiedName& b)
 {
-    return isHTMLSpace(character) || character == ',';
+    return threadSafeEqual(*a.localName().impl(), *b.localName().impl());
 }
 
-// See the specifications for more details about the algorithm to follow.
-// http://www.w3.org/TR/2013/WD-html-srcset-20130228/#processing-the-image-candidates.
-static void parseImagesWithScaleFromSrcsetAttribute(const String& srcsetAttribute, ImageCandidates& imageCandidates)
+String parseCORSSettingsAttribute(const AtomicString& value)
 {
-    ASSERT(imageCandidates.isEmpty());
+    if (value.isNull())
+        return String();
+    if (equalIgnoringASCIICase(value, "use-credentials"))
+        return ASCIILiteral("use-credentials");
+    return ASCIILiteral("anonymous");
+}
 
-    size_t imageCandidateStart = 0;
-    unsigned srcsetAttributeLength = srcsetAttribute.length();
+// https://html.spec.whatwg.org/multipage/semantics.html#attr-meta-http-equiv-refresh
+template <typename CharacterType>
+static bool parseHTTPRefreshInternal(const CharacterType* position, const CharacterType* end, double& parsedDelay, String& parsedURL)
+{
+    while (position < end && isHTMLSpace(*position))
+        ++position;
 
-    while (imageCandidateStart < srcsetAttributeLength) {
-        float imageScaleFactor = 1;
-        size_t separator;
+    const CharacterType* numberStart = position;
+    while (position < end && isASCIIDigit(*position))
+        ++position;
 
-        // 4. Splitting loop: Skip whitespace.
-        size_t imageURLStart = srcsetAttribute.find(isNotHTMLSpace, imageCandidateStart);
-        if (imageURLStart == notFound)
-            break;
-        // If The current candidate is either totally empty or only contains space, skipping.
-        if (srcsetAttribute[imageURLStart] == ',') {
-            imageCandidateStart = imageURLStart + 1;
-            continue;
+    std::optional<unsigned> number = parseHTMLNonNegativeInteger(StringView(numberStart, position - numberStart));
+    if (!number)
+        return false;
+
+    while (position < end && (isASCIIDigit(*position) || *position == '.'))
+        ++position;
+
+    if (position == end) {
+        parsedDelay = number.value();
+        return true;
+    }
+
+    if (*position != ';' && *position != ',' && !isHTMLSpace(*position))
+        return false;
+
+    parsedDelay = number.value();
+
+    while (position < end && isHTMLSpace(*position))
+        ++position;
+
+    if (position < end && (*position == ';' || *position == ','))
+        ++position;
+
+    while (position < end && isHTMLSpace(*position))
+        ++position;
+
+    if (position == end)
+        return true;
+
+    if (*position == 'U' || *position == 'u') {
+        StringView url(position, end - position);
+
+        ++position;
+
+        if (position < end && (*position == 'R' || *position == 'r'))
+            ++position;
+        else {
+            parsedURL = url.toString();
+            return true;
         }
-        // 5. Collect a sequence of characters that are not space characters, and let that be url.
-        size_t imageURLEnd = srcsetAttribute.find(isHTMLSpace, imageURLStart + 1);
-        if (imageURLEnd == notFound) {
-            imageURLEnd = srcsetAttributeLength;
-            separator = srcsetAttributeLength;
-        } else if (srcsetAttribute[imageURLEnd - 1] == ',') {
-            --imageURLEnd;
-            separator = imageURLEnd;
-        } else {
-            // 7. Collect a sequence of characters that are not "," (U+002C) characters, and let that be descriptors.
-            size_t imageScaleStart = srcsetAttribute.find(isNotHTMLSpace, imageURLEnd + 1);
-            if (imageScaleStart == notFound)
-                separator = srcsetAttributeLength;
-            else if (srcsetAttribute[imageScaleStart] == ',')
-                separator = imageScaleStart;
-            else {
-                // This part differs from the spec as the current implementation only supports pixel density descriptors for now.
-                size_t imageScaleEnd = srcsetAttribute.find(isHTMLSpaceOrComma, imageScaleStart + 1);
-                imageScaleEnd = (imageScaleEnd == notFound) ? srcsetAttributeLength : imageScaleEnd;
-                size_t commaPosition = imageScaleEnd;
-                // Make sure there are no other descriptors.
-                while ((commaPosition < srcsetAttributeLength - 1) && isHTMLSpace(srcsetAttribute[commaPosition]))
-                    ++commaPosition;
-                // If the first not html space character after the scale modifier is not a comma,
-                // the current candidate is an invalid input.
-                if ((commaPosition < srcsetAttributeLength - 1) && srcsetAttribute[commaPosition] != ',') {
-                    // Find the nearest comma and skip the input.
-                    commaPosition = srcsetAttribute.find(',', commaPosition + 1);
-                    if (commaPosition == notFound)
-                        break;
-                    imageCandidateStart = commaPosition + 1;
-                    continue;
-                }
-                separator = commaPosition;
-                if (srcsetAttribute[imageScaleEnd - 1] != 'x') {
-                    imageCandidateStart = separator + 1;
-                    continue;
-                }
-                bool validScaleFactor = false;
-                size_t scaleFactorLengthWithoutUnit = imageScaleEnd - imageScaleStart - 1;
-                imageScaleFactor = charactersToFloat(srcsetAttribute.deprecatedCharacters() + imageScaleStart, scaleFactorLengthWithoutUnit, &validScaleFactor);
-
-                if (!validScaleFactor) {
-                    imageCandidateStart = separator + 1;
-                    continue;
-                }
-            }
+
+        if (position < end && (*position == 'L' || *position == 'l'))
+            ++position;
+        else {
+            parsedURL = url.toString();
+            return true;
         }
-        ImageWithScale image;
-        image.imageURLStart = imageURLStart;
-        image.imageURLLength = imageURLEnd - imageURLStart;
-        image.scaleFactor = imageScaleFactor;
-
-        imageCandidates.append(image);
-        // 11. Return to the step labeled splitting loop.
-        imageCandidateStart = separator + 1;
-    }
-}
 
-String bestFitSourceForImageAttributes(float deviceScaleFactor, const String& srcAttribute, const String& srcsetAttribute)
-{
-    ImageCandidates imageCandidates;
+        while (position < end && isHTMLSpace(*position))
+            ++position;
 
-    parseImagesWithScaleFromSrcsetAttribute(srcsetAttribute, imageCandidates);
+        if (position < end && *position == '=')
+            ++position;
+        else {
+            parsedURL = url.toString();
+            return true;
+        }
 
-    if (!srcAttribute.isEmpty()) {
-        ImageWithScale srcPlaceholderImage;
-        imageCandidates.append(srcPlaceholderImage);
+        while (position < end && isHTMLSpace(*position))
+            ++position;
     }
 
-    if (imageCandidates.isEmpty())
-        return String();
+    CharacterType quote;
+    if (position < end && (*position == '\'' || *position == '"')) {
+        quote = *position;
+        ++position;
+    } else
+        quote = '\0';
 
-    std::stable_sort(imageCandidates.begin(), imageCandidates.end(), compareByScaleFactor);
+    StringView url(position, end - position);
 
-    for (size_t i = 0; i < imageCandidates.size() - 1; ++i) {
-        if (imageCandidates[i].scaleFactor >= deviceScaleFactor)
-            return imageCandidates[i].hasImageURL() ? srcsetAttribute.substringSharingImpl(imageCandidates[i].imageURLStart, imageCandidates[i].imageURLLength) : srcAttribute;
+    if (quote != '\0') {
+        size_t index = url.find(quote);
+        if (index != notFound)
+            url = url.substring(0, index);
     }
-    const ImageWithScale& lastCandidate = imageCandidates.last();
-    return lastCandidate.hasImageURL() ? srcsetAttribute.substringSharingImpl(lastCandidate.imageURLStart, lastCandidate.imageURLLength) : srcAttribute;
+
+    parsedURL = url.toString();
+    return true;
+}
+
+bool parseMetaHTTPEquivRefresh(const StringView& input, double& delay, String& url)
+{
+    if (LIKELY(input.is8Bit())) {
+        auto* start = input.characters8();
+        return parseHTTPRefreshInternal(start, start + input.length(), delay, url);
+    }
+
+    auto* start = input.characters16();
+    return parseHTTPRefreshInternal(start, start + input.length(), delay, url);
+}
+
+// https://html.spec.whatwg.org/#rules-for-parsing-a-hash-name-reference
+AtomicString parseHTMLHashNameReference(StringView usemap)
+{
+    size_t numberSignIndex = usemap.find('#');
+    if (numberSignIndex == notFound)
+        return nullAtom;
+    return usemap.substring(numberSignIndex + 1).toAtomicString();
 }
 
 }