summary | refs | log | tree | commit | diff
path: root/Source/WebCore/html/parser/HTMLParserIdioms.cpp
diff options
context:
space:
mode:
author Lorry Tar Creator <lorry-tar-importer@lorry> 2017-06-27 06:07:23 +0000
committer Lorry Tar Creator <lorry-tar-importer@lorry> 2017-06-27 06:07:23 +0000
commit 1bf1084f2b10c3b47fd1a588d85d21ed0eb41d0c (patch)
tree 46dcd36c86e7fbc6e5df36deb463b33e9967a6f7 /Source/WebCore/html/parser/HTMLParserIdioms.cpp
parent 32761a6cee1d0dee366b885b7b9c777e67885688 (diff)
download WebKitGtk-tarball-master.tar.gz
Diffstat (limited to 'Source/WebCore/html/parser/HTMLParserIdioms.cpp')
-rw-r--r-- Source/WebCore/html/parser/HTMLParserIdioms.cpp 450
1 files changed, 245 insertions, 205 deletions
diff --git a/Source/WebCore/html/parser/HTMLParserIdioms.cpp b/Source/WebCore/html/parser/HTMLParserIdioms.cpp
index 43dd13ce4..e20276d6d 100644
--- a/Source/WebCore/html/parser/HTMLParserIdioms.cpp
+++ b/Source/WebCore/html/parser/HTMLParserIdioms.cpp
@@ -26,10 +26,12 @@
#include "HTMLParserIdioms.h"
#include "Decimal.h"
+#include "QualifiedName.h"
#include "URL.h"
#include <limits>
#include <wtf/MathExtras.h>
-#include <wtf/text/AtomicString.h>
+#include <wtf/NeverDestroyed.h>
+#include <wtf/dtoa.h>
#include <wtf/text/StringBuilder.h>
namespace WebCore {
@@ -71,7 +73,7 @@ String stripLeadingAndTrailingHTMLSpaces(const String& string)
if (string.is8Bit())
return stripLeadingAndTrailingHTMLSpaces(string, string.characters8(), length);
- return stripLeadingAndTrailingHTMLSpaces(string, string.deprecatedCharacters(), length);
+ return stripLeadingAndTrailingHTMLSpaces(string, string.characters16(), length);
}
String serializeForNumberType(const Decimal& number)
@@ -152,276 +154,314 @@ double parseToDoubleForNumberType(const String& string)
}
template <typename CharacterType>
-static bool parseHTMLIntegerInternal(const CharacterType* position, const CharacterType* end, int& value)
+static std::optional<int> parseHTMLIntegerInternal(const CharacterType* position, const CharacterType* end)
{
- // Step 3
- int sign = 1;
-
- // Step 4
- while (position < end) {
- if (!isHTMLSpace(*position))
- break;
+ while (position < end && isHTMLSpace(*position))
++position;
- }
- // Step 5
if (position == end)
- return false;
- ASSERT_WITH_SECURITY_IMPLICATION(position < end);
+ return std::nullopt;
- // Step 6
+ bool isNegative = false;
if (*position == '-') {
- sign = -1;
+ isNegative = true;
++position;
} else if (*position == '+')
++position;
- if (position == end)
- return false;
- ASSERT_WITH_SECURITY_IMPLICATION(position < end);
- // Step 7
- if (!isASCIIDigit(*position))
- return false;
+ if (position == end || !isASCIIDigit(*position))
+ return std::nullopt;
- // Step 8
- StringBuilder digits;
- while (position < end) {
- if (!isASCIIDigit(*position))
- break;
- digits.append(*position++);
- }
+ constexpr int intMax = std::numeric_limits<int>::max();
+ constexpr int base = 10;
+ constexpr int maxMultiplier = intMax / base;
- // Step 9
- bool ok;
- if (digits.is8Bit())
- value = sign * charactersToIntStrict(digits.characters8(), digits.length(), &ok);
- else
- value = sign * charactersToIntStrict(digits.characters16(), digits.length(), &ok);
- return ok;
+ unsigned result = 0;
+ do {
+ int digitValue = *position - '0';
+
+ if (result > maxMultiplier || (result == maxMultiplier && digitValue > (intMax % base) + isNegative))
+ return std::nullopt;
+
+ result = base * result + digitValue;
+ ++position;
+ } while (position < end && isASCIIDigit(*position));
+
+ return isNegative ? -result : result;
}
-// http://www.whatwg.org/specs/web-apps/current-work/#rules-for-parsing-integers
-bool parseHTMLInteger(const String& input, int& value)
+// https://html.spec.whatwg.org/multipage/infrastructure.html#rules-for-parsing-integers
+std::optional<int> parseHTMLInteger(StringView input)
{
- // Step 1
- // Step 2
unsigned length = input.length();
- if (!length || input.is8Bit()) {
- const LChar* start = input.characters8();
- return parseHTMLIntegerInternal(start, start + length, value);
+ if (!length)
+ return std::nullopt;
+
+ if (LIKELY(input.is8Bit())) {
+ auto* start = input.characters8();
+ return parseHTMLIntegerInternal(start, start + length);
}
- const UChar* start = input.characters16();
- return parseHTMLIntegerInternal(start, start + length, value);
+ auto* start = input.characters16();
+ return parseHTMLIntegerInternal(start, start + length);
+}
+
+// https://html.spec.whatwg.org/multipage/infrastructure.html#rules-for-parsing-non-negative-integers
+std::optional<unsigned> parseHTMLNonNegativeInteger(StringView input)
+{
+ std::optional<int> signedValue = parseHTMLInteger(input);
+ if (!signedValue || signedValue.value() < 0)
+ return std::nullopt;
+
+ return static_cast<unsigned>(signedValue.value());
}
template <typename CharacterType>
-static bool parseHTMLNonNegativeIntegerInternal(const CharacterType* position, const CharacterType* end, unsigned& value)
+static std::optional<int> parseValidHTMLNonNegativeIntegerInternal(const CharacterType* position, const CharacterType* end)
{
- // Step 3
- while (position < end) {
- if (!isHTMLSpace(*position))
- break;
- ++position;
+ // A string is a valid non-negative integer if it consists of one or more ASCII digits.
+ for (auto* c = position; c < end; ++c) {
+ if (!isASCIIDigit(*c))
+ return std::nullopt;
}
- // Step 4
- if (position == end)
- return false;
- ASSERT_WITH_SECURITY_IMPLICATION(position < end);
-
- // Step 5
- if (*position == '+')
- ++position;
+ std::optional<int> signedValue = parseHTMLIntegerInternal(position, end);
+ if (!signedValue || signedValue.value() < 0)
+ return std::nullopt;
- // Step 6
- if (position == end)
- return false;
- ASSERT_WITH_SECURITY_IMPLICATION(position < end);
+ return signedValue;
+}
- // Step 7
- if (!isASCIIDigit(*position))
- return false;
+// https://html.spec.whatwg.org/#valid-non-negative-integer
+std::optional<int> parseValidHTMLNonNegativeInteger(StringView input)
+{
+ if (input.isEmpty())
+ return std::nullopt;
- // Step 8
- StringBuilder digits;
- while (position < end) {
- if (!isASCIIDigit(*position))
- break;
- digits.append(*position++);
+ if (LIKELY(input.is8Bit())) {
+ auto* start = input.characters8();
+ return parseValidHTMLNonNegativeIntegerInternal(start, start + input.length());
}
- // Step 9
- bool ok;
- if (digits.is8Bit())
- value = charactersToUIntStrict(digits.characters8(), digits.length(), &ok);
- else
- value = charactersToUIntStrict(digits.characters16(), digits.length(), &ok);
- return ok;
+ auto* start = input.characters16();
+ return parseValidHTMLNonNegativeIntegerInternal(start, start + input.length());
}
+template <typename CharacterType>
+static std::optional<double> parseValidHTMLFloatingPointNumberInternal(const CharacterType* position, size_t length)
+{
+ ASSERT(length > 0);
+
+ // parseDouble() allows the string to start with a '+' or to end with a '.' but those
+ // are not valid floating point numbers as per HTML.
+ if (*position == '+' || *(position + length - 1) == '.')
+ return std::nullopt;
-// http://www.whatwg.org/specs/web-apps/current-work/#rules-for-parsing-non-negative-integers
-bool parseHTMLNonNegativeInteger(const String& input, unsigned& value)
+ size_t parsedLength = 0;
+ double number = parseDouble(position, length, parsedLength);
+ return parsedLength == length && std::isfinite(number) ? number : std::optional<double>();
+}
+
+// https://html.spec.whatwg.org/#valid-floating-point-number
+std::optional<double> parseValidHTMLFloatingPointNumber(StringView input)
{
- // Step 1
- // Step 2
- unsigned length = input.length();
- if (length && input.is8Bit()) {
- const LChar* start = input.characters8();
- return parseHTMLNonNegativeIntegerInternal(start, start + length, value);
+ if (input.isEmpty())
+ return std::nullopt;
+
+ if (LIKELY(input.is8Bit())) {
+ auto* start = input.characters8();
+ return parseValidHTMLFloatingPointNumberInternal(start, input.length());
}
-
- const UChar* start = input.deprecatedCharacters();
- return parseHTMLNonNegativeIntegerInternal(start, start + length, value);
+
+ auto* start = input.characters16();
+ return parseValidHTMLFloatingPointNumberInternal(start, input.length());
}
-static bool threadSafeEqual(const StringImpl* a, const StringImpl* b)
+static inline bool isHTMLSpaceOrDelimiter(UChar character)
{
- if (a == b)
- return true;
- if (a->hash() != b->hash())
- return false;
- return equalNonNull(a, b);
+ return isHTMLSpace(character) || character == ',' || character == ';';
}
-bool threadSafeMatch(const QualifiedName& a, const QualifiedName& b)
+static inline bool isNumberStart(UChar character)
{
- return threadSafeEqual(a.localName().impl(), b.localName().impl());
+ return isASCIIDigit(character) || character == '.' || character == '-';
}
-struct ImageWithScale {
- unsigned imageURLStart;
- unsigned imageURLLength;
- float scaleFactor;
+// https://html.spec.whatwg.org/multipage/infrastructure.html#rules-for-parsing-floating-point-number-values
+template <typename CharacterType>
+static Vector<double> parseHTMLListOfOfFloatingPointNumberValuesInternal(const CharacterType* position, const CharacterType* end)
+{
+ Vector<double> numbers;
+
+ // This skips past any leading delimiters.
+ while (position < end && isHTMLSpaceOrDelimiter(*position))
+ ++position;
+
+ while (position < end) {
+ // This skips past leading garbage.
+ while (position < end && !(isHTMLSpaceOrDelimiter(*position) || isNumberStart(*position)))
+ ++position;
+
+ const CharacterType* numberStart = position;
+ while (position < end && !isHTMLSpaceOrDelimiter(*position))
+ ++position;
+
+ size_t parsedLength = 0;
+ double number = parseDouble(numberStart, position - numberStart, parsedLength);
+ numbers.append(parsedLength > 0 && std::isfinite(number) ? number : 0);
- ImageWithScale()
- : imageURLStart(0)
- , imageURLLength(0)
- , scaleFactor(1)
- {
+ // This skips past the delimiter.
+ while (position < end && isHTMLSpaceOrDelimiter(*position))
+ ++position;
}
- bool hasImageURL() const
- {
- return imageURLLength;
+ return numbers;
+}
+
+Vector<double> parseHTMLListOfOfFloatingPointNumberValues(StringView input)
+{
+ if (LIKELY(input.is8Bit())) {
+ auto* start = input.characters8();
+ return parseHTMLListOfOfFloatingPointNumberValuesInternal(start, start + input.length());
}
-};
-typedef Vector<ImageWithScale> ImageCandidates;
-static inline bool compareByScaleFactor(const ImageWithScale& first, const ImageWithScale& second)
+ auto* start = input.characters16();
+ return parseHTMLListOfOfFloatingPointNumberValuesInternal(start, start + input.length());
+}
+
+static bool threadSafeEqual(const StringImpl& a, const StringImpl& b)
{
- return first.scaleFactor < second.scaleFactor;
+ if (&a == &b)
+ return true;
+ if (a.hash() != b.hash())
+ return false;
+ return equal(a, b);
}
-static inline bool isHTMLSpaceOrComma(UChar character)
+bool threadSafeMatch(const QualifiedName& a, const QualifiedName& b)
{
- return isHTMLSpace(character) || character == ',';
+ return threadSafeEqual(*a.localName().impl(), *b.localName().impl());
}
-// See the specifications for more details about the algorithm to follow.
-// http://www.w3.org/TR/2013/WD-html-srcset-20130228/#processing-the-image-candidates.
-static void parseImagesWithScaleFromSrcsetAttribute(const String& srcsetAttribute, ImageCandidates& imageCandidates)
+String parseCORSSettingsAttribute(const AtomicString& value)
{
- ASSERT(imageCandidates.isEmpty());
+ if (value.isNull())
+ return String();
+ if (equalIgnoringASCIICase(value, "use-credentials"))
+ return ASCIILiteral("use-credentials");
+ return ASCIILiteral("anonymous");
+}
- size_t imageCandidateStart = 0;
- unsigned srcsetAttributeLength = srcsetAttribute.length();
+// https://html.spec.whatwg.org/multipage/semantics.html#attr-meta-http-equiv-refresh
+template <typename CharacterType>
+static bool parseHTTPRefreshInternal(const CharacterType* position, const CharacterType* end, double& parsedDelay, String& parsedURL)
+{
+ while (position < end && isHTMLSpace(*position))
+ ++position;
- while (imageCandidateStart < srcsetAttributeLength) {
- float imageScaleFactor = 1;
- size_t separator;
+ const CharacterType* numberStart = position;
+ while (position < end && isASCIIDigit(*position))
+ ++position;
- // 4. Splitting loop: Skip whitespace.
- size_t imageURLStart = srcsetAttribute.find(isNotHTMLSpace, imageCandidateStart);
- if (imageURLStart == notFound)
- break;
- // If The current candidate is either totally empty or only contains space, skipping.
- if (srcsetAttribute[imageURLStart] == ',') {
- imageCandidateStart = imageURLStart + 1;
- continue;
+ std::optional<unsigned> number = parseHTMLNonNegativeInteger(StringView(numberStart, position - numberStart));
+ if (!number)
+ return false;
+
+ while (position < end && (isASCIIDigit(*position) || *position == '.'))
+ ++position;
+
+ if (position == end) {
+ parsedDelay = number.value();
+ return true;
+ }
+
+ if (*position != ';' && *position != ',' && !isHTMLSpace(*position))
+ return false;
+
+ parsedDelay = number.value();
+
+ while (position < end && isHTMLSpace(*position))
+ ++position;
+
+ if (position < end && (*position == ';' || *position == ','))
+ ++position;
+
+ while (position < end && isHTMLSpace(*position))
+ ++position;
+
+ if (position == end)
+ return true;
+
+ if (*position == 'U' || *position == 'u') {
+ StringView url(position, end - position);
+
+ ++position;
+
+ if (position < end && (*position == 'R' || *position == 'r'))
+ ++position;
+ else {
+ parsedURL = url.toString();
+ return true;
}
- // 5. Collect a sequence of characters that are not space characters, and let that be url.
- size_t imageURLEnd = srcsetAttribute.find(isHTMLSpace, imageURLStart + 1);
- if (imageURLEnd == notFound) {
- imageURLEnd = srcsetAttributeLength;
- separator = srcsetAttributeLength;
- } else if (srcsetAttribute[imageURLEnd - 1] == ',') {
- --imageURLEnd;
- separator = imageURLEnd;
- } else {
- // 7. Collect a sequence of characters that are not "," (U+002C) characters, and let that be descriptors.
- size_t imageScaleStart = srcsetAttribute.find(isNotHTMLSpace, imageURLEnd + 1);
- if (imageScaleStart == notFound)
- separator = srcsetAttributeLength;
- else if (srcsetAttribute[imageScaleStart] == ',')
- separator = imageScaleStart;
- else {
- // This part differs from the spec as the current implementation only supports pixel density descriptors for now.
- size_t imageScaleEnd = srcsetAttribute.find(isHTMLSpaceOrComma, imageScaleStart + 1);
- imageScaleEnd = (imageScaleEnd == notFound) ? srcsetAttributeLength : imageScaleEnd;
- size_t commaPosition = imageScaleEnd;
- // Make sure there are no other descriptors.
- while ((commaPosition < srcsetAttributeLength - 1) && isHTMLSpace(srcsetAttribute[commaPosition]))
- ++commaPosition;
- // If the first not html space character after the scale modifier is not a comma,
- // the current candidate is an invalid input.
- if ((commaPosition < srcsetAttributeLength - 1) && srcsetAttribute[commaPosition] != ',') {
- // Find the nearest comma and skip the input.
- commaPosition = srcsetAttribute.find(',', commaPosition + 1);
- if (commaPosition == notFound)
- break;
- imageCandidateStart = commaPosition + 1;
- continue;
- }
- separator = commaPosition;
- if (srcsetAttribute[imageScaleEnd - 1] != 'x') {
- imageCandidateStart = separator + 1;
- continue;
- }
- bool validScaleFactor = false;
- size_t scaleFactorLengthWithoutUnit = imageScaleEnd - imageScaleStart - 1;
- imageScaleFactor = charactersToFloat(srcsetAttribute.deprecatedCharacters() + imageScaleStart, scaleFactorLengthWithoutUnit, &validScaleFactor);
-
- if (!validScaleFactor) {
- imageCandidateStart = separator + 1;
- continue;
- }
- }
+
+ if (position < end && (*position == 'L' || *position == 'l'))
+ ++position;
+ else {
+ parsedURL = url.toString();
+ return true;
}
- ImageWithScale image;
- image.imageURLStart = imageURLStart;
- image.imageURLLength = imageURLEnd - imageURLStart;
- image.scaleFactor = imageScaleFactor;
-
- imageCandidates.append(image);
- // 11. Return to the step labeled splitting loop.
- imageCandidateStart = separator + 1;
- }
-}
-String bestFitSourceForImageAttributes(float deviceScaleFactor, const String& srcAttribute, const String& srcsetAttribute)
-{
- ImageCandidates imageCandidates;
+ while (position < end && isHTMLSpace(*position))
+ ++position;
- parseImagesWithScaleFromSrcsetAttribute(srcsetAttribute, imageCandidates);
+ if (position < end && *position == '=')
+ ++position;
+ else {
+ parsedURL = url.toString();
+ return true;
+ }
- if (!srcAttribute.isEmpty()) {
- ImageWithScale srcPlaceholderImage;
- imageCandidates.append(srcPlaceholderImage);
+ while (position < end && isHTMLSpace(*position))
+ ++position;
}
- if (imageCandidates.isEmpty())
- return String();
+ CharacterType quote;
+ if (position < end && (*position == '\'' || *position == '"')) {
+ quote = *position;
+ ++position;
+ } else
+ quote = '\0';
- std::stable_sort(imageCandidates.begin(), imageCandidates.end(), compareByScaleFactor);
+ StringView url(position, end - position);
- for (size_t i = 0; i < imageCandidates.size() - 1; ++i) {
- if (imageCandidates[i].scaleFactor >= deviceScaleFactor)
- return imageCandidates[i].hasImageURL() ? srcsetAttribute.substringSharingImpl(imageCandidates[i].imageURLStart, imageCandidates[i].imageURLLength) : srcAttribute;
+ if (quote != '\0') {
+ size_t index = url.find(quote);
+ if (index != notFound)
+ url = url.substring(0, index);
}
- const ImageWithScale& lastCandidate = imageCandidates.last();
- return lastCandidate.hasImageURL() ? srcsetAttribute.substringSharingImpl(lastCandidate.imageURLStart, lastCandidate.imageURLLength) : srcAttribute;
+
+ parsedURL = url.toString();
+ return true;
+}
+
+bool parseMetaHTTPEquivRefresh(const StringView& input, double& delay, String& url)
+{
+ if (LIKELY(input.is8Bit())) {
+ auto* start = input.characters8();
+ return parseHTTPRefreshInternal(start, start + input.length(), delay, url);
+ }
+
+ auto* start = input.characters16();
+ return parseHTTPRefreshInternal(start, start + input.length(), delay, url);
+}
+
+// https://html.spec.whatwg.org/#rules-for-parsing-a-hash-name-reference
+AtomicString parseHTMLHashNameReference(StringView usemap)
+{
+ size_t numberSignIndex = usemap.find('#');
+ if (numberSignIndex == notFound)
+ return nullAtom;
+ return usemap.substring(numberSignIndex + 1).toAtomicString();
}
}