// Copyright 2015 The Chromium Authors. All rights reserved. // Copyright (C) 2016 Apple Inc. All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following disclaimer // in the documentation and/or other materials provided with the // distribution. // * Neither the name of Google Inc. nor the names of its // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "config.h" #include "CSSTokenizer.h" #include "CSSParserIdioms.h" #include "CSSParserObserverWrapper.h" #include "CSSParserTokenRange.h" #include "CSSTokenizerInputStream.h" #include "HTMLParserIdioms.h" #include #include namespace WebCore { CSSTokenizer::CSSTokenizer(const String& string) : m_input(string) { // According to the spec, we should perform preprocessing here. // See: http://dev.w3.org/csswg/css-syntax/#input-preprocessing // // However, we can skip this step since: // * We're using HTML spaces (which accept \r and \f as a valid white space) // * Do not count white spaces // * CSSTokenizerInputStream::nextInputChar() replaces NULLs for replacement characters if (string.isEmpty()) return; // To avoid resizing we err on the side of reserving too much space. // Most strings we tokenize have about 3.5 to 5 characters per token. m_tokens.reserveInitialCapacity(string.length() / 3); while (true) { CSSParserToken token = nextToken(); if (token.type() == CommentToken) continue; if (token.type() == EOFToken) return; m_tokens.append(token); } } CSSTokenizer::CSSTokenizer(const String& string, CSSParserObserverWrapper& wrapper) : m_input(string) { if (string.isEmpty()) return; unsigned offset = 0; while (true) { CSSParserToken token = nextToken(); if (token.type() == EOFToken) break; if (token.type() == CommentToken) wrapper.addComment(offset, m_input.offset(), m_tokens.size()); else { m_tokens.append(token); wrapper.addToken(offset); } offset = m_input.offset(); } wrapper.addToken(offset); wrapper.finalizeConstruction(m_tokens.begin()); } CSSParserTokenRange CSSTokenizer::tokenRange() const { return m_tokens; } unsigned CSSTokenizer::tokenCount() { return m_tokens.size(); } static bool isNewLine(UChar cc) { // We check \r and \f here, since we have no preprocessing stage return (cc == '\r' || cc == '\n' || cc == '\f'); } // http://dev.w3.org/csswg/css-syntax/#check-if-two-code-points-are-a-valid-escape static bool twoCharsAreValidEscape(UChar first, UChar second) { return first == '\\' && !isNewLine(second); } void CSSTokenizer::reconsume(UChar c) { m_input.pushBack(c); } UChar CSSTokenizer::consume() { UChar current = m_input.nextInputChar(); m_input.advance(); return current; } CSSParserToken CSSTokenizer::whiteSpace(UChar /*cc*/) { m_input.advanceUntilNonWhitespace(); return CSSParserToken(WhitespaceToken); } CSSParserToken CSSTokenizer::blockStart(CSSParserTokenType type) { m_blockStack.append(type); return CSSParserToken(type, CSSParserToken::BlockStart); } CSSParserToken CSSTokenizer::blockStart(CSSParserTokenType blockType, CSSParserTokenType type, StringView name) { m_blockStack.append(blockType); return CSSParserToken(type, name, CSSParserToken::BlockStart); } CSSParserToken CSSTokenizer::blockEnd(CSSParserTokenType type, CSSParserTokenType startType) { if (!m_blockStack.isEmpty() && m_blockStack.last() == startType) { m_blockStack.removeLast(); return CSSParserToken(type, CSSParserToken::BlockEnd); } return CSSParserToken(type); } CSSParserToken CSSTokenizer::leftParenthesis(UChar /*cc*/) { return blockStart(LeftParenthesisToken); } CSSParserToken CSSTokenizer::rightParenthesis(UChar /*cc*/) { return blockEnd(RightParenthesisToken, LeftParenthesisToken); } CSSParserToken CSSTokenizer::leftBracket(UChar /*cc*/) { return blockStart(LeftBracketToken); } CSSParserToken CSSTokenizer::rightBracket(UChar /*cc*/) { return blockEnd(RightBracketToken, LeftBracketToken); } CSSParserToken CSSTokenizer::leftBrace(UChar /*cc*/) { return blockStart(LeftBraceToken); } CSSParserToken CSSTokenizer::rightBrace(UChar /*cc*/) { return blockEnd(RightBraceToken, LeftBraceToken); } CSSParserToken CSSTokenizer::plusOrFullStop(UChar cc) { if (nextCharsAreNumber(cc)) { reconsume(cc); return consumeNumericToken(); } return CSSParserToken(DelimiterToken, cc); } CSSParserToken CSSTokenizer::asterisk(UChar cc) { ASSERT_UNUSED(cc, cc == '*'); if (consumeIfNext('=')) return CSSParserToken(SubstringMatchToken); return CSSParserToken(DelimiterToken, '*'); } CSSParserToken CSSTokenizer::lessThan(UChar cc) { ASSERT_UNUSED(cc, cc == '<'); if (m_input.peekWithoutReplacement(0) == '!' && m_input.peekWithoutReplacement(1) == '-' && m_input.peekWithoutReplacement(2) == '-') { m_input.advance(3); return CSSParserToken(CDOToken); } return CSSParserToken(DelimiterToken, '<'); } CSSParserToken CSSTokenizer::comma(UChar /*cc*/) { return CSSParserToken(CommaToken); } CSSParserToken CSSTokenizer::hyphenMinus(UChar cc) { if (nextCharsAreNumber(cc)) { reconsume(cc); return consumeNumericToken(); } if (m_input.peekWithoutReplacement(0) == '-' && m_input.peekWithoutReplacement(1) == '>') { m_input.advance(2); return CSSParserToken(CDCToken); } if (nextCharsAreIdentifier(cc)) { reconsume(cc); return consumeIdentLikeToken(); } return CSSParserToken(DelimiterToken, cc); } CSSParserToken CSSTokenizer::solidus(UChar cc) { if (consumeIfNext('*')) { // These get ignored, but we need a value to return. consumeUntilCommentEndFound(); return CSSParserToken(CommentToken); } return CSSParserToken(DelimiterToken, cc); } CSSParserToken CSSTokenizer::colon(UChar /*cc*/) { return CSSParserToken(ColonToken); } CSSParserToken CSSTokenizer::semiColon(UChar /*cc*/) { return CSSParserToken(SemicolonToken); } CSSParserToken CSSTokenizer::hash(UChar cc) { UChar nextChar = m_input.peekWithoutReplacement(0); if (isNameCodePoint(nextChar) || twoCharsAreValidEscape(nextChar, m_input.peekWithoutReplacement(1))) { HashTokenType type = nextCharsAreIdentifier() ? HashTokenId : HashTokenUnrestricted; return CSSParserToken(type, consumeName()); } return CSSParserToken(DelimiterToken, cc); } CSSParserToken CSSTokenizer::circumflexAccent(UChar cc) { ASSERT_UNUSED(cc, cc == '^'); if (consumeIfNext('=')) return CSSParserToken(PrefixMatchToken); return CSSParserToken(DelimiterToken, '^'); } CSSParserToken CSSTokenizer::dollarSign(UChar cc) { ASSERT_UNUSED(cc, cc == '$'); if (consumeIfNext('=')) return CSSParserToken(SuffixMatchToken); return CSSParserToken(DelimiterToken, '$'); } CSSParserToken CSSTokenizer::verticalLine(UChar cc) { ASSERT_UNUSED(cc, cc == '|'); if (consumeIfNext('=')) return CSSParserToken(DashMatchToken); if (consumeIfNext('|')) return CSSParserToken(ColumnToken); return CSSParserToken(DelimiterToken, '|'); } CSSParserToken CSSTokenizer::tilde(UChar cc) { ASSERT_UNUSED(cc, cc == '~'); if (consumeIfNext('=')) return CSSParserToken(IncludeMatchToken); return CSSParserToken(DelimiterToken, '~'); } CSSParserToken CSSTokenizer::commercialAt(UChar cc) { ASSERT_UNUSED(cc, cc == '@'); if (nextCharsAreIdentifier()) return CSSParserToken(AtKeywordToken, consumeName()); return CSSParserToken(DelimiterToken, '@'); } CSSParserToken CSSTokenizer::reverseSolidus(UChar cc) { if (twoCharsAreValidEscape(cc, m_input.peekWithoutReplacement(0))) { reconsume(cc); return consumeIdentLikeToken(); } return CSSParserToken(DelimiterToken, cc); } CSSParserToken CSSTokenizer::asciiDigit(UChar cc) { reconsume(cc); return consumeNumericToken(); } CSSParserToken CSSTokenizer::letterU(UChar cc) { if (m_input.peekWithoutReplacement(0) == '+' && (isASCIIHexDigit(m_input.peekWithoutReplacement(1)) || m_input.peekWithoutReplacement(1) == '?')) { m_input.advance(); return consumeUnicodeRange(); } reconsume(cc); return consumeIdentLikeToken(); } CSSParserToken CSSTokenizer::nameStart(UChar cc) { reconsume(cc); return consumeIdentLikeToken(); } CSSParserToken CSSTokenizer::stringStart(UChar cc) { return consumeStringTokenUntil(cc); } CSSParserToken CSSTokenizer::endOfFile(UChar /*cc*/) { return CSSParserToken(EOFToken); } const CSSTokenizer::CodePoint CSSTokenizer::codePoints[128] = { &CSSTokenizer::endOfFile, 0, 0, 0, 0, 0, 0, 0, 0, &CSSTokenizer::whiteSpace, &CSSTokenizer::whiteSpace, 0, &CSSTokenizer::whiteSpace, &CSSTokenizer::whiteSpace, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, &CSSTokenizer::whiteSpace, 0, &CSSTokenizer::stringStart, &CSSTokenizer::hash, &CSSTokenizer::dollarSign, 0, 0, &CSSTokenizer::stringStart, &CSSTokenizer::leftParenthesis, &CSSTokenizer::rightParenthesis, &CSSTokenizer::asterisk, &CSSTokenizer::plusOrFullStop, &CSSTokenizer::comma, &CSSTokenizer::hyphenMinus, &CSSTokenizer::plusOrFullStop, &CSSTokenizer::solidus, &CSSTokenizer::asciiDigit, &CSSTokenizer::asciiDigit, &CSSTokenizer::asciiDigit, &CSSTokenizer::asciiDigit, &CSSTokenizer::asciiDigit, &CSSTokenizer::asciiDigit, &CSSTokenizer::asciiDigit, &CSSTokenizer::asciiDigit, &CSSTokenizer::asciiDigit, &CSSTokenizer::asciiDigit, &CSSTokenizer::colon, &CSSTokenizer::semiColon, &CSSTokenizer::lessThan, 0, 0, 0, &CSSTokenizer::commercialAt, &CSSTokenizer::nameStart, &CSSTokenizer::nameStart, &CSSTokenizer::nameStart, &CSSTokenizer::nameStart, &CSSTokenizer::nameStart, &CSSTokenizer::nameStart, &CSSTokenizer::nameStart, &CSSTokenizer::nameStart, &CSSTokenizer::nameStart, &CSSTokenizer::nameStart, &CSSTokenizer::nameStart, &CSSTokenizer::nameStart, &CSSTokenizer::nameStart, &CSSTokenizer::nameStart, &CSSTokenizer::nameStart, &CSSTokenizer::nameStart, &CSSTokenizer::nameStart, &CSSTokenizer::nameStart, &CSSTokenizer::nameStart, &CSSTokenizer::nameStart, &CSSTokenizer::letterU, &CSSTokenizer::nameStart, &CSSTokenizer::nameStart, &CSSTokenizer::nameStart, &CSSTokenizer::nameStart, &CSSTokenizer::nameStart, &CSSTokenizer::leftBracket, &CSSTokenizer::reverseSolidus, &CSSTokenizer::rightBracket, &CSSTokenizer::circumflexAccent, &CSSTokenizer::nameStart, 0, &CSSTokenizer::nameStart, &CSSTokenizer::nameStart, &CSSTokenizer::nameStart, &CSSTokenizer::nameStart, &CSSTokenizer::nameStart, &CSSTokenizer::nameStart, &CSSTokenizer::nameStart, &CSSTokenizer::nameStart, &CSSTokenizer::nameStart, &CSSTokenizer::nameStart, &CSSTokenizer::nameStart, &CSSTokenizer::nameStart, &CSSTokenizer::nameStart, &CSSTokenizer::nameStart, &CSSTokenizer::nameStart, &CSSTokenizer::nameStart, &CSSTokenizer::nameStart, &CSSTokenizer::nameStart, &CSSTokenizer::nameStart, &CSSTokenizer::nameStart, &CSSTokenizer::letterU, &CSSTokenizer::nameStart, &CSSTokenizer::nameStart, &CSSTokenizer::nameStart, &CSSTokenizer::nameStart, &CSSTokenizer::nameStart, &CSSTokenizer::leftBrace, &CSSTokenizer::verticalLine, &CSSTokenizer::rightBrace, &CSSTokenizer::tilde, 0, }; #if !ASSERT_WITH_SECURITY_IMPLICATION_DISABLED const unsigned codePointsNumber = 128; #endif CSSParserToken CSSTokenizer::nextToken() { // Unlike the HTMLTokenizer, the CSS Syntax spec is written // as a stateless, (fixed-size) look-ahead tokenizer. // We could move to the stateful model and instead create // states for all the "next 3 codepoints are X" cases. // State-machine tokenizers are easier to write to handle // incremental tokenization of partial sources. // However, for now we follow the spec exactly. UChar cc = consume(); CodePoint codePointFunc = 0; if (isASCII(cc)) { ASSERT_WITH_SECURITY_IMPLICATION(cc < codePointsNumber); codePointFunc = codePoints[cc]; } else codePointFunc = &CSSTokenizer::nameStart; if (codePointFunc) return ((this)->*(codePointFunc))(cc); return CSSParserToken(DelimiterToken, cc); } // This method merges the following spec sections for efficiency // http://www.w3.org/TR/css3-syntax/#consume-a-number // http://www.w3.org/TR/css3-syntax/#convert-a-string-to-a-number CSSParserToken CSSTokenizer::consumeNumber() { ASSERT(nextCharsAreNumber()); NumericValueType type = IntegerValueType; NumericSign sign = NoSign; unsigned numberLength = 0; UChar next = m_input.peekWithoutReplacement(0); if (next == '+') { ++numberLength; sign = PlusSign; } else if (next == '-') { ++numberLength; sign = MinusSign; } numberLength = m_input.skipWhilePredicate(numberLength); next = m_input.peekWithoutReplacement(numberLength); if (next == '.' && isASCIIDigit(m_input.peekWithoutReplacement(numberLength + 1))) { type = NumberValueType; numberLength = m_input.skipWhilePredicate(numberLength + 2); next = m_input.peekWithoutReplacement(numberLength); } if (next == 'E' || next == 'e') { next = m_input.peekWithoutReplacement(numberLength + 1); if (isASCIIDigit(next)) { type = NumberValueType; numberLength = m_input.skipWhilePredicate(numberLength + 1); } else if ((next == '+' || next == '-') && isASCIIDigit(m_input.peekWithoutReplacement(numberLength + 2))) { type = NumberValueType; numberLength = m_input.skipWhilePredicate(numberLength + 3); } } double value = m_input.getDouble(0, numberLength); m_input.advance(numberLength); return CSSParserToken(NumberToken, value, type, sign); } // http://www.w3.org/TR/css3-syntax/#consume-a-numeric-token CSSParserToken CSSTokenizer::consumeNumericToken() { CSSParserToken token = consumeNumber(); if (nextCharsAreIdentifier()) token.convertToDimensionWithUnit(consumeName()); else if (consumeIfNext('%')) token.convertToPercentage(); return token; } // http://dev.w3.org/csswg/css-syntax/#consume-ident-like-token CSSParserToken CSSTokenizer::consumeIdentLikeToken() { StringView name = consumeName(); if (consumeIfNext('(')) { if (equalIgnoringASCIICase(name, "url")) { // The spec is slightly different so as to avoid dropping whitespace // tokens, but they wouldn't be used and this is easier. m_input.advanceUntilNonWhitespace(); UChar next = m_input.peekWithoutReplacement(0); if (next != '"' && next != '\'') return consumeUrlToken(); } return blockStart(LeftParenthesisToken, FunctionToken, name); } return CSSParserToken(IdentToken, name); } // http://dev.w3.org/csswg/css-syntax/#consume-a-string-token CSSParserToken CSSTokenizer::consumeStringTokenUntil(UChar endingCodePoint) { // Strings without escapes get handled without allocations for (unsigned size = 0; ; size++) { UChar cc = m_input.peekWithoutReplacement(size); if (cc == endingCodePoint) { unsigned startOffset = m_input.offset(); m_input.advance(size + 1); return CSSParserToken(StringToken, m_input.rangeAt(startOffset, size)); } if (isNewLine(cc)) { m_input.advance(size); return CSSParserToken(BadStringToken); } if (cc == '\0' || cc == '\\') break; } StringBuilder output; while (true) { UChar cc = consume(); if (cc == endingCodePoint || cc == kEndOfFileMarker) return CSSParserToken(StringToken, registerString(output.toString())); if (isNewLine(cc)) { reconsume(cc); return CSSParserToken(BadStringToken); } if (cc == '\\') { if (m_input.nextInputChar() == kEndOfFileMarker) continue; if (isNewLine(m_input.peekWithoutReplacement(0))) consumeSingleWhitespaceIfNext(); // This handles \r\n for us else output.append(consumeEscape()); } else output.append(cc); } } CSSParserToken CSSTokenizer::consumeUnicodeRange() { ASSERT(isASCIIHexDigit(m_input.peekWithoutReplacement(0)) || m_input.peekWithoutReplacement(0) == '?'); int lengthRemaining = 6; UChar32 start = 0; while (lengthRemaining && isASCIIHexDigit(m_input.peekWithoutReplacement(0))) { start = start * 16 + toASCIIHexValue(consume()); --lengthRemaining; } UChar32 end = start; if (lengthRemaining && consumeIfNext('?')) { do { start *= 16; end = end * 16 + 0xF; --lengthRemaining; } while (lengthRemaining && consumeIfNext('?')); } else if (m_input.peekWithoutReplacement(0) == '-' && isASCIIHexDigit(m_input.peekWithoutReplacement(1))) { m_input.advance(); lengthRemaining = 6; end = 0; do { end = end * 16 + toASCIIHexValue(consume()); --lengthRemaining; } while (lengthRemaining && isASCIIHexDigit(m_input.peekWithoutReplacement(0))); } return CSSParserToken(UnicodeRangeToken, start, end); } // http://dev.w3.org/csswg/css-syntax/#non-printable-code-point static bool isNonPrintableCodePoint(UChar cc) { return cc <= '\x8' || cc == '\xb' || (cc >= '\xe' && cc <= '\x1f') || cc == '\x7f'; } // http://dev.w3.org/csswg/css-syntax/#consume-url-token CSSParserToken CSSTokenizer::consumeUrlToken() { m_input.advanceUntilNonWhitespace(); // URL tokens without escapes get handled without allocations for (unsigned size = 0; ; size++) { UChar cc = m_input.peekWithoutReplacement(size); if (cc == ')') { unsigned startOffset = m_input.offset(); m_input.advance(size + 1); return CSSParserToken(UrlToken, m_input.rangeAt(startOffset, size)); } if (cc <= ' ' || cc == '\\' || cc == '"' || cc == '\'' || cc == '(' || cc == '\x7f') break; } StringBuilder result; while (true) { UChar cc = consume(); if (cc == ')' || cc == kEndOfFileMarker) return CSSParserToken(UrlToken, registerString(result.toString())); if (isHTMLSpace(cc)) { m_input.advanceUntilNonWhitespace(); if (consumeIfNext(')') || m_input.nextInputChar() == kEndOfFileMarker) return CSSParserToken(UrlToken, registerString(result.toString())); break; } if (cc == '"' || cc == '\'' || cc == '(' || isNonPrintableCodePoint(cc)) break; if (cc == '\\') { if (twoCharsAreValidEscape(cc, m_input.peekWithoutReplacement(0))) { result.append(consumeEscape()); continue; } break; } result.append(cc); } consumeBadUrlRemnants(); return CSSParserToken(BadUrlToken); } // http://dev.w3.org/csswg/css-syntax/#consume-the-remnants-of-a-bad-url void CSSTokenizer::consumeBadUrlRemnants() { while (true) { UChar cc = consume(); if (cc == ')' || cc == kEndOfFileMarker) return; if (twoCharsAreValidEscape(cc, m_input.peekWithoutReplacement(0))) consumeEscape(); } } void CSSTokenizer::consumeSingleWhitespaceIfNext() { // We check for \r\n and HTML spaces since we don't do preprocessing UChar next = m_input.peekWithoutReplacement(0); if (next == '\r' && m_input.peekWithoutReplacement(1) == '\n') m_input.advance(2); else if (isHTMLSpace(next)) m_input.advance(); } void CSSTokenizer::consumeUntilCommentEndFound() { UChar c = consume(); while (true) { if (c == kEndOfFileMarker) return; if (c != '*') { c = consume(); continue; } c = consume(); if (c == '/') return; } } bool CSSTokenizer::consumeIfNext(UChar character) { // Since we're not doing replacement we can't tell the difference from // a NUL in the middle and the kEndOfFileMarker, so character must not be // NUL. ASSERT(character); if (m_input.peekWithoutReplacement(0) == character) { m_input.advance(); return true; } return false; } // http://www.w3.org/TR/css3-syntax/#consume-a-name StringView CSSTokenizer::consumeName() { // Names without escapes get handled without allocations for (unsigned size = 0; ; ++size) { UChar cc = m_input.peekWithoutReplacement(size); if (isNameCodePoint(cc)) continue; // peekWithoutReplacement will return NUL when we hit the end of the // input. In that case we want to still use the rangeAt() fast path // below. if (cc == '\0' && m_input.offset() + size < m_input.length()) break; if (cc == '\\') break; unsigned startOffset = m_input.offset(); m_input.advance(size); return m_input.rangeAt(startOffset, size); } StringBuilder result; while (true) { UChar cc = consume(); if (isNameCodePoint(cc)) { result.append(cc); continue; } if (twoCharsAreValidEscape(cc, m_input.peekWithoutReplacement(0))) { result.append(consumeEscape()); continue; } reconsume(cc); return registerString(result.toString()); } } // http://dev.w3.org/csswg/css-syntax/#consume-an-escaped-code-point UChar32 CSSTokenizer::consumeEscape() { UChar cc = consume(); ASSERT(!isNewLine(cc)); if (isASCIIHexDigit(cc)) { unsigned consumedHexDigits = 1; StringBuilder hexChars; hexChars.append(cc); while (consumedHexDigits < 6 && isASCIIHexDigit(m_input.peekWithoutReplacement(0))) { cc = consume(); hexChars.append(cc); consumedHexDigits++; }; consumeSingleWhitespaceIfNext(); bool ok = false; UChar32 codePoint = hexChars.toString().toUIntStrict(&ok, 16); ASSERT(ok); if (!codePoint || (0xD800 <= codePoint && codePoint <= 0xDFFF) || codePoint > 0x10FFFF) return replacementCharacter; return codePoint; } if (cc == kEndOfFileMarker) return replacementCharacter; return cc; } bool CSSTokenizer::nextTwoCharsAreValidEscape() { return twoCharsAreValidEscape(m_input.peekWithoutReplacement(0), m_input.peekWithoutReplacement(1)); } // http://www.w3.org/TR/css3-syntax/#starts-with-a-number bool CSSTokenizer::nextCharsAreNumber(UChar first) { UChar second = m_input.peekWithoutReplacement(0); if (isASCIIDigit(first)) return true; if (first == '+' || first == '-') return ((isASCIIDigit(second)) || (second == '.' && isASCIIDigit(m_input.peekWithoutReplacement(1)))); if (first =='.') return (isASCIIDigit(second)); return false; } bool CSSTokenizer::nextCharsAreNumber() { UChar first = consume(); bool areNumber = nextCharsAreNumber(first); reconsume(first); return areNumber; } // http://dev.w3.org/csswg/css-syntax/#would-start-an-identifier bool CSSTokenizer::nextCharsAreIdentifier(UChar first) { UChar second = m_input.peekWithoutReplacement(0); if (isNameStartCodePoint(first) || twoCharsAreValidEscape(first, second)) return true; if (first == '-') return isNameStartCodePoint(second) || second == '-' || nextTwoCharsAreValidEscape(); return false; } bool CSSTokenizer::nextCharsAreIdentifier() { UChar first = consume(); bool areIdentifier = nextCharsAreIdentifier(first); reconsume(first); return areIdentifier; } StringView CSSTokenizer::registerString(const String& string) { m_stringPool.append(string); return string; } } // namespace WebCore