diff options
Diffstat (limited to 'src/3rdparty/javascriptcore/JavaScriptCore/wrec/WRECParser.cpp')
-rw-r--r-- | src/3rdparty/javascriptcore/JavaScriptCore/wrec/WRECParser.cpp | 643 |
1 files changed, 0 insertions, 643 deletions
diff --git a/src/3rdparty/javascriptcore/JavaScriptCore/wrec/WRECParser.cpp b/src/3rdparty/javascriptcore/JavaScriptCore/wrec/WRECParser.cpp deleted file mode 100644 index 1709bf9..0000000 --- a/src/3rdparty/javascriptcore/JavaScriptCore/wrec/WRECParser.cpp +++ /dev/null @@ -1,643 +0,0 @@ -/* - * Copyright (C) 2008 Apple Inc. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "config.h" -#include "WRECParser.h" - -#if ENABLE(WREC) - -#include "CharacterClassConstructor.h" -#include "WRECFunctors.h" - -using namespace WTF; - -namespace JSC { namespace WREC { - -// These error messages match the error messages used by PCRE. -const char* Parser::QuantifierOutOfOrder = "numbers out of order in {} quantifier"; -const char* Parser::QuantifierWithoutAtom = "nothing to repeat"; -const char* Parser::ParenthesesUnmatched = "unmatched parentheses"; -const char* Parser::ParenthesesTypeInvalid = "unrecognized character after (?"; -const char* Parser::ParenthesesNotSupported = ""; // Not a user-visible syntax error -- just signals a syntax that WREC doesn't support yet. -const char* Parser::CharacterClassUnmatched = "missing terminating ] for character class"; -const char* Parser::CharacterClassOutOfOrder = "range out of order in character class"; -const char* Parser::EscapeUnterminated = "\\ at end of pattern"; - -class PatternCharacterSequence { -typedef Generator::JumpList JumpList; - -public: - PatternCharacterSequence(Generator& generator, JumpList& failures) - : m_generator(generator) - , m_failures(failures) - { - } - - size_t size() { return m_sequence.size(); } - - void append(int ch) - { - m_sequence.append(ch); - } - - void flush() - { - if (!m_sequence.size()) - return; - - m_generator.generatePatternCharacterSequence(m_failures, m_sequence.begin(), m_sequence.size()); - m_sequence.clear(); - } - - void flush(const Quantifier& quantifier) - { - if (!m_sequence.size()) - return; - - m_generator.generatePatternCharacterSequence(m_failures, m_sequence.begin(), m_sequence.size() - 1); - - switch (quantifier.type) { - case Quantifier::None: - case Quantifier::Error: - ASSERT_NOT_REACHED(); - break; - - case Quantifier::Greedy: { - GeneratePatternCharacterFunctor functor(m_sequence.last()); - m_generator.generateGreedyQuantifier(m_failures, functor, quantifier.min, quantifier.max); - break; - } - - case Quantifier::NonGreedy: { - GeneratePatternCharacterFunctor functor(m_sequence.last()); - m_generator.generateNonGreedyQuantifier(m_failures, functor, quantifier.min, quantifier.max); - break; - } - } - - m_sequence.clear(); - } - -private: - Generator& m_generator; - JumpList& m_failures; - Vector<int, 8> m_sequence; -}; - -ALWAYS_INLINE Quantifier Parser::consumeGreedyQuantifier() -{ - switch (peek()) { - case '?': - consume(); - return Quantifier(Quantifier::Greedy, 0, 1); - - case '*': - consume(); - return Quantifier(Quantifier::Greedy, 0); - - case '+': - consume(); - return Quantifier(Quantifier::Greedy, 1); - - case '{': { - SavedState state(*this); - consume(); - - // Accept: {n}, {n,}, {n,m}. - // Reject: {n,m} where n > m. - // Ignore: Anything else, such as {n, m}. - - if (!peekIsDigit()) { - state.restore(); - return Quantifier(); - } - - unsigned min = consumeNumber(); - unsigned max = min; - - if (peek() == ',') { - consume(); - max = peekIsDigit() ? consumeNumber() : Quantifier::Infinity; - } - - if (peek() != '}') { - state.restore(); - return Quantifier(); - } - consume(); - - if (min > max) { - setError(QuantifierOutOfOrder); - return Quantifier(Quantifier::Error); - } - - return Quantifier(Quantifier::Greedy, min, max); - } - - default: - return Quantifier(); // No quantifier. - } -} - -Quantifier Parser::consumeQuantifier() -{ - Quantifier q = consumeGreedyQuantifier(); - - if ((q.type == Quantifier::Greedy) && (peek() == '?')) { - consume(); - q.type = Quantifier::NonGreedy; - } - - return q; -} - -bool Parser::parseCharacterClassQuantifier(JumpList& failures, const CharacterClass& charClass, bool invert) -{ - Quantifier q = consumeQuantifier(); - - switch (q.type) { - case Quantifier::None: { - m_generator.generateCharacterClass(failures, charClass, invert); - break; - } - - case Quantifier::Greedy: { - GenerateCharacterClassFunctor functor(&charClass, invert); - m_generator.generateGreedyQuantifier(failures, functor, q.min, q.max); - break; - } - - case Quantifier::NonGreedy: { - GenerateCharacterClassFunctor functor(&charClass, invert); - m_generator.generateNonGreedyQuantifier(failures, functor, q.min, q.max); - break; - } - - case Quantifier::Error: - return false; - } - - return true; -} - -bool Parser::parseBackreferenceQuantifier(JumpList& failures, unsigned subpatternId) -{ - Quantifier q = consumeQuantifier(); - - switch (q.type) { - case Quantifier::None: { - m_generator.generateBackreference(failures, subpatternId); - break; - } - - case Quantifier::Greedy: - case Quantifier::NonGreedy: - m_generator.generateBackreferenceQuantifier(failures, q.type, subpatternId, q.min, q.max); - return true; - - case Quantifier::Error: - return false; - } - - return true; -} - -bool Parser::parseParentheses(JumpList& failures) -{ - ParenthesesType type = consumeParenthesesType(); - - // FIXME: WREC originally failed to backtrack correctly in cases such as - // "c".match(/(.*)c/). Now, most parentheses handling is disabled. For - // unsupported parentheses, we fall back on PCRE. - - switch (type) { - case Generator::Assertion: { - m_generator.generateParenthesesAssertion(failures); - - if (consume() != ')') { - setError(ParenthesesUnmatched); - return false; - } - - Quantifier quantifier = consumeQuantifier(); - if (quantifier.type != Quantifier::None && quantifier.min == 0) { - setError(ParenthesesNotSupported); - return false; - } - - return true; - } - case Generator::InvertedAssertion: { - m_generator.generateParenthesesInvertedAssertion(failures); - - if (consume() != ')') { - setError(ParenthesesUnmatched); - return false; - } - - Quantifier quantifier = consumeQuantifier(); - if (quantifier.type != Quantifier::None && quantifier.min == 0) { - setError(ParenthesesNotSupported); - return false; - } - - return true; - } - default: - setError(ParenthesesNotSupported); - return false; - } -} - -bool Parser::parseCharacterClass(JumpList& failures) -{ - bool invert = false; - if (peek() == '^') { - consume(); - invert = true; - } - - CharacterClassConstructor constructor(m_ignoreCase); - - int ch; - while ((ch = peek()) != ']') { - switch (ch) { - case EndOfPattern: - setError(CharacterClassUnmatched); - return false; - - case '\\': { - consume(); - Escape escape = consumeEscape(true); - - switch (escape.type()) { - case Escape::PatternCharacter: { - int character = PatternCharacterEscape::cast(escape).character(); - if (character == '-') - constructor.flushBeforeEscapedHyphen(); - constructor.put(character); - break; - } - case Escape::CharacterClass: { - const CharacterClassEscape& characterClassEscape = CharacterClassEscape::cast(escape); - ASSERT(!characterClassEscape.invert()); - constructor.append(characterClassEscape.characterClass()); - break; - } - case Escape::Error: - return false; - case Escape::Backreference: - case Escape::WordBoundaryAssertion: { - ASSERT_NOT_REACHED(); - break; - } - } - break; - } - - default: - consume(); - constructor.put(ch); - } - } - consume(); - - // lazily catch reversed ranges ([z-a])in character classes - if (constructor.isUpsideDown()) { - setError(CharacterClassOutOfOrder); - return false; - } - - constructor.flush(); - CharacterClass charClass = constructor.charClass(); - return parseCharacterClassQuantifier(failures, charClass, invert); -} - -bool Parser::parseNonCharacterEscape(JumpList& failures, const Escape& escape) -{ - switch (escape.type()) { - case Escape::PatternCharacter: - ASSERT_NOT_REACHED(); - return false; - - case Escape::CharacterClass: - return parseCharacterClassQuantifier(failures, CharacterClassEscape::cast(escape).characterClass(), CharacterClassEscape::cast(escape).invert()); - - case Escape::Backreference: - return parseBackreferenceQuantifier(failures, BackreferenceEscape::cast(escape).subpatternId()); - - case Escape::WordBoundaryAssertion: - m_generator.generateAssertionWordBoundary(failures, WordBoundaryAssertionEscape::cast(escape).invert()); - return true; - - case Escape::Error: - return false; - } - - ASSERT_NOT_REACHED(); - return false; -} - -Escape Parser::consumeEscape(bool inCharacterClass) -{ - switch (peek()) { - case EndOfPattern: - setError(EscapeUnterminated); - return Escape(Escape::Error); - - // Assertions - case 'b': - consume(); - if (inCharacterClass) - return PatternCharacterEscape('\b'); - return WordBoundaryAssertionEscape(false); // do not invert - case 'B': - consume(); - if (inCharacterClass) - return PatternCharacterEscape('B'); - return WordBoundaryAssertionEscape(true); // invert - - // CharacterClassEscape - case 'd': - consume(); - return CharacterClassEscape(CharacterClass::digits(), false); - case 's': - consume(); - return CharacterClassEscape(CharacterClass::spaces(), false); - case 'w': - consume(); - return CharacterClassEscape(CharacterClass::wordchar(), false); - case 'D': - consume(); - return inCharacterClass - ? CharacterClassEscape(CharacterClass::nondigits(), false) - : CharacterClassEscape(CharacterClass::digits(), true); - case 'S': - consume(); - return inCharacterClass - ? CharacterClassEscape(CharacterClass::nonspaces(), false) - : CharacterClassEscape(CharacterClass::spaces(), true); - case 'W': - consume(); - return inCharacterClass - ? CharacterClassEscape(CharacterClass::nonwordchar(), false) - : CharacterClassEscape(CharacterClass::wordchar(), true); - - // DecimalEscape - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': { - if (peekDigit() > m_numSubpatterns || inCharacterClass) { - // To match Firefox, we parse an invalid backreference in the range [1-7] - // as an octal escape. - return peekDigit() > 7 ? PatternCharacterEscape('\\') : PatternCharacterEscape(consumeOctal()); - } - - int value = 0; - do { - unsigned newValue = value * 10 + peekDigit(); - if (newValue > m_numSubpatterns) - break; - value = newValue; - consume(); - } while (peekIsDigit()); - - return BackreferenceEscape(value); - } - - // Octal escape - case '0': - consume(); - return PatternCharacterEscape(consumeOctal()); - - // ControlEscape - case 'f': - consume(); - return PatternCharacterEscape('\f'); - case 'n': - consume(); - return PatternCharacterEscape('\n'); - case 'r': - consume(); - return PatternCharacterEscape('\r'); - case 't': - consume(); - return PatternCharacterEscape('\t'); - case 'v': - consume(); - return PatternCharacterEscape('\v'); - - // ControlLetter - case 'c': { - SavedState state(*this); - consume(); - - int control = consume(); - // To match Firefox, inside a character class, we also accept numbers - // and '_' as control characters. - if ((!inCharacterClass && !isASCIIAlpha(control)) || (!isASCIIAlphanumeric(control) && control != '_')) { - state.restore(); - return PatternCharacterEscape('\\'); - } - return PatternCharacterEscape(control & 31); - } - - // HexEscape - case 'x': { - consume(); - - SavedState state(*this); - int x = consumeHex(2); - if (x == -1) { - state.restore(); - return PatternCharacterEscape('x'); - } - return PatternCharacterEscape(x); - } - - // UnicodeEscape - case 'u': { - consume(); - - SavedState state(*this); - int x = consumeHex(4); - if (x == -1) { - state.restore(); - return PatternCharacterEscape('u'); - } - return PatternCharacterEscape(x); - } - - // IdentityEscape - default: - return PatternCharacterEscape(consume()); - } -} - -void Parser::parseAlternative(JumpList& failures) -{ - PatternCharacterSequence sequence(m_generator, failures); - - while (1) { - switch (peek()) { - case EndOfPattern: - case '|': - case ')': - sequence.flush(); - return; - - case '*': - case '+': - case '?': - case '{': { - Quantifier q = consumeQuantifier(); - - if (q.type == Quantifier::None) { - sequence.append(consume()); - continue; - } - - if (q.type == Quantifier::Error) - return; - - if (!sequence.size()) { - setError(QuantifierWithoutAtom); - return; - } - - sequence.flush(q); - continue; - } - - case '^': - consume(); - - sequence.flush(); - m_generator.generateAssertionBOL(failures); - continue; - - case '$': - consume(); - - sequence.flush(); - m_generator.generateAssertionEOL(failures); - continue; - - case '.': - consume(); - - sequence.flush(); - if (!parseCharacterClassQuantifier(failures, CharacterClass::newline(), true)) - return; - continue; - - case '[': - consume(); - - sequence.flush(); - if (!parseCharacterClass(failures)) - return; - continue; - - case '(': - consume(); - - sequence.flush(); - if (!parseParentheses(failures)) - return; - continue; - - case '\\': { - consume(); - - Escape escape = consumeEscape(false); - if (escape.type() == Escape::PatternCharacter) { - sequence.append(PatternCharacterEscape::cast(escape).character()); - continue; - } - - sequence.flush(); - if (!parseNonCharacterEscape(failures, escape)) - return; - continue; - } - - default: - sequence.append(consume()); - continue; - } - } -} - -/* - TOS holds index. -*/ -void Parser::parseDisjunction(JumpList& failures) -{ - parseAlternative(failures); - if (peek() != '|') - return; - - JumpList successes; - do { - consume(); - m_generator.terminateAlternative(successes, failures); - parseAlternative(failures); - } while (peek() == '|'); - - m_generator.terminateDisjunction(successes); -} - -Generator::ParenthesesType Parser::consumeParenthesesType() -{ - if (peek() != '?') - return Generator::Capturing; - consume(); - - switch (consume()) { - case ':': - return Generator::NonCapturing; - - case '=': - return Generator::Assertion; - - case '!': - return Generator::InvertedAssertion; - - default: - setError(ParenthesesTypeInvalid); - return Generator::Error; - } -} - -} } // namespace JSC::WREC - -#endif // ENABLE(WREC) |