summaryrefslogtreecommitdiff
path: root/src/3rdparty/javascriptcore/JavaScriptCore/wrec/WRECParser.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/3rdparty/javascriptcore/JavaScriptCore/wrec/WRECParser.cpp')
-rw-r--r--src/3rdparty/javascriptcore/JavaScriptCore/wrec/WRECParser.cpp643
1 files changed, 0 insertions, 643 deletions
diff --git a/src/3rdparty/javascriptcore/JavaScriptCore/wrec/WRECParser.cpp b/src/3rdparty/javascriptcore/JavaScriptCore/wrec/WRECParser.cpp
deleted file mode 100644
index 1709bf9..0000000
--- a/src/3rdparty/javascriptcore/JavaScriptCore/wrec/WRECParser.cpp
+++ /dev/null
@@ -1,643 +0,0 @@
-/*
- * Copyright (C) 2008 Apple Inc. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "config.h"
-#include "WRECParser.h"
-
-#if ENABLE(WREC)
-
-#include "CharacterClassConstructor.h"
-#include "WRECFunctors.h"
-
-using namespace WTF;
-
-namespace JSC { namespace WREC {
-
-// These error messages match the error messages used by PCRE.
-const char* Parser::QuantifierOutOfOrder = "numbers out of order in {} quantifier";
-const char* Parser::QuantifierWithoutAtom = "nothing to repeat";
-const char* Parser::ParenthesesUnmatched = "unmatched parentheses";
-const char* Parser::ParenthesesTypeInvalid = "unrecognized character after (?";
-const char* Parser::ParenthesesNotSupported = ""; // Not a user-visible syntax error -- just signals a syntax that WREC doesn't support yet.
-const char* Parser::CharacterClassUnmatched = "missing terminating ] for character class";
-const char* Parser::CharacterClassOutOfOrder = "range out of order in character class";
-const char* Parser::EscapeUnterminated = "\\ at end of pattern";
-
-class PatternCharacterSequence {
-typedef Generator::JumpList JumpList;
-
-public:
- PatternCharacterSequence(Generator& generator, JumpList& failures)
- : m_generator(generator)
- , m_failures(failures)
- {
- }
-
- size_t size() { return m_sequence.size(); }
-
- void append(int ch)
- {
- m_sequence.append(ch);
- }
-
- void flush()
- {
- if (!m_sequence.size())
- return;
-
- m_generator.generatePatternCharacterSequence(m_failures, m_sequence.begin(), m_sequence.size());
- m_sequence.clear();
- }
-
- void flush(const Quantifier& quantifier)
- {
- if (!m_sequence.size())
- return;
-
- m_generator.generatePatternCharacterSequence(m_failures, m_sequence.begin(), m_sequence.size() - 1);
-
- switch (quantifier.type) {
- case Quantifier::None:
- case Quantifier::Error:
- ASSERT_NOT_REACHED();
- break;
-
- case Quantifier::Greedy: {
- GeneratePatternCharacterFunctor functor(m_sequence.last());
- m_generator.generateGreedyQuantifier(m_failures, functor, quantifier.min, quantifier.max);
- break;
- }
-
- case Quantifier::NonGreedy: {
- GeneratePatternCharacterFunctor functor(m_sequence.last());
- m_generator.generateNonGreedyQuantifier(m_failures, functor, quantifier.min, quantifier.max);
- break;
- }
- }
-
- m_sequence.clear();
- }
-
-private:
- Generator& m_generator;
- JumpList& m_failures;
- Vector<int, 8> m_sequence;
-};
-
-ALWAYS_INLINE Quantifier Parser::consumeGreedyQuantifier()
-{
- switch (peek()) {
- case '?':
- consume();
- return Quantifier(Quantifier::Greedy, 0, 1);
-
- case '*':
- consume();
- return Quantifier(Quantifier::Greedy, 0);
-
- case '+':
- consume();
- return Quantifier(Quantifier::Greedy, 1);
-
- case '{': {
- SavedState state(*this);
- consume();
-
- // Accept: {n}, {n,}, {n,m}.
- // Reject: {n,m} where n > m.
- // Ignore: Anything else, such as {n, m}.
-
- if (!peekIsDigit()) {
- state.restore();
- return Quantifier();
- }
-
- unsigned min = consumeNumber();
- unsigned max = min;
-
- if (peek() == ',') {
- consume();
- max = peekIsDigit() ? consumeNumber() : Quantifier::Infinity;
- }
-
- if (peek() != '}') {
- state.restore();
- return Quantifier();
- }
- consume();
-
- if (min > max) {
- setError(QuantifierOutOfOrder);
- return Quantifier(Quantifier::Error);
- }
-
- return Quantifier(Quantifier::Greedy, min, max);
- }
-
- default:
- return Quantifier(); // No quantifier.
- }
-}
-
-Quantifier Parser::consumeQuantifier()
-{
- Quantifier q = consumeGreedyQuantifier();
-
- if ((q.type == Quantifier::Greedy) && (peek() == '?')) {
- consume();
- q.type = Quantifier::NonGreedy;
- }
-
- return q;
-}
-
-bool Parser::parseCharacterClassQuantifier(JumpList& failures, const CharacterClass& charClass, bool invert)
-{
- Quantifier q = consumeQuantifier();
-
- switch (q.type) {
- case Quantifier::None: {
- m_generator.generateCharacterClass(failures, charClass, invert);
- break;
- }
-
- case Quantifier::Greedy: {
- GenerateCharacterClassFunctor functor(&charClass, invert);
- m_generator.generateGreedyQuantifier(failures, functor, q.min, q.max);
- break;
- }
-
- case Quantifier::NonGreedy: {
- GenerateCharacterClassFunctor functor(&charClass, invert);
- m_generator.generateNonGreedyQuantifier(failures, functor, q.min, q.max);
- break;
- }
-
- case Quantifier::Error:
- return false;
- }
-
- return true;
-}
-
-bool Parser::parseBackreferenceQuantifier(JumpList& failures, unsigned subpatternId)
-{
- Quantifier q = consumeQuantifier();
-
- switch (q.type) {
- case Quantifier::None: {
- m_generator.generateBackreference(failures, subpatternId);
- break;
- }
-
- case Quantifier::Greedy:
- case Quantifier::NonGreedy:
- m_generator.generateBackreferenceQuantifier(failures, q.type, subpatternId, q.min, q.max);
- return true;
-
- case Quantifier::Error:
- return false;
- }
-
- return true;
-}
-
-bool Parser::parseParentheses(JumpList& failures)
-{
- ParenthesesType type = consumeParenthesesType();
-
- // FIXME: WREC originally failed to backtrack correctly in cases such as
- // "c".match(/(.*)c/). Now, most parentheses handling is disabled. For
- // unsupported parentheses, we fall back on PCRE.
-
- switch (type) {
- case Generator::Assertion: {
- m_generator.generateParenthesesAssertion(failures);
-
- if (consume() != ')') {
- setError(ParenthesesUnmatched);
- return false;
- }
-
- Quantifier quantifier = consumeQuantifier();
- if (quantifier.type != Quantifier::None && quantifier.min == 0) {
- setError(ParenthesesNotSupported);
- return false;
- }
-
- return true;
- }
- case Generator::InvertedAssertion: {
- m_generator.generateParenthesesInvertedAssertion(failures);
-
- if (consume() != ')') {
- setError(ParenthesesUnmatched);
- return false;
- }
-
- Quantifier quantifier = consumeQuantifier();
- if (quantifier.type != Quantifier::None && quantifier.min == 0) {
- setError(ParenthesesNotSupported);
- return false;
- }
-
- return true;
- }
- default:
- setError(ParenthesesNotSupported);
- return false;
- }
-}
-
-bool Parser::parseCharacterClass(JumpList& failures)
-{
- bool invert = false;
- if (peek() == '^') {
- consume();
- invert = true;
- }
-
- CharacterClassConstructor constructor(m_ignoreCase);
-
- int ch;
- while ((ch = peek()) != ']') {
- switch (ch) {
- case EndOfPattern:
- setError(CharacterClassUnmatched);
- return false;
-
- case '\\': {
- consume();
- Escape escape = consumeEscape(true);
-
- switch (escape.type()) {
- case Escape::PatternCharacter: {
- int character = PatternCharacterEscape::cast(escape).character();
- if (character == '-')
- constructor.flushBeforeEscapedHyphen();
- constructor.put(character);
- break;
- }
- case Escape::CharacterClass: {
- const CharacterClassEscape& characterClassEscape = CharacterClassEscape::cast(escape);
- ASSERT(!characterClassEscape.invert());
- constructor.append(characterClassEscape.characterClass());
- break;
- }
- case Escape::Error:
- return false;
- case Escape::Backreference:
- case Escape::WordBoundaryAssertion: {
- ASSERT_NOT_REACHED();
- break;
- }
- }
- break;
- }
-
- default:
- consume();
- constructor.put(ch);
- }
- }
- consume();
-
- // lazily catch reversed ranges ([z-a])in character classes
- if (constructor.isUpsideDown()) {
- setError(CharacterClassOutOfOrder);
- return false;
- }
-
- constructor.flush();
- CharacterClass charClass = constructor.charClass();
- return parseCharacterClassQuantifier(failures, charClass, invert);
-}
-
-bool Parser::parseNonCharacterEscape(JumpList& failures, const Escape& escape)
-{
- switch (escape.type()) {
- case Escape::PatternCharacter:
- ASSERT_NOT_REACHED();
- return false;
-
- case Escape::CharacterClass:
- return parseCharacterClassQuantifier(failures, CharacterClassEscape::cast(escape).characterClass(), CharacterClassEscape::cast(escape).invert());
-
- case Escape::Backreference:
- return parseBackreferenceQuantifier(failures, BackreferenceEscape::cast(escape).subpatternId());
-
- case Escape::WordBoundaryAssertion:
- m_generator.generateAssertionWordBoundary(failures, WordBoundaryAssertionEscape::cast(escape).invert());
- return true;
-
- case Escape::Error:
- return false;
- }
-
- ASSERT_NOT_REACHED();
- return false;
-}
-
-Escape Parser::consumeEscape(bool inCharacterClass)
-{
- switch (peek()) {
- case EndOfPattern:
- setError(EscapeUnterminated);
- return Escape(Escape::Error);
-
- // Assertions
- case 'b':
- consume();
- if (inCharacterClass)
- return PatternCharacterEscape('\b');
- return WordBoundaryAssertionEscape(false); // do not invert
- case 'B':
- consume();
- if (inCharacterClass)
- return PatternCharacterEscape('B');
- return WordBoundaryAssertionEscape(true); // invert
-
- // CharacterClassEscape
- case 'd':
- consume();
- return CharacterClassEscape(CharacterClass::digits(), false);
- case 's':
- consume();
- return CharacterClassEscape(CharacterClass::spaces(), false);
- case 'w':
- consume();
- return CharacterClassEscape(CharacterClass::wordchar(), false);
- case 'D':
- consume();
- return inCharacterClass
- ? CharacterClassEscape(CharacterClass::nondigits(), false)
- : CharacterClassEscape(CharacterClass::digits(), true);
- case 'S':
- consume();
- return inCharacterClass
- ? CharacterClassEscape(CharacterClass::nonspaces(), false)
- : CharacterClassEscape(CharacterClass::spaces(), true);
- case 'W':
- consume();
- return inCharacterClass
- ? CharacterClassEscape(CharacterClass::nonwordchar(), false)
- : CharacterClassEscape(CharacterClass::wordchar(), true);
-
- // DecimalEscape
- case '1':
- case '2':
- case '3':
- case '4':
- case '5':
- case '6':
- case '7':
- case '8':
- case '9': {
- if (peekDigit() > m_numSubpatterns || inCharacterClass) {
- // To match Firefox, we parse an invalid backreference in the range [1-7]
- // as an octal escape.
- return peekDigit() > 7 ? PatternCharacterEscape('\\') : PatternCharacterEscape(consumeOctal());
- }
-
- int value = 0;
- do {
- unsigned newValue = value * 10 + peekDigit();
- if (newValue > m_numSubpatterns)
- break;
- value = newValue;
- consume();
- } while (peekIsDigit());
-
- return BackreferenceEscape(value);
- }
-
- // Octal escape
- case '0':
- consume();
- return PatternCharacterEscape(consumeOctal());
-
- // ControlEscape
- case 'f':
- consume();
- return PatternCharacterEscape('\f');
- case 'n':
- consume();
- return PatternCharacterEscape('\n');
- case 'r':
- consume();
- return PatternCharacterEscape('\r');
- case 't':
- consume();
- return PatternCharacterEscape('\t');
- case 'v':
- consume();
- return PatternCharacterEscape('\v');
-
- // ControlLetter
- case 'c': {
- SavedState state(*this);
- consume();
-
- int control = consume();
- // To match Firefox, inside a character class, we also accept numbers
- // and '_' as control characters.
- if ((!inCharacterClass && !isASCIIAlpha(control)) || (!isASCIIAlphanumeric(control) && control != '_')) {
- state.restore();
- return PatternCharacterEscape('\\');
- }
- return PatternCharacterEscape(control & 31);
- }
-
- // HexEscape
- case 'x': {
- consume();
-
- SavedState state(*this);
- int x = consumeHex(2);
- if (x == -1) {
- state.restore();
- return PatternCharacterEscape('x');
- }
- return PatternCharacterEscape(x);
- }
-
- // UnicodeEscape
- case 'u': {
- consume();
-
- SavedState state(*this);
- int x = consumeHex(4);
- if (x == -1) {
- state.restore();
- return PatternCharacterEscape('u');
- }
- return PatternCharacterEscape(x);
- }
-
- // IdentityEscape
- default:
- return PatternCharacterEscape(consume());
- }
-}
-
-void Parser::parseAlternative(JumpList& failures)
-{
- PatternCharacterSequence sequence(m_generator, failures);
-
- while (1) {
- switch (peek()) {
- case EndOfPattern:
- case '|':
- case ')':
- sequence.flush();
- return;
-
- case '*':
- case '+':
- case '?':
- case '{': {
- Quantifier q = consumeQuantifier();
-
- if (q.type == Quantifier::None) {
- sequence.append(consume());
- continue;
- }
-
- if (q.type == Quantifier::Error)
- return;
-
- if (!sequence.size()) {
- setError(QuantifierWithoutAtom);
- return;
- }
-
- sequence.flush(q);
- continue;
- }
-
- case '^':
- consume();
-
- sequence.flush();
- m_generator.generateAssertionBOL(failures);
- continue;
-
- case '$':
- consume();
-
- sequence.flush();
- m_generator.generateAssertionEOL(failures);
- continue;
-
- case '.':
- consume();
-
- sequence.flush();
- if (!parseCharacterClassQuantifier(failures, CharacterClass::newline(), true))
- return;
- continue;
-
- case '[':
- consume();
-
- sequence.flush();
- if (!parseCharacterClass(failures))
- return;
- continue;
-
- case '(':
- consume();
-
- sequence.flush();
- if (!parseParentheses(failures))
- return;
- continue;
-
- case '\\': {
- consume();
-
- Escape escape = consumeEscape(false);
- if (escape.type() == Escape::PatternCharacter) {
- sequence.append(PatternCharacterEscape::cast(escape).character());
- continue;
- }
-
- sequence.flush();
- if (!parseNonCharacterEscape(failures, escape))
- return;
- continue;
- }
-
- default:
- sequence.append(consume());
- continue;
- }
- }
-}
-
-/*
- TOS holds index.
-*/
-void Parser::parseDisjunction(JumpList& failures)
-{
- parseAlternative(failures);
- if (peek() != '|')
- return;
-
- JumpList successes;
- do {
- consume();
- m_generator.terminateAlternative(successes, failures);
- parseAlternative(failures);
- } while (peek() == '|');
-
- m_generator.terminateDisjunction(successes);
-}
-
-Generator::ParenthesesType Parser::consumeParenthesesType()
-{
- if (peek() != '?')
- return Generator::Capturing;
- consume();
-
- switch (consume()) {
- case ':':
- return Generator::NonCapturing;
-
- case '=':
- return Generator::Assertion;
-
- case '!':
- return Generator::InvertedAssertion;
-
- default:
- setError(ParenthesesTypeInvalid);
- return Generator::Error;
- }
-}
-
-} } // namespace JSC::WREC
-
-#endif // ENABLE(WREC)