/* * Copyright (C) 2008 Apple Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef Parser_h #define Parser_h #include #if ENABLE(WREC) #include "Escapes.h" #include "Quantifier.h" #include "UString.h" #include "WRECGenerator.h" #include namespace JSC { namespace WREC { struct CharacterClass; class Parser { typedef Generator::JumpList JumpList; typedef Generator::ParenthesesType ParenthesesType; friend class SavedState; public: Parser(const UString& pattern, bool ignoreCase, bool multiline) : m_generator(*this) , m_data(pattern.data()) , m_size(pattern.size()) , m_ignoreCase(ignoreCase) , m_multiline(multiline) { reset(); } Generator& generator() { return m_generator; } bool ignoreCase() const { return m_ignoreCase; } bool multiline() const { return m_multiline; } void recordSubpattern() { ++m_numSubpatterns; } unsigned numSubpatterns() const { return m_numSubpatterns; } const char* error() const { return m_error; } const char* syntaxError() const { return m_error == ParenthesesNotSupported ? 0 : m_error; } void parsePattern(JumpList& failures) { reset(); parseDisjunction(failures); if (peek() != EndOfPattern) setError(ParenthesesUnmatched); // Parsing the pattern should fully consume it. } void parseDisjunction(JumpList& failures); void parseAlternative(JumpList& failures); bool parseTerm(JumpList& failures); bool parseNonCharacterEscape(JumpList& failures, const Escape&); bool parseParentheses(JumpList& failures); bool parseCharacterClass(JumpList& failures); bool parseCharacterClassQuantifier(JumpList& failures, const CharacterClass& charClass, bool invert); bool parseBackreferenceQuantifier(JumpList& failures, unsigned subpatternId); private: class SavedState { public: SavedState(Parser& parser) : m_parser(parser) , m_index(parser.m_index) { } void restore() { m_parser.m_index = m_index; } private: Parser& m_parser; unsigned m_index; }; void reset() { m_index = 0; m_numSubpatterns = 0; m_error = 0; } void setError(const char* error) { if (m_error) return; m_error = error; } int peek() { if (m_index >= m_size) return EndOfPattern; return m_data[m_index]; } int consume() { if (m_index >= m_size) return EndOfPattern; return m_data[m_index++]; } bool peekIsDigit() { return WTF::isASCIIDigit(peek()); } unsigned peekDigit() { ASSERT(peekIsDigit()); return peek() - '0'; } unsigned consumeDigit() { ASSERT(peekIsDigit()); return consume() - '0'; } unsigned consumeNumber() { int n = consumeDigit(); while (peekIsDigit()) { n *= 10; n += consumeDigit(); } return n; } int consumeHex(int count) { int n = 0; while (count--) { if (!WTF::isASCIIHexDigit(peek())) return -1; n = (n << 4) | WTF::toASCIIHexValue(consume()); } return n; } unsigned consumeOctal() { unsigned n = 0; while (n < 32 && WTF::isASCIIOctalDigit(peek())) n = n * 8 + consumeDigit(); return n; } ALWAYS_INLINE Quantifier consumeGreedyQuantifier(); Quantifier consumeQuantifier(); Escape consumeEscape(bool inCharacterClass); ParenthesesType consumeParenthesesType(); static const int EndOfPattern = -1; // Error messages. static const char* QuantifierOutOfOrder; static const char* QuantifierWithoutAtom; static const char* ParenthesesUnmatched; static const char* ParenthesesTypeInvalid; static const char* ParenthesesNotSupported; static const char* CharacterClassUnmatched; static const char* CharacterClassOutOfOrder; static const char* EscapeUnterminated; Generator m_generator; const UChar* m_data; unsigned m_size; unsigned m_index; bool m_ignoreCase; bool m_multiline; unsigned m_numSubpatterns; const char* m_error; }; } } // namespace JSC::WREC #endif // ENABLE(WREC) #endif // Parser_h