summaryrefslogtreecommitdiff
path: root/src/3rdparty/javascriptcore/JavaScriptCore/yarr
diff options
context:
space:
mode:
authorQt by Nokia <qt-info@nokia.com>2011-04-27 12:05:43 +0200
committeraxis <qt-info@nokia.com>2011-04-27 12:05:43 +0200
commitc4af45c2914381172e1bd7ee528481edaa2fff1a (patch)
tree01265e109316fda93845e1c96c5e566d870f51d0 /src/3rdparty/javascriptcore/JavaScriptCore/yarr
downloadqtscript-c4af45c2914381172e1bd7ee528481edaa2fff1a.tar.gz
Initial import from the monolithic Qt.
This is the beginning of revision history for this module. If you want to look at revision history older than this, please refer to the Qt Git wiki for how to use Git history grafting. At the time of writing, this wiki is located here: http://qt.gitorious.org/qt/pages/GitIntroductionWithQt If you have already performed the grafting and you don't see any history beyond this commit, try running "git log" with the "--follow" argument. Branched from the monolithic repo, Qt master branch, at commit 896db169ea224deb96c59ce8af800d019de63f12
Diffstat (limited to 'src/3rdparty/javascriptcore/JavaScriptCore/yarr')
-rw-r--r--src/3rdparty/javascriptcore/JavaScriptCore/yarr/RegexCompiler.cpp728
-rw-r--r--src/3rdparty/javascriptcore/JavaScriptCore/yarr/RegexCompiler.h45
-rw-r--r--src/3rdparty/javascriptcore/JavaScriptCore/yarr/RegexInterpreter.cpp1638
-rw-r--r--src/3rdparty/javascriptcore/JavaScriptCore/yarr/RegexInterpreter.h337
-rw-r--r--src/3rdparty/javascriptcore/JavaScriptCore/yarr/RegexJIT.cpp1407
-rw-r--r--src/3rdparty/javascriptcore/JavaScriptCore/yarr/RegexJIT.h98
-rw-r--r--src/3rdparty/javascriptcore/JavaScriptCore/yarr/RegexParser.h854
-rw-r--r--src/3rdparty/javascriptcore/JavaScriptCore/yarr/RegexPattern.h356
8 files changed, 5463 insertions, 0 deletions
diff --git a/src/3rdparty/javascriptcore/JavaScriptCore/yarr/RegexCompiler.cpp b/src/3rdparty/javascriptcore/JavaScriptCore/yarr/RegexCompiler.cpp
new file mode 100644
index 0000000..9cd3d12
--- /dev/null
+++ b/src/3rdparty/javascriptcore/JavaScriptCore/yarr/RegexCompiler.cpp
@@ -0,0 +1,728 @@
+/*
+ * Copyright (C) 2009 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "RegexCompiler.h"
+
+#include "RegexInterpreter.h"
+#include "RegexPattern.h"
+#include <wtf/Vector.h>
+
+#if ENABLE(YARR)
+
+using namespace WTF;
+
+namespace JSC { namespace Yarr {
+
+class CharacterClassConstructor {
+public:
+ CharacterClassConstructor(bool isCaseInsensitive = false)
+ : m_isCaseInsensitive(isCaseInsensitive)
+ {
+ }
+
+ void reset()
+ {
+ m_matches.clear();
+ m_ranges.clear();
+ m_matchesUnicode.clear();
+ m_rangesUnicode.clear();
+ }
+
+ void append(const CharacterClass* other)
+ {
+ for (size_t i = 0; i < other->m_matches.size(); ++i)
+ addSorted(m_matches, other->m_matches[i]);
+ for (size_t i = 0; i < other->m_ranges.size(); ++i)
+ addSortedRange(m_ranges, other->m_ranges[i].begin, other->m_ranges[i].end);
+ for (size_t i = 0; i < other->m_matchesUnicode.size(); ++i)
+ addSorted(m_matchesUnicode, other->m_matchesUnicode[i]);
+ for (size_t i = 0; i < other->m_rangesUnicode.size(); ++i)
+ addSortedRange(m_rangesUnicode, other->m_rangesUnicode[i].begin, other->m_rangesUnicode[i].end);
+ }
+
+ void putChar(UChar ch)
+ {
+ if (ch <= 0x7f) {
+ if (m_isCaseInsensitive && isASCIIAlpha(ch)) {
+ addSorted(m_matches, toASCIIUpper(ch));
+ addSorted(m_matches, toASCIILower(ch));
+ } else
+ addSorted(m_matches, ch);
+ } else {
+ UChar upper, lower;
+ if (m_isCaseInsensitive && ((upper = Unicode::toUpper(ch)) != (lower = Unicode::toLower(ch)))) {
+ addSorted(m_matchesUnicode, upper);
+ addSorted(m_matchesUnicode, lower);
+ } else
+ addSorted(m_matchesUnicode, ch);
+ }
+ }
+
+ // returns true if this character has another case, and 'ch' is the upper case form.
+ static inline bool isUnicodeUpper(UChar ch)
+ {
+ return ch != Unicode::toLower(ch);
+ }
+
+ // returns true if this character has another case, and 'ch' is the lower case form.
+ static inline bool isUnicodeLower(UChar ch)
+ {
+ return ch != Unicode::toUpper(ch);
+ }
+
+ void putRange(UChar lo, UChar hi)
+ {
+ if (lo <= 0x7f) {
+ char asciiLo = lo;
+ char asciiHi = std::min(hi, (UChar)0x7f);
+ addSortedRange(m_ranges, lo, asciiHi);
+
+ if (m_isCaseInsensitive) {
+ if ((asciiLo <= 'Z') && (asciiHi >= 'A'))
+ addSortedRange(m_ranges, std::max(asciiLo, 'A')+('a'-'A'), std::min(asciiHi, 'Z')+('a'-'A'));
+ if ((asciiLo <= 'z') && (asciiHi >= 'a'))
+ addSortedRange(m_ranges, std::max(asciiLo, 'a')+('A'-'a'), std::min(asciiHi, 'z')+('A'-'a'));
+ }
+ }
+ if (hi >= 0x80) {
+ uint32_t unicodeCurr = std::max(lo, (UChar)0x80);
+ addSortedRange(m_rangesUnicode, unicodeCurr, hi);
+
+ if (m_isCaseInsensitive) {
+ while (unicodeCurr <= hi) {
+ // If the upper bound of the range (hi) is 0xffff, the increments to
+ // unicodeCurr in this loop may take it to 0x10000. This is fine
+ // (if so we won't re-enter the loop, since the loop condition above
+ // will definitely fail) - but this does mean we cannot use a UChar
+ // to represent unicodeCurr, we must use a 32-bit value instead.
+ ASSERT(unicodeCurr <= 0xffff);
+
+ if (isUnicodeUpper(unicodeCurr)) {
+ UChar lowerCaseRangeBegin = Unicode::toLower(unicodeCurr);
+ UChar lowerCaseRangeEnd = lowerCaseRangeBegin;
+ while ((++unicodeCurr <= hi) && isUnicodeUpper(unicodeCurr) && (Unicode::toLower(unicodeCurr) == (lowerCaseRangeEnd + 1)))
+ lowerCaseRangeEnd++;
+ addSortedRange(m_rangesUnicode, lowerCaseRangeBegin, lowerCaseRangeEnd);
+ } else if (isUnicodeLower(unicodeCurr)) {
+ UChar upperCaseRangeBegin = Unicode::toUpper(unicodeCurr);
+ UChar upperCaseRangeEnd = upperCaseRangeBegin;
+ while ((++unicodeCurr <= hi) && isUnicodeLower(unicodeCurr) && (Unicode::toUpper(unicodeCurr) == (upperCaseRangeEnd + 1)))
+ upperCaseRangeEnd++;
+ addSortedRange(m_rangesUnicode, upperCaseRangeBegin, upperCaseRangeEnd);
+ } else
+ ++unicodeCurr;
+ }
+ }
+ }
+ }
+
+ CharacterClass* charClass()
+ {
+ CharacterClass* characterClass = new CharacterClass();
+
+ characterClass->m_matches.append(m_matches);
+ characterClass->m_ranges.append(m_ranges);
+ characterClass->m_matchesUnicode.append(m_matchesUnicode);
+ characterClass->m_rangesUnicode.append(m_rangesUnicode);
+
+ reset();
+
+ return characterClass;
+ }
+
+private:
+ void addSorted(Vector<UChar>& matches, UChar ch)
+ {
+ unsigned pos = 0;
+ unsigned range = matches.size();
+
+ // binary chop, find position to insert char.
+ while (range) {
+ unsigned index = range >> 1;
+
+ int val = matches[pos+index] - ch;
+ if (!val)
+ return;
+ else if (val > 0)
+ range = index;
+ else {
+ pos += (index+1);
+ range -= (index+1);
+ }
+ }
+
+ if (pos == matches.size())
+ matches.append(ch);
+ else
+ matches.insert(pos, ch);
+ }
+
+ void addSortedRange(Vector<CharacterRange>& ranges, UChar lo, UChar hi)
+ {
+ unsigned end = ranges.size();
+
+ // Simple linear scan - I doubt there are that many ranges anyway...
+ // feel free to fix this with something faster (eg binary chop).
+ for (unsigned i = 0; i < end; ++i) {
+ // does the new range fall before the current position in the array
+ if (hi < ranges[i].begin) {
+ // optional optimization: concatenate appending ranges? - may not be worthwhile.
+ if (hi == (ranges[i].begin - 1)) {
+ ranges[i].begin = lo;
+ return;
+ }
+ ranges.insert(i, CharacterRange(lo, hi));
+ return;
+ }
+ // Okay, since we didn't hit the last case, the end of the new range is definitely at or after the begining
+ // If the new range start at or before the end of the last range, then the overlap (if it starts one after the
+ // end of the last range they concatenate, which is just as good.
+ if (lo <= (ranges[i].end + 1)) {
+ // found an intersect! we'll replace this entry in the array.
+ ranges[i].begin = std::min(ranges[i].begin, lo);
+ ranges[i].end = std::max(ranges[i].end, hi);
+
+ // now check if the new range can subsume any subsequent ranges.
+ unsigned next = i+1;
+ // each iteration of the loop we will either remove something from the list, or break the loop.
+ while (next < ranges.size()) {
+ if (ranges[next].begin <= (ranges[i].end + 1)) {
+ // the next entry now overlaps / concatenates this one.
+ ranges[i].end = std::max(ranges[i].end, ranges[next].end);
+ ranges.remove(next);
+ } else
+ break;
+ }
+
+ return;
+ }
+ }
+
+ // CharacterRange comes after all existing ranges.
+ ranges.append(CharacterRange(lo, hi));
+ }
+
+ bool m_isCaseInsensitive;
+
+ Vector<UChar> m_matches;
+ Vector<CharacterRange> m_ranges;
+ Vector<UChar> m_matchesUnicode;
+ Vector<CharacterRange> m_rangesUnicode;
+};
+
+
+CharacterClass* newlineCreate()
+{
+ CharacterClass* characterClass = new CharacterClass();
+
+ characterClass->m_matches.append('\n');
+ characterClass->m_matches.append('\r');
+ characterClass->m_matchesUnicode.append(0x2028);
+ characterClass->m_matchesUnicode.append(0x2029);
+
+ return characterClass;
+}
+
+CharacterClass* digitsCreate()
+{
+ CharacterClass* characterClass = new CharacterClass();
+
+ characterClass->m_ranges.append(CharacterRange('0', '9'));
+
+ return characterClass;
+}
+
+CharacterClass* spacesCreate()
+{
+ CharacterClass* characterClass = new CharacterClass();
+
+ characterClass->m_matches.append(' ');
+ characterClass->m_ranges.append(CharacterRange('\t', '\r'));
+ characterClass->m_matchesUnicode.append(0x00a0);
+ characterClass->m_matchesUnicode.append(0x1680);
+ characterClass->m_matchesUnicode.append(0x180e);
+ characterClass->m_matchesUnicode.append(0x2028);
+ characterClass->m_matchesUnicode.append(0x2029);
+ characterClass->m_matchesUnicode.append(0x202f);
+ characterClass->m_matchesUnicode.append(0x205f);
+ characterClass->m_matchesUnicode.append(0x3000);
+ characterClass->m_rangesUnicode.append(CharacterRange(0x2000, 0x200a));
+
+ return characterClass;
+}
+
+CharacterClass* wordcharCreate()
+{
+ CharacterClass* characterClass = new CharacterClass();
+
+ characterClass->m_matches.append('_');
+ characterClass->m_ranges.append(CharacterRange('0', '9'));
+ characterClass->m_ranges.append(CharacterRange('A', 'Z'));
+ characterClass->m_ranges.append(CharacterRange('a', 'z'));
+
+ return characterClass;
+}
+
+CharacterClass* nondigitsCreate()
+{
+ CharacterClass* characterClass = new CharacterClass();
+
+ characterClass->m_ranges.append(CharacterRange(0, '0' - 1));
+ characterClass->m_ranges.append(CharacterRange('9' + 1, 0x7f));
+ characterClass->m_rangesUnicode.append(CharacterRange(0x80, 0xffff));
+
+ return characterClass;
+}
+
+CharacterClass* nonspacesCreate()
+{
+ CharacterClass* characterClass = new CharacterClass();
+
+ characterClass->m_ranges.append(CharacterRange(0, '\t' - 1));
+ characterClass->m_ranges.append(CharacterRange('\r' + 1, ' ' - 1));
+ characterClass->m_ranges.append(CharacterRange(' ' + 1, 0x7f));
+ characterClass->m_rangesUnicode.append(CharacterRange(0x0080, 0x009f));
+ characterClass->m_rangesUnicode.append(CharacterRange(0x00a1, 0x167f));
+ characterClass->m_rangesUnicode.append(CharacterRange(0x1681, 0x180d));
+ characterClass->m_rangesUnicode.append(CharacterRange(0x180f, 0x1fff));
+ characterClass->m_rangesUnicode.append(CharacterRange(0x200b, 0x2027));
+ characterClass->m_rangesUnicode.append(CharacterRange(0x202a, 0x202e));
+ characterClass->m_rangesUnicode.append(CharacterRange(0x2030, 0x205e));
+ characterClass->m_rangesUnicode.append(CharacterRange(0x2060, 0x2fff));
+ characterClass->m_rangesUnicode.append(CharacterRange(0x3001, 0xffff));
+
+ return characterClass;
+}
+
+CharacterClass* nonwordcharCreate()
+{
+ CharacterClass* characterClass = new CharacterClass();
+
+ characterClass->m_matches.append('`');
+ characterClass->m_ranges.append(CharacterRange(0, '0' - 1));
+ characterClass->m_ranges.append(CharacterRange('9' + 1, 'A' - 1));
+ characterClass->m_ranges.append(CharacterRange('Z' + 1, '_' - 1));
+ characterClass->m_ranges.append(CharacterRange('z' + 1, 0x7f));
+ characterClass->m_rangesUnicode.append(CharacterRange(0x80, 0xffff));
+
+ return characterClass;
+}
+
+
+class RegexPatternConstructor {
+public:
+ RegexPatternConstructor(RegexPattern& pattern)
+ : m_pattern(pattern)
+ , m_characterClassConstructor(pattern.m_ignoreCase)
+ {
+ }
+
+ ~RegexPatternConstructor()
+ {
+ }
+
+ void reset()
+ {
+ m_pattern.reset();
+ m_characterClassConstructor.reset();
+ }
+
+ void assertionBOL()
+ {
+ m_alternative->m_terms.append(PatternTerm::BOL());
+ }
+ void assertionEOL()
+ {
+ m_alternative->m_terms.append(PatternTerm::EOL());
+ }
+ void assertionWordBoundary(bool invert)
+ {
+ m_alternative->m_terms.append(PatternTerm::WordBoundary(invert));
+ }
+
+ void atomPatternCharacter(UChar ch)
+ {
+ // We handle case-insensitive checking of unicode characters which do have both
+ // cases by handling them as if they were defined using a CharacterClass.
+ if (m_pattern.m_ignoreCase && !isASCII(ch) && (Unicode::toUpper(ch) != Unicode::toLower(ch))) {
+ atomCharacterClassBegin();
+ atomCharacterClassAtom(ch);
+ atomCharacterClassEnd();
+ } else
+ m_alternative->m_terms.append(PatternTerm(ch));
+ }
+
+ void atomBuiltInCharacterClass(BuiltInCharacterClassID classID, bool invert)
+ {
+ switch (classID) {
+ case DigitClassID:
+ m_alternative->m_terms.append(PatternTerm(m_pattern.digitsCharacterClass(), invert));
+ break;
+ case SpaceClassID:
+ m_alternative->m_terms.append(PatternTerm(m_pattern.spacesCharacterClass(), invert));
+ break;
+ case WordClassID:
+ m_alternative->m_terms.append(PatternTerm(m_pattern.wordcharCharacterClass(), invert));
+ break;
+ case NewlineClassID:
+ m_alternative->m_terms.append(PatternTerm(m_pattern.newlineCharacterClass(), invert));
+ break;
+ }
+ }
+
+ void atomCharacterClassBegin(bool invert = false)
+ {
+ m_invertCharacterClass = invert;
+ }
+
+ void atomCharacterClassAtom(UChar ch)
+ {
+ m_characterClassConstructor.putChar(ch);
+ }
+
+ void atomCharacterClassRange(UChar begin, UChar end)
+ {
+ m_characterClassConstructor.putRange(begin, end);
+ }
+
+ void atomCharacterClassBuiltIn(BuiltInCharacterClassID classID, bool invert)
+ {
+ ASSERT(classID != NewlineClassID);
+
+ switch (classID) {
+ case DigitClassID:
+ m_characterClassConstructor.append(invert ? m_pattern.nondigitsCharacterClass() : m_pattern.digitsCharacterClass());
+ break;
+
+ case SpaceClassID:
+ m_characterClassConstructor.append(invert ? m_pattern.nonspacesCharacterClass() : m_pattern.spacesCharacterClass());
+ break;
+
+ case WordClassID:
+ m_characterClassConstructor.append(invert ? m_pattern.nonwordcharCharacterClass() : m_pattern.wordcharCharacterClass());
+ break;
+
+ default:
+ ASSERT_NOT_REACHED();
+ }
+ }
+
+ void atomCharacterClassEnd()
+ {
+ CharacterClass* newCharacterClass = m_characterClassConstructor.charClass();
+ m_pattern.m_userCharacterClasses.append(newCharacterClass);
+ m_alternative->m_terms.append(PatternTerm(newCharacterClass, m_invertCharacterClass));
+ }
+
+ void atomParenthesesSubpatternBegin(bool capture = true)
+ {
+ unsigned subpatternId = m_pattern.m_numSubpatterns + 1;
+ if (capture)
+ m_pattern.m_numSubpatterns++;
+
+ PatternDisjunction* parenthesesDisjunction = new PatternDisjunction(m_alternative);
+ m_pattern.m_disjunctions.append(parenthesesDisjunction);
+ m_alternative->m_terms.append(PatternTerm(PatternTerm::TypeParenthesesSubpattern, subpatternId, parenthesesDisjunction, capture));
+ m_alternative = parenthesesDisjunction->addNewAlternative();
+ }
+
+ void atomParentheticalAssertionBegin(bool invert = false)
+ {
+ PatternDisjunction* parenthesesDisjunction = new PatternDisjunction(m_alternative);
+ m_pattern.m_disjunctions.append(parenthesesDisjunction);
+ m_alternative->m_terms.append(PatternTerm(PatternTerm::TypeParentheticalAssertion, m_pattern.m_numSubpatterns + 1, parenthesesDisjunction, invert));
+ m_alternative = parenthesesDisjunction->addNewAlternative();
+ }
+
+ void atomParenthesesEnd()
+ {
+ ASSERT(m_alternative->m_parent);
+ ASSERT(m_alternative->m_parent->m_parent);
+ m_alternative = m_alternative->m_parent->m_parent;
+
+ m_alternative->lastTerm().parentheses.lastSubpatternId = m_pattern.m_numSubpatterns;
+ }
+
+ void atomBackReference(unsigned subpatternId)
+ {
+ ASSERT(subpatternId);
+ m_pattern.m_maxBackReference = std::max(m_pattern.m_maxBackReference, subpatternId);
+
+ if (subpatternId > m_pattern.m_numSubpatterns) {
+ m_alternative->m_terms.append(PatternTerm::ForwardReference());
+ return;
+ }
+
+ PatternAlternative* currentAlternative = m_alternative;
+ ASSERT(currentAlternative);
+
+ // Note to self: if we waited until the AST was baked, we could also remove forwards refs
+ while ((currentAlternative = currentAlternative->m_parent->m_parent)) {
+ PatternTerm& term = currentAlternative->lastTerm();
+ ASSERT((term.type == PatternTerm::TypeParenthesesSubpattern) || (term.type == PatternTerm::TypeParentheticalAssertion));
+
+ if ((term.type == PatternTerm::TypeParenthesesSubpattern) && term.invertOrCapture && (subpatternId == term.subpatternId)) {
+ m_alternative->m_terms.append(PatternTerm::ForwardReference());
+ return;
+ }
+ }
+
+ m_alternative->m_terms.append(PatternTerm(subpatternId));
+ }
+
+ PatternDisjunction* copyDisjunction(PatternDisjunction* disjunction)
+ {
+ PatternDisjunction* newDisjunction = new PatternDisjunction();
+
+ newDisjunction->m_parent = disjunction->m_parent;
+ for (unsigned alt = 0; alt < disjunction->m_alternatives.size(); ++alt) {
+ PatternAlternative* alternative = disjunction->m_alternatives[alt];
+ PatternAlternative* newAlternative = newDisjunction->addNewAlternative();
+ for (unsigned i = 0; i < alternative->m_terms.size(); ++i)
+ newAlternative->m_terms.append(copyTerm(alternative->m_terms[i]));
+ }
+
+ m_pattern.m_disjunctions.append(newDisjunction);
+ return newDisjunction;
+ }
+
+ PatternTerm copyTerm(PatternTerm& term)
+ {
+ if ((term.type != PatternTerm::TypeParenthesesSubpattern) && (term.type != PatternTerm::TypeParentheticalAssertion))
+ return PatternTerm(term);
+
+ PatternTerm termCopy = term;
+ termCopy.parentheses.disjunction = copyDisjunction(termCopy.parentheses.disjunction);
+ return termCopy;
+ }
+
+ void quantifyAtom(unsigned min, unsigned max, bool greedy)
+ {
+ ASSERT(min <= max);
+ ASSERT(m_alternative->m_terms.size());
+
+ if (!max) {
+ m_alternative->removeLastTerm();
+ return;
+ }
+
+ PatternTerm& term = m_alternative->lastTerm();
+ ASSERT(term.type > PatternTerm::TypeAssertionWordBoundary);
+ ASSERT((term.quantityCount == 1) && (term.quantityType == QuantifierFixedCount));
+
+ // For any assertion with a zero minimum, not matching is valid and has no effect,
+ // remove it. Otherwise, we need to match as least once, but there is no point
+ // matching more than once, so remove the quantifier. It is not entirely clear
+ // from the spec whether or not this behavior is correct, but I believe this
+ // matches Firefox. :-/
+ if (term.type == PatternTerm::TypeParentheticalAssertion) {
+ if (!min)
+ m_alternative->removeLastTerm();
+ return;
+ }
+
+ if (min == 0)
+ term.quantify(max, greedy ? QuantifierGreedy : QuantifierNonGreedy);
+ else if (min == max)
+ term.quantify(min, QuantifierFixedCount);
+ else {
+ term.quantify(min, QuantifierFixedCount);
+ m_alternative->m_terms.append(copyTerm(term));
+ // NOTE: this term is interesting from an analysis perspective, in that it can be ignored.....
+ m_alternative->lastTerm().quantify((max == UINT_MAX) ? max : max - min, greedy ? QuantifierGreedy : QuantifierNonGreedy);
+ if (m_alternative->lastTerm().type == PatternTerm::TypeParenthesesSubpattern)
+ m_alternative->lastTerm().parentheses.isCopy = true;
+ }
+ }
+
+ void disjunction()
+ {
+ m_alternative = m_alternative->m_parent->addNewAlternative();
+ }
+
+ void regexBegin()
+ {
+ m_pattern.m_body = new PatternDisjunction();
+ m_alternative = m_pattern.m_body->addNewAlternative();
+ m_pattern.m_disjunctions.append(m_pattern.m_body);
+ }
+ void regexEnd()
+ {
+ }
+ void regexError()
+ {
+ }
+
+ unsigned setupAlternativeOffsets(PatternAlternative* alternative, unsigned currentCallFrameSize, unsigned initialInputPosition)
+ {
+ alternative->m_hasFixedSize = true;
+ unsigned currentInputPosition = initialInputPosition;
+
+ for (unsigned i = 0; i < alternative->m_terms.size(); ++i) {
+ PatternTerm& term = alternative->m_terms[i];
+
+ switch (term.type) {
+ case PatternTerm::TypeAssertionBOL:
+ case PatternTerm::TypeAssertionEOL:
+ case PatternTerm::TypeAssertionWordBoundary:
+ term.inputPosition = currentInputPosition;
+ break;
+
+ case PatternTerm::TypeBackReference:
+ term.inputPosition = currentInputPosition;
+ term.frameLocation = currentCallFrameSize;
+ currentCallFrameSize += RegexStackSpaceForBackTrackInfoBackReference;
+ alternative->m_hasFixedSize = false;
+ break;
+
+ case PatternTerm::TypeForwardReference:
+ break;
+
+ case PatternTerm::TypePatternCharacter:
+ term.inputPosition = currentInputPosition;
+ if (term.quantityType != QuantifierFixedCount) {
+ term.frameLocation = currentCallFrameSize;
+ currentCallFrameSize += RegexStackSpaceForBackTrackInfoPatternCharacter;
+ alternative->m_hasFixedSize = false;
+ } else
+ currentInputPosition += term.quantityCount;
+ break;
+
+ case PatternTerm::TypeCharacterClass:
+ term.inputPosition = currentInputPosition;
+ if (term.quantityType != QuantifierFixedCount) {
+ term.frameLocation = currentCallFrameSize;
+ currentCallFrameSize += RegexStackSpaceForBackTrackInfoCharacterClass;
+ alternative->m_hasFixedSize = false;
+ } else
+ currentInputPosition += term.quantityCount;
+ break;
+
+ case PatternTerm::TypeParenthesesSubpattern:
+ // Note: for fixed once parentheses we will ensure at least the minimum is available; others are on their own.
+ term.frameLocation = currentCallFrameSize;
+ if ((term.quantityCount == 1) && !term.parentheses.isCopy) {
+ if (term.quantityType == QuantifierFixedCount) {
+ currentCallFrameSize = setupDisjunctionOffsets(term.parentheses.disjunction, currentCallFrameSize, currentInputPosition);
+ currentInputPosition += term.parentheses.disjunction->m_minimumSize;
+ } else {
+ currentCallFrameSize += RegexStackSpaceForBackTrackInfoParenthesesOnce;
+ currentCallFrameSize = setupDisjunctionOffsets(term.parentheses.disjunction, currentCallFrameSize, currentInputPosition);
+ }
+ term.inputPosition = currentInputPosition;
+ } else {
+ term.inputPosition = currentInputPosition;
+ setupDisjunctionOffsets(term.parentheses.disjunction, 0, currentInputPosition);
+ currentCallFrameSize += RegexStackSpaceForBackTrackInfoParentheses;
+ }
+ // Fixed count of 1 could be accepted, if they have a fixed size *AND* if all alternatives are of the same length.
+ alternative->m_hasFixedSize = false;
+ break;
+
+ case PatternTerm::TypeParentheticalAssertion:
+ term.inputPosition = currentInputPosition;
+ term.frameLocation = currentCallFrameSize;
+ currentCallFrameSize = setupDisjunctionOffsets(term.parentheses.disjunction, currentCallFrameSize + RegexStackSpaceForBackTrackInfoParentheticalAssertion, currentInputPosition);
+ break;
+ }
+ }
+
+ alternative->m_minimumSize = currentInputPosition - initialInputPosition;
+ return currentCallFrameSize;
+ }
+
+ unsigned setupDisjunctionOffsets(PatternDisjunction* disjunction, unsigned initialCallFrameSize, unsigned initialInputPosition)
+ {
+ if ((disjunction != m_pattern.m_body) && (disjunction->m_alternatives.size() > 1))
+ initialCallFrameSize += RegexStackSpaceForBackTrackInfoAlternative;
+
+ unsigned minimumInputSize = UINT_MAX;
+ unsigned maximumCallFrameSize = 0;
+ bool hasFixedSize = true;
+
+ for (unsigned alt = 0; alt < disjunction->m_alternatives.size(); ++alt) {
+ PatternAlternative* alternative = disjunction->m_alternatives[alt];
+ unsigned currentAlternativeCallFrameSize = setupAlternativeOffsets(alternative, initialCallFrameSize, initialInputPosition);
+ minimumInputSize = min(minimumInputSize, alternative->m_minimumSize);
+ maximumCallFrameSize = max(maximumCallFrameSize, currentAlternativeCallFrameSize);
+ hasFixedSize &= alternative->m_hasFixedSize;
+ }
+
+ ASSERT(minimumInputSize != UINT_MAX);
+ ASSERT(maximumCallFrameSize >= initialCallFrameSize);
+
+ disjunction->m_hasFixedSize = hasFixedSize;
+ disjunction->m_minimumSize = minimumInputSize;
+ disjunction->m_callFrameSize = maximumCallFrameSize;
+ return maximumCallFrameSize;
+ }
+
+ void setupOffsets()
+ {
+ setupDisjunctionOffsets(m_pattern.m_body, 0, 0);
+ }
+
+private:
+ RegexPattern& m_pattern;
+ PatternAlternative* m_alternative;
+ CharacterClassConstructor m_characterClassConstructor;
+ bool m_invertCharacterClass;
+};
+
+
+const char* compileRegex(const UString& patternString, RegexPattern& pattern)
+{
+ RegexPatternConstructor constructor(pattern);
+
+ if (const char* error = parse(constructor, patternString))
+ return error;
+
+ // If the pattern contains illegal backreferences reset & reparse.
+ // Quoting Netscape's "What's new in JavaScript 1.2",
+ // "Note: if the number of left parentheses is less than the number specified
+ // in \#, the \# is taken as an octal escape as described in the next row."
+ if (pattern.containsIllegalBackReference()) {
+ unsigned numSubpatterns = pattern.m_numSubpatterns;
+
+ constructor.reset();
+#if !ASSERT_DISABLED
+ const char* error =
+#endif
+ parse(constructor, patternString, numSubpatterns);
+
+ ASSERT(!error);
+ ASSERT(numSubpatterns == pattern.m_numSubpatterns);
+ }
+
+ constructor.setupOffsets();
+
+ return false;
+};
+
+
+} }
+
+#endif
diff --git a/src/3rdparty/javascriptcore/JavaScriptCore/yarr/RegexCompiler.h b/src/3rdparty/javascriptcore/JavaScriptCore/yarr/RegexCompiler.h
new file mode 100644
index 0000000..3ed2be9
--- /dev/null
+++ b/src/3rdparty/javascriptcore/JavaScriptCore/yarr/RegexCompiler.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (C) 2009 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef RegexCompiler_h
+#define RegexCompiler_h
+
+#include <wtf/Platform.h>
+
+#if ENABLE(YARR)
+
+#include <wtf/unicode/Unicode.h>
+#include "RegexParser.h"
+#include "RegexPattern.h"
+
+namespace JSC { namespace Yarr {
+
+const char* compileRegex(const UString& patternString, RegexPattern& pattern);
+
+} } // namespace JSC::Yarr
+
+#endif
+
+#endif // RegexCompiler_h
diff --git a/src/3rdparty/javascriptcore/JavaScriptCore/yarr/RegexInterpreter.cpp b/src/3rdparty/javascriptcore/JavaScriptCore/yarr/RegexInterpreter.cpp
new file mode 100644
index 0000000..d088086
--- /dev/null
+++ b/src/3rdparty/javascriptcore/JavaScriptCore/yarr/RegexInterpreter.cpp
@@ -0,0 +1,1638 @@
+/*
+ * Copyright (C) 2009 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "RegexInterpreter.h"
+
+#include "RegexCompiler.h"
+#include "RegexPattern.h"
+
+#ifndef NDEBUG
+#include <stdio.h>
+#endif
+
+#if ENABLE(YARR)
+
+using namespace WTF;
+
+namespace JSC { namespace Yarr {
+
+class Interpreter {
+public:
+ struct ParenthesesDisjunctionContext;
+
+ struct BackTrackInfoPatternCharacter {
+ uintptr_t matchAmount;
+ };
+ struct BackTrackInfoCharacterClass {
+ uintptr_t matchAmount;
+ };
+ struct BackTrackInfoBackReference {
+ uintptr_t begin; // Not really needed for greedy quantifiers.
+ uintptr_t matchAmount; // Not really needed for fixed quantifiers.
+ };
+ struct BackTrackInfoAlternative {
+ uintptr_t offset;
+ };
+ struct BackTrackInfoParentheticalAssertion {
+ uintptr_t begin;
+ };
+ struct BackTrackInfoParenthesesOnce {
+ uintptr_t inParentheses;
+ };
+ struct BackTrackInfoParentheses {
+ uintptr_t matchAmount;
+ ParenthesesDisjunctionContext* lastContext;
+ uintptr_t prevBegin;
+ uintptr_t prevEnd;
+ };
+
+ static inline void appendParenthesesDisjunctionContext(BackTrackInfoParentheses* backTrack, ParenthesesDisjunctionContext* context)
+ {
+ context->next = backTrack->lastContext;
+ backTrack->lastContext = context;
+ ++backTrack->matchAmount;
+ }
+
+ static inline void popParenthesesDisjunctionContext(BackTrackInfoParentheses* backTrack)
+ {
+ ASSERT(backTrack->matchAmount);
+ ASSERT(backTrack->lastContext);
+ backTrack->lastContext = backTrack->lastContext->next;
+ --backTrack->matchAmount;
+ }
+
+ struct DisjunctionContext
+ {
+ DisjunctionContext()
+ : term(0)
+ {
+ }
+
+ void* operator new(size_t, void* where)
+ {
+ return where;
+ }
+
+ int term;
+ unsigned matchBegin;
+ unsigned matchEnd;
+ uintptr_t frame[1];
+ };
+
+ DisjunctionContext* allocDisjunctionContext(ByteDisjunction* disjunction)
+ {
+ return new(malloc(sizeof(DisjunctionContext) + (disjunction->m_frameSize - 1) * sizeof(uintptr_t))) DisjunctionContext();
+ }
+
+ void freeDisjunctionContext(DisjunctionContext* context)
+ {
+ free(context);
+ }
+
+ struct ParenthesesDisjunctionContext
+ {
+ ParenthesesDisjunctionContext(int* output, ByteTerm& term)
+ : next(0)
+ {
+ unsigned firstSubpatternId = term.atom.subpatternId;
+ unsigned numNestedSubpatterns = term.atom.parenthesesDisjunction->m_numSubpatterns;
+
+ for (unsigned i = 0; i < (numNestedSubpatterns << 1); ++i) {
+ subpatternBackup[i] = output[(firstSubpatternId << 1) + i];
+ output[(firstSubpatternId << 1) + i] = -1;
+ }
+
+ new(getDisjunctionContext(term)) DisjunctionContext();
+ }
+
+ void* operator new(size_t, void* where)
+ {
+ return where;
+ }
+
+ void restoreOutput(int* output, unsigned firstSubpatternId, unsigned numNestedSubpatterns)
+ {
+ for (unsigned i = 0; i < (numNestedSubpatterns << 1); ++i)
+ output[(firstSubpatternId << 1) + i] = subpatternBackup[i];
+ }
+
+ DisjunctionContext* getDisjunctionContext(ByteTerm& term)
+ {
+ return reinterpret_cast<DisjunctionContext*>(&(subpatternBackup[term.atom.parenthesesDisjunction->m_numSubpatterns << 1]));
+ }
+
+ ParenthesesDisjunctionContext* next;
+ int subpatternBackup[1];
+ };
+
+ ParenthesesDisjunctionContext* allocParenthesesDisjunctionContext(ByteDisjunction* disjunction, int* output, ByteTerm& term)
+ {
+ return new(malloc(sizeof(ParenthesesDisjunctionContext) + (((term.atom.parenthesesDisjunction->m_numSubpatterns << 1) - 1) * sizeof(int)) + sizeof(DisjunctionContext) + (disjunction->m_frameSize - 1) * sizeof(uintptr_t))) ParenthesesDisjunctionContext(output, term);
+ }
+
+ void freeParenthesesDisjunctionContext(ParenthesesDisjunctionContext* context)
+ {
+ free(context);
+ }
+
+ class InputStream {
+ public:
+ InputStream(const UChar* input, unsigned start, unsigned length)
+ : input(input)
+ , pos(start)
+ , length(length)
+ {
+ }
+
+ void next()
+ {
+ ++pos;
+ }
+
+ void rewind(unsigned amount)
+ {
+ ASSERT(pos >= amount);
+ pos -= amount;
+ }
+
+ int read()
+ {
+ ASSERT(pos < length);
+ if (pos < length)
+ return input[pos];
+ return -1;
+ }
+
+ int readChecked(int position)
+ {
+ ASSERT(position < 0);
+ ASSERT((unsigned)-position <= pos);
+ unsigned p = pos + position;
+ ASSERT(p < length);
+ return input[p];
+ }
+
+ int reread(unsigned from)
+ {
+ ASSERT(from < length);
+ return input[from];
+ }
+
+ int prev()
+ {
+ ASSERT(!(pos > length));
+ if (pos && length)
+ return input[pos - 1];
+ return -1;
+ }
+
+ unsigned getPos()
+ {
+ return pos;
+ }
+
+ void setPos(unsigned p)
+ {
+ pos = p;
+ }
+
+ bool atStart()
+ {
+ return pos == 0;
+ }
+
+ bool atEnd()
+ {
+ return pos == length;
+ }
+
+ bool checkInput(int count)
+ {
+ if ((pos + count) <= length) {
+ pos += count;
+ return true;
+ } else
+ return false;
+ }
+
+ void uncheckInput(int count)
+ {
+ pos -= count;
+ }
+
+ bool atStart(int position)
+ {
+ return (pos + position) == 0;
+ }
+
+ bool atEnd(int position)
+ {
+ return (pos + position) == length;
+ }
+
+ private:
+ const UChar* input;
+ unsigned pos;
+ unsigned length;
+ };
+
+ bool testCharacterClass(CharacterClass* characterClass, int ch)
+ {
+ if (ch & 0xFF80) {
+ for (unsigned i = 0; i < characterClass->m_matchesUnicode.size(); ++i)
+ if (ch == characterClass->m_matchesUnicode[i])
+ return true;
+ for (unsigned i = 0; i < characterClass->m_rangesUnicode.size(); ++i)
+ if ((ch >= characterClass->m_rangesUnicode[i].begin) && (ch <= characterClass->m_rangesUnicode[i].end))
+ return true;
+ } else {
+ for (unsigned i = 0; i < characterClass->m_matches.size(); ++i)
+ if (ch == characterClass->m_matches[i])
+ return true;
+ for (unsigned i = 0; i < characterClass->m_ranges.size(); ++i)
+ if ((ch >= characterClass->m_ranges[i].begin) && (ch <= characterClass->m_ranges[i].end))
+ return true;
+ }
+
+ return false;
+ }
+
+ bool tryConsumeCharacter(int testChar)
+ {
+ if (input.atEnd())
+ return false;
+
+ int ch = input.read();
+
+ if (pattern->m_ignoreCase ? ((Unicode::toLower(testChar) == ch) || (Unicode::toUpper(testChar) == ch)) : (testChar == ch)) {
+ input.next();
+ return true;
+ }
+ return false;
+ }
+
+ bool checkCharacter(int testChar, int inputPosition)
+ {
+ return testChar == input.readChecked(inputPosition);
+ }
+
+ bool checkCasedCharacter(int loChar, int hiChar, int inputPosition)
+ {
+ int ch = input.readChecked(inputPosition);
+ return (loChar == ch) || (hiChar == ch);
+ }
+
+ bool tryConsumeCharacterClass(CharacterClass* characterClass, bool invert)
+ {
+ if (input.atEnd())
+ return false;
+
+ bool match = testCharacterClass(characterClass, input.read());
+
+ if (invert)
+ match = !match;
+
+ if (match) {
+ input.next();
+ return true;
+ }
+ return false;
+ }
+
+ bool checkCharacterClass(CharacterClass* characterClass, bool invert, int inputPosition)
+ {
+ bool match = testCharacterClass(characterClass, input.readChecked(inputPosition));
+ return invert ? !match : match;
+ }
+
+ bool tryConsumeBackReference(int matchBegin, int matchEnd, int inputOffset)
+ {
+ int matchSize = matchEnd - matchBegin;
+
+ if (!input.checkInput(matchSize))
+ return false;
+
+ for (int i = 0; i < matchSize; ++i) {
+ if (!checkCharacter(input.reread(matchBegin + i), inputOffset - matchSize + i)) {
+ input.uncheckInput(matchSize);
+ return false;
+ }
+ }
+
+ return true;
+ }
+
+ bool matchAssertionBOL(ByteTerm& term)
+ {
+ return (input.atStart(term.inputPosition)) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.readChecked(term.inputPosition - 1)));
+ }
+
+ bool matchAssertionEOL(ByteTerm& term)
+ {
+ if (term.inputPosition)
+ return (input.atEnd(term.inputPosition)) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.readChecked(term.inputPosition)));
+ else
+ return (input.atEnd()) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.read()));
+ }
+
+ bool matchAssertionWordBoundary(ByteTerm& term)
+ {
+ bool prevIsWordchar = !input.atStart(term.inputPosition) && testCharacterClass(pattern->wordcharCharacterClass, input.readChecked(term.inputPosition - 1));
+ bool readIsWordchar;
+ if (term.inputPosition)
+ readIsWordchar = !input.atEnd(term.inputPosition) && testCharacterClass(pattern->wordcharCharacterClass, input.readChecked(term.inputPosition));
+ else
+ readIsWordchar = !input.atEnd() && testCharacterClass(pattern->wordcharCharacterClass, input.read());
+
+ bool wordBoundary = prevIsWordchar != readIsWordchar;
+ return term.invert() ? !wordBoundary : wordBoundary;
+ }
+
+ bool backtrackPatternCharacter(ByteTerm& term, DisjunctionContext* context)
+ {
+ BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
+
+ switch (term.atom.quantityType) {
+ case QuantifierFixedCount:
+ break;
+
+ case QuantifierGreedy:
+ if (backTrack->matchAmount) {
+ --backTrack->matchAmount;
+ input.uncheckInput(1);
+ return true;
+ }
+ break;
+
+ case QuantifierNonGreedy:
+ if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
+ ++backTrack->matchAmount;
+ if (checkCharacter(term.atom.patternCharacter, term.inputPosition - 1))
+ return true;
+ }
+ input.uncheckInput(backTrack->matchAmount);
+ break;
+ }
+
+ return false;
+ }
+
+ bool backtrackPatternCasedCharacter(ByteTerm& term, DisjunctionContext* context)
+ {
+ BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
+
+ switch (term.atom.quantityType) {
+ case QuantifierFixedCount:
+ break;
+
+ case QuantifierGreedy:
+ if (backTrack->matchAmount) {
+ --backTrack->matchAmount;
+ input.uncheckInput(1);
+ return true;
+ }
+ break;
+
+ case QuantifierNonGreedy:
+ if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
+ ++backTrack->matchAmount;
+ if (checkCasedCharacter(term.atom.casedCharacter.lo, term.atom.casedCharacter.hi, term.inputPosition - 1))
+ return true;
+ }
+ input.uncheckInput(backTrack->matchAmount);
+ break;
+ }
+
+ return false;
+ }
+
+ bool matchCharacterClass(ByteTerm& term, DisjunctionContext* context)
+ {
+ ASSERT(term.type == ByteTerm::TypeCharacterClass);
+ BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
+
+ switch (term.atom.quantityType) {
+ case QuantifierFixedCount: {
+ for (unsigned matchAmount = 0; matchAmount < term.atom.quantityCount; ++matchAmount) {
+ if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition + matchAmount))
+ return false;
+ }
+ return true;
+ }
+
+ case QuantifierGreedy: {
+ unsigned matchAmount = 0;
+ while ((matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
+ if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition - 1)) {
+ input.uncheckInput(1);
+ break;
+ }
+ ++matchAmount;
+ }
+ backTrack->matchAmount = matchAmount;
+
+ return true;
+ }
+
+ case QuantifierNonGreedy:
+ backTrack->matchAmount = 0;
+ return true;
+ }
+
+ ASSERT_NOT_REACHED();
+ return false;
+ }
+
+ bool backtrackCharacterClass(ByteTerm& term, DisjunctionContext* context)
+ {
+ ASSERT(term.type == ByteTerm::TypeCharacterClass);
+ BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
+
+ switch (term.atom.quantityType) {
+ case QuantifierFixedCount:
+ break;
+
+ case QuantifierGreedy:
+ if (backTrack->matchAmount) {
+ --backTrack->matchAmount;
+ input.uncheckInput(1);
+ return true;
+ }
+ break;
+
+ case QuantifierNonGreedy:
+ if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
+ ++backTrack->matchAmount;
+ if (checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition - 1))
+ return true;
+ }
+ input.uncheckInput(backTrack->matchAmount);
+ break;
+ }
+
+ return false;
+ }
+
+ bool matchBackReference(ByteTerm& term, DisjunctionContext* context)
+ {
+ ASSERT(term.type == ByteTerm::TypeBackReference);
+ BackTrackInfoBackReference* backTrack = reinterpret_cast<BackTrackInfoBackReference*>(context->frame + term.frameLocation);
+
+ int matchBegin = output[(term.atom.subpatternId << 1)];
+ int matchEnd = output[(term.atom.subpatternId << 1) + 1];
+ ASSERT((matchBegin == -1) == (matchEnd == -1));
+ ASSERT(matchBegin <= matchEnd);
+
+ if (matchBegin == matchEnd)
+ return true;
+
+ switch (term.atom.quantityType) {
+ case QuantifierFixedCount: {
+ backTrack->begin = input.getPos();
+ for (unsigned matchAmount = 0; matchAmount < term.atom.quantityCount; ++matchAmount) {
+ if (!tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition)) {
+ input.setPos(backTrack->begin);
+ return false;
+ }
+ }
+ return true;
+ }
+
+ case QuantifierGreedy: {
+ unsigned matchAmount = 0;
+ while ((matchAmount < term.atom.quantityCount) && tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition))
+ ++matchAmount;
+ backTrack->matchAmount = matchAmount;
+ return true;
+ }
+
+ case QuantifierNonGreedy:
+ backTrack->begin = input.getPos();
+ backTrack->matchAmount = 0;
+ return true;
+ }
+
+ ASSERT_NOT_REACHED();
+ return false;
+ }
+
+ bool backtrackBackReference(ByteTerm& term, DisjunctionContext* context)
+ {
+ ASSERT(term.type == ByteTerm::TypeBackReference);
+ BackTrackInfoBackReference* backTrack = reinterpret_cast<BackTrackInfoBackReference*>(context->frame + term.frameLocation);
+
+ int matchBegin = output[(term.atom.subpatternId << 1)];
+ int matchEnd = output[(term.atom.subpatternId << 1) + 1];
+ ASSERT((matchBegin == -1) == (matchEnd == -1));
+ ASSERT(matchBegin <= matchEnd);
+
+ if (matchBegin == matchEnd)
+ return false;
+
+ switch (term.atom.quantityType) {
+ case QuantifierFixedCount:
+ // for quantityCount == 1, could rewind.
+ input.setPos(backTrack->begin);
+ break;
+
+ case QuantifierGreedy:
+ if (backTrack->matchAmount) {
+ --backTrack->matchAmount;
+ input.rewind(matchEnd - matchBegin);
+ return true;
+ }
+ break;
+
+ case QuantifierNonGreedy:
+ if ((backTrack->matchAmount < term.atom.quantityCount) && tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition)) {
+ ++backTrack->matchAmount;
+ return true;
+ } else
+ input.setPos(backTrack->begin);
+ break;
+ }
+
+ return false;
+ }
+
+ void recordParenthesesMatch(ByteTerm& term, ParenthesesDisjunctionContext* context)
+ {
+ if (term.capture()) {
+ unsigned subpatternId = term.atom.subpatternId;
+ output[(subpatternId << 1)] = context->getDisjunctionContext(term)->matchBegin + term.inputPosition;
+ output[(subpatternId << 1) + 1] = context->getDisjunctionContext(term)->matchEnd + term.inputPosition;
+ }
+ }
+ void resetMatches(ByteTerm& term, ParenthesesDisjunctionContext* context)
+ {
+ unsigned firstSubpatternId = term.atom.subpatternId;
+ unsigned count = term.atom.parenthesesDisjunction->m_numSubpatterns;
+ context->restoreOutput(output, firstSubpatternId, count);
+ }
+ void resetAssertionMatches(ByteTerm& term)
+ {
+ unsigned firstSubpatternId = term.atom.subpatternId;
+ unsigned count = term.atom.parenthesesDisjunction->m_numSubpatterns;
+ for (unsigned i = 0; i < (count << 1); ++i)
+ output[(firstSubpatternId << 1) + i] = -1;
+ }
+ bool parenthesesDoBacktrack(ByteTerm& term, BackTrackInfoParentheses* backTrack)
+ {
+ while (backTrack->matchAmount) {
+ ParenthesesDisjunctionContext* context = backTrack->lastContext;
+
+ if (matchDisjunction(term.atom.parenthesesDisjunction, context->getDisjunctionContext(term), true))
+ return true;
+
+ resetMatches(term, context);
+ popParenthesesDisjunctionContext(backTrack);
+ freeParenthesesDisjunctionContext(context);
+ }
+
+ return false;
+ }
+
+ bool matchParenthesesOnceBegin(ByteTerm& term, DisjunctionContext* context)
+ {
+ ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
+ ASSERT(term.atom.quantityCount == 1);
+
+ BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
+
+ switch (term.atom.quantityType) {
+ case QuantifierGreedy: {
+ // set this speculatively; if we get to the parens end this will be true.
+ backTrack->inParentheses = 1;
+ break;
+ }
+ case QuantifierNonGreedy: {
+ backTrack->inParentheses = 0;
+ context->term += term.atom.parenthesesWidth;
+ return true;
+ }
+ case QuantifierFixedCount:
+ break;
+ }
+
+ if (term.capture()) {
+ unsigned subpatternId = term.atom.subpatternId;
+ output[(subpatternId << 1)] = input.getPos() + term.inputPosition;
+ }
+
+ return true;
+ }
+
+ bool matchParenthesesOnceEnd(ByteTerm& term, DisjunctionContext*)
+ {
+ ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceEnd);
+ ASSERT(term.atom.quantityCount == 1);
+
+ if (term.capture()) {
+ unsigned subpatternId = term.atom.subpatternId;
+ output[(subpatternId << 1) + 1] = input.getPos() + term.inputPosition;
+ }
+ return true;
+ }
+
+ bool backtrackParenthesesOnceBegin(ByteTerm& term, DisjunctionContext* context)
+ {
+ ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
+ ASSERT(term.atom.quantityCount == 1);
+
+ BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
+
+ if (term.capture()) {
+ unsigned subpatternId = term.atom.subpatternId;
+ output[(subpatternId << 1)] = -1;
+ output[(subpatternId << 1) + 1] = -1;
+ }
+
+ switch (term.atom.quantityType) {
+ case QuantifierGreedy:
+ // if we backtrack to this point, there is another chance - try matching nothing.
+ ASSERT(backTrack->inParentheses);
+ backTrack->inParentheses = 0;
+ context->term += term.atom.parenthesesWidth;
+ return true;
+ case QuantifierNonGreedy:
+ ASSERT(backTrack->inParentheses);
+ case QuantifierFixedCount:
+ break;
+ }
+
+ return false;
+ }
+
+ bool backtrackParenthesesOnceEnd(ByteTerm& term, DisjunctionContext* context)
+ {
+ ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceEnd);
+ ASSERT(term.atom.quantityCount == 1);
+
+ BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
+
+ switch (term.atom.quantityType) {
+ case QuantifierGreedy:
+ if (!backTrack->inParentheses) {
+ context->term -= term.atom.parenthesesWidth;
+ return false;
+ }
+ case QuantifierNonGreedy:
+ if (!backTrack->inParentheses) {
+ // now try to match the parens; set this speculatively.
+ backTrack->inParentheses = 1;
+ if (term.capture()) {
+ unsigned subpatternId = term.atom.subpatternId;
+ output[(subpatternId << 1) + 1] = input.getPos() + term.inputPosition;
+ }
+ context->term -= term.atom.parenthesesWidth;
+ return true;
+ }
+ case QuantifierFixedCount:
+ break;
+ }
+
+ return false;
+ }
+
+ bool matchParentheticalAssertionBegin(ByteTerm& term, DisjunctionContext* context)
+ {
+ ASSERT(term.type == ByteTerm::TypeParentheticalAssertionBegin);
+ ASSERT(term.atom.quantityCount == 1);
+
+ BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
+
+ backTrack->begin = input.getPos();
+ return true;
+ }
+
+ bool matchParentheticalAssertionEnd(ByteTerm& term, DisjunctionContext* context)
+ {
+ ASSERT(term.type == ByteTerm::TypeParentheticalAssertionEnd);
+ ASSERT(term.atom.quantityCount == 1);
+
+ BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
+
+ input.setPos(backTrack->begin);
+
+ // We've reached the end of the parens; if they are inverted, this is failure.
+ if (term.invert()) {
+ context->term -= term.atom.parenthesesWidth;
+ return false;
+ }
+
+ return true;
+ }
+
+ bool backtrackParentheticalAssertionBegin(ByteTerm& term, DisjunctionContext* context)
+ {
+ ASSERT(term.type == ByteTerm::TypeParentheticalAssertionBegin);
+ ASSERT(term.atom.quantityCount == 1);
+
+ // We've failed to match parens; if they are inverted, this is win!
+ if (term.invert()) {
+ context->term += term.atom.parenthesesWidth;
+ return true;
+ }
+
+ return false;
+ }
+
+ bool backtrackParentheticalAssertionEnd(ByteTerm& term, DisjunctionContext* context)
+ {
+ ASSERT(term.type == ByteTerm::TypeParentheticalAssertionEnd);
+ ASSERT(term.atom.quantityCount == 1);
+
+ BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
+
+ input.setPos(backTrack->begin);
+
+ context->term -= term.atom.parenthesesWidth;
+ return false;
+ }
+
+ bool matchParentheses(ByteTerm& term, DisjunctionContext* context)
+ {
+ ASSERT(term.type == ByteTerm::TypeParenthesesSubpattern);
+
+ BackTrackInfoParentheses* backTrack = reinterpret_cast<BackTrackInfoParentheses*>(context->frame + term.frameLocation);
+
+ unsigned subpatternId = term.atom.subpatternId;
+ ByteDisjunction* disjunctionBody = term.atom.parenthesesDisjunction;
+
+ backTrack->prevBegin = output[(subpatternId << 1)];
+ backTrack->prevEnd = output[(subpatternId << 1) + 1];
+
+ backTrack->matchAmount = 0;
+ backTrack->lastContext = 0;
+
+ switch (term.atom.quantityType) {
+ case QuantifierFixedCount: {
+ // While we haven't yet reached our fixed limit,
+ while (backTrack->matchAmount < term.atom.quantityCount) {
+ // Try to do a match, and it it succeeds, add it to the list.
+ ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
+ if (matchDisjunction(disjunctionBody, context->getDisjunctionContext(term)))
+ appendParenthesesDisjunctionContext(backTrack, context);
+ else {
+ // The match failed; try to find an alternate point to carry on from.
+ resetMatches(term, context);
+ freeParenthesesDisjunctionContext(context);
+ if (!parenthesesDoBacktrack(term, backTrack))
+ return false;
+ }
+ }
+
+ ASSERT(backTrack->matchAmount == term.atom.quantityCount);
+ ParenthesesDisjunctionContext* context = backTrack->lastContext;
+ recordParenthesesMatch(term, context);
+ return true;
+ }
+
+ case QuantifierGreedy: {
+ while (backTrack->matchAmount < term.atom.quantityCount) {
+ ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
+ if (matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term)))
+ appendParenthesesDisjunctionContext(backTrack, context);
+ else {
+ resetMatches(term, context);
+ freeParenthesesDisjunctionContext(context);
+ break;
+ }
+ }
+
+ if (backTrack->matchAmount) {
+ ParenthesesDisjunctionContext* context = backTrack->lastContext;
+ recordParenthesesMatch(term, context);
+ }
+ return true;
+ }
+
+ case QuantifierNonGreedy:
+ return true;
+ }
+
+ ASSERT_NOT_REACHED();
+ return false;
+ }
+
+ // Rules for backtracking differ depending on whether this is greedy or non-greedy.
+ //
+ // Greedy matches never should try just adding more - you should already have done
+ // the 'more' cases. Always backtrack, at least a leetle bit. However cases where
+ // you backtrack an item off the list needs checking, since we'll never have matched
+ // the one less case. Tracking forwards, still add as much as possible.
+ //
+ // Non-greedy, we've already done the one less case, so don't match on popping.
+ // We haven't done the one more case, so always try to add that.
+ //
+ bool backtrackParentheses(ByteTerm& term, DisjunctionContext* context)
+ {
+ ASSERT(term.type == ByteTerm::TypeParenthesesSubpattern);
+
+ BackTrackInfoParentheses* backTrack = reinterpret_cast<BackTrackInfoParentheses*>(context->frame + term.frameLocation);
+
+ if (term.capture()) {
+ unsigned subpatternId = term.atom.subpatternId;
+ output[(subpatternId << 1)] = backTrack->prevBegin;
+ output[(subpatternId << 1) + 1] = backTrack->prevEnd;
+ }
+
+ ByteDisjunction* disjunctionBody = term.atom.parenthesesDisjunction;
+
+ switch (term.atom.quantityType) {
+ case QuantifierFixedCount: {
+ ASSERT(backTrack->matchAmount == term.atom.quantityCount);
+
+ ParenthesesDisjunctionContext* context = 0;
+
+ if (!parenthesesDoBacktrack(term, backTrack))
+ return false;
+
+ // While we haven't yet reached our fixed limit,
+ while (backTrack->matchAmount < term.atom.quantityCount) {
+ // Try to do a match, and it it succeeds, add it to the list.
+ context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
+ if (matchDisjunction(disjunctionBody, context->getDisjunctionContext(term)))
+ appendParenthesesDisjunctionContext(backTrack, context);
+ else {
+ // The match failed; try to find an alternate point to carry on from.
+ resetMatches(term, context);
+ freeParenthesesDisjunctionContext(context);
+ if (!parenthesesDoBacktrack(term, backTrack))
+ return false;
+ }
+ }
+
+ ASSERT(backTrack->matchAmount == term.atom.quantityCount);
+ context = backTrack->lastContext;
+ recordParenthesesMatch(term, context);
+ return true;
+ }
+
+ case QuantifierGreedy: {
+ if (!backTrack->matchAmount)
+ return false;
+
+ ParenthesesDisjunctionContext* context = backTrack->lastContext;
+ if (matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term), true)) {
+ while (backTrack->matchAmount < term.atom.quantityCount) {
+ ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
+ if (matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term)))
+ appendParenthesesDisjunctionContext(backTrack, context);
+ else {
+ resetMatches(term, context);
+ freeParenthesesDisjunctionContext(context);
+ break;
+ }
+ }
+ } else {
+ resetMatches(term, context);
+ popParenthesesDisjunctionContext(backTrack);
+ freeParenthesesDisjunctionContext(context);
+ }
+
+ if (backTrack->matchAmount) {
+ ParenthesesDisjunctionContext* context = backTrack->lastContext;
+ recordParenthesesMatch(term, context);
+ }
+ return true;
+ }
+
+ case QuantifierNonGreedy: {
+ // If we've not reached the limit, try to add one more match.
+ if (backTrack->matchAmount < term.atom.quantityCount) {
+ ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
+ if (matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term))) {
+ appendParenthesesDisjunctionContext(backTrack, context);
+ recordParenthesesMatch(term, context);
+ return true;
+ } else {
+ resetMatches(term, context);
+ freeParenthesesDisjunctionContext(context);
+ }
+ }
+
+ // Nope - okay backtrack looking for an alternative.
+ while (backTrack->matchAmount) {
+ ParenthesesDisjunctionContext* context = backTrack->lastContext;
+ if (matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term), true)) {
+ // successful backtrack! we're back in the game!
+ if (backTrack->matchAmount) {
+ context = backTrack->lastContext;
+ recordParenthesesMatch(term, context);
+ }
+ return true;
+ }
+
+ // pop a match off the stack
+ resetMatches(term, context);
+ popParenthesesDisjunctionContext(backTrack);
+ freeParenthesesDisjunctionContext(context);
+ }
+
+ return false;
+ }
+ }
+
+ ASSERT_NOT_REACHED();
+ return false;
+ }
+
+#define MATCH_NEXT() { ++context->term; goto matchAgain; }
+#define BACKTRACK() { --context->term; goto backtrack; }
+#define currentTerm() (disjunction->terms[context->term])
+ bool matchDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false)
+ {
+ if (btrack)
+ BACKTRACK();
+
+ context->matchBegin = input.getPos();
+ context->term = 0;
+
+ matchAgain:
+ ASSERT(context->term < static_cast<int>(disjunction->terms.size()));
+
+ switch (currentTerm().type) {
+ case ByteTerm::TypeSubpatternBegin:
+ MATCH_NEXT();
+ case ByteTerm::TypeSubpatternEnd:
+ context->matchEnd = input.getPos();
+ return true;
+
+ case ByteTerm::TypeBodyAlternativeBegin:
+ MATCH_NEXT();
+ case ByteTerm::TypeBodyAlternativeDisjunction:
+ case ByteTerm::TypeBodyAlternativeEnd:
+ context->matchEnd = input.getPos();
+ return true;
+
+ case ByteTerm::TypeAlternativeBegin:
+ MATCH_NEXT();
+ case ByteTerm::TypeAlternativeDisjunction:
+ case ByteTerm::TypeAlternativeEnd: {
+ int offset = currentTerm().alternative.end;
+ BackTrackInfoAlternative* backTrack = reinterpret_cast<BackTrackInfoAlternative*>(context->frame + currentTerm().frameLocation);
+ backTrack->offset = offset;
+ context->term += offset;
+ MATCH_NEXT();
+ }
+
+ case ByteTerm::TypeAssertionBOL:
+ if (matchAssertionBOL(currentTerm()))
+ MATCH_NEXT();
+ BACKTRACK();
+ case ByteTerm::TypeAssertionEOL:
+ if (matchAssertionEOL(currentTerm()))
+ MATCH_NEXT();
+ BACKTRACK();
+ case ByteTerm::TypeAssertionWordBoundary:
+ if (matchAssertionWordBoundary(currentTerm()))
+ MATCH_NEXT();
+ BACKTRACK();
+
+ case ByteTerm::TypePatternCharacterOnce:
+ case ByteTerm::TypePatternCharacterFixed: {
+ for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityCount; ++matchAmount) {
+ if (!checkCharacter(currentTerm().atom.patternCharacter, currentTerm().inputPosition + matchAmount))
+ BACKTRACK();
+ }
+ MATCH_NEXT();
+ }
+ case ByteTerm::TypePatternCharacterGreedy: {
+ BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
+ unsigned matchAmount = 0;
+ while ((matchAmount < currentTerm().atom.quantityCount) && input.checkInput(1)) {
+ if (!checkCharacter(currentTerm().atom.patternCharacter, currentTerm().inputPosition - 1)) {
+ input.uncheckInput(1);
+ break;
+ }
+ ++matchAmount;
+ }
+ backTrack->matchAmount = matchAmount;
+
+ MATCH_NEXT();
+ }
+ case ByteTerm::TypePatternCharacterNonGreedy: {
+ BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
+ backTrack->matchAmount = 0;
+ MATCH_NEXT();
+ }
+
+ case ByteTerm::TypePatternCasedCharacterOnce:
+ case ByteTerm::TypePatternCasedCharacterFixed: {
+ for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityCount; ++matchAmount) {
+ if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition + matchAmount))
+ BACKTRACK();
+ }
+ MATCH_NEXT();
+ }
+ case ByteTerm::TypePatternCasedCharacterGreedy: {
+ BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
+ unsigned matchAmount = 0;
+ while ((matchAmount < currentTerm().atom.quantityCount) && input.checkInput(1)) {
+ if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition - 1)) {
+ input.uncheckInput(1);
+ break;
+ }
+ ++matchAmount;
+ }
+ backTrack->matchAmount = matchAmount;
+
+ MATCH_NEXT();
+ }
+ case ByteTerm::TypePatternCasedCharacterNonGreedy: {
+ BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
+ backTrack->matchAmount = 0;
+ MATCH_NEXT();
+ }
+
+ case ByteTerm::TypeCharacterClass:
+ if (matchCharacterClass(currentTerm(), context))
+ MATCH_NEXT();
+ BACKTRACK();
+ case ByteTerm::TypeBackReference:
+ if (matchBackReference(currentTerm(), context))
+ MATCH_NEXT();
+ BACKTRACK();
+ case ByteTerm::TypeParenthesesSubpattern:
+ if (matchParentheses(currentTerm(), context))
+ MATCH_NEXT();
+ BACKTRACK();
+ case ByteTerm::TypeParenthesesSubpatternOnceBegin:
+ if (matchParenthesesOnceBegin(currentTerm(), context))
+ MATCH_NEXT();
+ BACKTRACK();
+ case ByteTerm::TypeParenthesesSubpatternOnceEnd:
+ if (matchParenthesesOnceEnd(currentTerm(), context))
+ MATCH_NEXT();
+ BACKTRACK();
+ case ByteTerm::TypeParentheticalAssertionBegin:
+ if (matchParentheticalAssertionBegin(currentTerm(), context))
+ MATCH_NEXT();
+ BACKTRACK();
+ case ByteTerm::TypeParentheticalAssertionEnd:
+ if (matchParentheticalAssertionEnd(currentTerm(), context))
+ MATCH_NEXT();
+ BACKTRACK();
+
+ case ByteTerm::TypeCheckInput:
+ if (input.checkInput(currentTerm().checkInputCount))
+ MATCH_NEXT();
+ BACKTRACK();
+ }
+
+ // We should never fall-through to here.
+ ASSERT_NOT_REACHED();
+
+ backtrack:
+ ASSERT(context->term < static_cast<int>(disjunction->terms.size()));
+
+ switch (currentTerm().type) {
+ case ByteTerm::TypeSubpatternBegin:
+ return false;
+ case ByteTerm::TypeSubpatternEnd:
+ ASSERT_NOT_REACHED();
+
+ case ByteTerm::TypeBodyAlternativeBegin:
+ case ByteTerm::TypeBodyAlternativeDisjunction: {
+ int offset = currentTerm().alternative.next;
+ context->term += offset;
+ if (offset > 0)
+ MATCH_NEXT();
+
+ if (input.atEnd())
+ return false;
+
+ input.next();
+ context->matchBegin = input.getPos();
+ MATCH_NEXT();
+ }
+ case ByteTerm::TypeBodyAlternativeEnd:
+ ASSERT_NOT_REACHED();
+
+ case ByteTerm::TypeAlternativeBegin:
+ case ByteTerm::TypeAlternativeDisjunction: {
+ int offset = currentTerm().alternative.next;
+ context->term += offset;
+ if (offset > 0)
+ MATCH_NEXT();
+ BACKTRACK();
+ }
+ case ByteTerm::TypeAlternativeEnd: {
+ // We should never backtrack back into an alternative of the main body of the regex.
+ BackTrackInfoAlternative* backTrack = reinterpret_cast<BackTrackInfoAlternative*>(context->frame + currentTerm().frameLocation);
+ unsigned offset = backTrack->offset;
+ context->term -= offset;
+ BACKTRACK();
+ }
+
+ case ByteTerm::TypeAssertionBOL:
+ case ByteTerm::TypeAssertionEOL:
+ case ByteTerm::TypeAssertionWordBoundary:
+ BACKTRACK();
+
+ case ByteTerm::TypePatternCharacterOnce:
+ case ByteTerm::TypePatternCharacterFixed:
+ case ByteTerm::TypePatternCharacterGreedy:
+ case ByteTerm::TypePatternCharacterNonGreedy:
+ if (backtrackPatternCharacter(currentTerm(), context))
+ MATCH_NEXT();
+ BACKTRACK();
+ case ByteTerm::TypePatternCasedCharacterOnce:
+ case ByteTerm::TypePatternCasedCharacterFixed:
+ case ByteTerm::TypePatternCasedCharacterGreedy:
+ case ByteTerm::TypePatternCasedCharacterNonGreedy:
+ if (backtrackPatternCasedCharacter(currentTerm(), context))
+ MATCH_NEXT();
+ BACKTRACK();
+ case ByteTerm::TypeCharacterClass:
+ if (backtrackCharacterClass(currentTerm(), context))
+ MATCH_NEXT();
+ BACKTRACK();
+ case ByteTerm::TypeBackReference:
+ if (backtrackBackReference(currentTerm(), context))
+ MATCH_NEXT();
+ BACKTRACK();
+ case ByteTerm::TypeParenthesesSubpattern:
+ if (backtrackParentheses(currentTerm(), context))
+ MATCH_NEXT();
+ BACKTRACK();
+ case ByteTerm::TypeParenthesesSubpatternOnceBegin:
+ if (backtrackParenthesesOnceBegin(currentTerm(), context))
+ MATCH_NEXT();
+ BACKTRACK();
+ case ByteTerm::TypeParenthesesSubpatternOnceEnd:
+ if (backtrackParenthesesOnceEnd(currentTerm(), context))
+ MATCH_NEXT();
+ BACKTRACK();
+ case ByteTerm::TypeParentheticalAssertionBegin:
+ if (backtrackParentheticalAssertionBegin(currentTerm(), context))
+ MATCH_NEXT();
+ BACKTRACK();
+ case ByteTerm::TypeParentheticalAssertionEnd:
+ if (backtrackParentheticalAssertionEnd(currentTerm(), context))
+ MATCH_NEXT();
+ BACKTRACK();
+
+ case ByteTerm::TypeCheckInput:
+ input.uncheckInput(currentTerm().checkInputCount);
+ BACKTRACK();
+ }
+
+ ASSERT_NOT_REACHED();
+ return false;
+ }
+
+ bool matchNonZeroDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false)
+ {
+ if (matchDisjunction(disjunction, context, btrack)) {
+ while (context->matchBegin == context->matchEnd) {
+ if (!matchDisjunction(disjunction, context, true))
+ return false;
+ }
+ return true;
+ }
+
+ return false;
+ }
+
+ int interpret()
+ {
+ for (unsigned i = 0; i < ((pattern->m_body->m_numSubpatterns + 1) << 1); ++i)
+ output[i] = -1;
+
+ DisjunctionContext* context = allocDisjunctionContext(pattern->m_body.get());
+
+ if (matchDisjunction(pattern->m_body.get(), context)) {
+ output[0] = context->matchBegin;
+ output[1] = context->matchEnd;
+ }
+
+ freeDisjunctionContext(context);
+
+ return output[0];
+ }
+
+ Interpreter(BytecodePattern* pattern, int* output, const UChar* inputChar, unsigned start, unsigned length)
+ : pattern(pattern)
+ , output(output)
+ , input(inputChar, start, length)
+ {
+ }
+
+private:
+ BytecodePattern *pattern;
+ int* output;
+ InputStream input;
+};
+
+
+
+class ByteCompiler {
+ struct ParenthesesStackEntry {
+ unsigned beginTerm;
+ unsigned savedAlternativeIndex;
+ ParenthesesStackEntry(unsigned beginTerm, unsigned savedAlternativeIndex/*, unsigned subpatternId, bool capture = false*/)
+ : beginTerm(beginTerm)
+ , savedAlternativeIndex(savedAlternativeIndex)
+ {
+ }
+ };
+
+public:
+ ByteCompiler(RegexPattern& pattern)
+ : m_pattern(pattern)
+ {
+ m_bodyDisjunction = 0;
+ m_currentAlternativeIndex = 0;
+ }
+
+ BytecodePattern* compile()
+ {
+ regexBegin(m_pattern.m_numSubpatterns, m_pattern.m_body->m_callFrameSize);
+ emitDisjunction(m_pattern.m_body);
+ regexEnd();
+
+ return new BytecodePattern(m_bodyDisjunction, m_allParenthesesInfo, m_pattern);
+ }
+
+ void checkInput(unsigned count)
+ {
+ m_bodyDisjunction->terms.append(ByteTerm::CheckInput(count));
+ }
+
+ void assertionBOL(int inputPosition)
+ {
+ m_bodyDisjunction->terms.append(ByteTerm::BOL(inputPosition));
+ }
+
+ void assertionEOL(int inputPosition)
+ {
+ m_bodyDisjunction->terms.append(ByteTerm::EOL(inputPosition));
+ }
+
+ void assertionWordBoundary(bool invert, int inputPosition)
+ {
+ m_bodyDisjunction->terms.append(ByteTerm::WordBoundary(invert, inputPosition));
+ }
+
+ void atomPatternCharacter(UChar ch, int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType)
+ {
+ if (m_pattern.m_ignoreCase) {
+ UChar lo = Unicode::toLower(ch);
+ UChar hi = Unicode::toUpper(ch);
+
+ if (lo != hi) {
+ m_bodyDisjunction->terms.append(ByteTerm(lo, hi, inputPosition, frameLocation, quantityCount, quantityType));
+ return;
+ }
+ }
+
+ m_bodyDisjunction->terms.append(ByteTerm(ch, inputPosition, frameLocation, quantityCount, quantityType));
+ }
+
+ void atomCharacterClass(CharacterClass* characterClass, bool invert, int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType)
+ {
+ m_bodyDisjunction->terms.append(ByteTerm(characterClass, invert, inputPosition));
+
+ m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityCount = quantityCount;
+ m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityType = quantityType;
+ m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
+ }
+
+ void atomBackReference(unsigned subpatternId, int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType)
+ {
+ ASSERT(subpatternId);
+
+ m_bodyDisjunction->terms.append(ByteTerm::BackReference(subpatternId, inputPosition));
+
+ m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityCount = quantityCount;
+ m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityType = quantityType;
+ m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
+ }
+
+ void atomParenthesesSubpatternBegin(unsigned subpatternId, bool capture, int inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)
+ {
+ int beginTerm = m_bodyDisjunction->terms.size();
+
+ m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceBegin, subpatternId, capture, inputPosition));
+ m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
+ m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
+ m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
+
+ m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
+ m_currentAlternativeIndex = beginTerm + 1;
+ }
+
+ void atomParentheticalAssertionBegin(unsigned subpatternId, bool invert, unsigned frameLocation, unsigned alternativeFrameLocation)
+ {
+ int beginTerm = m_bodyDisjunction->terms.size();
+
+ m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParentheticalAssertionBegin, subpatternId, invert, 0));
+ m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
+ m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
+ m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
+
+ m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
+ m_currentAlternativeIndex = beginTerm + 1;
+ }
+
+ unsigned popParenthesesStack()
+ {
+ ASSERT(m_parenthesesStack.size());
+ int stackEnd = m_parenthesesStack.size() - 1;
+ unsigned beginTerm = m_parenthesesStack[stackEnd].beginTerm;
+ m_currentAlternativeIndex = m_parenthesesStack[stackEnd].savedAlternativeIndex;
+ m_parenthesesStack.shrink(stackEnd);
+
+ ASSERT(beginTerm < m_bodyDisjunction->terms.size());
+ ASSERT(m_currentAlternativeIndex < m_bodyDisjunction->terms.size());
+
+ return beginTerm;
+ }
+
+#ifndef NDEBUG
+ void dumpDisjunction(ByteDisjunction* disjunction)
+ {
+ printf("ByteDisjunction(%p):\n\t", disjunction);
+ for (unsigned i = 0; i < disjunction->terms.size(); ++i)
+ printf("{ %d } ", disjunction->terms[i].type);
+ printf("\n");
+ }
+#endif
+
+ void closeAlternative(int beginTerm)
+ {
+ int origBeginTerm = beginTerm;
+ ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeAlternativeBegin);
+ int endIndex = m_bodyDisjunction->terms.size();
+
+ unsigned frameLocation = m_bodyDisjunction->terms[beginTerm].frameLocation;
+
+ if (!m_bodyDisjunction->terms[beginTerm].alternative.next)
+ m_bodyDisjunction->terms.remove(beginTerm);
+ else {
+ while (m_bodyDisjunction->terms[beginTerm].alternative.next) {
+ beginTerm += m_bodyDisjunction->terms[beginTerm].alternative.next;
+ ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeAlternativeDisjunction);
+ m_bodyDisjunction->terms[beginTerm].alternative.end = endIndex - beginTerm;
+ m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation;
+ }
+
+ m_bodyDisjunction->terms[beginTerm].alternative.next = origBeginTerm - beginTerm;
+
+ m_bodyDisjunction->terms.append(ByteTerm::AlternativeEnd());
+ m_bodyDisjunction->terms[endIndex].frameLocation = frameLocation;
+ }
+ }
+
+ void closeBodyAlternative()
+ {
+ int beginTerm = 0;
+ int origBeginTerm = 0;
+ ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeBodyAlternativeBegin);
+ int endIndex = m_bodyDisjunction->terms.size();
+
+ unsigned frameLocation = m_bodyDisjunction->terms[beginTerm].frameLocation;
+
+ while (m_bodyDisjunction->terms[beginTerm].alternative.next) {
+ beginTerm += m_bodyDisjunction->terms[beginTerm].alternative.next;
+ ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeBodyAlternativeDisjunction);
+ m_bodyDisjunction->terms[beginTerm].alternative.end = endIndex - beginTerm;
+ m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation;
+ }
+
+ m_bodyDisjunction->terms[beginTerm].alternative.next = origBeginTerm - beginTerm;
+
+ m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeEnd());
+ m_bodyDisjunction->terms[endIndex].frameLocation = frameLocation;
+ }
+
+ void atomParenthesesEnd(bool doInline, unsigned lastSubpatternId, int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType, unsigned callFrameSize = 0)
+ {
+ unsigned beginTerm = popParenthesesStack();
+ closeAlternative(beginTerm + 1);
+ unsigned endTerm = m_bodyDisjunction->terms.size();
+
+ bool isAssertion = m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParentheticalAssertionBegin;
+ bool invertOrCapture = m_bodyDisjunction->terms[beginTerm].invertOrCapture;
+ unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId;
+
+ m_bodyDisjunction->terms.append(ByteTerm(isAssertion ? ByteTerm::TypeParentheticalAssertionEnd : ByteTerm::TypeParenthesesSubpatternOnceEnd, subpatternId, invertOrCapture, inputPosition));
+ m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm;
+ m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm;
+ m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation;
+
+ if (doInline) {
+ m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount;
+ m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
+ m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount;
+ m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType;
+ } else {
+ ByteTerm& parenthesesBegin = m_bodyDisjunction->terms[beginTerm];
+ ASSERT(parenthesesBegin.type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
+
+ bool invertOrCapture = parenthesesBegin.invertOrCapture;
+ unsigned subpatternId = parenthesesBegin.atom.subpatternId;
+
+ unsigned numSubpatterns = lastSubpatternId - subpatternId + 1;
+ ByteDisjunction* parenthesesDisjunction = new ByteDisjunction(numSubpatterns, callFrameSize);
+
+ parenthesesDisjunction->terms.append(ByteTerm::SubpatternBegin());
+ for (unsigned termInParentheses = beginTerm + 1; termInParentheses < endTerm; ++termInParentheses)
+ parenthesesDisjunction->terms.append(m_bodyDisjunction->terms[termInParentheses]);
+ parenthesesDisjunction->terms.append(ByteTerm::SubpatternEnd());
+
+ m_bodyDisjunction->terms.shrink(beginTerm);
+
+ m_allParenthesesInfo.append(parenthesesDisjunction);
+ m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpattern, subpatternId, parenthesesDisjunction, invertOrCapture, inputPosition));
+
+ m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount;
+ m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
+ m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation;
+ }
+ }
+
+ void regexBegin(unsigned numSubpatterns, unsigned callFrameSize)
+ {
+ m_bodyDisjunction = new ByteDisjunction(numSubpatterns, callFrameSize);
+ m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeBegin());
+ m_bodyDisjunction->terms[0].frameLocation = 0;
+ m_currentAlternativeIndex = 0;
+ }
+
+ void regexEnd()
+ {
+ closeBodyAlternative();
+ }
+
+ void alternativeBodyDisjunction()
+ {
+ int newAlternativeIndex = m_bodyDisjunction->terms.size();
+ m_bodyDisjunction->terms[m_currentAlternativeIndex].alternative.next = newAlternativeIndex - m_currentAlternativeIndex;
+ m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeDisjunction());
+
+ m_currentAlternativeIndex = newAlternativeIndex;
+ }
+
+ void alternativeDisjunction()
+ {
+ int newAlternativeIndex = m_bodyDisjunction->terms.size();
+ m_bodyDisjunction->terms[m_currentAlternativeIndex].alternative.next = newAlternativeIndex - m_currentAlternativeIndex;
+ m_bodyDisjunction->terms.append(ByteTerm::AlternativeDisjunction());
+
+ m_currentAlternativeIndex = newAlternativeIndex;
+ }
+
+ void emitDisjunction(PatternDisjunction* disjunction, unsigned inputCountAlreadyChecked = 0, unsigned parenthesesInputCountAlreadyChecked = 0)
+ {
+ for (unsigned alt = 0; alt < disjunction->m_alternatives.size(); ++alt) {
+ unsigned currentCountAlreadyChecked = inputCountAlreadyChecked;
+
+ if (alt) {
+ if (disjunction == m_pattern.m_body)
+ alternativeBodyDisjunction();
+ else
+ alternativeDisjunction();
+ }
+
+ PatternAlternative* alternative = disjunction->m_alternatives[alt];
+ unsigned minimumSize = alternative->m_minimumSize;
+
+ ASSERT(minimumSize >= parenthesesInputCountAlreadyChecked);
+ unsigned countToCheck = minimumSize - parenthesesInputCountAlreadyChecked;
+ if (countToCheck)
+ checkInput(countToCheck);
+ currentCountAlreadyChecked += countToCheck;
+
+ for (unsigned i = 0; i < alternative->m_terms.size(); ++i) {
+ PatternTerm& term = alternative->m_terms[i];
+
+ switch (term.type) {
+ case PatternTerm::TypeAssertionBOL:
+ assertionBOL(term.inputPosition - currentCountAlreadyChecked);
+ break;
+
+ case PatternTerm::TypeAssertionEOL:
+ assertionEOL(term.inputPosition - currentCountAlreadyChecked);
+ break;
+
+ case PatternTerm::TypeAssertionWordBoundary:
+ assertionWordBoundary(term.invertOrCapture, term.inputPosition - currentCountAlreadyChecked);
+ break;
+
+ case PatternTerm::TypePatternCharacter:
+ atomPatternCharacter(term.patternCharacter, term.inputPosition - currentCountAlreadyChecked, term.frameLocation, term.quantityCount, term.quantityType);
+ break;
+
+ case PatternTerm::TypeCharacterClass:
+ atomCharacterClass(term.characterClass, term.invertOrCapture, term.inputPosition - currentCountAlreadyChecked, term.frameLocation, term.quantityCount, term.quantityType);
+ break;
+
+ case PatternTerm::TypeBackReference:
+ atomBackReference(term.subpatternId, term.inputPosition - currentCountAlreadyChecked, term.frameLocation, term.quantityCount, term.quantityType);
+ break;
+
+ case PatternTerm::TypeForwardReference:
+ break;
+
+ case PatternTerm::TypeParenthesesSubpattern: {
+ unsigned disjunctionAlreadyCheckedCount = 0;
+ if ((term.quantityCount == 1) && !term.parentheses.isCopy) {
+ if (term.quantityType == QuantifierFixedCount) {
+ disjunctionAlreadyCheckedCount = term.parentheses.disjunction->m_minimumSize;
+ unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked;
+ atomParenthesesSubpatternBegin(term.parentheses.subpatternId, term.invertOrCapture, delegateEndInputOffset - disjunctionAlreadyCheckedCount, term.frameLocation, term.frameLocation);
+ emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, term.parentheses.disjunction->m_minimumSize);
+ atomParenthesesEnd(true, term.parentheses.lastSubpatternId, delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType, term.parentheses.disjunction->m_callFrameSize);
+ } else {
+ unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked;
+ atomParenthesesSubpatternBegin(term.parentheses.subpatternId, term.invertOrCapture, delegateEndInputOffset - disjunctionAlreadyCheckedCount, term.frameLocation, term.frameLocation + RegexStackSpaceForBackTrackInfoParenthesesOnce);
+ emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, 0);
+ atomParenthesesEnd(true, term.parentheses.lastSubpatternId, delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType, term.parentheses.disjunction->m_callFrameSize);
+ }
+ } else {
+ unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked;
+ atomParenthesesSubpatternBegin(term.parentheses.subpatternId, term.invertOrCapture, delegateEndInputOffset - disjunctionAlreadyCheckedCount, term.frameLocation, 0);
+ emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, 0);
+ atomParenthesesEnd(false, term.parentheses.lastSubpatternId, delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType, term.parentheses.disjunction->m_callFrameSize);
+ }
+ break;
+ }
+
+ case PatternTerm::TypeParentheticalAssertion: {
+ unsigned alternativeFrameLocation = term.inputPosition + RegexStackSpaceForBackTrackInfoParentheticalAssertion;
+
+ atomParentheticalAssertionBegin(term.parentheses.subpatternId, term.invertOrCapture, term.frameLocation, alternativeFrameLocation);
+ emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, 0);
+ atomParenthesesEnd(true, term.parentheses.lastSubpatternId, 0, term.frameLocation, term.quantityCount, term.quantityType);
+ break;
+ }
+ }
+ }
+ }
+ }
+
+private:
+ RegexPattern& m_pattern;
+ ByteDisjunction* m_bodyDisjunction;
+ unsigned m_currentAlternativeIndex;
+ Vector<ParenthesesStackEntry> m_parenthesesStack;
+ Vector<ByteDisjunction*> m_allParenthesesInfo;
+};
+
+
+BytecodePattern* byteCompileRegex(const UString& patternString, unsigned& numSubpatterns, const char*& error, bool ignoreCase, bool multiline)
+{
+ RegexPattern pattern(ignoreCase, multiline);
+
+ if ((error = compileRegex(patternString, pattern)))
+ return 0;
+
+ numSubpatterns = pattern.m_numSubpatterns;
+
+ return ByteCompiler(pattern).compile();
+}
+
+int interpretRegex(BytecodePattern* regex, const UChar* input, unsigned start, unsigned length, int* output)
+{
+ return Interpreter(regex, output, input, start, length).interpret();
+}
+
+
+COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoPatternCharacter) == (RegexStackSpaceForBackTrackInfoPatternCharacter * sizeof(uintptr_t)), CheckRegexStackSpaceForBackTrackInfoPatternCharacter);
+COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoCharacterClass) == (RegexStackSpaceForBackTrackInfoCharacterClass * sizeof(uintptr_t)), CheckRegexStackSpaceForBackTrackInfoCharacterClass);
+COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoBackReference) == (RegexStackSpaceForBackTrackInfoBackReference * sizeof(uintptr_t)), CheckRegexStackSpaceForBackTrackInfoBackReference);
+COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoAlternative) == (RegexStackSpaceForBackTrackInfoAlternative * sizeof(uintptr_t)), CheckRegexStackSpaceForBackTrackInfoAlternative);
+COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoParentheticalAssertion) == (RegexStackSpaceForBackTrackInfoParentheticalAssertion * sizeof(uintptr_t)), CheckRegexStackSpaceForBackTrackInfoParentheticalAssertion);
+COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoParenthesesOnce) == (RegexStackSpaceForBackTrackInfoParenthesesOnce * sizeof(uintptr_t)), CheckRegexStackSpaceForBackTrackInfoParenthesesOnce);
+COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoParentheses) == (RegexStackSpaceForBackTrackInfoParentheses * sizeof(uintptr_t)), CheckRegexStackSpaceForBackTrackInfoParentheses);
+
+
+} }
+
+#endif
diff --git a/src/3rdparty/javascriptcore/JavaScriptCore/yarr/RegexInterpreter.h b/src/3rdparty/javascriptcore/JavaScriptCore/yarr/RegexInterpreter.h
new file mode 100644
index 0000000..48c9a5e
--- /dev/null
+++ b/src/3rdparty/javascriptcore/JavaScriptCore/yarr/RegexInterpreter.h
@@ -0,0 +1,337 @@
+/*
+ * Copyright (C) 2009 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef RegexInterpreter_h
+#define RegexInterpreter_h
+
+#include <wtf/Platform.h>
+
+#if ENABLE(YARR)
+
+#include <wtf/unicode/Unicode.h>
+#include "RegexParser.h"
+#include "RegexPattern.h"
+
+namespace JSC { namespace Yarr {
+
+class ByteDisjunction;
+
+struct ByteTerm {
+ enum Type {
+ TypeBodyAlternativeBegin,
+ TypeBodyAlternativeDisjunction,
+ TypeBodyAlternativeEnd,
+ TypeAlternativeBegin,
+ TypeAlternativeDisjunction,
+ TypeAlternativeEnd,
+ TypeSubpatternBegin,
+ TypeSubpatternEnd,
+ TypeAssertionBOL,
+ TypeAssertionEOL,
+ TypeAssertionWordBoundary,
+ TypePatternCharacterOnce,
+ TypePatternCharacterFixed,
+ TypePatternCharacterGreedy,
+ TypePatternCharacterNonGreedy,
+ TypePatternCasedCharacterOnce,
+ TypePatternCasedCharacterFixed,
+ TypePatternCasedCharacterGreedy,
+ TypePatternCasedCharacterNonGreedy,
+ TypeCharacterClass,
+ TypeBackReference,
+ TypeParenthesesSubpattern,
+ TypeParenthesesSubpatternOnceBegin,
+ TypeParenthesesSubpatternOnceEnd,
+ TypeParentheticalAssertionBegin,
+ TypeParentheticalAssertionEnd,
+ TypeCheckInput,
+ } type;
+ bool invertOrCapture;
+ union {
+ struct {
+ union {
+ UChar patternCharacter;
+ struct {
+ UChar lo;
+ UChar hi;
+ } casedCharacter;
+ CharacterClass* characterClass;
+ unsigned subpatternId;
+ };
+ union {
+ ByteDisjunction* parenthesesDisjunction;
+ unsigned parenthesesWidth;
+ };
+ QuantifierType quantityType;
+ unsigned quantityCount;
+ } atom;
+ struct {
+ int next;
+ int end;
+ } alternative;
+ unsigned checkInputCount;
+ };
+ unsigned frameLocation;
+ int inputPosition;
+
+ ByteTerm(UChar ch, int inputPos, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType)
+ : frameLocation(frameLocation)
+ {
+ switch (quantityType) {
+ case QuantifierFixedCount:
+ type = (quantityCount == 1) ? ByteTerm::TypePatternCharacterOnce : ByteTerm::TypePatternCharacterFixed;
+ break;
+ case QuantifierGreedy:
+ type = ByteTerm::TypePatternCharacterGreedy;
+ break;
+ case QuantifierNonGreedy:
+ type = ByteTerm::TypePatternCharacterNonGreedy;
+ break;
+ }
+
+ atom.patternCharacter = ch;
+ atom.quantityType = quantityType;
+ atom.quantityCount = quantityCount;
+ inputPosition = inputPos;
+ }
+
+ ByteTerm(UChar lo, UChar hi, int inputPos, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType)
+ : frameLocation(frameLocation)
+ {
+ switch (quantityType) {
+ case QuantifierFixedCount:
+ type = (quantityCount == 1) ? ByteTerm::TypePatternCasedCharacterOnce : ByteTerm::TypePatternCasedCharacterFixed;
+ break;
+ case QuantifierGreedy:
+ type = ByteTerm::TypePatternCasedCharacterGreedy;
+ break;
+ case QuantifierNonGreedy:
+ type = ByteTerm::TypePatternCasedCharacterNonGreedy;
+ break;
+ }
+
+ atom.casedCharacter.lo = lo;
+ atom.casedCharacter.hi = hi;
+ atom.quantityType = quantityType;
+ atom.quantityCount = quantityCount;
+ inputPosition = inputPos;
+ }
+
+ ByteTerm(CharacterClass* characterClass, bool invert, int inputPos)
+ : type(ByteTerm::TypeCharacterClass)
+ , invertOrCapture(invert)
+ {
+ atom.characterClass = characterClass;
+ atom.quantityType = QuantifierFixedCount;
+ atom.quantityCount = 1;
+ inputPosition = inputPos;
+ }
+
+ ByteTerm(Type type, unsigned subpatternId, ByteDisjunction* parenthesesInfo, bool invertOrCapture, int inputPos)
+ : type(type)
+ , invertOrCapture(invertOrCapture)
+ {
+ atom.subpatternId = subpatternId;
+ atom.parenthesesDisjunction = parenthesesInfo;
+ atom.quantityType = QuantifierFixedCount;
+ atom.quantityCount = 1;
+ inputPosition = inputPos;
+ }
+
+ ByteTerm(Type type, bool invert = false)
+ : type(type)
+ , invertOrCapture(invert)
+ {
+ atom.quantityType = QuantifierFixedCount;
+ atom.quantityCount = 1;
+ }
+
+ ByteTerm(Type type, unsigned subpatternId, bool invertOrCapture, int inputPos)
+ : type(type)
+ , invertOrCapture(invertOrCapture)
+ {
+ atom.subpatternId = subpatternId;
+ atom.quantityType = QuantifierFixedCount;
+ atom.quantityCount = 1;
+ inputPosition = inputPos;
+ }
+
+ static ByteTerm BOL(int inputPos)
+ {
+ ByteTerm term(TypeAssertionBOL);
+ term.inputPosition = inputPos;
+ return term;
+ }
+
+ static ByteTerm CheckInput(unsigned count)
+ {
+ ByteTerm term(TypeCheckInput);
+ term.checkInputCount = count;
+ return term;
+ }
+
+ static ByteTerm EOL(int inputPos)
+ {
+ ByteTerm term(TypeAssertionEOL);
+ term.inputPosition = inputPos;
+ return term;
+ }
+
+ static ByteTerm WordBoundary(bool invert, int inputPos)
+ {
+ ByteTerm term(TypeAssertionWordBoundary, invert);
+ term.inputPosition = inputPos;
+ return term;
+ }
+
+ static ByteTerm BackReference(unsigned subpatternId, int inputPos)
+ {
+ return ByteTerm(TypeBackReference, subpatternId, false, inputPos);
+ }
+
+ static ByteTerm BodyAlternativeBegin()
+ {
+ ByteTerm term(TypeBodyAlternativeBegin);
+ term.alternative.next = 0;
+ term.alternative.end = 0;
+ return term;
+ }
+
+ static ByteTerm BodyAlternativeDisjunction()
+ {
+ ByteTerm term(TypeBodyAlternativeDisjunction);
+ term.alternative.next = 0;
+ term.alternative.end = 0;
+ return term;
+ }
+
+ static ByteTerm BodyAlternativeEnd()
+ {
+ ByteTerm term(TypeBodyAlternativeEnd);
+ term.alternative.next = 0;
+ term.alternative.end = 0;
+ return term;
+ }
+
+ static ByteTerm AlternativeBegin()
+ {
+ ByteTerm term(TypeAlternativeBegin);
+ term.alternative.next = 0;
+ term.alternative.end = 0;
+ return term;
+ }
+
+ static ByteTerm AlternativeDisjunction()
+ {
+ ByteTerm term(TypeAlternativeDisjunction);
+ term.alternative.next = 0;
+ term.alternative.end = 0;
+ return term;
+ }
+
+ static ByteTerm AlternativeEnd()
+ {
+ ByteTerm term(TypeAlternativeEnd);
+ term.alternative.next = 0;
+ term.alternative.end = 0;
+ return term;
+ }
+
+ static ByteTerm SubpatternBegin()
+ {
+ return ByteTerm(TypeSubpatternBegin);
+ }
+
+ static ByteTerm SubpatternEnd()
+ {
+ return ByteTerm(TypeSubpatternEnd);
+ }
+
+ bool invert()
+ {
+ return invertOrCapture;
+ }
+
+ bool capture()
+ {
+ return invertOrCapture;
+ }
+};
+
+class ByteDisjunction : public FastAllocBase {
+public:
+ ByteDisjunction(unsigned numSubpatterns, unsigned frameSize)
+ : m_numSubpatterns(numSubpatterns)
+ , m_frameSize(frameSize)
+ {
+ }
+
+ Vector<ByteTerm> terms;
+ unsigned m_numSubpatterns;
+ unsigned m_frameSize;
+};
+
+struct BytecodePattern : FastAllocBase {
+ BytecodePattern(ByteDisjunction* body, Vector<ByteDisjunction*> allParenthesesInfo, RegexPattern& pattern)
+ : m_body(body)
+ , m_ignoreCase(pattern.m_ignoreCase)
+ , m_multiline(pattern.m_multiline)
+ {
+ newlineCharacterClass = pattern.newlineCharacterClass();
+ wordcharCharacterClass = pattern.wordcharCharacterClass();
+
+ m_allParenthesesInfo.append(allParenthesesInfo);
+ m_userCharacterClasses.append(pattern.m_userCharacterClasses);
+ // 'Steal' the RegexPattern's CharacterClasses! We clear its
+ // array, so that it won't delete them on destruction. We'll
+ // take responsibility for that.
+ pattern.m_userCharacterClasses.clear();
+ }
+
+ ~BytecodePattern()
+ {
+ deleteAllValues(m_allParenthesesInfo);
+ deleteAllValues(m_userCharacterClasses);
+ }
+
+ OwnPtr<ByteDisjunction> m_body;
+ bool m_ignoreCase;
+ bool m_multiline;
+
+ CharacterClass* newlineCharacterClass;
+ CharacterClass* wordcharCharacterClass;
+private:
+ Vector<ByteDisjunction*> m_allParenthesesInfo;
+ Vector<CharacterClass*> m_userCharacterClasses;
+};
+
+BytecodePattern* byteCompileRegex(const UString& pattern, unsigned& numSubpatterns, const char*& error, bool ignoreCase = false, bool multiline = false);
+int interpretRegex(BytecodePattern* v_regex, const UChar* input, unsigned start, unsigned length, int* output);
+
+} } // namespace JSC::Yarr
+
+#endif
+
+#endif // RegexInterpreter_h
diff --git a/src/3rdparty/javascriptcore/JavaScriptCore/yarr/RegexJIT.cpp b/src/3rdparty/javascriptcore/JavaScriptCore/yarr/RegexJIT.cpp
new file mode 100644
index 0000000..fcb8d86
--- /dev/null
+++ b/src/3rdparty/javascriptcore/JavaScriptCore/yarr/RegexJIT.cpp
@@ -0,0 +1,1407 @@
+/*
+ * Copyright (C) 2009 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "RegexJIT.h"
+
+#include "ASCIICType.h"
+#include "JSGlobalData.h"
+#include "LinkBuffer.h"
+#include "MacroAssembler.h"
+#include "RegexCompiler.h"
+
+#include "pcre.h" // temporary, remove when fallback is removed.
+
+#if ENABLE(YARR_JIT)
+
+using namespace WTF;
+
+namespace JSC { namespace Yarr {
+
+
+class RegexGenerator : private MacroAssembler {
+ friend void jitCompileRegex(JSGlobalData* globalData, RegexCodeBlock& jitObject, const UString& pattern, unsigned& numSubpatterns, const char*& error, bool ignoreCase, bool multiline);
+
+#if CPU(ARM)
+ static const RegisterID input = ARMRegisters::r0;
+ static const RegisterID index = ARMRegisters::r1;
+ static const RegisterID length = ARMRegisters::r2;
+ static const RegisterID output = ARMRegisters::r4;
+
+ static const RegisterID regT0 = ARMRegisters::r5;
+ static const RegisterID regT1 = ARMRegisters::r6;
+
+ static const RegisterID returnRegister = ARMRegisters::r0;
+#elif CPU(X86)
+ static const RegisterID input = X86Registers::eax;
+ static const RegisterID index = X86Registers::edx;
+ static const RegisterID length = X86Registers::ecx;
+ static const RegisterID output = X86Registers::edi;
+
+ static const RegisterID regT0 = X86Registers::ebx;
+ static const RegisterID regT1 = X86Registers::esi;
+
+ static const RegisterID returnRegister = X86Registers::eax;
+#elif CPU(X86_64)
+ static const RegisterID input = X86Registers::edi;
+ static const RegisterID index = X86Registers::esi;
+ static const RegisterID length = X86Registers::edx;
+ static const RegisterID output = X86Registers::ecx;
+
+ static const RegisterID regT0 = X86Registers::eax;
+ static const RegisterID regT1 = X86Registers::ebx;
+
+ static const RegisterID returnRegister = X86Registers::eax;
+#endif
+
+ void optimizeAlternative(PatternAlternative* alternative)
+ {
+ if (!alternative->m_terms.size())
+ return;
+
+ for (unsigned i = 0; i < alternative->m_terms.size() - 1; ++i) {
+ PatternTerm& term = alternative->m_terms[i];
+ PatternTerm& nextTerm = alternative->m_terms[i + 1];
+
+ if ((term.type == PatternTerm::TypeCharacterClass)
+ && (term.quantityType == QuantifierFixedCount)
+ && (nextTerm.type == PatternTerm::TypePatternCharacter)
+ && (nextTerm.quantityType == QuantifierFixedCount)) {
+ PatternTerm termCopy = term;
+ alternative->m_terms[i] = nextTerm;
+ alternative->m_terms[i + 1] = termCopy;
+ }
+ }
+ }
+
+ void matchCharacterClassRange(RegisterID character, JumpList& failures, JumpList& matchDest, const CharacterRange* ranges, unsigned count, unsigned* matchIndex, const UChar* matches, unsigned matchCount)
+ {
+ do {
+ // pick which range we're going to generate
+ int which = count >> 1;
+ char lo = ranges[which].begin;
+ char hi = ranges[which].end;
+
+ // check if there are any ranges or matches below lo. If not, just jl to failure -
+ // if there is anything else to check, check that first, if it falls through jmp to failure.
+ if ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) {
+ Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo));
+
+ // generate code for all ranges before this one
+ if (which)
+ matchCharacterClassRange(character, failures, matchDest, ranges, which, matchIndex, matches, matchCount);
+
+ while ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) {
+ matchDest.append(branch32(Equal, character, Imm32((unsigned short)matches[*matchIndex])));
+ ++*matchIndex;
+ }
+ failures.append(jump());
+
+ loOrAbove.link(this);
+ } else if (which) {
+ Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo));
+
+ matchCharacterClassRange(character, failures, matchDest, ranges, which, matchIndex, matches, matchCount);
+ failures.append(jump());
+
+ loOrAbove.link(this);
+ } else
+ failures.append(branch32(LessThan, character, Imm32((unsigned short)lo)));
+
+ while ((*matchIndex < matchCount) && (matches[*matchIndex] <= hi))
+ ++*matchIndex;
+
+ matchDest.append(branch32(LessThanOrEqual, character, Imm32((unsigned short)hi)));
+ // fall through to here, the value is above hi.
+
+ // shuffle along & loop around if there are any more matches to handle.
+ unsigned next = which + 1;
+ ranges += next;
+ count -= next;
+ } while (count);
+ }
+
+ void matchCharacterClass(RegisterID character, JumpList& matchDest, const CharacterClass* charClass)
+ {
+ Jump unicodeFail;
+ if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size()) {
+ Jump isAscii = branch32(LessThanOrEqual, character, Imm32(0x7f));
+
+ if (charClass->m_matchesUnicode.size()) {
+ for (unsigned i = 0; i < charClass->m_matchesUnicode.size(); ++i) {
+ UChar ch = charClass->m_matchesUnicode[i];
+ matchDest.append(branch32(Equal, character, Imm32(ch)));
+ }
+ }
+
+ if (charClass->m_rangesUnicode.size()) {
+ for (unsigned i = 0; i < charClass->m_rangesUnicode.size(); ++i) {
+ UChar lo = charClass->m_rangesUnicode[i].begin;
+ UChar hi = charClass->m_rangesUnicode[i].end;
+
+ Jump below = branch32(LessThan, character, Imm32(lo));
+ matchDest.append(branch32(LessThanOrEqual, character, Imm32(hi)));
+ below.link(this);
+ }
+ }
+
+ unicodeFail = jump();
+ isAscii.link(this);
+ }
+
+ if (charClass->m_ranges.size()) {
+ unsigned matchIndex = 0;
+ JumpList failures;
+ matchCharacterClassRange(character, failures, matchDest, charClass->m_ranges.begin(), charClass->m_ranges.size(), &matchIndex, charClass->m_matches.begin(), charClass->m_matches.size());
+ while (matchIndex < charClass->m_matches.size())
+ matchDest.append(branch32(Equal, character, Imm32((unsigned short)charClass->m_matches[matchIndex++])));
+
+ failures.link(this);
+ } else if (charClass->m_matches.size()) {
+ // optimization: gather 'a','A' etc back together, can mask & test once.
+ Vector<char> matchesAZaz;
+
+ for (unsigned i = 0; i < charClass->m_matches.size(); ++i) {
+ char ch = charClass->m_matches[i];
+ if (m_pattern.m_ignoreCase) {
+ if (isASCIILower(ch)) {
+ matchesAZaz.append(ch);
+ continue;
+ }
+ if (isASCIIUpper(ch))
+ continue;
+ }
+ matchDest.append(branch32(Equal, character, Imm32((unsigned short)ch)));
+ }
+
+ if (unsigned countAZaz = matchesAZaz.size()) {
+ or32(Imm32(32), character);
+ for (unsigned i = 0; i < countAZaz; ++i)
+ matchDest.append(branch32(Equal, character, Imm32(matchesAZaz[i])));
+ }
+ }
+
+ if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size())
+ unicodeFail.link(this);
+ }
+
+ // Jumps if input not available; will have (incorrectly) incremented already!
+ Jump jumpIfNoAvailableInput(unsigned countToCheck)
+ {
+ add32(Imm32(countToCheck), index);
+ return branch32(Above, index, length);
+ }
+
+ Jump jumpIfAvailableInput(unsigned countToCheck)
+ {
+ add32(Imm32(countToCheck), index);
+ return branch32(BelowOrEqual, index, length);
+ }
+
+ Jump checkInput()
+ {
+ return branch32(BelowOrEqual, index, length);
+ }
+
+ Jump atEndOfInput()
+ {
+ return branch32(Equal, index, length);
+ }
+
+ Jump notAtEndOfInput()
+ {
+ return branch32(NotEqual, index, length);
+ }
+
+ Jump jumpIfCharEquals(UChar ch, int inputPosition)
+ {
+ return branch16(Equal, BaseIndex(input, index, TimesTwo, inputPosition * sizeof(UChar)), Imm32(ch));
+ }
+
+ Jump jumpIfCharNotEquals(UChar ch, int inputPosition)
+ {
+ return branch16(NotEqual, BaseIndex(input, index, TimesTwo, inputPosition * sizeof(UChar)), Imm32(ch));
+ }
+
+ void readCharacter(int inputPosition, RegisterID reg)
+ {
+ load16(BaseIndex(input, index, TimesTwo, inputPosition * sizeof(UChar)), reg);
+ }
+
+ void storeToFrame(RegisterID reg, unsigned frameLocation)
+ {
+ poke(reg, frameLocation);
+ }
+
+ void storeToFrame(Imm32 imm, unsigned frameLocation)
+ {
+ poke(imm, frameLocation);
+ }
+
+ DataLabelPtr storeToFrameWithPatch(unsigned frameLocation)
+ {
+ return storePtrWithPatch(ImmPtr(0), Address(stackPointerRegister, frameLocation * sizeof(void*)));
+ }
+
+ void loadFromFrame(unsigned frameLocation, RegisterID reg)
+ {
+ peek(reg, frameLocation);
+ }
+
+ void loadFromFrameAndJump(unsigned frameLocation)
+ {
+ jump(Address(stackPointerRegister, frameLocation * sizeof(void*)));
+ }
+
+ struct AlternativeBacktrackRecord {
+ DataLabelPtr dataLabel;
+ Label backtrackLocation;
+
+ AlternativeBacktrackRecord(DataLabelPtr dataLabel, Label backtrackLocation)
+ : dataLabel(dataLabel)
+ , backtrackLocation(backtrackLocation)
+ {
+ }
+ };
+
+ struct TermGenerationState {
+ TermGenerationState(PatternDisjunction* disjunction, unsigned checkedTotal)
+ : disjunction(disjunction)
+ , checkedTotal(checkedTotal)
+ {
+ }
+
+ void resetAlternative()
+ {
+ isBackTrackGenerated = false;
+ alt = 0;
+ }
+ bool alternativeValid()
+ {
+ return alt < disjunction->m_alternatives.size();
+ }
+ void nextAlternative()
+ {
+ ++alt;
+ }
+ PatternAlternative* alternative()
+ {
+ return disjunction->m_alternatives[alt];
+ }
+
+ void resetTerm()
+ {
+ ASSERT(alternativeValid());
+ t = 0;
+ }
+ bool termValid()
+ {
+ ASSERT(alternativeValid());
+ return t < alternative()->m_terms.size();
+ }
+ void nextTerm()
+ {
+ ASSERT(alternativeValid());
+ ++t;
+ }
+ PatternTerm& term()
+ {
+ ASSERT(alternativeValid());
+ return alternative()->m_terms[t];
+ }
+
+ PatternTerm& lookaheadTerm()
+ {
+ ASSERT(alternativeValid());
+ ASSERT((t + 1) < alternative()->m_terms.size());
+ return alternative()->m_terms[t + 1];
+ }
+ bool isSinglePatternCharacterLookaheadTerm()
+ {
+ ASSERT(alternativeValid());
+ return ((t + 1) < alternative()->m_terms.size())
+ && (lookaheadTerm().type == PatternTerm::TypePatternCharacter)
+ && (lookaheadTerm().quantityType == QuantifierFixedCount)
+ && (lookaheadTerm().quantityCount == 1);
+ }
+
+ int inputOffset()
+ {
+ return term().inputPosition - checkedTotal;
+ }
+
+ void jumpToBacktrack(Jump jump, MacroAssembler* masm)
+ {
+ if (isBackTrackGenerated)
+ jump.linkTo(backtrackLabel, masm);
+ else
+ backTrackJumps.append(jump);
+ }
+ void jumpToBacktrack(JumpList& jumps, MacroAssembler* masm)
+ {
+ if (isBackTrackGenerated)
+ jumps.linkTo(backtrackLabel, masm);
+ else
+ backTrackJumps.append(jumps);
+ }
+ bool plantJumpToBacktrackIfExists(MacroAssembler* masm)
+ {
+ if (isBackTrackGenerated) {
+ masm->jump(backtrackLabel);
+ return true;
+ }
+ return false;
+ }
+ void addBacktrackJump(Jump jump)
+ {
+ backTrackJumps.append(jump);
+ }
+ void setBacktrackGenerated(Label label)
+ {
+ isBackTrackGenerated = true;
+ backtrackLabel = label;
+ }
+ void linkAlternativeBacktracks(MacroAssembler* masm)
+ {
+ isBackTrackGenerated = false;
+ backTrackJumps.link(masm);
+ }
+ void linkAlternativeBacktracksTo(Label label, MacroAssembler* masm)
+ {
+ isBackTrackGenerated = false;
+ backTrackJumps.linkTo(label, masm);
+ }
+ void propagateBacktrackingFrom(TermGenerationState& nestedParenthesesState, MacroAssembler* masm)
+ {
+ jumpToBacktrack(nestedParenthesesState.backTrackJumps, masm);
+ if (nestedParenthesesState.isBackTrackGenerated)
+ setBacktrackGenerated(nestedParenthesesState.backtrackLabel);
+ }
+
+ PatternDisjunction* disjunction;
+ int checkedTotal;
+ private:
+ unsigned alt;
+ unsigned t;
+ JumpList backTrackJumps;
+ Label backtrackLabel;
+ bool isBackTrackGenerated;
+ };
+
+ void generateAssertionBOL(TermGenerationState& state)
+ {
+ PatternTerm& term = state.term();
+
+ if (m_pattern.m_multiline) {
+ const RegisterID character = regT0;
+
+ JumpList matchDest;
+ if (!term.inputPosition)
+ matchDest.append(branch32(Equal, index, Imm32(state.checkedTotal)));
+
+ readCharacter(state.inputOffset() - 1, character);
+ matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass());
+ state.jumpToBacktrack(jump(), this);
+
+ matchDest.link(this);
+ } else {
+ // Erk, really should poison out these alternatives early. :-/
+ if (term.inputPosition)
+ state.jumpToBacktrack(jump(), this);
+ else
+ state.jumpToBacktrack(branch32(NotEqual, index, Imm32(state.checkedTotal)), this);
+ }
+ }
+
+ void generateAssertionEOL(TermGenerationState& state)
+ {
+ PatternTerm& term = state.term();
+
+ if (m_pattern.m_multiline) {
+ const RegisterID character = regT0;
+
+ JumpList matchDest;
+ if (term.inputPosition == state.checkedTotal)
+ matchDest.append(atEndOfInput());
+
+ readCharacter(state.inputOffset(), character);
+ matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass());
+ state.jumpToBacktrack(jump(), this);
+
+ matchDest.link(this);
+ } else {
+ if (term.inputPosition == state.checkedTotal)
+ state.jumpToBacktrack(notAtEndOfInput(), this);
+ // Erk, really should poison out these alternatives early. :-/
+ else
+ state.jumpToBacktrack(jump(), this);
+ }
+ }
+
+ // Also falls though on nextIsNotWordChar.
+ void matchAssertionWordchar(TermGenerationState& state, JumpList& nextIsWordChar, JumpList& nextIsNotWordChar)
+ {
+ const RegisterID character = regT0;
+ PatternTerm& term = state.term();
+
+ if (term.inputPosition == state.checkedTotal)
+ nextIsNotWordChar.append(atEndOfInput());
+
+ readCharacter(state.inputOffset(), character);
+ matchCharacterClass(character, nextIsWordChar, m_pattern.wordcharCharacterClass());
+ }
+
+ void generateAssertionWordBoundary(TermGenerationState& state)
+ {
+ const RegisterID character = regT0;
+ PatternTerm& term = state.term();
+
+ Jump atBegin;
+ JumpList matchDest;
+ if (!term.inputPosition)
+ atBegin = branch32(Equal, index, Imm32(state.checkedTotal));
+ readCharacter(state.inputOffset() - 1, character);
+ matchCharacterClass(character, matchDest, m_pattern.wordcharCharacterClass());
+ if (!term.inputPosition)
+ atBegin.link(this);
+
+ // We fall through to here if the last character was not a wordchar.
+ JumpList nonWordCharThenWordChar;
+ JumpList nonWordCharThenNonWordChar;
+ if (term.invertOrCapture) {
+ matchAssertionWordchar(state, nonWordCharThenNonWordChar, nonWordCharThenWordChar);
+ nonWordCharThenWordChar.append(jump());
+ } else {
+ matchAssertionWordchar(state, nonWordCharThenWordChar, nonWordCharThenNonWordChar);
+ nonWordCharThenNonWordChar.append(jump());
+ }
+ state.jumpToBacktrack(nonWordCharThenNonWordChar, this);
+
+ // We jump here if the last character was a wordchar.
+ matchDest.link(this);
+ JumpList wordCharThenWordChar;
+ JumpList wordCharThenNonWordChar;
+ if (term.invertOrCapture) {
+ matchAssertionWordchar(state, wordCharThenNonWordChar, wordCharThenWordChar);
+ wordCharThenWordChar.append(jump());
+ } else {
+ matchAssertionWordchar(state, wordCharThenWordChar, wordCharThenNonWordChar);
+ // This can fall-though!
+ }
+
+ state.jumpToBacktrack(wordCharThenWordChar, this);
+
+ nonWordCharThenWordChar.link(this);
+ wordCharThenNonWordChar.link(this);
+ }
+
+ void generatePatternCharacterSingle(TermGenerationState& state)
+ {
+ const RegisterID character = regT0;
+ UChar ch = state.term().patternCharacter;
+
+ if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
+ readCharacter(state.inputOffset(), character);
+ or32(Imm32(32), character);
+ state.jumpToBacktrack(branch32(NotEqual, character, Imm32(Unicode::toLower(ch))), this);
+ } else {
+ ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
+ state.jumpToBacktrack(jumpIfCharNotEquals(ch, state.inputOffset()), this);
+ }
+ }
+
+ void generatePatternCharacterPair(TermGenerationState& state)
+ {
+ const RegisterID character = regT0;
+ UChar ch1 = state.term().patternCharacter;
+ UChar ch2 = state.lookaheadTerm().patternCharacter;
+
+ int mask = 0;
+ int chPair = ch1 | (ch2 << 16);
+
+ if (m_pattern.m_ignoreCase) {
+ if (isASCIIAlpha(ch1))
+ mask |= 32;
+ if (isASCIIAlpha(ch2))
+ mask |= 32 << 16;
+ }
+
+ if (mask) {
+ load32WithUnalignedHalfWords(BaseIndex(input, index, TimesTwo, state.inputOffset() * sizeof(UChar)), character);
+ or32(Imm32(mask), character);
+ state.jumpToBacktrack(branch32(NotEqual, character, Imm32(chPair | mask)), this);
+ } else
+ state.jumpToBacktrack(branch32WithUnalignedHalfWords(NotEqual, BaseIndex(input, index, TimesTwo, state.inputOffset() * sizeof(UChar)), Imm32(chPair)), this);
+ }
+
+ void generatePatternCharacterFixed(TermGenerationState& state)
+ {
+ const RegisterID character = regT0;
+ const RegisterID countRegister = regT1;
+ PatternTerm& term = state.term();
+ UChar ch = term.patternCharacter;
+
+ move(index, countRegister);
+ sub32(Imm32(term.quantityCount), countRegister);
+
+ Label loop(this);
+ if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
+ load16(BaseIndex(input, countRegister, TimesTwo, (state.inputOffset() + term.quantityCount) * sizeof(UChar)), character);
+ or32(Imm32(32), character);
+ state.jumpToBacktrack(branch32(NotEqual, character, Imm32(Unicode::toLower(ch))), this);
+ } else {
+ ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
+ state.jumpToBacktrack(branch16(NotEqual, BaseIndex(input, countRegister, TimesTwo, (state.inputOffset() + term.quantityCount) * sizeof(UChar)), Imm32(ch)), this);
+ }
+ add32(Imm32(1), countRegister);
+ branch32(NotEqual, countRegister, index).linkTo(loop, this);
+ }
+
+ void generatePatternCharacterGreedy(TermGenerationState& state)
+ {
+ const RegisterID character = regT0;
+ const RegisterID countRegister = regT1;
+ PatternTerm& term = state.term();
+ UChar ch = term.patternCharacter;
+
+ move(Imm32(0), countRegister);
+
+ JumpList failures;
+ Label loop(this);
+ failures.append(atEndOfInput());
+ if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
+ readCharacter(state.inputOffset(), character);
+ or32(Imm32(32), character);
+ failures.append(branch32(NotEqual, character, Imm32(Unicode::toLower(ch))));
+ } else {
+ ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
+ failures.append(jumpIfCharNotEquals(ch, state.inputOffset()));
+ }
+ add32(Imm32(1), countRegister);
+ add32(Imm32(1), index);
+ branch32(NotEqual, countRegister, Imm32(term.quantityCount)).linkTo(loop, this);
+ failures.append(jump());
+
+ Label backtrackBegin(this);
+ loadFromFrame(term.frameLocation, countRegister);
+ state.jumpToBacktrack(branchTest32(Zero, countRegister), this);
+ sub32(Imm32(1), countRegister);
+ sub32(Imm32(1), index);
+
+ failures.link(this);
+
+ storeToFrame(countRegister, term.frameLocation);
+
+ state.setBacktrackGenerated(backtrackBegin);
+ }
+
+ void generatePatternCharacterNonGreedy(TermGenerationState& state)
+ {
+ const RegisterID character = regT0;
+ const RegisterID countRegister = regT1;
+ PatternTerm& term = state.term();
+ UChar ch = term.patternCharacter;
+
+ move(Imm32(0), countRegister);
+
+ Jump firstTimeDoNothing = jump();
+
+ Label hardFail(this);
+ sub32(countRegister, index);
+ state.jumpToBacktrack(jump(), this);
+
+ Label backtrackBegin(this);
+ loadFromFrame(term.frameLocation, countRegister);
+
+ atEndOfInput().linkTo(hardFail, this);
+ branch32(Equal, countRegister, Imm32(term.quantityCount), hardFail);
+ if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
+ readCharacter(state.inputOffset(), character);
+ or32(Imm32(32), character);
+ branch32(NotEqual, character, Imm32(Unicode::toLower(ch))).linkTo(hardFail, this);
+ } else {
+ ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
+ jumpIfCharNotEquals(ch, state.inputOffset()).linkTo(hardFail, this);
+ }
+
+ add32(Imm32(1), countRegister);
+ add32(Imm32(1), index);
+
+ firstTimeDoNothing.link(this);
+ storeToFrame(countRegister, term.frameLocation);
+
+ state.setBacktrackGenerated(backtrackBegin);
+ }
+
+ void generateCharacterClassSingle(TermGenerationState& state)
+ {
+ const RegisterID character = regT0;
+ PatternTerm& term = state.term();
+
+ JumpList matchDest;
+ readCharacter(state.inputOffset(), character);
+ matchCharacterClass(character, matchDest, term.characterClass);
+
+ if (term.invertOrCapture)
+ state.jumpToBacktrack(matchDest, this);
+ else {
+ state.jumpToBacktrack(jump(), this);
+ matchDest.link(this);
+ }
+ }
+
+ void generateCharacterClassFixed(TermGenerationState& state)
+ {
+ const RegisterID character = regT0;
+ const RegisterID countRegister = regT1;
+ PatternTerm& term = state.term();
+
+ move(index, countRegister);
+ sub32(Imm32(term.quantityCount), countRegister);
+
+ Label loop(this);
+ JumpList matchDest;
+ load16(BaseIndex(input, countRegister, TimesTwo, (state.inputOffset() + term.quantityCount) * sizeof(UChar)), character);
+ matchCharacterClass(character, matchDest, term.characterClass);
+
+ if (term.invertOrCapture)
+ state.jumpToBacktrack(matchDest, this);
+ else {
+ state.jumpToBacktrack(jump(), this);
+ matchDest.link(this);
+ }
+
+ add32(Imm32(1), countRegister);
+ branch32(NotEqual, countRegister, index).linkTo(loop, this);
+ }
+
+ void generateCharacterClassGreedy(TermGenerationState& state)
+ {
+ const RegisterID character = regT0;
+ const RegisterID countRegister = regT1;
+ PatternTerm& term = state.term();
+
+ move(Imm32(0), countRegister);
+
+ JumpList failures;
+ Label loop(this);
+ failures.append(atEndOfInput());
+
+ if (term.invertOrCapture) {
+ readCharacter(state.inputOffset(), character);
+ matchCharacterClass(character, failures, term.characterClass);
+ } else {
+ JumpList matchDest;
+ readCharacter(state.inputOffset(), character);
+ matchCharacterClass(character, matchDest, term.characterClass);
+ failures.append(jump());
+ matchDest.link(this);
+ }
+
+ add32(Imm32(1), countRegister);
+ add32(Imm32(1), index);
+ branch32(NotEqual, countRegister, Imm32(term.quantityCount)).linkTo(loop, this);
+ failures.append(jump());
+
+ Label backtrackBegin(this);
+ loadFromFrame(term.frameLocation, countRegister);
+ state.jumpToBacktrack(branchTest32(Zero, countRegister), this);
+ sub32(Imm32(1), countRegister);
+ sub32(Imm32(1), index);
+
+ failures.link(this);
+
+ storeToFrame(countRegister, term.frameLocation);
+
+ state.setBacktrackGenerated(backtrackBegin);
+ }
+
+ void generateCharacterClassNonGreedy(TermGenerationState& state)
+ {
+ const RegisterID character = regT0;
+ const RegisterID countRegister = regT1;
+ PatternTerm& term = state.term();
+
+ move(Imm32(0), countRegister);
+
+ Jump firstTimeDoNothing = jump();
+
+ Label hardFail(this);
+ sub32(countRegister, index);
+ state.jumpToBacktrack(jump(), this);
+
+ Label backtrackBegin(this);
+ loadFromFrame(term.frameLocation, countRegister);
+
+ atEndOfInput().linkTo(hardFail, this);
+ branch32(Equal, countRegister, Imm32(term.quantityCount), hardFail);
+
+ JumpList matchDest;
+ readCharacter(state.inputOffset(), character);
+ matchCharacterClass(character, matchDest, term.characterClass);
+
+ if (term.invertOrCapture)
+ matchDest.linkTo(hardFail, this);
+ else {
+ jump(hardFail);
+ matchDest.link(this);
+ }
+
+ add32(Imm32(1), countRegister);
+ add32(Imm32(1), index);
+
+ firstTimeDoNothing.link(this);
+ storeToFrame(countRegister, term.frameLocation);
+
+ state.setBacktrackGenerated(backtrackBegin);
+ }
+
+ void generateParenthesesDisjunction(PatternTerm& parenthesesTerm, TermGenerationState& state, unsigned alternativeFrameLocation)
+ {
+ ASSERT((parenthesesTerm.type == PatternTerm::TypeParenthesesSubpattern) || (parenthesesTerm.type == PatternTerm::TypeParentheticalAssertion));
+ ASSERT(parenthesesTerm.quantityCount == 1);
+
+ PatternDisjunction* disjunction = parenthesesTerm.parentheses.disjunction;
+ unsigned preCheckedCount = ((parenthesesTerm.quantityType == QuantifierFixedCount) && (parenthesesTerm.type != PatternTerm::TypeParentheticalAssertion)) ? disjunction->m_minimumSize : 0;
+
+ if (disjunction->m_alternatives.size() == 1) {
+ state.resetAlternative();
+ ASSERT(state.alternativeValid());
+ PatternAlternative* alternative = state.alternative();
+ optimizeAlternative(alternative);
+
+ int countToCheck = alternative->m_minimumSize - preCheckedCount;
+ if (countToCheck) {
+ ASSERT((parenthesesTerm.type == PatternTerm::TypeParentheticalAssertion) || (parenthesesTerm.quantityType != QuantifierFixedCount));
+
+ // FIXME: This is quite horrible. The call to 'plantJumpToBacktrackIfExists'
+ // will be forced to always trampoline into here, just to decrement the index.
+ // Ick.
+ Jump skip = jump();
+
+ Label backtrackBegin(this);
+ sub32(Imm32(countToCheck), index);
+ state.addBacktrackJump(jump());
+
+ skip.link(this);
+
+ state.setBacktrackGenerated(backtrackBegin);
+
+ state.jumpToBacktrack(jumpIfNoAvailableInput(countToCheck), this);
+ state.checkedTotal += countToCheck;
+ }
+
+ for (state.resetTerm(); state.termValid(); state.nextTerm())
+ generateTerm(state);
+
+ state.checkedTotal -= countToCheck;
+ } else {
+ JumpList successes;
+
+ for (state.resetAlternative(); state.alternativeValid(); state.nextAlternative()) {
+
+ PatternAlternative* alternative = state.alternative();
+ optimizeAlternative(alternative);
+
+ ASSERT(alternative->m_minimumSize >= preCheckedCount);
+ int countToCheck = alternative->m_minimumSize - preCheckedCount;
+ if (countToCheck) {
+ state.addBacktrackJump(jumpIfNoAvailableInput(countToCheck));
+ state.checkedTotal += countToCheck;
+ }
+
+ for (state.resetTerm(); state.termValid(); state.nextTerm())
+ generateTerm(state);
+
+ // Matched an alternative.
+ DataLabelPtr dataLabel = storeToFrameWithPatch(alternativeFrameLocation);
+ successes.append(jump());
+
+ // Alternative did not match.
+ Label backtrackLocation(this);
+
+ // Can we backtrack the alternative? - if so, do so. If not, just fall through to the next one.
+ state.plantJumpToBacktrackIfExists(this);
+
+ state.linkAlternativeBacktracks(this);
+
+ if (countToCheck) {
+ sub32(Imm32(countToCheck), index);
+ state.checkedTotal -= countToCheck;
+ }
+
+ m_backtrackRecords.append(AlternativeBacktrackRecord(dataLabel, backtrackLocation));
+ }
+ // We fall through to here when the last alternative fails.
+ // Add a backtrack out of here for the parenthese handling code to link up.
+ state.addBacktrackJump(jump());
+
+ // Generate a trampoline for the parens code to backtrack to, to retry the
+ // next alternative.
+ state.setBacktrackGenerated(label());
+ loadFromFrameAndJump(alternativeFrameLocation);
+
+ // FIXME: both of the above hooks are a little inefficient, in that you
+ // may end up trampolining here, just to trampoline back out to the
+ // parentheses code, or vice versa. We can probably eliminate a jump
+ // by restructuring, but coding this way for now for simplicity during
+ // development.
+
+ successes.link(this);
+ }
+ }
+
+ void generateParenthesesSingle(TermGenerationState& state)
+ {
+ const RegisterID indexTemporary = regT0;
+ PatternTerm& term = state.term();
+ PatternDisjunction* disjunction = term.parentheses.disjunction;
+ ASSERT(term.quantityCount == 1);
+
+ unsigned preCheckedCount = ((term.quantityCount == 1) && (term.quantityType == QuantifierFixedCount)) ? disjunction->m_minimumSize : 0;
+
+ unsigned parenthesesFrameLocation = term.frameLocation;
+ unsigned alternativeFrameLocation = parenthesesFrameLocation;
+ if (term.quantityType != QuantifierFixedCount)
+ alternativeFrameLocation += RegexStackSpaceForBackTrackInfoParenthesesOnce;
+
+ // optimized case - no capture & no quantifier can be handled in a light-weight manner.
+ if (!term.invertOrCapture && (term.quantityType == QuantifierFixedCount)) {
+ TermGenerationState parenthesesState(disjunction, state.checkedTotal);
+ generateParenthesesDisjunction(state.term(), parenthesesState, alternativeFrameLocation);
+ // this expects that any backtracks back out of the parentheses will be in the
+ // parenthesesState's backTrackJumps vector, and that if they need backtracking
+ // they will have set an entry point on the parenthesesState's backtrackLabel.
+ state.propagateBacktrackingFrom(parenthesesState, this);
+ } else {
+ Jump nonGreedySkipParentheses;
+ Label nonGreedyTryParentheses;
+ if (term.quantityType == QuantifierGreedy)
+ storeToFrame(Imm32(1), parenthesesFrameLocation);
+ else if (term.quantityType == QuantifierNonGreedy) {
+ storeToFrame(Imm32(0), parenthesesFrameLocation);
+ nonGreedySkipParentheses = jump();
+ nonGreedyTryParentheses = label();
+ storeToFrame(Imm32(1), parenthesesFrameLocation);
+ }
+
+ // store the match start index
+ if (term.invertOrCapture) {
+ int inputOffset = state.inputOffset() - preCheckedCount;
+ if (inputOffset) {
+ move(index, indexTemporary);
+ add32(Imm32(inputOffset), indexTemporary);
+ store32(indexTemporary, Address(output, (term.parentheses.subpatternId << 1) * sizeof(int)));
+ } else
+ store32(index, Address(output, (term.parentheses.subpatternId << 1) * sizeof(int)));
+ }
+
+ // generate the body of the parentheses
+ TermGenerationState parenthesesState(disjunction, state.checkedTotal);
+ generateParenthesesDisjunction(state.term(), parenthesesState, alternativeFrameLocation);
+
+ // store the match end index
+ if (term.invertOrCapture) {
+ int inputOffset = state.inputOffset();
+ if (inputOffset) {
+ move(index, indexTemporary);
+ add32(Imm32(state.inputOffset()), indexTemporary);
+ store32(indexTemporary, Address(output, ((term.parentheses.subpatternId << 1) + 1) * sizeof(int)));
+ } else
+ store32(index, Address(output, ((term.parentheses.subpatternId << 1) + 1) * sizeof(int)));
+ }
+ Jump success = jump();
+
+ // A failure AFTER the parens jumps here
+ Label backtrackFromAfterParens(this);
+
+ if (term.quantityType == QuantifierGreedy) {
+ // If this is zero we have now tested with both with and without the parens.
+ loadFromFrame(parenthesesFrameLocation, indexTemporary);
+ state.jumpToBacktrack(branchTest32(Zero, indexTemporary), this);
+ } else if (term.quantityType == QuantifierNonGreedy) {
+ // If this is zero we have now tested with both with and without the parens.
+ loadFromFrame(parenthesesFrameLocation, indexTemporary);
+ branchTest32(Zero, indexTemporary).linkTo(nonGreedyTryParentheses, this);
+ }
+
+ parenthesesState.plantJumpToBacktrackIfExists(this);
+ // A failure WITHIN the parens jumps here
+ parenthesesState.linkAlternativeBacktracks(this);
+ if (term.invertOrCapture) {
+ store32(Imm32(-1), Address(output, (term.parentheses.subpatternId << 1) * sizeof(int)));
+ store32(Imm32(-1), Address(output, ((term.parentheses.subpatternId << 1) + 1) * sizeof(int)));
+ }
+
+ if (term.quantityType == QuantifierGreedy)
+ storeToFrame(Imm32(0), parenthesesFrameLocation);
+ else
+ state.jumpToBacktrack(jump(), this);
+
+ state.setBacktrackGenerated(backtrackFromAfterParens);
+ if (term.quantityType == QuantifierNonGreedy)
+ nonGreedySkipParentheses.link(this);
+ success.link(this);
+ }
+ }
+
+ void generateParentheticalAssertion(TermGenerationState& state)
+ {
+ PatternTerm& term = state.term();
+ PatternDisjunction* disjunction = term.parentheses.disjunction;
+ ASSERT(term.quantityCount == 1);
+ ASSERT(term.quantityType == QuantifierFixedCount);
+
+ unsigned parenthesesFrameLocation = term.frameLocation;
+ unsigned alternativeFrameLocation = parenthesesFrameLocation + RegexStackSpaceForBackTrackInfoParentheticalAssertion;
+
+ int countCheckedAfterAssertion = state.checkedTotal - term.inputPosition;
+
+ if (term.invertOrCapture) {
+ // Inverted case
+ storeToFrame(index, parenthesesFrameLocation);
+
+ state.checkedTotal -= countCheckedAfterAssertion;
+ if (countCheckedAfterAssertion)
+ sub32(Imm32(countCheckedAfterAssertion), index);
+
+ TermGenerationState parenthesesState(disjunction, state.checkedTotal);
+ generateParenthesesDisjunction(state.term(), parenthesesState, alternativeFrameLocation);
+ // Success! - which means - Fail!
+ loadFromFrame(parenthesesFrameLocation, index);
+ state.jumpToBacktrack(jump(), this);
+
+ // And fail means success.
+ parenthesesState.linkAlternativeBacktracks(this);
+ loadFromFrame(parenthesesFrameLocation, index);
+
+ state.checkedTotal += countCheckedAfterAssertion;
+ } else {
+ // Normal case
+ storeToFrame(index, parenthesesFrameLocation);
+
+ state.checkedTotal -= countCheckedAfterAssertion;
+ if (countCheckedAfterAssertion)
+ sub32(Imm32(countCheckedAfterAssertion), index);
+
+ TermGenerationState parenthesesState(disjunction, state.checkedTotal);
+ generateParenthesesDisjunction(state.term(), parenthesesState, alternativeFrameLocation);
+ // Success! - which means - Success!
+ loadFromFrame(parenthesesFrameLocation, index);
+ Jump success = jump();
+
+ parenthesesState.linkAlternativeBacktracks(this);
+ loadFromFrame(parenthesesFrameLocation, index);
+ state.jumpToBacktrack(jump(), this);
+
+ success.link(this);
+
+ state.checkedTotal += countCheckedAfterAssertion;
+ }
+ }
+
+ void generateTerm(TermGenerationState& state)
+ {
+ PatternTerm& term = state.term();
+
+ switch (term.type) {
+ case PatternTerm::TypeAssertionBOL:
+ generateAssertionBOL(state);
+ break;
+
+ case PatternTerm::TypeAssertionEOL:
+ generateAssertionEOL(state);
+ break;
+
+ case PatternTerm::TypeAssertionWordBoundary:
+ generateAssertionWordBoundary(state);
+ break;
+
+ case PatternTerm::TypePatternCharacter:
+ switch (term.quantityType) {
+ case QuantifierFixedCount:
+ if (term.quantityCount == 1) {
+ if (state.isSinglePatternCharacterLookaheadTerm() && (state.lookaheadTerm().inputPosition == (term.inputPosition + 1))) {
+ generatePatternCharacterPair(state);
+ state.nextTerm();
+ } else
+ generatePatternCharacterSingle(state);
+ } else
+ generatePatternCharacterFixed(state);
+ break;
+ case QuantifierGreedy:
+ generatePatternCharacterGreedy(state);
+ break;
+ case QuantifierNonGreedy:
+ generatePatternCharacterNonGreedy(state);
+ break;
+ }
+ break;
+
+ case PatternTerm::TypeCharacterClass:
+ switch (term.quantityType) {
+ case QuantifierFixedCount:
+ if (term.quantityCount == 1)
+ generateCharacterClassSingle(state);
+ else
+ generateCharacterClassFixed(state);
+ break;
+ case QuantifierGreedy:
+ generateCharacterClassGreedy(state);
+ break;
+ case QuantifierNonGreedy:
+ generateCharacterClassNonGreedy(state);
+ break;
+ }
+ break;
+
+ case PatternTerm::TypeBackReference:
+ m_generationFailed = true;
+ break;
+
+ case PatternTerm::TypeForwardReference:
+ break;
+
+ case PatternTerm::TypeParenthesesSubpattern:
+ if ((term.quantityCount == 1) && !term.parentheses.isCopy)
+ generateParenthesesSingle(state);
+ else
+ m_generationFailed = true;
+ break;
+
+ case PatternTerm::TypeParentheticalAssertion:
+ generateParentheticalAssertion(state);
+ break;
+ }
+ }
+
+ void generateDisjunction(PatternDisjunction* disjunction)
+ {
+ TermGenerationState state(disjunction, 0);
+ state.resetAlternative();
+
+ // Plant a check to see if there is sufficient input available to run the first alternative.
+ // Jumping back to the label 'firstAlternative' will get to this check, jumping to
+ // 'firstAlternativeInputChecked' will jump directly to matching the alternative having
+ // skipped this check.
+
+ Label firstAlternative(this);
+
+ // check availability for the next alternative
+ int countCheckedForCurrentAlternative = 0;
+ int countToCheckForFirstAlternative = 0;
+ bool hasShorterAlternatives = false;
+ JumpList notEnoughInputForPreviousAlternative;
+
+ if (state.alternativeValid()) {
+ PatternAlternative* alternative = state.alternative();
+ countToCheckForFirstAlternative = alternative->m_minimumSize;
+ state.checkedTotal += countToCheckForFirstAlternative;
+ if (countToCheckForFirstAlternative)
+ notEnoughInputForPreviousAlternative.append(jumpIfNoAvailableInput(countToCheckForFirstAlternative));
+ countCheckedForCurrentAlternative = countToCheckForFirstAlternative;
+ }
+
+ Label firstAlternativeInputChecked(this);
+
+ while (state.alternativeValid()) {
+ // Track whether any alternatives are shorter than the first one.
+ hasShorterAlternatives = hasShorterAlternatives || (countCheckedForCurrentAlternative < countToCheckForFirstAlternative);
+
+ PatternAlternative* alternative = state.alternative();
+ optimizeAlternative(alternative);
+
+ for (state.resetTerm(); state.termValid(); state.nextTerm())
+ generateTerm(state);
+
+ // If we get here, the alternative matched.
+ if (m_pattern.m_body->m_callFrameSize)
+ addPtr(Imm32(m_pattern.m_body->m_callFrameSize * sizeof(void*)), stackPointerRegister);
+
+ ASSERT(index != returnRegister);
+ if (m_pattern.m_body->m_hasFixedSize) {
+ move(index, returnRegister);
+ if (alternative->m_minimumSize)
+ sub32(Imm32(alternative->m_minimumSize), returnRegister);
+ } else
+ pop(returnRegister);
+ store32(index, Address(output, 4));
+ store32(returnRegister, output);
+
+ generateReturn();
+
+ state.nextAlternative();
+
+ // if there are any more alternatives, plant the check for input before looping.
+ if (state.alternativeValid()) {
+ PatternAlternative* nextAlternative = state.alternative();
+ int countToCheckForNextAlternative = nextAlternative->m_minimumSize;
+
+ if (countCheckedForCurrentAlternative > countToCheckForNextAlternative) { // CASE 1: current alternative was longer than the next one.
+ // If we get here, there the last input checked failed.
+ notEnoughInputForPreviousAlternative.link(this);
+
+ // Check if sufficent input available to run the next alternative
+ notEnoughInputForPreviousAlternative.append(jumpIfNoAvailableInput(countToCheckForNextAlternative - countCheckedForCurrentAlternative));
+ // We are now in the correct state to enter the next alternative; this add is only required
+ // to mirror and revert operation of the sub32, just below.
+ add32(Imm32(countCheckedForCurrentAlternative - countToCheckForNextAlternative), index);
+
+ // If we get here, there the last input checked passed.
+ state.linkAlternativeBacktracks(this);
+ // No need to check if we can run the next alternative, since it is shorter -
+ // just update index.
+ sub32(Imm32(countCheckedForCurrentAlternative - countToCheckForNextAlternative), index);
+ } else if (countCheckedForCurrentAlternative < countToCheckForNextAlternative) { // CASE 2: next alternative is longer than the current one.
+ // If we get here, there the last input checked failed.
+ // If there is insufficient input to run the current alternative, and the next alternative is longer,
+ // then there is definitely not enough input to run it - don't even check. Just adjust index, as if
+ // we had checked.
+ notEnoughInputForPreviousAlternative.link(this);
+ add32(Imm32(countToCheckForNextAlternative - countCheckedForCurrentAlternative), index);
+ notEnoughInputForPreviousAlternative.append(jump());
+
+ // The next alternative is longer than the current one; check the difference.
+ state.linkAlternativeBacktracks(this);
+ notEnoughInputForPreviousAlternative.append(jumpIfNoAvailableInput(countToCheckForNextAlternative - countCheckedForCurrentAlternative));
+ } else { // CASE 3: Both alternatives are the same length.
+ ASSERT(countCheckedForCurrentAlternative == countToCheckForNextAlternative);
+
+ // If the next alterative is the same length as this one, then no need to check the input -
+ // if there was sufficent input to run the current alternative then there is sufficient
+ // input to run the next one; if not, there isn't.
+ state.linkAlternativeBacktracks(this);
+ }
+
+ state.checkedTotal -= countCheckedForCurrentAlternative;
+ countCheckedForCurrentAlternative = countToCheckForNextAlternative;
+ state.checkedTotal += countCheckedForCurrentAlternative;
+ }
+ }
+
+ // If we get here, all Alternatives failed...
+
+ state.checkedTotal -= countCheckedForCurrentAlternative;
+
+ // How much more input need there be to be able to retry from the first alternative?
+ // examples:
+ // /yarr_jit/ or /wrec|pcre/
+ // In these examples we need check for one more input before looping.
+ // /yarr_jit|pcre/
+ // In this case we need check for 5 more input to loop (+4 to allow for the first alterative
+ // being four longer than the last alternative checked, and another +1 to effectively move
+ // the start position along by one).
+ // /yarr|rules/ or /wrec|notsomuch/
+ // In these examples, provided that there was sufficient input to have just been matching for
+ // the second alternative we can loop without checking for available input (since the second
+ // alternative is longer than the first). In the latter example we need to decrement index
+ // (by 4) so the start position is only progressed by 1 from the last iteration.
+ int incrementForNextIter = (countToCheckForFirstAlternative - countCheckedForCurrentAlternative) + 1;
+
+ // First, deal with the cases where there was sufficient input to try the last alternative.
+ if (incrementForNextIter > 0) // We need to check for more input anyway, fall through to the checking below.
+ state.linkAlternativeBacktracks(this);
+ else if (m_pattern.m_body->m_hasFixedSize && !incrementForNextIter) // No need to update anything, link these backtracks straight to the to pof the loop!
+ state.linkAlternativeBacktracksTo(firstAlternativeInputChecked, this);
+ else { // no need to check the input, but we do have some bookkeeping to do first.
+ state.linkAlternativeBacktracks(this);
+
+ // Where necessary update our preserved start position.
+ if (!m_pattern.m_body->m_hasFixedSize) {
+ move(index, regT0);
+ sub32(Imm32(countCheckedForCurrentAlternative - 1), regT0);
+ poke(regT0, m_pattern.m_body->m_callFrameSize);
+ }
+
+ // Update index if necessary, and loop (without checking).
+ if (incrementForNextIter)
+ add32(Imm32(incrementForNextIter), index);
+ jump().linkTo(firstAlternativeInputChecked, this);
+ }
+
+ notEnoughInputForPreviousAlternative.link(this);
+ // Update our idea of the start position, if we're tracking this.
+ if (!m_pattern.m_body->m_hasFixedSize) {
+ if (countCheckedForCurrentAlternative - 1) {
+ move(index, regT0);
+ sub32(Imm32(countCheckedForCurrentAlternative - 1), regT0);
+ poke(regT0, m_pattern.m_body->m_callFrameSize);
+ } else
+ poke(index, m_pattern.m_body->m_callFrameSize);
+ }
+ // Check if there is sufficent input to run the first alternative again.
+ jumpIfAvailableInput(incrementForNextIter).linkTo(firstAlternativeInputChecked, this);
+ // No - insufficent input to run the first alteranative, are there any other alternatives we
+ // might need to check? If so, the last check will have left the index incremented by
+ // (countToCheckForFirstAlternative + 1), so we need test whether countToCheckForFirstAlternative
+ // LESS input is available, to have the effect of just progressing the start position by 1
+ // from the last iteration. If this check passes we can just jump up to the check associated
+ // with the first alternative in the loop. This is a bit sad, since we'll end up trying the
+ // first alternative again, and this check will fail (otherwise the check planted just above
+ // here would have passed). This is a bit sad, however it saves trying to do something more
+ // complex here in compilation, and in the common case we should end up coallescing the checks.
+ //
+ // FIXME: a nice improvement here may be to stop trying to match sooner, based on the least
+ // of the minimum-alternative-lengths. E.g. if I have two alternatives of length 200 and 150,
+ // and a string of length 100, we'll end up looping index from 0 to 100, checking whether there
+ // is sufficient input to run either alternative (constantly failing). If there had been only
+ // one alternative, or if the shorter alternative had come first, we would have terminated
+ // immediately. :-/
+ if (hasShorterAlternatives)
+ jumpIfAvailableInput(-countToCheckForFirstAlternative).linkTo(firstAlternative, this);
+ // index will now be a bit garbled (depending on whether 'hasShorterAlternatives' is true,
+ // it has either been incremented by 1 or by (countToCheckForFirstAlternative + 1) ...
+ // but since we're about to return a failure this doesn't really matter!)
+
+ unsigned frameSize = m_pattern.m_body->m_callFrameSize;
+ if (!m_pattern.m_body->m_hasFixedSize)
+ ++frameSize;
+ if (frameSize)
+ addPtr(Imm32(frameSize * sizeof(void*)), stackPointerRegister);
+
+ move(Imm32(-1), returnRegister);
+
+ generateReturn();
+ }
+
+ void generateEnter()
+ {
+#if CPU(X86_64)
+ push(X86Registers::ebp);
+ move(stackPointerRegister, X86Registers::ebp);
+ push(X86Registers::ebx);
+#elif CPU(X86)
+ push(X86Registers::ebp);
+ move(stackPointerRegister, X86Registers::ebp);
+ // TODO: do we need spill registers to fill the output pointer if there are no sub captures?
+ push(X86Registers::ebx);
+ push(X86Registers::edi);
+ push(X86Registers::esi);
+ // load output into edi (2 = saved ebp + return address).
+ #if COMPILER(MSVC)
+ loadPtr(Address(X86Registers::ebp, 2 * sizeof(void*)), input);
+ loadPtr(Address(X86Registers::ebp, 3 * sizeof(void*)), index);
+ loadPtr(Address(X86Registers::ebp, 4 * sizeof(void*)), length);
+ loadPtr(Address(X86Registers::ebp, 5 * sizeof(void*)), output);
+ #else
+ loadPtr(Address(X86Registers::ebp, 2 * sizeof(void*)), output);
+ #endif
+#elif CPU(ARM)
+ push(ARMRegisters::r4);
+ push(ARMRegisters::r5);
+ push(ARMRegisters::r6);
+ move(ARMRegisters::r3, output);
+#endif
+ }
+
+ void generateReturn()
+ {
+#if CPU(X86_64)
+ pop(X86Registers::ebx);
+ pop(X86Registers::ebp);
+#elif CPU(X86)
+ pop(X86Registers::esi);
+ pop(X86Registers::edi);
+ pop(X86Registers::ebx);
+ pop(X86Registers::ebp);
+#elif CPU(ARM)
+ pop(ARMRegisters::r6);
+ pop(ARMRegisters::r5);
+ pop(ARMRegisters::r4);
+#endif
+ ret();
+ }
+
+public:
+ RegexGenerator(RegexPattern& pattern)
+ : m_pattern(pattern)
+ , m_generationFailed(false)
+ {
+ }
+
+ void generate()
+ {
+ generateEnter();
+
+ // TODO: do I really want this on the stack?
+ if (!m_pattern.m_body->m_hasFixedSize)
+ push(index);
+
+ if (m_pattern.m_body->m_callFrameSize)
+ subPtr(Imm32(m_pattern.m_body->m_callFrameSize * sizeof(void*)), stackPointerRegister);
+
+ generateDisjunction(m_pattern.m_body);
+ }
+
+ void compile(JSGlobalData* globalData, RegexCodeBlock& jitObject)
+ {
+ generate();
+
+ LinkBuffer patchBuffer(this, globalData->executableAllocator.poolForSize(size()));
+
+ for (unsigned i = 0; i < m_backtrackRecords.size(); ++i)
+ patchBuffer.patch(m_backtrackRecords[i].dataLabel, patchBuffer.locationOf(m_backtrackRecords[i].backtrackLocation));
+
+ jitObject.set(patchBuffer.finalizeCode());
+ }
+
+ bool generationFailed()
+ {
+ return m_generationFailed;
+ }
+
+private:
+ RegexPattern& m_pattern;
+ Vector<AlternativeBacktrackRecord> m_backtrackRecords;
+ bool m_generationFailed;
+};
+
+void jitCompileRegex(JSGlobalData* globalData, RegexCodeBlock& jitObject, const UString& patternString, unsigned& numSubpatterns, const char*& error, bool ignoreCase, bool multiline)
+{
+ RegexPattern pattern(ignoreCase, multiline);
+
+ if ((error = compileRegex(patternString, pattern)))
+ return;
+
+ numSubpatterns = pattern.m_numSubpatterns;
+
+ RegexGenerator generator(pattern);
+ generator.compile(globalData, jitObject);
+
+ if (generator.generationFailed()) {
+ JSRegExpIgnoreCaseOption ignoreCaseOption = ignoreCase ? JSRegExpIgnoreCase : JSRegExpDoNotIgnoreCase;
+ JSRegExpMultilineOption multilineOption = multiline ? JSRegExpMultiline : JSRegExpSingleLine;
+ jitObject.setFallback(jsRegExpCompile(reinterpret_cast<const UChar*>(patternString.data()), patternString.size(), ignoreCaseOption, multilineOption, &numSubpatterns, &error));
+ }
+}
+
+}}
+
+#endif
+
+
+
+
+
diff --git a/src/3rdparty/javascriptcore/JavaScriptCore/yarr/RegexJIT.h b/src/3rdparty/javascriptcore/JavaScriptCore/yarr/RegexJIT.h
new file mode 100644
index 0000000..5ead00f
--- /dev/null
+++ b/src/3rdparty/javascriptcore/JavaScriptCore/yarr/RegexJIT.h
@@ -0,0 +1,98 @@
+/*
+ * Copyright (C) 2009 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef RegexJIT_h
+#define RegexJIT_h
+
+#include <wtf/Platform.h>
+
+#if ENABLE(YARR_JIT)
+
+#include "MacroAssembler.h"
+#include "RegexPattern.h"
+#include <UString.h>
+
+#include <pcre.h>
+struct JSRegExp; // temporary, remove when fallback is removed.
+
+#if CPU(X86) && !COMPILER(MSVC)
+#define YARR_CALL __attribute__ ((regparm (3)))
+#else
+#define YARR_CALL
+#endif
+
+namespace JSC {
+
+class JSGlobalData;
+class ExecutablePool;
+
+namespace Yarr {
+
+class RegexCodeBlock {
+ typedef int (*RegexJITCode)(const UChar* input, unsigned start, unsigned length, int* output) YARR_CALL;
+
+public:
+ RegexCodeBlock()
+ : m_fallback(0)
+ {
+ }
+
+ ~RegexCodeBlock()
+ {
+ if (m_fallback)
+ jsRegExpFree(m_fallback);
+ }
+
+ JSRegExp* getFallback() { return m_fallback; }
+ void setFallback(JSRegExp* fallback) { m_fallback = fallback; }
+
+ bool operator!() { return !m_ref.m_code.executableAddress(); }
+ void set(MacroAssembler::CodeRef ref) { m_ref = ref; }
+
+ int execute(const UChar* input, unsigned start, unsigned length, int* output)
+ {
+ return ((RegexJITCode)(m_ref.m_code.executableAddress()))(input, start, length, output);
+ }
+
+private:
+ MacroAssembler::CodeRef m_ref;
+ JSRegExp* m_fallback;
+};
+
+void jitCompileRegex(JSGlobalData* globalData, RegexCodeBlock& jitObject, const UString& pattern, unsigned& numSubpatterns, const char*& error, bool ignoreCase = false, bool multiline = false);
+
+inline int executeRegex(RegexCodeBlock& jitObject, const UChar* input, unsigned start, unsigned length, int* output, int outputArraySize)
+{
+ if (JSRegExp* fallback = jitObject.getFallback())
+ return (jsRegExpExecute(fallback, input, length, start, output, outputArraySize) < 0) ? -1 : output[0];
+
+ return jitObject.execute(input, start, length, output);
+}
+
+} } // namespace JSC::Yarr
+
+#endif
+
+#endif // RegexJIT_h
diff --git a/src/3rdparty/javascriptcore/JavaScriptCore/yarr/RegexParser.h b/src/3rdparty/javascriptcore/JavaScriptCore/yarr/RegexParser.h
new file mode 100644
index 0000000..64e8463
--- /dev/null
+++ b/src/3rdparty/javascriptcore/JavaScriptCore/yarr/RegexParser.h
@@ -0,0 +1,854 @@
+/*
+ * Copyright (C) 2009 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef RegexParser_h
+#define RegexParser_h
+
+#include <wtf/Platform.h>
+
+#if ENABLE(YARR)
+
+#include <UString.h>
+#include <wtf/ASCIICType.h>
+#include <wtf/unicode/Unicode.h>
+#include <limits.h>
+
+namespace JSC { namespace Yarr {
+
+enum BuiltInCharacterClassID {
+ DigitClassID,
+ SpaceClassID,
+ WordClassID,
+ NewlineClassID,
+};
+
+// The Parser class should not be used directly - only via the Yarr::parse() method.
+template<class Delegate>
+class Parser {
+private:
+ template<class FriendDelegate>
+ friend const char* parse(FriendDelegate& delegate, const UString& pattern, unsigned backReferenceLimit);
+
+ enum ErrorCode {
+ NoError,
+ PatternTooLarge,
+ QuantifierOutOfOrder,
+ QuantifierWithoutAtom,
+ MissingParentheses,
+ ParenthesesUnmatched,
+ ParenthesesTypeInvalid,
+ CharacterClassUnmatched,
+ CharacterClassOutOfOrder,
+ EscapeUnterminated,
+ NumberOfErrorCodes
+ };
+
+ /*
+ * CharacterClassParserDelegate:
+ *
+ * The class CharacterClassParserDelegate is used in the parsing of character
+ * classes. This class handles detection of character ranges. This class
+ * implements enough of the delegate interface such that it can be passed to
+ * parseEscape() as an EscapeDelegate. This allows parseEscape() to be reused
+ * to perform the parsing of escape characters in character sets.
+ */
+ class CharacterClassParserDelegate {
+ public:
+ CharacterClassParserDelegate(Delegate& delegate, ErrorCode& err)
+ : m_delegate(delegate)
+ , m_err(err)
+ , m_state(empty)
+ {
+ }
+
+ /*
+ * begin():
+ *
+ * Called at beginning of construction.
+ */
+ void begin(bool invert)
+ {
+ m_delegate.atomCharacterClassBegin(invert);
+ }
+
+ /*
+ * atomPatternCharacterUnescaped():
+ *
+ * This method is called directly from parseCharacterClass(), to report a new
+ * pattern character token. This method differs from atomPatternCharacter(),
+ * which will be called from parseEscape(), since a hypen provided via this
+ * method may be indicating a character range, but a hyphen parsed by
+ * parseEscape() cannot be interpreted as doing so.
+ */
+ void atomPatternCharacterUnescaped(UChar ch)
+ {
+ switch (m_state) {
+ case empty:
+ m_character = ch;
+ m_state = cachedCharacter;
+ break;
+
+ case cachedCharacter:
+ if (ch == '-')
+ m_state = cachedCharacterHyphen;
+ else {
+ m_delegate.atomCharacterClassAtom(m_character);
+ m_character = ch;
+ }
+ break;
+
+ case cachedCharacterHyphen:
+ if (ch >= m_character)
+ m_delegate.atomCharacterClassRange(m_character, ch);
+ else
+ m_err = CharacterClassOutOfOrder;
+ m_state = empty;
+ }
+ }
+
+ /*
+ * atomPatternCharacter():
+ *
+ * Adds a pattern character, called by parseEscape(), as such will not
+ * interpret a hyphen as indicating a character range.
+ */
+ void atomPatternCharacter(UChar ch)
+ {
+ // Flush if a character is already pending to prevent the
+ // hyphen from begin interpreted as indicating a range.
+ if((ch == '-') && (m_state == cachedCharacter))
+ flush();
+
+ atomPatternCharacterUnescaped(ch);
+ }
+
+ /*
+ * atomBuiltInCharacterClass():
+ *
+ * Adds a built-in character class, called by parseEscape().
+ */
+ void atomBuiltInCharacterClass(BuiltInCharacterClassID classID, bool invert)
+ {
+ flush();
+ m_delegate.atomCharacterClassBuiltIn(classID, invert);
+ }
+
+ /*
+ * end():
+ *
+ * Called at end of construction.
+ */
+ void end()
+ {
+ flush();
+ m_delegate.atomCharacterClassEnd();
+ }
+
+ // parseEscape() should never call these delegate methods when
+ // invoked with inCharacterClass set.
+ void assertionWordBoundary(bool) { ASSERT_NOT_REACHED(); }
+ void atomBackReference(unsigned) { ASSERT_NOT_REACHED(); }
+
+ private:
+ void flush()
+ {
+ if (m_state != empty) // either cachedCharacter or cachedCharacterHyphen
+ m_delegate.atomCharacterClassAtom(m_character);
+ if (m_state == cachedCharacterHyphen)
+ m_delegate.atomCharacterClassAtom('-');
+ m_state = empty;
+ }
+
+ Delegate& m_delegate;
+ ErrorCode& m_err;
+ enum CharacterClassConstructionState {
+ empty,
+ cachedCharacter,
+ cachedCharacterHyphen,
+ } m_state;
+ UChar m_character;
+ };
+
+ Parser(Delegate& delegate, const UString& pattern, unsigned backReferenceLimit)
+ : m_delegate(delegate)
+ , m_backReferenceLimit(backReferenceLimit)
+ , m_err(NoError)
+ , m_data(pattern.data())
+ , m_size(pattern.size())
+ , m_index(0)
+ , m_parenthesesNestingDepth(0)
+ {
+ }
+
+ /*
+ * parseEscape():
+ *
+ * Helper for parseTokens() AND parseCharacterClass().
+ * Unlike the other parser methods, this function does not report tokens
+ * directly to the member delegate (m_delegate), instead tokens are
+ * emitted to the delegate provided as an argument. In the case of atom
+ * escapes, parseTokens() will call parseEscape() passing m_delegate as
+ * an argument, and as such the escape will be reported to the delegate.
+ *
+ * However this method may also be used by parseCharacterClass(), in which
+ * case a CharacterClassParserDelegate will be passed as the delegate that
+ * tokens should be added to. A boolean flag is also provided to indicate
+ * whether that an escape in a CharacterClass is being parsed (some parsing
+ * rules change in this context).
+ *
+ * The boolean value returned by this method indicates whether the token
+ * parsed was an atom (outside of a characted class \b and \B will be
+ * interpreted as assertions).
+ */
+ template<bool inCharacterClass, class EscapeDelegate>
+ bool parseEscape(EscapeDelegate& delegate)
+ {
+ ASSERT(!m_err);
+ ASSERT(peek() == '\\');
+ consume();
+
+ if (atEndOfPattern()) {
+ m_err = EscapeUnterminated;
+ return false;
+ }
+
+ switch (peek()) {
+ // Assertions
+ case 'b':
+ consume();
+ if (inCharacterClass)
+ delegate.atomPatternCharacter('\b');
+ else {
+ delegate.assertionWordBoundary(false);
+ return false;
+ }
+ break;
+ case 'B':
+ consume();
+ if (inCharacterClass)
+ delegate.atomPatternCharacter('B');
+ else {
+ delegate.assertionWordBoundary(true);
+ return false;
+ }
+ break;
+
+ // CharacterClassEscape
+ case 'd':
+ consume();
+ delegate.atomBuiltInCharacterClass(DigitClassID, false);
+ break;
+ case 's':
+ consume();
+ delegate.atomBuiltInCharacterClass(SpaceClassID, false);
+ break;
+ case 'w':
+ consume();
+ delegate.atomBuiltInCharacterClass(WordClassID, false);
+ break;
+ case 'D':
+ consume();
+ delegate.atomBuiltInCharacterClass(DigitClassID, true);
+ break;
+ case 'S':
+ consume();
+ delegate.atomBuiltInCharacterClass(SpaceClassID, true);
+ break;
+ case 'W':
+ consume();
+ delegate.atomBuiltInCharacterClass(WordClassID, true);
+ break;
+
+ // DecimalEscape
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9': {
+ // To match Firefox, we parse an invalid backreference in the range [1-7] as an octal escape.
+ // First, try to parse this as backreference.
+ if (!inCharacterClass) {
+ ParseState state = saveState();
+
+ unsigned backReference = consumeNumber();
+ if (backReference <= m_backReferenceLimit) {
+ delegate.atomBackReference(backReference);
+ break;
+ }
+
+ restoreState(state);
+ }
+
+ // Not a backreference, and not octal.
+ if (peek() >= '8') {
+ delegate.atomPatternCharacter('\\');
+ break;
+ }
+
+ // Fall-through to handle this as an octal escape.
+ }
+
+ // Octal escape
+ case '0':
+ delegate.atomPatternCharacter(consumeOctal());
+ break;
+
+ // ControlEscape
+ case 'f':
+ consume();
+ delegate.atomPatternCharacter('\f');
+ break;
+ case 'n':
+ consume();
+ delegate.atomPatternCharacter('\n');
+ break;
+ case 'r':
+ consume();
+ delegate.atomPatternCharacter('\r');
+ break;
+ case 't':
+ consume();
+ delegate.atomPatternCharacter('\t');
+ break;
+ case 'v':
+ consume();
+ delegate.atomPatternCharacter('\v');
+ break;
+
+ // ControlLetter
+ case 'c': {
+ ParseState state = saveState();
+ consume();
+ if (!atEndOfPattern()) {
+ int control = consume();
+
+ // To match Firefox, inside a character class, we also accept numbers and '_' as control characters.
+ if (inCharacterClass ? WTF::isASCIIAlphanumeric(control) || (control == '_') : WTF::isASCIIAlpha(control)) {
+ delegate.atomPatternCharacter(control & 0x1f);
+ break;
+ }
+ }
+ restoreState(state);
+ delegate.atomPatternCharacter('\\');
+ break;
+ }
+
+ // HexEscape
+ case 'x': {
+ consume();
+ int x = tryConsumeHex(2);
+ if (x == -1)
+ delegate.atomPatternCharacter('x');
+ else
+ delegate.atomPatternCharacter(x);
+ break;
+ }
+
+ // UnicodeEscape
+ case 'u': {
+ consume();
+ int u = tryConsumeHex(4);
+ if (u == -1)
+ delegate.atomPatternCharacter('u');
+ else
+ delegate.atomPatternCharacter(u);
+ break;
+ }
+
+ // IdentityEscape
+ default:
+ delegate.atomPatternCharacter(consume());
+ }
+
+ return true;
+ }
+
+ /*
+ * parseAtomEscape(), parseCharacterClassEscape():
+ *
+ * These methods alias to parseEscape().
+ */
+ bool parseAtomEscape()
+ {
+ return parseEscape<false>(m_delegate);
+ }
+ void parseCharacterClassEscape(CharacterClassParserDelegate& delegate)
+ {
+ parseEscape<true>(delegate);
+ }
+
+ /*
+ * parseCharacterClass():
+ *
+ * Helper for parseTokens(); calls dirctly and indirectly (via parseCharacterClassEscape)
+ * to an instance of CharacterClassParserDelegate, to describe the character class to the
+ * delegate.
+ */
+ void parseCharacterClass()
+ {
+ ASSERT(!m_err);
+ ASSERT(peek() == '[');
+ consume();
+
+ CharacterClassParserDelegate characterClassConstructor(m_delegate, m_err);
+
+ characterClassConstructor.begin(tryConsume('^'));
+
+ while (!atEndOfPattern()) {
+ switch (peek()) {
+ case ']':
+ consume();
+ characterClassConstructor.end();
+ return;
+
+ case '\\':
+ parseCharacterClassEscape(characterClassConstructor);
+ break;
+
+ default:
+ characterClassConstructor.atomPatternCharacterUnescaped(consume());
+ }
+
+ if (m_err)
+ return;
+ }
+
+ m_err = CharacterClassUnmatched;
+ }
+
+ /*
+ * parseParenthesesBegin():
+ *
+ * Helper for parseTokens(); checks for parentheses types other than regular capturing subpatterns.
+ */
+ void parseParenthesesBegin()
+ {
+ ASSERT(!m_err);
+ ASSERT(peek() == '(');
+ consume();
+
+ if (tryConsume('?')) {
+ if (atEndOfPattern()) {
+ m_err = ParenthesesTypeInvalid;
+ return;
+ }
+
+ switch (consume()) {
+ case ':':
+ m_delegate.atomParenthesesSubpatternBegin(false);
+ break;
+
+ case '=':
+ m_delegate.atomParentheticalAssertionBegin();
+ break;
+
+ case '!':
+ m_delegate.atomParentheticalAssertionBegin(true);
+ break;
+
+ default:
+ m_err = ParenthesesTypeInvalid;
+ }
+ } else
+ m_delegate.atomParenthesesSubpatternBegin();
+
+ ++m_parenthesesNestingDepth;
+ }
+
+ /*
+ * parseParenthesesEnd():
+ *
+ * Helper for parseTokens(); checks for parse errors (due to unmatched parentheses).
+ */
+ void parseParenthesesEnd()
+ {
+ ASSERT(!m_err);
+ ASSERT(peek() == ')');
+ consume();
+
+ if (m_parenthesesNestingDepth > 0)
+ m_delegate.atomParenthesesEnd();
+ else
+ m_err = ParenthesesUnmatched;
+
+ --m_parenthesesNestingDepth;
+ }
+
+ /*
+ * parseQuantifier():
+ *
+ * Helper for parseTokens(); checks for parse errors and non-greedy quantifiers.
+ */
+ void parseQuantifier(bool lastTokenWasAnAtom, unsigned min, unsigned max)
+ {
+ ASSERT(!m_err);
+ ASSERT(min <= max);
+
+ if (lastTokenWasAnAtom)
+ m_delegate.quantifyAtom(min, max, !tryConsume('?'));
+ else
+ m_err = QuantifierWithoutAtom;
+ }
+
+ /*
+ * parseTokens():
+ *
+ * This method loops over the input pattern reporting tokens to the delegate.
+ * The method returns when a parse error is detected, or the end of the pattern
+ * is reached. One piece of state is tracked around the loop, which is whether
+ * the last token passed to the delegate was an atom (this is necessary to detect
+ * a parse error when a quantifier provided without an atom to quantify).
+ */
+ void parseTokens()
+ {
+ bool lastTokenWasAnAtom = false;
+
+ while (!atEndOfPattern()) {
+ switch (peek()) {
+ case '|':
+ consume();
+ m_delegate.disjunction();
+ lastTokenWasAnAtom = false;
+ break;
+
+ case '(':
+ parseParenthesesBegin();
+ lastTokenWasAnAtom = false;
+ break;
+
+ case ')':
+ parseParenthesesEnd();
+ lastTokenWasAnAtom = true;
+ break;
+
+ case '^':
+ consume();
+ m_delegate.assertionBOL();
+ lastTokenWasAnAtom = false;
+ break;
+
+ case '$':
+ consume();
+ m_delegate.assertionEOL();
+ lastTokenWasAnAtom = false;
+ break;
+
+ case '.':
+ consume();
+ m_delegate.atomBuiltInCharacterClass(NewlineClassID, true);
+ lastTokenWasAnAtom = true;
+ break;
+
+ case '[':
+ parseCharacterClass();
+ lastTokenWasAnAtom = true;
+ break;
+
+ case '\\':
+ lastTokenWasAnAtom = parseAtomEscape();
+ break;
+
+ case '*':
+ consume();
+ parseQuantifier(lastTokenWasAnAtom, 0, UINT_MAX);
+ lastTokenWasAnAtom = false;
+ break;
+
+ case '+':
+ consume();
+ parseQuantifier(lastTokenWasAnAtom, 1, UINT_MAX);
+ lastTokenWasAnAtom = false;
+ break;
+
+ case '?':
+ consume();
+ parseQuantifier(lastTokenWasAnAtom, 0, 1);
+ lastTokenWasAnAtom = false;
+ break;
+
+ case '{': {
+ ParseState state = saveState();
+
+ consume();
+ if (peekIsDigit()) {
+ unsigned min = consumeNumber();
+ unsigned max = min;
+
+ if (tryConsume(','))
+ max = peekIsDigit() ? consumeNumber() : UINT_MAX;
+
+ if (tryConsume('}')) {
+ if (min <= max)
+ parseQuantifier(lastTokenWasAnAtom, min, max);
+ else
+ m_err = QuantifierOutOfOrder;
+ lastTokenWasAnAtom = false;
+ break;
+ }
+ }
+
+ restoreState(state);
+ } // if we did not find a complete quantifer, fall through to the default case.
+
+ default:
+ m_delegate.atomPatternCharacter(consume());
+ lastTokenWasAnAtom = true;
+ }
+
+ if (m_err)
+ return;
+ }
+
+ if (m_parenthesesNestingDepth > 0)
+ m_err = MissingParentheses;
+ }
+
+ /*
+ * parse():
+ *
+ * This method calls regexBegin(), calls parseTokens() to parse over the input
+ * patterns, calls regexEnd() or regexError() as appropriate, and converts any
+ * error code to a const char* for a result.
+ */
+ const char* parse()
+ {
+ m_delegate.regexBegin();
+
+ if (m_size > MAX_PATTERN_SIZE)
+ m_err = PatternTooLarge;
+ else
+ parseTokens();
+ ASSERT(atEndOfPattern() || m_err);
+
+ if (m_err)
+ m_delegate.regexError();
+ else
+ m_delegate.regexEnd();
+
+ // The order of this array must match the ErrorCode enum.
+ static const char* errorMessages[NumberOfErrorCodes] = {
+ 0, // NoError
+ "regular expression too large",
+ "numbers out of order in {} quantifier",
+ "nothing to repeat",
+ "missing )",
+ "unmatched parentheses",
+ "unrecognized character after (?",
+ "missing terminating ] for character class",
+ "range out of order in character class",
+ "\\ at end of pattern"
+ };
+
+ return errorMessages[m_err];
+ }
+
+
+ // Misc helper functions:
+
+ typedef unsigned ParseState;
+
+ ParseState saveState()
+ {
+ return m_index;
+ }
+
+ void restoreState(ParseState state)
+ {
+ m_index = state;
+ }
+
+ bool atEndOfPattern()
+ {
+ ASSERT(m_index <= m_size);
+ return m_index == m_size;
+ }
+
+ int peek()
+ {
+ ASSERT(m_index < m_size);
+ return m_data[m_index];
+ }
+
+ bool peekIsDigit()
+ {
+ return !atEndOfPattern() && WTF::isASCIIDigit(peek());
+ }
+
+ unsigned peekDigit()
+ {
+ ASSERT(peekIsDigit());
+ return peek() - '0';
+ }
+
+ int consume()
+ {
+ ASSERT(m_index < m_size);
+ return m_data[m_index++];
+ }
+
+ unsigned consumeDigit()
+ {
+ ASSERT(peekIsDigit());
+ return consume() - '0';
+ }
+
+ unsigned consumeNumber()
+ {
+ unsigned n = consumeDigit();
+ // check for overflow.
+ for (unsigned newValue; peekIsDigit() && ((newValue = n * 10 + peekDigit()) >= n); ) {
+ n = newValue;
+ consume();
+ }
+ return n;
+ }
+
+ unsigned consumeOctal()
+ {
+ ASSERT(WTF::isASCIIOctalDigit(peek()));
+
+ unsigned n = consumeDigit();
+ while (n < 32 && !atEndOfPattern() && WTF::isASCIIOctalDigit(peek()))
+ n = n * 8 + consumeDigit();
+ return n;
+ }
+
+ bool tryConsume(UChar ch)
+ {
+ if (atEndOfPattern() || (m_data[m_index] != ch))
+ return false;
+ ++m_index;
+ return true;
+ }
+
+ int tryConsumeHex(int count)
+ {
+ ParseState state = saveState();
+
+ int n = 0;
+ while (count--) {
+ if (atEndOfPattern() || !WTF::isASCIIHexDigit(peek())) {
+ restoreState(state);
+ return -1;
+ }
+ n = (n << 4) | WTF::toASCIIHexValue(consume());
+ }
+ return n;
+ }
+
+ Delegate& m_delegate;
+ unsigned m_backReferenceLimit;
+ ErrorCode m_err;
+ const UChar* m_data;
+ unsigned m_size;
+ unsigned m_index;
+ unsigned m_parenthesesNestingDepth;
+
+ // Derived by empirical testing of compile time in PCRE and WREC.
+ static const unsigned MAX_PATTERN_SIZE = 1024 * 1024;
+};
+
+/*
+ * Yarr::parse():
+ *
+ * The parse method is passed a pattern to be parsed and a delegate upon which
+ * callbacks will be made to record the parsed tokens forming the regex.
+ * Yarr::parse() returns null on success, or a const C string providing an error
+ * message where a parse error occurs.
+ *
+ * The Delegate must implement the following interface:
+ *
+ * void assertionBOL();
+ * void assertionEOL();
+ * void assertionWordBoundary(bool invert);
+ *
+ * void atomPatternCharacter(UChar ch);
+ * void atomBuiltInCharacterClass(BuiltInCharacterClassID classID, bool invert);
+ * void atomCharacterClassBegin(bool invert)
+ * void atomCharacterClassAtom(UChar ch)
+ * void atomCharacterClassRange(UChar begin, UChar end)
+ * void atomCharacterClassBuiltIn(BuiltInCharacterClassID classID, bool invert)
+ * void atomCharacterClassEnd()
+ * void atomParenthesesSubpatternBegin(bool capture = true);
+ * void atomParentheticalAssertionBegin(bool invert = false);
+ * void atomParenthesesEnd();
+ * void atomBackReference(unsigned subpatternId);
+ *
+ * void quantifyAtom(unsigned min, unsigned max, bool greedy);
+ *
+ * void disjunction();
+ *
+ * void regexBegin();
+ * void regexEnd();
+ * void regexError();
+ *
+ * Before any call recording tokens are made, regexBegin() will be called on the
+ * delegate once. Once parsing is complete either regexEnd() or regexError() will
+ * be called, as appropriate.
+ *
+ * The regular expression is described by a sequence of assertion*() and atom*()
+ * callbacks to the delegate, describing the terms in the regular expression.
+ * Following an atom a quantifyAtom() call may occur to indicate that the previous
+ * atom should be quantified. In the case of atoms described across multiple
+ * calls (parentheses and character classes) the call to quantifyAtom() will come
+ * after the call to the atom*End() method, never after atom*Begin().
+ *
+ * Character classes may either be described by a single call to
+ * atomBuiltInCharacterClass(), or by a sequence of atomCharacterClass*() calls.
+ * In the latter case, ...Begin() will be called, followed by a sequence of
+ * calls to ...Atom(), ...Range(), and ...BuiltIn(), followed by a call to ...End().
+ *
+ * Sequences of atoms and assertions are broken into alternatives via calls to
+ * disjunction(). Assertions, atoms, and disjunctions emitted between calls to
+ * atomParenthesesBegin() and atomParenthesesEnd() form the body of a subpattern.
+ * atomParenthesesBegin() is passed a subpatternId. In the case of a regular
+ * capturing subpattern, this will be the subpatternId associated with these
+ * parentheses, and will also by definition be the lowest subpatternId of these
+ * parentheses and of any nested paretheses. The atomParenthesesEnd() method
+ * is passed the subpatternId of the last capturing subexpression nested within
+ * these paretheses. In the case of a capturing subpattern with no nested
+ * capturing subpatterns, the same subpatternId will be passed to the begin and
+ * end functions. In the case of non-capturing subpatterns the subpatternId
+ * passed to the begin method is also the first possible subpatternId that might
+ * be nested within these paretheses. If a set of non-capturing parentheses does
+ * not contain any capturing subpatterns, then the subpatternId passed to begin
+ * will be greater than the subpatternId passed to end.
+ */
+
+template<class Delegate>
+const char* parse(Delegate& delegate, const UString& pattern, unsigned backReferenceLimit = UINT_MAX)
+{
+ return Parser<Delegate>(delegate, pattern, backReferenceLimit).parse();
+}
+
+} } // namespace JSC::Yarr
+
+#endif
+
+#endif // RegexParser_h
diff --git a/src/3rdparty/javascriptcore/JavaScriptCore/yarr/RegexPattern.h b/src/3rdparty/javascriptcore/JavaScriptCore/yarr/RegexPattern.h
new file mode 100644
index 0000000..dd7512d
--- /dev/null
+++ b/src/3rdparty/javascriptcore/JavaScriptCore/yarr/RegexPattern.h
@@ -0,0 +1,356 @@
+/*
+ * Copyright (C) 2009 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef RegexPattern_h
+#define RegexPattern_h
+
+#include <wtf/Platform.h>
+
+#if ENABLE(YARR)
+
+#include <wtf/Vector.h>
+#include <wtf/unicode/Unicode.h>
+
+
+namespace JSC { namespace Yarr {
+
+#define RegexStackSpaceForBackTrackInfoPatternCharacter 1 // Only for !fixed quantifiers.
+#define RegexStackSpaceForBackTrackInfoCharacterClass 1 // Only for !fixed quantifiers.
+#define RegexStackSpaceForBackTrackInfoBackReference 2
+#define RegexStackSpaceForBackTrackInfoAlternative 1 // One per alternative.
+#define RegexStackSpaceForBackTrackInfoParentheticalAssertion 1
+#define RegexStackSpaceForBackTrackInfoParenthesesOnce 1 // Only for !fixed quantifiers.
+#define RegexStackSpaceForBackTrackInfoParentheses 4
+
+struct PatternDisjunction;
+
+struct CharacterRange {
+ UChar begin;
+ UChar end;
+
+ CharacterRange(UChar begin, UChar end)
+ : begin(begin)
+ , end(end)
+ {
+ }
+};
+
+struct CharacterClass : FastAllocBase {
+ Vector<UChar> m_matches;
+ Vector<CharacterRange> m_ranges;
+ Vector<UChar> m_matchesUnicode;
+ Vector<CharacterRange> m_rangesUnicode;
+};
+
+enum QuantifierType {
+ QuantifierFixedCount,
+ QuantifierGreedy,
+ QuantifierNonGreedy,
+};
+
+struct PatternTerm {
+ enum Type {
+ TypeAssertionBOL,
+ TypeAssertionEOL,
+ TypeAssertionWordBoundary,
+ TypePatternCharacter,
+ TypeCharacterClass,
+ TypeBackReference,
+ TypeForwardReference,
+ TypeParenthesesSubpattern,
+ TypeParentheticalAssertion,
+ } type;
+ bool invertOrCapture;
+ union {
+ UChar patternCharacter;
+ CharacterClass* characterClass;
+ unsigned subpatternId;
+ struct {
+ PatternDisjunction* disjunction;
+ unsigned subpatternId;
+ unsigned lastSubpatternId;
+ bool isCopy;
+ } parentheses;
+ };
+ QuantifierType quantityType;
+ unsigned quantityCount;
+ int inputPosition;
+ unsigned frameLocation;
+
+ PatternTerm(UChar ch)
+ : type(PatternTerm::TypePatternCharacter)
+ {
+ patternCharacter = ch;
+ quantityType = QuantifierFixedCount;
+ quantityCount = 1;
+ }
+
+ PatternTerm(CharacterClass* charClass, bool invert)
+ : type(PatternTerm::TypeCharacterClass)
+ , invertOrCapture(invert)
+ {
+ characterClass = charClass;
+ quantityType = QuantifierFixedCount;
+ quantityCount = 1;
+ }
+
+ PatternTerm(Type type, unsigned subpatternId, PatternDisjunction* disjunction, bool invertOrCapture)
+ : type(type)
+ , invertOrCapture(invertOrCapture)
+ {
+ parentheses.disjunction = disjunction;
+ parentheses.subpatternId = subpatternId;
+ parentheses.isCopy = false;
+ quantityType = QuantifierFixedCount;
+ quantityCount = 1;
+ }
+
+ PatternTerm(Type type, bool invert = false)
+ : type(type)
+ , invertOrCapture(invert)
+ {
+ quantityType = QuantifierFixedCount;
+ quantityCount = 1;
+ }
+
+ PatternTerm(unsigned spatternId)
+ : type(TypeBackReference)
+ , invertOrCapture(false)
+ {
+ subpatternId = spatternId;
+ quantityType = QuantifierFixedCount;
+ quantityCount = 1;
+ }
+
+ static PatternTerm ForwardReference()
+ {
+ return PatternTerm(TypeForwardReference);
+ }
+
+ static PatternTerm BOL()
+ {
+ return PatternTerm(TypeAssertionBOL);
+ }
+
+ static PatternTerm EOL()
+ {
+ return PatternTerm(TypeAssertionEOL);
+ }
+
+ static PatternTerm WordBoundary(bool invert)
+ {
+ return PatternTerm(TypeAssertionWordBoundary, invert);
+ }
+
+ bool invert()
+ {
+ return invertOrCapture;
+ }
+
+ bool capture()
+ {
+ return invertOrCapture;
+ }
+
+ void quantify(unsigned count, QuantifierType type)
+ {
+ quantityCount = count;
+ quantityType = type;
+ }
+};
+
+struct PatternAlternative : FastAllocBase {
+ PatternAlternative(PatternDisjunction* disjunction)
+ : m_parent(disjunction)
+ {
+ }
+
+ PatternTerm& lastTerm()
+ {
+ ASSERT(m_terms.size());
+ return m_terms[m_terms.size() - 1];
+ }
+
+ void removeLastTerm()
+ {
+ ASSERT(m_terms.size());
+ m_terms.shrink(m_terms.size() - 1);
+ }
+
+ Vector<PatternTerm> m_terms;
+ PatternDisjunction* m_parent;
+ unsigned m_minimumSize;
+ bool m_hasFixedSize;
+};
+
+struct PatternDisjunction : FastAllocBase {
+ PatternDisjunction(PatternAlternative* parent = 0)
+ : m_parent(parent)
+ {
+ }
+
+ ~PatternDisjunction()
+ {
+ deleteAllValues(m_alternatives);
+ }
+
+ PatternAlternative* addNewAlternative()
+ {
+ PatternAlternative* alternative = new PatternAlternative(this);
+ m_alternatives.append(alternative);
+ return alternative;
+ }
+
+ Vector<PatternAlternative*> m_alternatives;
+ PatternAlternative* m_parent;
+ unsigned m_minimumSize;
+ unsigned m_callFrameSize;
+ bool m_hasFixedSize;
+};
+
+// You probably don't want to be calling these functions directly
+// (please to be calling newlineCharacterClass() et al on your
+// friendly neighborhood RegexPattern instance to get nicely
+// cached copies).
+CharacterClass* newlineCreate();
+CharacterClass* digitsCreate();
+CharacterClass* spacesCreate();
+CharacterClass* wordcharCreate();
+CharacterClass* nondigitsCreate();
+CharacterClass* nonspacesCreate();
+CharacterClass* nonwordcharCreate();
+
+struct RegexPattern {
+ RegexPattern(bool ignoreCase, bool multiline)
+ : m_ignoreCase(ignoreCase)
+ , m_multiline(multiline)
+ , m_numSubpatterns(0)
+ , m_maxBackReference(0)
+ , newlineCached(0)
+ , digitsCached(0)
+ , spacesCached(0)
+ , wordcharCached(0)
+ , nondigitsCached(0)
+ , nonspacesCached(0)
+ , nonwordcharCached(0)
+ {
+ }
+
+ ~RegexPattern()
+ {
+ deleteAllValues(m_disjunctions);
+ deleteAllValues(m_userCharacterClasses);
+ }
+
+ void reset()
+ {
+ m_numSubpatterns = 0;
+ m_maxBackReference = 0;
+
+ newlineCached = 0;
+ digitsCached = 0;
+ spacesCached = 0;
+ wordcharCached = 0;
+ nondigitsCached = 0;
+ nonspacesCached = 0;
+ nonwordcharCached = 0;
+
+ deleteAllValues(m_disjunctions);
+ m_disjunctions.clear();
+ deleteAllValues(m_userCharacterClasses);
+ m_userCharacterClasses.clear();
+ }
+
+ bool containsIllegalBackReference()
+ {
+ return m_maxBackReference > m_numSubpatterns;
+ }
+
+ CharacterClass* newlineCharacterClass()
+ {
+ if (!newlineCached)
+ m_userCharacterClasses.append(newlineCached = newlineCreate());
+ return newlineCached;
+ }
+ CharacterClass* digitsCharacterClass()
+ {
+ if (!digitsCached)
+ m_userCharacterClasses.append(digitsCached = digitsCreate());
+ return digitsCached;
+ }
+ CharacterClass* spacesCharacterClass()
+ {
+ if (!spacesCached)
+ m_userCharacterClasses.append(spacesCached = spacesCreate());
+ return spacesCached;
+ }
+ CharacterClass* wordcharCharacterClass()
+ {
+ if (!wordcharCached)
+ m_userCharacterClasses.append(wordcharCached = wordcharCreate());
+ return wordcharCached;
+ }
+ CharacterClass* nondigitsCharacterClass()
+ {
+ if (!nondigitsCached)
+ m_userCharacterClasses.append(nondigitsCached = nondigitsCreate());
+ return nondigitsCached;
+ }
+ CharacterClass* nonspacesCharacterClass()
+ {
+ if (!nonspacesCached)
+ m_userCharacterClasses.append(nonspacesCached = nonspacesCreate());
+ return nonspacesCached;
+ }
+ CharacterClass* nonwordcharCharacterClass()
+ {
+ if (!nonwordcharCached)
+ m_userCharacterClasses.append(nonwordcharCached = nonwordcharCreate());
+ return nonwordcharCached;
+ }
+
+ bool m_ignoreCase;
+ bool m_multiline;
+ unsigned m_numSubpatterns;
+ unsigned m_maxBackReference;
+ PatternDisjunction* m_body;
+ Vector<PatternDisjunction*, 4> m_disjunctions;
+ Vector<CharacterClass*> m_userCharacterClasses;
+
+private:
+ CharacterClass* newlineCached;
+ CharacterClass* digitsCached;
+ CharacterClass* spacesCached;
+ CharacterClass* wordcharCached;
+ CharacterClass* nondigitsCached;
+ CharacterClass* nonspacesCached;
+ CharacterClass* nonwordcharCached;
+};
+
+} } // namespace JSC::Yarr
+
+#endif
+
+#endif // RegexPattern_h