diff options
author | Lorry Tar Creator <lorry-tar-importer@lorry> | 2017-06-27 06:07:23 +0000 |
---|---|---|
committer | Lorry Tar Creator <lorry-tar-importer@lorry> | 2017-06-27 06:07:23 +0000 |
commit | 1bf1084f2b10c3b47fd1a588d85d21ed0eb41d0c (patch) | |
tree | 46dcd36c86e7fbc6e5df36deb463b33e9967a6f7 /Source/JavaScriptCore/yarr/YarrInterpreter.cpp | |
parent | 32761a6cee1d0dee366b885b7b9c777e67885688 (diff) | |
download | WebKitGtk-tarball-master.tar.gz |
webkitgtk-2.16.5HEADwebkitgtk-2.16.5master
Diffstat (limited to 'Source/JavaScriptCore/yarr/YarrInterpreter.cpp')
-rw-r--r-- | Source/JavaScriptCore/yarr/YarrInterpreter.cpp | 411 |
1 files changed, 271 insertions, 140 deletions
diff --git a/Source/JavaScriptCore/yarr/YarrInterpreter.cpp b/Source/JavaScriptCore/yarr/YarrInterpreter.cpp index 8645b5f20..9cf14e702 100644 --- a/Source/JavaScriptCore/yarr/YarrInterpreter.cpp +++ b/Source/JavaScriptCore/yarr/YarrInterpreter.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2009 Apple Inc. All rights reserved. + * Copyright (C) 2009, 2013, 2016 Apple Inc. All rights reserved. * Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged * * Redistribution and use in source and binary forms, with or without @@ -27,8 +27,9 @@ #include "config.h" #include "YarrInterpreter.h" +#include "SuperSampler.h" #include "Yarr.h" -#include "YarrCanonicalizeUCS2.h" +#include "YarrCanonicalize.h" #include <wtf/BumpPointerAllocator.h> #include <wtf/DataLog.h> #include <wtf/text/CString.h> @@ -44,9 +45,11 @@ public: struct ParenthesesDisjunctionContext; struct BackTrackInfoPatternCharacter { + uintptr_t begin; // Only needed for unicode patterns uintptr_t matchAmount; }; struct BackTrackInfoCharacterClass { + uintptr_t begin; // Only needed for unicode patterns uintptr_t matchAmount; }; struct BackTrackInfoBackReference { @@ -154,7 +157,7 @@ public: ParenthesesDisjunctionContext* allocParenthesesDisjunctionContext(ByteDisjunction* disjunction, unsigned* output, ByteTerm& term) { - size_t size = sizeof(ParenthesesDisjunctionContext) - sizeof(unsigned) + (term.atom.parenthesesDisjunction->m_numSubpatterns << 1) * sizeof(unsigned) + sizeof(DisjunctionContext) - sizeof(uintptr_t) + disjunction->m_frameSize * sizeof(uintptr_t); + size_t size = sizeof(ParenthesesDisjunctionContext) - sizeof(unsigned) + (term.atom.parenthesesDisjunction->m_numSubpatterns << 1) * sizeof(unsigned) + sizeof(DisjunctionContext) - sizeof(uintptr_t) + static_cast<size_t>(disjunction->m_frameSize) * sizeof(uintptr_t); allocatorPool = allocatorPool->ensureCapacity(size); RELEASE_ASSERT(allocatorPool); return new (allocatorPool->alloc(size)) ParenthesesDisjunctionContext(output, term); @@ -167,10 +170,11 @@ public: class InputStream { public: - InputStream(const CharType* input, unsigned start, unsigned length) + InputStream(const CharType* input, unsigned start, unsigned length, bool decodeSurrogatePairs) : input(input) , pos(start) , length(length) + , decodeSurrogatePairs(decodeSurrogatePairs) { } @@ -204,13 +208,40 @@ public: RELEASE_ASSERT(pos >= negativePositionOffest); unsigned p = pos - negativePositionOffest; ASSERT(p < length); - return input[p]; + int result = input[p]; + if (U16_IS_LEAD(result) && decodeSurrogatePairs && p + 1 < length && U16_IS_TRAIL(input[p + 1])) { + if (atEnd()) + return -1; + + result = U16_GET_SUPPLEMENTARY(result, input[p + 1]); + next(); + } + return result; + } + + int readSurrogatePairChecked(unsigned negativePositionOffset) + { + RELEASE_ASSERT(pos >= negativePositionOffset); + unsigned p = pos - negativePositionOffset; + ASSERT(p < length); + if (p + 1 >= length) + return -1; + + int first = input[p]; + int second = input[p + 1]; + if (U16_IS_LEAD(first) && U16_IS_TRAIL(second)) + return U16_GET_SUPPLEMENTARY(first, second); + + return -1; } int reread(unsigned from) { ASSERT(from < length); - return input[from]; + int result = input[from]; + if (U16_IS_LEAD(result) && decodeSurrogatePairs && from + 1 < length && U16_IS_TRAIL(input[from + 1])) + result = U16_GET_SUPPLEMENTARY(result, input[from + 1]); + return result; } int prev() @@ -261,9 +292,9 @@ public: pos -= count; } - bool atStart(unsigned negativePositionOffest) + bool atStart(unsigned negativePositionOffset) { - return pos == negativePositionOffest; + return pos == negativePositionOffset; } bool atEnd(unsigned negativePositionOffest) @@ -281,11 +312,12 @@ public: const CharType* input; unsigned pos; unsigned length; + bool decodeSurrogatePairs; }; bool testCharacterClass(CharacterClass* characterClass, int ch) { - if (ch & 0xFF80) { + if (!isASCII(ch)) { for (unsigned i = 0; i < characterClass->m_matchesUnicode.size(); ++i) if (ch == characterClass->m_matchesUnicode[i]) return true; @@ -309,6 +341,11 @@ public: return testChar == input.readChecked(negativeInputOffset); } + bool checkSurrogatePair(int testUnicodeChar, unsigned negativeInputOffset) + { + return testUnicodeChar == input.readSurrogatePairChecked(negativeInputOffset); + } + bool checkCasedCharacter(int loChar, int hiChar, unsigned negativeInputOffset) { int ch = input.readChecked(negativeInputOffset); @@ -328,32 +365,31 @@ public: if (!input.checkInput(matchSize)) return false; - if (pattern->m_ignoreCase) { - for (unsigned i = 0; i < matchSize; ++i) { - int oldCh = input.reread(matchBegin + i); - int ch = input.readChecked(negativeInputOffset + matchSize - i); - - if (oldCh == ch) - continue; - - // The definition for canonicalize (see ES 5.1, 15.10.2.8) means that - // unicode values are never allowed to match against ascii ones. - if (isASCII(oldCh) || isASCII(ch)) { + for (unsigned i = 0; i < matchSize; ++i) { + int oldCh = input.reread(matchBegin + i); + int ch; + if (!U_IS_BMP(oldCh)) { + ch = input.readSurrogatePairChecked(negativeInputOffset + matchSize - i); + ++i; + } else + ch = input.readChecked(negativeInputOffset + matchSize - i); + + if (oldCh == ch) + continue; + + if (pattern->ignoreCase()) { + // See ES 6.0, 21.2.2.8.2 for the definition of Canonicalize(). For non-Unicode + // patterns, Unicode values are never allowed to match against ASCII ones. + // For Unicode, we need to check all canonical equivalents of a character. + if (!unicode && (isASCII(oldCh) || isASCII(ch))) { if (toASCIIUpper(oldCh) == toASCIIUpper(ch)) continue; - } else if (areCanonicallyEquivalent(oldCh, ch)) + } else if (areCanonicallyEquivalent(oldCh, ch, unicode ? CanonicalMode::Unicode : CanonicalMode::UCS2)) continue; - - input.uncheckInput(matchSize); - return false; - } - } else { - for (unsigned i = 0; i < matchSize; ++i) { - if (!checkCharacter(input.reread(matchBegin + i), negativeInputOffset + matchSize - i)) { - input.uncheckInput(matchSize); - return false; - } } + + input.uncheckInput(matchSize); + return false; } return true; @@ -361,15 +397,15 @@ public: bool matchAssertionBOL(ByteTerm& term) { - return (input.atStart(term.inputPosition)) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.readChecked(term.inputPosition + 1))); + return (input.atStart(term.inputPosition)) || (pattern->multiline() && testCharacterClass(pattern->newlineCharacterClass, input.readChecked(term.inputPosition + 1))); } bool matchAssertionEOL(ByteTerm& term) { if (term.inputPosition) - return (input.atEnd(term.inputPosition)) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.readChecked(term.inputPosition))); + return (input.atEnd(term.inputPosition)) || (pattern->multiline() && testCharacterClass(pattern->newlineCharacterClass, input.readChecked(term.inputPosition))); - return (input.atEnd()) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.read())); + return (input.atEnd()) || (pattern->multiline() && testCharacterClass(pattern->newlineCharacterClass, input.read())); } bool matchAssertionWordBoundary(ByteTerm& term) @@ -396,18 +432,18 @@ public: case QuantifierGreedy: if (backTrack->matchAmount) { --backTrack->matchAmount; - input.uncheckInput(1); + input.uncheckInput(U16_LENGTH(term.atom.patternCharacter)); return true; } break; case QuantifierNonGreedy: - if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) { + if ((backTrack->matchAmount < term.atom.quantityMaxCount) && input.checkInput(1)) { ++backTrack->matchAmount; if (checkCharacter(term.atom.patternCharacter, term.inputPosition + 1)) return true; } - input.uncheckInput(backTrack->matchAmount); + input.setPos(backTrack->begin); break; } @@ -431,7 +467,7 @@ public: break; case QuantifierNonGreedy: - if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) { + if ((backTrack->matchAmount < term.atom.quantityMaxCount) && input.checkInput(1)) { ++backTrack->matchAmount; if (checkCasedCharacter(term.atom.casedCharacter.lo, term.atom.casedCharacter.hi, term.inputPosition + 1)) return true; @@ -446,11 +482,24 @@ public: bool matchCharacterClass(ByteTerm& term, DisjunctionContext* context) { ASSERT(term.type == ByteTerm::TypeCharacterClass); - BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation); + BackTrackInfoCharacterClass* backTrack = reinterpret_cast<BackTrackInfoCharacterClass*>(context->frame + term.frameLocation); switch (term.atom.quantityType) { case QuantifierFixedCount: { - for (unsigned matchAmount = 0; matchAmount < term.atom.quantityCount; ++matchAmount) { + if (unicode) { + backTrack->begin = input.getPos(); + unsigned matchAmount = 0; + for (matchAmount = 0; matchAmount < term.atom.quantityMaxCount; ++matchAmount) { + if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition - matchAmount)) { + input.setPos(backTrack->begin); + return false; + } + } + + return true; + } + + for (unsigned matchAmount = 0; matchAmount < term.atom.quantityMaxCount; ++matchAmount) { if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition - matchAmount)) return false; } @@ -458,13 +507,16 @@ public: } case QuantifierGreedy: { + unsigned position = input.getPos(); + backTrack->begin = position; unsigned matchAmount = 0; - while ((matchAmount < term.atom.quantityCount) && input.checkInput(1)) { + while ((matchAmount < term.atom.quantityMaxCount) && input.checkInput(1)) { if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition + 1)) { - input.uncheckInput(1); + input.setPos(position); break; } ++matchAmount; + position = input.getPos(); } backTrack->matchAmount = matchAmount; @@ -472,6 +524,7 @@ public: } case QuantifierNonGreedy: + backTrack->begin = input.getPos(); backTrack->matchAmount = 0; return true; } @@ -483,14 +536,28 @@ public: bool backtrackCharacterClass(ByteTerm& term, DisjunctionContext* context) { ASSERT(term.type == ByteTerm::TypeCharacterClass); - BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation); + BackTrackInfoCharacterClass* backTrack = reinterpret_cast<BackTrackInfoCharacterClass*>(context->frame + term.frameLocation); switch (term.atom.quantityType) { case QuantifierFixedCount: + if (unicode) + input.setPos(backTrack->begin); break; case QuantifierGreedy: if (backTrack->matchAmount) { + if (unicode) { + // Rematch one less match + input.setPos(backTrack->begin); + --backTrack->matchAmount; + for (unsigned matchAmount = 0; (matchAmount < backTrack->matchAmount) && input.checkInput(1); ++matchAmount) { + if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition + 1)) { + input.uncheckInput(1); + break; + } + } + return true; + } --backTrack->matchAmount; input.uncheckInput(1); return true; @@ -498,12 +565,12 @@ public: break; case QuantifierNonGreedy: - if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) { + if ((backTrack->matchAmount < term.atom.quantityMaxCount) && input.checkInput(1)) { ++backTrack->matchAmount; if (checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition + 1)) return true; } - input.uncheckInput(backTrack->matchAmount); + input.setPos(backTrack->begin); break; } @@ -535,7 +602,7 @@ public: switch (term.atom.quantityType) { case QuantifierFixedCount: { backTrack->begin = input.getPos(); - for (unsigned matchAmount = 0; matchAmount < term.atom.quantityCount; ++matchAmount) { + for (unsigned matchAmount = 0; matchAmount < term.atom.quantityMaxCount; ++matchAmount) { if (!tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition)) { input.setPos(backTrack->begin); return false; @@ -546,7 +613,7 @@ public: case QuantifierGreedy: { unsigned matchAmount = 0; - while ((matchAmount < term.atom.quantityCount) && tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition)) + while ((matchAmount < term.atom.quantityMaxCount) && tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition)) ++matchAmount; backTrack->matchAmount = matchAmount; return true; @@ -580,7 +647,7 @@ public: switch (term.atom.quantityType) { case QuantifierFixedCount: - // for quantityCount == 1, could rewind. + // for quantityMaxCount == 1, could rewind. input.setPos(backTrack->begin); break; @@ -593,7 +660,7 @@ public: break; case QuantifierNonGreedy: - if ((backTrack->matchAmount < term.atom.quantityCount) && tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition)) { + if ((backTrack->matchAmount < term.atom.quantityMaxCount) && tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition)) { ++backTrack->matchAmount; return true; } @@ -608,8 +675,8 @@ public: { if (term.capture()) { unsigned subpatternId = term.atom.subpatternId; - output[(subpatternId << 1)] = context->getDisjunctionContext(term)->matchBegin + term.inputPosition; - output[(subpatternId << 1) + 1] = context->getDisjunctionContext(term)->matchEnd + term.inputPosition; + output[(subpatternId << 1)] = context->getDisjunctionContext(term)->matchBegin - term.inputPosition; + output[(subpatternId << 1) + 1] = context->getDisjunctionContext(term)->matchEnd - term.inputPosition; } } void resetMatches(ByteTerm& term, ParenthesesDisjunctionContext* context) @@ -641,7 +708,7 @@ public: bool matchParenthesesOnceBegin(ByteTerm& term, DisjunctionContext* context) { ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceBegin); - ASSERT(term.atom.quantityCount == 1); + ASSERT(term.atom.quantityMaxCount == 1); BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation); @@ -671,11 +738,11 @@ public: bool matchParenthesesOnceEnd(ByteTerm& term, DisjunctionContext* context) { ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceEnd); - ASSERT(term.atom.quantityCount == 1); + ASSERT(term.atom.quantityMaxCount == 1); if (term.capture()) { unsigned subpatternId = term.atom.subpatternId; - output[(subpatternId << 1) + 1] = input.getPos() + term.inputPosition; + output[(subpatternId << 1) + 1] = input.getPos() - term.inputPosition; } if (term.atom.quantityType == QuantifierFixedCount) @@ -688,7 +755,7 @@ public: bool backtrackParenthesesOnceBegin(ByteTerm& term, DisjunctionContext* context) { ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceBegin); - ASSERT(term.atom.quantityCount == 1); + ASSERT(term.atom.quantityMaxCount == 1); BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation); @@ -718,7 +785,7 @@ public: bool backtrackParenthesesOnceEnd(ByteTerm& term, DisjunctionContext* context) { ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceEnd); - ASSERT(term.atom.quantityCount == 1); + ASSERT(term.atom.quantityMaxCount == 1); BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation); @@ -739,7 +806,7 @@ public: ASSERT((&term - term.atom.parenthesesWidth)->type == ByteTerm::TypeParenthesesSubpatternOnceBegin); ASSERT((&term - term.atom.parenthesesWidth)->inputPosition == term.inputPosition); unsigned subpatternId = term.atom.subpatternId; - output[subpatternId << 1] = input.getPos() + term.inputPosition; + output[subpatternId << 1] = input.getPos() - term.inputPosition; } context->term -= term.atom.parenthesesWidth; return true; @@ -756,7 +823,7 @@ public: { ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalBegin); ASSERT(term.atom.quantityType == QuantifierGreedy); - ASSERT(term.atom.quantityCount == quantifyInfinite); + ASSERT(term.atom.quantityMaxCount == quantifyInfinite); ASSERT(!term.capture()); BackTrackInfoParenthesesTerminal* backTrack = reinterpret_cast<BackTrackInfoParenthesesTerminal*>(context->frame + term.frameLocation); @@ -773,7 +840,7 @@ public: if (backTrack->begin == input.getPos()) return false; - // Successful match! Okay, what's next? - loop around and try to match moar! + // Successful match! Okay, what's next? - loop around and try to match more! context->term -= (term.atom.parenthesesWidth + 1); return true; } @@ -782,7 +849,7 @@ public: { ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalBegin); ASSERT(term.atom.quantityType == QuantifierGreedy); - ASSERT(term.atom.quantityCount == quantifyInfinite); + ASSERT(term.atom.quantityMaxCount == quantifyInfinite); ASSERT(!term.capture()); // If we backtrack to this point, we have failed to match this iteration of the parens. @@ -802,7 +869,7 @@ public: bool matchParentheticalAssertionBegin(ByteTerm& term, DisjunctionContext* context) { ASSERT(term.type == ByteTerm::TypeParentheticalAssertionBegin); - ASSERT(term.atom.quantityCount == 1); + ASSERT(term.atom.quantityMaxCount == 1); BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation); @@ -813,7 +880,7 @@ public: bool matchParentheticalAssertionEnd(ByteTerm& term, DisjunctionContext* context) { ASSERT(term.type == ByteTerm::TypeParentheticalAssertionEnd); - ASSERT(term.atom.quantityCount == 1); + ASSERT(term.atom.quantityMaxCount == 1); BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation); @@ -831,7 +898,7 @@ public: bool backtrackParentheticalAssertionBegin(ByteTerm& term, DisjunctionContext* context) { ASSERT(term.type == ByteTerm::TypeParentheticalAssertionBegin); - ASSERT(term.atom.quantityCount == 1); + ASSERT(term.atom.quantityMaxCount == 1); // We've failed to match parens; if they are inverted, this is win! if (term.invert()) { @@ -845,7 +912,7 @@ public: bool backtrackParentheticalAssertionEnd(ByteTerm& term, DisjunctionContext* context) { ASSERT(term.type == ByteTerm::TypeParentheticalAssertionEnd); - ASSERT(term.atom.quantityCount == 1); + ASSERT(term.atom.quantityMaxCount == 1); BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation); @@ -865,36 +932,45 @@ public: backTrack->matchAmount = 0; backTrack->lastContext = 0; - switch (term.atom.quantityType) { - case QuantifierFixedCount: { + ASSERT(term.atom.quantityType != QuantifierFixedCount || term.atom.quantityMinCount == term.atom.quantityMaxCount); + + unsigned minimumMatchCount = term.atom.quantityMinCount; + JSRegExpResult fixedMatchResult; + + // Handle fixed matches and the minimum part of a variable length match. + if (minimumMatchCount) { // While we haven't yet reached our fixed limit, - while (backTrack->matchAmount < term.atom.quantityCount) { + while (backTrack->matchAmount < minimumMatchCount) { // Try to do a match, and it it succeeds, add it to the list. ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term); - JSRegExpResult result = matchDisjunction(disjunctionBody, context->getDisjunctionContext(term)); - if (result == JSRegExpMatch) + fixedMatchResult = matchDisjunction(disjunctionBody, context->getDisjunctionContext(term)); + if (fixedMatchResult == JSRegExpMatch) appendParenthesesDisjunctionContext(backTrack, context); else { // The match failed; try to find an alternate point to carry on from. resetMatches(term, context); freeParenthesesDisjunctionContext(context); - - if (result != JSRegExpNoMatch) - return result; + + if (fixedMatchResult != JSRegExpNoMatch) + return fixedMatchResult; JSRegExpResult backtrackResult = parenthesesDoBacktrack(term, backTrack); if (backtrackResult != JSRegExpMatch) return backtrackResult; } } - ASSERT(backTrack->matchAmount == term.atom.quantityCount); ParenthesesDisjunctionContext* context = backTrack->lastContext; recordParenthesesMatch(term, context); + } + + switch (term.atom.quantityType) { + case QuantifierFixedCount: { + ASSERT(backTrack->matchAmount == term.atom.quantityMaxCount); return JSRegExpMatch; } case QuantifierGreedy: { - while (backTrack->matchAmount < term.atom.quantityCount) { + while (backTrack->matchAmount < term.atom.quantityMaxCount) { ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term); JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term)); if (result == JSRegExpMatch) @@ -944,7 +1020,7 @@ public: switch (term.atom.quantityType) { case QuantifierFixedCount: { - ASSERT(backTrack->matchAmount == term.atom.quantityCount); + ASSERT(backTrack->matchAmount == term.atom.quantityMaxCount); ParenthesesDisjunctionContext* context = 0; JSRegExpResult result = parenthesesDoBacktrack(term, backTrack); @@ -953,7 +1029,7 @@ public: return result; // While we haven't yet reached our fixed limit, - while (backTrack->matchAmount < term.atom.quantityCount) { + while (backTrack->matchAmount < term.atom.quantityMaxCount) { // Try to do a match, and it it succeeds, add it to the list. context = allocParenthesesDisjunctionContext(disjunctionBody, output, term); result = matchDisjunction(disjunctionBody, context->getDisjunctionContext(term)); @@ -973,7 +1049,7 @@ public: } } - ASSERT(backTrack->matchAmount == term.atom.quantityCount); + ASSERT(backTrack->matchAmount == term.atom.quantityMaxCount); context = backTrack->lastContext; recordParenthesesMatch(term, context); return JSRegExpMatch; @@ -986,7 +1062,7 @@ public: ParenthesesDisjunctionContext* context = backTrack->lastContext; JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term), true); if (result == JSRegExpMatch) { - while (backTrack->matchAmount < term.atom.quantityCount) { + while (backTrack->matchAmount < term.atom.quantityMaxCount) { ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term); JSRegExpResult parenthesesResult = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term)); if (parenthesesResult == JSRegExpMatch) @@ -1019,7 +1095,7 @@ public: case QuantifierNonGreedy: { // If we've not reached the limit, try to add one more match. - if (backTrack->matchAmount < term.atom.quantityCount) { + if (backTrack->matchAmount < term.atom.quantityMaxCount) { ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term); JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term)); if (result == JSRegExpMatch) { @@ -1089,7 +1165,7 @@ public: if (((matchBegin && term.anchors.m_bol) || ((matchEnd != input.end()) && term.anchors.m_eol)) - && !pattern->m_multiline) + && !pattern->multiline()) return false; context->matchBegin = matchBegin; @@ -1154,21 +1230,37 @@ public: case ByteTerm::TypePatternCharacterOnce: case ByteTerm::TypePatternCharacterFixed: { - for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityCount; ++matchAmount) { - if (!checkCharacter(currentTerm().atom.patternCharacter, currentTerm().inputPosition - matchAmount)) + if (unicode) { + if (!U_IS_BMP(currentTerm().atom.patternCharacter)) { + for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityMaxCount; ++matchAmount) { + if (!checkSurrogatePair(currentTerm().atom.patternCharacter, currentTerm().inputPosition - 2 * matchAmount)) { + BACKTRACK(); + } + } + MATCH_NEXT(); + } + } + unsigned position = input.getPos(); // May need to back out reading a surrogate pair. + + for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityMaxCount; ++matchAmount) { + if (!checkCharacter(currentTerm().atom.patternCharacter, currentTerm().inputPosition - matchAmount)) { + input.setPos(position); BACKTRACK(); + } } MATCH_NEXT(); } case ByteTerm::TypePatternCharacterGreedy: { BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation); unsigned matchAmount = 0; - while ((matchAmount < currentTerm().atom.quantityCount) && input.checkInput(1)) { + unsigned position = input.getPos(); // May need to back out reading a surrogate pair. + while ((matchAmount < currentTerm().atom.quantityMaxCount) && input.checkInput(1)) { if (!checkCharacter(currentTerm().atom.patternCharacter, currentTerm().inputPosition + 1)) { - input.uncheckInput(1); + input.setPos(position); break; } ++matchAmount; + position = input.getPos(); } backTrack->matchAmount = matchAmount; @@ -1176,13 +1268,29 @@ public: } case ByteTerm::TypePatternCharacterNonGreedy: { BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation); + backTrack->begin = input.getPos(); backTrack->matchAmount = 0; MATCH_NEXT(); } case ByteTerm::TypePatternCasedCharacterOnce: case ByteTerm::TypePatternCasedCharacterFixed: { - for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityCount; ++matchAmount) { + if (unicode) { + // Case insensitive matching of unicode characters is handled as TypeCharacterClass. + ASSERT(U_IS_BMP(currentTerm().atom.patternCharacter)); + + unsigned position = input.getPos(); // May need to back out reading a surrogate pair. + + for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityMaxCount; ++matchAmount) { + if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition - matchAmount)) { + input.setPos(position); + BACKTRACK(); + } + } + MATCH_NEXT(); + } + + for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityMaxCount; ++matchAmount) { if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition - matchAmount)) BACKTRACK(); } @@ -1190,8 +1298,12 @@ public: } case ByteTerm::TypePatternCasedCharacterGreedy: { BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation); + + // Case insensitive matching of unicode characters is handled as TypeCharacterClass. + ASSERT(!unicode || U_IS_BMP(currentTerm().atom.patternCharacter)); + unsigned matchAmount = 0; - while ((matchAmount < currentTerm().atom.quantityCount) && input.checkInput(1)) { + while ((matchAmount < currentTerm().atom.quantityMaxCount) && input.checkInput(1)) { if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition + 1)) { input.uncheckInput(1); break; @@ -1204,6 +1316,10 @@ public: } case ByteTerm::TypePatternCasedCharacterNonGreedy: { BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation); + + // Case insensitive matching of unicode characters is handled as TypeCharacterClass. + ASSERT(!unicode || U_IS_BMP(currentTerm().atom.patternCharacter)); + backTrack->matchAmount = 0; MATCH_NEXT(); } @@ -1285,7 +1401,7 @@ public: if (offset > 0) MATCH_NEXT(); - if (input.atEnd()) + if (input.atEnd() || pattern->sticky()) return JSRegExpNoMatch; input.next(); @@ -1415,6 +1531,9 @@ public: if (!input.isAvailableInput(0)) return offsetNoMatch; + if (pattern->m_lock) + pattern->m_lock->lock(); + for (unsigned i = 0; i < pattern->m_body->m_numSubpatterns + 1; ++i) output[i << 1] = offsetNoMatch; @@ -1434,13 +1553,18 @@ public: pattern->m_allocator->stopAllocator(); ASSERT((result == JSRegExpMatch) == (output[0] != offsetNoMatch)); + + if (pattern->m_lock) + pattern->m_lock->unlock(); + return output[0]; } Interpreter(BytecodePattern* pattern, unsigned* output, const CharType* input, unsigned length, unsigned start) : pattern(pattern) + , unicode(pattern->unicode()) , output(output) - , input(input, start, length) + , input(input, start, length, pattern->unicode()) , allocatorPool(0) , remainingMatchCount(matchLimit) { @@ -1448,6 +1572,7 @@ public: private: BytecodePattern* pattern; + bool unicode; unsigned* output; InputStream input; BumpPointerPool* allocatorPool; @@ -1472,13 +1597,13 @@ public: m_currentAlternativeIndex = 0; } - PassOwnPtr<BytecodePattern> compile(BumpPointerAllocator* allocator) + std::unique_ptr<BytecodePattern> compile(BumpPointerAllocator* allocator, ConcurrentJSLock* lock) { regexBegin(m_pattern.m_numSubpatterns, m_pattern.m_body->m_callFrameSize, m_pattern.m_body->m_alternatives[0]->onceThrough()); emitDisjunction(m_pattern.m_body); regexEnd(); - return adoptPtr(new BytecodePattern(m_bodyDisjunction.release(), m_allParenthesesInfo, m_pattern, allocator)); + return std::make_unique<BytecodePattern>(WTFMove(m_bodyDisjunction), m_allParenthesesInfo, m_pattern, allocator, lock); } void checkInput(unsigned count) @@ -1506,40 +1631,37 @@ public: m_bodyDisjunction->terms.append(ByteTerm::WordBoundary(invert, inputPosition)); } - void atomPatternCharacter(UChar ch, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType) + void atomPatternCharacter(UChar32 ch, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityMaxCount, QuantifierType quantityType) { - if (m_pattern.m_ignoreCase) { - ASSERT(u_tolower(ch) <= 0xFFFF); - ASSERT(u_toupper(ch) <= 0xFFFF); - - UChar lo = u_tolower(ch); - UChar hi = u_toupper(ch); + if (m_pattern.ignoreCase()) { + UChar32 lo = u_tolower(ch); + UChar32 hi = u_toupper(ch); if (lo != hi) { - m_bodyDisjunction->terms.append(ByteTerm(lo, hi, inputPosition, frameLocation, quantityCount, quantityType)); + m_bodyDisjunction->terms.append(ByteTerm(lo, hi, inputPosition, frameLocation, quantityMaxCount, quantityType)); return; } } - m_bodyDisjunction->terms.append(ByteTerm(ch, inputPosition, frameLocation, quantityCount, quantityType)); + m_bodyDisjunction->terms.append(ByteTerm(ch, inputPosition, frameLocation, quantityMaxCount, quantityType)); } - void atomCharacterClass(CharacterClass* characterClass, bool invert, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType) + void atomCharacterClass(CharacterClass* characterClass, bool invert, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityMaxCount, QuantifierType quantityType) { m_bodyDisjunction->terms.append(ByteTerm(characterClass, invert, inputPosition)); - m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityCount = quantityCount.unsafeGet(); + m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityMaxCount = quantityMaxCount.unsafeGet(); m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityType = quantityType; m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation; } - void atomBackReference(unsigned subpatternId, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType) + void atomBackReference(unsigned subpatternId, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityMaxCount, QuantifierType quantityType) { ASSERT(subpatternId); m_bodyDisjunction->terms.append(ByteTerm::BackReference(subpatternId, inputPosition)); - m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityCount = quantityCount.unsafeGet(); + m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityMaxCount = quantityMaxCount.unsafeGet(); m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityType = quantityType; m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation; } @@ -1600,7 +1722,7 @@ public: m_currentAlternativeIndex = beginTerm + 1; } - void atomParentheticalAssertionEnd(unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType) + void atomParentheticalAssertionEnd(unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityMaxCount, QuantifierType quantityType) { unsigned beginTerm = popParenthesesStack(); closeAlternative(beginTerm + 1); @@ -1616,9 +1738,9 @@ public: m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm; m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation; - m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount.unsafeGet(); + m_bodyDisjunction->terms[beginTerm].atom.quantityMaxCount = quantityMaxCount.unsafeGet(); m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType; - m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount.unsafeGet(); + m_bodyDisjunction->terms[endTerm].atom.quantityMaxCount = quantityMaxCount.unsafeGet(); m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType; } @@ -1698,7 +1820,7 @@ public: m_bodyDisjunction->terms[endIndex].frameLocation = frameLocation; } - void atomParenthesesSubpatternEnd(unsigned lastSubpatternId, int inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType, unsigned callFrameSize = 0) + void atomParenthesesSubpatternEnd(unsigned lastSubpatternId, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityMinCount, Checked<unsigned> quantityMaxCount, QuantifierType quantityType, unsigned callFrameSize = 0) { unsigned beginTerm = popParenthesesStack(); closeAlternative(beginTerm + 1); @@ -1712,7 +1834,7 @@ public: unsigned subpatternId = parenthesesBegin.atom.subpatternId; unsigned numSubpatterns = lastSubpatternId - subpatternId + 1; - OwnPtr<ByteDisjunction> parenthesesDisjunction = adoptPtr(new ByteDisjunction(numSubpatterns, callFrameSize)); + auto parenthesesDisjunction = std::make_unique<ByteDisjunction>(numSubpatterns, callFrameSize); unsigned firstTermInParentheses = beginTerm + 1; parenthesesDisjunction->terms.reserveInitialCapacity(endTerm - firstTermInParentheses + 2); @@ -1725,14 +1847,15 @@ public: m_bodyDisjunction->terms.shrink(beginTerm); m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpattern, subpatternId, parenthesesDisjunction.get(), capture, inputPosition)); - m_allParenthesesInfo.append(parenthesesDisjunction.release()); + m_allParenthesesInfo.append(WTFMove(parenthesesDisjunction)); - m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount.unsafeGet(); + m_bodyDisjunction->terms[beginTerm].atom.quantityMinCount = quantityMinCount.unsafeGet(); + m_bodyDisjunction->terms[beginTerm].atom.quantityMaxCount = quantityMaxCount.unsafeGet(); m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType; m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation; } - void atomParenthesesOnceEnd(int inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType) + void atomParenthesesOnceEnd(unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityMinCount, Checked<unsigned> quantityMaxCount, QuantifierType quantityType) { unsigned beginTerm = popParenthesesStack(); closeAlternative(beginTerm + 1); @@ -1748,13 +1871,15 @@ public: m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm; m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation; - m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount.unsafeGet(); + m_bodyDisjunction->terms[beginTerm].atom.quantityMinCount = quantityMinCount.unsafeGet(); + m_bodyDisjunction->terms[beginTerm].atom.quantityMaxCount = quantityMaxCount.unsafeGet(); m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType; - m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount.unsafeGet(); + m_bodyDisjunction->terms[endTerm].atom.quantityMinCount = quantityMinCount.unsafeGet(); + m_bodyDisjunction->terms[endTerm].atom.quantityMaxCount = quantityMaxCount.unsafeGet(); m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType; } - void atomParenthesesTerminalEnd(int inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType) + void atomParenthesesTerminalEnd(unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityMinCount, Checked<unsigned> quantityMaxCount, QuantifierType quantityType) { unsigned beginTerm = popParenthesesStack(); closeAlternative(beginTerm + 1); @@ -1770,15 +1895,17 @@ public: m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm; m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation; - m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount.unsafeGet(); + m_bodyDisjunction->terms[beginTerm].atom.quantityMinCount = quantityMinCount.unsafeGet(); + m_bodyDisjunction->terms[beginTerm].atom.quantityMaxCount = quantityMaxCount.unsafeGet(); m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType; - m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount.unsafeGet(); + m_bodyDisjunction->terms[endTerm].atom.quantityMinCount = quantityMinCount.unsafeGet(); + m_bodyDisjunction->terms[endTerm].atom.quantityMaxCount = quantityMaxCount.unsafeGet(); m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType; } void regexBegin(unsigned numSubpatterns, unsigned callFrameSize, bool onceThrough) { - m_bodyDisjunction = adoptPtr(new ByteDisjunction(numSubpatterns, callFrameSize)); + m_bodyDisjunction = std::make_unique<ByteDisjunction>(numSubpatterns, callFrameSize); m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeBegin(onceThrough)); m_bodyDisjunction->terms[0].frameLocation = 0; m_currentAlternativeIndex = 0; @@ -1830,9 +1957,7 @@ public: currentCountAlreadyChecked += countToCheck; } - for (unsigned i = 0; i < alternative->m_terms.size(); ++i) { - PatternTerm& term = alternative->m_terms[i]; - + for (auto& term : alternative->m_terms) { switch (term.type) { case PatternTerm::TypeAssertionBOL: assertionBOL(currentCountAlreadyChecked - term.inputPosition); @@ -1847,15 +1972,15 @@ public: break; case PatternTerm::TypePatternCharacter: - atomPatternCharacter(term.patternCharacter, currentCountAlreadyChecked - term.inputPosition, term.frameLocation, term.quantityCount, term.quantityType); + atomPatternCharacter(term.patternCharacter, currentCountAlreadyChecked - term.inputPosition, term.frameLocation, term.quantityMaxCount, term.quantityType); break; case PatternTerm::TypeCharacterClass: - atomCharacterClass(term.characterClass, term.invert(), currentCountAlreadyChecked- term.inputPosition, term.frameLocation, term.quantityCount, term.quantityType); + atomCharacterClass(term.characterClass, term.invert(), currentCountAlreadyChecked- term.inputPosition, term.frameLocation, term.quantityMaxCount, term.quantityType); break; case PatternTerm::TypeBackReference: - atomBackReference(term.backReferenceSubpatternId, currentCountAlreadyChecked - term.inputPosition, term.frameLocation, term.quantityCount, term.quantityType); + atomBackReference(term.backReferenceSubpatternId, currentCountAlreadyChecked - term.inputPosition, term.frameLocation, term.quantityMaxCount, term.quantityType); break; case PatternTerm::TypeForwardReference: @@ -1863,27 +1988,30 @@ public: case PatternTerm::TypeParenthesesSubpattern: { unsigned disjunctionAlreadyCheckedCount = 0; - if (term.quantityCount == 1 && !term.parentheses.isCopy) { + if (term.quantityMaxCount == 1 && !term.parentheses.isCopy) { unsigned alternativeFrameLocation = term.frameLocation; // For QuantifierFixedCount we pre-check the minimum size; for greedy/non-greedy we reserve a slot in the frame. if (term.quantityType == QuantifierFixedCount) disjunctionAlreadyCheckedCount = term.parentheses.disjunction->m_minimumSize; else alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce; - unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked; - atomParenthesesOnceBegin(term.parentheses.subpatternId, term.capture(), disjunctionAlreadyCheckedCount - delegateEndInputOffset, term.frameLocation, alternativeFrameLocation); + ASSERT(currentCountAlreadyChecked >= term.inputPosition); + unsigned delegateEndInputOffset = currentCountAlreadyChecked - term.inputPosition; + atomParenthesesOnceBegin(term.parentheses.subpatternId, term.capture(), disjunctionAlreadyCheckedCount + delegateEndInputOffset, term.frameLocation, alternativeFrameLocation); emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, disjunctionAlreadyCheckedCount); - atomParenthesesOnceEnd(delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType); + atomParenthesesOnceEnd(delegateEndInputOffset, term.frameLocation, term.quantityMinCount, term.quantityMaxCount, term.quantityType); } else if (term.parentheses.isTerminal) { - unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked; - atomParenthesesTerminalBegin(term.parentheses.subpatternId, term.capture(), disjunctionAlreadyCheckedCount - delegateEndInputOffset, term.frameLocation, term.frameLocation + YarrStackSpaceForBackTrackInfoParenthesesOnce); + ASSERT(currentCountAlreadyChecked >= term.inputPosition); + unsigned delegateEndInputOffset = currentCountAlreadyChecked - term.inputPosition; + atomParenthesesTerminalBegin(term.parentheses.subpatternId, term.capture(), disjunctionAlreadyCheckedCount + delegateEndInputOffset, term.frameLocation, term.frameLocation + YarrStackSpaceForBackTrackInfoParenthesesOnce); emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, disjunctionAlreadyCheckedCount); - atomParenthesesTerminalEnd(delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType); + atomParenthesesTerminalEnd(delegateEndInputOffset, term.frameLocation, term.quantityMinCount, term.quantityMaxCount, term.quantityType); } else { - unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked; - atomParenthesesSubpatternBegin(term.parentheses.subpatternId, term.capture(), disjunctionAlreadyCheckedCount - delegateEndInputOffset, term.frameLocation, 0); + ASSERT(currentCountAlreadyChecked >= term.inputPosition); + unsigned delegateEndInputOffset = currentCountAlreadyChecked - term.inputPosition; + atomParenthesesSubpatternBegin(term.parentheses.subpatternId, term.capture(), disjunctionAlreadyCheckedCount + delegateEndInputOffset, term.frameLocation, 0); emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, 0); - atomParenthesesSubpatternEnd(term.parentheses.lastSubpatternId, delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType, term.parentheses.disjunction->m_callFrameSize); + atomParenthesesSubpatternEnd(term.parentheses.lastSubpatternId, delegateEndInputOffset, term.frameLocation, term.quantityMinCount, term.quantityMaxCount, term.quantityType, term.parentheses.disjunction->m_callFrameSize); } break; } @@ -1891,8 +2019,8 @@ public: case PatternTerm::TypeParentheticalAssertion: { unsigned alternativeFrameLocation = term.frameLocation + YarrStackSpaceForBackTrackInfoParentheticalAssertion; - ASSERT(currentCountAlreadyChecked >= static_cast<unsigned>(term.inputPosition)); - unsigned positiveInputOffset = currentCountAlreadyChecked - static_cast<unsigned>(term.inputPosition); + ASSERT(currentCountAlreadyChecked >= term.inputPosition); + unsigned positiveInputOffset = currentCountAlreadyChecked - term.inputPosition; unsigned uncheckAmount = 0; if (positiveInputOffset > term.parentheses.disjunction->m_minimumSize) { uncheckAmount = positiveInputOffset - term.parentheses.disjunction->m_minimumSize; @@ -1902,7 +2030,7 @@ public: atomParentheticalAssertionBegin(term.parentheses.subpatternId, term.invert(), term.frameLocation, alternativeFrameLocation); emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, positiveInputOffset - uncheckAmount); - atomParentheticalAssertionEnd(0, term.frameLocation, term.quantityCount, term.quantityType); + atomParentheticalAssertionEnd(0, term.frameLocation, term.quantityMaxCount, term.quantityType); if (uncheckAmount) { checkInput(uncheckAmount); currentCountAlreadyChecked += uncheckAmount; @@ -1920,19 +2048,20 @@ public: private: YarrPattern& m_pattern; - OwnPtr<ByteDisjunction> m_bodyDisjunction; + std::unique_ptr<ByteDisjunction> m_bodyDisjunction; unsigned m_currentAlternativeIndex; Vector<ParenthesesStackEntry> m_parenthesesStack; - Vector<OwnPtr<ByteDisjunction>> m_allParenthesesInfo; + Vector<std::unique_ptr<ByteDisjunction>> m_allParenthesesInfo; }; -PassOwnPtr<BytecodePattern> byteCompile(YarrPattern& pattern, BumpPointerAllocator* allocator) +std::unique_ptr<BytecodePattern> byteCompile(YarrPattern& pattern, BumpPointerAllocator* allocator, ConcurrentJSLock* lock) { - return ByteCompiler(pattern).compile(allocator); + return ByteCompiler(pattern).compile(allocator, lock); } unsigned interpret(BytecodePattern* bytecode, const String& input, unsigned start, unsigned* output) { + SuperSamplerScope superSamplerScope(false); if (input.is8Bit()) return Interpreter<LChar>(bytecode, output, input.characters8(), input.length(), start).interpret(); return Interpreter<UChar>(bytecode, output, input.characters16(), input.length(), start).interpret(); @@ -1940,11 +2069,13 @@ unsigned interpret(BytecodePattern* bytecode, const String& input, unsigned star unsigned interpret(BytecodePattern* bytecode, const LChar* input, unsigned length, unsigned start, unsigned* output) { + SuperSamplerScope superSamplerScope(false); return Interpreter<LChar>(bytecode, output, input, length, start).interpret(); } unsigned interpret(BytecodePattern* bytecode, const UChar* input, unsigned length, unsigned start, unsigned* output) { + SuperSamplerScope superSamplerScope(false); return Interpreter<UChar>(bytecode, output, input, length, start).interpret(); } |