summaryrefslogtreecommitdiff
path: root/Source/JavaScriptCore/yarr/YarrInterpreter.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'Source/JavaScriptCore/yarr/YarrInterpreter.cpp')
-rw-r--r--Source/JavaScriptCore/yarr/YarrInterpreter.cpp411
1 files changed, 271 insertions, 140 deletions
diff --git a/Source/JavaScriptCore/yarr/YarrInterpreter.cpp b/Source/JavaScriptCore/yarr/YarrInterpreter.cpp
index 8645b5f20..9cf14e702 100644
--- a/Source/JavaScriptCore/yarr/YarrInterpreter.cpp
+++ b/Source/JavaScriptCore/yarr/YarrInterpreter.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2009 Apple Inc. All rights reserved.
+ * Copyright (C) 2009, 2013, 2016 Apple Inc. All rights reserved.
* Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged
*
* Redistribution and use in source and binary forms, with or without
@@ -27,8 +27,9 @@
#include "config.h"
#include "YarrInterpreter.h"
+#include "SuperSampler.h"
#include "Yarr.h"
-#include "YarrCanonicalizeUCS2.h"
+#include "YarrCanonicalize.h"
#include <wtf/BumpPointerAllocator.h>
#include <wtf/DataLog.h>
#include <wtf/text/CString.h>
@@ -44,9 +45,11 @@ public:
struct ParenthesesDisjunctionContext;
struct BackTrackInfoPatternCharacter {
+ uintptr_t begin; // Only needed for unicode patterns
uintptr_t matchAmount;
};
struct BackTrackInfoCharacterClass {
+ uintptr_t begin; // Only needed for unicode patterns
uintptr_t matchAmount;
};
struct BackTrackInfoBackReference {
@@ -154,7 +157,7 @@ public:
ParenthesesDisjunctionContext* allocParenthesesDisjunctionContext(ByteDisjunction* disjunction, unsigned* output, ByteTerm& term)
{
- size_t size = sizeof(ParenthesesDisjunctionContext) - sizeof(unsigned) + (term.atom.parenthesesDisjunction->m_numSubpatterns << 1) * sizeof(unsigned) + sizeof(DisjunctionContext) - sizeof(uintptr_t) + disjunction->m_frameSize * sizeof(uintptr_t);
+ size_t size = sizeof(ParenthesesDisjunctionContext) - sizeof(unsigned) + (term.atom.parenthesesDisjunction->m_numSubpatterns << 1) * sizeof(unsigned) + sizeof(DisjunctionContext) - sizeof(uintptr_t) + static_cast<size_t>(disjunction->m_frameSize) * sizeof(uintptr_t);
allocatorPool = allocatorPool->ensureCapacity(size);
RELEASE_ASSERT(allocatorPool);
return new (allocatorPool->alloc(size)) ParenthesesDisjunctionContext(output, term);
@@ -167,10 +170,11 @@ public:
class InputStream {
public:
- InputStream(const CharType* input, unsigned start, unsigned length)
+ InputStream(const CharType* input, unsigned start, unsigned length, bool decodeSurrogatePairs)
: input(input)
, pos(start)
, length(length)
+ , decodeSurrogatePairs(decodeSurrogatePairs)
{
}
@@ -204,13 +208,40 @@ public:
RELEASE_ASSERT(pos >= negativePositionOffest);
unsigned p = pos - negativePositionOffest;
ASSERT(p < length);
- return input[p];
+ int result = input[p];
+ if (U16_IS_LEAD(result) && decodeSurrogatePairs && p + 1 < length && U16_IS_TRAIL(input[p + 1])) {
+ if (atEnd())
+ return -1;
+
+ result = U16_GET_SUPPLEMENTARY(result, input[p + 1]);
+ next();
+ }
+ return result;
+ }
+
+ int readSurrogatePairChecked(unsigned negativePositionOffset)
+ {
+ RELEASE_ASSERT(pos >= negativePositionOffset);
+ unsigned p = pos - negativePositionOffset;
+ ASSERT(p < length);
+ if (p + 1 >= length)
+ return -1;
+
+ int first = input[p];
+ int second = input[p + 1];
+ if (U16_IS_LEAD(first) && U16_IS_TRAIL(second))
+ return U16_GET_SUPPLEMENTARY(first, second);
+
+ return -1;
}
int reread(unsigned from)
{
ASSERT(from < length);
- return input[from];
+ int result = input[from];
+ if (U16_IS_LEAD(result) && decodeSurrogatePairs && from + 1 < length && U16_IS_TRAIL(input[from + 1]))
+ result = U16_GET_SUPPLEMENTARY(result, input[from + 1]);
+ return result;
}
int prev()
@@ -261,9 +292,9 @@ public:
pos -= count;
}
- bool atStart(unsigned negativePositionOffest)
+ bool atStart(unsigned negativePositionOffset)
{
- return pos == negativePositionOffest;
+ return pos == negativePositionOffset;
}
bool atEnd(unsigned negativePositionOffest)
@@ -281,11 +312,12 @@ public:
const CharType* input;
unsigned pos;
unsigned length;
+ bool decodeSurrogatePairs;
};
bool testCharacterClass(CharacterClass* characterClass, int ch)
{
- if (ch & 0xFF80) {
+ if (!isASCII(ch)) {
for (unsigned i = 0; i < characterClass->m_matchesUnicode.size(); ++i)
if (ch == characterClass->m_matchesUnicode[i])
return true;
@@ -309,6 +341,11 @@ public:
return testChar == input.readChecked(negativeInputOffset);
}
+ bool checkSurrogatePair(int testUnicodeChar, unsigned negativeInputOffset)
+ {
+ return testUnicodeChar == input.readSurrogatePairChecked(negativeInputOffset);
+ }
+
bool checkCasedCharacter(int loChar, int hiChar, unsigned negativeInputOffset)
{
int ch = input.readChecked(negativeInputOffset);
@@ -328,32 +365,31 @@ public:
if (!input.checkInput(matchSize))
return false;
- if (pattern->m_ignoreCase) {
- for (unsigned i = 0; i < matchSize; ++i) {
- int oldCh = input.reread(matchBegin + i);
- int ch = input.readChecked(negativeInputOffset + matchSize - i);
-
- if (oldCh == ch)
- continue;
-
- // The definition for canonicalize (see ES 5.1, 15.10.2.8) means that
- // unicode values are never allowed to match against ascii ones.
- if (isASCII(oldCh) || isASCII(ch)) {
+ for (unsigned i = 0; i < matchSize; ++i) {
+ int oldCh = input.reread(matchBegin + i);
+ int ch;
+ if (!U_IS_BMP(oldCh)) {
+ ch = input.readSurrogatePairChecked(negativeInputOffset + matchSize - i);
+ ++i;
+ } else
+ ch = input.readChecked(negativeInputOffset + matchSize - i);
+
+ if (oldCh == ch)
+ continue;
+
+ if (pattern->ignoreCase()) {
+ // See ES 6.0, 21.2.2.8.2 for the definition of Canonicalize(). For non-Unicode
+ // patterns, Unicode values are never allowed to match against ASCII ones.
+ // For Unicode, we need to check all canonical equivalents of a character.
+ if (!unicode && (isASCII(oldCh) || isASCII(ch))) {
if (toASCIIUpper(oldCh) == toASCIIUpper(ch))
continue;
- } else if (areCanonicallyEquivalent(oldCh, ch))
+ } else if (areCanonicallyEquivalent(oldCh, ch, unicode ? CanonicalMode::Unicode : CanonicalMode::UCS2))
continue;
-
- input.uncheckInput(matchSize);
- return false;
- }
- } else {
- for (unsigned i = 0; i < matchSize; ++i) {
- if (!checkCharacter(input.reread(matchBegin + i), negativeInputOffset + matchSize - i)) {
- input.uncheckInput(matchSize);
- return false;
- }
}
+
+ input.uncheckInput(matchSize);
+ return false;
}
return true;
@@ -361,15 +397,15 @@ public:
bool matchAssertionBOL(ByteTerm& term)
{
- return (input.atStart(term.inputPosition)) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.readChecked(term.inputPosition + 1)));
+ return (input.atStart(term.inputPosition)) || (pattern->multiline() && testCharacterClass(pattern->newlineCharacterClass, input.readChecked(term.inputPosition + 1)));
}
bool matchAssertionEOL(ByteTerm& term)
{
if (term.inputPosition)
- return (input.atEnd(term.inputPosition)) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.readChecked(term.inputPosition)));
+ return (input.atEnd(term.inputPosition)) || (pattern->multiline() && testCharacterClass(pattern->newlineCharacterClass, input.readChecked(term.inputPosition)));
- return (input.atEnd()) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.read()));
+ return (input.atEnd()) || (pattern->multiline() && testCharacterClass(pattern->newlineCharacterClass, input.read()));
}
bool matchAssertionWordBoundary(ByteTerm& term)
@@ -396,18 +432,18 @@ public:
case QuantifierGreedy:
if (backTrack->matchAmount) {
--backTrack->matchAmount;
- input.uncheckInput(1);
+ input.uncheckInput(U16_LENGTH(term.atom.patternCharacter));
return true;
}
break;
case QuantifierNonGreedy:
- if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
+ if ((backTrack->matchAmount < term.atom.quantityMaxCount) && input.checkInput(1)) {
++backTrack->matchAmount;
if (checkCharacter(term.atom.patternCharacter, term.inputPosition + 1))
return true;
}
- input.uncheckInput(backTrack->matchAmount);
+ input.setPos(backTrack->begin);
break;
}
@@ -431,7 +467,7 @@ public:
break;
case QuantifierNonGreedy:
- if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
+ if ((backTrack->matchAmount < term.atom.quantityMaxCount) && input.checkInput(1)) {
++backTrack->matchAmount;
if (checkCasedCharacter(term.atom.casedCharacter.lo, term.atom.casedCharacter.hi, term.inputPosition + 1))
return true;
@@ -446,11 +482,24 @@ public:
bool matchCharacterClass(ByteTerm& term, DisjunctionContext* context)
{
ASSERT(term.type == ByteTerm::TypeCharacterClass);
- BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
+ BackTrackInfoCharacterClass* backTrack = reinterpret_cast<BackTrackInfoCharacterClass*>(context->frame + term.frameLocation);
switch (term.atom.quantityType) {
case QuantifierFixedCount: {
- for (unsigned matchAmount = 0; matchAmount < term.atom.quantityCount; ++matchAmount) {
+ if (unicode) {
+ backTrack->begin = input.getPos();
+ unsigned matchAmount = 0;
+ for (matchAmount = 0; matchAmount < term.atom.quantityMaxCount; ++matchAmount) {
+ if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition - matchAmount)) {
+ input.setPos(backTrack->begin);
+ return false;
+ }
+ }
+
+ return true;
+ }
+
+ for (unsigned matchAmount = 0; matchAmount < term.atom.quantityMaxCount; ++matchAmount) {
if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition - matchAmount))
return false;
}
@@ -458,13 +507,16 @@ public:
}
case QuantifierGreedy: {
+ unsigned position = input.getPos();
+ backTrack->begin = position;
unsigned matchAmount = 0;
- while ((matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
+ while ((matchAmount < term.atom.quantityMaxCount) && input.checkInput(1)) {
if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition + 1)) {
- input.uncheckInput(1);
+ input.setPos(position);
break;
}
++matchAmount;
+ position = input.getPos();
}
backTrack->matchAmount = matchAmount;
@@ -472,6 +524,7 @@ public:
}
case QuantifierNonGreedy:
+ backTrack->begin = input.getPos();
backTrack->matchAmount = 0;
return true;
}
@@ -483,14 +536,28 @@ public:
bool backtrackCharacterClass(ByteTerm& term, DisjunctionContext* context)
{
ASSERT(term.type == ByteTerm::TypeCharacterClass);
- BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
+ BackTrackInfoCharacterClass* backTrack = reinterpret_cast<BackTrackInfoCharacterClass*>(context->frame + term.frameLocation);
switch (term.atom.quantityType) {
case QuantifierFixedCount:
+ if (unicode)
+ input.setPos(backTrack->begin);
break;
case QuantifierGreedy:
if (backTrack->matchAmount) {
+ if (unicode) {
+ // Rematch one less match
+ input.setPos(backTrack->begin);
+ --backTrack->matchAmount;
+ for (unsigned matchAmount = 0; (matchAmount < backTrack->matchAmount) && input.checkInput(1); ++matchAmount) {
+ if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition + 1)) {
+ input.uncheckInput(1);
+ break;
+ }
+ }
+ return true;
+ }
--backTrack->matchAmount;
input.uncheckInput(1);
return true;
@@ -498,12 +565,12 @@ public:
break;
case QuantifierNonGreedy:
- if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
+ if ((backTrack->matchAmount < term.atom.quantityMaxCount) && input.checkInput(1)) {
++backTrack->matchAmount;
if (checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition + 1))
return true;
}
- input.uncheckInput(backTrack->matchAmount);
+ input.setPos(backTrack->begin);
break;
}
@@ -535,7 +602,7 @@ public:
switch (term.atom.quantityType) {
case QuantifierFixedCount: {
backTrack->begin = input.getPos();
- for (unsigned matchAmount = 0; matchAmount < term.atom.quantityCount; ++matchAmount) {
+ for (unsigned matchAmount = 0; matchAmount < term.atom.quantityMaxCount; ++matchAmount) {
if (!tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition)) {
input.setPos(backTrack->begin);
return false;
@@ -546,7 +613,7 @@ public:
case QuantifierGreedy: {
unsigned matchAmount = 0;
- while ((matchAmount < term.atom.quantityCount) && tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition))
+ while ((matchAmount < term.atom.quantityMaxCount) && tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition))
++matchAmount;
backTrack->matchAmount = matchAmount;
return true;
@@ -580,7 +647,7 @@ public:
switch (term.atom.quantityType) {
case QuantifierFixedCount:
- // for quantityCount == 1, could rewind.
+ // for quantityMaxCount == 1, could rewind.
input.setPos(backTrack->begin);
break;
@@ -593,7 +660,7 @@ public:
break;
case QuantifierNonGreedy:
- if ((backTrack->matchAmount < term.atom.quantityCount) && tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition)) {
+ if ((backTrack->matchAmount < term.atom.quantityMaxCount) && tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition)) {
++backTrack->matchAmount;
return true;
}
@@ -608,8 +675,8 @@ public:
{
if (term.capture()) {
unsigned subpatternId = term.atom.subpatternId;
- output[(subpatternId << 1)] = context->getDisjunctionContext(term)->matchBegin + term.inputPosition;
- output[(subpatternId << 1) + 1] = context->getDisjunctionContext(term)->matchEnd + term.inputPosition;
+ output[(subpatternId << 1)] = context->getDisjunctionContext(term)->matchBegin - term.inputPosition;
+ output[(subpatternId << 1) + 1] = context->getDisjunctionContext(term)->matchEnd - term.inputPosition;
}
}
void resetMatches(ByteTerm& term, ParenthesesDisjunctionContext* context)
@@ -641,7 +708,7 @@ public:
bool matchParenthesesOnceBegin(ByteTerm& term, DisjunctionContext* context)
{
ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
- ASSERT(term.atom.quantityCount == 1);
+ ASSERT(term.atom.quantityMaxCount == 1);
BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
@@ -671,11 +738,11 @@ public:
bool matchParenthesesOnceEnd(ByteTerm& term, DisjunctionContext* context)
{
ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceEnd);
- ASSERT(term.atom.quantityCount == 1);
+ ASSERT(term.atom.quantityMaxCount == 1);
if (term.capture()) {
unsigned subpatternId = term.atom.subpatternId;
- output[(subpatternId << 1) + 1] = input.getPos() + term.inputPosition;
+ output[(subpatternId << 1) + 1] = input.getPos() - term.inputPosition;
}
if (term.atom.quantityType == QuantifierFixedCount)
@@ -688,7 +755,7 @@ public:
bool backtrackParenthesesOnceBegin(ByteTerm& term, DisjunctionContext* context)
{
ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
- ASSERT(term.atom.quantityCount == 1);
+ ASSERT(term.atom.quantityMaxCount == 1);
BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
@@ -718,7 +785,7 @@ public:
bool backtrackParenthesesOnceEnd(ByteTerm& term, DisjunctionContext* context)
{
ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceEnd);
- ASSERT(term.atom.quantityCount == 1);
+ ASSERT(term.atom.quantityMaxCount == 1);
BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
@@ -739,7 +806,7 @@ public:
ASSERT((&term - term.atom.parenthesesWidth)->type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
ASSERT((&term - term.atom.parenthesesWidth)->inputPosition == term.inputPosition);
unsigned subpatternId = term.atom.subpatternId;
- output[subpatternId << 1] = input.getPos() + term.inputPosition;
+ output[subpatternId << 1] = input.getPos() - term.inputPosition;
}
context->term -= term.atom.parenthesesWidth;
return true;
@@ -756,7 +823,7 @@ public:
{
ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalBegin);
ASSERT(term.atom.quantityType == QuantifierGreedy);
- ASSERT(term.atom.quantityCount == quantifyInfinite);
+ ASSERT(term.atom.quantityMaxCount == quantifyInfinite);
ASSERT(!term.capture());
BackTrackInfoParenthesesTerminal* backTrack = reinterpret_cast<BackTrackInfoParenthesesTerminal*>(context->frame + term.frameLocation);
@@ -773,7 +840,7 @@ public:
if (backTrack->begin == input.getPos())
return false;
- // Successful match! Okay, what's next? - loop around and try to match moar!
+ // Successful match! Okay, what's next? - loop around and try to match more!
context->term -= (term.atom.parenthesesWidth + 1);
return true;
}
@@ -782,7 +849,7 @@ public:
{
ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalBegin);
ASSERT(term.atom.quantityType == QuantifierGreedy);
- ASSERT(term.atom.quantityCount == quantifyInfinite);
+ ASSERT(term.atom.quantityMaxCount == quantifyInfinite);
ASSERT(!term.capture());
// If we backtrack to this point, we have failed to match this iteration of the parens.
@@ -802,7 +869,7 @@ public:
bool matchParentheticalAssertionBegin(ByteTerm& term, DisjunctionContext* context)
{
ASSERT(term.type == ByteTerm::TypeParentheticalAssertionBegin);
- ASSERT(term.atom.quantityCount == 1);
+ ASSERT(term.atom.quantityMaxCount == 1);
BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
@@ -813,7 +880,7 @@ public:
bool matchParentheticalAssertionEnd(ByteTerm& term, DisjunctionContext* context)
{
ASSERT(term.type == ByteTerm::TypeParentheticalAssertionEnd);
- ASSERT(term.atom.quantityCount == 1);
+ ASSERT(term.atom.quantityMaxCount == 1);
BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
@@ -831,7 +898,7 @@ public:
bool backtrackParentheticalAssertionBegin(ByteTerm& term, DisjunctionContext* context)
{
ASSERT(term.type == ByteTerm::TypeParentheticalAssertionBegin);
- ASSERT(term.atom.quantityCount == 1);
+ ASSERT(term.atom.quantityMaxCount == 1);
// We've failed to match parens; if they are inverted, this is win!
if (term.invert()) {
@@ -845,7 +912,7 @@ public:
bool backtrackParentheticalAssertionEnd(ByteTerm& term, DisjunctionContext* context)
{
ASSERT(term.type == ByteTerm::TypeParentheticalAssertionEnd);
- ASSERT(term.atom.quantityCount == 1);
+ ASSERT(term.atom.quantityMaxCount == 1);
BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
@@ -865,36 +932,45 @@ public:
backTrack->matchAmount = 0;
backTrack->lastContext = 0;
- switch (term.atom.quantityType) {
- case QuantifierFixedCount: {
+ ASSERT(term.atom.quantityType != QuantifierFixedCount || term.atom.quantityMinCount == term.atom.quantityMaxCount);
+
+ unsigned minimumMatchCount = term.atom.quantityMinCount;
+ JSRegExpResult fixedMatchResult;
+
+ // Handle fixed matches and the minimum part of a variable length match.
+ if (minimumMatchCount) {
// While we haven't yet reached our fixed limit,
- while (backTrack->matchAmount < term.atom.quantityCount) {
+ while (backTrack->matchAmount < minimumMatchCount) {
// Try to do a match, and it it succeeds, add it to the list.
ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
- JSRegExpResult result = matchDisjunction(disjunctionBody, context->getDisjunctionContext(term));
- if (result == JSRegExpMatch)
+ fixedMatchResult = matchDisjunction(disjunctionBody, context->getDisjunctionContext(term));
+ if (fixedMatchResult == JSRegExpMatch)
appendParenthesesDisjunctionContext(backTrack, context);
else {
// The match failed; try to find an alternate point to carry on from.
resetMatches(term, context);
freeParenthesesDisjunctionContext(context);
-
- if (result != JSRegExpNoMatch)
- return result;
+
+ if (fixedMatchResult != JSRegExpNoMatch)
+ return fixedMatchResult;
JSRegExpResult backtrackResult = parenthesesDoBacktrack(term, backTrack);
if (backtrackResult != JSRegExpMatch)
return backtrackResult;
}
}
- ASSERT(backTrack->matchAmount == term.atom.quantityCount);
ParenthesesDisjunctionContext* context = backTrack->lastContext;
recordParenthesesMatch(term, context);
+ }
+
+ switch (term.atom.quantityType) {
+ case QuantifierFixedCount: {
+ ASSERT(backTrack->matchAmount == term.atom.quantityMaxCount);
return JSRegExpMatch;
}
case QuantifierGreedy: {
- while (backTrack->matchAmount < term.atom.quantityCount) {
+ while (backTrack->matchAmount < term.atom.quantityMaxCount) {
ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term));
if (result == JSRegExpMatch)
@@ -944,7 +1020,7 @@ public:
switch (term.atom.quantityType) {
case QuantifierFixedCount: {
- ASSERT(backTrack->matchAmount == term.atom.quantityCount);
+ ASSERT(backTrack->matchAmount == term.atom.quantityMaxCount);
ParenthesesDisjunctionContext* context = 0;
JSRegExpResult result = parenthesesDoBacktrack(term, backTrack);
@@ -953,7 +1029,7 @@ public:
return result;
// While we haven't yet reached our fixed limit,
- while (backTrack->matchAmount < term.atom.quantityCount) {
+ while (backTrack->matchAmount < term.atom.quantityMaxCount) {
// Try to do a match, and it it succeeds, add it to the list.
context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
result = matchDisjunction(disjunctionBody, context->getDisjunctionContext(term));
@@ -973,7 +1049,7 @@ public:
}
}
- ASSERT(backTrack->matchAmount == term.atom.quantityCount);
+ ASSERT(backTrack->matchAmount == term.atom.quantityMaxCount);
context = backTrack->lastContext;
recordParenthesesMatch(term, context);
return JSRegExpMatch;
@@ -986,7 +1062,7 @@ public:
ParenthesesDisjunctionContext* context = backTrack->lastContext;
JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term), true);
if (result == JSRegExpMatch) {
- while (backTrack->matchAmount < term.atom.quantityCount) {
+ while (backTrack->matchAmount < term.atom.quantityMaxCount) {
ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
JSRegExpResult parenthesesResult = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term));
if (parenthesesResult == JSRegExpMatch)
@@ -1019,7 +1095,7 @@ public:
case QuantifierNonGreedy: {
// If we've not reached the limit, try to add one more match.
- if (backTrack->matchAmount < term.atom.quantityCount) {
+ if (backTrack->matchAmount < term.atom.quantityMaxCount) {
ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term));
if (result == JSRegExpMatch) {
@@ -1089,7 +1165,7 @@ public:
if (((matchBegin && term.anchors.m_bol)
|| ((matchEnd != input.end()) && term.anchors.m_eol))
- && !pattern->m_multiline)
+ && !pattern->multiline())
return false;
context->matchBegin = matchBegin;
@@ -1154,21 +1230,37 @@ public:
case ByteTerm::TypePatternCharacterOnce:
case ByteTerm::TypePatternCharacterFixed: {
- for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityCount; ++matchAmount) {
- if (!checkCharacter(currentTerm().atom.patternCharacter, currentTerm().inputPosition - matchAmount))
+ if (unicode) {
+ if (!U_IS_BMP(currentTerm().atom.patternCharacter)) {
+ for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityMaxCount; ++matchAmount) {
+ if (!checkSurrogatePair(currentTerm().atom.patternCharacter, currentTerm().inputPosition - 2 * matchAmount)) {
+ BACKTRACK();
+ }
+ }
+ MATCH_NEXT();
+ }
+ }
+ unsigned position = input.getPos(); // May need to back out reading a surrogate pair.
+
+ for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityMaxCount; ++matchAmount) {
+ if (!checkCharacter(currentTerm().atom.patternCharacter, currentTerm().inputPosition - matchAmount)) {
+ input.setPos(position);
BACKTRACK();
+ }
}
MATCH_NEXT();
}
case ByteTerm::TypePatternCharacterGreedy: {
BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
unsigned matchAmount = 0;
- while ((matchAmount < currentTerm().atom.quantityCount) && input.checkInput(1)) {
+ unsigned position = input.getPos(); // May need to back out reading a surrogate pair.
+ while ((matchAmount < currentTerm().atom.quantityMaxCount) && input.checkInput(1)) {
if (!checkCharacter(currentTerm().atom.patternCharacter, currentTerm().inputPosition + 1)) {
- input.uncheckInput(1);
+ input.setPos(position);
break;
}
++matchAmount;
+ position = input.getPos();
}
backTrack->matchAmount = matchAmount;
@@ -1176,13 +1268,29 @@ public:
}
case ByteTerm::TypePatternCharacterNonGreedy: {
BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
+ backTrack->begin = input.getPos();
backTrack->matchAmount = 0;
MATCH_NEXT();
}
case ByteTerm::TypePatternCasedCharacterOnce:
case ByteTerm::TypePatternCasedCharacterFixed: {
- for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityCount; ++matchAmount) {
+ if (unicode) {
+ // Case insensitive matching of unicode characters is handled as TypeCharacterClass.
+ ASSERT(U_IS_BMP(currentTerm().atom.patternCharacter));
+
+ unsigned position = input.getPos(); // May need to back out reading a surrogate pair.
+
+ for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityMaxCount; ++matchAmount) {
+ if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition - matchAmount)) {
+ input.setPos(position);
+ BACKTRACK();
+ }
+ }
+ MATCH_NEXT();
+ }
+
+ for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityMaxCount; ++matchAmount) {
if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition - matchAmount))
BACKTRACK();
}
@@ -1190,8 +1298,12 @@ public:
}
case ByteTerm::TypePatternCasedCharacterGreedy: {
BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
+
+ // Case insensitive matching of unicode characters is handled as TypeCharacterClass.
+ ASSERT(!unicode || U_IS_BMP(currentTerm().atom.patternCharacter));
+
unsigned matchAmount = 0;
- while ((matchAmount < currentTerm().atom.quantityCount) && input.checkInput(1)) {
+ while ((matchAmount < currentTerm().atom.quantityMaxCount) && input.checkInput(1)) {
if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition + 1)) {
input.uncheckInput(1);
break;
@@ -1204,6 +1316,10 @@ public:
}
case ByteTerm::TypePatternCasedCharacterNonGreedy: {
BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
+
+ // Case insensitive matching of unicode characters is handled as TypeCharacterClass.
+ ASSERT(!unicode || U_IS_BMP(currentTerm().atom.patternCharacter));
+
backTrack->matchAmount = 0;
MATCH_NEXT();
}
@@ -1285,7 +1401,7 @@ public:
if (offset > 0)
MATCH_NEXT();
- if (input.atEnd())
+ if (input.atEnd() || pattern->sticky())
return JSRegExpNoMatch;
input.next();
@@ -1415,6 +1531,9 @@ public:
if (!input.isAvailableInput(0))
return offsetNoMatch;
+ if (pattern->m_lock)
+ pattern->m_lock->lock();
+
for (unsigned i = 0; i < pattern->m_body->m_numSubpatterns + 1; ++i)
output[i << 1] = offsetNoMatch;
@@ -1434,13 +1553,18 @@ public:
pattern->m_allocator->stopAllocator();
ASSERT((result == JSRegExpMatch) == (output[0] != offsetNoMatch));
+
+ if (pattern->m_lock)
+ pattern->m_lock->unlock();
+
return output[0];
}
Interpreter(BytecodePattern* pattern, unsigned* output, const CharType* input, unsigned length, unsigned start)
: pattern(pattern)
+ , unicode(pattern->unicode())
, output(output)
- , input(input, start, length)
+ , input(input, start, length, pattern->unicode())
, allocatorPool(0)
, remainingMatchCount(matchLimit)
{
@@ -1448,6 +1572,7 @@ public:
private:
BytecodePattern* pattern;
+ bool unicode;
unsigned* output;
InputStream input;
BumpPointerPool* allocatorPool;
@@ -1472,13 +1597,13 @@ public:
m_currentAlternativeIndex = 0;
}
- PassOwnPtr<BytecodePattern> compile(BumpPointerAllocator* allocator)
+ std::unique_ptr<BytecodePattern> compile(BumpPointerAllocator* allocator, ConcurrentJSLock* lock)
{
regexBegin(m_pattern.m_numSubpatterns, m_pattern.m_body->m_callFrameSize, m_pattern.m_body->m_alternatives[0]->onceThrough());
emitDisjunction(m_pattern.m_body);
regexEnd();
- return adoptPtr(new BytecodePattern(m_bodyDisjunction.release(), m_allParenthesesInfo, m_pattern, allocator));
+ return std::make_unique<BytecodePattern>(WTFMove(m_bodyDisjunction), m_allParenthesesInfo, m_pattern, allocator, lock);
}
void checkInput(unsigned count)
@@ -1506,40 +1631,37 @@ public:
m_bodyDisjunction->terms.append(ByteTerm::WordBoundary(invert, inputPosition));
}
- void atomPatternCharacter(UChar ch, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
+ void atomPatternCharacter(UChar32 ch, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityMaxCount, QuantifierType quantityType)
{
- if (m_pattern.m_ignoreCase) {
- ASSERT(u_tolower(ch) <= 0xFFFF);
- ASSERT(u_toupper(ch) <= 0xFFFF);
-
- UChar lo = u_tolower(ch);
- UChar hi = u_toupper(ch);
+ if (m_pattern.ignoreCase()) {
+ UChar32 lo = u_tolower(ch);
+ UChar32 hi = u_toupper(ch);
if (lo != hi) {
- m_bodyDisjunction->terms.append(ByteTerm(lo, hi, inputPosition, frameLocation, quantityCount, quantityType));
+ m_bodyDisjunction->terms.append(ByteTerm(lo, hi, inputPosition, frameLocation, quantityMaxCount, quantityType));
return;
}
}
- m_bodyDisjunction->terms.append(ByteTerm(ch, inputPosition, frameLocation, quantityCount, quantityType));
+ m_bodyDisjunction->terms.append(ByteTerm(ch, inputPosition, frameLocation, quantityMaxCount, quantityType));
}
- void atomCharacterClass(CharacterClass* characterClass, bool invert, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
+ void atomCharacterClass(CharacterClass* characterClass, bool invert, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityMaxCount, QuantifierType quantityType)
{
m_bodyDisjunction->terms.append(ByteTerm(characterClass, invert, inputPosition));
- m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityCount = quantityCount.unsafeGet();
+ m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityMaxCount = quantityMaxCount.unsafeGet();
m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityType = quantityType;
m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
}
- void atomBackReference(unsigned subpatternId, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
+ void atomBackReference(unsigned subpatternId, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityMaxCount, QuantifierType quantityType)
{
ASSERT(subpatternId);
m_bodyDisjunction->terms.append(ByteTerm::BackReference(subpatternId, inputPosition));
- m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityCount = quantityCount.unsafeGet();
+ m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityMaxCount = quantityMaxCount.unsafeGet();
m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityType = quantityType;
m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
}
@@ -1600,7 +1722,7 @@ public:
m_currentAlternativeIndex = beginTerm + 1;
}
- void atomParentheticalAssertionEnd(unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
+ void atomParentheticalAssertionEnd(unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityMaxCount, QuantifierType quantityType)
{
unsigned beginTerm = popParenthesesStack();
closeAlternative(beginTerm + 1);
@@ -1616,9 +1738,9 @@ public:
m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm;
m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation;
- m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount.unsafeGet();
+ m_bodyDisjunction->terms[beginTerm].atom.quantityMaxCount = quantityMaxCount.unsafeGet();
m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
- m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount.unsafeGet();
+ m_bodyDisjunction->terms[endTerm].atom.quantityMaxCount = quantityMaxCount.unsafeGet();
m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType;
}
@@ -1698,7 +1820,7 @@ public:
m_bodyDisjunction->terms[endIndex].frameLocation = frameLocation;
}
- void atomParenthesesSubpatternEnd(unsigned lastSubpatternId, int inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType, unsigned callFrameSize = 0)
+ void atomParenthesesSubpatternEnd(unsigned lastSubpatternId, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityMinCount, Checked<unsigned> quantityMaxCount, QuantifierType quantityType, unsigned callFrameSize = 0)
{
unsigned beginTerm = popParenthesesStack();
closeAlternative(beginTerm + 1);
@@ -1712,7 +1834,7 @@ public:
unsigned subpatternId = parenthesesBegin.atom.subpatternId;
unsigned numSubpatterns = lastSubpatternId - subpatternId + 1;
- OwnPtr<ByteDisjunction> parenthesesDisjunction = adoptPtr(new ByteDisjunction(numSubpatterns, callFrameSize));
+ auto parenthesesDisjunction = std::make_unique<ByteDisjunction>(numSubpatterns, callFrameSize);
unsigned firstTermInParentheses = beginTerm + 1;
parenthesesDisjunction->terms.reserveInitialCapacity(endTerm - firstTermInParentheses + 2);
@@ -1725,14 +1847,15 @@ public:
m_bodyDisjunction->terms.shrink(beginTerm);
m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpattern, subpatternId, parenthesesDisjunction.get(), capture, inputPosition));
- m_allParenthesesInfo.append(parenthesesDisjunction.release());
+ m_allParenthesesInfo.append(WTFMove(parenthesesDisjunction));
- m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount.unsafeGet();
+ m_bodyDisjunction->terms[beginTerm].atom.quantityMinCount = quantityMinCount.unsafeGet();
+ m_bodyDisjunction->terms[beginTerm].atom.quantityMaxCount = quantityMaxCount.unsafeGet();
m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation;
}
- void atomParenthesesOnceEnd(int inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
+ void atomParenthesesOnceEnd(unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityMinCount, Checked<unsigned> quantityMaxCount, QuantifierType quantityType)
{
unsigned beginTerm = popParenthesesStack();
closeAlternative(beginTerm + 1);
@@ -1748,13 +1871,15 @@ public:
m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm;
m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation;
- m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount.unsafeGet();
+ m_bodyDisjunction->terms[beginTerm].atom.quantityMinCount = quantityMinCount.unsafeGet();
+ m_bodyDisjunction->terms[beginTerm].atom.quantityMaxCount = quantityMaxCount.unsafeGet();
m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
- m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount.unsafeGet();
+ m_bodyDisjunction->terms[endTerm].atom.quantityMinCount = quantityMinCount.unsafeGet();
+ m_bodyDisjunction->terms[endTerm].atom.quantityMaxCount = quantityMaxCount.unsafeGet();
m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType;
}
- void atomParenthesesTerminalEnd(int inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
+ void atomParenthesesTerminalEnd(unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityMinCount, Checked<unsigned> quantityMaxCount, QuantifierType quantityType)
{
unsigned beginTerm = popParenthesesStack();
closeAlternative(beginTerm + 1);
@@ -1770,15 +1895,17 @@ public:
m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm;
m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation;
- m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount.unsafeGet();
+ m_bodyDisjunction->terms[beginTerm].atom.quantityMinCount = quantityMinCount.unsafeGet();
+ m_bodyDisjunction->terms[beginTerm].atom.quantityMaxCount = quantityMaxCount.unsafeGet();
m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
- m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount.unsafeGet();
+ m_bodyDisjunction->terms[endTerm].atom.quantityMinCount = quantityMinCount.unsafeGet();
+ m_bodyDisjunction->terms[endTerm].atom.quantityMaxCount = quantityMaxCount.unsafeGet();
m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType;
}
void regexBegin(unsigned numSubpatterns, unsigned callFrameSize, bool onceThrough)
{
- m_bodyDisjunction = adoptPtr(new ByteDisjunction(numSubpatterns, callFrameSize));
+ m_bodyDisjunction = std::make_unique<ByteDisjunction>(numSubpatterns, callFrameSize);
m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeBegin(onceThrough));
m_bodyDisjunction->terms[0].frameLocation = 0;
m_currentAlternativeIndex = 0;
@@ -1830,9 +1957,7 @@ public:
currentCountAlreadyChecked += countToCheck;
}
- for (unsigned i = 0; i < alternative->m_terms.size(); ++i) {
- PatternTerm& term = alternative->m_terms[i];
-
+ for (auto& term : alternative->m_terms) {
switch (term.type) {
case PatternTerm::TypeAssertionBOL:
assertionBOL(currentCountAlreadyChecked - term.inputPosition);
@@ -1847,15 +1972,15 @@ public:
break;
case PatternTerm::TypePatternCharacter:
- atomPatternCharacter(term.patternCharacter, currentCountAlreadyChecked - term.inputPosition, term.frameLocation, term.quantityCount, term.quantityType);
+ atomPatternCharacter(term.patternCharacter, currentCountAlreadyChecked - term.inputPosition, term.frameLocation, term.quantityMaxCount, term.quantityType);
break;
case PatternTerm::TypeCharacterClass:
- atomCharacterClass(term.characterClass, term.invert(), currentCountAlreadyChecked- term.inputPosition, term.frameLocation, term.quantityCount, term.quantityType);
+ atomCharacterClass(term.characterClass, term.invert(), currentCountAlreadyChecked- term.inputPosition, term.frameLocation, term.quantityMaxCount, term.quantityType);
break;
case PatternTerm::TypeBackReference:
- atomBackReference(term.backReferenceSubpatternId, currentCountAlreadyChecked - term.inputPosition, term.frameLocation, term.quantityCount, term.quantityType);
+ atomBackReference(term.backReferenceSubpatternId, currentCountAlreadyChecked - term.inputPosition, term.frameLocation, term.quantityMaxCount, term.quantityType);
break;
case PatternTerm::TypeForwardReference:
@@ -1863,27 +1988,30 @@ public:
case PatternTerm::TypeParenthesesSubpattern: {
unsigned disjunctionAlreadyCheckedCount = 0;
- if (term.quantityCount == 1 && !term.parentheses.isCopy) {
+ if (term.quantityMaxCount == 1 && !term.parentheses.isCopy) {
unsigned alternativeFrameLocation = term.frameLocation;
// For QuantifierFixedCount we pre-check the minimum size; for greedy/non-greedy we reserve a slot in the frame.
if (term.quantityType == QuantifierFixedCount)
disjunctionAlreadyCheckedCount = term.parentheses.disjunction->m_minimumSize;
else
alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce;
- unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked;
- atomParenthesesOnceBegin(term.parentheses.subpatternId, term.capture(), disjunctionAlreadyCheckedCount - delegateEndInputOffset, term.frameLocation, alternativeFrameLocation);
+ ASSERT(currentCountAlreadyChecked >= term.inputPosition);
+ unsigned delegateEndInputOffset = currentCountAlreadyChecked - term.inputPosition;
+ atomParenthesesOnceBegin(term.parentheses.subpatternId, term.capture(), disjunctionAlreadyCheckedCount + delegateEndInputOffset, term.frameLocation, alternativeFrameLocation);
emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, disjunctionAlreadyCheckedCount);
- atomParenthesesOnceEnd(delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType);
+ atomParenthesesOnceEnd(delegateEndInputOffset, term.frameLocation, term.quantityMinCount, term.quantityMaxCount, term.quantityType);
} else if (term.parentheses.isTerminal) {
- unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked;
- atomParenthesesTerminalBegin(term.parentheses.subpatternId, term.capture(), disjunctionAlreadyCheckedCount - delegateEndInputOffset, term.frameLocation, term.frameLocation + YarrStackSpaceForBackTrackInfoParenthesesOnce);
+ ASSERT(currentCountAlreadyChecked >= term.inputPosition);
+ unsigned delegateEndInputOffset = currentCountAlreadyChecked - term.inputPosition;
+ atomParenthesesTerminalBegin(term.parentheses.subpatternId, term.capture(), disjunctionAlreadyCheckedCount + delegateEndInputOffset, term.frameLocation, term.frameLocation + YarrStackSpaceForBackTrackInfoParenthesesOnce);
emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, disjunctionAlreadyCheckedCount);
- atomParenthesesTerminalEnd(delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType);
+ atomParenthesesTerminalEnd(delegateEndInputOffset, term.frameLocation, term.quantityMinCount, term.quantityMaxCount, term.quantityType);
} else {
- unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked;
- atomParenthesesSubpatternBegin(term.parentheses.subpatternId, term.capture(), disjunctionAlreadyCheckedCount - delegateEndInputOffset, term.frameLocation, 0);
+ ASSERT(currentCountAlreadyChecked >= term.inputPosition);
+ unsigned delegateEndInputOffset = currentCountAlreadyChecked - term.inputPosition;
+ atomParenthesesSubpatternBegin(term.parentheses.subpatternId, term.capture(), disjunctionAlreadyCheckedCount + delegateEndInputOffset, term.frameLocation, 0);
emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, 0);
- atomParenthesesSubpatternEnd(term.parentheses.lastSubpatternId, delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType, term.parentheses.disjunction->m_callFrameSize);
+ atomParenthesesSubpatternEnd(term.parentheses.lastSubpatternId, delegateEndInputOffset, term.frameLocation, term.quantityMinCount, term.quantityMaxCount, term.quantityType, term.parentheses.disjunction->m_callFrameSize);
}
break;
}
@@ -1891,8 +2019,8 @@ public:
case PatternTerm::TypeParentheticalAssertion: {
unsigned alternativeFrameLocation = term.frameLocation + YarrStackSpaceForBackTrackInfoParentheticalAssertion;
- ASSERT(currentCountAlreadyChecked >= static_cast<unsigned>(term.inputPosition));
- unsigned positiveInputOffset = currentCountAlreadyChecked - static_cast<unsigned>(term.inputPosition);
+ ASSERT(currentCountAlreadyChecked >= term.inputPosition);
+ unsigned positiveInputOffset = currentCountAlreadyChecked - term.inputPosition;
unsigned uncheckAmount = 0;
if (positiveInputOffset > term.parentheses.disjunction->m_minimumSize) {
uncheckAmount = positiveInputOffset - term.parentheses.disjunction->m_minimumSize;
@@ -1902,7 +2030,7 @@ public:
atomParentheticalAssertionBegin(term.parentheses.subpatternId, term.invert(), term.frameLocation, alternativeFrameLocation);
emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, positiveInputOffset - uncheckAmount);
- atomParentheticalAssertionEnd(0, term.frameLocation, term.quantityCount, term.quantityType);
+ atomParentheticalAssertionEnd(0, term.frameLocation, term.quantityMaxCount, term.quantityType);
if (uncheckAmount) {
checkInput(uncheckAmount);
currentCountAlreadyChecked += uncheckAmount;
@@ -1920,19 +2048,20 @@ public:
private:
YarrPattern& m_pattern;
- OwnPtr<ByteDisjunction> m_bodyDisjunction;
+ std::unique_ptr<ByteDisjunction> m_bodyDisjunction;
unsigned m_currentAlternativeIndex;
Vector<ParenthesesStackEntry> m_parenthesesStack;
- Vector<OwnPtr<ByteDisjunction>> m_allParenthesesInfo;
+ Vector<std::unique_ptr<ByteDisjunction>> m_allParenthesesInfo;
};
-PassOwnPtr<BytecodePattern> byteCompile(YarrPattern& pattern, BumpPointerAllocator* allocator)
+std::unique_ptr<BytecodePattern> byteCompile(YarrPattern& pattern, BumpPointerAllocator* allocator, ConcurrentJSLock* lock)
{
- return ByteCompiler(pattern).compile(allocator);
+ return ByteCompiler(pattern).compile(allocator, lock);
}
unsigned interpret(BytecodePattern* bytecode, const String& input, unsigned start, unsigned* output)
{
+ SuperSamplerScope superSamplerScope(false);
if (input.is8Bit())
return Interpreter<LChar>(bytecode, output, input.characters8(), input.length(), start).interpret();
return Interpreter<UChar>(bytecode, output, input.characters16(), input.length(), start).interpret();
@@ -1940,11 +2069,13 @@ unsigned interpret(BytecodePattern* bytecode, const String& input, unsigned star
unsigned interpret(BytecodePattern* bytecode, const LChar* input, unsigned length, unsigned start, unsigned* output)
{
+ SuperSamplerScope superSamplerScope(false);
return Interpreter<LChar>(bytecode, output, input, length, start).interpret();
}
unsigned interpret(BytecodePattern* bytecode, const UChar* input, unsigned length, unsigned start, unsigned* output)
{
+ SuperSamplerScope superSamplerScope(false);
return Interpreter<UChar>(bytecode, output, input, length, start).interpret();
}