summary | refs | log | tree | commit | diff
path: root/deps/v8/src/regexp
diff options
context:
space:
mode:
author Myles Borins <mylesborins@google.com> 2019-09-24 11:56:38 -0400
committer Myles Borins <myles.borins@gmail.com> 2019-10-07 03:19:23 -0400
commit f7f6c928c1c9c136b7926f892b8a2fda11d8b4b2 (patch)
tree f5edbccb3ffda2573d70a6e291e7157f290e0ae0 /deps/v8/src/regexp
parent ffd22e81983056d09c064c59343a0e488236272d (diff)
download node-new-f7f6c928c1c9c136b7926f892b8a2fda11d8b4b2.tar.gz
deps: update V8 to 7.8.279.9
PR-URL: https://github.com/nodejs/node/pull/29694
Reviewed-By: Colin Ihrig <cjihrig@gmail.com>
Reviewed-By: Anna Henningsen <anna@addaleax.net>
Reviewed-By: Gus Caplan <me@gus.host>
Reviewed-By: Jiawen Geng <technicalcute@gmail.com>
Reviewed-By: Michaël Zasso <targos@protonmail.com>
Reviewed-By: Tobias Nießen <tniessen@tnie.de>
Reviewed-By: Ujjwal Sharma <usharma1998@gmail.com>
Diffstat (limited to 'deps/v8/src/regexp')
-rw-r--r-- deps/v8/src/regexp/arm/regexp-macro-assembler-arm.cc 37
-rw-r--r-- deps/v8/src/regexp/arm/regexp-macro-assembler-arm.h 9
-rw-r--r-- deps/v8/src/regexp/arm64/regexp-macro-assembler-arm64.cc 42
-rw-r--r-- deps/v8/src/regexp/arm64/regexp-macro-assembler-arm64.h 9
-rw-r--r-- deps/v8/src/regexp/ia32/regexp-macro-assembler-ia32.cc 36
-rw-r--r-- deps/v8/src/regexp/ia32/regexp-macro-assembler-ia32.h 9
-rw-r--r-- deps/v8/src/regexp/mips/regexp-macro-assembler-mips.cc 30
-rw-r--r-- deps/v8/src/regexp/mips/regexp-macro-assembler-mips.h 9
-rw-r--r-- deps/v8/src/regexp/mips64/regexp-macro-assembler-mips64.cc 31
-rw-r--r-- deps/v8/src/regexp/mips64/regexp-macro-assembler-mips64.h 9
-rw-r--r-- deps/v8/src/regexp/ppc/regexp-macro-assembler-ppc.cc 34
-rw-r--r-- deps/v8/src/regexp/ppc/regexp-macro-assembler-ppc.h 8
-rw-r--r-- deps/v8/src/regexp/regexp-bytecode-generator.cc 21
-rw-r--r-- deps/v8/src/regexp/regexp-bytecode-generator.h 8
-rw-r--r-- deps/v8/src/regexp/regexp-bytecodes.h 39
-rw-r--r-- deps/v8/src/regexp/regexp-compiler-tonode.cc 6
-rw-r--r-- deps/v8/src/regexp/regexp-compiler.cc 616
-rw-r--r-- deps/v8/src/regexp/regexp-compiler.h 51
-rw-r--r-- deps/v8/src/regexp/regexp-dotprinter.cc 11
-rw-r--r-- deps/v8/src/regexp/regexp-interpreter.cc 1166
-rw-r--r-- deps/v8/src/regexp/regexp-interpreter.h 37
-rw-r--r-- deps/v8/src/regexp/regexp-macro-assembler-tracer.cc 35
-rw-r--r-- deps/v8/src/regexp/regexp-macro-assembler-tracer.h 8
-rw-r--r-- deps/v8/src/regexp/regexp-macro-assembler.cc 75
-rw-r--r-- deps/v8/src/regexp/regexp-macro-assembler.h 28
-rw-r--r-- deps/v8/src/regexp/regexp-nodes.h 144
-rw-r--r-- deps/v8/src/regexp/regexp-parser.cc 4
-rw-r--r-- deps/v8/src/regexp/regexp-parser.h 1
-rw-r--r-- deps/v8/src/regexp/regexp-stack.cc 4
-rw-r--r-- deps/v8/src/regexp/regexp-stack.h 19
-rw-r--r-- deps/v8/src/regexp/regexp-utils.cc 10
-rw-r--r-- deps/v8/src/regexp/regexp-utils.h 3
-rw-r--r-- deps/v8/src/regexp/regexp.cc 176
-rw-r--r-- deps/v8/src/regexp/regexp.h 23
-rw-r--r-- deps/v8/src/regexp/s390/regexp-macro-assembler-s390.cc 32
-rw-r--r-- deps/v8/src/regexp/s390/regexp-macro-assembler-s390.h 8
-rw-r--r-- deps/v8/src/regexp/x64/regexp-macro-assembler-x64.cc 40
-rw-r--r-- deps/v8/src/regexp/x64/regexp-macro-assembler-x64.h 8
38 files changed, 1803 insertions, 1033 deletions
diff --git a/deps/v8/src/regexp/arm/regexp-macro-assembler-arm.cc b/deps/v8/src/regexp/arm/regexp-macro-assembler-arm.cc
index 8b462cb03c..2f81b6de86 100644
--- a/deps/v8/src/regexp/arm/regexp-macro-assembler-arm.cc
+++ b/deps/v8/src/regexp/arm/regexp-macro-assembler-arm.cc
@@ -40,6 +40,9 @@ namespace internal {
* Each call to a public method should retain this convention.
*
* The stack will have the following structure:
+ * - fp[56] Address regexp (address of the JSRegExp object; unused in
+ * native code, passed to match signature of
+ * the interpreter)
* - fp[52] Isolate* isolate (address of the current isolate)
* - fp[48] direct_call (if 1, direct call from JavaScript code,
* if 0, call through the runtime system).
@@ -83,7 +86,8 @@ namespace internal {
* int num_capture_registers,
* byte* stack_area_base,
* bool direct_call = false,
- * Isolate* isolate);
+ * Isolate* isolate,
+ * Address regexp);
* The call is performed by NativeRegExpMacroAssembler::Execute()
* (in regexp-macro-assembler.cc) via the GeneratedCode wrapper.
*/
@@ -172,15 +176,14 @@ void RegExpMacroAssemblerARM::CheckCharacterGT(uc16 limit, Label* on_greater) {
BranchOrBacktrack(gt, on_greater);
}
-
-void RegExpMacroAssemblerARM::CheckAtStart(Label* on_at_start) {
+void RegExpMacroAssemblerARM::CheckAtStart(int cp_offset, Label* on_at_start) {
__ ldr(r1, MemOperand(frame_pointer(), kStringStartMinusOne));
- __ add(r0, current_input_offset(), Operand(-char_size()));
+ __ add(r0, current_input_offset(),
+ Operand(-char_size() + cp_offset * char_size()));
__ cmp(r0, r1);
BranchOrBacktrack(eq, on_at_start);
}
-
void RegExpMacroAssemblerARM::CheckNotAtStart(int cp_offset,
Label* on_not_at_start) {
__ ldr(r1, MemOperand(frame_pointer(), kStringStartMinusOne));
@@ -647,7 +650,7 @@ Handle<HeapObject> RegExpMacroAssemblerARM::GetCode(Handle<String> source) {
Label stack_ok;
ExternalReference stack_limit =
- ExternalReference::address_of_stack_limit(isolate());
+ ExternalReference::address_of_jslimit(isolate());
__ mov(r0, Operand(stack_limit));
__ ldr(r0, MemOperand(r0));
__ sub(r0, sp, r0, SetCC);
@@ -929,15 +932,19 @@ RegExpMacroAssembler::IrregexpImplementation
return kARMImplementation;
}
+void RegExpMacroAssemblerARM::LoadCurrentCharacterImpl(int cp_offset,
+ Label* on_end_of_input,
+ bool check_bounds,
+ int characters,
+ int eats_at_least) {
+ // It's possible to preload a small number of characters when each success
+ // path requires a large number of characters, but not the reverse.
+ DCHECK_GE(eats_at_least, characters);
-void RegExpMacroAssemblerARM::LoadCurrentCharacter(int cp_offset,
- Label* on_end_of_input,
- bool check_bounds,
- int characters) {
DCHECK(cp_offset < (1<<30)); // Be sane! (And ensure negation works)
if (check_bounds) {
if (cp_offset >= 0) {
- CheckPosition(cp_offset + characters - 1, on_end_of_input);
+ CheckPosition(cp_offset + eats_at_least - 1, on_end_of_input);
} else {
CheckPosition(cp_offset, on_end_of_input);
}
@@ -945,7 +952,6 @@ void RegExpMacroAssemblerARM::LoadCurrentCharacter(int cp_offset,
LoadCurrentCharacterUnchecked(cp_offset, characters);
}
-
void RegExpMacroAssemblerARM::PopCurrentPosition() {
Pop(current_input_offset());
}
@@ -1109,7 +1115,8 @@ int RegExpMacroAssemblerARM::CheckStackGuardState(Address* return_address,
return NativeRegExpMacroAssembler::CheckStackGuardState(
frame_entry<Isolate*>(re_frame, kIsolate),
frame_entry<int>(re_frame, kStartIndex),
- frame_entry<int>(re_frame, kDirectCall) == 1, return_address, re_code,
+ static_cast<RegExp::CallOrigin>(frame_entry<int>(re_frame, kDirectCall)),
+ return_address, re_code,
frame_entry_address<Address>(re_frame, kInputString),
frame_entry_address<const byte*>(re_frame, kInputStart),
frame_entry_address<const byte*>(re_frame, kInputEnd));
@@ -1193,7 +1200,7 @@ void RegExpMacroAssemblerARM::Pop(Register target) {
void RegExpMacroAssemblerARM::CheckPreemption() {
// Check for preemption.
ExternalReference stack_limit =
- ExternalReference::address_of_stack_limit(isolate());
+ ExternalReference::address_of_jslimit(isolate());
__ mov(r0, Operand(stack_limit));
__ ldr(r0, MemOperand(r0));
__ cmp(sp, r0);
@@ -1203,7 +1210,7 @@ void RegExpMacroAssemblerARM::CheckPreemption() {
void RegExpMacroAssemblerARM::CheckStackLimit() {
ExternalReference stack_limit =
- ExternalReference::address_of_regexp_stack_limit(isolate());
+ ExternalReference::address_of_regexp_stack_limit_address(isolate());
__ mov(r0, Operand(stack_limit));
__ ldr(r0, MemOperand(r0));
__ cmp(backtrack_stackpointer(), Operand(r0));
diff --git a/deps/v8/src/regexp/arm/regexp-macro-assembler-arm.h b/deps/v8/src/regexp/arm/regexp-macro-assembler-arm.h
index 9e95f8e1f2..9b21c5a11c 100644
--- a/deps/v8/src/regexp/arm/regexp-macro-assembler-arm.h
+++ b/deps/v8/src/regexp/arm/regexp-macro-assembler-arm.h
@@ -23,7 +23,7 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerARM
virtual void AdvanceRegister(int reg, int by);
virtual void Backtrack();
virtual void Bind(Label* label);
- virtual void CheckAtStart(Label* on_at_start);
+ virtual void CheckAtStart(int cp_offset, Label* on_at_start);
virtual void CheckCharacter(unsigned c, Label* on_equal);
virtual void CheckCharacterAfterAnd(unsigned c,
unsigned mask,
@@ -67,10 +67,9 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerARM
virtual void IfRegisterLT(int reg, int comparand, Label* if_lt);
virtual void IfRegisterEqPos(int reg, Label* if_eq);
virtual IrregexpImplementation Implementation();
- virtual void LoadCurrentCharacter(int cp_offset,
- Label* on_end_of_input,
- bool check_bounds = true,
- int characters = 1);
+ virtual void LoadCurrentCharacterImpl(int cp_offset, Label* on_end_of_input,
+ bool check_bounds, int characters,
+ int eats_at_least);
virtual void PopCurrentPosition();
virtual void PopRegister(int register_index);
virtual void PushBacktrack(Label* label);
diff --git a/deps/v8/src/regexp/arm64/regexp-macro-assembler-arm64.cc b/deps/v8/src/regexp/arm64/regexp-macro-assembler-arm64.cc
index b299ad0535..9e00063487 100644
--- a/deps/v8/src/regexp/arm64/regexp-macro-assembler-arm64.cc
+++ b/deps/v8/src/regexp/arm64/regexp-macro-assembler-arm64.cc
@@ -55,7 +55,10 @@ namespace internal {
* (as referred to in
* the code)
*
- * - fp[96] isolate Address of the current isolate.
+ * - fp[104] Address regexp Address of the JSRegExp object. Unused in
+ * native code, passed to match signature of
+ * the interpreter.
+ * - fp[96] isolate Address of the current isolate.
* ^^^ sp when called ^^^
* - fp[88] lr Return from the RegExp code.
* - fp[80] r29 Old frame pointer (CalleeSaved).
@@ -93,7 +96,8 @@ namespace internal {
* int num_capture_registers,
* byte* stack_area_base,
* bool direct_call = false,
- * Isolate* isolate);
+ * Isolate* isolate,
+ * Address regexp);
* The call is performed by NativeRegExpMacroAssembler::Execute()
* (in regexp-macro-assembler.cc) via the GeneratedCode wrapper.
*/
@@ -201,14 +205,14 @@ void RegExpMacroAssemblerARM64::CheckCharacterGT(uc16 limit,
CompareAndBranchOrBacktrack(current_character(), limit, hi, on_greater);
}
-
-void RegExpMacroAssemblerARM64::CheckAtStart(Label* on_at_start) {
- __ Add(w10, current_input_offset(), Operand(-char_size()));
+void RegExpMacroAssemblerARM64::CheckAtStart(int cp_offset,
+ Label* on_at_start) {
+ __ Add(w10, current_input_offset(),
+ Operand(-char_size() + cp_offset * char_size()));
__ Cmp(w10, string_start_minus_one());
BranchOrBacktrack(eq, on_at_start);
}
-
void RegExpMacroAssemblerARM64::CheckNotAtStart(int cp_offset,
Label* on_not_at_start) {
__ Add(w10, current_input_offset(),
@@ -750,7 +754,7 @@ Handle<HeapObject> RegExpMacroAssemblerARM64::GetCode(Handle<String> source) {
Label stack_ok;
ExternalReference stack_limit =
- ExternalReference::address_of_stack_limit(isolate());
+ ExternalReference::address_of_jslimit(isolate());
__ Mov(x10, stack_limit);
__ Ldr(x10, MemOperand(x10));
__ Subs(x10, sp, x10);
@@ -1106,18 +1110,22 @@ RegExpMacroAssembler::IrregexpImplementation
return kARM64Implementation;
}
+void RegExpMacroAssemblerARM64::LoadCurrentCharacterImpl(int cp_offset,
+ Label* on_end_of_input,
+ bool check_bounds,
+ int characters,
+ int eats_at_least) {
+ // It's possible to preload a small number of characters when each success
+ // path requires a large number of characters, but not the reverse.
+ DCHECK_GE(eats_at_least, characters);
-void RegExpMacroAssemblerARM64::LoadCurrentCharacter(int cp_offset,
- Label* on_end_of_input,
- bool check_bounds,
- int characters) {
// TODO(pielan): Make sure long strings are caught before this, and not
// just asserted in debug mode.
// Be sane! (And ensure that an int32_t can be used to index the string)
DCHECK(cp_offset < (1<<30));
if (check_bounds) {
if (cp_offset >= 0) {
- CheckPosition(cp_offset + characters - 1, on_end_of_input);
+ CheckPosition(cp_offset + eats_at_least - 1, on_end_of_input);
} else {
CheckPosition(cp_offset, on_end_of_input);
}
@@ -1125,7 +1133,6 @@ void RegExpMacroAssemblerARM64::LoadCurrentCharacter(int cp_offset,
LoadCurrentCharacterUnchecked(cp_offset, characters);
}
-
void RegExpMacroAssemblerARM64::PopCurrentPosition() {
Pop(current_input_offset());
}
@@ -1326,8 +1333,9 @@ int RegExpMacroAssemblerARM64::CheckStackGuardState(
Code re_code = Code::cast(Object(raw_code));
return NativeRegExpMacroAssembler::CheckStackGuardState(
frame_entry<Isolate*>(re_frame, kIsolate), start_index,
- frame_entry<int>(re_frame, kDirectCall) == 1, return_address, re_code,
- frame_entry_address<Address>(re_frame, kInput), input_start, input_end);
+ static_cast<RegExp::CallOrigin>(frame_entry<int>(re_frame, kDirectCall)),
+ return_address, re_code, frame_entry_address<Address>(re_frame, kInput),
+ input_start, input_end);
}
@@ -1448,7 +1456,7 @@ void RegExpMacroAssemblerARM64::CompareAndBranchOrBacktrack(Register reg,
void RegExpMacroAssemblerARM64::CheckPreemption() {
// Check for preemption.
ExternalReference stack_limit =
- ExternalReference::address_of_stack_limit(isolate());
+ ExternalReference::address_of_jslimit(isolate());
__ Mov(x10, stack_limit);
__ Ldr(x10, MemOperand(x10));
__ Cmp(sp, x10);
@@ -1458,7 +1466,7 @@ void RegExpMacroAssemblerARM64::CheckPreemption() {
void RegExpMacroAssemblerARM64::CheckStackLimit() {
ExternalReference stack_limit =
- ExternalReference::address_of_regexp_stack_limit(isolate());
+ ExternalReference::address_of_regexp_stack_limit_address(isolate());
__ Mov(x10, stack_limit);
__ Ldr(x10, MemOperand(x10));
__ Cmp(backtrack_stackpointer(), x10);
diff --git a/deps/v8/src/regexp/arm64/regexp-macro-assembler-arm64.h b/deps/v8/src/regexp/arm64/regexp-macro-assembler-arm64.h
index ef83f9e43c..6154c6cf60 100644
--- a/deps/v8/src/regexp/arm64/regexp-macro-assembler-arm64.h
+++ b/deps/v8/src/regexp/arm64/regexp-macro-assembler-arm64.h
@@ -24,7 +24,7 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerARM64
virtual void AdvanceRegister(int reg, int by);
virtual void Backtrack();
virtual void Bind(Label* label);
- virtual void CheckAtStart(Label* on_at_start);
+ virtual void CheckAtStart(int cp_offset, Label* on_at_start);
virtual void CheckCharacter(unsigned c, Label* on_equal);
virtual void CheckCharacterAfterAnd(unsigned c,
unsigned mask,
@@ -72,10 +72,9 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerARM64
virtual void IfRegisterLT(int reg, int comparand, Label* if_lt);
virtual void IfRegisterEqPos(int reg, Label* if_eq);
virtual IrregexpImplementation Implementation();
- virtual void LoadCurrentCharacter(int cp_offset,
- Label* on_end_of_input,
- bool check_bounds = true,
- int characters = 1);
+ virtual void LoadCurrentCharacterImpl(int cp_offset, Label* on_end_of_input,
+ bool check_bounds, int characters,
+ int eats_at_least);
virtual void PopCurrentPosition();
virtual void PopRegister(int register_index);
virtual void PushBacktrack(Label* label);
diff --git a/deps/v8/src/regexp/ia32/regexp-macro-assembler-ia32.cc b/deps/v8/src/regexp/ia32/regexp-macro-assembler-ia32.cc
index eb42c23215..5ee7b90988 100644
--- a/deps/v8/src/regexp/ia32/regexp-macro-assembler-ia32.cc
+++ b/deps/v8/src/regexp/ia32/regexp-macro-assembler-ia32.cc
@@ -34,6 +34,9 @@ namespace internal {
*
* Each call to a public method should retain this convention.
* The stack will have the following structure:
+ * - Address regexp (address of the JSRegExp object; unused in
+ * native code, passed to match signature of
+ * the interpreter)
* - Isolate* isolate (address of the current isolate)
* - direct_call (if 1, direct call from JavaScript code, if 0
* call through the runtime system)
@@ -73,7 +76,8 @@ namespace internal {
* int num_capture_registers,
* byte* stack_area_base,
* bool direct_call = false,
- * Isolate* isolate);
+ * Isolate* isolate,
+ * Address regexp);
*/
#define __ ACCESS_MASM(masm_)
@@ -161,14 +165,12 @@ void RegExpMacroAssemblerIA32::CheckCharacterGT(uc16 limit, Label* on_greater) {
BranchOrBacktrack(greater, on_greater);
}
-
-void RegExpMacroAssemblerIA32::CheckAtStart(Label* on_at_start) {
- __ lea(eax, Operand(edi, -char_size()));
+void RegExpMacroAssemblerIA32::CheckAtStart(int cp_offset, Label* on_at_start) {
+ __ lea(eax, Operand(edi, -char_size() + cp_offset * char_size()));
__ cmp(eax, Operand(ebp, kStringStartMinusOne));
BranchOrBacktrack(equal, on_at_start);
}
-
void RegExpMacroAssemblerIA32::CheckNotAtStart(int cp_offset,
Label* on_not_at_start) {
__ lea(eax, Operand(edi, -char_size() + cp_offset * char_size()));
@@ -684,7 +686,7 @@ Handle<HeapObject> RegExpMacroAssemblerIA32::GetCode(Handle<String> source) {
Label stack_ok;
ExternalReference stack_limit =
- ExternalReference::address_of_stack_limit(isolate());
+ ExternalReference::address_of_jslimit(isolate());
__ mov(ecx, esp);
__ sub(ecx, StaticVariable(stack_limit));
// Handle it if the stack pointer is already below the stack limit.
@@ -971,15 +973,19 @@ RegExpMacroAssembler::IrregexpImplementation
return kIA32Implementation;
}
+void RegExpMacroAssemblerIA32::LoadCurrentCharacterImpl(int cp_offset,
+ Label* on_end_of_input,
+ bool check_bounds,
+ int characters,
+ int eats_at_least) {
+ // It's possible to preload a small number of characters when each success
+ // path requires a large number of characters, but not the reverse.
+ DCHECK_GE(eats_at_least, characters);
-void RegExpMacroAssemblerIA32::LoadCurrentCharacter(int cp_offset,
- Label* on_end_of_input,
- bool check_bounds,
- int characters) {
DCHECK(cp_offset < (1<<30)); // Be sane! (And ensure negation works)
if (check_bounds) {
if (cp_offset >= 0) {
- CheckPosition(cp_offset + characters - 1, on_end_of_input);
+ CheckPosition(cp_offset + eats_at_least - 1, on_end_of_input);
} else {
CheckPosition(cp_offset, on_end_of_input);
}
@@ -987,7 +993,6 @@ void RegExpMacroAssemblerIA32::LoadCurrentCharacter(int cp_offset,
LoadCurrentCharacterUnchecked(cp_offset, characters);
}
-
void RegExpMacroAssemblerIA32::PopCurrentPosition() {
Pop(edi);
}
@@ -1120,7 +1125,8 @@ int RegExpMacroAssemblerIA32::CheckStackGuardState(Address* return_address,
return NativeRegExpMacroAssembler::CheckStackGuardState(
frame_entry<Isolate*>(re_frame, kIsolate),
frame_entry<int>(re_frame, kStartIndex),
- frame_entry<int>(re_frame, kDirectCall) == 1, return_address, re_code,
+ static_cast<RegExp::CallOrigin>(frame_entry<int>(re_frame, kDirectCall)),
+ return_address, re_code,
frame_entry_address<Address>(re_frame, kInputString),
frame_entry_address<const byte*>(re_frame, kInputStart),
frame_entry_address<const byte*>(re_frame, kInputEnd));
@@ -1214,7 +1220,7 @@ void RegExpMacroAssemblerIA32::CheckPreemption() {
// Check for preemption.
Label no_preempt;
ExternalReference stack_limit =
- ExternalReference::address_of_stack_limit(isolate());
+ ExternalReference::address_of_jslimit(isolate());
__ cmp(esp, StaticVariable(stack_limit));
__ j(above, &no_preempt);
@@ -1227,7 +1233,7 @@ void RegExpMacroAssemblerIA32::CheckPreemption() {
void RegExpMacroAssemblerIA32::CheckStackLimit() {
Label no_stack_overflow;
ExternalReference stack_limit =
- ExternalReference::address_of_regexp_stack_limit(isolate());
+ ExternalReference::address_of_regexp_stack_limit_address(isolate());
__ cmp(backtrack_stackpointer(), StaticVariable(stack_limit));
__ j(above, &no_stack_overflow);
diff --git a/deps/v8/src/regexp/ia32/regexp-macro-assembler-ia32.h b/deps/v8/src/regexp/ia32/regexp-macro-assembler-ia32.h
index 914552cc93..3464d81fac 100644
--- a/deps/v8/src/regexp/ia32/regexp-macro-assembler-ia32.h
+++ b/deps/v8/src/regexp/ia32/regexp-macro-assembler-ia32.h
@@ -23,7 +23,7 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerIA32
virtual void AdvanceRegister(int reg, int by);
virtual void Backtrack();
virtual void Bind(Label* label);
- virtual void CheckAtStart(Label* on_at_start);
+ virtual void CheckAtStart(int cp_offset, Label* on_at_start);
virtual void CheckCharacter(uint32_t c, Label* on_equal);
virtual void CheckCharacterAfterAnd(uint32_t c,
uint32_t mask,
@@ -66,10 +66,9 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerIA32
virtual void IfRegisterLT(int reg, int comparand, Label* if_lt);
virtual void IfRegisterEqPos(int reg, Label* if_eq);
virtual IrregexpImplementation Implementation();
- virtual void LoadCurrentCharacter(int cp_offset,
- Label* on_end_of_input,
- bool check_bounds = true,
- int characters = 1);
+ virtual void LoadCurrentCharacterImpl(int cp_offset, Label* on_end_of_input,
+ bool check_bounds, int characters,
+ int eats_at_least);
virtual void PopCurrentPosition();
virtual void PopRegister(int register_index);
virtual void PushBacktrack(Label* label);
diff --git a/deps/v8/src/regexp/mips/regexp-macro-assembler-mips.cc b/deps/v8/src/regexp/mips/regexp-macro-assembler-mips.cc
index e8104ced7e..8d2800f004 100644
--- a/deps/v8/src/regexp/mips/regexp-macro-assembler-mips.cc
+++ b/deps/v8/src/regexp/mips/regexp-macro-assembler-mips.cc
@@ -178,9 +178,10 @@ void RegExpMacroAssemblerMIPS::CheckCharacterGT(uc16 limit, Label* on_greater) {
}
-void RegExpMacroAssemblerMIPS::CheckAtStart(Label* on_at_start) {
+void RegExpMacroAssemblerMIPS::CheckAtStart(int cp_offset, Label* on_at_start) {
__ lw(a1, MemOperand(frame_pointer(), kStringStartMinusOne));
- __ Addu(a0, current_input_offset(), Operand(-char_size()));
+ __ Addu(a0, current_input_offset(),
+ Operand(-char_size() + cp_offset * char_size()));
BranchOrBacktrack(on_at_start, eq, a0, Operand(a1));
}
@@ -647,7 +648,7 @@ Handle<HeapObject> RegExpMacroAssemblerMIPS::GetCode(Handle<String> source) {
Label stack_ok;
ExternalReference stack_limit =
- ExternalReference::address_of_stack_limit(masm_->isolate());
+ ExternalReference::address_of_jslimit(masm_->isolate());
__ li(a0, Operand(stack_limit));
__ lw(a0, MemOperand(a0));
__ Subu(a0, sp, a0);
@@ -946,15 +947,19 @@ RegExpMacroAssembler::IrregexpImplementation
return kMIPSImplementation;
}
+void RegExpMacroAssemblerMIPS::LoadCurrentCharacterImpl(int cp_offset,
+ Label* on_end_of_input,
+ bool check_bounds,
+ int characters,
+ int eats_at_least) {
+ // It's possible to preload a small number of characters when each success
+ // path requires a large number of characters, but not the reverse.
+ DCHECK_GE(eats_at_least, characters);
-void RegExpMacroAssemblerMIPS::LoadCurrentCharacter(int cp_offset,
- Label* on_end_of_input,
- bool check_bounds,
- int characters) {
DCHECK(cp_offset < (1<<30)); // Be sane! (And ensure negation works).
if (check_bounds) {
if (cp_offset >= 0) {
- CheckPosition(cp_offset + characters - 1, on_end_of_input);
+ CheckPosition(cp_offset + eats_at_least - 1, on_end_of_input);
} else {
CheckPosition(cp_offset, on_end_of_input);
}
@@ -962,7 +967,6 @@ void RegExpMacroAssemblerMIPS::LoadCurrentCharacter(int cp_offset,
LoadCurrentCharacterUnchecked(cp_offset, characters);
}
-
void RegExpMacroAssemblerMIPS::PopCurrentPosition() {
Pop(current_input_offset());
}
@@ -1176,7 +1180,8 @@ int RegExpMacroAssemblerMIPS::CheckStackGuardState(Address* return_address,
return NativeRegExpMacroAssembler::CheckStackGuardState(
frame_entry<Isolate*>(re_frame, kIsolate),
frame_entry<int>(re_frame, kStartIndex),
- frame_entry<int>(re_frame, kDirectCall) == 1, return_address, re_code,
+ static_cast<RegExp::CallOrigin>(frame_entry<int>(re_frame, kDirectCall)),
+ return_address, re_code,
frame_entry_address<Address>(re_frame, kInputString),
frame_entry_address<const byte*>(re_frame, kInputStart),
frame_entry_address<const byte*>(re_frame, kInputEnd));
@@ -1267,7 +1272,7 @@ void RegExpMacroAssemblerMIPS::Pop(Register target) {
void RegExpMacroAssemblerMIPS::CheckPreemption() {
// Check for preemption.
ExternalReference stack_limit =
- ExternalReference::address_of_stack_limit(masm_->isolate());
+ ExternalReference::address_of_jslimit(masm_->isolate());
__ li(a0, Operand(stack_limit));
__ lw(a0, MemOperand(a0));
SafeCall(&check_preempt_label_, ls, sp, Operand(a0));
@@ -1276,7 +1281,8 @@ void RegExpMacroAssemblerMIPS::CheckPreemption() {
void RegExpMacroAssemblerMIPS::CheckStackLimit() {
ExternalReference stack_limit =
- ExternalReference::address_of_regexp_stack_limit(masm_->isolate());
+ ExternalReference::address_of_regexp_stack_limit_address(
+ masm_->isolate());
__ li(a0, Operand(stack_limit));
__ lw(a0, MemOperand(a0));
diff --git a/deps/v8/src/regexp/mips/regexp-macro-assembler-mips.h b/deps/v8/src/regexp/mips/regexp-macro-assembler-mips.h
index b785910466..084436bbbd 100644
--- a/deps/v8/src/regexp/mips/regexp-macro-assembler-mips.h
+++ b/deps/v8/src/regexp/mips/regexp-macro-assembler-mips.h
@@ -23,7 +23,7 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerMIPS
virtual void AdvanceRegister(int reg, int by);
virtual void Backtrack();
virtual void Bind(Label* label);
- virtual void CheckAtStart(Label* on_at_start);
+ virtual void CheckAtStart(int cp_offset, Label* on_at_start);
virtual void CheckCharacter(uint32_t c, Label* on_equal);
virtual void CheckCharacterAfterAnd(uint32_t c,
uint32_t mask,
@@ -67,10 +67,9 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerMIPS
virtual void IfRegisterLT(int reg, int comparand, Label* if_lt);
virtual void IfRegisterEqPos(int reg, Label* if_eq);
virtual IrregexpImplementation Implementation();
- virtual void LoadCurrentCharacter(int cp_offset,
- Label* on_end_of_input,
- bool check_bounds = true,
- int characters = 1);
+ virtual void LoadCurrentCharacterImpl(int cp_offset, Label* on_end_of_input,
+ bool check_bounds, int characters,
+ int eats_at_least);
virtual void PopCurrentPosition();
virtual void PopRegister(int register_index);
virtual void PushBacktrack(Label* label);
diff --git a/deps/v8/src/regexp/mips64/regexp-macro-assembler-mips64.cc b/deps/v8/src/regexp/mips64/regexp-macro-assembler-mips64.cc
index 239cc87ae8..2d5402ebdb 100644
--- a/deps/v8/src/regexp/mips64/regexp-macro-assembler-mips64.cc
+++ b/deps/v8/src/regexp/mips64/regexp-macro-assembler-mips64.cc
@@ -214,9 +214,10 @@ void RegExpMacroAssemblerMIPS::CheckCharacterGT(uc16 limit, Label* on_greater) {
}
-void RegExpMacroAssemblerMIPS::CheckAtStart(Label* on_at_start) {
+void RegExpMacroAssemblerMIPS::CheckAtStart(int cp_offset, Label* on_at_start) {
__ Ld(a1, MemOperand(frame_pointer(), kStringStartMinusOne));
- __ Daddu(a0, current_input_offset(), Operand(-char_size()));
+ __ Daddu(a0, current_input_offset(),
+ Operand(-char_size() + cp_offset * char_size()));
BranchOrBacktrack(on_at_start, eq, a0, Operand(a1));
}
@@ -683,7 +684,7 @@ Handle<HeapObject> RegExpMacroAssemblerMIPS::GetCode(Handle<String> source) {
Label stack_ok;
ExternalReference stack_limit =
- ExternalReference::address_of_stack_limit(masm_->isolate());
+ ExternalReference::address_of_jslimit(masm_->isolate());
__ li(a0, Operand(stack_limit));
__ Ld(a0, MemOperand(a0));
__ Dsubu(a0, sp, a0);
@@ -983,15 +984,19 @@ RegExpMacroAssembler::IrregexpImplementation
return kMIPSImplementation;
}
+void RegExpMacroAssemblerMIPS::LoadCurrentCharacterImpl(int cp_offset,
+ Label* on_end_of_input,
+ bool check_bounds,
+ int characters,
+ int eats_at_least) {
+ // It's possible to preload a small number of characters when each success
+ // path requires a large number of characters, but not the reverse.
+ DCHECK_GE(eats_at_least, characters);
-void RegExpMacroAssemblerMIPS::LoadCurrentCharacter(int cp_offset,
- Label* on_end_of_input,
- bool check_bounds,
- int characters) {
DCHECK(cp_offset < (1<<30)); // Be sane! (And ensure negation works).
if (check_bounds) {
if (cp_offset >= 0) {
- CheckPosition(cp_offset + characters - 1, on_end_of_input);
+ CheckPosition(cp_offset + eats_at_least - 1, on_end_of_input);
} else {
CheckPosition(cp_offset, on_end_of_input);
}
@@ -999,7 +1004,6 @@ void RegExpMacroAssemblerMIPS::LoadCurrentCharacter(int cp_offset,
LoadCurrentCharacterUnchecked(cp_offset, characters);
}
-
void RegExpMacroAssemblerMIPS::PopCurrentPosition() {
Pop(current_input_offset());
}
@@ -1213,7 +1217,9 @@ int64_t RegExpMacroAssemblerMIPS::CheckStackGuardState(Address* return_address,
return NativeRegExpMacroAssembler::CheckStackGuardState(
frame_entry<Isolate*>(re_frame, kIsolate),
static_cast<int>(frame_entry<int64_t>(re_frame, kStartIndex)),
- frame_entry<int64_t>(re_frame, kDirectCall) == 1, return_address, re_code,
+ static_cast<RegExp::CallOrigin>(
+ frame_entry<int64_t>(re_frame, kDirectCall)),
+ return_address, re_code,
frame_entry_address<Address>(re_frame, kInputString),
frame_entry_address<const byte*>(re_frame, kInputStart),
frame_entry_address<const byte*>(re_frame, kInputEnd));
@@ -1304,7 +1310,7 @@ void RegExpMacroAssemblerMIPS::Pop(Register target) {
void RegExpMacroAssemblerMIPS::CheckPreemption() {
// Check for preemption.
ExternalReference stack_limit =
- ExternalReference::address_of_stack_limit(masm_->isolate());
+ ExternalReference::address_of_jslimit(masm_->isolate());
__ li(a0, Operand(stack_limit));
__ Ld(a0, MemOperand(a0));
SafeCall(&check_preempt_label_, ls, sp, Operand(a0));
@@ -1313,7 +1319,8 @@ void RegExpMacroAssemblerMIPS::CheckPreemption() {
void RegExpMacroAssemblerMIPS::CheckStackLimit() {
ExternalReference stack_limit =
- ExternalReference::address_of_regexp_stack_limit(masm_->isolate());
+ ExternalReference::address_of_regexp_stack_limit_address(
+ masm_->isolate());
__ li(a0, Operand(stack_limit));
__ Ld(a0, MemOperand(a0));
diff --git a/deps/v8/src/regexp/mips64/regexp-macro-assembler-mips64.h b/deps/v8/src/regexp/mips64/regexp-macro-assembler-mips64.h
index d24735d08e..9189a6a72d 100644
--- a/deps/v8/src/regexp/mips64/regexp-macro-assembler-mips64.h
+++ b/deps/v8/src/regexp/mips64/regexp-macro-assembler-mips64.h
@@ -23,7 +23,7 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerMIPS
virtual void AdvanceRegister(int reg, int by);
virtual void Backtrack();
virtual void Bind(Label* label);
- virtual void CheckAtStart(Label* on_at_start);
+ virtual void CheckAtStart(int cp_offset, Label* on_at_start);
virtual void CheckCharacter(uint32_t c, Label* on_equal);
virtual void CheckCharacterAfterAnd(uint32_t c,
uint32_t mask,
@@ -67,10 +67,9 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerMIPS
virtual void IfRegisterLT(int reg, int comparand, Label* if_lt);
virtual void IfRegisterEqPos(int reg, Label* if_eq);
virtual IrregexpImplementation Implementation();
- virtual void LoadCurrentCharacter(int cp_offset,
- Label* on_end_of_input,
- bool check_bounds = true,
- int characters = 1);
+ virtual void LoadCurrentCharacterImpl(int cp_offset, Label* on_end_of_input,
+ bool check_bounds, int characters,
+ int eats_at_least);
virtual void PopCurrentPosition();
virtual void PopRegister(int register_index);
virtual void PushBacktrack(Label* label);
diff --git a/deps/v8/src/regexp/ppc/regexp-macro-assembler-ppc.cc b/deps/v8/src/regexp/ppc/regexp-macro-assembler-ppc.cc
index bce612e66f..13b5c85605 100644
--- a/deps/v8/src/regexp/ppc/regexp-macro-assembler-ppc.cc
+++ b/deps/v8/src/regexp/ppc/regexp-macro-assembler-ppc.cc
@@ -189,15 +189,14 @@ void RegExpMacroAssemblerPPC::CheckCharacterGT(uc16 limit, Label* on_greater) {
BranchOrBacktrack(gt, on_greater);
}
-
-void RegExpMacroAssemblerPPC::CheckAtStart(Label* on_at_start) {
+void RegExpMacroAssemblerPPC::CheckAtStart(int cp_offset, Label* on_at_start) {
__ LoadP(r4, MemOperand(frame_pointer(), kStringStartMinusOne));
- __ addi(r3, current_input_offset(), Operand(-char_size()));
+ __ addi(r3, current_input_offset(),
+ Operand(-char_size() + cp_offset * char_size()));
__ cmp(r3, r4);
BranchOrBacktrack(eq, on_at_start);
}
-
void RegExpMacroAssemblerPPC::CheckNotAtStart(int cp_offset,
Label* on_not_at_start) {
__ LoadP(r4, MemOperand(frame_pointer(), kStringStartMinusOne));
@@ -689,7 +688,7 @@ Handle<HeapObject> RegExpMacroAssemblerPPC::GetCode(Handle<String> source) {
Label stack_ok;
ExternalReference stack_limit =
- ExternalReference::address_of_stack_limit(isolate());
+ ExternalReference::address_of_jslimit(isolate());
__ mov(r3, Operand(stack_limit));
__ LoadP(r3, MemOperand(r3));
__ sub(r3, sp, r3, LeaveOE, SetRC);
@@ -978,15 +977,19 @@ RegExpMacroAssemblerPPC::Implementation() {
return kPPCImplementation;
}
+void RegExpMacroAssemblerPPC::LoadCurrentCharacterImpl(int cp_offset,
+ Label* on_end_of_input,
+ bool check_bounds,
+ int characters,
+ int eats_at_least) {
+ // It's possible to preload a small number of characters when each success
+ // path requires a large number of characters, but not the reverse.
+ DCHECK_GE(eats_at_least, characters);
-void RegExpMacroAssemblerPPC::LoadCurrentCharacter(int cp_offset,
- Label* on_end_of_input,
- bool check_bounds,
- int characters) {
DCHECK(cp_offset < (1 << 30)); // Be sane! (And ensure negation works)
if (check_bounds) {
if (cp_offset >= 0) {
- CheckPosition(cp_offset + characters - 1, on_end_of_input);
+ CheckPosition(cp_offset + eats_at_least - 1, on_end_of_input);
} else {
CheckPosition(cp_offset, on_end_of_input);
}
@@ -994,7 +997,6 @@ void RegExpMacroAssemblerPPC::LoadCurrentCharacter(int cp_offset,
LoadCurrentCharacterUnchecked(cp_offset, characters);
}
-
void RegExpMacroAssemblerPPC::PopCurrentPosition() {
Pop(current_input_offset());
}
@@ -1177,8 +1179,10 @@ int RegExpMacroAssemblerPPC::CheckStackGuardState(Address* return_address,
return NativeRegExpMacroAssembler::CheckStackGuardState(
frame_entry<Isolate*>(re_frame, kIsolate),
frame_entry<intptr_t>(re_frame, kStartIndex),
- frame_entry<intptr_t>(re_frame, kDirectCall) == 1, return_address,
- re_code, frame_entry_address<Address>(re_frame, kInputString),
+ static_cast<RegExp::CallOrigin>(
+ frame_entry<intptr_t>(re_frame, kDirectCall)),
+ return_address, re_code,
+ frame_entry_address<Address>(re_frame, kInputString),
frame_entry_address<const byte*>(re_frame, kInputStart),
frame_entry_address<const byte*>(re_frame, kInputEnd));
}
@@ -1267,7 +1271,7 @@ void RegExpMacroAssemblerPPC::Pop(Register target) {
void RegExpMacroAssemblerPPC::CheckPreemption() {
// Check for preemption.
ExternalReference stack_limit =
- ExternalReference::address_of_stack_limit(isolate());
+ ExternalReference::address_of_jslimit(isolate());
__ mov(r3, Operand(stack_limit));
__ LoadP(r3, MemOperand(r3));
__ cmpl(sp, r3);
@@ -1277,7 +1281,7 @@ void RegExpMacroAssemblerPPC::CheckPreemption() {
void RegExpMacroAssemblerPPC::CheckStackLimit() {
ExternalReference stack_limit =
- ExternalReference::address_of_regexp_stack_limit(isolate());
+ ExternalReference::address_of_regexp_stack_limit_address(isolate());
__ mov(r3, Operand(stack_limit));
__ LoadP(r3, MemOperand(r3));
__ cmpl(backtrack_stackpointer(), r3);
diff --git a/deps/v8/src/regexp/ppc/regexp-macro-assembler-ppc.h b/deps/v8/src/regexp/ppc/regexp-macro-assembler-ppc.h
index 418a01a9a4..60236a4000 100644
--- a/deps/v8/src/regexp/ppc/regexp-macro-assembler-ppc.h
+++ b/deps/v8/src/regexp/ppc/regexp-macro-assembler-ppc.h
@@ -23,7 +23,7 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerPPC
virtual void AdvanceRegister(int reg, int by);
virtual void Backtrack();
virtual void Bind(Label* label);
- virtual void CheckAtStart(Label* on_at_start);
+ virtual void CheckAtStart(int cp_offset, Label* on_at_start);
virtual void CheckCharacter(unsigned c, Label* on_equal);
virtual void CheckCharacterAfterAnd(unsigned c, unsigned mask,
Label* on_equal);
@@ -59,9 +59,9 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerPPC
virtual void IfRegisterLT(int reg, int comparand, Label* if_lt);
virtual void IfRegisterEqPos(int reg, Label* if_eq);
virtual IrregexpImplementation Implementation();
- virtual void LoadCurrentCharacter(int cp_offset, Label* on_end_of_input,
- bool check_bounds = true,
- int characters = 1);
+ virtual void LoadCurrentCharacterImpl(int cp_offset, Label* on_end_of_input,
+ bool check_bounds, int characters,
+ int eats_at_least);
virtual void PopCurrentPosition();
virtual void PopRegister(int register_index);
virtual void PushBacktrack(Label* label);
diff --git a/deps/v8/src/regexp/regexp-bytecode-generator.cc b/deps/v8/src/regexp/regexp-bytecode-generator.cc
index ee3b4015d5..85b144438e 100644
--- a/deps/v8/src/regexp/regexp-bytecode-generator.cc
+++ b/deps/v8/src/regexp/regexp-bytecode-generator.cc
@@ -171,10 +171,19 @@ void RegExpBytecodeGenerator::CheckGreedyLoop(
EmitOrLink(on_tos_equals_current_position);
}
-void RegExpBytecodeGenerator::LoadCurrentCharacter(int cp_offset,
- Label* on_failure,
- bool check_bounds,
- int characters) {
+void RegExpBytecodeGenerator::LoadCurrentCharacterImpl(int cp_offset,
+ Label* on_failure,
+ bool check_bounds,
+ int characters,
+ int eats_at_least) {
+ DCHECK_GE(eats_at_least, characters);
+ if (eats_at_least > characters && check_bounds) {
+ DCHECK(is_uint24(cp_offset + eats_at_least));
+ Emit(BC_CHECK_CURRENT_POSITION, cp_offset + eats_at_least);
+ EmitOrLink(on_failure);
+ check_bounds = false; // Load below doesn't need to check.
+ }
+
DCHECK_LE(kMinCPOffset, cp_offset);
DCHECK_GE(kMaxCPOffset, cp_offset);
int bytecode;
@@ -221,8 +230,8 @@ void RegExpBytecodeGenerator::CheckCharacter(uint32_t c, Label* on_equal) {
EmitOrLink(on_equal);
}
-void RegExpBytecodeGenerator::CheckAtStart(Label* on_at_start) {
- Emit(BC_CHECK_AT_START, 0);
+void RegExpBytecodeGenerator::CheckAtStart(int cp_offset, Label* on_at_start) {
+ Emit(BC_CHECK_AT_START, cp_offset);
EmitOrLink(on_at_start);
}
diff --git a/deps/v8/src/regexp/regexp-bytecode-generator.h b/deps/v8/src/regexp/regexp-bytecode-generator.h
index b7207e977c..84b7ce361c 100644
--- a/deps/v8/src/regexp/regexp-bytecode-generator.h
+++ b/deps/v8/src/regexp/regexp-bytecode-generator.h
@@ -46,16 +46,16 @@ class V8_EXPORT_PRIVATE RegExpBytecodeGenerator : public RegExpMacroAssembler {
virtual void ReadCurrentPositionFromRegister(int reg);
virtual void WriteStackPointerToRegister(int reg);
virtual void ReadStackPointerFromRegister(int reg);
- virtual void LoadCurrentCharacter(int cp_offset, Label* on_end_of_input,
- bool check_bounds = true,
- int characters = 1);
+ virtual void LoadCurrentCharacterImpl(int cp_offset, Label* on_end_of_input,
+ bool check_bounds, int characters,
+ int eats_at_least);
virtual void CheckCharacter(unsigned c, Label* on_equal);
virtual void CheckCharacterAfterAnd(unsigned c, unsigned mask,
Label* on_equal);
virtual void CheckCharacterGT(uc16 limit, Label* on_greater);
virtual void CheckCharacterLT(uc16 limit, Label* on_less);
virtual void CheckGreedyLoop(Label* on_tos_equals_current_position);
- virtual void CheckAtStart(Label* on_at_start);
+ virtual void CheckAtStart(int cp_offset, Label* on_at_start);
virtual void CheckNotAtStart(int cp_offset, Label* on_not_at_start);
virtual void CheckNotCharacter(unsigned c, Label* on_not_equal);
virtual void CheckNotCharacterAfterAnd(unsigned c, unsigned mask,
diff --git a/deps/v8/src/regexp/regexp-bytecodes.h b/deps/v8/src/regexp/regexp-bytecodes.h
index 8b1468c1bf..3dd7637b88 100644
--- a/deps/v8/src/regexp/regexp-bytecodes.h
+++ b/deps/v8/src/regexp/regexp-bytecodes.h
@@ -5,6 +5,8 @@
#ifndef V8_REGEXP_REGEXP_BYTECODES_H_
#define V8_REGEXP_REGEXP_BYTECODES_H_
+#include "src/base/macros.h"
+
namespace v8 {
namespace internal {
@@ -67,16 +69,43 @@ const int BYTECODE_SHIFT = 8;
V(CHECK_NOT_AT_START, 48, 8) /* bc8 offset24 addr32 */ \
V(CHECK_GREEDY, 49, 8) /* bc8 pad24 addr32 */ \
V(ADVANCE_CP_AND_GOTO, 50, 8) /* bc8 offset24 addr32 */ \
- V(SET_CURRENT_POSITION_FROM_END, 51, 4) /* bc8 idx24 */
+ V(SET_CURRENT_POSITION_FROM_END, 51, 4) /* bc8 idx24 */ \
+ V(CHECK_CURRENT_POSITION, 52, 8) /* bc8 idx24 addr32 */
+
+#define COUNT(...) +1
+static constexpr int kRegExpBytecodeCount = BYTECODE_ITERATOR(COUNT);
+#undef COUNT
+
+// Just making sure we assigned values above properly. They should be
+// contiguous, strictly increasing, and start at 0.
+// TODO(jgruber): Do not explicitly assign values, instead generate them
+// implicitly from the list order.
+STATIC_ASSERT(kRegExpBytecodeCount == 53);
-#define DECLARE_BYTECODES(name, code, length) static const int BC_##name = code;
+#define DECLARE_BYTECODES(name, code, length) \
+ static constexpr int BC_##name = code;
BYTECODE_ITERATOR(DECLARE_BYTECODES)
#undef DECLARE_BYTECODES
-#define DECLARE_BYTECODE_LENGTH(name, code, length) \
- static const int BC_##name##_LENGTH = length;
-BYTECODE_ITERATOR(DECLARE_BYTECODE_LENGTH)
+static constexpr int kRegExpBytecodeLengths[] = {
+#define DECLARE_BYTECODE_LENGTH(name, code, length) length,
+ BYTECODE_ITERATOR(DECLARE_BYTECODE_LENGTH)
#undef DECLARE_BYTECODE_LENGTH
+};
+
+inline constexpr int RegExpBytecodeLength(int bytecode) {
+ return kRegExpBytecodeLengths[bytecode];
+}
+
+static const char* const kRegExpBytecodeNames[] = {
+#define DECLARE_BYTECODE_NAME(name, ...) #name,
+ BYTECODE_ITERATOR(DECLARE_BYTECODE_NAME)
+#undef DECLARE_BYTECODE_NAME
+};
+
+inline const char* RegExpBytecodeName(int bytecode) {
+ return kRegExpBytecodeNames[bytecode];
+}
} // namespace internal
} // namespace v8
diff --git a/deps/v8/src/regexp/regexp-compiler-tonode.cc b/deps/v8/src/regexp/regexp-compiler-tonode.cc
index d12c35682e..2d86d3ea9e 100644
--- a/deps/v8/src/regexp/regexp-compiler-tonode.cc
+++ b/deps/v8/src/regexp/regexp-compiler-tonode.cc
@@ -1627,8 +1627,8 @@ RegExpNode* RegExpQuantifier::ToNode(int min, int max, bool is_greedy,
bool needs_counter = has_min || has_max;
int reg_ctr = needs_counter ? compiler->AllocateRegister()
: RegExpCompiler::kNoRegister;
- LoopChoiceNode* center = new (zone)
- LoopChoiceNode(body->min_match() == 0, compiler->read_backward(), zone);
+ LoopChoiceNode* center = new (zone) LoopChoiceNode(
+ body->min_match() == 0, compiler->read_backward(), min, zone);
if (not_at_start && !compiler->read_backward()) center->set_not_at_start();
RegExpNode* loop_return =
needs_counter ? static_cast<RegExpNode*>(
@@ -1668,7 +1668,7 @@ RegExpNode* RegExpQuantifier::ToNode(int min, int max, bool is_greedy,
center->AddLoopAlternative(body_alt);
}
if (needs_counter) {
- return ActionNode::SetRegister(reg_ctr, 0, center);
+ return ActionNode::SetRegisterForLoop(reg_ctr, 0, center);
} else {
return center;
}
diff --git a/deps/v8/src/regexp/regexp-compiler.cc b/deps/v8/src/regexp/regexp-compiler.cc
index c70bbc3e4a..85da69f308 100644
--- a/deps/v8/src/regexp/regexp-compiler.cc
+++ b/deps/v8/src/regexp/regexp-compiler.cc
@@ -4,13 +4,12 @@
#include "src/regexp/regexp-compiler.h"
-#include "src/diagnostics/code-tracer.h"
+#include "src/base/safe_conversions.h"
#include "src/execution/isolate.h"
#include "src/objects/objects-inl.h"
#include "src/regexp/regexp-macro-assembler-arch.h"
#include "src/regexp/regexp-macro-assembler-tracer.h"
#include "src/strings/unicode-inl.h"
-#include "src/utils/ostreams.h"
#include "src/zone/zone-list-inl.h"
#ifdef V8_INTL_SUPPORT
@@ -272,13 +271,7 @@ RegExpCompiler::CompilationResult RegExpCompiler::Assemble(
Handle<HeapObject> code = macro_assembler_->GetCode(pattern);
isolate->IncreaseTotalRegexpCodeGenerated(code->Size());
work_list_ = nullptr;
-#ifdef ENABLE_DISASSEMBLER
- if (FLAG_print_code && !FLAG_regexp_interpret_all) {
- CodeTracer::Scope trace_scope(isolate->GetCodeTracer());
- OFStream os(trace_scope.file());
- Handle<Code>::cast(code)->Disassemble(pattern->ToCString().get(), os);
- }
-#endif
+
#ifdef DEBUG
if (FLAG_trace_regexp_assembler) {
delete macro_assembler_;
@@ -422,14 +415,14 @@ void Trace::PerformDeferredActions(RegExpMacroAssembler* assembler,
action = action->next()) {
if (action->Mentions(reg)) {
switch (action->action_type()) {
- case ActionNode::SET_REGISTER: {
- Trace::DeferredSetRegister* psr =
- static_cast<Trace::DeferredSetRegister*>(action);
+ case ActionNode::SET_REGISTER_FOR_LOOP: {
+ Trace::DeferredSetRegisterForLoop* psr =
+ static_cast<Trace::DeferredSetRegisterForLoop*>(action);
if (!absolute) {
value += psr->value();
absolute = true;
}
- // SET_REGISTER is currently only used for newly introduced loop
+ // SET_REGISTER_FOR_LOOP is only used for newly introduced loop
// counters. They can have a significant previous value if they
// occur in a loop. TODO(lrn): Propagate this information, so
// we can set undo_action to IGNORE if we know there is no value to
@@ -634,9 +627,10 @@ void GuardedAlternative::AddGuard(Guard* guard, Zone* zone) {
guards_->Add(guard, zone);
}
-ActionNode* ActionNode::SetRegister(int reg, int val, RegExpNode* on_success) {
+ActionNode* ActionNode::SetRegisterForLoop(int reg, int val,
+ RegExpNode* on_success) {
ActionNode* result =
- new (on_success->zone()) ActionNode(SET_REGISTER, on_success);
+ new (on_success->zone()) ActionNode(SET_REGISTER_FOR_LOOP, on_success);
result->data_.u_store_register.reg = reg;
result->data_.u_store_register.value = val;
return result;
@@ -705,10 +699,6 @@ ActionNode* ActionNode::EmptyMatchCheck(int start_register,
FOR_EACH_NODE_TYPE(DEFINE_ACCEPT)
#undef DEFINE_ACCEPT
-void LoopChoiceNode::Accept(NodeVisitor* visitor) {
- visitor->VisitLoopChoice(this);
-}
-
// -------------------------------------------------------------------
// Emit code.
@@ -1326,12 +1316,6 @@ bool RegExpNode::KeepRecursing(RegExpCompiler* compiler) {
compiler->recursion_depth() <= RegExpCompiler::kMaxRecursion;
}
-int ActionNode::EatsAtLeast(int still_to_find, int budget, bool not_at_start) {
- if (budget <= 0) return 0;
- if (action_type_ == POSITIVE_SUBMATCH_SUCCESS) return 0; // Rewinds input!
- return on_success()->EatsAtLeast(still_to_find, budget - 1, not_at_start);
-}
-
void ActionNode::FillInBMInfo(Isolate* isolate, int offset, int budget,
BoyerMooreLookahead* bm, bool not_at_start) {
if (action_type_ == POSITIVE_SUBMATCH_SUCCESS) {
@@ -1344,16 +1328,16 @@ void ActionNode::FillInBMInfo(Isolate* isolate, int offset, int budget,
SaveBMInfo(bm, not_at_start, offset);
}
-int AssertionNode::EatsAtLeast(int still_to_find, int budget,
- bool not_at_start) {
- if (budget <= 0) return 0;
- // If we know we are not at the start and we are asked "how many characters
- // will you match if you succeed?" then we can answer anything since false
- // implies false. So lets just return the max answer (still_to_find) since
- // that won't prevent us from preloading a lot of characters for the other
- // branches in the node graph.
- if (assertion_type() == AT_START && not_at_start) return still_to_find;
- return on_success()->EatsAtLeast(still_to_find, budget - 1, not_at_start);
+void ActionNode::GetQuickCheckDetails(QuickCheckDetails* details,
+ RegExpCompiler* compiler, int filled_in,
+ bool not_at_start) {
+ if (action_type_ == SET_REGISTER_FOR_LOOP) {
+ on_success()->GetQuickCheckDetailsFromLoopEntry(details, compiler,
+ filled_in, not_at_start);
+ } else {
+ on_success()->GetQuickCheckDetails(details, compiler, filled_in,
+ not_at_start);
+ }
}
void AssertionNode::FillInBMInfo(Isolate* isolate, int offset, int budget,
@@ -1364,68 +1348,13 @@ void AssertionNode::FillInBMInfo(Isolate* isolate, int offset, int budget,
SaveBMInfo(bm, not_at_start, offset);
}
-int BackReferenceNode::EatsAtLeast(int still_to_find, int budget,
- bool not_at_start) {
- if (read_backward()) return 0;
- if (budget <= 0) return 0;
- return on_success()->EatsAtLeast(still_to_find, budget - 1, not_at_start);
-}
-
-int TextNode::EatsAtLeast(int still_to_find, int budget, bool not_at_start) {
- if (read_backward()) return 0;
- int answer = Length();
- if (answer >= still_to_find) return answer;
- if (budget <= 0) return answer;
- // We are not at start after this node so we set the last argument to 'true'.
- return answer +
- on_success()->EatsAtLeast(still_to_find - answer, budget - 1, true);
-}
-
-int NegativeLookaroundChoiceNode::EatsAtLeast(int still_to_find, int budget,
- bool not_at_start) {
- if (budget <= 0) return 0;
- // Alternative 0 is the negative lookahead, alternative 1 is what comes
- // afterwards.
- RegExpNode* node = alternatives_->at(1).node();
- return node->EatsAtLeast(still_to_find, budget - 1, not_at_start);
-}
-
void NegativeLookaroundChoiceNode::GetQuickCheckDetails(
QuickCheckDetails* details, RegExpCompiler* compiler, int filled_in,
bool not_at_start) {
- // Alternative 0 is the negative lookahead, alternative 1 is what comes
- // afterwards.
- RegExpNode* node = alternatives_->at(1).node();
+ RegExpNode* node = continue_node();
return node->GetQuickCheckDetails(details, compiler, filled_in, not_at_start);
}
-int ChoiceNode::EatsAtLeastHelper(int still_to_find, int budget,
- RegExpNode* ignore_this_node,
- bool not_at_start) {
- if (budget <= 0) return 0;
- int min = 100;
- int choice_count = alternatives_->length();
- budget = (budget - 1) / choice_count;
- for (int i = 0; i < choice_count; i++) {
- RegExpNode* node = alternatives_->at(i).node();
- if (node == ignore_this_node) continue;
- int node_eats_at_least =
- node->EatsAtLeast(still_to_find, budget, not_at_start);
- if (node_eats_at_least < min) min = node_eats_at_least;
- if (min == 0) return 0;
- }
- return min;
-}
-
-int LoopChoiceNode::EatsAtLeast(int still_to_find, int budget,
- bool not_at_start) {
- return EatsAtLeastHelper(still_to_find, budget - 1, loop_node_, not_at_start);
-}
-
-int ChoiceNode::EatsAtLeast(int still_to_find, int budget, bool not_at_start) {
- return EatsAtLeastHelper(still_to_find, budget, nullptr, not_at_start);
-}
-
// Takes the left-most 1-bit and smears it out, setting all bits to its right.
static inline uint32_t SmearBitsRight(uint32_t v) {
v |= v >> 1;
@@ -1459,12 +1388,78 @@ bool QuickCheckDetails::Rationalize(bool asc) {
return found_useful_op;
}
+int RegExpNode::EatsAtLeast(bool not_at_start) {
+ return not_at_start ? eats_at_least_.eats_at_least_from_not_start
+ : eats_at_least_.eats_at_least_from_possibly_start;
+}
+
+EatsAtLeastInfo RegExpNode::EatsAtLeastFromLoopEntry() {
+ // SET_REGISTER_FOR_LOOP is only used to initialize loop counters, and it
+ // implies that the following node must be a LoopChoiceNode. If we need to
+ // set registers to constant values for other reasons, we could introduce a
+ // new action type SET_REGISTER that doesn't imply anything about its
+ // successor.
+ UNREACHABLE();
+}
+
+void RegExpNode::GetQuickCheckDetailsFromLoopEntry(QuickCheckDetails* details,
+ RegExpCompiler* compiler,
+ int characters_filled_in,
+ bool not_at_start) {
+ // See comment in RegExpNode::EatsAtLeastFromLoopEntry.
+ UNREACHABLE();
+}
+
+EatsAtLeastInfo LoopChoiceNode::EatsAtLeastFromLoopEntry() {
+ DCHECK_EQ(alternatives_->length(), 2); // There's just loop and continue.
+
+ if (read_backward()) {
+ // Can't do anything special for a backward loop, so return the basic values
+ // that we got during analysis.
+ return *eats_at_least_info();
+ }
+
+ // Figure out how much the loop body itself eats, not including anything in
+ // the continuation case. In general, the nodes in the loop body should report
+ // that they eat at least the number eaten by the continuation node, since any
+ // successful match in the loop body must also include the continuation node.
+ // However, in some cases involving positive lookaround, the loop body under-
+ // reports its appetite, so use saturated math here to avoid negative numbers.
+ uint8_t loop_body_from_not_start = base::saturated_cast<uint8_t>(
+ loop_node_->EatsAtLeast(true) - continue_node_->EatsAtLeast(true));
+ uint8_t loop_body_from_possibly_start = base::saturated_cast<uint8_t>(
+ loop_node_->EatsAtLeast(false) - continue_node_->EatsAtLeast(true));
+
+ // Limit the number of loop iterations to avoid overflow in subsequent steps.
+ int loop_iterations = base::saturated_cast<uint8_t>(min_loop_iterations());
+
+ EatsAtLeastInfo result;
+ result.eats_at_least_from_not_start =
+ base::saturated_cast<uint8_t>(loop_iterations * loop_body_from_not_start +
+ continue_node_->EatsAtLeast(true));
+ if (loop_iterations > 0 && loop_body_from_possibly_start > 0) {
+ // First loop iteration eats at least one, so all subsequent iterations
+ // and the after-loop chunk are guaranteed to not be at the start.
+ result.eats_at_least_from_possibly_start = base::saturated_cast<uint8_t>(
+ loop_body_from_possibly_start +
+ (loop_iterations - 1) * loop_body_from_not_start +
+ continue_node_->EatsAtLeast(true));
+ } else {
+ // Loop body might eat nothing, so only continue node contributes.
+ result.eats_at_least_from_possibly_start =
+ continue_node_->EatsAtLeast(false);
+ }
+ return result;
+}
+
bool RegExpNode::EmitQuickCheck(RegExpCompiler* compiler,
Trace* bounds_check_trace, Trace* trace,
bool preload_has_checked_bounds,
Label* on_possible_success,
QuickCheckDetails* details,
- bool fall_through_on_failure) {
+ bool fall_through_on_failure,
+ ChoiceNode* predecessor) {
+ DCHECK_NOT_NULL(predecessor);
if (details->characters() == 0) return false;
GetQuickCheckDetails(details, compiler, 0,
trace->at_start() == Trace::FALSE_VALUE);
@@ -1479,13 +1474,17 @@ bool RegExpNode::EmitQuickCheck(RegExpCompiler* compiler,
if (trace->characters_preloaded() != details->characters()) {
DCHECK(trace->cp_offset() == bounds_check_trace->cp_offset());
- // We are attempting to preload the minimum number of characters
+ // The bounds check is performed using the minimum number of characters
// any choice would eat, so if the bounds check fails, then none of the
// choices can succeed, so we can just immediately backtrack, rather
- // than go to the next choice.
+ // than go to the next choice. The number of characters preloaded may be
+ // less than the number used for the bounds check.
+ int eats_at_least = predecessor->EatsAtLeast(
+ bounds_check_trace->at_start() == Trace::FALSE_VALUE);
+ DCHECK_GE(eats_at_least, details->characters());
assembler->LoadCurrentCharacter(
trace->cp_offset(), bounds_check_trace->backtrack(),
- !preload_has_checked_bounds, details->characters());
+ !preload_has_checked_bounds, details->characters(), eats_at_least);
}
bool need_mask = true;
@@ -1579,7 +1578,7 @@ void TextNode::GetQuickCheckDetails(QuickCheckDetails* details,
// and the mask-compare will determine definitely whether we have
// a match at this character position.
pos->mask = char_mask;
- pos->value = c;
+ pos->value = chars[0];
pos->determines_perfectly = true;
} else {
uint32_t common_bits = char_mask;
@@ -1764,6 +1763,37 @@ class VisitMarker {
NodeInfo* info_;
};
+// Temporarily sets traversed_loop_initialization_node_.
+class LoopInitializationMarker {
+ public:
+ explicit LoopInitializationMarker(LoopChoiceNode* node) : node_(node) {
+ DCHECK(!node_->traversed_loop_initialization_node_);
+ node_->traversed_loop_initialization_node_ = true;
+ }
+ ~LoopInitializationMarker() {
+ DCHECK(node_->traversed_loop_initialization_node_);
+ node_->traversed_loop_initialization_node_ = false;
+ }
+
+ private:
+ LoopChoiceNode* node_;
+ DISALLOW_COPY_AND_ASSIGN(LoopInitializationMarker);
+};
+
+// Temporarily decrements min_loop_iterations_.
+class IterationDecrementer {
+ public:
+ explicit IterationDecrementer(LoopChoiceNode* node) : node_(node) {
+ DCHECK_GT(node_->min_loop_iterations_, 0);
+ --node_->min_loop_iterations_;
+ }
+ ~IterationDecrementer() { ++node_->min_loop_iterations_; }
+
+ private:
+ LoopChoiceNode* node_;
+ DISALLOW_COPY_AND_ASSIGN(IterationDecrementer);
+};
+
RegExpNode* SeqRegExpNode::FilterOneByte(int depth) {
if (info()->replacement_calculated) return replacement();
if (depth < 0) return this;
@@ -1916,17 +1946,17 @@ RegExpNode* NegativeLookaroundChoiceNode::FilterOneByte(int depth) {
VisitMarker marker(info());
// Alternative 0 is the negative lookahead, alternative 1 is what comes
// afterwards.
- RegExpNode* node = alternatives_->at(1).node();
+ RegExpNode* node = continue_node();
RegExpNode* replacement = node->FilterOneByte(depth - 1);
if (replacement == nullptr) return set_replacement(nullptr);
- alternatives_->at(1).set_node(replacement);
+ alternatives_->at(kContinueIndex).set_node(replacement);
- RegExpNode* neg_node = alternatives_->at(0).node();
+ RegExpNode* neg_node = lookaround_node();
RegExpNode* neg_replacement = neg_node->FilterOneByte(depth - 1);
// If the negative lookahead is always going to fail then
// we don't need to check it.
if (neg_replacement == nullptr) return set_replacement(replacement);
- alternatives_->at(0).set_node(neg_replacement);
+ alternatives_->at(kLookaroundIndex).set_node(neg_replacement);
return set_replacement(this);
}
@@ -1935,9 +1965,48 @@ void LoopChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details,
int characters_filled_in,
bool not_at_start) {
if (body_can_be_zero_length_ || info()->visited) return;
- VisitMarker marker(info());
- return ChoiceNode::GetQuickCheckDetails(details, compiler,
- characters_filled_in, not_at_start);
+ not_at_start = not_at_start || this->not_at_start();
+ DCHECK_EQ(alternatives_->length(), 2); // There's just loop and continue.
+ if (traversed_loop_initialization_node_ && min_loop_iterations_ > 0 &&
+ loop_node_->EatsAtLeast(not_at_start) >
+ continue_node_->EatsAtLeast(true)) {
+ // Loop body is guaranteed to execute at least once, and consume characters
+ // when it does, meaning the only possible quick checks from this point
+ // begin with the loop body. We may recursively visit this LoopChoiceNode,
+ // but we temporarily decrease its minimum iteration counter so we know when
+ // to check the continue case.
+ IterationDecrementer next_iteration(this);
+ loop_node_->GetQuickCheckDetails(details, compiler, characters_filled_in,
+ not_at_start);
+ } else {
+ // Might not consume anything in the loop body, so treat it like a normal
+ // ChoiceNode (and don't recursively visit this node again).
+ VisitMarker marker(info());
+ ChoiceNode::GetQuickCheckDetails(details, compiler, characters_filled_in,
+ not_at_start);
+ }
+}
+
+void LoopChoiceNode::GetQuickCheckDetailsFromLoopEntry(
+ QuickCheckDetails* details, RegExpCompiler* compiler,
+ int characters_filled_in, bool not_at_start) {
+ if (traversed_loop_initialization_node_) {
+ // We already entered this loop once, exited via its continuation node, and
+ // followed an outer loop's back-edge to before the loop entry point. We
+ // could try to reset the minimum iteration count to its starting value at
+ // this point, but that seems like more trouble than it's worth. It's safe
+ // to keep going with the current (possibly reduced) minimum iteration
+ // count.
+ GetQuickCheckDetails(details, compiler, characters_filled_in, not_at_start);
+ } else {
+ // We are entering a loop via its counter initialization action, meaning we
+ // are guaranteed to run the loop body at least some minimum number of times
+ // before running the continuation node. Set a flag so that this node knows
+ // (now and any times we visit it again recursively) that it was entered
+ // from the top.
+ LoopInitializationMarker marker(this);
+ GetQuickCheckDetails(details, compiler, characters_filled_in, not_at_start);
+ }
}
void LoopChoiceNode::FillInBMInfo(Isolate* isolate, int offset, int budget,
@@ -2014,12 +2083,7 @@ void EmitHat(RegExpCompiler* compiler, RegExpNode* on_success, Trace* trace) {
if (may_be_at_or_before_subject_string_start) {
// The start of input counts as a newline in this context, so skip to ok if
// we are at the start.
- // TODO(jgruber): It would be less awkward to use CheckAtStart here, but
- // that currently does not support a non-zero cp_offset.
- Label not_at_start;
- assembler->CheckNotAtStart(new_trace.cp_offset(), &not_at_start);
- assembler->GoTo(&ok);
- assembler->Bind(&not_at_start);
+ assembler->CheckAtStart(new_trace.cp_offset(), &ok);
}
// If we've already checked that we are not at the start of input, it's okay
@@ -2049,9 +2113,8 @@ void AssertionNode::EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace) {
bool not_at_start = (trace->at_start() == Trace::FALSE_VALUE);
BoyerMooreLookahead* lookahead = bm_info(not_at_start);
if (lookahead == nullptr) {
- int eats_at_least = Min(kMaxLookaheadForBoyerMoore,
- EatsAtLeast(kMaxLookaheadForBoyerMoore,
- kRecursionBudget, not_at_start));
+ int eats_at_least =
+ Min(kMaxLookaheadForBoyerMoore, EatsAtLeast(not_at_start));
if (eats_at_least >= 1) {
BoyerMooreLookahead* bm =
new (zone()) BoyerMooreLookahead(eats_at_least, compiler, zone());
@@ -2113,12 +2176,7 @@ void AssertionNode::BacktrackIfPrevious(
if (may_be_at_or_before_subject_string_start) {
// The start of input counts as a non-word character, so the question is
// decided if we are at the start.
- // TODO(jgruber): It would be less awkward to use CheckAtStart here, but
- // that currently does not support a non-zero cp_offset.
- Label not_at_start;
- assembler->CheckNotAtStart(new_trace.cp_offset(), &not_at_start);
- assembler->GoTo(non_word);
- assembler->Bind(&not_at_start);
+ assembler->CheckAtStart(new_trace.cp_offset(), non_word);
}
// If we've already checked that we are not at the start of input, it's okay
@@ -2939,8 +2997,7 @@ void ChoiceNode::SetUpPreLoad(RegExpCompiler* compiler, Trace* current_trace,
if (state->eats_at_least_ == PreloadState::kEatsAtLeastNotYetInitialized) {
// Save some time by looking at most one machine word ahead.
state->eats_at_least_ =
- EatsAtLeast(compiler->one_byte() ? 4 : 2, kRecursionBudget,
- current_trace->at_start() == Trace::FALSE_VALUE);
+ EatsAtLeast(current_trace->at_start() == Trace::FALSE_VALUE);
}
state->preload_characters_ =
CalculatePreloadCharacters(compiler, state->eats_at_least_);
@@ -3090,9 +3147,7 @@ int ChoiceNode::EmitOptimizedUnanchoredSearch(RegExpCompiler* compiler,
// small alternation.
BoyerMooreLookahead* bm = bm_info(false);
if (bm == nullptr) {
- eats_at_least =
- Min(kMaxLookaheadForBoyerMoore,
- EatsAtLeast(kMaxLookaheadForBoyerMoore, kRecursionBudget, false));
+ eats_at_least = Min(kMaxLookaheadForBoyerMoore, EatsAtLeast(false));
if (eats_at_least >= 1) {
bm = new (zone()) BoyerMooreLookahead(eats_at_least, compiler, zone());
GuardedAlternative alt0 = alternatives_->at(0);
@@ -3144,7 +3199,7 @@ void ChoiceNode::EmitChoices(RegExpCompiler* compiler,
alternative.node()->EmitQuickCheck(
compiler, trace, &new_trace, preload->preload_has_checked_bounds_,
&alt_gen->possible_success, &alt_gen->quick_check_details,
- fall_through_on_failure)) {
+ fall_through_on_failure, this)) {
// Quick check was generated for this choice.
preload->preload_is_current_ = true;
preload->preload_has_checked_bounds_ = true;
@@ -3253,9 +3308,9 @@ void ActionNode::Emit(RegExpCompiler* compiler, Trace* trace) {
on_success()->Emit(compiler, &new_trace);
break;
}
- case SET_REGISTER: {
- Trace::DeferredSetRegister new_set(data_.u_store_register.reg,
- data_.u_store_register.value);
+ case SET_REGISTER_FOR_LOOP: {
+ Trace::DeferredSetRegisterForLoop new_set(data_.u_store_register.reg,
+ data_.u_store_register.value);
Trace new_trace = *trace;
new_trace.add_action(&new_set);
on_success()->Emit(compiler, &new_trace);
@@ -3377,26 +3432,6 @@ void BackReferenceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
on_success()->Emit(compiler, trace);
}
-// -------------------------------------------------------------------
-// Analysis
-
-void Analysis::EnsureAnalyzed(RegExpNode* that) {
- StackLimitCheck check(isolate());
- if (check.HasOverflowed()) {
- fail("Stack overflow");
- return;
- }
- if (that->info()->been_analyzed || that->info()->being_analyzed) return;
- that->info()->being_analyzed = true;
- that->Accept(this);
- that->info()->being_analyzed = false;
- that->info()->been_analyzed = true;
-}
-
-void Analysis::VisitEnd(EndNode* that) {
- // nothing to do
-}
-
void TextNode::CalculateOffsets() {
int element_count = elements()->length();
// Set up the offsets of the elements relative to the start. This is a fixed
@@ -3409,60 +3444,269 @@ void TextNode::CalculateOffsets() {
}
}
-void Analysis::VisitText(TextNode* that) {
- that->MakeCaseIndependent(isolate(), is_one_byte_);
- EnsureAnalyzed(that->on_success());
- if (!has_failed()) {
- that->CalculateOffsets();
- }
-}
+namespace {
-void Analysis::VisitAction(ActionNode* that) {
- RegExpNode* target = that->on_success();
- EnsureAnalyzed(target);
- if (!has_failed()) {
+// Assertion propagation moves information about assertions such as
+// \b to the affected nodes. For instance, in /.\b./ information must
+// be propagated to the first '.' that whatever follows needs to know
+// if it matched a word or a non-word, and to the second '.' that it
+// has to check if it succeeds a word or non-word. In this case the
+// result will be something like:
+//
+// +-------+ +------------+
+// | . | | . |
+// +-------+ ---> +------------+
+// | word? | | check word |
+// +-------+ +------------+
+class AssertionPropagator : public AllStatic {
+ public:
+ static void VisitText(TextNode* that) {}
+
+ static void VisitAction(ActionNode* that) {
// If the next node is interested in what it follows then this node
// has to be interested too so it can pass the information on.
- that->info()->AddFromFollowing(target->info());
+ that->info()->AddFromFollowing(that->on_success()->info());
}
-}
-void Analysis::VisitChoice(ChoiceNode* that) {
- NodeInfo* info = that->info();
- for (int i = 0; i < that->alternatives()->length(); i++) {
- RegExpNode* node = that->alternatives()->at(i).node();
- EnsureAnalyzed(node);
- if (has_failed()) return;
+ static void VisitChoice(ChoiceNode* that, int i) {
// Anything the following nodes need to know has to be known by
// this node also, so it can pass it on.
- info->AddFromFollowing(node->info());
+ that->info()->AddFromFollowing(that->alternatives()->at(i).node()->info());
}
-}
-void Analysis::VisitLoopChoice(LoopChoiceNode* that) {
- NodeInfo* info = that->info();
- for (int i = 0; i < that->alternatives()->length(); i++) {
- RegExpNode* node = that->alternatives()->at(i).node();
- if (node != that->loop_node()) {
- EnsureAnalyzed(node);
+ static void VisitLoopChoiceContinueNode(LoopChoiceNode* that) {
+ that->info()->AddFromFollowing(that->continue_node()->info());
+ }
+
+ static void VisitLoopChoiceLoopNode(LoopChoiceNode* that) {
+ that->info()->AddFromFollowing(that->loop_node()->info());
+ }
+
+ static void VisitNegativeLookaroundChoiceLookaroundNode(
+ NegativeLookaroundChoiceNode* that) {
+ VisitChoice(that, NegativeLookaroundChoiceNode::kLookaroundIndex);
+ }
+
+ static void VisitNegativeLookaroundChoiceContinueNode(
+ NegativeLookaroundChoiceNode* that) {
+ VisitChoice(that, NegativeLookaroundChoiceNode::kContinueIndex);
+ }
+
+ static void VisitBackReference(BackReferenceNode* that) {}
+
+ static void VisitAssertion(AssertionNode* that) {}
+};
+
+// Propagates information about the minimum size of successful matches from
+// successor nodes to their predecessors. Note that all eats_at_least values
+// are initialized to zero before analysis.
+class EatsAtLeastPropagator : public AllStatic {
+ public:
+ static void VisitText(TextNode* that) {
+ // The eats_at_least value is not used if reading backward.
+ if (!that->read_backward()) {
+ // We are not at the start after this node, and thus we can use the
+ // successor's eats_at_least_from_not_start value.
+ uint8_t eats_at_least = base::saturated_cast<uint8_t>(
+ that->Length() + that->on_success()
+ ->eats_at_least_info()
+ ->eats_at_least_from_not_start);
+ that->set_eats_at_least_info(EatsAtLeastInfo(eats_at_least));
+ }
+ }
+
+ static void VisitAction(ActionNode* that) {
+ // POSITIVE_SUBMATCH_SUCCESS rewinds input, so we must not consider
+ // successor nodes for eats_at_least. SET_REGISTER_FOR_LOOP indicates a loop
+ // entry point, which means the loop body will run at least the minimum
+ // number of times before the continuation case can run. Otherwise the
+ // current node eats at least as much as its successor.
+ switch (that->action_type()) {
+ case ActionNode::POSITIVE_SUBMATCH_SUCCESS:
+ break; // Was already initialized to zero.
+ case ActionNode::SET_REGISTER_FOR_LOOP:
+ that->set_eats_at_least_info(
+ that->on_success()->EatsAtLeastFromLoopEntry());
+ break;
+ default:
+ that->set_eats_at_least_info(*that->on_success()->eats_at_least_info());
+ break;
+ }
+ }
+
+ static void VisitChoice(ChoiceNode* that, int i) {
+ // The minimum possible match from a choice node is the minimum of its
+ // successors.
+ EatsAtLeastInfo eats_at_least =
+ i == 0 ? EatsAtLeastInfo(UINT8_MAX) : *that->eats_at_least_info();
+ eats_at_least.SetMin(
+ *that->alternatives()->at(i).node()->eats_at_least_info());
+ that->set_eats_at_least_info(eats_at_least);
+ }
+
+ static void VisitLoopChoiceContinueNode(LoopChoiceNode* that) {
+ that->set_eats_at_least_info(*that->continue_node()->eats_at_least_info());
+ }
+
+ static void VisitLoopChoiceLoopNode(LoopChoiceNode* that) {}
+
+ static void VisitNegativeLookaroundChoiceLookaroundNode(
+ NegativeLookaroundChoiceNode* that) {}
+
+ static void VisitNegativeLookaroundChoiceContinueNode(
+ NegativeLookaroundChoiceNode* that) {
+ that->set_eats_at_least_info(*that->continue_node()->eats_at_least_info());
+ }
+
+ static void VisitBackReference(BackReferenceNode* that) {
+ if (!that->read_backward()) {
+ that->set_eats_at_least_info(*that->on_success()->eats_at_least_info());
+ }
+ }
+
+ static void VisitAssertion(AssertionNode* that) {
+ EatsAtLeastInfo eats_at_least = *that->on_success()->eats_at_least_info();
+ if (that->assertion_type() == AssertionNode::AT_START) {
+ // If we know we are not at the start and we are asked "how many
+ // characters will you match if you succeed?" then we can answer anything
+ // since false implies false. So let's just set the max answer
+ // (UINT8_MAX) since that won't prevent us from preloading a lot of
+ // characters for the other branches in the node graph.
+ eats_at_least.eats_at_least_from_not_start = UINT8_MAX;
+ }
+ that->set_eats_at_least_info(eats_at_least);
+ }
+};
+
+} // namespace
+
+// -------------------------------------------------------------------
+// Analysis
+
+// Iterates the node graph and provides the opportunity for propagators to set
+// values that depend on successor nodes.
+template <typename... Propagators>
+class Analysis : public NodeVisitor {
+ public:
+ Analysis(Isolate* isolate, bool is_one_byte)
+ : isolate_(isolate), is_one_byte_(is_one_byte), error_message_(nullptr) {}
+
+ void EnsureAnalyzed(RegExpNode* that) {
+ StackLimitCheck check(isolate());
+ if (check.HasOverflowed()) {
+ fail("Stack overflow");
+ return;
+ }
+ if (that->info()->been_analyzed || that->info()->being_analyzed) return;
+ that->info()->being_analyzed = true;
+ that->Accept(this);
+ that->info()->being_analyzed = false;
+ that->info()->been_analyzed = true;
+ }
+
+ bool has_failed() { return error_message_ != nullptr; }
+ const char* error_message() {
+ DCHECK(error_message_ != nullptr);
+ return error_message_;
+ }
+ void fail(const char* error_message) { error_message_ = error_message; }
+
+ Isolate* isolate() const { return isolate_; }
+
+ void VisitEnd(EndNode* that) override {
+ // nothing to do
+ }
+
+// Used to call the given static function on each propagator / variadic template
+// argument.
+#define STATIC_FOR_EACH(expr) \
+ do { \
+ int dummy[] = {((expr), 0)...}; \
+ USE(dummy); \
+ } while (false)
+
+ void VisitText(TextNode* that) override {
+ that->MakeCaseIndependent(isolate(), is_one_byte_);
+ EnsureAnalyzed(that->on_success());
+ if (has_failed()) return;
+ that->CalculateOffsets();
+ STATIC_FOR_EACH(Propagators::VisitText(that));
+ }
+
+ void VisitAction(ActionNode* that) override {
+ EnsureAnalyzed(that->on_success());
+ if (has_failed()) return;
+ STATIC_FOR_EACH(Propagators::VisitAction(that));
+ }
+
+ void VisitChoice(ChoiceNode* that) override {
+ for (int i = 0; i < that->alternatives()->length(); i++) {
+ EnsureAnalyzed(that->alternatives()->at(i).node());
if (has_failed()) return;
- info->AddFromFollowing(node->info());
+ STATIC_FOR_EACH(Propagators::VisitChoice(that, i));
}
}
- // Check the loop last since it may need the value of this node
- // to get a correct result.
- EnsureAnalyzed(that->loop_node());
- if (!has_failed()) {
- info->AddFromFollowing(that->loop_node()->info());
+
+ void VisitLoopChoice(LoopChoiceNode* that) override {
+ DCHECK_EQ(that->alternatives()->length(), 2); // Just loop and continue.
+
+ // First propagate all information from the continuation node.
+ EnsureAnalyzed(that->continue_node());
+ if (has_failed()) return;
+ STATIC_FOR_EACH(Propagators::VisitLoopChoiceContinueNode(that));
+
+ // Check the loop last since it may need the value of this node
+ // to get a correct result.
+ EnsureAnalyzed(that->loop_node());
+ if (has_failed()) return;
+ STATIC_FOR_EACH(Propagators::VisitLoopChoiceLoopNode(that));
+ }
+
+ void VisitNegativeLookaroundChoice(
+ NegativeLookaroundChoiceNode* that) override {
+ DCHECK_EQ(that->alternatives()->length(), 2); // Lookaround and continue.
+
+ EnsureAnalyzed(that->lookaround_node());
+ if (has_failed()) return;
+ STATIC_FOR_EACH(
+ Propagators::VisitNegativeLookaroundChoiceLookaroundNode(that));
+
+ EnsureAnalyzed(that->continue_node());
+ if (has_failed()) return;
+ STATIC_FOR_EACH(
+ Propagators::VisitNegativeLookaroundChoiceContinueNode(that));
}
-}
-void Analysis::VisitBackReference(BackReferenceNode* that) {
- EnsureAnalyzed(that->on_success());
-}
+ void VisitBackReference(BackReferenceNode* that) override {
+ EnsureAnalyzed(that->on_success());
+ if (has_failed()) return;
+ STATIC_FOR_EACH(Propagators::VisitBackReference(that));
+ }
+
+ void VisitAssertion(AssertionNode* that) override {
+ EnsureAnalyzed(that->on_success());
+ if (has_failed()) return;
+ STATIC_FOR_EACH(Propagators::VisitAssertion(that));
+ }
+
+#undef STATIC_FOR_EACH
+
+ private:
+ Isolate* isolate_;
+ bool is_one_byte_;
+ const char* error_message_;
+
+ DISALLOW_IMPLICIT_CONSTRUCTORS(Analysis);
+};
-void Analysis::VisitAssertion(AssertionNode* that) {
- EnsureAnalyzed(that->on_success());
+const char* AnalyzeRegExp(Isolate* isolate, bool is_one_byte,
+ RegExpNode* node) {
+ Analysis<AssertionPropagator, EatsAtLeastPropagator> analysis(isolate,
+ is_one_byte);
+ DCHECK_EQ(node->info()->been_analyzed, false);
+ analysis.EnsureAnalyzed(node);
+ DCHECK_IMPLIES(analysis.has_failed(), analysis.error_message() != nullptr);
+ return analysis.has_failed() ? analysis.error_message() : nullptr;
}
void BackReferenceNode::FillInBMInfo(Isolate* isolate, int offset, int budget,
diff --git a/deps/v8/src/regexp/regexp-compiler.h b/deps/v8/src/regexp/regexp-compiler.h
index 1b70abfd98..2de221f35d 100644
--- a/deps/v8/src/regexp/regexp-compiler.h
+++ b/deps/v8/src/regexp/regexp-compiler.h
@@ -285,10 +285,11 @@ class Trace {
void set_cp_offset(int cp_offset) { cp_offset_ = cp_offset; }
};
- class DeferredSetRegister : public DeferredAction {
+ class DeferredSetRegisterForLoop : public DeferredAction {
public:
- DeferredSetRegister(int reg, int value)
- : DeferredAction(ActionNode::SET_REGISTER, reg), value_(value) {}
+ DeferredSetRegisterForLoop(int reg, int value)
+ : DeferredAction(ActionNode::SET_REGISTER_FOR_LOOP, reg),
+ value_(value) {}
int value() { return value_; }
private:
@@ -419,45 +420,13 @@ struct PreloadState {
void init() { eats_at_least_ = kEatsAtLeastNotYetInitialized; }
};
-// Assertion propagation moves information about assertions such as
-// \b to the affected nodes. For instance, in /.\b./ information must
-// be propagated to the first '.' that whatever follows needs to know
-// if it matched a word or a non-word, and to the second '.' that it
-// has to check if it succeeds a word or non-word. In this case the
-// result will be something like:
+// Analysis performs assertion propagation and computes eats_at_least_ values.
+// See the comments on AssertionPropagator and EatsAtLeastPropagator for more
+// details.
//
-// +-------+ +------------+
-// | . | | . |
-// +-------+ ---> +------------+
-// | word? | | check word |
-// +-------+ +------------+
-class Analysis : public NodeVisitor {
- public:
- Analysis(Isolate* isolate, bool is_one_byte)
- : isolate_(isolate), is_one_byte_(is_one_byte), error_message_(nullptr) {}
- void EnsureAnalyzed(RegExpNode* node);
-
-#define DECLARE_VISIT(Type) void Visit##Type(Type##Node* that) override;
- FOR_EACH_NODE_TYPE(DECLARE_VISIT)
-#undef DECLARE_VISIT
- void VisitLoopChoice(LoopChoiceNode* that) override;
-
- bool has_failed() { return error_message_ != nullptr; }
- const char* error_message() {
- DCHECK(error_message_ != nullptr);
- return error_message_;
- }
- void fail(const char* error_message) { error_message_ = error_message; }
-
- Isolate* isolate() const { return isolate_; }
-
- private:
- Isolate* isolate_;
- bool is_one_byte_;
- const char* error_message_;
-
- DISALLOW_IMPLICIT_CONSTRUCTORS(Analysis);
-};
+// This method returns nullptr on success or a null-terminated failure message
+// on failure.
+const char* AnalyzeRegExp(Isolate* isolate, bool is_one_byte, RegExpNode* node);
class FrequencyCollator {
public:
diff --git a/deps/v8/src/regexp/regexp-dotprinter.cc b/deps/v8/src/regexp/regexp-dotprinter.cc
index a6d72aaf5b..b6640626f2 100644
--- a/deps/v8/src/regexp/regexp-dotprinter.cc
+++ b/deps/v8/src/regexp/regexp-dotprinter.cc
@@ -114,6 +114,15 @@ void DotPrinterImpl::VisitChoice(ChoiceNode* that) {
}
}
+void DotPrinterImpl::VisitLoopChoice(LoopChoiceNode* that) {
+ VisitChoice(that);
+}
+
+void DotPrinterImpl::VisitNegativeLookaroundChoice(
+ NegativeLookaroundChoiceNode* that) {
+ VisitChoice(that);
+}
+
void DotPrinterImpl::VisitText(TextNode* that) {
Zone* zone = that->zone();
os_ << " n" << that << " [label=\"";
@@ -191,7 +200,7 @@ void DotPrinterImpl::VisitAssertion(AssertionNode* that) {
void DotPrinterImpl::VisitAction(ActionNode* that) {
os_ << " n" << that << " [";
switch (that->action_type_) {
- case ActionNode::SET_REGISTER:
+ case ActionNode::SET_REGISTER_FOR_LOOP:
os_ << "label=\"$" << that->data_.u_store_register.reg
<< ":=" << that->data_.u_store_register.value << "\", shape=octagon";
break;
diff --git a/deps/v8/src/regexp/regexp-interpreter.cc b/deps/v8/src/regexp/regexp-interpreter.cc
index 881758861c..cf2fb55e4a 100644
--- a/deps/v8/src/regexp/regexp-interpreter.cc
+++ b/deps/v8/src/regexp/regexp-interpreter.cc
@@ -8,6 +8,7 @@
#include "src/ast/ast.h"
#include "src/base/small-vector.h"
+#include "src/objects/js-regexp-inl.h"
#include "src/objects/objects-inl.h"
#include "src/regexp/regexp-bytecodes.h"
#include "src/regexp/regexp-macro-assembler.h"
@@ -19,12 +20,20 @@
#include "unicode/uchar.h"
#endif // V8_INTL_SUPPORT
+// Use token threaded dispatch iff the compiler supports computed gotos and the
+// build argument v8_enable_regexp_interpreter_threaded_dispatch was set.
+#if V8_HAS_COMPUTED_GOTO && \
+ defined(V8_ENABLE_REGEXP_INTERPRETER_THREADED_DISPATCH)
+#define V8_USE_COMPUTED_GOTO 1
+#endif // V8_HAS_COMPUTED_GOTO
+
namespace v8 {
namespace internal {
-static bool BackRefMatchesNoCase(Isolate* isolate, int from, int current,
- int len, Vector<const uc16> subject,
- bool unicode) {
+namespace {
+
+bool BackRefMatchesNoCase(Isolate* isolate, int from, int current, int len,
+ Vector<const uc16> subject, bool unicode) {
Address offset_a =
reinterpret_cast<Address>(const_cast<uc16*>(&subject.at(from)));
Address offset_b =
@@ -34,9 +43,8 @@ static bool BackRefMatchesNoCase(Isolate* isolate, int from, int current,
offset_a, offset_b, length, unicode ? nullptr : isolate) == 1;
}
-static bool BackRefMatchesNoCase(Isolate* isolate, int from, int current,
- int len, Vector<const uint8_t> subject,
- bool unicode) {
+bool BackRefMatchesNoCase(Isolate* isolate, int from, int current, int len,
+ Vector<const uint8_t> subject, bool unicode) {
// For Latin1 characters the unicode flag makes no difference.
for (int i = 0; i < len; i++) {
unsigned int old_char = subject[from++];
@@ -55,49 +63,48 @@ static bool BackRefMatchesNoCase(Isolate* isolate, int from, int current,
return true;
}
+void DisassembleSingleBytecode(const byte* code_base, const byte* pc) {
+ PrintF("%s", RegExpBytecodeName(*pc));
+
+ // Args and the bytecode as hex.
+ for (int i = 0; i < RegExpBytecodeLength(*pc); i++) {
+ PrintF(", %02x", pc[i]);
+ }
+ PrintF(" ");
+
+ // Args as ascii.
+ for (int i = 1; i < RegExpBytecodeLength(*pc); i++) {
+ unsigned char b = pc[i];
+ PrintF("%c", std::isprint(b) ? b : '.');
+ }
+ PrintF("\n");
+}
+
#ifdef DEBUG
-static void TraceInterpreter(const byte* code_base, const byte* pc,
- int stack_depth, int current_position,
- uint32_t current_char, int bytecode_length,
- const char* bytecode_name) {
+void MaybeTraceInterpreter(const byte* code_base, const byte* pc,
+ int stack_depth, int current_position,
+ uint32_t current_char, int bytecode_length,
+ const char* bytecode_name) {
if (FLAG_trace_regexp_bytecodes) {
- bool printable = (current_char < 127 && current_char >= 32);
+ const bool printable = std::isprint(current_char);
const char* format =
printable
- ? "pc = %02x, sp = %d, curpos = %d, curchar = %08x (%c), bc = %s"
- : "pc = %02x, sp = %d, curpos = %d, curchar = %08x .%c., bc = %s";
+ ? "pc = %02x, sp = %d, curpos = %d, curchar = %08x (%c), bc = "
+ : "pc = %02x, sp = %d, curpos = %d, curchar = %08x .%c., bc = ";
PrintF(format, pc - code_base, stack_depth, current_position, current_char,
- printable ? current_char : '.', bytecode_name);
- for (int i = 0; i < bytecode_length; i++) {
- printf(", %02x", pc[i]);
- }
- printf(" ");
- for (int i = 1; i < bytecode_length; i++) {
- unsigned char b = pc[i];
- if (b < 127 && b >= 32) {
- printf("%c", b);
- } else {
- printf(".");
- }
- }
- printf("\n");
+ printable ? current_char : '.');
+
+ DisassembleSingleBytecode(code_base, pc);
}
}
+#endif // DEBUG
-#define BYTECODE(name) \
- case BC_##name: \
- TraceInterpreter(code_base, pc, backtrack_stack.sp(), current, \
- current_char, BC_##name##_LENGTH, #name);
-#else
-#define BYTECODE(name) case BC_##name:
-#endif
-
-static int32_t Load32Aligned(const byte* pc) {
+int32_t Load32Aligned(const byte* pc) {
DCHECK_EQ(0, reinterpret_cast<intptr_t>(pc) & 3);
return *reinterpret_cast<const int32_t*>(pc);
}
-static int32_t Load16Aligned(const byte* pc) {
+int32_t Load16Aligned(const byte* pc) {
DCHECK_EQ(0, reinterpret_cast<intptr_t>(pc) & 1);
return *reinterpret_cast<const uint16_t*>(pc);
}
@@ -139,9 +146,9 @@ class BacktrackStack {
DISALLOW_COPY_AND_ASSIGN(BacktrackStack);
};
-namespace {
-
-IrregexpInterpreter::Result StackOverflow(Isolate* isolate) {
+IrregexpInterpreter::Result StackOverflow(Isolate* isolate,
+ RegExp::CallOrigin call_origin) {
+ CHECK(call_origin == RegExp::CallOrigin::kFromRuntime);
// We abort interpreter execution after the stack overflow is thrown, and thus
// allow allocation here despite the outer DisallowHeapAllocationScope.
AllowHeapAllocation yes_gc;
@@ -149,72 +156,154 @@ IrregexpInterpreter::Result StackOverflow(Isolate* isolate) {
return IrregexpInterpreter::EXCEPTION;
}
-// Runs all pending interrupts. Callers must update unhandlified object
-// references after this function completes.
-IrregexpInterpreter::Result HandleInterrupts(Isolate* isolate,
- Handle<String> subject_string) {
+template <typename Char>
+void UpdateCodeAndSubjectReferences(
+ Isolate* isolate, Handle<ByteArray> code_array,
+ Handle<String> subject_string, ByteArray* code_array_out,
+ const byte** code_base_out, const byte** pc_out, String* subject_string_out,
+ Vector<const Char>* subject_string_vector_out) {
DisallowHeapAllocation no_gc;
- StackLimitCheck check(isolate);
- if (check.JsHasOverflowed()) {
- return StackOverflow(isolate); // A real stack overflow.
+ if (*code_base_out != code_array->GetDataStartAddress()) {
+ *code_array_out = *code_array;
+ const intptr_t pc_offset = *pc_out - *code_base_out;
+ DCHECK_GT(pc_offset, 0);
+ *code_base_out = code_array->GetDataStartAddress();
+ *pc_out = *code_base_out + pc_offset;
}
- // Handle interrupts if any exist.
- if (check.InterruptRequested()) {
- const bool was_one_byte =
- String::IsOneByteRepresentationUnderneath(*subject_string);
+ DCHECK(subject_string->IsFlat());
+ *subject_string_out = *subject_string;
+ *subject_string_vector_out = subject_string->GetCharVector<Char>(no_gc);
+}
- Object result;
- {
- AllowHeapAllocation yes_gc;
- result = isolate->stack_guard()->HandleInterrupts();
- }
+// Runs all pending interrupts and updates unhandlified object references if
+// necessary.
+template <typename Char>
+IrregexpInterpreter::Result HandleInterrupts(
+ Isolate* isolate, RegExp::CallOrigin call_origin, ByteArray* code_array_out,
+ String* subject_string_out, const byte** code_base_out,
+ Vector<const Char>* subject_string_vector_out, const byte** pc_out) {
+ DisallowHeapAllocation no_gc;
- if (result.IsException(isolate)) {
+ StackLimitCheck check(isolate);
+ bool js_has_overflowed = check.JsHasOverflowed();
+
+ if (call_origin == RegExp::CallOrigin::kFromJs) {
+ // Direct calls from JavaScript can be interrupted in two ways:
+ // 1. A real stack overflow, in which case we let the caller throw the
+ // exception.
+ // 2. The stack guard was used to interrupt execution for another purpose,
+ // forcing the call through the runtime system.
+ if (js_has_overflowed) {
return IrregexpInterpreter::EXCEPTION;
- }
-
- // If we changed between a LATIN1 and a UC16 string, we need to restart
- // regexp matching with the appropriate template instantiation of RawMatch.
- if (String::IsOneByteRepresentationUnderneath(*subject_string) !=
- was_one_byte) {
+ } else if (check.InterruptRequested()) {
return IrregexpInterpreter::RETRY;
}
+ } else {
+ DCHECK(call_origin == RegExp::CallOrigin::kFromRuntime);
+ // Prepare for possible GC.
+ HandleScope handles(isolate);
+ Handle<ByteArray> code_handle(*code_array_out, isolate);
+ Handle<String> subject_handle(*subject_string_out, isolate);
+
+ if (js_has_overflowed) {
+ return StackOverflow(isolate, call_origin);
+ } else if (check.InterruptRequested()) {
+ const bool was_one_byte =
+ String::IsOneByteRepresentationUnderneath(*subject_string_out);
+ Object result;
+ {
+ AllowHeapAllocation yes_gc;
+ result = isolate->stack_guard()->HandleInterrupts();
+ }
+ if (result.IsException(isolate)) {
+ return IrregexpInterpreter::EXCEPTION;
+ }
+
+ // If we changed between a LATIN1 and a UC16 string, we need to restart
+ // regexp matching with the appropriate template instantiation of
+ // RawMatch.
+ if (String::IsOneByteRepresentationUnderneath(*subject_handle) !=
+ was_one_byte) {
+ return IrregexpInterpreter::RETRY;
+ }
+
+ UpdateCodeAndSubjectReferences(
+ isolate, code_handle, subject_handle, code_array_out, code_base_out,
+ pc_out, subject_string_out, subject_string_vector_out);
+ }
}
return IrregexpInterpreter::SUCCESS;
}
-template <typename Char>
-void UpdateCodeAndSubjectReferences(Isolate* isolate,
- Handle<ByteArray> code_array,
- Handle<String> subject_string,
- const byte** code_base_out,
- const byte** pc_out,
- Vector<const Char>* subject_string_out) {
- DisallowHeapAllocation no_gc;
+// If computed gotos are supported by the compiler, we can get addresses to
+// labels directly in C/C++. Every bytecode handler has its own label and we
+// store the addresses in a dispatch table indexed by bytecode. To execute the
+// next handler we simply jump (goto) directly to its address.
+#if V8_USE_COMPUTED_GOTO
+#define BC_LABEL(name) BC_##name:
+#define DECODE() \
+ do { \
+ next_insn = Load32Aligned(next_pc); \
+ next_handler_addr = dispatch_table[next_insn & BYTECODE_MASK]; \
+ } while (false)
+#define DISPATCH() \
+ pc = next_pc; \
+ insn = next_insn; \
+ goto* next_handler_addr
+// Without computed goto support, we fall back to a simple switch-based
+// dispatch (A large switch statement inside a loop with a case for every
+// bytecode).
+#else // V8_USE_COMPUTED_GOTO
+#define BC_LABEL(name) case BC_##name:
+#define DECODE() next_insn = Load32Aligned(next_pc)
+#define DISPATCH() \
+ pc = next_pc; \
+ insn = next_insn; \
+ break
+#endif // V8_USE_COMPUTED_GOTO
+
+// ADVANCE/SET_PC_FROM_OFFSET are separated from DISPATCH, because ideally some
+// instructions can be executed between ADVANCE/SET_PC_FROM_OFFSET and DISPATCH.
+// We want those two macros as far apart as possible, because the goto in
+// DISPATCH is dependent on a memory load in ADVANCE/SET_PC_FROM_OFFSET. If we
+// don't hit the cache and have to fetch the next handler address from physical
+// memory, instructions between ADVANCE/SET_PC_FROM_OFFSET and DISPATCH can
+// potentially be executed unconditionally, reducing memory stall.
+#define ADVANCE(name) \
+ next_pc = pc + RegExpBytecodeLength(BC_##name); \
+ DECODE()
+#define SET_PC_FROM_OFFSET(offset) \
+ next_pc = code_base + offset; \
+ DECODE()
- if (*code_base_out != code_array->GetDataStartAddress()) {
- const intptr_t pc_offset = *pc_out - *code_base_out;
- DCHECK_GT(pc_offset, 0);
- *code_base_out = code_array->GetDataStartAddress();
- *pc_out = *code_base_out + pc_offset;
- }
-
- DCHECK(subject_string->IsFlat());
- *subject_string_out = subject_string->GetCharVector<Char>(no_gc);
-}
+#ifdef DEBUG
+#define BYTECODE(name) \
+ BC_LABEL(name) \
+ MaybeTraceInterpreter(code_base, pc, backtrack_stack.sp(), current, \
+ current_char, RegExpBytecodeLength(BC_##name), #name);
+#else
+#define BYTECODE(name) BC_LABEL(name)
+#endif // DEBUG
template <typename Char>
-IrregexpInterpreter::Result RawMatch(Isolate* isolate,
- Handle<ByteArray> code_array,
- Handle<String> subject_string,
+IrregexpInterpreter::Result RawMatch(Isolate* isolate, ByteArray code_array,
+ String subject_string,
Vector<const Char> subject, int* registers,
- int current, uint32_t current_char) {
+ int current, uint32_t current_char,
+ RegExp::CallOrigin call_origin) {
DisallowHeapAllocation no_gc;
- const byte* pc = code_array->GetDataStartAddress();
+#if V8_USE_COMPUTED_GOTO
+#define DECLARE_DISPATCH_TABLE_ENTRY(name, code, length) &&BC_##name,
+ static const void* const dispatch_table[] = {
+ BYTECODE_ITERATOR(DECLARE_DISPATCH_TABLE_ENTRY)};
+#undef DECLARE_DISPATCH_TABLE_ENTRY
+#endif
+
+ const byte* pc = code_array.GetDataStartAddress();
const byte* code_base = pc;
BacktrackStack backtrack_stack;
@@ -224,457 +313,572 @@ IrregexpInterpreter::Result RawMatch(Isolate* isolate,
PrintF("\n\nStart bytecode interpreter\n\n");
}
#endif
+
while (true) {
- const int32_t insn = Load32Aligned(pc);
+ const byte* next_pc = pc;
+ int32_t insn;
+ int32_t next_insn;
+#if V8_USE_COMPUTED_GOTO
+ const void* next_handler_addr;
+ DECODE();
+ DISPATCH();
+#else
+ insn = Load32Aligned(pc);
switch (insn & BYTECODE_MASK) {
- BYTECODE(BREAK) { UNREACHABLE(); }
- BYTECODE(PUSH_CP) {
- backtrack_stack.push(current);
- pc += BC_PUSH_CP_LENGTH;
- break;
- }
- BYTECODE(PUSH_BT) {
- backtrack_stack.push(Load32Aligned(pc + 4));
- pc += BC_PUSH_BT_LENGTH;
- break;
- }
- BYTECODE(PUSH_REGISTER) {
- backtrack_stack.push(registers[insn >> BYTECODE_SHIFT]);
- pc += BC_PUSH_REGISTER_LENGTH;
- break;
- }
- BYTECODE(SET_REGISTER) {
- registers[insn >> BYTECODE_SHIFT] = Load32Aligned(pc + 4);
- pc += BC_SET_REGISTER_LENGTH;
- break;
- }
- BYTECODE(ADVANCE_REGISTER) {
- registers[insn >> BYTECODE_SHIFT] += Load32Aligned(pc + 4);
- pc += BC_ADVANCE_REGISTER_LENGTH;
- break;
- }
- BYTECODE(SET_REGISTER_TO_CP) {
- registers[insn >> BYTECODE_SHIFT] = current + Load32Aligned(pc + 4);
- pc += BC_SET_REGISTER_TO_CP_LENGTH;
- break;
- }
- BYTECODE(SET_CP_TO_REGISTER) {
- current = registers[insn >> BYTECODE_SHIFT];
- pc += BC_SET_CP_TO_REGISTER_LENGTH;
- break;
- }
- BYTECODE(SET_REGISTER_TO_SP) {
- registers[insn >> BYTECODE_SHIFT] = backtrack_stack.sp();
- pc += BC_SET_REGISTER_TO_SP_LENGTH;
- break;
- }
- BYTECODE(SET_SP_TO_REGISTER) {
- backtrack_stack.set_sp(registers[insn >> BYTECODE_SHIFT]);
- pc += BC_SET_SP_TO_REGISTER_LENGTH;
- break;
- }
- BYTECODE(POP_CP) {
- current = backtrack_stack.pop();
- pc += BC_POP_CP_LENGTH;
- break;
- }
- BYTECODE(POP_BT) {
- IrregexpInterpreter::Result return_code =
- HandleInterrupts(isolate, subject_string);
- if (return_code != IrregexpInterpreter::SUCCESS) return return_code;
-
- UpdateCodeAndSubjectReferences(isolate, code_array, subject_string,
- &code_base, &pc, &subject);
-
- pc = code_base + backtrack_stack.pop();
- break;
- }
- BYTECODE(POP_REGISTER) {
- registers[insn >> BYTECODE_SHIFT] = backtrack_stack.pop();
- pc += BC_POP_REGISTER_LENGTH;
- break;
- }
- BYTECODE(FAIL) { return IrregexpInterpreter::FAILURE; }
- BYTECODE(SUCCEED) { return IrregexpInterpreter::SUCCESS; }
- BYTECODE(ADVANCE_CP) {
- current += insn >> BYTECODE_SHIFT;
- pc += BC_ADVANCE_CP_LENGTH;
- break;
- }
- BYTECODE(GOTO) {
- pc = code_base + Load32Aligned(pc + 4);
- break;
- }
- BYTECODE(ADVANCE_CP_AND_GOTO) {
- current += insn >> BYTECODE_SHIFT;
- pc = code_base + Load32Aligned(pc + 4);
- break;
- }
- BYTECODE(CHECK_GREEDY) {
- if (current == backtrack_stack.peek()) {
- backtrack_stack.pop();
- pc = code_base + Load32Aligned(pc + 4);
- } else {
- pc += BC_CHECK_GREEDY_LENGTH;
- }
- break;
- }
- BYTECODE(LOAD_CURRENT_CHAR) {
- int pos = current + (insn >> BYTECODE_SHIFT);
- if (pos >= subject.length() || pos < 0) {
- pc = code_base + Load32Aligned(pc + 4);
- } else {
- current_char = subject[pos];
- pc += BC_LOAD_CURRENT_CHAR_LENGTH;
- }
- break;
+#endif // V8_USE_COMPUTED_GOTO
+ BYTECODE(BREAK) { UNREACHABLE(); }
+ BYTECODE(PUSH_CP) {
+ ADVANCE(PUSH_CP);
+ backtrack_stack.push(current);
+ DISPATCH();
+ }
+ BYTECODE(PUSH_BT) {
+ ADVANCE(PUSH_BT);
+ backtrack_stack.push(Load32Aligned(pc + 4));
+ DISPATCH();
+ }
+ BYTECODE(PUSH_REGISTER) {
+ ADVANCE(PUSH_REGISTER);
+ backtrack_stack.push(registers[insn >> BYTECODE_SHIFT]);
+ DISPATCH();
+ }
+ BYTECODE(SET_REGISTER) {
+ ADVANCE(SET_REGISTER);
+ registers[insn >> BYTECODE_SHIFT] = Load32Aligned(pc + 4);
+ DISPATCH();
+ }
+ BYTECODE(ADVANCE_REGISTER) {
+ ADVANCE(ADVANCE_REGISTER);
+ registers[insn >> BYTECODE_SHIFT] += Load32Aligned(pc + 4);
+ DISPATCH();
+ }
+ BYTECODE(SET_REGISTER_TO_CP) {
+ ADVANCE(SET_REGISTER_TO_CP);
+ registers[insn >> BYTECODE_SHIFT] = current + Load32Aligned(pc + 4);
+ DISPATCH();
+ }
+ BYTECODE(SET_CP_TO_REGISTER) {
+ ADVANCE(SET_CP_TO_REGISTER);
+ current = registers[insn >> BYTECODE_SHIFT];
+ DISPATCH();
+ }
+ BYTECODE(SET_REGISTER_TO_SP) {
+ ADVANCE(SET_REGISTER_TO_SP);
+ registers[insn >> BYTECODE_SHIFT] = backtrack_stack.sp();
+ DISPATCH();
+ }
+ BYTECODE(SET_SP_TO_REGISTER) {
+ ADVANCE(SET_SP_TO_REGISTER);
+ backtrack_stack.set_sp(registers[insn >> BYTECODE_SHIFT]);
+ DISPATCH();
+ }
+ BYTECODE(POP_CP) {
+ ADVANCE(POP_CP);
+ current = backtrack_stack.pop();
+ DISPATCH();
+ }
+ BYTECODE(POP_BT) {
+ IrregexpInterpreter::Result return_code =
+ HandleInterrupts(isolate, call_origin, &code_array, &subject_string,
+ &code_base, &subject, &pc);
+ if (return_code != IrregexpInterpreter::SUCCESS) return return_code;
+
+ SET_PC_FROM_OFFSET(backtrack_stack.pop());
+ DISPATCH();
+ }
+ BYTECODE(POP_REGISTER) {
+ ADVANCE(POP_REGISTER);
+ registers[insn >> BYTECODE_SHIFT] = backtrack_stack.pop();
+ DISPATCH();
+ }
+ BYTECODE(FAIL) { return IrregexpInterpreter::FAILURE; }
+ BYTECODE(SUCCEED) { return IrregexpInterpreter::SUCCESS; }
+ BYTECODE(ADVANCE_CP) {
+ ADVANCE(ADVANCE_CP);
+ current += insn >> BYTECODE_SHIFT;
+ DISPATCH();
+ }
+ BYTECODE(GOTO) {
+ SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
+ DISPATCH();
+ }
+ BYTECODE(ADVANCE_CP_AND_GOTO) {
+ SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
+ current += insn >> BYTECODE_SHIFT;
+ DISPATCH();
+ }
+ BYTECODE(CHECK_GREEDY) {
+ if (current == backtrack_stack.peek()) {
+ SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
+ backtrack_stack.pop();
+ } else {
+ ADVANCE(CHECK_GREEDY);
}
- BYTECODE(LOAD_CURRENT_CHAR_UNCHECKED) {
- int pos = current + (insn >> BYTECODE_SHIFT);
+ DISPATCH();
+ }
+ BYTECODE(LOAD_CURRENT_CHAR) {
+ int pos = current + (insn >> BYTECODE_SHIFT);
+ if (pos >= subject.length() || pos < 0) {
+ SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
+ } else {
+ ADVANCE(LOAD_CURRENT_CHAR);
current_char = subject[pos];
- pc += BC_LOAD_CURRENT_CHAR_UNCHECKED_LENGTH;
- break;
- }
- BYTECODE(LOAD_2_CURRENT_CHARS) {
- int pos = current + (insn >> BYTECODE_SHIFT);
- if (pos + 2 > subject.length() || pos < 0) {
- pc = code_base + Load32Aligned(pc + 4);
- } else {
- Char next = subject[pos + 1];
- current_char =
- (subject[pos] | (next << (kBitsPerByte * sizeof(Char))));
- pc += BC_LOAD_2_CURRENT_CHARS_LENGTH;
- }
- break;
}
- BYTECODE(LOAD_2_CURRENT_CHARS_UNCHECKED) {
- int pos = current + (insn >> BYTECODE_SHIFT);
+ DISPATCH();
+ }
+ BYTECODE(LOAD_CURRENT_CHAR_UNCHECKED) {
+ ADVANCE(LOAD_CURRENT_CHAR_UNCHECKED);
+ int pos = current + (insn >> BYTECODE_SHIFT);
+ current_char = subject[pos];
+ DISPATCH();
+ }
+ BYTECODE(LOAD_2_CURRENT_CHARS) {
+ int pos = current + (insn >> BYTECODE_SHIFT);
+ if (pos + 2 > subject.length() || pos < 0) {
+ SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
+ } else {
+ ADVANCE(LOAD_2_CURRENT_CHARS);
Char next = subject[pos + 1];
current_char = (subject[pos] | (next << (kBitsPerByte * sizeof(Char))));
- pc += BC_LOAD_2_CURRENT_CHARS_UNCHECKED_LENGTH;
- break;
- }
- BYTECODE(LOAD_4_CURRENT_CHARS) {
- DCHECK_EQ(1, sizeof(Char));
- int pos = current + (insn >> BYTECODE_SHIFT);
- if (pos + 4 > subject.length() || pos < 0) {
- pc = code_base + Load32Aligned(pc + 4);
- } else {
- Char next1 = subject[pos + 1];
- Char next2 = subject[pos + 2];
- Char next3 = subject[pos + 3];
- current_char =
- (subject[pos] | (next1 << 8) | (next2 << 16) | (next3 << 24));
- pc += BC_LOAD_4_CURRENT_CHARS_LENGTH;
- }
- break;
}
- BYTECODE(LOAD_4_CURRENT_CHARS_UNCHECKED) {
- DCHECK_EQ(1, sizeof(Char));
- int pos = current + (insn >> BYTECODE_SHIFT);
+ DISPATCH();
+ }
+ BYTECODE(LOAD_2_CURRENT_CHARS_UNCHECKED) {
+ ADVANCE(LOAD_2_CURRENT_CHARS_UNCHECKED);
+ int pos = current + (insn >> BYTECODE_SHIFT);
+ Char next = subject[pos + 1];
+ current_char = (subject[pos] | (next << (kBitsPerByte * sizeof(Char))));
+ DISPATCH();
+ }
+ BYTECODE(LOAD_4_CURRENT_CHARS) {
+ DCHECK_EQ(1, sizeof(Char));
+ int pos = current + (insn >> BYTECODE_SHIFT);
+ if (pos + 4 > subject.length() || pos < 0) {
+ SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
+ } else {
+ ADVANCE(LOAD_4_CURRENT_CHARS);
Char next1 = subject[pos + 1];
Char next2 = subject[pos + 2];
Char next3 = subject[pos + 3];
current_char =
(subject[pos] | (next1 << 8) | (next2 << 16) | (next3 << 24));
- pc += BC_LOAD_4_CURRENT_CHARS_UNCHECKED_LENGTH;
- break;
- }
- BYTECODE(CHECK_4_CHARS) {
- uint32_t c = Load32Aligned(pc + 4);
- if (c == current_char) {
- pc = code_base + Load32Aligned(pc + 8);
- } else {
- pc += BC_CHECK_4_CHARS_LENGTH;
- }
- break;
- }
- BYTECODE(CHECK_CHAR) {
- uint32_t c = (insn >> BYTECODE_SHIFT);
- if (c == current_char) {
- pc = code_base + Load32Aligned(pc + 4);
- } else {
- pc += BC_CHECK_CHAR_LENGTH;
- }
- break;
- }
- BYTECODE(CHECK_NOT_4_CHARS) {
- uint32_t c = Load32Aligned(pc + 4);
- if (c != current_char) {
- pc = code_base + Load32Aligned(pc + 8);
- } else {
- pc += BC_CHECK_NOT_4_CHARS_LENGTH;
- }
- break;
- }
- BYTECODE(CHECK_NOT_CHAR) {
- uint32_t c = (insn >> BYTECODE_SHIFT);
- if (c != current_char) {
- pc = code_base + Load32Aligned(pc + 4);
- } else {
- pc += BC_CHECK_NOT_CHAR_LENGTH;
- }
- break;
- }
- BYTECODE(AND_CHECK_4_CHARS) {
- uint32_t c = Load32Aligned(pc + 4);
- if (c == (current_char & Load32Aligned(pc + 8))) {
- pc = code_base + Load32Aligned(pc + 12);
- } else {
- pc += BC_AND_CHECK_4_CHARS_LENGTH;
- }
- break;
- }
- BYTECODE(AND_CHECK_CHAR) {
- uint32_t c = (insn >> BYTECODE_SHIFT);
- if (c == (current_char & Load32Aligned(pc + 4))) {
- pc = code_base + Load32Aligned(pc + 8);
- } else {
- pc += BC_AND_CHECK_CHAR_LENGTH;
- }
- break;
- }
- BYTECODE(AND_CHECK_NOT_4_CHARS) {
- uint32_t c = Load32Aligned(pc + 4);
- if (c != (current_char & Load32Aligned(pc + 8))) {
- pc = code_base + Load32Aligned(pc + 12);
- } else {
- pc += BC_AND_CHECK_NOT_4_CHARS_LENGTH;
- }
- break;
- }
- BYTECODE(AND_CHECK_NOT_CHAR) {
- uint32_t c = (insn >> BYTECODE_SHIFT);
- if (c != (current_char & Load32Aligned(pc + 4))) {
- pc = code_base + Load32Aligned(pc + 8);
- } else {
- pc += BC_AND_CHECK_NOT_CHAR_LENGTH;
- }
- break;
- }
- BYTECODE(MINUS_AND_CHECK_NOT_CHAR) {
- uint32_t c = (insn >> BYTECODE_SHIFT);
- uint32_t minus = Load16Aligned(pc + 4);
- uint32_t mask = Load16Aligned(pc + 6);
- if (c != ((current_char - minus) & mask)) {
- pc = code_base + Load32Aligned(pc + 8);
- } else {
- pc += BC_MINUS_AND_CHECK_NOT_CHAR_LENGTH;
- }
- break;
- }
- BYTECODE(CHECK_CHAR_IN_RANGE) {
- uint32_t from = Load16Aligned(pc + 4);
- uint32_t to = Load16Aligned(pc + 6);
- if (from <= current_char && current_char <= to) {
- pc = code_base + Load32Aligned(pc + 8);
- } else {
- pc += BC_CHECK_CHAR_IN_RANGE_LENGTH;
- }
- break;
- }
- BYTECODE(CHECK_CHAR_NOT_IN_RANGE) {
- uint32_t from = Load16Aligned(pc + 4);
- uint32_t to = Load16Aligned(pc + 6);
- if (from > current_char || current_char > to) {
- pc = code_base + Load32Aligned(pc + 8);
- } else {
- pc += BC_CHECK_CHAR_NOT_IN_RANGE_LENGTH;
- }
- break;
- }
- BYTECODE(CHECK_BIT_IN_TABLE) {
- int mask = RegExpMacroAssembler::kTableMask;
- byte b = pc[8 + ((current_char & mask) >> kBitsPerByteLog2)];
- int bit = (current_char & (kBitsPerByte - 1));
- if ((b & (1 << bit)) != 0) {
- pc = code_base + Load32Aligned(pc + 4);
- } else {
- pc += BC_CHECK_BIT_IN_TABLE_LENGTH;
- }
- break;
- }
- BYTECODE(CHECK_LT) {
- uint32_t limit = (insn >> BYTECODE_SHIFT);
- if (current_char < limit) {
- pc = code_base + Load32Aligned(pc + 4);
- } else {
- pc += BC_CHECK_LT_LENGTH;
- }
- break;
- }
- BYTECODE(CHECK_GT) {
- uint32_t limit = (insn >> BYTECODE_SHIFT);
- if (current_char > limit) {
- pc = code_base + Load32Aligned(pc + 4);
- } else {
- pc += BC_CHECK_GT_LENGTH;
- }
- break;
}
- BYTECODE(CHECK_REGISTER_LT) {
- if (registers[insn >> BYTECODE_SHIFT] < Load32Aligned(pc + 4)) {
- pc = code_base + Load32Aligned(pc + 8);
- } else {
- pc += BC_CHECK_REGISTER_LT_LENGTH;
- }
- break;
+ DISPATCH();
+ }
+ BYTECODE(LOAD_4_CURRENT_CHARS_UNCHECKED) {
+ ADVANCE(LOAD_4_CURRENT_CHARS_UNCHECKED);
+ DCHECK_EQ(1, sizeof(Char));
+ int pos = current + (insn >> BYTECODE_SHIFT);
+ Char next1 = subject[pos + 1];
+ Char next2 = subject[pos + 2];
+ Char next3 = subject[pos + 3];
+ current_char =
+ (subject[pos] | (next1 << 8) | (next2 << 16) | (next3 << 24));
+ DISPATCH();
+ }
+ BYTECODE(CHECK_4_CHARS) {
+ uint32_t c = Load32Aligned(pc + 4);
+ if (c == current_char) {
+ SET_PC_FROM_OFFSET(Load32Aligned(pc + 8));
+ } else {
+ ADVANCE(CHECK_4_CHARS);
}
- BYTECODE(CHECK_REGISTER_GE) {
- if (registers[insn >> BYTECODE_SHIFT] >= Load32Aligned(pc + 4)) {
- pc = code_base + Load32Aligned(pc + 8);
- } else {
- pc += BC_CHECK_REGISTER_GE_LENGTH;
- }
- break;
+ DISPATCH();
+ }
+ BYTECODE(CHECK_CHAR) {
+ uint32_t c = (insn >> BYTECODE_SHIFT);
+ if (c == current_char) {
+ SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
+ } else {
+ ADVANCE(CHECK_CHAR);
}
- BYTECODE(CHECK_REGISTER_EQ_POS) {
- if (registers[insn >> BYTECODE_SHIFT] == current) {
- pc = code_base + Load32Aligned(pc + 4);
- } else {
- pc += BC_CHECK_REGISTER_EQ_POS_LENGTH;
- }
- break;
- }
- BYTECODE(CHECK_NOT_REGS_EQUAL) {
- if (registers[insn >> BYTECODE_SHIFT] ==
- registers[Load32Aligned(pc + 4)]) {
- pc += BC_CHECK_NOT_REGS_EQUAL_LENGTH;
- } else {
- pc = code_base + Load32Aligned(pc + 8);
- }
- break;
- }
- BYTECODE(CHECK_NOT_BACK_REF) {
- int from = registers[insn >> BYTECODE_SHIFT];
- int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
- if (from >= 0 && len > 0) {
- if (current + len > subject.length() ||
- CompareChars(&subject[from], &subject[current], len) != 0) {
- pc = code_base + Load32Aligned(pc + 4);
- break;
- }
- current += len;
- }
- pc += BC_CHECK_NOT_BACK_REF_LENGTH;
- break;
- }
- BYTECODE(CHECK_NOT_BACK_REF_BACKWARD) {
- int from = registers[insn >> BYTECODE_SHIFT];
- int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
- if (from >= 0 && len > 0) {
- if (current - len < 0 ||
- CompareChars(&subject[from], &subject[current - len], len) != 0) {
- pc = code_base + Load32Aligned(pc + 4);
- break;
- }
- current -= len;
+ DISPATCH();
+ }
+ BYTECODE(CHECK_NOT_4_CHARS) {
+ uint32_t c = Load32Aligned(pc + 4);
+ if (c != current_char) {
+ SET_PC_FROM_OFFSET(Load32Aligned(pc + 8));
+ } else {
+ ADVANCE(CHECK_NOT_4_CHARS);
+ }
+ DISPATCH();
+ }
+ BYTECODE(CHECK_NOT_CHAR) {
+ uint32_t c = (insn >> BYTECODE_SHIFT);
+ if (c != current_char) {
+ SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
+ } else {
+ ADVANCE(CHECK_NOT_CHAR);
+ }
+ DISPATCH();
+ }
+ BYTECODE(AND_CHECK_4_CHARS) {
+ uint32_t c = Load32Aligned(pc + 4);
+ if (c == (current_char & Load32Aligned(pc + 8))) {
+ SET_PC_FROM_OFFSET(Load32Aligned(pc + 12));
+ } else {
+ ADVANCE(AND_CHECK_4_CHARS);
+ }
+ DISPATCH();
+ }
+ BYTECODE(AND_CHECK_CHAR) {
+ uint32_t c = (insn >> BYTECODE_SHIFT);
+ if (c == (current_char & Load32Aligned(pc + 4))) {
+ SET_PC_FROM_OFFSET(Load32Aligned(pc + 8));
+ } else {
+ ADVANCE(AND_CHECK_CHAR);
+ }
+ DISPATCH();
+ }
+ BYTECODE(AND_CHECK_NOT_4_CHARS) {
+ uint32_t c = Load32Aligned(pc + 4);
+ if (c != (current_char & Load32Aligned(pc + 8))) {
+ SET_PC_FROM_OFFSET(Load32Aligned(pc + 12));
+ } else {
+ ADVANCE(AND_CHECK_NOT_4_CHARS);
+ }
+ DISPATCH();
+ }
+ BYTECODE(AND_CHECK_NOT_CHAR) {
+ uint32_t c = (insn >> BYTECODE_SHIFT);
+ if (c != (current_char & Load32Aligned(pc + 4))) {
+ SET_PC_FROM_OFFSET(Load32Aligned(pc + 8));
+ } else {
+ ADVANCE(AND_CHECK_NOT_CHAR);
+ }
+ DISPATCH();
+ }
+ BYTECODE(MINUS_AND_CHECK_NOT_CHAR) {
+ uint32_t c = (insn >> BYTECODE_SHIFT);
+ uint32_t minus = Load16Aligned(pc + 4);
+ uint32_t mask = Load16Aligned(pc + 6);
+ if (c != ((current_char - minus) & mask)) {
+ SET_PC_FROM_OFFSET(Load32Aligned(pc + 8));
+ } else {
+ ADVANCE(MINUS_AND_CHECK_NOT_CHAR);
+ }
+ DISPATCH();
+ }
+ BYTECODE(CHECK_CHAR_IN_RANGE) {
+ uint32_t from = Load16Aligned(pc + 4);
+ uint32_t to = Load16Aligned(pc + 6);
+ if (from <= current_char && current_char <= to) {
+ SET_PC_FROM_OFFSET(Load32Aligned(pc + 8));
+ } else {
+ ADVANCE(CHECK_CHAR_IN_RANGE);
+ }
+ DISPATCH();
+ }
+ BYTECODE(CHECK_CHAR_NOT_IN_RANGE) {
+ uint32_t from = Load16Aligned(pc + 4);
+ uint32_t to = Load16Aligned(pc + 6);
+ if (from > current_char || current_char > to) {
+ SET_PC_FROM_OFFSET(Load32Aligned(pc + 8));
+ } else {
+ ADVANCE(CHECK_CHAR_NOT_IN_RANGE);
+ }
+ DISPATCH();
+ }
+ BYTECODE(CHECK_BIT_IN_TABLE) {
+ int mask = RegExpMacroAssembler::kTableMask;
+ byte b = pc[8 + ((current_char & mask) >> kBitsPerByteLog2)];
+ int bit = (current_char & (kBitsPerByte - 1));
+ if ((b & (1 << bit)) != 0) {
+ SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
+ } else {
+ ADVANCE(CHECK_BIT_IN_TABLE);
+ }
+ DISPATCH();
+ }
+ BYTECODE(CHECK_LT) {
+ uint32_t limit = (insn >> BYTECODE_SHIFT);
+ if (current_char < limit) {
+ SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
+ } else {
+ ADVANCE(CHECK_LT);
+ }
+ DISPATCH();
+ }
+ BYTECODE(CHECK_GT) {
+ uint32_t limit = (insn >> BYTECODE_SHIFT);
+ if (current_char > limit) {
+ SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
+ } else {
+ ADVANCE(CHECK_GT);
+ }
+ DISPATCH();
+ }
+ BYTECODE(CHECK_REGISTER_LT) {
+ if (registers[insn >> BYTECODE_SHIFT] < Load32Aligned(pc + 4)) {
+ SET_PC_FROM_OFFSET(Load32Aligned(pc + 8));
+ } else {
+ ADVANCE(CHECK_REGISTER_LT);
+ }
+ DISPATCH();
+ }
+ BYTECODE(CHECK_REGISTER_GE) {
+ if (registers[insn >> BYTECODE_SHIFT] >= Load32Aligned(pc + 4)) {
+ SET_PC_FROM_OFFSET(Load32Aligned(pc + 8));
+ } else {
+ ADVANCE(CHECK_REGISTER_GE);
+ }
+ DISPATCH();
+ }
+ BYTECODE(CHECK_REGISTER_EQ_POS) {
+ if (registers[insn >> BYTECODE_SHIFT] == current) {
+ SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
+ } else {
+ ADVANCE(CHECK_REGISTER_EQ_POS);
+ }
+ DISPATCH();
+ }
+ BYTECODE(CHECK_NOT_REGS_EQUAL) {
+ if (registers[insn >> BYTECODE_SHIFT] ==
+ registers[Load32Aligned(pc + 4)]) {
+ ADVANCE(CHECK_NOT_REGS_EQUAL);
+ } else {
+ SET_PC_FROM_OFFSET(Load32Aligned(pc + 8));
+ }
+ DISPATCH();
+ }
+ BYTECODE(CHECK_NOT_BACK_REF) {
+ int from = registers[insn >> BYTECODE_SHIFT];
+ int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
+ if (from >= 0 && len > 0) {
+ if (current + len > subject.length() ||
+ CompareChars(&subject[from], &subject[current], len) != 0) {
+ SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
+ DISPATCH();
}
- pc += BC_CHECK_NOT_BACK_REF_BACKWARD_LENGTH;
- break;
- }
- BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_UNICODE)
- V8_FALLTHROUGH;
- BYTECODE(CHECK_NOT_BACK_REF_NO_CASE) {
- bool unicode =
- (insn & BYTECODE_MASK) == BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE;
- int from = registers[insn >> BYTECODE_SHIFT];
- int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
- if (from >= 0 && len > 0) {
- if (current + len > subject.length() ||
- !BackRefMatchesNoCase(isolate, from, current, len, subject,
- unicode)) {
- pc = code_base + Load32Aligned(pc + 4);
- break;
- }
- current += len;
+ current += len;
+ }
+ ADVANCE(CHECK_NOT_BACK_REF);
+ DISPATCH();
+ }
+ BYTECODE(CHECK_NOT_BACK_REF_BACKWARD) {
+ int from = registers[insn >> BYTECODE_SHIFT];
+ int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
+ if (from >= 0 && len > 0) {
+ if (current - len < 0 ||
+ CompareChars(&subject[from], &subject[current - len], len) != 0) {
+ SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
+ DISPATCH();
}
- pc += BC_CHECK_NOT_BACK_REF_NO_CASE_LENGTH;
- break;
- }
- BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_UNICODE_BACKWARD)
- V8_FALLTHROUGH;
- BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_BACKWARD) {
- bool unicode = (insn & BYTECODE_MASK) ==
- BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE_BACKWARD;
- int from = registers[insn >> BYTECODE_SHIFT];
- int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
- if (from >= 0 && len > 0) {
- if (current - len < 0 ||
- !BackRefMatchesNoCase(isolate, from, current - len, len, subject,
- unicode)) {
- pc = code_base + Load32Aligned(pc + 4);
- break;
- }
- current -= len;
+ current -= len;
+ }
+ ADVANCE(CHECK_NOT_BACK_REF_BACKWARD);
+ DISPATCH();
+ }
+ BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_UNICODE) {
+ int from = registers[insn >> BYTECODE_SHIFT];
+ int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
+ if (from >= 0 && len > 0) {
+ if (current + len > subject.length() ||
+ !BackRefMatchesNoCase(isolate, from, current, len, subject, true)) {
+ SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
+ DISPATCH();
}
- pc += BC_CHECK_NOT_BACK_REF_NO_CASE_BACKWARD_LENGTH;
- break;
- }
- BYTECODE(CHECK_AT_START) {
- if (current == 0) {
- pc = code_base + Load32Aligned(pc + 4);
- } else {
- pc += BC_CHECK_AT_START_LENGTH;
+ current += len;
+ }
+ ADVANCE(CHECK_NOT_BACK_REF_NO_CASE_UNICODE);
+ DISPATCH();
+ }
+ BYTECODE(CHECK_NOT_BACK_REF_NO_CASE) {
+ int from = registers[insn >> BYTECODE_SHIFT];
+ int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
+ if (from >= 0 && len > 0) {
+ if (current + len > subject.length() ||
+ !BackRefMatchesNoCase(isolate, from, current, len, subject,
+ false)) {
+ SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
+ DISPATCH();
}
- break;
+ current += len;
}
- BYTECODE(CHECK_NOT_AT_START) {
- if (current + (insn >> BYTECODE_SHIFT) == 0) {
- pc += BC_CHECK_NOT_AT_START_LENGTH;
- } else {
- pc = code_base + Load32Aligned(pc + 4);
+ ADVANCE(CHECK_NOT_BACK_REF_NO_CASE);
+ DISPATCH();
+ }
+ BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_UNICODE_BACKWARD) {
+ int from = registers[insn >> BYTECODE_SHIFT];
+ int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
+ if (from >= 0 && len > 0) {
+ if (current - len < 0 ||
+ !BackRefMatchesNoCase(isolate, from, current - len, len, subject,
+ true)) {
+ SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
+ DISPATCH();
}
- break;
+ current -= len;
}
- BYTECODE(SET_CURRENT_POSITION_FROM_END) {
- int by = static_cast<uint32_t>(insn) >> BYTECODE_SHIFT;
- if (subject.length() - current > by) {
- current = subject.length() - by;
- current_char = subject[current - 1];
+ ADVANCE(CHECK_NOT_BACK_REF_NO_CASE_UNICODE_BACKWARD);
+ DISPATCH();
+ }
+ BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_BACKWARD) {
+ int from = registers[insn >> BYTECODE_SHIFT];
+ int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
+ if (from >= 0 && len > 0) {
+ if (current - len < 0 ||
+ !BackRefMatchesNoCase(isolate, from, current - len, len, subject,
+ false)) {
+ SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
+ DISPATCH();
}
- pc += BC_SET_CURRENT_POSITION_FROM_END_LENGTH;
- break;
+ current -= len;
}
+ ADVANCE(CHECK_NOT_BACK_REF_NO_CASE_BACKWARD);
+ DISPATCH();
+ }
+ BYTECODE(CHECK_AT_START) {
+ if (current + (insn >> BYTECODE_SHIFT) == 0) {
+ SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
+ } else {
+ ADVANCE(CHECK_AT_START);
+ }
+ DISPATCH();
+ }
+ BYTECODE(CHECK_NOT_AT_START) {
+ if (current + (insn >> BYTECODE_SHIFT) == 0) {
+ ADVANCE(CHECK_NOT_AT_START);
+ } else {
+ SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
+ }
+ DISPATCH();
+ }
+ BYTECODE(SET_CURRENT_POSITION_FROM_END) {
+ ADVANCE(SET_CURRENT_POSITION_FROM_END);
+ int by = static_cast<uint32_t>(insn) >> BYTECODE_SHIFT;
+ if (subject.length() - current > by) {
+ current = subject.length() - by;
+ current_char = subject[current - 1];
+ }
+ DISPATCH();
+ }
+ BYTECODE(CHECK_CURRENT_POSITION) {
+ int pos = current + (insn >> BYTECODE_SHIFT);
+ if (pos > subject.length() || pos < 0) {
+ SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
+ } else {
+ ADVANCE(CHECK_CURRENT_POSITION);
+ }
+ DISPATCH();
+ }
+#if V8_USE_COMPUTED_GOTO
+// Lint gets confused a lot if we just use !V8_USE_COMPUTED_GOTO or ifndef
+// V8_USE_COMPUTED_GOTO here.
+#else
default:
UNREACHABLE();
- break;
}
+#endif // V8_USE_COMPUTED_GOTO
}
}
#undef BYTECODE
+#undef DISPATCH
+#undef DECODE
+#undef SET_PC_FROM_OFFSET
+#undef ADVANCE
+#undef BC_LABEL
+#undef V8_USE_COMPUTED_GOTO
} // namespace
// static
+void IrregexpInterpreter::Disassemble(ByteArray byte_array,
+ const std::string& pattern) {
+ DisallowHeapAllocation no_gc;
+
+ PrintF("[generated bytecode for regexp pattern: '%s']\n", pattern.c_str());
+
+ const byte* const code_base = byte_array.GetDataStartAddress();
+ const int byte_array_length = byte_array.length();
+ ptrdiff_t offset = 0;
+
+ while (offset < byte_array_length) {
+ const byte* const pc = code_base + offset;
+ PrintF("%p %4" V8PRIxPTRDIFF " ", pc, offset);
+ DisassembleSingleBytecode(code_base, pc);
+ offset += RegExpBytecodeLength(*pc);
+ }
+}
+
+// static
IrregexpInterpreter::Result IrregexpInterpreter::Match(
- Isolate* isolate, Handle<ByteArray> code_array,
- Handle<String> subject_string, int* registers, int start_position) {
- DCHECK(subject_string->IsFlat());
+ Isolate* isolate, JSRegExp regexp, String subject_string, int* registers,
+ int registers_length, int start_position, RegExp::CallOrigin call_origin) {
+ if (FLAG_regexp_tier_up) {
+ regexp.MarkTierUpForNextExec();
+ }
+
+ bool is_one_byte = String::IsOneByteRepresentationUnderneath(subject_string);
+ ByteArray code_array = ByteArray::cast(regexp.Bytecode(is_one_byte));
- // Note: Heap allocation *is* allowed in two situations:
+ return MatchInternal(isolate, code_array, subject_string, registers,
+ registers_length, start_position, call_origin);
+}
+
+IrregexpInterpreter::Result IrregexpInterpreter::MatchInternal(
+ Isolate* isolate, ByteArray code_array, String subject_string,
+ int* registers, int registers_length, int start_position,
+ RegExp::CallOrigin call_origin) {
+ DCHECK(subject_string.IsFlat());
+
+ // Note: Heap allocation *is* allowed in two situations if calling from
+ // Runtime:
// 1. When creating & throwing a stack overflow exception. The interpreter
// aborts afterwards, and thus possible-moved objects are never used.
// 2. When handling interrupts. We manually relocate unhandlified references
// after interrupts have run.
DisallowHeapAllocation no_gc;
+ // Reset registers to -1 (=undefined).
+ // This is necessary because registers are only written when a
+ // capture group matched.
+ // Resetting them ensures that previous matches are cleared.
+ memset(registers, -1, sizeof(registers[0]) * registers_length);
+
uc16 previous_char = '\n';
- String::FlatContent subject_content = subject_string->GetFlatContent(no_gc);
+ String::FlatContent subject_content = subject_string.GetFlatContent(no_gc);
if (subject_content.IsOneByte()) {
Vector<const uint8_t> subject_vector = subject_content.ToOneByteVector();
if (start_position != 0) previous_char = subject_vector[start_position - 1];
return RawMatch(isolate, code_array, subject_string, subject_vector,
- registers, start_position, previous_char);
+ registers, start_position, previous_char, call_origin);
} else {
DCHECK(subject_content.IsTwoByte());
Vector<const uc16> subject_vector = subject_content.ToUC16Vector();
if (start_position != 0) previous_char = subject_vector[start_position - 1];
return RawMatch(isolate, code_array, subject_string, subject_vector,
- registers, start_position, previous_char);
+ registers, start_position, previous_char, call_origin);
}
}
+// This method is called through an external reference from RegExpExecInternal
+// builtin.
+IrregexpInterpreter::Result IrregexpInterpreter::MatchForCallFromJs(
+ Address subject, int32_t start_position, Address, Address, int* registers,
+ int32_t registers_length, Address, RegExp::CallOrigin call_origin,
+ Isolate* isolate, Address regexp) {
+ DCHECK_NOT_NULL(isolate);
+ DCHECK_NOT_NULL(registers);
+ DCHECK(call_origin == RegExp::CallOrigin::kFromJs);
+
+ DisallowHeapAllocation no_gc;
+ DisallowJavascriptExecution no_js(isolate);
+
+ String subject_string = String::cast(Object(subject));
+ JSRegExp regexp_obj = JSRegExp::cast(Object(regexp));
+
+ return Match(isolate, regexp_obj, subject_string, registers, registers_length,
+ start_position, call_origin);
+}
+
+IrregexpInterpreter::Result IrregexpInterpreter::MatchForCallFromRuntime(
+ Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject_string,
+ int* registers, int registers_length, int start_position) {
+ return Match(isolate, *regexp, *subject_string, registers, registers_length,
+ start_position, RegExp::CallOrigin::kFromRuntime);
+}
+
} // namespace internal
} // namespace v8
diff --git a/deps/v8/src/regexp/regexp-interpreter.h b/deps/v8/src/regexp/regexp-interpreter.h
index ad27dcd296..fbc5a3b290 100644
--- a/deps/v8/src/regexp/regexp-interpreter.h
+++ b/deps/v8/src/regexp/regexp-interpreter.h
@@ -12,7 +12,7 @@
namespace v8 {
namespace internal {
-class V8_EXPORT_PRIVATE IrregexpInterpreter {
+class V8_EXPORT_PRIVATE IrregexpInterpreter : public AllStatic {
public:
enum Result {
FAILURE = RegExp::kInternalRegExpFailure,
@@ -21,10 +21,37 @@ class V8_EXPORT_PRIVATE IrregexpInterpreter {
RETRY = RegExp::kInternalRegExpRetry,
};
- // The caller is responsible for initializing registers before each call.
- static Result Match(Isolate* isolate, Handle<ByteArray> code_array,
- Handle<String> subject_string, int* registers,
- int start_position);
+ // In case a StackOverflow occurs, a StackOverflowException is created and
+ // EXCEPTION is returned.
+ static Result MatchForCallFromRuntime(Isolate* isolate,
+ Handle<JSRegExp> regexp,
+ Handle<String> subject_string,
+ int* registers, int registers_length,
+ int start_position);
+
+ // In case a StackOverflow occurs, EXCEPTION is returned. The caller is
+ // responsible for creating the exception.
+ // Arguments input_start, input_end and backtrack_stack are
+ // unused. They are only passed to match the signature of the native irregexp
+ // code.
+ static Result MatchForCallFromJs(Address subject, int32_t start_position,
+ Address input_start, Address input_end,
+ int* registers, int32_t registers_length,
+ Address backtrack_stack,
+ RegExp::CallOrigin call_origin,
+ Isolate* isolate, Address regexp);
+
+ static Result MatchInternal(Isolate* isolate, ByteArray code_array,
+ String subject_string, int* registers,
+ int registers_length, int start_position,
+ RegExp::CallOrigin call_origin);
+
+ static void Disassemble(ByteArray byte_array, const std::string& pattern);
+
+ private:
+ static Result Match(Isolate* isolate, JSRegExp regexp, String subject_string,
+ int* registers, int registers_length, int start_position,
+ RegExp::CallOrigin call_origin);
};
} // namespace internal
diff --git a/deps/v8/src/regexp/regexp-macro-assembler-tracer.cc b/deps/v8/src/regexp/regexp-macro-assembler-tracer.cc
index db9c5af569..5dca04a18c 100644
--- a/deps/v8/src/regexp/regexp-macro-assembler-tracer.cc
+++ b/deps/v8/src/regexp/regexp-macro-assembler-tracer.cc
@@ -162,24 +162,19 @@ void RegExpMacroAssemblerTracer::ReadStackPointerFromRegister(int reg) {
assembler_->ReadStackPointerFromRegister(reg);
}
-
-void RegExpMacroAssemblerTracer::LoadCurrentCharacter(int cp_offset,
- Label* on_end_of_input,
- bool check_bounds,
- int characters) {
+void RegExpMacroAssemblerTracer::LoadCurrentCharacterImpl(
+ int cp_offset, Label* on_end_of_input, bool check_bounds, int characters,
+ int eats_at_least) {
const char* check_msg = check_bounds ? "" : " (unchecked)";
- PrintF(" LoadCurrentCharacter(cp_offset=%d, label[%08x]%s (%d chars));\n",
- cp_offset,
- LabelToInt(on_end_of_input),
- check_msg,
- characters);
- assembler_->LoadCurrentCharacter(cp_offset,
- on_end_of_input,
- check_bounds,
- characters);
+ PrintF(
+ " LoadCurrentCharacter(cp_offset=%d, label[%08x]%s (%d chars) (eats at "
+ "least %d));\n",
+ cp_offset, LabelToInt(on_end_of_input), check_msg, characters,
+ eats_at_least);
+ assembler_->LoadCurrentCharacter(cp_offset, on_end_of_input, check_bounds,
+ characters, eats_at_least);
}
-
class PrintablePrinter {
public:
explicit PrintablePrinter(uc16 character) : character_(character) { }
@@ -232,13 +227,13 @@ void RegExpMacroAssemblerTracer::CheckCharacter(unsigned c, Label* on_equal) {
assembler_->CheckCharacter(c, on_equal);
}
-
-void RegExpMacroAssemblerTracer::CheckAtStart(Label* on_at_start) {
- PrintF(" CheckAtStart(label[%08x]);\n", LabelToInt(on_at_start));
- assembler_->CheckAtStart(on_at_start);
+void RegExpMacroAssemblerTracer::CheckAtStart(int cp_offset,
+ Label* on_at_start) {
+ PrintF(" CheckAtStart(cp_offset=%d, label[%08x]);\n", cp_offset,
+ LabelToInt(on_at_start));
+ assembler_->CheckAtStart(cp_offset, on_at_start);
}
-
void RegExpMacroAssemblerTracer::CheckNotAtStart(int cp_offset,
Label* on_not_at_start) {
PrintF(" CheckNotAtStart(cp_offset=%d, label[%08x]);\n", cp_offset,
diff --git a/deps/v8/src/regexp/regexp-macro-assembler-tracer.h b/deps/v8/src/regexp/regexp-macro-assembler-tracer.h
index d0b68bd59d..2a44146e73 100644
--- a/deps/v8/src/regexp/regexp-macro-assembler-tracer.h
+++ b/deps/v8/src/regexp/regexp-macro-assembler-tracer.h
@@ -22,13 +22,13 @@ class RegExpMacroAssemblerTracer: public RegExpMacroAssembler {
void AdvanceRegister(int reg, int by) override; // r[reg] += by.
void Backtrack() override;
void Bind(Label* label) override;
- void CheckAtStart(Label* on_at_start) override;
void CheckCharacter(unsigned c, Label* on_equal) override;
void CheckCharacterAfterAnd(unsigned c, unsigned and_with,
Label* on_equal) override;
void CheckCharacterGT(uc16 limit, Label* on_greater) override;
void CheckCharacterLT(uc16 limit, Label* on_less) override;
void CheckGreedyLoop(Label* on_tos_equals_current_position) override;
+ void CheckAtStart(int cp_offset, Label* on_at_start) override;
void CheckNotAtStart(int cp_offset, Label* on_not_at_start) override;
void CheckNotBackReference(int start_reg, bool read_backward,
Label* on_no_match) override;
@@ -53,9 +53,9 @@ class RegExpMacroAssemblerTracer: public RegExpMacroAssembler {
void IfRegisterLT(int reg, int comparand, Label* if_lt) override;
void IfRegisterEqPos(int reg, Label* if_eq) override;
IrregexpImplementation Implementation() override;
- void LoadCurrentCharacter(int cp_offset, Label* on_end_of_input,
- bool check_bounds = true,
- int characters = 1) override;
+ void LoadCurrentCharacterImpl(int cp_offset, Label* on_end_of_input,
+ bool check_bounds, int characters,
+ int eats_at_least) override;
void PopCurrentPosition() override;
void PopRegister(int register_index) override;
void PushBacktrack(Label* label) override;
diff --git a/deps/v8/src/regexp/regexp-macro-assembler.cc b/deps/v8/src/regexp/regexp-macro-assembler.cc
index 68fa16db61..96fb53d2a0 100644
--- a/deps/v8/src/regexp/regexp-macro-assembler.cc
+++ b/deps/v8/src/regexp/regexp-macro-assembler.cc
@@ -85,6 +85,20 @@ void RegExpMacroAssembler::CheckPosition(int cp_offset,
LoadCurrentCharacter(cp_offset, on_outside_input, true);
}
+void RegExpMacroAssembler::LoadCurrentCharacter(int cp_offset,
+ Label* on_end_of_input,
+ bool check_bounds,
+ int characters,
+ int eats_at_least) {
+ // By default, eats_at_least = characters.
+ if (eats_at_least == kUseCharactersValue) {
+ eats_at_least = characters;
+ }
+
+ LoadCurrentCharacterImpl(cp_offset, on_end_of_input, check_bounds, characters,
+ eats_at_least);
+}
+
bool RegExpMacroAssembler::CheckSpecialCharacterClass(uc16 type,
Label* on_no_match) {
return false;
@@ -129,32 +143,46 @@ const byte* NativeRegExpMacroAssembler::StringCharacterPosition(
}
}
+// This method may only be called after an interrupt.
int NativeRegExpMacroAssembler::CheckStackGuardState(
- Isolate* isolate, int start_index, bool is_direct_call,
+ Isolate* isolate, int start_index, RegExp::CallOrigin call_origin,
Address* return_address, Code re_code, Address* subject,
const byte** input_start, const byte** input_end) {
DisallowHeapAllocation no_gc;
DCHECK(re_code.raw_instruction_start() <= *return_address);
DCHECK(*return_address <= re_code.raw_instruction_end());
- int return_value = 0;
- // Prepare for possible GC.
- HandleScope handles(isolate);
- Handle<Code> code_handle(re_code, isolate);
- Handle<String> subject_handle(String::cast(Object(*subject)), isolate);
- bool is_one_byte = String::IsOneByteRepresentationUnderneath(*subject_handle);
-
StackLimitCheck check(isolate);
bool js_has_overflowed = check.JsHasOverflowed();
- if (is_direct_call) {
+ if (call_origin == RegExp::CallOrigin::kFromJs) {
// Direct calls from JavaScript can be interrupted in two ways:
// 1. A real stack overflow, in which case we let the caller throw the
// exception.
// 2. The stack guard was used to interrupt execution for another purpose,
// forcing the call through the runtime system.
- return_value = js_has_overflowed ? EXCEPTION : RETRY;
- } else if (js_has_overflowed) {
+
+ // Bug(v8:9540) Investigate why this method is called from JS although no
+ // stackoverflow or interrupt is pending on ARM64. We return 0 in this case
+ // to continue execution normally.
+ if (js_has_overflowed) {
+ return EXCEPTION;
+ } else if (check.InterruptRequested()) {
+ return RETRY;
+ } else {
+ return 0;
+ }
+ }
+ DCHECK(call_origin == RegExp::CallOrigin::kFromRuntime);
+
+ // Prepare for possible GC.
+ HandleScope handles(isolate);
+ Handle<Code> code_handle(re_code, isolate);
+ Handle<String> subject_handle(String::cast(Object(*subject)), isolate);
+ bool is_one_byte = String::IsOneByteRepresentationUnderneath(*subject_handle);
+ int return_value = 0;
+
+ if (js_has_overflowed) {
AllowHeapAllocation yes_gc;
isolate->StackOverflow();
return_value = EXCEPTION;
@@ -191,7 +219,7 @@ int NativeRegExpMacroAssembler::CheckStackGuardState(
}
// Returns a {Result} sentinel, or the number of successful matches.
-int NativeRegExpMacroAssembler::Match(Handle<Code> regexp_code,
+int NativeRegExpMacroAssembler::Match(Handle<JSRegExp> regexp,
Handle<String> subject,
int* offsets_vector,
int offsets_vector_length,
@@ -234,31 +262,36 @@ int NativeRegExpMacroAssembler::Match(Handle<Code> regexp_code,
StringCharacterPosition(subject_ptr, start_offset + slice_offset, no_gc);
int byte_length = char_length << char_size_shift;
const byte* input_end = input_start + byte_length;
- return Execute(*regexp_code, *subject, start_offset, input_start, input_end,
- offsets_vector, offsets_vector_length, isolate);
+ return Execute(*subject, start_offset, input_start, input_end, offsets_vector,
+ offsets_vector_length, isolate, *regexp);
}
// Returns a {Result} sentinel, or the number of successful matches.
+// TODO(pthier): The JSRegExp object is passed to native irregexp code to match
+// the signature of the interpreter. We should get rid of JS objects passed to
+// internal methods.
int NativeRegExpMacroAssembler::Execute(
- Code code,
String input, // This needs to be the unpacked (sliced, cons) string.
int start_offset, const byte* input_start, const byte* input_end,
- int* output, int output_size, Isolate* isolate) {
+ int* output, int output_size, Isolate* isolate, JSRegExp regexp) {
// Ensure that the minimum stack has been allocated.
RegExpStackScope stack_scope(isolate);
Address stack_base = stack_scope.stack()->stack_base();
- int direct_call = 0;
+ bool is_one_byte = String::IsOneByteRepresentationUnderneath(input);
+ Code code = Code::cast(regexp.Code(is_one_byte));
+ RegExp::CallOrigin call_origin = RegExp::CallOrigin::kFromRuntime;
using RegexpMatcherSig = int(
Address input_string, int start_offset, // NOLINT(readability/casting)
const byte* input_start, const byte* input_end, int* output,
- int output_size, Address stack_base, int direct_call, Isolate* isolate);
+ int output_size, Address stack_base, int call_origin, Isolate* isolate,
+ Address regexp);
auto fn = GeneratedCode<RegexpMatcherSig>::FromCode(code);
- int result =
- fn.CallIrregexp(input.ptr(), start_offset, input_start, input_end, output,
- output_size, stack_base, direct_call, isolate);
+ int result = fn.CallIrregexp(input.ptr(), start_offset, input_start,
+ input_end, output, output_size, stack_base,
+ call_origin, isolate, regexp.ptr());
DCHECK(result >= RETRY);
if (result == EXCEPTION && !isolate->has_pending_exception()) {
diff --git a/deps/v8/src/regexp/regexp-macro-assembler.h b/deps/v8/src/regexp/regexp-macro-assembler.h
index b55ac13590..ccf19d3fb6 100644
--- a/deps/v8/src/regexp/regexp-macro-assembler.h
+++ b/deps/v8/src/regexp/regexp-macro-assembler.h
@@ -36,6 +36,8 @@ class RegExpMacroAssembler {
static const int kTableSize = 1 << kTableSizeBits;
static const int kTableMask = kTableSize - 1;
+ static constexpr int kUseCharactersValue = -1;
+
enum IrregexpImplementation {
kIA32Implementation,
kARMImplementation,
@@ -69,7 +71,6 @@ class RegExpMacroAssembler {
// stack by an earlier PushBacktrack(Label*).
virtual void Backtrack() = 0;
virtual void Bind(Label* label) = 0;
- virtual void CheckAtStart(Label* on_at_start) = 0;
// Dispatch after looking the current character up in a 2-bits-per-entry
// map. The destinations vector has up to 4 labels.
virtual void CheckCharacter(unsigned c, Label* on_equal) = 0;
@@ -81,6 +82,7 @@ class RegExpMacroAssembler {
virtual void CheckCharacterGT(uc16 limit, Label* on_greater) = 0;
virtual void CheckCharacterLT(uc16 limit, Label* on_less) = 0;
virtual void CheckGreedyLoop(Label* on_tos_equals_current_position) = 0;
+ virtual void CheckAtStart(int cp_offset, Label* on_at_start) = 0;
virtual void CheckNotAtStart(int cp_offset, Label* on_not_at_start) = 0;
virtual void CheckNotBackReference(int start_reg, bool read_backward,
Label* on_no_match) = 0;
@@ -133,10 +135,12 @@ class RegExpMacroAssembler {
// label if it is.
virtual void IfRegisterEqPos(int reg, Label* if_eq) = 0;
virtual IrregexpImplementation Implementation() = 0;
- virtual void LoadCurrentCharacter(int cp_offset,
- Label* on_end_of_input,
- bool check_bounds = true,
- int characters = 1) = 0;
+ V8_EXPORT_PRIVATE void LoadCurrentCharacter(
+ int cp_offset, Label* on_end_of_input, bool check_bounds = true,
+ int characters = 1, int eats_at_least = kUseCharactersValue);
+ virtual void LoadCurrentCharacterImpl(int cp_offset, Label* on_end_of_input,
+ bool check_bounds, int characters,
+ int eats_at_least) = 0;
virtual void PopCurrentPosition() = 0;
virtual void PopRegister(int register_index) = 0;
// Pushes the label on the backtrack stack, so that a following Backtrack
@@ -219,7 +223,7 @@ class NativeRegExpMacroAssembler: public RegExpMacroAssembler {
bool CanReadUnaligned() override;
// Returns a {Result} sentinel, or the number of successful matches.
- static int Match(Handle<Code> regexp, Handle<String> subject,
+ static int Match(Handle<JSRegExp> regexp, Handle<String> subject,
int* offsets_vector, int offsets_vector_length,
int previous_index, Isolate* isolate);
@@ -235,9 +239,9 @@ class NativeRegExpMacroAssembler: public RegExpMacroAssembler {
String subject, int start_index, const DisallowHeapAllocation& no_gc);
static int CheckStackGuardState(Isolate* isolate, int start_index,
- bool is_direct_call, Address* return_address,
- Code re_code, Address* subject,
- const byte** input_start,
+ RegExp::CallOrigin call_origin,
+ Address* return_address, Code re_code,
+ Address* subject, const byte** input_start,
const byte** input_end);
// Byte map of one byte characters with a 0xff if the character is a word
@@ -250,11 +254,11 @@ class NativeRegExpMacroAssembler: public RegExpMacroAssembler {
}
// Returns a {Result} sentinel, or the number of successful matches.
- V8_EXPORT_PRIVATE static int Execute(Code code, String input,
- int start_offset,
+ V8_EXPORT_PRIVATE static int Execute(String input, int start_offset,
const byte* input_start,
const byte* input_end, int* output,
- int output_size, Isolate* isolate);
+ int output_size, Isolate* isolate,
+ JSRegExp regexp);
};
} // namespace internal
diff --git a/deps/v8/src/regexp/regexp-nodes.h b/deps/v8/src/regexp/regexp-nodes.h
index 4c13b74926..d618c9bb27 100644
--- a/deps/v8/src/regexp/regexp-nodes.h
+++ b/deps/v8/src/regexp/regexp-nodes.h
@@ -20,11 +20,14 @@ class QuickCheckDetails;
class RegExpCompiler;
class Trace;
struct PreloadState;
+class ChoiceNode;
#define FOR_EACH_NODE_TYPE(VISIT) \
VISIT(End) \
VISIT(Action) \
VISIT(Choice) \
+ VISIT(LoopChoice) \
+ VISIT(NegativeLookaroundChoice) \
VISIT(BackReference) \
VISIT(Assertion) \
VISIT(Text)
@@ -90,6 +93,34 @@ struct NodeInfo final {
bool replacement_calculated : 1;
};
+struct EatsAtLeastInfo final {
+ EatsAtLeastInfo() : EatsAtLeastInfo(0) {}
+ explicit EatsAtLeastInfo(uint8_t eats)
+ : eats_at_least_from_possibly_start(eats),
+ eats_at_least_from_not_start(eats) {}
+ void SetMin(const EatsAtLeastInfo& other) {
+ if (other.eats_at_least_from_possibly_start <
+ eats_at_least_from_possibly_start) {
+ eats_at_least_from_possibly_start =
+ other.eats_at_least_from_possibly_start;
+ }
+ if (other.eats_at_least_from_not_start < eats_at_least_from_not_start) {
+ eats_at_least_from_not_start = other.eats_at_least_from_not_start;
+ }
+ }
+
+ // Any successful match starting from the current node will consume at least
+ // this many characters. This does not necessarily mean that there is a
+ // possible match with exactly this many characters, but we generally try to
+ // get this number as high as possible to allow for early exit on failure.
+ uint8_t eats_at_least_from_possibly_start;
+
+ // Like eats_at_least_from_possibly_start, but with the additional assumption
+ // that start-of-string assertions (^) can't match. This value is greater than
+ // or equal to eats_at_least_from_possibly_start.
+ uint8_t eats_at_least_from_not_start;
+};
+
class RegExpNode : public ZoneObject {
public:
explicit RegExpNode(Zone* zone)
@@ -104,13 +135,20 @@ class RegExpNode : public ZoneObject {
// Generates a goto to this node or actually generates the code at this point.
virtual void Emit(RegExpCompiler* compiler, Trace* trace) = 0;
// How many characters must this node consume at a minimum in order to
- // succeed. If we have found at least 'still_to_find' characters that
- // must be consumed there is no need to ask any following nodes whether
- // they are sure to eat any more characters. The not_at_start argument is
- // used to indicate that we know we are not at the start of the input. In
- // this case anchored branches will always fail and can be ignored when
- // determining how many characters are consumed on success.
- virtual int EatsAtLeast(int still_to_find, int budget, bool not_at_start) = 0;
+ // succeed. The not_at_start argument is used to indicate that we know we are
+ // not at the start of the input. In this case anchored branches will always
+ // fail and can be ignored when determining how many characters are consumed
+ // on success. If this node has not been analyzed yet, EatsAtLeast returns 0.
+ int EatsAtLeast(bool not_at_start);
+ // Returns how many characters this node must consume in order to succeed,
+ // given that this is a LoopChoiceNode whose counter register is in a
+ // newly-initialized state at the current position in the generated code. For
+ // example, consider /a{6,8}/. Absent any extra information, the
+ // LoopChoiceNode for the repetition must report that it consumes at least
+ // zero characters, because it may have already looped several times. However,
+ // with a newly-initialized counter, it can report that it consumes at least
+ // six characters.
+ virtual EatsAtLeastInfo EatsAtLeastFromLoopEntry();
// Emits some quick code that checks whether the preloaded characters match.
// Falls through on certain failure, jumps to the label on possible success.
// If the node cannot make a quick check it does nothing and returns false.
@@ -118,7 +156,7 @@ class RegExpNode : public ZoneObject {
Trace* trace, bool preload_has_checked_bounds,
Label* on_possible_success,
QuickCheckDetails* details_return,
- bool fall_through_on_failure);
+ bool fall_through_on_failure, ChoiceNode* predecessor);
// For a given number of characters this returns a mask and a value. The
// next n characters are anded with the mask and compared with the value.
// A comparison failure indicates the node cannot match the next n characters.
@@ -127,6 +165,17 @@ class RegExpNode : public ZoneObject {
RegExpCompiler* compiler,
int characters_filled_in,
bool not_at_start) = 0;
+ // Fills in quick check details for this node, given that this is a
+ // LoopChoiceNode whose counter register is in a newly-initialized state at
+ // the current position in the generated code. For example, consider /a{6,8}/.
+ // Absent any extra information, the LoopChoiceNode for the repetition cannot
+ // generate any useful quick check because a match might be the (empty)
+ // continuation node. However, with a newly-initialized counter, it can
+ // generate a quick check for several 'a' characters at once.
+ virtual void GetQuickCheckDetailsFromLoopEntry(QuickCheckDetails* details,
+ RegExpCompiler* compiler,
+ int characters_filled_in,
+ bool not_at_start);
static const int kNodeIsTooComplexForGreedyLoops = kMinInt;
virtual int GreedyLoopTextLength() { return kNodeIsTooComplexForGreedyLoops; }
// Only returns the successor for a text node of length 1 that matches any
@@ -183,6 +232,10 @@ class RegExpNode : public ZoneObject {
void set_on_work_list(bool value) { on_work_list_ = value; }
NodeInfo* info() { return &info_; }
+ const EatsAtLeastInfo* eats_at_least_info() const { return &eats_at_least_; }
+ void set_eats_at_least_info(const EatsAtLeastInfo& eats_at_least) {
+ eats_at_least_ = eats_at_least;
+ }
BoyerMooreLookahead* bm_info(bool not_at_start) {
return bm_info_[not_at_start ? 1 : 0];
@@ -205,6 +258,11 @@ class RegExpNode : public ZoneObject {
Label label_;
bool on_work_list_;
NodeInfo info_;
+
+ // Saved values for EatsAtLeast results, to avoid recomputation. Filled in
+ // during analysis (valid if info_.been_analyzed is true).
+ EatsAtLeastInfo eats_at_least_;
+
// This variable keeps track of how many times code has been generated for
// this node (in different traces). We don't keep track of where the
// generated code is located unless the code is generated at the start of
@@ -239,7 +297,7 @@ class SeqRegExpNode : public RegExpNode {
class ActionNode : public SeqRegExpNode {
public:
enum ActionType {
- SET_REGISTER,
+ SET_REGISTER_FOR_LOOP,
INCREMENT_REGISTER,
STORE_POSITION,
BEGIN_SUBMATCH,
@@ -247,7 +305,8 @@ class ActionNode : public SeqRegExpNode {
EMPTY_MATCH_CHECK,
CLEAR_CAPTURES
};
- static ActionNode* SetRegister(int reg, int val, RegExpNode* on_success);
+ static ActionNode* SetRegisterForLoop(int reg, int val,
+ RegExpNode* on_success);
static ActionNode* IncrementRegister(int reg, RegExpNode* on_success);
static ActionNode* StorePosition(int reg, bool is_capture,
RegExpNode* on_success);
@@ -265,13 +324,9 @@ class ActionNode : public SeqRegExpNode {
RegExpNode* on_success);
void Accept(NodeVisitor* visitor) override;
void Emit(RegExpCompiler* compiler, Trace* trace) override;
- int EatsAtLeast(int still_to_find, int budget, bool not_at_start) override;
void GetQuickCheckDetails(QuickCheckDetails* details,
RegExpCompiler* compiler, int filled_in,
- bool not_at_start) override {
- return on_success()->GetQuickCheckDetails(details, compiler, filled_in,
- not_at_start);
- }
+ bool not_at_start) override;
void FillInBMInfo(Isolate* isolate, int offset, int budget,
BoyerMooreLookahead* bm, bool not_at_start) override;
ActionType action_type() { return action_type_; }
@@ -342,7 +397,6 @@ class TextNode : public SeqRegExpNode {
JSRegExp::Flags flags);
void Accept(NodeVisitor* visitor) override;
void Emit(RegExpCompiler* compiler, Trace* trace) override;
- int EatsAtLeast(int still_to_find, int budget, bool not_at_start) override;
void GetQuickCheckDetails(QuickCheckDetails* details,
RegExpCompiler* compiler, int characters_filled_in,
bool not_at_start) override;
@@ -356,6 +410,7 @@ class TextNode : public SeqRegExpNode {
BoyerMooreLookahead* bm, bool not_at_start) override;
void CalculateOffsets();
RegExpNode* FilterOneByte(int depth) override;
+ int Length();
private:
enum TextEmitPassType {
@@ -371,7 +426,6 @@ class TextNode : public SeqRegExpNode {
void TextEmitPass(RegExpCompiler* compiler, TextEmitPassType pass,
bool preloaded, Trace* trace, bool first_element_checked,
int* checked_up_to);
- int Length();
ZoneList<TextElement>* elms_;
bool read_backward_;
};
@@ -402,7 +456,6 @@ class AssertionNode : public SeqRegExpNode {
}
void Accept(NodeVisitor* visitor) override;
void Emit(RegExpCompiler* compiler, Trace* trace) override;
- int EatsAtLeast(int still_to_find, int budget, bool not_at_start) override;
void GetQuickCheckDetails(QuickCheckDetails* details,
RegExpCompiler* compiler, int filled_in,
bool not_at_start) override;
@@ -434,8 +487,6 @@ class BackReferenceNode : public SeqRegExpNode {
int end_register() { return end_reg_; }
bool read_backward() { return read_backward_; }
void Emit(RegExpCompiler* compiler, Trace* trace) override;
- int EatsAtLeast(int still_to_find, int recursion_depth,
- bool not_at_start) override;
void GetQuickCheckDetails(QuickCheckDetails* details,
RegExpCompiler* compiler, int characters_filled_in,
bool not_at_start) override {
@@ -457,10 +508,6 @@ class EndNode : public RegExpNode {
EndNode(Action action, Zone* zone) : RegExpNode(zone), action_(action) {}
void Accept(NodeVisitor* visitor) override;
void Emit(RegExpCompiler* compiler, Trace* trace) override;
- int EatsAtLeast(int still_to_find, int recursion_depth,
- bool not_at_start) override {
- return 0;
- }
void GetQuickCheckDetails(QuickCheckDetails* details,
RegExpCompiler* compiler, int characters_filled_in,
bool not_at_start) override {
@@ -540,9 +587,6 @@ class ChoiceNode : public RegExpNode {
}
ZoneList<GuardedAlternative>* alternatives() { return alternatives_; }
void Emit(RegExpCompiler* compiler, Trace* trace) override;
- int EatsAtLeast(int still_to_find, int budget, bool not_at_start) override;
- int EatsAtLeastHelper(int still_to_find, int budget,
- RegExpNode* ignore_this_node, bool not_at_start);
void GetQuickCheckDetails(QuickCheckDetails* details,
RegExpCompiler* compiler, int characters_filled_in,
bool not_at_start) override;
@@ -564,6 +608,7 @@ class ChoiceNode : public RegExpNode {
ZoneList<GuardedAlternative>* alternatives_;
private:
+ template <typename...>
friend class Analysis;
void GenerateGuard(RegExpMacroAssembler* macro_assembler, Guard* guard,
@@ -601,16 +646,23 @@ class NegativeLookaroundChoiceNode : public ChoiceNode {
AddAlternative(this_must_fail);
AddAlternative(then_do_this);
}
- int EatsAtLeast(int still_to_find, int budget, bool not_at_start) override;
void GetQuickCheckDetails(QuickCheckDetails* details,
RegExpCompiler* compiler, int characters_filled_in,
bool not_at_start) override;
void FillInBMInfo(Isolate* isolate, int offset, int budget,
BoyerMooreLookahead* bm, bool not_at_start) override {
- alternatives_->at(1).node()->FillInBMInfo(isolate, offset, budget - 1, bm,
- not_at_start);
+ continue_node()->FillInBMInfo(isolate, offset, budget - 1, bm,
+ not_at_start);
if (offset == 0) set_bm_info(not_at_start, bm);
}
+ static constexpr int kLookaroundIndex = 0;
+ static constexpr int kContinueIndex = 1;
+ RegExpNode* lookaround_node() {
+ return alternatives()->at(kLookaroundIndex).node();
+ }
+ RegExpNode* continue_node() {
+ return alternatives()->at(kContinueIndex).node();
+ }
// For a negative lookahead we don't emit the quick check for the
// alternative that is expected to fail. This is because quick check code
// starts by loading enough characters for the alternative that takes fewest
@@ -619,29 +671,38 @@ class NegativeLookaroundChoiceNode : public ChoiceNode {
bool try_to_emit_quick_check_for_alternative(bool is_first) override {
return !is_first;
}
+ void Accept(NodeVisitor* visitor) override;
RegExpNode* FilterOneByte(int depth) override;
};
class LoopChoiceNode : public ChoiceNode {
public:
- LoopChoiceNode(bool body_can_be_zero_length, bool read_backward, Zone* zone)
+ LoopChoiceNode(bool body_can_be_zero_length, bool read_backward,
+ int min_loop_iterations, Zone* zone)
: ChoiceNode(2, zone),
loop_node_(nullptr),
continue_node_(nullptr),
body_can_be_zero_length_(body_can_be_zero_length),
- read_backward_(read_backward) {}
+ read_backward_(read_backward),
+ traversed_loop_initialization_node_(false),
+ min_loop_iterations_(min_loop_iterations) {}
void AddLoopAlternative(GuardedAlternative alt);
void AddContinueAlternative(GuardedAlternative alt);
void Emit(RegExpCompiler* compiler, Trace* trace) override;
- int EatsAtLeast(int still_to_find, int budget, bool not_at_start) override;
void GetQuickCheckDetails(QuickCheckDetails* details,
RegExpCompiler* compiler, int characters_filled_in,
bool not_at_start) override;
+ void GetQuickCheckDetailsFromLoopEntry(QuickCheckDetails* details,
+ RegExpCompiler* compiler,
+ int characters_filled_in,
+ bool not_at_start) override;
void FillInBMInfo(Isolate* isolate, int offset, int budget,
BoyerMooreLookahead* bm, bool not_at_start) override;
+ EatsAtLeastInfo EatsAtLeastFromLoopEntry() override;
RegExpNode* loop_node() { return loop_node_; }
RegExpNode* continue_node() { return continue_node_; }
bool body_can_be_zero_length() { return body_can_be_zero_length_; }
+ int min_loop_iterations() const { return min_loop_iterations_; }
bool read_backward() override { return read_backward_; }
void Accept(NodeVisitor* visitor) override;
RegExpNode* FilterOneByte(int depth) override;
@@ -658,6 +719,22 @@ class LoopChoiceNode : public ChoiceNode {
RegExpNode* continue_node_;
bool body_can_be_zero_length_;
bool read_backward_;
+
+ // Temporary marker set only while generating quick check details. Represents
+ // whether GetQuickCheckDetails traversed the initialization node for this
+ // loop's counter. If so, we may be able to generate stricter quick checks
+ // because we know the loop node must match at least min_loop_iterations_
+ // times before the continuation node can match.
+ bool traversed_loop_initialization_node_;
+
+ // The minimum number of times the loop_node_ must match before the
+ // continue_node_ might be considered. This value can be temporarily decreased
+ // while generating quick check details, to represent the remaining iterations
+ // after the completed portion of the quick check details.
+ int min_loop_iterations_;
+
+ friend class IterationDecrementer;
+ friend class LoopInitializationMarker;
};
class NodeVisitor {
@@ -666,7 +743,6 @@ class NodeVisitor {
#define DECLARE_VISIT(Type) virtual void Visit##Type(Type##Node* that) = 0;
FOR_EACH_NODE_TYPE(DECLARE_VISIT)
#undef DECLARE_VISIT
- virtual void VisitLoopChoice(LoopChoiceNode* that) { VisitChoice(that); }
};
} // namespace internal
diff --git a/deps/v8/src/regexp/regexp-parser.cc b/deps/v8/src/regexp/regexp-parser.cc
index 3647680969..d6e421cafa 100644
--- a/deps/v8/src/regexp/regexp-parser.cc
+++ b/deps/v8/src/regexp/regexp-parser.cc
@@ -692,7 +692,7 @@ RegExpParser::RegExpParserState* RegExpParser::ParseOpenParenthesis(
}
}
if (subexpr_type == CAPTURE) {
- if (captures_started_ >= kMaxCaptures) {
+ if (captures_started_ >= JSRegExp::kMaxCaptures) {
ReportError(CStrVector("Too many captures"));
return nullptr;
}
@@ -800,7 +800,7 @@ bool RegExpParser::ParseBackReferenceIndex(int* index_out) {
uc32 c = current();
if (IsDecimalDigit(c)) {
value = 10 * value + (c - '0');
- if (value > kMaxCaptures) {
+ if (value > JSRegExp::kMaxCaptures) {
Reset(start);
return false;
}
diff --git a/deps/v8/src/regexp/regexp-parser.h b/deps/v8/src/regexp/regexp-parser.h
index 36cec7e984..cc1948b101 100644
--- a/deps/v8/src/regexp/regexp-parser.h
+++ b/deps/v8/src/regexp/regexp-parser.h
@@ -221,7 +221,6 @@ class V8_EXPORT_PRIVATE RegExpParser {
static bool IsSyntaxCharacterOrSlash(uc32 c);
- static const int kMaxCaptures = 1 << 16;
static const uc32 kEndMarker = (1 << 21);
private:
diff --git a/deps/v8/src/regexp/regexp-stack.cc b/deps/v8/src/regexp/regexp-stack.cc
index 3885fd8e8d..a6a128841f 100644
--- a/deps/v8/src/regexp/regexp-stack.cc
+++ b/deps/v8/src/regexp/regexp-stack.cc
@@ -72,12 +72,12 @@ Address RegExpStack::EnsureCapacity(size_t size) {
DeleteArray(thread_local_.memory_);
}
thread_local_.memory_ = new_memory;
+ thread_local_.memory_top_ = new_memory + size;
thread_local_.memory_size_ = size;
thread_local_.limit_ = reinterpret_cast<Address>(new_memory) +
kStackLimitSlack * kSystemPointerSize;
}
- return reinterpret_cast<Address>(thread_local_.memory_) +
- thread_local_.memory_size_;
+ return reinterpret_cast<Address>(thread_local_.memory_top_);
}
diff --git a/deps/v8/src/regexp/regexp-stack.h b/deps/v8/src/regexp/regexp-stack.h
index b1d4571760..7ecaa40b81 100644
--- a/deps/v8/src/regexp/regexp-stack.h
+++ b/deps/v8/src/regexp/regexp-stack.h
@@ -46,8 +46,9 @@ class RegExpStack {
// Gives the top of the memory used as stack.
Address stack_base() {
DCHECK_NE(0, thread_local_.memory_size_);
- return reinterpret_cast<Address>(thread_local_.memory_) +
- thread_local_.memory_size_;
+ DCHECK_EQ(thread_local_.memory_top_,
+ thread_local_.memory_ + thread_local_.memory_size_);
+ return reinterpret_cast<Address>(thread_local_.memory_top_);
}
// The total size of the memory allocated for the stack.
@@ -58,7 +59,7 @@ class RegExpStack {
// There is only a limited number of locations below the stack limit,
// so users of the stack should check the stack limit during any
// sequence of pushes longer than this.
- Address* limit_address() { return &(thread_local_.limit_); }
+ Address* limit_address_address() { return &(thread_local_.limit_); }
// Ensures that there is a memory area with at least the specified size.
// If passing zero, the default/minimum size buffer is allocated.
@@ -89,12 +90,15 @@ class RegExpStack {
// Structure holding the allocated memory, size and limit.
struct ThreadLocal {
ThreadLocal() { Clear(); }
- // If memory_size_ > 0 then memory_ must be non-nullptr.
+ // If memory_size_ > 0 then memory_ and memory_top_ must be non-nullptr
+ // and memory_top_ == memory_ + memory_size_.
byte* memory_;
+ byte* memory_top_;
size_t memory_size_;
Address limit_;
void Clear() {
memory_ = nullptr;
+ memory_top_ = nullptr;
memory_size_ = 0;
limit_ = kMemoryTop;
}
@@ -102,7 +106,7 @@ class RegExpStack {
};
// Address of the field that holds the pointer to the allocated memory.
- Address memory_address() {
+ Address memory_address_address() {
return reinterpret_cast<Address>(&thread_local_.memory_);
}
@@ -111,6 +115,11 @@ class RegExpStack {
return reinterpret_cast<Address>(&thread_local_.memory_size_);
}
+ // Address of the field that holds the top of the memory used as stack.
+ Address memory_top_address_address() {
+ return reinterpret_cast<Address>(&thread_local_.memory_top_);
+ }
+
// Resets the buffer if it has grown beyond the default/minimum size.
// After this, the buffer is either the default size, or it is empty, so
// you have to call EnsureCapacity before using it again.
diff --git a/deps/v8/src/regexp/regexp-utils.cc b/deps/v8/src/regexp/regexp-utils.cc
index ad50270fdc..c9194d5170 100644
--- a/deps/v8/src/regexp/regexp-utils.cc
+++ b/deps/v8/src/regexp/regexp-utils.cc
@@ -5,6 +5,7 @@
#include "src/regexp/regexp-utils.h"
#include "src/execution/isolate.h"
+#include "src/execution/protectors-inl.h"
#include "src/heap/factory.h"
#include "src/objects/js-regexp-inl.h"
#include "src/objects/objects-inl.h"
@@ -179,7 +180,14 @@ bool RegExpUtils::IsUnmodifiedRegExp(Isolate* isolate, Handle<Object> obj) {
return false;
}
- if (!isolate->IsRegExpSpeciesLookupChainIntact(isolate->native_context())) {
+ // Note: Unlike the more involved check in CSA (see BranchIfFastRegExp), this
+ // does not go on to check the actual value of the exec property. This would
+ // not be valid since this method is called from places that access the flags
+ // property. Similar spots in CSA would use BranchIfFastRegExp_Strict in this
+ // case.
+
+ if (!Protectors::IsRegExpSpeciesLookupChainProtectorIntact(
+ recv.GetCreationContext())) {
return false;
}
diff --git a/deps/v8/src/regexp/regexp-utils.h b/deps/v8/src/regexp/regexp-utils.h
index 4b8714c55f..19f1f24039 100644
--- a/deps/v8/src/regexp/regexp-utils.h
+++ b/deps/v8/src/regexp/regexp-utils.h
@@ -38,6 +38,9 @@ class RegExpUtils : public AllStatic {
// Checks whether the given object is an unmodified JSRegExp instance.
// Neither the object's map, nor its prototype's map, nor any relevant
// method on the prototype may be modified.
+ //
+ // Note: This check is limited and may only be used in situations where the
+ // only relevant property is 'exec'.
static bool IsUnmodifiedRegExp(Isolate* isolate, Handle<Object> obj);
// ES#sec-advancestringindex
diff --git a/deps/v8/src/regexp/regexp.cc b/deps/v8/src/regexp/regexp.cc
index 15b0321c46..e0bc4b8e32 100644
--- a/deps/v8/src/regexp/regexp.cc
+++ b/deps/v8/src/regexp/regexp.cc
@@ -5,6 +5,7 @@
#include "src/regexp/regexp.h"
#include "src/codegen/compilation-cache.h"
+#include "src/diagnostics/code-tracer.h"
#include "src/heap/heap-inl.h"
#include "src/objects/js-regexp-inl.h"
#include "src/regexp/regexp-bytecode-generator.h"
@@ -14,6 +15,7 @@
#include "src/regexp/regexp-macro-assembler-arch.h"
#include "src/regexp/regexp-parser.h"
#include "src/strings/string-search.h"
+#include "src/utils/ostreams.h"
namespace v8 {
namespace internal {
@@ -298,29 +300,72 @@ Handle<Object> RegExpImpl::AtomExec(Isolate* isolate, Handle<JSRegExp> re,
bool RegExpImpl::EnsureCompiledIrregexp(Isolate* isolate, Handle<JSRegExp> re,
Handle<String> sample_subject,
bool is_one_byte) {
- Object compiled_code = re->DataAt(JSRegExp::code_index(is_one_byte));
- if (compiled_code != Smi::FromInt(JSRegExp::kUninitializedValue)) {
- DCHECK(FLAG_regexp_interpret_all ? compiled_code.IsByteArray()
- : compiled_code.IsCode());
+ Object compiled_code = re->Code(is_one_byte);
+ Object bytecode = re->Bytecode(is_one_byte);
+ bool needs_initial_compilation =
+ compiled_code == Smi::FromInt(JSRegExp::kUninitializedValue);
+ // Recompilation is needed when we're dealing with the first execution of the
+ // regexp after the decision to tier up has been made. If the tiering up
+ // strategy is not in use, this value is always false.
+ bool needs_tier_up_compilation =
+ re->MarkedForTierUp() && bytecode.IsByteArray();
+
+ if (FLAG_trace_regexp_tier_up && needs_tier_up_compilation) {
+ PrintF("JSRegExp object %p needs tier-up compilation\n",
+ reinterpret_cast<void*>(re->ptr()));
+ }
+
+ if (!needs_initial_compilation && !needs_tier_up_compilation) {
+ DCHECK(compiled_code.IsCode());
+ DCHECK_IMPLIES(FLAG_regexp_interpret_all, bytecode.IsByteArray());
return true;
}
+
+ DCHECK_IMPLIES(needs_tier_up_compilation, bytecode.IsByteArray());
+
return CompileIrregexp(isolate, re, sample_subject, is_one_byte);
}
+#ifdef DEBUG
+namespace {
+
+bool RegExpCodeIsValidForPreCompilation(Handle<JSRegExp> re, bool is_one_byte) {
+ Object entry = re->Code(is_one_byte);
+ Object bytecode = re->Bytecode(is_one_byte);
+ // If we're not using the tier-up strategy, entry can only be a smi
+ // representing an uncompiled regexp here. If we're using the tier-up
+ // strategy, entry can still be a smi representing an uncompiled regexp, when
+ // compiling the regexp before the tier-up, or it can contain a trampoline to
+ // the regexp interpreter, in which case the bytecode field contains compiled
+ // bytecode, when recompiling the regexp after the tier-up. If the
+ // tier-up was forced, which happens for global replaces, entry is a smi
+ // representing an uncompiled regexp, even though we're "recompiling" after
+ // the tier-up.
+ if (re->ShouldProduceBytecode()) {
+ DCHECK(entry.IsSmi());
+ DCHECK(bytecode.IsSmi());
+ int entry_value = Smi::ToInt(entry);
+ int bytecode_value = Smi::ToInt(bytecode);
+ DCHECK_EQ(JSRegExp::kUninitializedValue, entry_value);
+ DCHECK_EQ(JSRegExp::kUninitializedValue, bytecode_value);
+ } else {
+ DCHECK(entry.IsSmi() || (entry.IsCode() && bytecode.IsByteArray()));
+ }
+
+ return true;
+}
+
+} // namespace
+#endif
+
bool RegExpImpl::CompileIrregexp(Isolate* isolate, Handle<JSRegExp> re,
Handle<String> sample_subject,
bool is_one_byte) {
// Compile the RegExp.
Zone zone(isolate->allocator(), ZONE_NAME);
PostponeInterruptsScope postpone(isolate);
-#ifdef DEBUG
- Object entry = re->DataAt(JSRegExp::code_index(is_one_byte));
- // When arriving here entry can only be a smi representing an uncompiled
- // regexp.
- DCHECK(entry.IsSmi());
- int entry_value = Smi::ToInt(entry);
- DCHECK_EQ(JSRegExp::kUninitializedValue, entry_value);
-#endif
+
+ DCHECK(RegExpCodeIsValidForPreCompilation(re, is_one_byte));
JSRegExp::Flags flags = re->GetFlags();
@@ -335,6 +380,14 @@ bool RegExpImpl::CompileIrregexp(Isolate* isolate, Handle<JSRegExp> re,
USE(ThrowRegExpException(isolate, re, pattern, compile_data.error));
return false;
}
+ // The compilation target is a kBytecode if we're interpreting all regexp
+ // objects, or if we're using the tier-up strategy but the tier-up hasn't
+ // happened yet. The compilation target is a kNative if we're using the
+ // tier-up strategy and we need to recompile to tier-up, or if we're producing
+ // native code for all regexp objects.
+ compile_data.compilation_target = re->ShouldProduceBytecode()
+ ? RegExpCompilationTarget::kBytecode
+ : RegExpCompilationTarget::kNative;
const bool compilation_succeeded =
Compile(isolate, &zone, &compile_data, flags, pattern, sample_subject,
is_one_byte);
@@ -346,13 +399,37 @@ bool RegExpImpl::CompileIrregexp(Isolate* isolate, Handle<JSRegExp> re,
Handle<FixedArray> data =
Handle<FixedArray>(FixedArray::cast(re->data()), isolate);
- data->set(JSRegExp::code_index(is_one_byte), compile_data.code);
+ if (compile_data.compilation_target == RegExpCompilationTarget::kNative) {
+ data->set(JSRegExp::code_index(is_one_byte), compile_data.code);
+ // Reset bytecode to uninitialized. In case we use tier-up we know that
+ // tier-up has happened this way.
+ data->set(JSRegExp::bytecode_index(is_one_byte),
+ Smi::FromInt(JSRegExp::kUninitializedValue));
+ } else {
+ DCHECK_EQ(compile_data.compilation_target,
+ RegExpCompilationTarget::kBytecode);
+ // Store code generated by compiler in bytecode and trampoline to
+ // interpreter in code.
+ data->set(JSRegExp::bytecode_index(is_one_byte), compile_data.code);
+ Handle<Code> trampoline =
+ BUILTIN_CODE(isolate, RegExpInterpreterTrampoline);
+ data->set(JSRegExp::code_index(is_one_byte), *trampoline);
+ }
SetIrregexpCaptureNameMap(*data, compile_data.capture_name_map);
int register_max = IrregexpMaxRegisterCount(*data);
if (compile_data.register_count > register_max) {
SetIrregexpMaxRegisterCount(*data, compile_data.register_count);
}
+ if (FLAG_trace_regexp_tier_up) {
+ PrintF("JSRegExp object %p %s size: %d\n",
+ reinterpret_cast<void*>(re->ptr()),
+ re->ShouldProduceBytecode() ? "bytecode" : "native code",
+ re->ShouldProduceBytecode()
+ ? IrregexpByteCode(*data, is_one_byte).Size()
+ : IrregexpNativeCode(*data, is_one_byte).Size());
+ }
+
return true;
}
@@ -382,7 +459,7 @@ int RegExpImpl::IrregexpNumberOfRegisters(FixedArray re) {
}
ByteArray RegExpImpl::IrregexpByteCode(FixedArray re, bool is_one_byte) {
- return ByteArray::cast(re.get(JSRegExp::code_index(is_one_byte)));
+ return ByteArray::cast(re.get(JSRegExp::bytecode_index(is_one_byte)));
}
Code RegExpImpl::IrregexpNativeCode(FixedArray re, bool is_one_byte) {
@@ -411,7 +488,7 @@ int RegExp::IrregexpPrepare(Isolate* isolate, Handle<JSRegExp> regexp,
DisallowHeapAllocation no_gc;
FixedArray data = FixedArray::cast(regexp->data());
- if (FLAG_regexp_interpret_all) {
+ if (regexp->ShouldProduceBytecode()) {
// Byte-code regexp needs space allocated for all its registers.
// The result captures are copied to the start of the registers array
// if the match succeeds. This way those registers are not clobbered
@@ -436,16 +513,15 @@ int RegExpImpl::IrregexpExecRaw(Isolate* isolate, Handle<JSRegExp> regexp,
bool is_one_byte = String::IsOneByteRepresentationUnderneath(*subject);
- if (!FLAG_regexp_interpret_all) {
+ if (!regexp->ShouldProduceBytecode()) {
DCHECK(output_size >= (IrregexpNumberOfCaptures(*irregexp) + 1) * 2);
do {
EnsureCompiledIrregexp(isolate, regexp, subject, is_one_byte);
- Handle<Code> code(IrregexpNativeCode(*irregexp, is_one_byte), isolate);
// The stack is used to allocate registers for the compiled regexp code.
// This means that in case of failure, the output registers array is left
// untouched and contains the capture results from the previous successful
// match. We can use that to set the last match info lazily.
- int res = NativeRegExpMacroAssembler::Match(code, subject, output,
+ int res = NativeRegExpMacroAssembler::Match(regexp, subject, output,
output_size, index, isolate);
if (res != NativeRegExpMacroAssembler::RETRY) {
DCHECK(res != NativeRegExpMacroAssembler::EXCEPTION ||
@@ -464,12 +540,11 @@ int RegExpImpl::IrregexpExecRaw(Isolate* isolate, Handle<JSRegExp> regexp,
// the, potentially, different subject (the string can switch between
// being internal and external, and even between being Latin1 and UC16,
// but the characters are always the same).
- RegExp::IrregexpPrepare(isolate, regexp, subject);
is_one_byte = String::IsOneByteRepresentationUnderneath(*subject);
} while (true);
UNREACHABLE();
} else {
- DCHECK(FLAG_regexp_interpret_all);
+ DCHECK(regexp->ShouldProduceBytecode());
DCHECK(output_size >= IrregexpNumberOfRegisters(*irregexp));
// We must have done EnsureCompiledIrregexp, so we can get the number of
// registers.
@@ -478,17 +553,10 @@ int RegExpImpl::IrregexpExecRaw(Isolate* isolate, Handle<JSRegExp> regexp,
int32_t* raw_output = &output[number_of_capture_registers];
do {
- // We do not touch the actual capture result registers until we know there
- // has been a match so that we can use those capture results to set the
- // last match info.
- for (int i = number_of_capture_registers - 1; i >= 0; i--) {
- raw_output[i] = -1;
- }
- Handle<ByteArray> byte_codes(IrregexpByteCode(*irregexp, is_one_byte),
- isolate);
-
- IrregexpInterpreter::Result result = IrregexpInterpreter::Match(
- isolate, byte_codes, subject, raw_output, index);
+ IrregexpInterpreter::Result result =
+ IrregexpInterpreter::MatchForCallFromRuntime(
+ isolate, regexp, subject, raw_output, number_of_capture_registers,
+ index);
DCHECK_IMPLIES(result == IrregexpInterpreter::EXCEPTION,
isolate->has_pending_exception());
@@ -504,6 +572,10 @@ int RegExpImpl::IrregexpExecRaw(Isolate* isolate, Handle<JSRegExp> regexp,
case IrregexpInterpreter::RETRY:
// The string has changed representation, and we must restart the
// match.
+ // We need to reset the tier up to start over with compilation.
+ if (FLAG_regexp_tier_up) {
+ regexp->ResetTierUp();
+ }
is_one_byte = String::IsOneByteRepresentationUnderneath(*subject);
EnsureCompiledIrregexp(isolate, regexp, subject, is_one_byte);
break;
@@ -520,14 +592,15 @@ MaybeHandle<Object> RegExpImpl::IrregexpExec(
subject = String::Flatten(isolate, subject);
- // Prepare space for the return values.
#ifdef DEBUG
- if (FLAG_regexp_interpret_all && FLAG_trace_regexp_bytecodes) {
+ if (FLAG_trace_regexp_bytecodes && regexp->ShouldProduceBytecode()) {
String pattern = regexp->Pattern();
PrintF("\n\nRegexp match: /%s/\n\n", pattern.ToCString().get());
PrintF("\n\nSubject string: '%s'\n\n", subject->ToCString().get());
}
#endif
+
+ // Prepare space for the return values.
int required_registers = RegExp::IrregexpPrepare(isolate, regexp, subject);
if (required_registers < 0) {
// Compiling failed with an exception.
@@ -547,6 +620,7 @@ MaybeHandle<Object> RegExpImpl::IrregexpExec(
int res =
RegExpImpl::IrregexpExecRaw(isolate, regexp, subject, previous_index,
output_registers, required_registers);
+
if (res == RegExp::RE_SUCCESS) {
int capture_count =
IrregexpNumberOfCaptures(FixedArray::cast(regexp->data()));
@@ -706,17 +780,14 @@ bool RegExpImpl::Compile(Isolate* isolate, Zone* zone, RegExpCompileData* data,
if (node == nullptr) node = new (zone) EndNode(EndNode::BACKTRACK, zone);
data->node = node;
- Analysis analysis(isolate, is_one_byte);
- analysis.EnsureAnalyzed(node);
- if (analysis.has_failed()) {
- data->error =
- isolate->factory()->NewStringFromAsciiChecked(analysis.error_message());
+ if (const char* error_message = AnalyzeRegExp(isolate, is_one_byte, node)) {
+ data->error = isolate->factory()->NewStringFromAsciiChecked(error_message);
return false;
}
// Create the correct assembler for the architecture.
std::unique_ptr<RegExpMacroAssembler> macro_assembler;
- if (!FLAG_regexp_interpret_all) {
+ if (data->compilation_target == RegExpCompilationTarget::kNative) {
// Native regexp implementation.
DCHECK(!FLAG_jitless);
@@ -752,8 +823,7 @@ bool RegExpImpl::Compile(Isolate* isolate, Zone* zone, RegExpCompileData* data,
#error "Unsupported architecture"
#endif
} else {
- DCHECK(FLAG_regexp_interpret_all);
-
+ DCHECK_EQ(data->compilation_target, RegExpCompilationTarget::kBytecode);
// Interpreted regexp implementation.
macro_assembler.reset(new RegExpBytecodeGenerator(isolate, zone));
}
@@ -781,6 +851,26 @@ bool RegExpImpl::Compile(Isolate* isolate, Zone* zone, RegExpCompileData* data,
RegExpCompiler::CompilationResult result = compiler.Assemble(
isolate, macro_assembler.get(), node, data->capture_count, pattern);
+ // Code / bytecode printing.
+ {
+#ifdef ENABLE_DISASSEMBLER
+ if (FLAG_print_regexp_code &&
+ data->compilation_target == RegExpCompilationTarget::kNative) {
+ CodeTracer::Scope trace_scope(isolate->GetCodeTracer());
+ OFStream os(trace_scope.file());
+ Handle<Code> c(Code::cast(result.code), isolate);
+ auto pattern_cstring = pattern->ToCString();
+ c->Disassemble(pattern_cstring.get(), os);
+ }
+#endif
+ if (FLAG_print_regexp_bytecode &&
+ data->compilation_target == RegExpCompilationTarget::kBytecode) {
+ Handle<ByteArray> bytecode(ByteArray::cast(result.code), isolate);
+ auto pattern_cstring = pattern->ToCString();
+ IrregexpInterpreter::Disassemble(*bytecode, pattern_cstring.get());
+ }
+ }
+
if (FLAG_correctness_fuzzer_suppressions &&
strncmp(result.error_message, "Stack overflow", 15) == 0) {
FATAL("Aborting on stack overflow");
@@ -790,6 +880,7 @@ bool RegExpImpl::Compile(Isolate* isolate, Zone* zone, RegExpCompileData* data,
data->error =
isolate->factory()->NewStringFromAsciiChecked(result.error_message);
}
+
data->code = result.code;
data->register_count = result.num_registers;
@@ -803,7 +894,7 @@ RegExpGlobalCache::RegExpGlobalCache(Handle<JSRegExp> regexp,
regexp_(regexp),
subject_(subject),
isolate_(isolate) {
- bool interpreted = FLAG_regexp_interpret_all;
+ bool interpreted = regexp->ShouldProduceBytecode();
if (regexp_->TypeTag() == JSRegExp::ATOM) {
static const int kAtomRegistersPerMatch = 2;
@@ -868,6 +959,7 @@ int RegExpGlobalCache::AdvanceZeroLength(int last_index) {
int32_t* RegExpGlobalCache::FetchNext() {
current_match_index_++;
+
if (current_match_index_ >= num_matches_) {
// Current batch of results exhausted.
// Fail if last batch was not even fully filled.
diff --git a/deps/v8/src/regexp/regexp.h b/deps/v8/src/regexp/regexp.h
index 0f3ed463da..8ccc9789a3 100644
--- a/deps/v8/src/regexp/regexp.h
+++ b/deps/v8/src/regexp/regexp.h
@@ -13,6 +13,8 @@ namespace internal {
class RegExpNode;
class RegExpTree;
+enum class RegExpCompilationTarget : int { kBytecode, kNative };
+
// TODO(jgruber): Consider splitting between ParseData and CompileData.
struct RegExpCompileData {
// The parsed AST as produced by the RegExpParser.
@@ -21,8 +23,8 @@ struct RegExpCompileData {
// The compiled Node graph as produced by RegExpTree::ToNode methods.
RegExpNode* node = nullptr;
- // The generated code as produced by the compiler. Either a Code object (for
- // irregexp native code) or a ByteArray (for irregexp bytecode).
+ // Either the generated code as produced by the compiler or a trampoline
+ // to the interpreter.
Object code;
// True, iff the pattern is a 'simple' atom with zero captures. In other
@@ -46,12 +48,20 @@ struct RegExpCompileData {
// The number of registers used by the generated code.
int register_count = 0;
+
+ // The compilation target (bytecode or native code).
+ RegExpCompilationTarget compilation_target;
};
class RegExp final : public AllStatic {
public:
// Whether the irregexp engine generates native code or interpreter bytecode.
- static bool GeneratesNativeCode() { return !FLAG_regexp_interpret_all; }
+ static bool CanGenerateNativeCode() {
+ return !FLAG_regexp_interpret_all || FLAG_regexp_tier_up;
+ }
+ static bool CanGenerateBytecode() {
+ return FLAG_regexp_interpret_all || FLAG_regexp_tier_up;
+ }
// Parses the RegExp pattern and prepares the JSRegExp object with
// generic data and choice of implementation - as well as what
@@ -61,6 +71,11 @@ class RegExp final : public AllStatic {
Isolate* isolate, Handle<JSRegExp> re, Handle<String> pattern,
JSRegExp::Flags flags);
+ enum CallOrigin : int {
+ kFromRuntime = 0,
+ kFromJs = 1,
+ };
+
// See ECMA-262 section 15.10.6.2.
// This function calls the garbage collector if necessary.
V8_EXPORT_PRIVATE V8_WARN_UNUSED_RESULT static MaybeHandle<Object> Exec(
@@ -73,7 +88,7 @@ class RegExp final : public AllStatic {
static constexpr int kInternalRegExpException = -1;
static constexpr int kInternalRegExpRetry = -2;
- enum IrregexpResult {
+ enum IrregexpResult : int32_t {
RE_FAILURE = kInternalRegExpFailure,
RE_SUCCESS = kInternalRegExpSuccess,
RE_EXCEPTION = kInternalRegExpException,
diff --git a/deps/v8/src/regexp/s390/regexp-macro-assembler-s390.cc b/deps/v8/src/regexp/s390/regexp-macro-assembler-s390.cc
index 5ebdd6ce15..d4144e7e64 100644
--- a/deps/v8/src/regexp/s390/regexp-macro-assembler-s390.cc
+++ b/deps/v8/src/regexp/s390/regexp-macro-assembler-s390.cc
@@ -178,9 +178,10 @@ void RegExpMacroAssemblerS390::CheckCharacterGT(uc16 limit, Label* on_greater) {
BranchOrBacktrack(gt, on_greater);
}
-void RegExpMacroAssemblerS390::CheckAtStart(Label* on_at_start) {
+void RegExpMacroAssemblerS390::CheckAtStart(int cp_offset, Label* on_at_start) {
__ LoadP(r3, MemOperand(frame_pointer(), kStringStartMinusOne));
- __ AddP(r2, current_input_offset(), Operand(-char_size()));
+ __ AddP(r2, current_input_offset(),
+ Operand(-char_size() + cp_offset * char_size()));
__ CmpP(r2, r3);
BranchOrBacktrack(eq, on_at_start);
}
@@ -663,7 +664,7 @@ Handle<HeapObject> RegExpMacroAssemblerS390::GetCode(Handle<String> source) {
Label stack_ok;
ExternalReference stack_limit =
- ExternalReference::address_of_stack_limit(isolate());
+ ExternalReference::address_of_jslimit(isolate());
__ mov(r2, Operand(stack_limit));
__ LoadP(r2, MemOperand(r2));
__ SubP(r2, sp, r2);
@@ -965,14 +966,19 @@ RegExpMacroAssemblerS390::Implementation() {
return kS390Implementation;
}
-void RegExpMacroAssemblerS390::LoadCurrentCharacter(int cp_offset,
- Label* on_end_of_input,
- bool check_bounds,
- int characters) {
+void RegExpMacroAssemblerS390::LoadCurrentCharacterImpl(int cp_offset,
+ Label* on_end_of_input,
+ bool check_bounds,
+ int characters,
+ int eats_at_least) {
+ // It's possible to preload a small number of characters when each success
+ // path requires a large number of characters, but not the reverse.
+ DCHECK_GE(eats_at_least, characters);
+
DCHECK(cp_offset < (1 << 30)); // Be sane! (And ensure negation works)
if (check_bounds) {
if (cp_offset >= 0) {
- CheckPosition(cp_offset + characters - 1, on_end_of_input);
+ CheckPosition(cp_offset + eats_at_least - 1, on_end_of_input);
} else {
CheckPosition(cp_offset, on_end_of_input);
}
@@ -1120,8 +1126,10 @@ int RegExpMacroAssemblerS390::CheckStackGuardState(Address* return_address,
return NativeRegExpMacroAssembler::CheckStackGuardState(
frame_entry<Isolate*>(re_frame, kIsolate),
frame_entry<intptr_t>(re_frame, kStartIndex),
- frame_entry<intptr_t>(re_frame, kDirectCall) == 1, return_address,
- re_code, frame_entry_address<Address>(re_frame, kInputString),
+ static_cast<RegExp::CallOrigin>(
+ frame_entry<intptr_t>(re_frame, kDirectCall)),
+ return_address, re_code,
+ frame_entry_address<Address>(re_frame, kInputString),
frame_entry_address<const byte*>(re_frame, kInputStart),
frame_entry_address<const byte*>(re_frame, kInputEnd));
}
@@ -1206,7 +1214,7 @@ void RegExpMacroAssemblerS390::Pop(Register target) {
void RegExpMacroAssemblerS390::CheckPreemption() {
// Check for preemption.
ExternalReference stack_limit =
- ExternalReference::address_of_stack_limit(isolate());
+ ExternalReference::address_of_jslimit(isolate());
__ mov(r2, Operand(stack_limit));
__ CmpLogicalP(sp, MemOperand(r2));
SafeCall(&check_preempt_label_, le);
@@ -1214,7 +1222,7 @@ void RegExpMacroAssemblerS390::CheckPreemption() {
void RegExpMacroAssemblerS390::CheckStackLimit() {
ExternalReference stack_limit =
- ExternalReference::address_of_regexp_stack_limit(isolate());
+ ExternalReference::address_of_regexp_stack_limit_address(isolate());
__ mov(r2, Operand(stack_limit));
__ CmpLogicalP(backtrack_stackpointer(), MemOperand(r2));
SafeCall(&stack_overflow_label_, le);
diff --git a/deps/v8/src/regexp/s390/regexp-macro-assembler-s390.h b/deps/v8/src/regexp/s390/regexp-macro-assembler-s390.h
index 636ba76079..3a6a915263 100644
--- a/deps/v8/src/regexp/s390/regexp-macro-assembler-s390.h
+++ b/deps/v8/src/regexp/s390/regexp-macro-assembler-s390.h
@@ -23,7 +23,7 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerS390
virtual void AdvanceRegister(int reg, int by);
virtual void Backtrack();
virtual void Bind(Label* label);
- virtual void CheckAtStart(Label* on_at_start);
+ virtual void CheckAtStart(int cp_offset, Label* on_at_start);
virtual void CheckCharacter(unsigned c, Label* on_equal);
virtual void CheckCharacterAfterAnd(unsigned c, unsigned mask,
Label* on_equal);
@@ -59,9 +59,9 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerS390
virtual void IfRegisterLT(int reg, int comparand, Label* if_lt);
virtual void IfRegisterEqPos(int reg, Label* if_eq);
virtual IrregexpImplementation Implementation();
- virtual void LoadCurrentCharacter(int cp_offset, Label* on_end_of_input,
- bool check_bounds = true,
- int characters = 1);
+ virtual void LoadCurrentCharacterImpl(int cp_offset, Label* on_end_of_input,
+ bool check_bounds, int characters,
+ int eats_at_least);
virtual void PopCurrentPosition();
virtual void PopRegister(int register_index);
virtual void PushBacktrack(Label* label);
diff --git a/deps/v8/src/regexp/x64/regexp-macro-assembler-x64.cc b/deps/v8/src/regexp/x64/regexp-macro-assembler-x64.cc
index 798484d52f..42ba13c4ee 100644
--- a/deps/v8/src/regexp/x64/regexp-macro-assembler-x64.cc
+++ b/deps/v8/src/regexp/x64/regexp-macro-assembler-x64.cc
@@ -48,6 +48,8 @@ namespace internal {
*
* The stack will have the following content, in some order, indexable from the
* frame pointer (see, e.g., kStackHighEnd):
+ * - Address regexp (address of the JSRegExp object; unused in native
+ * code, passed to match signature of interpreter)
* - Isolate* isolate (address of the current isolate)
* - direct_call (if 1, direct call from JavaScript code, if 0 call
* through the runtime system)
@@ -75,9 +77,8 @@ namespace internal {
* "character -1" in the string (i.e., char_size() bytes before the first
* character of the string). The remaining registers start out uninitialized.
*
- * The first seven values must be provided by the calling code by
- * calling the code's entry address cast to a function pointer with the
- * following signature:
+ * The argument values must be provided by the calling code by calling the
+ * code's entry address cast to a function pointer with the following signature:
* int (*match)(String input_string,
* int start_index,
* Address start,
@@ -86,7 +87,8 @@ namespace internal {
* int num_capture_registers,
* byte* stack_area_base,
* bool direct_call = false,
- * Isolate* isolate);
+ * Isolate* isolate,
+ * Address regexp);
*/
#define __ ACCESS_MASM((&masm_))
@@ -172,14 +174,12 @@ void RegExpMacroAssemblerX64::CheckCharacterGT(uc16 limit, Label* on_greater) {
BranchOrBacktrack(greater, on_greater);
}
-
-void RegExpMacroAssemblerX64::CheckAtStart(Label* on_at_start) {
- __ leaq(rax, Operand(rdi, -char_size()));
+void RegExpMacroAssemblerX64::CheckAtStart(int cp_offset, Label* on_at_start) {
+ __ leaq(rax, Operand(rdi, -char_size() + cp_offset * char_size()));
__ cmpq(rax, Operand(rbp, kStringStartMinusOne));
BranchOrBacktrack(equal, on_at_start);
}
-
void RegExpMacroAssemblerX64::CheckNotAtStart(int cp_offset,
Label* on_not_at_start) {
__ leaq(rax, Operand(rdi, -char_size() + cp_offset * char_size()));
@@ -721,7 +721,7 @@ Handle<HeapObject> RegExpMacroAssemblerX64::GetCode(Handle<String> source) {
Label stack_ok;
ExternalReference stack_limit =
- ExternalReference::address_of_stack_limit(isolate());
+ ExternalReference::address_of_jslimit(isolate());
__ movq(rcx, rsp);
__ Move(kScratchRegister, stack_limit);
__ subq(rcx, Operand(kScratchRegister, 0));
@@ -1035,15 +1035,19 @@ RegExpMacroAssembler::IrregexpImplementation
return kX64Implementation;
}
+void RegExpMacroAssemblerX64::LoadCurrentCharacterImpl(int cp_offset,
+ Label* on_end_of_input,
+ bool check_bounds,
+ int characters,
+ int eats_at_least) {
+ // It's possible to preload a small number of characters when each success
+ // path requires a large number of characters, but not the reverse.
+ DCHECK_GE(eats_at_least, characters);
-void RegExpMacroAssemblerX64::LoadCurrentCharacter(int cp_offset,
- Label* on_end_of_input,
- bool check_bounds,
- int characters) {
DCHECK(cp_offset < (1<<30)); // Be sane! (And ensure negation works)
if (check_bounds) {
if (cp_offset >= 0) {
- CheckPosition(cp_offset + characters - 1, on_end_of_input);
+ CheckPosition(cp_offset + eats_at_least - 1, on_end_of_input);
} else {
CheckPosition(cp_offset, on_end_of_input);
}
@@ -1051,7 +1055,6 @@ void RegExpMacroAssemblerX64::LoadCurrentCharacter(int cp_offset,
LoadCurrentCharacterUnchecked(cp_offset, characters);
}
-
void RegExpMacroAssemblerX64::PopCurrentPosition() {
Pop(rdi);
}
@@ -1198,7 +1201,8 @@ int RegExpMacroAssemblerX64::CheckStackGuardState(Address* return_address,
return NativeRegExpMacroAssembler::CheckStackGuardState(
frame_entry<Isolate*>(re_frame, kIsolate),
frame_entry<int>(re_frame, kStartIndex),
- frame_entry<int>(re_frame, kDirectCall) == 1, return_address, re_code,
+ static_cast<RegExp::CallOrigin>(frame_entry<int>(re_frame, kDirectCall)),
+ return_address, re_code,
frame_entry_address<Address>(re_frame, kInputString),
frame_entry_address<const byte*>(re_frame, kInputStart),
frame_entry_address<const byte*>(re_frame, kInputEnd));
@@ -1318,7 +1322,7 @@ void RegExpMacroAssemblerX64::CheckPreemption() {
// Check for preemption.
Label no_preempt;
ExternalReference stack_limit =
- ExternalReference::address_of_stack_limit(isolate());
+ ExternalReference::address_of_jslimit(isolate());
__ load_rax(stack_limit);
__ cmpq(rsp, rax);
__ j(above, &no_preempt);
@@ -1332,7 +1336,7 @@ void RegExpMacroAssemblerX64::CheckPreemption() {
void RegExpMacroAssemblerX64::CheckStackLimit() {
Label no_stack_overflow;
ExternalReference stack_limit =
- ExternalReference::address_of_regexp_stack_limit(isolate());
+ ExternalReference::address_of_regexp_stack_limit_address(isolate());
__ load_rax(stack_limit);
__ cmpq(backtrack_stackpointer(), rax);
__ j(above, &no_stack_overflow);
diff --git a/deps/v8/src/regexp/x64/regexp-macro-assembler-x64.h b/deps/v8/src/regexp/x64/regexp-macro-assembler-x64.h
index 59b80ef802..9d011dcd46 100644
--- a/deps/v8/src/regexp/x64/regexp-macro-assembler-x64.h
+++ b/deps/v8/src/regexp/x64/regexp-macro-assembler-x64.h
@@ -24,7 +24,7 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerX64
void AdvanceRegister(int reg, int by) override;
void Backtrack() override;
void Bind(Label* label) override;
- void CheckAtStart(Label* on_at_start) override;
+ void CheckAtStart(int cp_offset, Label* on_at_start) override;
void CheckCharacter(uint32_t c, Label* on_equal) override;
void CheckCharacterAfterAnd(uint32_t c, uint32_t mask,
Label* on_equal) override;
@@ -60,9 +60,9 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerX64
void IfRegisterLT(int reg, int comparand, Label* if_lt) override;
void IfRegisterEqPos(int reg, Label* if_eq) override;
IrregexpImplementation Implementation() override;
- void LoadCurrentCharacter(int cp_offset, Label* on_end_of_input,
- bool check_bounds = true,
- int characters = 1) override;
+ void LoadCurrentCharacterImpl(int cp_offset, Label* on_end_of_input,
+ bool check_bounds, int characters,
+ int eats_at_least) override;
void PopCurrentPosition() override;
void PopRegister(int register_index) override;
void PushBacktrack(Label* label) override;