diff options
author | Myles Borins <mylesborins@google.com> | 2019-09-24 11:56:38 -0400 |
---|---|---|
committer | Myles Borins <myles.borins@gmail.com> | 2019-10-07 03:19:23 -0400 |
commit | f7f6c928c1c9c136b7926f892b8a2fda11d8b4b2 (patch) | |
tree | f5edbccb3ffda2573d70a6e291e7157f290e0ae0 /deps/v8/src/regexp | |
parent | ffd22e81983056d09c064c59343a0e488236272d (diff) | |
download | node-new-f7f6c928c1c9c136b7926f892b8a2fda11d8b4b2.tar.gz |
deps: update V8 to 7.8.279.9
PR-URL: https://github.com/nodejs/node/pull/29694
Reviewed-By: Colin Ihrig <cjihrig@gmail.com>
Reviewed-By: Anna Henningsen <anna@addaleax.net>
Reviewed-By: Gus Caplan <me@gus.host>
Reviewed-By: Jiawen Geng <technicalcute@gmail.com>
Reviewed-By: Michaël Zasso <targos@protonmail.com>
Reviewed-By: Tobias Nießen <tniessen@tnie.de>
Reviewed-By: Ujjwal Sharma <usharma1998@gmail.com>
Diffstat (limited to 'deps/v8/src/regexp')
38 files changed, 1803 insertions, 1033 deletions
diff --git a/deps/v8/src/regexp/arm/regexp-macro-assembler-arm.cc b/deps/v8/src/regexp/arm/regexp-macro-assembler-arm.cc index 8b462cb03c..2f81b6de86 100644 --- a/deps/v8/src/regexp/arm/regexp-macro-assembler-arm.cc +++ b/deps/v8/src/regexp/arm/regexp-macro-assembler-arm.cc @@ -40,6 +40,9 @@ namespace internal { * Each call to a public method should retain this convention. * * The stack will have the following structure: + * - fp[56] Address regexp (address of the JSRegExp object; unused in + * native code, passed to match signature of + * the interpreter) * - fp[52] Isolate* isolate (address of the current isolate) * - fp[48] direct_call (if 1, direct call from JavaScript code, * if 0, call through the runtime system). @@ -83,7 +86,8 @@ namespace internal { * int num_capture_registers, * byte* stack_area_base, * bool direct_call = false, - * Isolate* isolate); + * Isolate* isolate, + * Address regexp); * The call is performed by NativeRegExpMacroAssembler::Execute() * (in regexp-macro-assembler.cc) via the GeneratedCode wrapper. 
*/ @@ -172,15 +176,14 @@ void RegExpMacroAssemblerARM::CheckCharacterGT(uc16 limit, Label* on_greater) { BranchOrBacktrack(gt, on_greater); } - -void RegExpMacroAssemblerARM::CheckAtStart(Label* on_at_start) { +void RegExpMacroAssemblerARM::CheckAtStart(int cp_offset, Label* on_at_start) { __ ldr(r1, MemOperand(frame_pointer(), kStringStartMinusOne)); - __ add(r0, current_input_offset(), Operand(-char_size())); + __ add(r0, current_input_offset(), + Operand(-char_size() + cp_offset * char_size())); __ cmp(r0, r1); BranchOrBacktrack(eq, on_at_start); } - void RegExpMacroAssemblerARM::CheckNotAtStart(int cp_offset, Label* on_not_at_start) { __ ldr(r1, MemOperand(frame_pointer(), kStringStartMinusOne)); @@ -647,7 +650,7 @@ Handle<HeapObject> RegExpMacroAssemblerARM::GetCode(Handle<String> source) { Label stack_ok; ExternalReference stack_limit = - ExternalReference::address_of_stack_limit(isolate()); + ExternalReference::address_of_jslimit(isolate()); __ mov(r0, Operand(stack_limit)); __ ldr(r0, MemOperand(r0)); __ sub(r0, sp, r0, SetCC); @@ -929,15 +932,19 @@ RegExpMacroAssembler::IrregexpImplementation return kARMImplementation; } +void RegExpMacroAssemblerARM::LoadCurrentCharacterImpl(int cp_offset, + Label* on_end_of_input, + bool check_bounds, + int characters, + int eats_at_least) { + // It's possible to preload a small number of characters when each success + // path requires a large number of characters, but not the reverse. + DCHECK_GE(eats_at_least, characters); -void RegExpMacroAssemblerARM::LoadCurrentCharacter(int cp_offset, - Label* on_end_of_input, - bool check_bounds, - int characters) { DCHECK(cp_offset < (1<<30)); // Be sane! 
(And ensure negation works) if (check_bounds) { if (cp_offset >= 0) { - CheckPosition(cp_offset + characters - 1, on_end_of_input); + CheckPosition(cp_offset + eats_at_least - 1, on_end_of_input); } else { CheckPosition(cp_offset, on_end_of_input); } @@ -945,7 +952,6 @@ void RegExpMacroAssemblerARM::LoadCurrentCharacter(int cp_offset, LoadCurrentCharacterUnchecked(cp_offset, characters); } - void RegExpMacroAssemblerARM::PopCurrentPosition() { Pop(current_input_offset()); } @@ -1109,7 +1115,8 @@ int RegExpMacroAssemblerARM::CheckStackGuardState(Address* return_address, return NativeRegExpMacroAssembler::CheckStackGuardState( frame_entry<Isolate*>(re_frame, kIsolate), frame_entry<int>(re_frame, kStartIndex), - frame_entry<int>(re_frame, kDirectCall) == 1, return_address, re_code, + static_cast<RegExp::CallOrigin>(frame_entry<int>(re_frame, kDirectCall)), + return_address, re_code, frame_entry_address<Address>(re_frame, kInputString), frame_entry_address<const byte*>(re_frame, kInputStart), frame_entry_address<const byte*>(re_frame, kInputEnd)); @@ -1193,7 +1200,7 @@ void RegExpMacroAssemblerARM::Pop(Register target) { void RegExpMacroAssemblerARM::CheckPreemption() { // Check for preemption. 
ExternalReference stack_limit = - ExternalReference::address_of_stack_limit(isolate()); + ExternalReference::address_of_jslimit(isolate()); __ mov(r0, Operand(stack_limit)); __ ldr(r0, MemOperand(r0)); __ cmp(sp, r0); @@ -1203,7 +1210,7 @@ void RegExpMacroAssemblerARM::CheckPreemption() { void RegExpMacroAssemblerARM::CheckStackLimit() { ExternalReference stack_limit = - ExternalReference::address_of_regexp_stack_limit(isolate()); + ExternalReference::address_of_regexp_stack_limit_address(isolate()); __ mov(r0, Operand(stack_limit)); __ ldr(r0, MemOperand(r0)); __ cmp(backtrack_stackpointer(), Operand(r0)); diff --git a/deps/v8/src/regexp/arm/regexp-macro-assembler-arm.h b/deps/v8/src/regexp/arm/regexp-macro-assembler-arm.h index 9e95f8e1f2..9b21c5a11c 100644 --- a/deps/v8/src/regexp/arm/regexp-macro-assembler-arm.h +++ b/deps/v8/src/regexp/arm/regexp-macro-assembler-arm.h @@ -23,7 +23,7 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerARM virtual void AdvanceRegister(int reg, int by); virtual void Backtrack(); virtual void Bind(Label* label); - virtual void CheckAtStart(Label* on_at_start); + virtual void CheckAtStart(int cp_offset, Label* on_at_start); virtual void CheckCharacter(unsigned c, Label* on_equal); virtual void CheckCharacterAfterAnd(unsigned c, unsigned mask, @@ -67,10 +67,9 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerARM virtual void IfRegisterLT(int reg, int comparand, Label* if_lt); virtual void IfRegisterEqPos(int reg, Label* if_eq); virtual IrregexpImplementation Implementation(); - virtual void LoadCurrentCharacter(int cp_offset, - Label* on_end_of_input, - bool check_bounds = true, - int characters = 1); + virtual void LoadCurrentCharacterImpl(int cp_offset, Label* on_end_of_input, + bool check_bounds, int characters, + int eats_at_least); virtual void PopCurrentPosition(); virtual void PopRegister(int register_index); virtual void PushBacktrack(Label* label); diff --git a/deps/v8/src/regexp/arm64/regexp-macro-assembler-arm64.cc 
b/deps/v8/src/regexp/arm64/regexp-macro-assembler-arm64.cc index b299ad0535..9e00063487 100644 --- a/deps/v8/src/regexp/arm64/regexp-macro-assembler-arm64.cc +++ b/deps/v8/src/regexp/arm64/regexp-macro-assembler-arm64.cc @@ -55,7 +55,10 @@ namespace internal { * (as referred to in * the code) * - * - fp[96] isolate Address of the current isolate. + * - fp[104] Address regexp Address of the JSRegExp object. Unused in + * native code, passed to match signature of + * the interpreter. + * - fp[96] isolate Address of the current isolate. * ^^^ sp when called ^^^ * - fp[88] lr Return from the RegExp code. * - fp[80] r29 Old frame pointer (CalleeSaved). @@ -93,7 +96,8 @@ namespace internal { * int num_capture_registers, * byte* stack_area_base, * bool direct_call = false, - * Isolate* isolate); + * Isolate* isolate, + * Address regexp); * The call is performed by NativeRegExpMacroAssembler::Execute() * (in regexp-macro-assembler.cc) via the GeneratedCode wrapper. */ @@ -201,14 +205,14 @@ void RegExpMacroAssemblerARM64::CheckCharacterGT(uc16 limit, CompareAndBranchOrBacktrack(current_character(), limit, hi, on_greater); } - -void RegExpMacroAssemblerARM64::CheckAtStart(Label* on_at_start) { - __ Add(w10, current_input_offset(), Operand(-char_size())); +void RegExpMacroAssemblerARM64::CheckAtStart(int cp_offset, + Label* on_at_start) { + __ Add(w10, current_input_offset(), + Operand(-char_size() + cp_offset * char_size())); __ Cmp(w10, string_start_minus_one()); BranchOrBacktrack(eq, on_at_start); } - void RegExpMacroAssemblerARM64::CheckNotAtStart(int cp_offset, Label* on_not_at_start) { __ Add(w10, current_input_offset(), @@ -750,7 +754,7 @@ Handle<HeapObject> RegExpMacroAssemblerARM64::GetCode(Handle<String> source) { Label stack_ok; ExternalReference stack_limit = - ExternalReference::address_of_stack_limit(isolate()); + ExternalReference::address_of_jslimit(isolate()); __ Mov(x10, stack_limit); __ Ldr(x10, MemOperand(x10)); __ Subs(x10, sp, x10); @@ -1106,18 +1110,22 
@@ RegExpMacroAssembler::IrregexpImplementation return kARM64Implementation; } +void RegExpMacroAssemblerARM64::LoadCurrentCharacterImpl(int cp_offset, + Label* on_end_of_input, + bool check_bounds, + int characters, + int eats_at_least) { + // It's possible to preload a small number of characters when each success + // path requires a large number of characters, but not the reverse. + DCHECK_GE(eats_at_least, characters); -void RegExpMacroAssemblerARM64::LoadCurrentCharacter(int cp_offset, - Label* on_end_of_input, - bool check_bounds, - int characters) { // TODO(pielan): Make sure long strings are caught before this, and not // just asserted in debug mode. // Be sane! (And ensure that an int32_t can be used to index the string) DCHECK(cp_offset < (1<<30)); if (check_bounds) { if (cp_offset >= 0) { - CheckPosition(cp_offset + characters - 1, on_end_of_input); + CheckPosition(cp_offset + eats_at_least - 1, on_end_of_input); } else { CheckPosition(cp_offset, on_end_of_input); } @@ -1125,7 +1133,6 @@ void RegExpMacroAssemblerARM64::LoadCurrentCharacter(int cp_offset, LoadCurrentCharacterUnchecked(cp_offset, characters); } - void RegExpMacroAssemblerARM64::PopCurrentPosition() { Pop(current_input_offset()); } @@ -1326,8 +1333,9 @@ int RegExpMacroAssemblerARM64::CheckStackGuardState( Code re_code = Code::cast(Object(raw_code)); return NativeRegExpMacroAssembler::CheckStackGuardState( frame_entry<Isolate*>(re_frame, kIsolate), start_index, - frame_entry<int>(re_frame, kDirectCall) == 1, return_address, re_code, - frame_entry_address<Address>(re_frame, kInput), input_start, input_end); + static_cast<RegExp::CallOrigin>(frame_entry<int>(re_frame, kDirectCall)), + return_address, re_code, frame_entry_address<Address>(re_frame, kInput), + input_start, input_end); } @@ -1448,7 +1456,7 @@ void RegExpMacroAssemblerARM64::CompareAndBranchOrBacktrack(Register reg, void RegExpMacroAssemblerARM64::CheckPreemption() { // Check for preemption. 
ExternalReference stack_limit = - ExternalReference::address_of_stack_limit(isolate()); + ExternalReference::address_of_jslimit(isolate()); __ Mov(x10, stack_limit); __ Ldr(x10, MemOperand(x10)); __ Cmp(sp, x10); @@ -1458,7 +1466,7 @@ void RegExpMacroAssemblerARM64::CheckPreemption() { void RegExpMacroAssemblerARM64::CheckStackLimit() { ExternalReference stack_limit = - ExternalReference::address_of_regexp_stack_limit(isolate()); + ExternalReference::address_of_regexp_stack_limit_address(isolate()); __ Mov(x10, stack_limit); __ Ldr(x10, MemOperand(x10)); __ Cmp(backtrack_stackpointer(), x10); diff --git a/deps/v8/src/regexp/arm64/regexp-macro-assembler-arm64.h b/deps/v8/src/regexp/arm64/regexp-macro-assembler-arm64.h index ef83f9e43c..6154c6cf60 100644 --- a/deps/v8/src/regexp/arm64/regexp-macro-assembler-arm64.h +++ b/deps/v8/src/regexp/arm64/regexp-macro-assembler-arm64.h @@ -24,7 +24,7 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerARM64 virtual void AdvanceRegister(int reg, int by); virtual void Backtrack(); virtual void Bind(Label* label); - virtual void CheckAtStart(Label* on_at_start); + virtual void CheckAtStart(int cp_offset, Label* on_at_start); virtual void CheckCharacter(unsigned c, Label* on_equal); virtual void CheckCharacterAfterAnd(unsigned c, unsigned mask, @@ -72,10 +72,9 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerARM64 virtual void IfRegisterLT(int reg, int comparand, Label* if_lt); virtual void IfRegisterEqPos(int reg, Label* if_eq); virtual IrregexpImplementation Implementation(); - virtual void LoadCurrentCharacter(int cp_offset, - Label* on_end_of_input, - bool check_bounds = true, - int characters = 1); + virtual void LoadCurrentCharacterImpl(int cp_offset, Label* on_end_of_input, + bool check_bounds, int characters, + int eats_at_least); virtual void PopCurrentPosition(); virtual void PopRegister(int register_index); virtual void PushBacktrack(Label* label); diff --git a/deps/v8/src/regexp/ia32/regexp-macro-assembler-ia32.cc 
b/deps/v8/src/regexp/ia32/regexp-macro-assembler-ia32.cc index eb42c23215..5ee7b90988 100644 --- a/deps/v8/src/regexp/ia32/regexp-macro-assembler-ia32.cc +++ b/deps/v8/src/regexp/ia32/regexp-macro-assembler-ia32.cc @@ -34,6 +34,9 @@ namespace internal { * * Each call to a public method should retain this convention. * The stack will have the following structure: + * - Address regexp (address of the JSRegExp object; unused in + * native code, passed to match signature of + * the interpreter) * - Isolate* isolate (address of the current isolate) * - direct_call (if 1, direct call from JavaScript code, if 0 * call through the runtime system) @@ -73,7 +76,8 @@ namespace internal { * int num_capture_registers, * byte* stack_area_base, * bool direct_call = false, - * Isolate* isolate); + * Isolate* isolate + * Address regexp); */ #define __ ACCESS_MASM(masm_) @@ -161,14 +165,12 @@ void RegExpMacroAssemblerIA32::CheckCharacterGT(uc16 limit, Label* on_greater) { BranchOrBacktrack(greater, on_greater); } - -void RegExpMacroAssemblerIA32::CheckAtStart(Label* on_at_start) { - __ lea(eax, Operand(edi, -char_size())); +void RegExpMacroAssemblerIA32::CheckAtStart(int cp_offset, Label* on_at_start) { + __ lea(eax, Operand(edi, -char_size() + cp_offset * char_size())); __ cmp(eax, Operand(ebp, kStringStartMinusOne)); BranchOrBacktrack(equal, on_at_start); } - void RegExpMacroAssemblerIA32::CheckNotAtStart(int cp_offset, Label* on_not_at_start) { __ lea(eax, Operand(edi, -char_size() + cp_offset * char_size())); @@ -684,7 +686,7 @@ Handle<HeapObject> RegExpMacroAssemblerIA32::GetCode(Handle<String> source) { Label stack_ok; ExternalReference stack_limit = - ExternalReference::address_of_stack_limit(isolate()); + ExternalReference::address_of_jslimit(isolate()); __ mov(ecx, esp); __ sub(ecx, StaticVariable(stack_limit)); // Handle it if the stack pointer is already below the stack limit. 
@@ -971,15 +973,19 @@ RegExpMacroAssembler::IrregexpImplementation return kIA32Implementation; } +void RegExpMacroAssemblerIA32::LoadCurrentCharacterImpl(int cp_offset, + Label* on_end_of_input, + bool check_bounds, + int characters, + int eats_at_least) { + // It's possible to preload a small number of characters when each success + // path requires a large number of characters, but not the reverse. + DCHECK_GE(eats_at_least, characters); -void RegExpMacroAssemblerIA32::LoadCurrentCharacter(int cp_offset, - Label* on_end_of_input, - bool check_bounds, - int characters) { DCHECK(cp_offset < (1<<30)); // Be sane! (And ensure negation works) if (check_bounds) { if (cp_offset >= 0) { - CheckPosition(cp_offset + characters - 1, on_end_of_input); + CheckPosition(cp_offset + eats_at_least - 1, on_end_of_input); } else { CheckPosition(cp_offset, on_end_of_input); } @@ -987,7 +993,6 @@ void RegExpMacroAssemblerIA32::LoadCurrentCharacter(int cp_offset, LoadCurrentCharacterUnchecked(cp_offset, characters); } - void RegExpMacroAssemblerIA32::PopCurrentPosition() { Pop(edi); } @@ -1120,7 +1125,8 @@ int RegExpMacroAssemblerIA32::CheckStackGuardState(Address* return_address, return NativeRegExpMacroAssembler::CheckStackGuardState( frame_entry<Isolate*>(re_frame, kIsolate), frame_entry<int>(re_frame, kStartIndex), - frame_entry<int>(re_frame, kDirectCall) == 1, return_address, re_code, + static_cast<RegExp::CallOrigin>(frame_entry<int>(re_frame, kDirectCall)), + return_address, re_code, frame_entry_address<Address>(re_frame, kInputString), frame_entry_address<const byte*>(re_frame, kInputStart), frame_entry_address<const byte*>(re_frame, kInputEnd)); @@ -1214,7 +1220,7 @@ void RegExpMacroAssemblerIA32::CheckPreemption() { // Check for preemption. 
Label no_preempt; ExternalReference stack_limit = - ExternalReference::address_of_stack_limit(isolate()); + ExternalReference::address_of_jslimit(isolate()); __ cmp(esp, StaticVariable(stack_limit)); __ j(above, &no_preempt); @@ -1227,7 +1233,7 @@ void RegExpMacroAssemblerIA32::CheckPreemption() { void RegExpMacroAssemblerIA32::CheckStackLimit() { Label no_stack_overflow; ExternalReference stack_limit = - ExternalReference::address_of_regexp_stack_limit(isolate()); + ExternalReference::address_of_regexp_stack_limit_address(isolate()); __ cmp(backtrack_stackpointer(), StaticVariable(stack_limit)); __ j(above, &no_stack_overflow); diff --git a/deps/v8/src/regexp/ia32/regexp-macro-assembler-ia32.h b/deps/v8/src/regexp/ia32/regexp-macro-assembler-ia32.h index 914552cc93..3464d81fac 100644 --- a/deps/v8/src/regexp/ia32/regexp-macro-assembler-ia32.h +++ b/deps/v8/src/regexp/ia32/regexp-macro-assembler-ia32.h @@ -23,7 +23,7 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerIA32 virtual void AdvanceRegister(int reg, int by); virtual void Backtrack(); virtual void Bind(Label* label); - virtual void CheckAtStart(Label* on_at_start); + virtual void CheckAtStart(int cp_offset, Label* on_at_start); virtual void CheckCharacter(uint32_t c, Label* on_equal); virtual void CheckCharacterAfterAnd(uint32_t c, uint32_t mask, @@ -66,10 +66,9 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerIA32 virtual void IfRegisterLT(int reg, int comparand, Label* if_lt); virtual void IfRegisterEqPos(int reg, Label* if_eq); virtual IrregexpImplementation Implementation(); - virtual void LoadCurrentCharacter(int cp_offset, - Label* on_end_of_input, - bool check_bounds = true, - int characters = 1); + virtual void LoadCurrentCharacterImpl(int cp_offset, Label* on_end_of_input, + bool check_bounds, int characters, + int eats_at_least); virtual void PopCurrentPosition(); virtual void PopRegister(int register_index); virtual void PushBacktrack(Label* label); diff --git 
a/deps/v8/src/regexp/mips/regexp-macro-assembler-mips.cc b/deps/v8/src/regexp/mips/regexp-macro-assembler-mips.cc index e8104ced7e..8d2800f004 100644 --- a/deps/v8/src/regexp/mips/regexp-macro-assembler-mips.cc +++ b/deps/v8/src/regexp/mips/regexp-macro-assembler-mips.cc @@ -178,9 +178,10 @@ void RegExpMacroAssemblerMIPS::CheckCharacterGT(uc16 limit, Label* on_greater) { } -void RegExpMacroAssemblerMIPS::CheckAtStart(Label* on_at_start) { +void RegExpMacroAssemblerMIPS::CheckAtStart(int cp_offset, Label* on_at_start) { __ lw(a1, MemOperand(frame_pointer(), kStringStartMinusOne)); - __ Addu(a0, current_input_offset(), Operand(-char_size())); + __ Addu(a0, current_input_offset(), + Operand(-char_size() + cp_offset * char_size())); BranchOrBacktrack(on_at_start, eq, a0, Operand(a1)); } @@ -647,7 +648,7 @@ Handle<HeapObject> RegExpMacroAssemblerMIPS::GetCode(Handle<String> source) { Label stack_ok; ExternalReference stack_limit = - ExternalReference::address_of_stack_limit(masm_->isolate()); + ExternalReference::address_of_jslimit(masm_->isolate()); __ li(a0, Operand(stack_limit)); __ lw(a0, MemOperand(a0)); __ Subu(a0, sp, a0); @@ -946,15 +947,19 @@ RegExpMacroAssembler::IrregexpImplementation return kMIPSImplementation; } +void RegExpMacroAssemblerMIPS::LoadCurrentCharacterImpl(int cp_offset, + Label* on_end_of_input, + bool check_bounds, + int characters, + int eats_at_least) { + // It's possible to preload a small number of characters when each success + // path requires a large number of characters, but not the reverse. + DCHECK_GE(eats_at_least, characters); -void RegExpMacroAssemblerMIPS::LoadCurrentCharacter(int cp_offset, - Label* on_end_of_input, - bool check_bounds, - int characters) { DCHECK(cp_offset < (1<<30)); // Be sane! (And ensure negation works). 
if (check_bounds) { if (cp_offset >= 0) { - CheckPosition(cp_offset + characters - 1, on_end_of_input); + CheckPosition(cp_offset + eats_at_least - 1, on_end_of_input); } else { CheckPosition(cp_offset, on_end_of_input); } @@ -962,7 +967,6 @@ void RegExpMacroAssemblerMIPS::LoadCurrentCharacter(int cp_offset, LoadCurrentCharacterUnchecked(cp_offset, characters); } - void RegExpMacroAssemblerMIPS::PopCurrentPosition() { Pop(current_input_offset()); } @@ -1176,7 +1180,8 @@ int RegExpMacroAssemblerMIPS::CheckStackGuardState(Address* return_address, return NativeRegExpMacroAssembler::CheckStackGuardState( frame_entry<Isolate*>(re_frame, kIsolate), frame_entry<int>(re_frame, kStartIndex), - frame_entry<int>(re_frame, kDirectCall) == 1, return_address, re_code, + static_cast<RegExp::CallOrigin>(frame_entry<int>(re_frame, kDirectCall)), + return_address, re_code, frame_entry_address<Address>(re_frame, kInputString), frame_entry_address<const byte*>(re_frame, kInputStart), frame_entry_address<const byte*>(re_frame, kInputEnd)); @@ -1267,7 +1272,7 @@ void RegExpMacroAssemblerMIPS::Pop(Register target) { void RegExpMacroAssemblerMIPS::CheckPreemption() { // Check for preemption. 
ExternalReference stack_limit = - ExternalReference::address_of_stack_limit(masm_->isolate()); + ExternalReference::address_of_jslimit(masm_->isolate()); __ li(a0, Operand(stack_limit)); __ lw(a0, MemOperand(a0)); SafeCall(&check_preempt_label_, ls, sp, Operand(a0)); @@ -1276,7 +1281,8 @@ void RegExpMacroAssemblerMIPS::CheckPreemption() { void RegExpMacroAssemblerMIPS::CheckStackLimit() { ExternalReference stack_limit = - ExternalReference::address_of_regexp_stack_limit(masm_->isolate()); + ExternalReference::address_of_regexp_stack_limit_address( + masm_->isolate()); __ li(a0, Operand(stack_limit)); __ lw(a0, MemOperand(a0)); diff --git a/deps/v8/src/regexp/mips/regexp-macro-assembler-mips.h b/deps/v8/src/regexp/mips/regexp-macro-assembler-mips.h index b785910466..084436bbbd 100644 --- a/deps/v8/src/regexp/mips/regexp-macro-assembler-mips.h +++ b/deps/v8/src/regexp/mips/regexp-macro-assembler-mips.h @@ -23,7 +23,7 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerMIPS virtual void AdvanceRegister(int reg, int by); virtual void Backtrack(); virtual void Bind(Label* label); - virtual void CheckAtStart(Label* on_at_start); + virtual void CheckAtStart(int cp_offset, Label* on_at_start); virtual void CheckCharacter(uint32_t c, Label* on_equal); virtual void CheckCharacterAfterAnd(uint32_t c, uint32_t mask, @@ -67,10 +67,9 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerMIPS virtual void IfRegisterLT(int reg, int comparand, Label* if_lt); virtual void IfRegisterEqPos(int reg, Label* if_eq); virtual IrregexpImplementation Implementation(); - virtual void LoadCurrentCharacter(int cp_offset, - Label* on_end_of_input, - bool check_bounds = true, - int characters = 1); + virtual void LoadCurrentCharacterImpl(int cp_offset, Label* on_end_of_input, + bool check_bounds, int characters, + int eats_at_least); virtual void PopCurrentPosition(); virtual void PopRegister(int register_index); virtual void PushBacktrack(Label* label); diff --git 
a/deps/v8/src/regexp/mips64/regexp-macro-assembler-mips64.cc b/deps/v8/src/regexp/mips64/regexp-macro-assembler-mips64.cc index 239cc87ae8..2d5402ebdb 100644 --- a/deps/v8/src/regexp/mips64/regexp-macro-assembler-mips64.cc +++ b/deps/v8/src/regexp/mips64/regexp-macro-assembler-mips64.cc @@ -214,9 +214,10 @@ void RegExpMacroAssemblerMIPS::CheckCharacterGT(uc16 limit, Label* on_greater) { } -void RegExpMacroAssemblerMIPS::CheckAtStart(Label* on_at_start) { +void RegExpMacroAssemblerMIPS::CheckAtStart(int cp_offset, Label* on_at_start) { __ Ld(a1, MemOperand(frame_pointer(), kStringStartMinusOne)); - __ Daddu(a0, current_input_offset(), Operand(-char_size())); + __ Daddu(a0, current_input_offset(), + Operand(-char_size() + cp_offset * char_size())); BranchOrBacktrack(on_at_start, eq, a0, Operand(a1)); } @@ -683,7 +684,7 @@ Handle<HeapObject> RegExpMacroAssemblerMIPS::GetCode(Handle<String> source) { Label stack_ok; ExternalReference stack_limit = - ExternalReference::address_of_stack_limit(masm_->isolate()); + ExternalReference::address_of_jslimit(masm_->isolate()); __ li(a0, Operand(stack_limit)); __ Ld(a0, MemOperand(a0)); __ Dsubu(a0, sp, a0); @@ -983,15 +984,19 @@ RegExpMacroAssembler::IrregexpImplementation return kMIPSImplementation; } +void RegExpMacroAssemblerMIPS::LoadCurrentCharacterImpl(int cp_offset, + Label* on_end_of_input, + bool check_bounds, + int characters, + int eats_at_least) { + // It's possible to preload a small number of characters when each success + // path requires a large number of characters, but not the reverse. + DCHECK_GE(eats_at_least, characters); -void RegExpMacroAssemblerMIPS::LoadCurrentCharacter(int cp_offset, - Label* on_end_of_input, - bool check_bounds, - int characters) { DCHECK(cp_offset < (1<<30)); // Be sane! (And ensure negation works). 
if (check_bounds) { if (cp_offset >= 0) { - CheckPosition(cp_offset + characters - 1, on_end_of_input); + CheckPosition(cp_offset + eats_at_least - 1, on_end_of_input); } else { CheckPosition(cp_offset, on_end_of_input); } @@ -999,7 +1004,6 @@ void RegExpMacroAssemblerMIPS::LoadCurrentCharacter(int cp_offset, LoadCurrentCharacterUnchecked(cp_offset, characters); } - void RegExpMacroAssemblerMIPS::PopCurrentPosition() { Pop(current_input_offset()); } @@ -1213,7 +1217,9 @@ int64_t RegExpMacroAssemblerMIPS::CheckStackGuardState(Address* return_address, return NativeRegExpMacroAssembler::CheckStackGuardState( frame_entry<Isolate*>(re_frame, kIsolate), static_cast<int>(frame_entry<int64_t>(re_frame, kStartIndex)), - frame_entry<int64_t>(re_frame, kDirectCall) == 1, return_address, re_code, + static_cast<RegExp::CallOrigin>( + frame_entry<int64_t>(re_frame, kDirectCall)), + return_address, re_code, frame_entry_address<Address>(re_frame, kInputString), frame_entry_address<const byte*>(re_frame, kInputStart), frame_entry_address<const byte*>(re_frame, kInputEnd)); @@ -1304,7 +1310,7 @@ void RegExpMacroAssemblerMIPS::Pop(Register target) { void RegExpMacroAssemblerMIPS::CheckPreemption() { // Check for preemption. 
ExternalReference stack_limit = - ExternalReference::address_of_stack_limit(masm_->isolate()); + ExternalReference::address_of_jslimit(masm_->isolate()); __ li(a0, Operand(stack_limit)); __ Ld(a0, MemOperand(a0)); SafeCall(&check_preempt_label_, ls, sp, Operand(a0)); @@ -1313,7 +1319,8 @@ void RegExpMacroAssemblerMIPS::CheckPreemption() { void RegExpMacroAssemblerMIPS::CheckStackLimit() { ExternalReference stack_limit = - ExternalReference::address_of_regexp_stack_limit(masm_->isolate()); + ExternalReference::address_of_regexp_stack_limit_address( + masm_->isolate()); __ li(a0, Operand(stack_limit)); __ Ld(a0, MemOperand(a0)); diff --git a/deps/v8/src/regexp/mips64/regexp-macro-assembler-mips64.h b/deps/v8/src/regexp/mips64/regexp-macro-assembler-mips64.h index d24735d08e..9189a6a72d 100644 --- a/deps/v8/src/regexp/mips64/regexp-macro-assembler-mips64.h +++ b/deps/v8/src/regexp/mips64/regexp-macro-assembler-mips64.h @@ -23,7 +23,7 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerMIPS virtual void AdvanceRegister(int reg, int by); virtual void Backtrack(); virtual void Bind(Label* label); - virtual void CheckAtStart(Label* on_at_start); + virtual void CheckAtStart(int cp_offset, Label* on_at_start); virtual void CheckCharacter(uint32_t c, Label* on_equal); virtual void CheckCharacterAfterAnd(uint32_t c, uint32_t mask, @@ -67,10 +67,9 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerMIPS virtual void IfRegisterLT(int reg, int comparand, Label* if_lt); virtual void IfRegisterEqPos(int reg, Label* if_eq); virtual IrregexpImplementation Implementation(); - virtual void LoadCurrentCharacter(int cp_offset, - Label* on_end_of_input, - bool check_bounds = true, - int characters = 1); + virtual void LoadCurrentCharacterImpl(int cp_offset, Label* on_end_of_input, + bool check_bounds, int characters, + int eats_at_least); virtual void PopCurrentPosition(); virtual void PopRegister(int register_index); virtual void PushBacktrack(Label* label); diff --git 
a/deps/v8/src/regexp/ppc/regexp-macro-assembler-ppc.cc b/deps/v8/src/regexp/ppc/regexp-macro-assembler-ppc.cc index bce612e66f..13b5c85605 100644 --- a/deps/v8/src/regexp/ppc/regexp-macro-assembler-ppc.cc +++ b/deps/v8/src/regexp/ppc/regexp-macro-assembler-ppc.cc @@ -189,15 +189,14 @@ void RegExpMacroAssemblerPPC::CheckCharacterGT(uc16 limit, Label* on_greater) { BranchOrBacktrack(gt, on_greater); } - -void RegExpMacroAssemblerPPC::CheckAtStart(Label* on_at_start) { +void RegExpMacroAssemblerPPC::CheckAtStart(int cp_offset, Label* on_at_start) { __ LoadP(r4, MemOperand(frame_pointer(), kStringStartMinusOne)); - __ addi(r3, current_input_offset(), Operand(-char_size())); + __ addi(r3, current_input_offset(), + Operand(-char_size() + cp_offset * char_size())); __ cmp(r3, r4); BranchOrBacktrack(eq, on_at_start); } - void RegExpMacroAssemblerPPC::CheckNotAtStart(int cp_offset, Label* on_not_at_start) { __ LoadP(r4, MemOperand(frame_pointer(), kStringStartMinusOne)); @@ -689,7 +688,7 @@ Handle<HeapObject> RegExpMacroAssemblerPPC::GetCode(Handle<String> source) { Label stack_ok; ExternalReference stack_limit = - ExternalReference::address_of_stack_limit(isolate()); + ExternalReference::address_of_jslimit(isolate()); __ mov(r3, Operand(stack_limit)); __ LoadP(r3, MemOperand(r3)); __ sub(r3, sp, r3, LeaveOE, SetRC); @@ -978,15 +977,19 @@ RegExpMacroAssemblerPPC::Implementation() { return kPPCImplementation; } +void RegExpMacroAssemblerPPC::LoadCurrentCharacterImpl(int cp_offset, + Label* on_end_of_input, + bool check_bounds, + int characters, + int eats_at_least) { + // It's possible to preload a small number of characters when each success + // path requires a large number of characters, but not the reverse. + DCHECK_GE(eats_at_least, characters); -void RegExpMacroAssemblerPPC::LoadCurrentCharacter(int cp_offset, - Label* on_end_of_input, - bool check_bounds, - int characters) { DCHECK(cp_offset < (1 << 30)); // Be sane! 
(And ensure negation works) if (check_bounds) { if (cp_offset >= 0) { - CheckPosition(cp_offset + characters - 1, on_end_of_input); + CheckPosition(cp_offset + eats_at_least - 1, on_end_of_input); } else { CheckPosition(cp_offset, on_end_of_input); } @@ -994,7 +997,6 @@ void RegExpMacroAssemblerPPC::LoadCurrentCharacter(int cp_offset, LoadCurrentCharacterUnchecked(cp_offset, characters); } - void RegExpMacroAssemblerPPC::PopCurrentPosition() { Pop(current_input_offset()); } @@ -1177,8 +1179,10 @@ int RegExpMacroAssemblerPPC::CheckStackGuardState(Address* return_address, return NativeRegExpMacroAssembler::CheckStackGuardState( frame_entry<Isolate*>(re_frame, kIsolate), frame_entry<intptr_t>(re_frame, kStartIndex), - frame_entry<intptr_t>(re_frame, kDirectCall) == 1, return_address, - re_code, frame_entry_address<Address>(re_frame, kInputString), + static_cast<RegExp::CallOrigin>( + frame_entry<intptr_t>(re_frame, kDirectCall)), + return_address, re_code, + frame_entry_address<Address>(re_frame, kInputString), frame_entry_address<const byte*>(re_frame, kInputStart), frame_entry_address<const byte*>(re_frame, kInputEnd)); } @@ -1267,7 +1271,7 @@ void RegExpMacroAssemblerPPC::Pop(Register target) { void RegExpMacroAssemblerPPC::CheckPreemption() { // Check for preemption. 
ExternalReference stack_limit = - ExternalReference::address_of_stack_limit(isolate()); + ExternalReference::address_of_jslimit(isolate()); __ mov(r3, Operand(stack_limit)); __ LoadP(r3, MemOperand(r3)); __ cmpl(sp, r3); @@ -1277,7 +1281,7 @@ void RegExpMacroAssemblerPPC::CheckPreemption() { void RegExpMacroAssemblerPPC::CheckStackLimit() { ExternalReference stack_limit = - ExternalReference::address_of_regexp_stack_limit(isolate()); + ExternalReference::address_of_regexp_stack_limit_address(isolate()); __ mov(r3, Operand(stack_limit)); __ LoadP(r3, MemOperand(r3)); __ cmpl(backtrack_stackpointer(), r3); diff --git a/deps/v8/src/regexp/ppc/regexp-macro-assembler-ppc.h b/deps/v8/src/regexp/ppc/regexp-macro-assembler-ppc.h index 418a01a9a4..60236a4000 100644 --- a/deps/v8/src/regexp/ppc/regexp-macro-assembler-ppc.h +++ b/deps/v8/src/regexp/ppc/regexp-macro-assembler-ppc.h @@ -23,7 +23,7 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerPPC virtual void AdvanceRegister(int reg, int by); virtual void Backtrack(); virtual void Bind(Label* label); - virtual void CheckAtStart(Label* on_at_start); + virtual void CheckAtStart(int cp_offset, Label* on_at_start); virtual void CheckCharacter(unsigned c, Label* on_equal); virtual void CheckCharacterAfterAnd(unsigned c, unsigned mask, Label* on_equal); @@ -59,9 +59,9 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerPPC virtual void IfRegisterLT(int reg, int comparand, Label* if_lt); virtual void IfRegisterEqPos(int reg, Label* if_eq); virtual IrregexpImplementation Implementation(); - virtual void LoadCurrentCharacter(int cp_offset, Label* on_end_of_input, - bool check_bounds = true, - int characters = 1); + virtual void LoadCurrentCharacterImpl(int cp_offset, Label* on_end_of_input, + bool check_bounds, int characters, + int eats_at_least); virtual void PopCurrentPosition(); virtual void PopRegister(int register_index); virtual void PushBacktrack(Label* label); diff --git a/deps/v8/src/regexp/regexp-bytecode-generator.cc 
b/deps/v8/src/regexp/regexp-bytecode-generator.cc index ee3b4015d5..85b144438e 100644 --- a/deps/v8/src/regexp/regexp-bytecode-generator.cc +++ b/deps/v8/src/regexp/regexp-bytecode-generator.cc @@ -171,10 +171,19 @@ void RegExpBytecodeGenerator::CheckGreedyLoop( EmitOrLink(on_tos_equals_current_position); } -void RegExpBytecodeGenerator::LoadCurrentCharacter(int cp_offset, - Label* on_failure, - bool check_bounds, - int characters) { +void RegExpBytecodeGenerator::LoadCurrentCharacterImpl(int cp_offset, + Label* on_failure, + bool check_bounds, + int characters, + int eats_at_least) { + DCHECK_GE(eats_at_least, characters); + if (eats_at_least > characters && check_bounds) { + DCHECK(is_uint24(cp_offset + eats_at_least)); + Emit(BC_CHECK_CURRENT_POSITION, cp_offset + eats_at_least); + EmitOrLink(on_failure); + check_bounds = false; // Load below doesn't need to check. + } + DCHECK_LE(kMinCPOffset, cp_offset); DCHECK_GE(kMaxCPOffset, cp_offset); int bytecode; @@ -221,8 +230,8 @@ void RegExpBytecodeGenerator::CheckCharacter(uint32_t c, Label* on_equal) { EmitOrLink(on_equal); } -void RegExpBytecodeGenerator::CheckAtStart(Label* on_at_start) { - Emit(BC_CHECK_AT_START, 0); +void RegExpBytecodeGenerator::CheckAtStart(int cp_offset, Label* on_at_start) { + Emit(BC_CHECK_AT_START, cp_offset); EmitOrLink(on_at_start); } diff --git a/deps/v8/src/regexp/regexp-bytecode-generator.h b/deps/v8/src/regexp/regexp-bytecode-generator.h index b7207e977c..84b7ce361c 100644 --- a/deps/v8/src/regexp/regexp-bytecode-generator.h +++ b/deps/v8/src/regexp/regexp-bytecode-generator.h @@ -46,16 +46,16 @@ class V8_EXPORT_PRIVATE RegExpBytecodeGenerator : public RegExpMacroAssembler { virtual void ReadCurrentPositionFromRegister(int reg); virtual void WriteStackPointerToRegister(int reg); virtual void ReadStackPointerFromRegister(int reg); - virtual void LoadCurrentCharacter(int cp_offset, Label* on_end_of_input, - bool check_bounds = true, - int characters = 1); + virtual void 
LoadCurrentCharacterImpl(int cp_offset, Label* on_end_of_input, + bool check_bounds, int characters, + int eats_at_least); virtual void CheckCharacter(unsigned c, Label* on_equal); virtual void CheckCharacterAfterAnd(unsigned c, unsigned mask, Label* on_equal); virtual void CheckCharacterGT(uc16 limit, Label* on_greater); virtual void CheckCharacterLT(uc16 limit, Label* on_less); virtual void CheckGreedyLoop(Label* on_tos_equals_current_position); - virtual void CheckAtStart(Label* on_at_start); + virtual void CheckAtStart(int cp_offset, Label* on_at_start); virtual void CheckNotAtStart(int cp_offset, Label* on_not_at_start); virtual void CheckNotCharacter(unsigned c, Label* on_not_equal); virtual void CheckNotCharacterAfterAnd(unsigned c, unsigned mask, diff --git a/deps/v8/src/regexp/regexp-bytecodes.h b/deps/v8/src/regexp/regexp-bytecodes.h index 8b1468c1bf..3dd7637b88 100644 --- a/deps/v8/src/regexp/regexp-bytecodes.h +++ b/deps/v8/src/regexp/regexp-bytecodes.h @@ -5,6 +5,8 @@ #ifndef V8_REGEXP_REGEXP_BYTECODES_H_ #define V8_REGEXP_REGEXP_BYTECODES_H_ +#include "src/base/macros.h" + namespace v8 { namespace internal { @@ -67,16 +69,43 @@ const int BYTECODE_SHIFT = 8; V(CHECK_NOT_AT_START, 48, 8) /* bc8 offset24 addr32 */ \ V(CHECK_GREEDY, 49, 8) /* bc8 pad24 addr32 */ \ V(ADVANCE_CP_AND_GOTO, 50, 8) /* bc8 offset24 addr32 */ \ - V(SET_CURRENT_POSITION_FROM_END, 51, 4) /* bc8 idx24 */ + V(SET_CURRENT_POSITION_FROM_END, 51, 4) /* bc8 idx24 */ \ + V(CHECK_CURRENT_POSITION, 52, 8) /* bc8 idx24 addr32 */ + +#define COUNT(...) +1 +static constexpr int kRegExpBytecodeCount = BYTECODE_ITERATOR(COUNT); +#undef COUNT + +// Just making sure we assigned values above properly. They should be +// contiguous, strictly increasing, and start at 0. +// TODO(jgruber): Do not explicitly assign values, instead generate them +// implicitly from the list order. 
+STATIC_ASSERT(kRegExpBytecodeCount == 53); -#define DECLARE_BYTECODES(name, code, length) static const int BC_##name = code; +#define DECLARE_BYTECODES(name, code, length) \ + static constexpr int BC_##name = code; BYTECODE_ITERATOR(DECLARE_BYTECODES) #undef DECLARE_BYTECODES -#define DECLARE_BYTECODE_LENGTH(name, code, length) \ - static const int BC_##name##_LENGTH = length; -BYTECODE_ITERATOR(DECLARE_BYTECODE_LENGTH) +static constexpr int kRegExpBytecodeLengths[] = { +#define DECLARE_BYTECODE_LENGTH(name, code, length) length, + BYTECODE_ITERATOR(DECLARE_BYTECODE_LENGTH) #undef DECLARE_BYTECODE_LENGTH +}; + +inline constexpr int RegExpBytecodeLength(int bytecode) { + return kRegExpBytecodeLengths[bytecode]; +} + +static const char* const kRegExpBytecodeNames[] = { +#define DECLARE_BYTECODE_NAME(name, ...) #name, + BYTECODE_ITERATOR(DECLARE_BYTECODE_NAME) +#undef DECLARE_BYTECODE_NAME +}; + +inline const char* RegExpBytecodeName(int bytecode) { + return kRegExpBytecodeNames[bytecode]; +} } // namespace internal } // namespace v8 diff --git a/deps/v8/src/regexp/regexp-compiler-tonode.cc b/deps/v8/src/regexp/regexp-compiler-tonode.cc index d12c35682e..2d86d3ea9e 100644 --- a/deps/v8/src/regexp/regexp-compiler-tonode.cc +++ b/deps/v8/src/regexp/regexp-compiler-tonode.cc @@ -1627,8 +1627,8 @@ RegExpNode* RegExpQuantifier::ToNode(int min, int max, bool is_greedy, bool needs_counter = has_min || has_max; int reg_ctr = needs_counter ? compiler->AllocateRegister() : RegExpCompiler::kNoRegister; - LoopChoiceNode* center = new (zone) - LoopChoiceNode(body->min_match() == 0, compiler->read_backward(), zone); + LoopChoiceNode* center = new (zone) LoopChoiceNode( + body->min_match() == 0, compiler->read_backward(), min, zone); if (not_at_start && !compiler->read_backward()) center->set_not_at_start(); RegExpNode* loop_return = needs_counter ? 
static_cast<RegExpNode*>( @@ -1668,7 +1668,7 @@ RegExpNode* RegExpQuantifier::ToNode(int min, int max, bool is_greedy, center->AddLoopAlternative(body_alt); } if (needs_counter) { - return ActionNode::SetRegister(reg_ctr, 0, center); + return ActionNode::SetRegisterForLoop(reg_ctr, 0, center); } else { return center; } diff --git a/deps/v8/src/regexp/regexp-compiler.cc b/deps/v8/src/regexp/regexp-compiler.cc index c70bbc3e4a..85da69f308 100644 --- a/deps/v8/src/regexp/regexp-compiler.cc +++ b/deps/v8/src/regexp/regexp-compiler.cc @@ -4,13 +4,12 @@ #include "src/regexp/regexp-compiler.h" -#include "src/diagnostics/code-tracer.h" +#include "src/base/safe_conversions.h" #include "src/execution/isolate.h" #include "src/objects/objects-inl.h" #include "src/regexp/regexp-macro-assembler-arch.h" #include "src/regexp/regexp-macro-assembler-tracer.h" #include "src/strings/unicode-inl.h" -#include "src/utils/ostreams.h" #include "src/zone/zone-list-inl.h" #ifdef V8_INTL_SUPPORT @@ -272,13 +271,7 @@ RegExpCompiler::CompilationResult RegExpCompiler::Assemble( Handle<HeapObject> code = macro_assembler_->GetCode(pattern); isolate->IncreaseTotalRegexpCodeGenerated(code->Size()); work_list_ = nullptr; -#ifdef ENABLE_DISASSEMBLER - if (FLAG_print_code && !FLAG_regexp_interpret_all) { - CodeTracer::Scope trace_scope(isolate->GetCodeTracer()); - OFStream os(trace_scope.file()); - Handle<Code>::cast(code)->Disassemble(pattern->ToCString().get(), os); - } -#endif + #ifdef DEBUG if (FLAG_trace_regexp_assembler) { delete macro_assembler_; @@ -422,14 +415,14 @@ void Trace::PerformDeferredActions(RegExpMacroAssembler* assembler, action = action->next()) { if (action->Mentions(reg)) { switch (action->action_type()) { - case ActionNode::SET_REGISTER: { - Trace::DeferredSetRegister* psr = - static_cast<Trace::DeferredSetRegister*>(action); + case ActionNode::SET_REGISTER_FOR_LOOP: { + Trace::DeferredSetRegisterForLoop* psr = + static_cast<Trace::DeferredSetRegisterForLoop*>(action); if 
(!absolute) { value += psr->value(); absolute = true; } - // SET_REGISTER is currently only used for newly introduced loop + // SET_REGISTER_FOR_LOOP is only used for newly introduced loop // counters. They can have a significant previous value if they // occur in a loop. TODO(lrn): Propagate this information, so // we can set undo_action to IGNORE if we know there is no value to @@ -634,9 +627,10 @@ void GuardedAlternative::AddGuard(Guard* guard, Zone* zone) { guards_->Add(guard, zone); } -ActionNode* ActionNode::SetRegister(int reg, int val, RegExpNode* on_success) { +ActionNode* ActionNode::SetRegisterForLoop(int reg, int val, + RegExpNode* on_success) { ActionNode* result = - new (on_success->zone()) ActionNode(SET_REGISTER, on_success); + new (on_success->zone()) ActionNode(SET_REGISTER_FOR_LOOP, on_success); result->data_.u_store_register.reg = reg; result->data_.u_store_register.value = val; return result; @@ -705,10 +699,6 @@ ActionNode* ActionNode::EmptyMatchCheck(int start_register, FOR_EACH_NODE_TYPE(DEFINE_ACCEPT) #undef DEFINE_ACCEPT -void LoopChoiceNode::Accept(NodeVisitor* visitor) { - visitor->VisitLoopChoice(this); -} - // ------------------------------------------------------------------- // Emit code. @@ -1326,12 +1316,6 @@ bool RegExpNode::KeepRecursing(RegExpCompiler* compiler) { compiler->recursion_depth() <= RegExpCompiler::kMaxRecursion; } -int ActionNode::EatsAtLeast(int still_to_find, int budget, bool not_at_start) { - if (budget <= 0) return 0; - if (action_type_ == POSITIVE_SUBMATCH_SUCCESS) return 0; // Rewinds input! 
- return on_success()->EatsAtLeast(still_to_find, budget - 1, not_at_start); -} - void ActionNode::FillInBMInfo(Isolate* isolate, int offset, int budget, BoyerMooreLookahead* bm, bool not_at_start) { if (action_type_ == POSITIVE_SUBMATCH_SUCCESS) { @@ -1344,16 +1328,16 @@ void ActionNode::FillInBMInfo(Isolate* isolate, int offset, int budget, SaveBMInfo(bm, not_at_start, offset); } -int AssertionNode::EatsAtLeast(int still_to_find, int budget, - bool not_at_start) { - if (budget <= 0) return 0; - // If we know we are not at the start and we are asked "how many characters - // will you match if you succeed?" then we can answer anything since false - // implies false. So lets just return the max answer (still_to_find) since - // that won't prevent us from preloading a lot of characters for the other - // branches in the node graph. - if (assertion_type() == AT_START && not_at_start) return still_to_find; - return on_success()->EatsAtLeast(still_to_find, budget - 1, not_at_start); +void ActionNode::GetQuickCheckDetails(QuickCheckDetails* details, + RegExpCompiler* compiler, int filled_in, + bool not_at_start) { + if (action_type_ == SET_REGISTER_FOR_LOOP) { + on_success()->GetQuickCheckDetailsFromLoopEntry(details, compiler, + filled_in, not_at_start); + } else { + on_success()->GetQuickCheckDetails(details, compiler, filled_in, + not_at_start); + } } void AssertionNode::FillInBMInfo(Isolate* isolate, int offset, int budget, @@ -1364,68 +1348,13 @@ void AssertionNode::FillInBMInfo(Isolate* isolate, int offset, int budget, SaveBMInfo(bm, not_at_start, offset); } -int BackReferenceNode::EatsAtLeast(int still_to_find, int budget, - bool not_at_start) { - if (read_backward()) return 0; - if (budget <= 0) return 0; - return on_success()->EatsAtLeast(still_to_find, budget - 1, not_at_start); -} - -int TextNode::EatsAtLeast(int still_to_find, int budget, bool not_at_start) { - if (read_backward()) return 0; - int answer = Length(); - if (answer >= still_to_find) return 
answer; - if (budget <= 0) return answer; - // We are not at start after this node so we set the last argument to 'true'. - return answer + - on_success()->EatsAtLeast(still_to_find - answer, budget - 1, true); -} - -int NegativeLookaroundChoiceNode::EatsAtLeast(int still_to_find, int budget, - bool not_at_start) { - if (budget <= 0) return 0; - // Alternative 0 is the negative lookahead, alternative 1 is what comes - // afterwards. - RegExpNode* node = alternatives_->at(1).node(); - return node->EatsAtLeast(still_to_find, budget - 1, not_at_start); -} - void NegativeLookaroundChoiceNode::GetQuickCheckDetails( QuickCheckDetails* details, RegExpCompiler* compiler, int filled_in, bool not_at_start) { - // Alternative 0 is the negative lookahead, alternative 1 is what comes - // afterwards. - RegExpNode* node = alternatives_->at(1).node(); + RegExpNode* node = continue_node(); return node->GetQuickCheckDetails(details, compiler, filled_in, not_at_start); } -int ChoiceNode::EatsAtLeastHelper(int still_to_find, int budget, - RegExpNode* ignore_this_node, - bool not_at_start) { - if (budget <= 0) return 0; - int min = 100; - int choice_count = alternatives_->length(); - budget = (budget - 1) / choice_count; - for (int i = 0; i < choice_count; i++) { - RegExpNode* node = alternatives_->at(i).node(); - if (node == ignore_this_node) continue; - int node_eats_at_least = - node->EatsAtLeast(still_to_find, budget, not_at_start); - if (node_eats_at_least < min) min = node_eats_at_least; - if (min == 0) return 0; - } - return min; -} - -int LoopChoiceNode::EatsAtLeast(int still_to_find, int budget, - bool not_at_start) { - return EatsAtLeastHelper(still_to_find, budget - 1, loop_node_, not_at_start); -} - -int ChoiceNode::EatsAtLeast(int still_to_find, int budget, bool not_at_start) { - return EatsAtLeastHelper(still_to_find, budget, nullptr, not_at_start); -} - // Takes the left-most 1-bit and smears it out, setting all bits to its right. 
static inline uint32_t SmearBitsRight(uint32_t v) { v |= v >> 1; @@ -1459,12 +1388,78 @@ bool QuickCheckDetails::Rationalize(bool asc) { return found_useful_op; } +int RegExpNode::EatsAtLeast(bool not_at_start) { + return not_at_start ? eats_at_least_.eats_at_least_from_not_start + : eats_at_least_.eats_at_least_from_possibly_start; +} + +EatsAtLeastInfo RegExpNode::EatsAtLeastFromLoopEntry() { + // SET_REGISTER_FOR_LOOP is only used to initialize loop counters, and it + // implies that the following node must be a LoopChoiceNode. If we need to + // set registers to constant values for other reasons, we could introduce a + // new action type SET_REGISTER that doesn't imply anything about its + // successor. + UNREACHABLE(); +} + +void RegExpNode::GetQuickCheckDetailsFromLoopEntry(QuickCheckDetails* details, + RegExpCompiler* compiler, + int characters_filled_in, + bool not_at_start) { + // See comment in RegExpNode::EatsAtLeastFromLoopEntry. + UNREACHABLE(); +} + +EatsAtLeastInfo LoopChoiceNode::EatsAtLeastFromLoopEntry() { + DCHECK_EQ(alternatives_->length(), 2); // There's just loop and continue. + + if (read_backward()) { + // Can't do anything special for a backward loop, so return the basic values + // that we got during analysis. + return *eats_at_least_info(); + } + + // Figure out how much the loop body itself eats, not including anything in + // the continuation case. In general, the nodes in the loop body should report + // that they eat at least the number eaten by the continuation node, since any + // successful match in the loop body must also include the continuation node. + // However, in some cases involving positive lookaround, the loop body under- + // reports its appetite, so use saturated math here to avoid negative numbers. 
+ uint8_t loop_body_from_not_start = base::saturated_cast<uint8_t>( + loop_node_->EatsAtLeast(true) - continue_node_->EatsAtLeast(true)); + uint8_t loop_body_from_possibly_start = base::saturated_cast<uint8_t>( + loop_node_->EatsAtLeast(false) - continue_node_->EatsAtLeast(true)); + + // Limit the number of loop iterations to avoid overflow in subsequent steps. + int loop_iterations = base::saturated_cast<uint8_t>(min_loop_iterations()); + + EatsAtLeastInfo result; + result.eats_at_least_from_not_start = + base::saturated_cast<uint8_t>(loop_iterations * loop_body_from_not_start + + continue_node_->EatsAtLeast(true)); + if (loop_iterations > 0 && loop_body_from_possibly_start > 0) { + // First loop iteration eats at least one, so all subsequent iterations + // and the after-loop chunk are guaranteed to not be at the start. + result.eats_at_least_from_possibly_start = base::saturated_cast<uint8_t>( + loop_body_from_possibly_start + + (loop_iterations - 1) * loop_body_from_not_start + + continue_node_->EatsAtLeast(true)); + } else { + // Loop body might eat nothing, so only continue node contributes. 
+ result.eats_at_least_from_possibly_start = + continue_node_->EatsAtLeast(false); + } + return result; +} + bool RegExpNode::EmitQuickCheck(RegExpCompiler* compiler, Trace* bounds_check_trace, Trace* trace, bool preload_has_checked_bounds, Label* on_possible_success, QuickCheckDetails* details, - bool fall_through_on_failure) { + bool fall_through_on_failure, + ChoiceNode* predecessor) { + DCHECK_NOT_NULL(predecessor); if (details->characters() == 0) return false; GetQuickCheckDetails(details, compiler, 0, trace->at_start() == Trace::FALSE_VALUE); @@ -1479,13 +1474,17 @@ bool RegExpNode::EmitQuickCheck(RegExpCompiler* compiler, if (trace->characters_preloaded() != details->characters()) { DCHECK(trace->cp_offset() == bounds_check_trace->cp_offset()); - // We are attempting to preload the minimum number of characters + // The bounds check is performed using the minimum number of characters // any choice would eat, so if the bounds check fails, then none of the // choices can succeed, so we can just immediately backtrack, rather - // than go to the next choice. + // than go to the next choice. The number of characters preloaded may be + // less than the number used for the bounds check. + int eats_at_least = predecessor->EatsAtLeast( + bounds_check_trace->at_start() == Trace::FALSE_VALUE); + DCHECK_GE(eats_at_least, details->characters()); assembler->LoadCurrentCharacter( trace->cp_offset(), bounds_check_trace->backtrack(), - !preload_has_checked_bounds, details->characters()); + !preload_has_checked_bounds, details->characters(), eats_at_least); } bool need_mask = true; @@ -1579,7 +1578,7 @@ void TextNode::GetQuickCheckDetails(QuickCheckDetails* details, // and the mask-compare will determine definitely whether we have // a match at this character position. 
pos->mask = char_mask; - pos->value = c; + pos->value = chars[0]; pos->determines_perfectly = true; } else { uint32_t common_bits = char_mask; @@ -1764,6 +1763,37 @@ class VisitMarker { NodeInfo* info_; }; +// Temporarily sets traversed_loop_initialization_node_. +class LoopInitializationMarker { + public: + explicit LoopInitializationMarker(LoopChoiceNode* node) : node_(node) { + DCHECK(!node_->traversed_loop_initialization_node_); + node_->traversed_loop_initialization_node_ = true; + } + ~LoopInitializationMarker() { + DCHECK(node_->traversed_loop_initialization_node_); + node_->traversed_loop_initialization_node_ = false; + } + + private: + LoopChoiceNode* node_; + DISALLOW_COPY_AND_ASSIGN(LoopInitializationMarker); +}; + +// Temporarily decrements min_loop_iterations_. +class IterationDecrementer { + public: + explicit IterationDecrementer(LoopChoiceNode* node) : node_(node) { + DCHECK_GT(node_->min_loop_iterations_, 0); + --node_->min_loop_iterations_; + } + ~IterationDecrementer() { ++node_->min_loop_iterations_; } + + private: + LoopChoiceNode* node_; + DISALLOW_COPY_AND_ASSIGN(IterationDecrementer); +}; + RegExpNode* SeqRegExpNode::FilterOneByte(int depth) { if (info()->replacement_calculated) return replacement(); if (depth < 0) return this; @@ -1916,17 +1946,17 @@ RegExpNode* NegativeLookaroundChoiceNode::FilterOneByte(int depth) { VisitMarker marker(info()); // Alternative 0 is the negative lookahead, alternative 1 is what comes // afterwards. 
- RegExpNode* node = alternatives_->at(1).node(); + RegExpNode* node = continue_node(); RegExpNode* replacement = node->FilterOneByte(depth - 1); if (replacement == nullptr) return set_replacement(nullptr); - alternatives_->at(1).set_node(replacement); + alternatives_->at(kContinueIndex).set_node(replacement); - RegExpNode* neg_node = alternatives_->at(0).node(); + RegExpNode* neg_node = lookaround_node(); RegExpNode* neg_replacement = neg_node->FilterOneByte(depth - 1); // If the negative lookahead is always going to fail then // we don't need to check it. if (neg_replacement == nullptr) return set_replacement(replacement); - alternatives_->at(0).set_node(neg_replacement); + alternatives_->at(kLookaroundIndex).set_node(neg_replacement); return set_replacement(this); } @@ -1935,9 +1965,48 @@ void LoopChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details, int characters_filled_in, bool not_at_start) { if (body_can_be_zero_length_ || info()->visited) return; - VisitMarker marker(info()); - return ChoiceNode::GetQuickCheckDetails(details, compiler, - characters_filled_in, not_at_start); + not_at_start = not_at_start || this->not_at_start(); + DCHECK_EQ(alternatives_->length(), 2); // There's just loop and continue. + if (traversed_loop_initialization_node_ && min_loop_iterations_ > 0 && + loop_node_->EatsAtLeast(not_at_start) > + continue_node_->EatsAtLeast(true)) { + // Loop body is guaranteed to execute at least once, and consume characters + // when it does, meaning the only possible quick checks from this point + // begin with the loop body. We may recursively visit this LoopChoiceNode, + // but we temporarily decrease its minimum iteration counter so we know when + // to check the continue case. 
+ IterationDecrementer next_iteration(this); + loop_node_->GetQuickCheckDetails(details, compiler, characters_filled_in, + not_at_start); + } else { + // Might not consume anything in the loop body, so treat it like a normal + // ChoiceNode (and don't recursively visit this node again). + VisitMarker marker(info()); + ChoiceNode::GetQuickCheckDetails(details, compiler, characters_filled_in, + not_at_start); + } +} + +void LoopChoiceNode::GetQuickCheckDetailsFromLoopEntry( + QuickCheckDetails* details, RegExpCompiler* compiler, + int characters_filled_in, bool not_at_start) { + if (traversed_loop_initialization_node_) { + // We already entered this loop once, exited via its continuation node, and + // followed an outer loop's back-edge to before the loop entry point. We + // could try to reset the minimum iteration count to its starting value at + // this point, but that seems like more trouble than it's worth. It's safe + // to keep going with the current (possibly reduced) minimum iteration + // count. + GetQuickCheckDetails(details, compiler, characters_filled_in, not_at_start); + } else { + // We are entering a loop via its counter initialization action, meaning we + // are guaranteed to run the loop body at least some minimum number of times + // before running the continuation node. Set a flag so that this node knows + // (now and any times we visit it again recursively) that it was entered + // from the top. + LoopInitializationMarker marker(this); + GetQuickCheckDetails(details, compiler, characters_filled_in, not_at_start); + } } void LoopChoiceNode::FillInBMInfo(Isolate* isolate, int offset, int budget, @@ -2014,12 +2083,7 @@ void EmitHat(RegExpCompiler* compiler, RegExpNode* on_success, Trace* trace) { if (may_be_at_or_before_subject_string_start) { // The start of input counts as a newline in this context, so skip to ok if // we are at the start. 
- // TODO(jgruber): It would be less awkward to use CheckAtStart here, but - // that currently does not support a non-zero cp_offset. - Label not_at_start; - assembler->CheckNotAtStart(new_trace.cp_offset(), &not_at_start); - assembler->GoTo(&ok); - assembler->Bind(&not_at_start); + assembler->CheckAtStart(new_trace.cp_offset(), &ok); } // If we've already checked that we are not at the start of input, it's okay @@ -2049,9 +2113,8 @@ void AssertionNode::EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace) { bool not_at_start = (trace->at_start() == Trace::FALSE_VALUE); BoyerMooreLookahead* lookahead = bm_info(not_at_start); if (lookahead == nullptr) { - int eats_at_least = Min(kMaxLookaheadForBoyerMoore, - EatsAtLeast(kMaxLookaheadForBoyerMoore, - kRecursionBudget, not_at_start)); + int eats_at_least = + Min(kMaxLookaheadForBoyerMoore, EatsAtLeast(not_at_start)); if (eats_at_least >= 1) { BoyerMooreLookahead* bm = new (zone()) BoyerMooreLookahead(eats_at_least, compiler, zone()); @@ -2113,12 +2176,7 @@ void AssertionNode::BacktrackIfPrevious( if (may_be_at_or_before_subject_string_start) { // The start of input counts as a non-word character, so the question is // decided if we are at the start. - // TODO(jgruber): It would be less awkward to use CheckAtStart here, but - // that currently does not support a non-zero cp_offset. - Label not_at_start; - assembler->CheckNotAtStart(new_trace.cp_offset(), &not_at_start); - assembler->GoTo(non_word); - assembler->Bind(&not_at_start); + assembler->CheckAtStart(new_trace.cp_offset(), non_word); } // If we've already checked that we are not at the start of input, it's okay @@ -2939,8 +2997,7 @@ void ChoiceNode::SetUpPreLoad(RegExpCompiler* compiler, Trace* current_trace, if (state->eats_at_least_ == PreloadState::kEatsAtLeastNotYetInitialized) { // Save some time by looking at most one machine word ahead. state->eats_at_least_ = - EatsAtLeast(compiler->one_byte() ?
4 : 2, kRecursionBudget, - current_trace->at_start() == Trace::FALSE_VALUE); + EatsAtLeast(current_trace->at_start() == Trace::FALSE_VALUE); } state->preload_characters_ = CalculatePreloadCharacters(compiler, state->eats_at_least_); @@ -3090,9 +3147,7 @@ int ChoiceNode::EmitOptimizedUnanchoredSearch(RegExpCompiler* compiler, // small alternation. BoyerMooreLookahead* bm = bm_info(false); if (bm == nullptr) { - eats_at_least = - Min(kMaxLookaheadForBoyerMoore, - EatsAtLeast(kMaxLookaheadForBoyerMoore, kRecursionBudget, false)); + eats_at_least = Min(kMaxLookaheadForBoyerMoore, EatsAtLeast(false)); if (eats_at_least >= 1) { bm = new (zone()) BoyerMooreLookahead(eats_at_least, compiler, zone()); GuardedAlternative alt0 = alternatives_->at(0); @@ -3144,7 +3199,7 @@ void ChoiceNode::EmitChoices(RegExpCompiler* compiler, alternative.node()->EmitQuickCheck( compiler, trace, &new_trace, preload->preload_has_checked_bounds_, &alt_gen->possible_success, &alt_gen->quick_check_details, - fall_through_on_failure)) { + fall_through_on_failure, this)) { // Quick check was generated for this choice. 
preload->preload_is_current_ = true; preload->preload_has_checked_bounds_ = true; @@ -3253,9 +3308,9 @@ void ActionNode::Emit(RegExpCompiler* compiler, Trace* trace) { on_success()->Emit(compiler, &new_trace); break; } - case SET_REGISTER: { - Trace::DeferredSetRegister new_set(data_.u_store_register.reg, - data_.u_store_register.value); + case SET_REGISTER_FOR_LOOP: { + Trace::DeferredSetRegisterForLoop new_set(data_.u_store_register.reg, + data_.u_store_register.value); Trace new_trace = *trace; new_trace.add_action(&new_set); on_success()->Emit(compiler, &new_trace); @@ -3377,26 +3432,6 @@ void BackReferenceNode::Emit(RegExpCompiler* compiler, Trace* trace) { on_success()->Emit(compiler, trace); } -// ------------------------------------------------------------------- -// Analysis - -void Analysis::EnsureAnalyzed(RegExpNode* that) { - StackLimitCheck check(isolate()); - if (check.HasOverflowed()) { - fail("Stack overflow"); - return; - } - if (that->info()->been_analyzed || that->info()->being_analyzed) return; - that->info()->being_analyzed = true; - that->Accept(this); - that->info()->being_analyzed = false; - that->info()->been_analyzed = true; -} - -void Analysis::VisitEnd(EndNode* that) { - // nothing to do -} - void TextNode::CalculateOffsets() { int element_count = elements()->length(); // Set up the offsets of the elements relative to the start. This is a fixed @@ -3409,60 +3444,269 @@ void TextNode::CalculateOffsets() { } } -void Analysis::VisitText(TextNode* that) { - that->MakeCaseIndependent(isolate(), is_one_byte_); - EnsureAnalyzed(that->on_success()); - if (!has_failed()) { - that->CalculateOffsets(); - } -} +namespace { -void Analysis::VisitAction(ActionNode* that) { - RegExpNode* target = that->on_success(); - EnsureAnalyzed(target); - if (!has_failed()) { +// Assertion propagation moves information about assertions such as +// \b to the affected nodes. For instance, in /.\b./ information must +// be propagated to the first '.' 
that whatever follows needs to know +// if it matched a word or a non-word, and to the second '.' that it +// has to check if it succeeds a word or non-word. In this case the +// result will be something like: +// +// +-------+ +------------+ +// | . | | . | +// +-------+ ---> +------------+ +// | word? | | check word | +// +-------+ +------------+ +class AssertionPropagator : public AllStatic { + public: + static void VisitText(TextNode* that) {} + + static void VisitAction(ActionNode* that) { // If the next node is interested in what it follows then this node // has to be interested too so it can pass the information on. - that->info()->AddFromFollowing(target->info()); + that->info()->AddFromFollowing(that->on_success()->info()); } -} -void Analysis::VisitChoice(ChoiceNode* that) { - NodeInfo* info = that->info(); - for (int i = 0; i < that->alternatives()->length(); i++) { - RegExpNode* node = that->alternatives()->at(i).node(); - EnsureAnalyzed(node); - if (has_failed()) return; + static void VisitChoice(ChoiceNode* that, int i) { // Anything the following nodes need to know has to be known by // this node also, so it can pass it on. 
- info->AddFromFollowing(node->info()); + that->info()->AddFromFollowing(that->alternatives()->at(i).node()->info()); } -} -void Analysis::VisitLoopChoice(LoopChoiceNode* that) { - NodeInfo* info = that->info(); - for (int i = 0; i < that->alternatives()->length(); i++) { - RegExpNode* node = that->alternatives()->at(i).node(); - if (node != that->loop_node()) { - EnsureAnalyzed(node); + static void VisitLoopChoiceContinueNode(LoopChoiceNode* that) { + that->info()->AddFromFollowing(that->continue_node()->info()); + } + + static void VisitLoopChoiceLoopNode(LoopChoiceNode* that) { + that->info()->AddFromFollowing(that->loop_node()->info()); + } + + static void VisitNegativeLookaroundChoiceLookaroundNode( + NegativeLookaroundChoiceNode* that) { + VisitChoice(that, NegativeLookaroundChoiceNode::kLookaroundIndex); + } + + static void VisitNegativeLookaroundChoiceContinueNode( + NegativeLookaroundChoiceNode* that) { + VisitChoice(that, NegativeLookaroundChoiceNode::kContinueIndex); + } + + static void VisitBackReference(BackReferenceNode* that) {} + + static void VisitAssertion(AssertionNode* that) {} +}; + +// Propagates information about the minimum size of successful matches from +// successor nodes to their predecessors. Note that all eats_at_least values +// are initialized to zero before analysis. +class EatsAtLeastPropagator : public AllStatic { + public: + static void VisitText(TextNode* that) { + // The eats_at_least value is not used if reading backward. + if (!that->read_backward()) { + // We are not at the start after this node, and thus we can use the + // successor's eats_at_least_from_not_start value. 
+ uint8_t eats_at_least = base::saturated_cast<uint8_t>( + that->Length() + that->on_success() + ->eats_at_least_info() + ->eats_at_least_from_not_start); + that->set_eats_at_least_info(EatsAtLeastInfo(eats_at_least)); + } + } + + static void VisitAction(ActionNode* that) { + // POSITIVE_SUBMATCH_SUCCESS rewinds input, so we must not consider + // successor nodes for eats_at_least. SET_REGISTER_FOR_LOOP indicates a loop + // entry point, which means the loop body will run at least the minimum + // number of times before the continuation case can run. Otherwise the + // current node eats at least as much as its successor. + switch (that->action_type()) { + case ActionNode::POSITIVE_SUBMATCH_SUCCESS: + break; // Was already initialized to zero. + case ActionNode::SET_REGISTER_FOR_LOOP: + that->set_eats_at_least_info( + that->on_success()->EatsAtLeastFromLoopEntry()); + break; + default: + that->set_eats_at_least_info(*that->on_success()->eats_at_least_info()); + break; + } + } + + static void VisitChoice(ChoiceNode* that, int i) { + // The minimum possible match from a choice node is the minimum of its + // successors. + EatsAtLeastInfo eats_at_least = + i == 0 ? 
EatsAtLeastInfo(UINT8_MAX) : *that->eats_at_least_info(); + eats_at_least.SetMin( + *that->alternatives()->at(i).node()->eats_at_least_info()); + that->set_eats_at_least_info(eats_at_least); + } + + static void VisitLoopChoiceContinueNode(LoopChoiceNode* that) { + that->set_eats_at_least_info(*that->continue_node()->eats_at_least_info()); + } + + static void VisitLoopChoiceLoopNode(LoopChoiceNode* that) {} + + static void VisitNegativeLookaroundChoiceLookaroundNode( + NegativeLookaroundChoiceNode* that) {} + + static void VisitNegativeLookaroundChoiceContinueNode( + NegativeLookaroundChoiceNode* that) { + that->set_eats_at_least_info(*that->continue_node()->eats_at_least_info()); + } + + static void VisitBackReference(BackReferenceNode* that) { + if (!that->read_backward()) { + that->set_eats_at_least_info(*that->on_success()->eats_at_least_info()); + } + } + + static void VisitAssertion(AssertionNode* that) { + EatsAtLeastInfo eats_at_least = *that->on_success()->eats_at_least_info(); + if (that->assertion_type() == AssertionNode::AT_START) { + // If we know we are not at the start and we are asked "how many + // characters will you match if you succeed?" then we can answer anything + // since false implies false. So let's just set the max answer + // (UINT8_MAX) since that won't prevent us from preloading a lot of + // characters for the other branches in the node graph. + eats_at_least.eats_at_least_from_not_start = UINT8_MAX; + } + that->set_eats_at_least_info(eats_at_least); + } +}; + +} // namespace + +// ------------------------------------------------------------------- +// Analysis + +// Iterates the node graph and provides the opportunity for propagators to set +// values that depend on successor nodes. +template <typename... 
Propagators> +class Analysis : public NodeVisitor { + public: + Analysis(Isolate* isolate, bool is_one_byte) + : isolate_(isolate), is_one_byte_(is_one_byte), error_message_(nullptr) {} + + void EnsureAnalyzed(RegExpNode* that) { + StackLimitCheck check(isolate()); + if (check.HasOverflowed()) { + fail("Stack overflow"); + return; + } + if (that->info()->been_analyzed || that->info()->being_analyzed) return; + that->info()->being_analyzed = true; + that->Accept(this); + that->info()->being_analyzed = false; + that->info()->been_analyzed = true; + } + + bool has_failed() { return error_message_ != nullptr; } + const char* error_message() { + DCHECK(error_message_ != nullptr); + return error_message_; + } + void fail(const char* error_message) { error_message_ = error_message; } + + Isolate* isolate() const { return isolate_; } + + void VisitEnd(EndNode* that) override { + // nothing to do + } + +// Used to call the given static function on each propagator / variadic template +// argument. +#define STATIC_FOR_EACH(expr) \ + do { \ + int dummy[] = {((expr), 0)...}; \ + USE(dummy); \ + } while (false) + + void VisitText(TextNode* that) override { + that->MakeCaseIndependent(isolate(), is_one_byte_); + EnsureAnalyzed(that->on_success()); + if (has_failed()) return; + that->CalculateOffsets(); + STATIC_FOR_EACH(Propagators::VisitText(that)); + } + + void VisitAction(ActionNode* that) override { + EnsureAnalyzed(that->on_success()); + if (has_failed()) return; + STATIC_FOR_EACH(Propagators::VisitAction(that)); + } + + void VisitChoice(ChoiceNode* that) override { + for (int i = 0; i < that->alternatives()->length(); i++) { + EnsureAnalyzed(that->alternatives()->at(i).node()); if (has_failed()) return; - info->AddFromFollowing(node->info()); + STATIC_FOR_EACH(Propagators::VisitChoice(that, i)); } } - // Check the loop last since it may need the value of this node - // to get a correct result. 
- EnsureAnalyzed(that->loop_node()); - if (!has_failed()) { - info->AddFromFollowing(that->loop_node()->info()); + + void VisitLoopChoice(LoopChoiceNode* that) override { + DCHECK_EQ(that->alternatives()->length(), 2); // Just loop and continue. + + // First propagate all information from the continuation node. + EnsureAnalyzed(that->continue_node()); + if (has_failed()) return; + STATIC_FOR_EACH(Propagators::VisitLoopChoiceContinueNode(that)); + + // Check the loop last since it may need the value of this node + // to get a correct result. + EnsureAnalyzed(that->loop_node()); + if (has_failed()) return; + STATIC_FOR_EACH(Propagators::VisitLoopChoiceLoopNode(that)); + } + + void VisitNegativeLookaroundChoice( + NegativeLookaroundChoiceNode* that) override { + DCHECK_EQ(that->alternatives()->length(), 2); // Lookaround and continue. + + EnsureAnalyzed(that->lookaround_node()); + if (has_failed()) return; + STATIC_FOR_EACH( + Propagators::VisitNegativeLookaroundChoiceLookaroundNode(that)); + + EnsureAnalyzed(that->continue_node()); + if (has_failed()) return; + STATIC_FOR_EACH( + Propagators::VisitNegativeLookaroundChoiceContinueNode(that)); } -} -void Analysis::VisitBackReference(BackReferenceNode* that) { - EnsureAnalyzed(that->on_success()); -} + void VisitBackReference(BackReferenceNode* that) override { + EnsureAnalyzed(that->on_success()); + if (has_failed()) return; + STATIC_FOR_EACH(Propagators::VisitBackReference(that)); + } + + void VisitAssertion(AssertionNode* that) override { + EnsureAnalyzed(that->on_success()); + if (has_failed()) return; + STATIC_FOR_EACH(Propagators::VisitAssertion(that)); + } + +#undef STATIC_FOR_EACH + + private: + Isolate* isolate_; + bool is_one_byte_; + const char* error_message_; + + DISALLOW_IMPLICIT_CONSTRUCTORS(Analysis); +}; -void Analysis::VisitAssertion(AssertionNode* that) { - EnsureAnalyzed(that->on_success()); +const char* AnalyzeRegExp(Isolate* isolate, bool is_one_byte, + RegExpNode* node) { + 
Analysis<AssertionPropagator, EatsAtLeastPropagator> analysis(isolate, + is_one_byte); + DCHECK_EQ(node->info()->been_analyzed, false); + analysis.EnsureAnalyzed(node); + DCHECK_IMPLIES(analysis.has_failed(), analysis.error_message() != nullptr); + return analysis.has_failed() ? analysis.error_message() : nullptr; } void BackReferenceNode::FillInBMInfo(Isolate* isolate, int offset, int budget, diff --git a/deps/v8/src/regexp/regexp-compiler.h b/deps/v8/src/regexp/regexp-compiler.h index 1b70abfd98..2de221f35d 100644 --- a/deps/v8/src/regexp/regexp-compiler.h +++ b/deps/v8/src/regexp/regexp-compiler.h @@ -285,10 +285,11 @@ class Trace { void set_cp_offset(int cp_offset) { cp_offset_ = cp_offset; } }; - class DeferredSetRegister : public DeferredAction { + class DeferredSetRegisterForLoop : public DeferredAction { public: - DeferredSetRegister(int reg, int value) - : DeferredAction(ActionNode::SET_REGISTER, reg), value_(value) {} + DeferredSetRegisterForLoop(int reg, int value) + : DeferredAction(ActionNode::SET_REGISTER_FOR_LOOP, reg), + value_(value) {} int value() { return value_; } private: @@ -419,45 +420,13 @@ struct PreloadState { void init() { eats_at_least_ = kEatsAtLeastNotYetInitialized; } }; -// Assertion propagation moves information about assertions such as -// \b to the affected nodes. For instance, in /.\b./ information must -// be propagated to the first '.' that whatever follows needs to know -// if it matched a word or a non-word, and to the second '.' that it -// has to check if it succeeds a word or non-word. In this case the -// result will be something like: +// Analysis performs assertion propagation and computes eats_at_least_ values. +// See the comments on AssertionPropagator and EatsAtLeastPropagator for more +// details. // -// +-------+ +------------+ -// | . | | . | -// +-------+ ---> +------------+ -// | word? 
| | check word | -// +-------+ +------------+ -class Analysis : public NodeVisitor { - public: - Analysis(Isolate* isolate, bool is_one_byte) - : isolate_(isolate), is_one_byte_(is_one_byte), error_message_(nullptr) {} - void EnsureAnalyzed(RegExpNode* node); - -#define DECLARE_VISIT(Type) void Visit##Type(Type##Node* that) override; - FOR_EACH_NODE_TYPE(DECLARE_VISIT) -#undef DECLARE_VISIT - void VisitLoopChoice(LoopChoiceNode* that) override; - - bool has_failed() { return error_message_ != nullptr; } - const char* error_message() { - DCHECK(error_message_ != nullptr); - return error_message_; - } - void fail(const char* error_message) { error_message_ = error_message; } - - Isolate* isolate() const { return isolate_; } - - private: - Isolate* isolate_; - bool is_one_byte_; - const char* error_message_; - - DISALLOW_IMPLICIT_CONSTRUCTORS(Analysis); -}; +// This method returns nullptr on success or a null-terminated failure message +// on failure. +const char* AnalyzeRegExp(Isolate* isolate, bool is_one_byte, RegExpNode* node); class FrequencyCollator { public: diff --git a/deps/v8/src/regexp/regexp-dotprinter.cc b/deps/v8/src/regexp/regexp-dotprinter.cc index a6d72aaf5b..b6640626f2 100644 --- a/deps/v8/src/regexp/regexp-dotprinter.cc +++ b/deps/v8/src/regexp/regexp-dotprinter.cc @@ -114,6 +114,15 @@ void DotPrinterImpl::VisitChoice(ChoiceNode* that) { } } +void DotPrinterImpl::VisitLoopChoice(LoopChoiceNode* that) { + VisitChoice(that); +} + +void DotPrinterImpl::VisitNegativeLookaroundChoice( + NegativeLookaroundChoiceNode* that) { + VisitChoice(that); +} + void DotPrinterImpl::VisitText(TextNode* that) { Zone* zone = that->zone(); os_ << " n" << that << " [label=\""; @@ -191,7 +200,7 @@ void DotPrinterImpl::VisitAssertion(AssertionNode* that) { void DotPrinterImpl::VisitAction(ActionNode* that) { os_ << " n" << that << " ["; switch (that->action_type_) { - case ActionNode::SET_REGISTER: + case ActionNode::SET_REGISTER_FOR_LOOP: os_ << "label=\"$" << 
that->data_.u_store_register.reg << ":=" << that->data_.u_store_register.value << "\", shape=octagon"; break; diff --git a/deps/v8/src/regexp/regexp-interpreter.cc b/deps/v8/src/regexp/regexp-interpreter.cc index 881758861c..cf2fb55e4a 100644 --- a/deps/v8/src/regexp/regexp-interpreter.cc +++ b/deps/v8/src/regexp/regexp-interpreter.cc @@ -8,6 +8,7 @@ #include "src/ast/ast.h" #include "src/base/small-vector.h" +#include "src/objects/js-regexp-inl.h" #include "src/objects/objects-inl.h" #include "src/regexp/regexp-bytecodes.h" #include "src/regexp/regexp-macro-assembler.h" @@ -19,12 +20,20 @@ #include "unicode/uchar.h" #endif // V8_INTL_SUPPORT +// Use token threaded dispatch iff the compiler supports computed gotos and the +// build argument v8_enable_regexp_interpreter_threaded_dispatch was set. +#if V8_HAS_COMPUTED_GOTO && \ + defined(V8_ENABLE_REGEXP_INTERPRETER_THREADED_DISPATCH) +#define V8_USE_COMPUTED_GOTO 1 +#endif // V8_HAS_COMPUTED_GOTO + namespace v8 { namespace internal { -static bool BackRefMatchesNoCase(Isolate* isolate, int from, int current, - int len, Vector<const uc16> subject, - bool unicode) { +namespace { + +bool BackRefMatchesNoCase(Isolate* isolate, int from, int current, int len, + Vector<const uc16> subject, bool unicode) { Address offset_a = reinterpret_cast<Address>(const_cast<uc16*>(&subject.at(from))); Address offset_b = @@ -34,9 +43,8 @@ static bool BackRefMatchesNoCase(Isolate* isolate, int from, int current, offset_a, offset_b, length, unicode ? nullptr : isolate) == 1; } -static bool BackRefMatchesNoCase(Isolate* isolate, int from, int current, - int len, Vector<const uint8_t> subject, - bool unicode) { +bool BackRefMatchesNoCase(Isolate* isolate, int from, int current, int len, + Vector<const uint8_t> subject, bool unicode) { // For Latin1 characters the unicode flag makes no difference. 
for (int i = 0; i < len; i++) { unsigned int old_char = subject[from++]; @@ -55,49 +63,48 @@ static bool BackRefMatchesNoCase(Isolate* isolate, int from, int current, return true; } +void DisassembleSingleBytecode(const byte* code_base, const byte* pc) { + PrintF("%s", RegExpBytecodeName(*pc)); + + // Args and the bytecode as hex. + for (int i = 0; i < RegExpBytecodeLength(*pc); i++) { + PrintF(", %02x", pc[i]); + } + PrintF(" "); + + // Args as ascii. + for (int i = 1; i < RegExpBytecodeLength(*pc); i++) { + unsigned char b = pc[i]; + PrintF("%c", std::isprint(b) ? b : '.'); + } + PrintF("\n"); +} + #ifdef DEBUG -static void TraceInterpreter(const byte* code_base, const byte* pc, - int stack_depth, int current_position, - uint32_t current_char, int bytecode_length, - const char* bytecode_name) { +void MaybeTraceInterpreter(const byte* code_base, const byte* pc, + int stack_depth, int current_position, + uint32_t current_char, int bytecode_length, + const char* bytecode_name) { if (FLAG_trace_regexp_bytecodes) { - bool printable = (current_char < 127 && current_char >= 32); + const bool printable = std::isprint(current_char); const char* format = printable - ? "pc = %02x, sp = %d, curpos = %d, curchar = %08x (%c), bc = %s" - : "pc = %02x, sp = %d, curpos = %d, curchar = %08x .%c., bc = %s"; + ? "pc = %02x, sp = %d, curpos = %d, curchar = %08x (%c), bc = " + : "pc = %02x, sp = %d, curpos = %d, curchar = %08x .%c., bc = "; PrintF(format, pc - code_base, stack_depth, current_position, current_char, - printable ? current_char : '.', bytecode_name); - for (int i = 0; i < bytecode_length; i++) { - printf(", %02x", pc[i]); - } - printf(" "); - for (int i = 1; i < bytecode_length; i++) { - unsigned char b = pc[i]; - if (b < 127 && b >= 32) { - printf("%c", b); - } else { - printf("."); - } - } - printf("\n"); + printable ? 
current_char : '.'); + + DisassembleSingleBytecode(code_base, pc); } } +#endif // DEBUG -#define BYTECODE(name) \ - case BC_##name: \ - TraceInterpreter(code_base, pc, backtrack_stack.sp(), current, \ - current_char, BC_##name##_LENGTH, #name); -#else -#define BYTECODE(name) case BC_##name: -#endif - -static int32_t Load32Aligned(const byte* pc) { +int32_t Load32Aligned(const byte* pc) { DCHECK_EQ(0, reinterpret_cast<intptr_t>(pc) & 3); return *reinterpret_cast<const int32_t*>(pc); } -static int32_t Load16Aligned(const byte* pc) { +int32_t Load16Aligned(const byte* pc) { DCHECK_EQ(0, reinterpret_cast<intptr_t>(pc) & 1); return *reinterpret_cast<const uint16_t*>(pc); } @@ -139,9 +146,9 @@ class BacktrackStack { DISALLOW_COPY_AND_ASSIGN(BacktrackStack); }; -namespace { - -IrregexpInterpreter::Result StackOverflow(Isolate* isolate) { +IrregexpInterpreter::Result StackOverflow(Isolate* isolate, + RegExp::CallOrigin call_origin) { + CHECK(call_origin == RegExp::CallOrigin::kFromRuntime); // We abort interpreter execution after the stack overflow is thrown, and thus // allow allocation here despite the outer DisallowHeapAllocationScope. AllowHeapAllocation yes_gc; @@ -149,72 +156,154 @@ IrregexpInterpreter::Result StackOverflow(Isolate* isolate) { return IrregexpInterpreter::EXCEPTION; } -// Runs all pending interrupts. Callers must update unhandlified object -// references after this function completes. -IrregexpInterpreter::Result HandleInterrupts(Isolate* isolate, - Handle<String> subject_string) { +template <typename Char> +void UpdateCodeAndSubjectReferences( + Isolate* isolate, Handle<ByteArray> code_array, + Handle<String> subject_string, ByteArray* code_array_out, + const byte** code_base_out, const byte** pc_out, String* subject_string_out, + Vector<const Char>* subject_string_vector_out) { DisallowHeapAllocation no_gc; - StackLimitCheck check(isolate); - if (check.JsHasOverflowed()) { - return StackOverflow(isolate); // A real stack overflow. 
+ if (*code_base_out != code_array->GetDataStartAddress()) { + *code_array_out = *code_array; + const intptr_t pc_offset = *pc_out - *code_base_out; + DCHECK_GT(pc_offset, 0); + *code_base_out = code_array->GetDataStartAddress(); + *pc_out = *code_base_out + pc_offset; } - // Handle interrupts if any exist. - if (check.InterruptRequested()) { - const bool was_one_byte = - String::IsOneByteRepresentationUnderneath(*subject_string); + DCHECK(subject_string->IsFlat()); + *subject_string_out = *subject_string; + *subject_string_vector_out = subject_string->GetCharVector<Char>(no_gc); +} - Object result; - { - AllowHeapAllocation yes_gc; - result = isolate->stack_guard()->HandleInterrupts(); - } +// Runs all pending interrupts and updates unhandlified object references if +// necessary. +template <typename Char> +IrregexpInterpreter::Result HandleInterrupts( + Isolate* isolate, RegExp::CallOrigin call_origin, ByteArray* code_array_out, + String* subject_string_out, const byte** code_base_out, + Vector<const Char>* subject_string_vector_out, const byte** pc_out) { + DisallowHeapAllocation no_gc; - if (result.IsException(isolate)) { + StackLimitCheck check(isolate); + bool js_has_overflowed = check.JsHasOverflowed(); + + if (call_origin == RegExp::CallOrigin::kFromJs) { + // Direct calls from JavaScript can be interrupted in two ways: + // 1. A real stack overflow, in which case we let the caller throw the + // exception. + // 2. The stack guard was used to interrupt execution for another purpose, + // forcing the call through the runtime system. + if (js_has_overflowed) { return IrregexpInterpreter::EXCEPTION; - } - - // If we changed between a LATIN1 and a UC16 string, we need to restart - // regexp matching with the appropriate template instantiation of RawMatch. 
- if (String::IsOneByteRepresentationUnderneath(*subject_string) != - was_one_byte) { + } else if (check.InterruptRequested()) { return IrregexpInterpreter::RETRY; } + } else { + DCHECK(call_origin == RegExp::CallOrigin::kFromRuntime); + // Prepare for possible GC. + HandleScope handles(isolate); + Handle<ByteArray> code_handle(*code_array_out, isolate); + Handle<String> subject_handle(*subject_string_out, isolate); + + if (js_has_overflowed) { + return StackOverflow(isolate, call_origin); + } else if (check.InterruptRequested()) { + const bool was_one_byte = + String::IsOneByteRepresentationUnderneath(*subject_string_out); + Object result; + { + AllowHeapAllocation yes_gc; + result = isolate->stack_guard()->HandleInterrupts(); + } + if (result.IsException(isolate)) { + return IrregexpInterpreter::EXCEPTION; + } + + // If we changed between a LATIN1 and a UC16 string, we need to restart + // regexp matching with the appropriate template instantiation of + // RawMatch. + if (String::IsOneByteRepresentationUnderneath(*subject_handle) != + was_one_byte) { + return IrregexpInterpreter::RETRY; + } + + UpdateCodeAndSubjectReferences( + isolate, code_handle, subject_handle, code_array_out, code_base_out, + pc_out, subject_string_out, subject_string_vector_out); + } } return IrregexpInterpreter::SUCCESS; } -template <typename Char> -void UpdateCodeAndSubjectReferences(Isolate* isolate, - Handle<ByteArray> code_array, - Handle<String> subject_string, - const byte** code_base_out, - const byte** pc_out, - Vector<const Char>* subject_string_out) { - DisallowHeapAllocation no_gc; +// If computed gotos are supported by the compiler, we can get addresses to +// labels directly in C/C++. Every bytecode handler has its own label and we +// store the addresses in a dispatch table indexed by bytecode. To execute the +// next handler we simply jump (goto) directly to its address. 
+#if V8_USE_COMPUTED_GOTO +#define BC_LABEL(name) BC_##name: +#define DECODE() \ + do { \ + next_insn = Load32Aligned(next_pc); \ + next_handler_addr = dispatch_table[next_insn & BYTECODE_MASK]; \ + } while (false) +#define DISPATCH() \ + pc = next_pc; \ + insn = next_insn; \ + goto* next_handler_addr +// Without computed goto support, we fall back to a simple switch-based +// dispatch (A large switch statement inside a loop with a case for every +// bytecode). +#else // V8_USE_COMPUTED_GOTO +#define BC_LABEL(name) case BC_##name: +#define DECODE() next_insn = Load32Aligned(next_pc) +#define DISPATCH() \ + pc = next_pc; \ + insn = next_insn; \ + break +#endif // V8_USE_COMPUTED_GOTO + +// ADVANCE/SET_PC_FROM_OFFSET are separated from DISPATCH, because ideally some +// instructions can be executed between ADVANCE/SET_PC_FROM_OFFSET and DISPATCH. +// We want those two macros as far apart as possible, because the goto in +// DISPATCH is dependent on a memory load in ADVANCE/SET_PC_FROM_OFFSET. If we +// don't hit the cache and have to fetch the next handler address from physical +// memory, instructions between ADVANCE/SET_PC_FROM_OFFSET and DISPATCH can +// potentially be executed unconditionally, reducing memory stall. 
+#define ADVANCE(name) \ + next_pc = pc + RegExpBytecodeLength(BC_##name); \ + DECODE() +#define SET_PC_FROM_OFFSET(offset) \ + next_pc = code_base + offset; \ + DECODE() - if (*code_base_out != code_array->GetDataStartAddress()) { - const intptr_t pc_offset = *pc_out - *code_base_out; - DCHECK_GT(pc_offset, 0); - *code_base_out = code_array->GetDataStartAddress(); - *pc_out = *code_base_out + pc_offset; - } - - DCHECK(subject_string->IsFlat()); - *subject_string_out = subject_string->GetCharVector<Char>(no_gc); -} +#ifdef DEBUG +#define BYTECODE(name) \ + BC_LABEL(name) \ + MaybeTraceInterpreter(code_base, pc, backtrack_stack.sp(), current, \ + current_char, RegExpBytecodeLength(BC_##name), #name); +#else +#define BYTECODE(name) BC_LABEL(name) +#endif // DEBUG template <typename Char> -IrregexpInterpreter::Result RawMatch(Isolate* isolate, - Handle<ByteArray> code_array, - Handle<String> subject_string, +IrregexpInterpreter::Result RawMatch(Isolate* isolate, ByteArray code_array, + String subject_string, Vector<const Char> subject, int* registers, - int current, uint32_t current_char) { + int current, uint32_t current_char, + RegExp::CallOrigin call_origin) { DisallowHeapAllocation no_gc; - const byte* pc = code_array->GetDataStartAddress(); +#if V8_USE_COMPUTED_GOTO +#define DECLARE_DISPATCH_TABLE_ENTRY(name, code, length) &&BC_##name, + static const void* const dispatch_table[] = { + BYTECODE_ITERATOR(DECLARE_DISPATCH_TABLE_ENTRY)}; +#undef DECLARE_DISPATCH_TABLE_ENTRY +#endif + + const byte* pc = code_array.GetDataStartAddress(); const byte* code_base = pc; BacktrackStack backtrack_stack; @@ -224,457 +313,572 @@ IrregexpInterpreter::Result RawMatch(Isolate* isolate, PrintF("\n\nStart bytecode interpreter\n\n"); } #endif + while (true) { - const int32_t insn = Load32Aligned(pc); + const byte* next_pc = pc; + int32_t insn; + int32_t next_insn; +#if V8_USE_COMPUTED_GOTO + const void* next_handler_addr; + DECODE(); + DISPATCH(); +#else + insn = Load32Aligned(pc); 
switch (insn & BYTECODE_MASK) { - BYTECODE(BREAK) { UNREACHABLE(); } - BYTECODE(PUSH_CP) { - backtrack_stack.push(current); - pc += BC_PUSH_CP_LENGTH; - break; - } - BYTECODE(PUSH_BT) { - backtrack_stack.push(Load32Aligned(pc + 4)); - pc += BC_PUSH_BT_LENGTH; - break; - } - BYTECODE(PUSH_REGISTER) { - backtrack_stack.push(registers[insn >> BYTECODE_SHIFT]); - pc += BC_PUSH_REGISTER_LENGTH; - break; - } - BYTECODE(SET_REGISTER) { - registers[insn >> BYTECODE_SHIFT] = Load32Aligned(pc + 4); - pc += BC_SET_REGISTER_LENGTH; - break; - } - BYTECODE(ADVANCE_REGISTER) { - registers[insn >> BYTECODE_SHIFT] += Load32Aligned(pc + 4); - pc += BC_ADVANCE_REGISTER_LENGTH; - break; - } - BYTECODE(SET_REGISTER_TO_CP) { - registers[insn >> BYTECODE_SHIFT] = current + Load32Aligned(pc + 4); - pc += BC_SET_REGISTER_TO_CP_LENGTH; - break; - } - BYTECODE(SET_CP_TO_REGISTER) { - current = registers[insn >> BYTECODE_SHIFT]; - pc += BC_SET_CP_TO_REGISTER_LENGTH; - break; - } - BYTECODE(SET_REGISTER_TO_SP) { - registers[insn >> BYTECODE_SHIFT] = backtrack_stack.sp(); - pc += BC_SET_REGISTER_TO_SP_LENGTH; - break; - } - BYTECODE(SET_SP_TO_REGISTER) { - backtrack_stack.set_sp(registers[insn >> BYTECODE_SHIFT]); - pc += BC_SET_SP_TO_REGISTER_LENGTH; - break; - } - BYTECODE(POP_CP) { - current = backtrack_stack.pop(); - pc += BC_POP_CP_LENGTH; - break; - } - BYTECODE(POP_BT) { - IrregexpInterpreter::Result return_code = - HandleInterrupts(isolate, subject_string); - if (return_code != IrregexpInterpreter::SUCCESS) return return_code; - - UpdateCodeAndSubjectReferences(isolate, code_array, subject_string, - &code_base, &pc, &subject); - - pc = code_base + backtrack_stack.pop(); - break; - } - BYTECODE(POP_REGISTER) { - registers[insn >> BYTECODE_SHIFT] = backtrack_stack.pop(); - pc += BC_POP_REGISTER_LENGTH; - break; - } - BYTECODE(FAIL) { return IrregexpInterpreter::FAILURE; } - BYTECODE(SUCCEED) { return IrregexpInterpreter::SUCCESS; } - BYTECODE(ADVANCE_CP) { - current += insn >> 
BYTECODE_SHIFT; - pc += BC_ADVANCE_CP_LENGTH; - break; - } - BYTECODE(GOTO) { - pc = code_base + Load32Aligned(pc + 4); - break; - } - BYTECODE(ADVANCE_CP_AND_GOTO) { - current += insn >> BYTECODE_SHIFT; - pc = code_base + Load32Aligned(pc + 4); - break; - } - BYTECODE(CHECK_GREEDY) { - if (current == backtrack_stack.peek()) { - backtrack_stack.pop(); - pc = code_base + Load32Aligned(pc + 4); - } else { - pc += BC_CHECK_GREEDY_LENGTH; - } - break; - } - BYTECODE(LOAD_CURRENT_CHAR) { - int pos = current + (insn >> BYTECODE_SHIFT); - if (pos >= subject.length() || pos < 0) { - pc = code_base + Load32Aligned(pc + 4); - } else { - current_char = subject[pos]; - pc += BC_LOAD_CURRENT_CHAR_LENGTH; - } - break; +#endif // V8_USE_COMPUTED_GOTO + BYTECODE(BREAK) { UNREACHABLE(); } + BYTECODE(PUSH_CP) { + ADVANCE(PUSH_CP); + backtrack_stack.push(current); + DISPATCH(); + } + BYTECODE(PUSH_BT) { + ADVANCE(PUSH_BT); + backtrack_stack.push(Load32Aligned(pc + 4)); + DISPATCH(); + } + BYTECODE(PUSH_REGISTER) { + ADVANCE(PUSH_REGISTER); + backtrack_stack.push(registers[insn >> BYTECODE_SHIFT]); + DISPATCH(); + } + BYTECODE(SET_REGISTER) { + ADVANCE(SET_REGISTER); + registers[insn >> BYTECODE_SHIFT] = Load32Aligned(pc + 4); + DISPATCH(); + } + BYTECODE(ADVANCE_REGISTER) { + ADVANCE(ADVANCE_REGISTER); + registers[insn >> BYTECODE_SHIFT] += Load32Aligned(pc + 4); + DISPATCH(); + } + BYTECODE(SET_REGISTER_TO_CP) { + ADVANCE(SET_REGISTER_TO_CP); + registers[insn >> BYTECODE_SHIFT] = current + Load32Aligned(pc + 4); + DISPATCH(); + } + BYTECODE(SET_CP_TO_REGISTER) { + ADVANCE(SET_CP_TO_REGISTER); + current = registers[insn >> BYTECODE_SHIFT]; + DISPATCH(); + } + BYTECODE(SET_REGISTER_TO_SP) { + ADVANCE(SET_REGISTER_TO_SP); + registers[insn >> BYTECODE_SHIFT] = backtrack_stack.sp(); + DISPATCH(); + } + BYTECODE(SET_SP_TO_REGISTER) { + ADVANCE(SET_SP_TO_REGISTER); + backtrack_stack.set_sp(registers[insn >> BYTECODE_SHIFT]); + DISPATCH(); + } + BYTECODE(POP_CP) { + ADVANCE(POP_CP); + 
current = backtrack_stack.pop(); + DISPATCH(); + } + BYTECODE(POP_BT) { + IrregexpInterpreter::Result return_code = + HandleInterrupts(isolate, call_origin, &code_array, &subject_string, + &code_base, &subject, &pc); + if (return_code != IrregexpInterpreter::SUCCESS) return return_code; + + SET_PC_FROM_OFFSET(backtrack_stack.pop()); + DISPATCH(); + } + BYTECODE(POP_REGISTER) { + ADVANCE(POP_REGISTER); + registers[insn >> BYTECODE_SHIFT] = backtrack_stack.pop(); + DISPATCH(); + } + BYTECODE(FAIL) { return IrregexpInterpreter::FAILURE; } + BYTECODE(SUCCEED) { return IrregexpInterpreter::SUCCESS; } + BYTECODE(ADVANCE_CP) { + ADVANCE(ADVANCE_CP); + current += insn >> BYTECODE_SHIFT; + DISPATCH(); + } + BYTECODE(GOTO) { + SET_PC_FROM_OFFSET(Load32Aligned(pc + 4)); + DISPATCH(); + } + BYTECODE(ADVANCE_CP_AND_GOTO) { + SET_PC_FROM_OFFSET(Load32Aligned(pc + 4)); + current += insn >> BYTECODE_SHIFT; + DISPATCH(); + } + BYTECODE(CHECK_GREEDY) { + if (current == backtrack_stack.peek()) { + SET_PC_FROM_OFFSET(Load32Aligned(pc + 4)); + backtrack_stack.pop(); + } else { + ADVANCE(CHECK_GREEDY); } - BYTECODE(LOAD_CURRENT_CHAR_UNCHECKED) { - int pos = current + (insn >> BYTECODE_SHIFT); + DISPATCH(); + } + BYTECODE(LOAD_CURRENT_CHAR) { + int pos = current + (insn >> BYTECODE_SHIFT); + if (pos >= subject.length() || pos < 0) { + SET_PC_FROM_OFFSET(Load32Aligned(pc + 4)); + } else { + ADVANCE(LOAD_CURRENT_CHAR); current_char = subject[pos]; - pc += BC_LOAD_CURRENT_CHAR_UNCHECKED_LENGTH; - break; - } - BYTECODE(LOAD_2_CURRENT_CHARS) { - int pos = current + (insn >> BYTECODE_SHIFT); - if (pos + 2 > subject.length() || pos < 0) { - pc = code_base + Load32Aligned(pc + 4); - } else { - Char next = subject[pos + 1]; - current_char = - (subject[pos] | (next << (kBitsPerByte * sizeof(Char)))); - pc += BC_LOAD_2_CURRENT_CHARS_LENGTH; - } - break; } - BYTECODE(LOAD_2_CURRENT_CHARS_UNCHECKED) { - int pos = current + (insn >> BYTECODE_SHIFT); + DISPATCH(); + } + 
BYTECODE(LOAD_CURRENT_CHAR_UNCHECKED) { + ADVANCE(LOAD_CURRENT_CHAR_UNCHECKED); + int pos = current + (insn >> BYTECODE_SHIFT); + current_char = subject[pos]; + DISPATCH(); + } + BYTECODE(LOAD_2_CURRENT_CHARS) { + int pos = current + (insn >> BYTECODE_SHIFT); + if (pos + 2 > subject.length() || pos < 0) { + SET_PC_FROM_OFFSET(Load32Aligned(pc + 4)); + } else { + ADVANCE(LOAD_2_CURRENT_CHARS); Char next = subject[pos + 1]; current_char = (subject[pos] | (next << (kBitsPerByte * sizeof(Char)))); - pc += BC_LOAD_2_CURRENT_CHARS_UNCHECKED_LENGTH; - break; - } - BYTECODE(LOAD_4_CURRENT_CHARS) { - DCHECK_EQ(1, sizeof(Char)); - int pos = current + (insn >> BYTECODE_SHIFT); - if (pos + 4 > subject.length() || pos < 0) { - pc = code_base + Load32Aligned(pc + 4); - } else { - Char next1 = subject[pos + 1]; - Char next2 = subject[pos + 2]; - Char next3 = subject[pos + 3]; - current_char = - (subject[pos] | (next1 << 8) | (next2 << 16) | (next3 << 24)); - pc += BC_LOAD_4_CURRENT_CHARS_LENGTH; - } - break; } - BYTECODE(LOAD_4_CURRENT_CHARS_UNCHECKED) { - DCHECK_EQ(1, sizeof(Char)); - int pos = current + (insn >> BYTECODE_SHIFT); + DISPATCH(); + } + BYTECODE(LOAD_2_CURRENT_CHARS_UNCHECKED) { + ADVANCE(LOAD_2_CURRENT_CHARS_UNCHECKED); + int pos = current + (insn >> BYTECODE_SHIFT); + Char next = subject[pos + 1]; + current_char = (subject[pos] | (next << (kBitsPerByte * sizeof(Char)))); + DISPATCH(); + } + BYTECODE(LOAD_4_CURRENT_CHARS) { + DCHECK_EQ(1, sizeof(Char)); + int pos = current + (insn >> BYTECODE_SHIFT); + if (pos + 4 > subject.length() || pos < 0) { + SET_PC_FROM_OFFSET(Load32Aligned(pc + 4)); + } else { + ADVANCE(LOAD_4_CURRENT_CHARS); Char next1 = subject[pos + 1]; Char next2 = subject[pos + 2]; Char next3 = subject[pos + 3]; current_char = (subject[pos] | (next1 << 8) | (next2 << 16) | (next3 << 24)); - pc += BC_LOAD_4_CURRENT_CHARS_UNCHECKED_LENGTH; - break; - } - BYTECODE(CHECK_4_CHARS) { - uint32_t c = Load32Aligned(pc + 4); - if (c == current_char) { - pc = 
code_base + Load32Aligned(pc + 8); - } else { - pc += BC_CHECK_4_CHARS_LENGTH; - } - break; - } - BYTECODE(CHECK_CHAR) { - uint32_t c = (insn >> BYTECODE_SHIFT); - if (c == current_char) { - pc = code_base + Load32Aligned(pc + 4); - } else { - pc += BC_CHECK_CHAR_LENGTH; - } - break; - } - BYTECODE(CHECK_NOT_4_CHARS) { - uint32_t c = Load32Aligned(pc + 4); - if (c != current_char) { - pc = code_base + Load32Aligned(pc + 8); - } else { - pc += BC_CHECK_NOT_4_CHARS_LENGTH; - } - break; - } - BYTECODE(CHECK_NOT_CHAR) { - uint32_t c = (insn >> BYTECODE_SHIFT); - if (c != current_char) { - pc = code_base + Load32Aligned(pc + 4); - } else { - pc += BC_CHECK_NOT_CHAR_LENGTH; - } - break; - } - BYTECODE(AND_CHECK_4_CHARS) { - uint32_t c = Load32Aligned(pc + 4); - if (c == (current_char & Load32Aligned(pc + 8))) { - pc = code_base + Load32Aligned(pc + 12); - } else { - pc += BC_AND_CHECK_4_CHARS_LENGTH; - } - break; - } - BYTECODE(AND_CHECK_CHAR) { - uint32_t c = (insn >> BYTECODE_SHIFT); - if (c == (current_char & Load32Aligned(pc + 4))) { - pc = code_base + Load32Aligned(pc + 8); - } else { - pc += BC_AND_CHECK_CHAR_LENGTH; - } - break; - } - BYTECODE(AND_CHECK_NOT_4_CHARS) { - uint32_t c = Load32Aligned(pc + 4); - if (c != (current_char & Load32Aligned(pc + 8))) { - pc = code_base + Load32Aligned(pc + 12); - } else { - pc += BC_AND_CHECK_NOT_4_CHARS_LENGTH; - } - break; - } - BYTECODE(AND_CHECK_NOT_CHAR) { - uint32_t c = (insn >> BYTECODE_SHIFT); - if (c != (current_char & Load32Aligned(pc + 4))) { - pc = code_base + Load32Aligned(pc + 8); - } else { - pc += BC_AND_CHECK_NOT_CHAR_LENGTH; - } - break; - } - BYTECODE(MINUS_AND_CHECK_NOT_CHAR) { - uint32_t c = (insn >> BYTECODE_SHIFT); - uint32_t minus = Load16Aligned(pc + 4); - uint32_t mask = Load16Aligned(pc + 6); - if (c != ((current_char - minus) & mask)) { - pc = code_base + Load32Aligned(pc + 8); - } else { - pc += BC_MINUS_AND_CHECK_NOT_CHAR_LENGTH; - } - break; - } - BYTECODE(CHECK_CHAR_IN_RANGE) { - uint32_t from 
= Load16Aligned(pc + 4); - uint32_t to = Load16Aligned(pc + 6); - if (from <= current_char && current_char <= to) { - pc = code_base + Load32Aligned(pc + 8); - } else { - pc += BC_CHECK_CHAR_IN_RANGE_LENGTH; - } - break; - } - BYTECODE(CHECK_CHAR_NOT_IN_RANGE) { - uint32_t from = Load16Aligned(pc + 4); - uint32_t to = Load16Aligned(pc + 6); - if (from > current_char || current_char > to) { - pc = code_base + Load32Aligned(pc + 8); - } else { - pc += BC_CHECK_CHAR_NOT_IN_RANGE_LENGTH; - } - break; - } - BYTECODE(CHECK_BIT_IN_TABLE) { - int mask = RegExpMacroAssembler::kTableMask; - byte b = pc[8 + ((current_char & mask) >> kBitsPerByteLog2)]; - int bit = (current_char & (kBitsPerByte - 1)); - if ((b & (1 << bit)) != 0) { - pc = code_base + Load32Aligned(pc + 4); - } else { - pc += BC_CHECK_BIT_IN_TABLE_LENGTH; - } - break; - } - BYTECODE(CHECK_LT) { - uint32_t limit = (insn >> BYTECODE_SHIFT); - if (current_char < limit) { - pc = code_base + Load32Aligned(pc + 4); - } else { - pc += BC_CHECK_LT_LENGTH; - } - break; - } - BYTECODE(CHECK_GT) { - uint32_t limit = (insn >> BYTECODE_SHIFT); - if (current_char > limit) { - pc = code_base + Load32Aligned(pc + 4); - } else { - pc += BC_CHECK_GT_LENGTH; - } - break; } - BYTECODE(CHECK_REGISTER_LT) { - if (registers[insn >> BYTECODE_SHIFT] < Load32Aligned(pc + 4)) { - pc = code_base + Load32Aligned(pc + 8); - } else { - pc += BC_CHECK_REGISTER_LT_LENGTH; - } - break; + DISPATCH(); + } + BYTECODE(LOAD_4_CURRENT_CHARS_UNCHECKED) { + ADVANCE(LOAD_4_CURRENT_CHARS_UNCHECKED); + DCHECK_EQ(1, sizeof(Char)); + int pos = current + (insn >> BYTECODE_SHIFT); + Char next1 = subject[pos + 1]; + Char next2 = subject[pos + 2]; + Char next3 = subject[pos + 3]; + current_char = + (subject[pos] | (next1 << 8) | (next2 << 16) | (next3 << 24)); + DISPATCH(); + } + BYTECODE(CHECK_4_CHARS) { + uint32_t c = Load32Aligned(pc + 4); + if (c == current_char) { + SET_PC_FROM_OFFSET(Load32Aligned(pc + 8)); + } else { + ADVANCE(CHECK_4_CHARS); } - 
BYTECODE(CHECK_REGISTER_GE) { - if (registers[insn >> BYTECODE_SHIFT] >= Load32Aligned(pc + 4)) { - pc = code_base + Load32Aligned(pc + 8); - } else { - pc += BC_CHECK_REGISTER_GE_LENGTH; - } - break; + DISPATCH(); + } + BYTECODE(CHECK_CHAR) { + uint32_t c = (insn >> BYTECODE_SHIFT); + if (c == current_char) { + SET_PC_FROM_OFFSET(Load32Aligned(pc + 4)); + } else { + ADVANCE(CHECK_CHAR); } - BYTECODE(CHECK_REGISTER_EQ_POS) { - if (registers[insn >> BYTECODE_SHIFT] == current) { - pc = code_base + Load32Aligned(pc + 4); - } else { - pc += BC_CHECK_REGISTER_EQ_POS_LENGTH; - } - break; - } - BYTECODE(CHECK_NOT_REGS_EQUAL) { - if (registers[insn >> BYTECODE_SHIFT] == - registers[Load32Aligned(pc + 4)]) { - pc += BC_CHECK_NOT_REGS_EQUAL_LENGTH; - } else { - pc = code_base + Load32Aligned(pc + 8); - } - break; - } - BYTECODE(CHECK_NOT_BACK_REF) { - int from = registers[insn >> BYTECODE_SHIFT]; - int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from; - if (from >= 0 && len > 0) { - if (current + len > subject.length() || - CompareChars(&subject[from], &subject[current], len) != 0) { - pc = code_base + Load32Aligned(pc + 4); - break; - } - current += len; - } - pc += BC_CHECK_NOT_BACK_REF_LENGTH; - break; - } - BYTECODE(CHECK_NOT_BACK_REF_BACKWARD) { - int from = registers[insn >> BYTECODE_SHIFT]; - int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from; - if (from >= 0 && len > 0) { - if (current - len < 0 || - CompareChars(&subject[from], &subject[current - len], len) != 0) { - pc = code_base + Load32Aligned(pc + 4); - break; - } - current -= len; + DISPATCH(); + } + BYTECODE(CHECK_NOT_4_CHARS) { + uint32_t c = Load32Aligned(pc + 4); + if (c != current_char) { + SET_PC_FROM_OFFSET(Load32Aligned(pc + 8)); + } else { + ADVANCE(CHECK_NOT_4_CHARS); + } + DISPATCH(); + } + BYTECODE(CHECK_NOT_CHAR) { + uint32_t c = (insn >> BYTECODE_SHIFT); + if (c != current_char) { + SET_PC_FROM_OFFSET(Load32Aligned(pc + 4)); + } else { + ADVANCE(CHECK_NOT_CHAR); + } + DISPATCH(); + } + 
BYTECODE(AND_CHECK_4_CHARS) { + uint32_t c = Load32Aligned(pc + 4); + if (c == (current_char & Load32Aligned(pc + 8))) { + SET_PC_FROM_OFFSET(Load32Aligned(pc + 12)); + } else { + ADVANCE(AND_CHECK_4_CHARS); + } + DISPATCH(); + } + BYTECODE(AND_CHECK_CHAR) { + uint32_t c = (insn >> BYTECODE_SHIFT); + if (c == (current_char & Load32Aligned(pc + 4))) { + SET_PC_FROM_OFFSET(Load32Aligned(pc + 8)); + } else { + ADVANCE(AND_CHECK_CHAR); + } + DISPATCH(); + } + BYTECODE(AND_CHECK_NOT_4_CHARS) { + uint32_t c = Load32Aligned(pc + 4); + if (c != (current_char & Load32Aligned(pc + 8))) { + SET_PC_FROM_OFFSET(Load32Aligned(pc + 12)); + } else { + ADVANCE(AND_CHECK_NOT_4_CHARS); + } + DISPATCH(); + } + BYTECODE(AND_CHECK_NOT_CHAR) { + uint32_t c = (insn >> BYTECODE_SHIFT); + if (c != (current_char & Load32Aligned(pc + 4))) { + SET_PC_FROM_OFFSET(Load32Aligned(pc + 8)); + } else { + ADVANCE(AND_CHECK_NOT_CHAR); + } + DISPATCH(); + } + BYTECODE(MINUS_AND_CHECK_NOT_CHAR) { + uint32_t c = (insn >> BYTECODE_SHIFT); + uint32_t minus = Load16Aligned(pc + 4); + uint32_t mask = Load16Aligned(pc + 6); + if (c != ((current_char - minus) & mask)) { + SET_PC_FROM_OFFSET(Load32Aligned(pc + 8)); + } else { + ADVANCE(MINUS_AND_CHECK_NOT_CHAR); + } + DISPATCH(); + } + BYTECODE(CHECK_CHAR_IN_RANGE) { + uint32_t from = Load16Aligned(pc + 4); + uint32_t to = Load16Aligned(pc + 6); + if (from <= current_char && current_char <= to) { + SET_PC_FROM_OFFSET(Load32Aligned(pc + 8)); + } else { + ADVANCE(CHECK_CHAR_IN_RANGE); + } + DISPATCH(); + } + BYTECODE(CHECK_CHAR_NOT_IN_RANGE) { + uint32_t from = Load16Aligned(pc + 4); + uint32_t to = Load16Aligned(pc + 6); + if (from > current_char || current_char > to) { + SET_PC_FROM_OFFSET(Load32Aligned(pc + 8)); + } else { + ADVANCE(CHECK_CHAR_NOT_IN_RANGE); + } + DISPATCH(); + } + BYTECODE(CHECK_BIT_IN_TABLE) { + int mask = RegExpMacroAssembler::kTableMask; + byte b = pc[8 + ((current_char & mask) >> kBitsPerByteLog2)]; + int bit = (current_char & 
(kBitsPerByte - 1)); + if ((b & (1 << bit)) != 0) { + SET_PC_FROM_OFFSET(Load32Aligned(pc + 4)); + } else { + ADVANCE(CHECK_BIT_IN_TABLE); + } + DISPATCH(); + } + BYTECODE(CHECK_LT) { + uint32_t limit = (insn >> BYTECODE_SHIFT); + if (current_char < limit) { + SET_PC_FROM_OFFSET(Load32Aligned(pc + 4)); + } else { + ADVANCE(CHECK_LT); + } + DISPATCH(); + } + BYTECODE(CHECK_GT) { + uint32_t limit = (insn >> BYTECODE_SHIFT); + if (current_char > limit) { + SET_PC_FROM_OFFSET(Load32Aligned(pc + 4)); + } else { + ADVANCE(CHECK_GT); + } + DISPATCH(); + } + BYTECODE(CHECK_REGISTER_LT) { + if (registers[insn >> BYTECODE_SHIFT] < Load32Aligned(pc + 4)) { + SET_PC_FROM_OFFSET(Load32Aligned(pc + 8)); + } else { + ADVANCE(CHECK_REGISTER_LT); + } + DISPATCH(); + } + BYTECODE(CHECK_REGISTER_GE) { + if (registers[insn >> BYTECODE_SHIFT] >= Load32Aligned(pc + 4)) { + SET_PC_FROM_OFFSET(Load32Aligned(pc + 8)); + } else { + ADVANCE(CHECK_REGISTER_GE); + } + DISPATCH(); + } + BYTECODE(CHECK_REGISTER_EQ_POS) { + if (registers[insn >> BYTECODE_SHIFT] == current) { + SET_PC_FROM_OFFSET(Load32Aligned(pc + 4)); + } else { + ADVANCE(CHECK_REGISTER_EQ_POS); + } + DISPATCH(); + } + BYTECODE(CHECK_NOT_REGS_EQUAL) { + if (registers[insn >> BYTECODE_SHIFT] == + registers[Load32Aligned(pc + 4)]) { + ADVANCE(CHECK_NOT_REGS_EQUAL); + } else { + SET_PC_FROM_OFFSET(Load32Aligned(pc + 8)); + } + DISPATCH(); + } + BYTECODE(CHECK_NOT_BACK_REF) { + int from = registers[insn >> BYTECODE_SHIFT]; + int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from; + if (from >= 0 && len > 0) { + if (current + len > subject.length() || + CompareChars(&subject[from], &subject[current], len) != 0) { + SET_PC_FROM_OFFSET(Load32Aligned(pc + 4)); + DISPATCH(); } - pc += BC_CHECK_NOT_BACK_REF_BACKWARD_LENGTH; - break; - } - BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_UNICODE) - V8_FALLTHROUGH; - BYTECODE(CHECK_NOT_BACK_REF_NO_CASE) { - bool unicode = - (insn & BYTECODE_MASK) == BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE; - int from = 
registers[insn >> BYTECODE_SHIFT]; - int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from; - if (from >= 0 && len > 0) { - if (current + len > subject.length() || - !BackRefMatchesNoCase(isolate, from, current, len, subject, - unicode)) { - pc = code_base + Load32Aligned(pc + 4); - break; - } - current += len; + current += len; + } + ADVANCE(CHECK_NOT_BACK_REF); + DISPATCH(); + } + BYTECODE(CHECK_NOT_BACK_REF_BACKWARD) { + int from = registers[insn >> BYTECODE_SHIFT]; + int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from; + if (from >= 0 && len > 0) { + if (current - len < 0 || + CompareChars(&subject[from], &subject[current - len], len) != 0) { + SET_PC_FROM_OFFSET(Load32Aligned(pc + 4)); + DISPATCH(); } - pc += BC_CHECK_NOT_BACK_REF_NO_CASE_LENGTH; - break; - } - BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_UNICODE_BACKWARD) - V8_FALLTHROUGH; - BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_BACKWARD) { - bool unicode = (insn & BYTECODE_MASK) == - BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE_BACKWARD; - int from = registers[insn >> BYTECODE_SHIFT]; - int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from; - if (from >= 0 && len > 0) { - if (current - len < 0 || - !BackRefMatchesNoCase(isolate, from, current - len, len, subject, - unicode)) { - pc = code_base + Load32Aligned(pc + 4); - break; - } - current -= len; + current -= len; + } + ADVANCE(CHECK_NOT_BACK_REF_BACKWARD); + DISPATCH(); + } + BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_UNICODE) { + int from = registers[insn >> BYTECODE_SHIFT]; + int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from; + if (from >= 0 && len > 0) { + if (current + len > subject.length() || + !BackRefMatchesNoCase(isolate, from, current, len, subject, true)) { + SET_PC_FROM_OFFSET(Load32Aligned(pc + 4)); + DISPATCH(); } - pc += BC_CHECK_NOT_BACK_REF_NO_CASE_BACKWARD_LENGTH; - break; - } - BYTECODE(CHECK_AT_START) { - if (current == 0) { - pc = code_base + Load32Aligned(pc + 4); - } else { - pc += BC_CHECK_AT_START_LENGTH; + current += len; + } + 
ADVANCE(CHECK_NOT_BACK_REF_NO_CASE_UNICODE); + DISPATCH(); + } + BYTECODE(CHECK_NOT_BACK_REF_NO_CASE) { + int from = registers[insn >> BYTECODE_SHIFT]; + int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from; + if (from >= 0 && len > 0) { + if (current + len > subject.length() || + !BackRefMatchesNoCase(isolate, from, current, len, subject, + false)) { + SET_PC_FROM_OFFSET(Load32Aligned(pc + 4)); + DISPATCH(); } - break; + current += len; } - BYTECODE(CHECK_NOT_AT_START) { - if (current + (insn >> BYTECODE_SHIFT) == 0) { - pc += BC_CHECK_NOT_AT_START_LENGTH; - } else { - pc = code_base + Load32Aligned(pc + 4); + ADVANCE(CHECK_NOT_BACK_REF_NO_CASE); + DISPATCH(); + } + BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_UNICODE_BACKWARD) { + int from = registers[insn >> BYTECODE_SHIFT]; + int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from; + if (from >= 0 && len > 0) { + if (current - len < 0 || + !BackRefMatchesNoCase(isolate, from, current - len, len, subject, + true)) { + SET_PC_FROM_OFFSET(Load32Aligned(pc + 4)); + DISPATCH(); } - break; + current -= len; } - BYTECODE(SET_CURRENT_POSITION_FROM_END) { - int by = static_cast<uint32_t>(insn) >> BYTECODE_SHIFT; - if (subject.length() - current > by) { - current = subject.length() - by; - current_char = subject[current - 1]; + ADVANCE(CHECK_NOT_BACK_REF_NO_CASE_UNICODE_BACKWARD); + DISPATCH(); + } + BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_BACKWARD) { + int from = registers[insn >> BYTECODE_SHIFT]; + int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from; + if (from >= 0 && len > 0) { + if (current - len < 0 || + !BackRefMatchesNoCase(isolate, from, current - len, len, subject, + false)) { + SET_PC_FROM_OFFSET(Load32Aligned(pc + 4)); + DISPATCH(); } - pc += BC_SET_CURRENT_POSITION_FROM_END_LENGTH; - break; + current -= len; } + ADVANCE(CHECK_NOT_BACK_REF_NO_CASE_BACKWARD); + DISPATCH(); + } + BYTECODE(CHECK_AT_START) { + if (current + (insn >> BYTECODE_SHIFT) == 0) { + SET_PC_FROM_OFFSET(Load32Aligned(pc + 4)); + } else { + 
ADVANCE(CHECK_AT_START); + } + DISPATCH(); + } + BYTECODE(CHECK_NOT_AT_START) { + if (current + (insn >> BYTECODE_SHIFT) == 0) { + ADVANCE(CHECK_NOT_AT_START); + } else { + SET_PC_FROM_OFFSET(Load32Aligned(pc + 4)); + } + DISPATCH(); + } + BYTECODE(SET_CURRENT_POSITION_FROM_END) { + ADVANCE(SET_CURRENT_POSITION_FROM_END); + int by = static_cast<uint32_t>(insn) >> BYTECODE_SHIFT; + if (subject.length() - current > by) { + current = subject.length() - by; + current_char = subject[current - 1]; + } + DISPATCH(); + } + BYTECODE(CHECK_CURRENT_POSITION) { + int pos = current + (insn >> BYTECODE_SHIFT); + if (pos > subject.length() || pos < 0) { + SET_PC_FROM_OFFSET(Load32Aligned(pc + 4)); + } else { + ADVANCE(CHECK_CURRENT_POSITION); + } + DISPATCH(); + } +#if V8_USE_COMPUTED_GOTO +// Lint gets confused a lot if we just use !V8_USE_COMPUTED_GOTO or ifndef +// V8_USE_COMPUTED_GOTO here. +#else default: UNREACHABLE(); - break; } +#endif // V8_USE_COMPUTED_GOTO } } #undef BYTECODE +#undef DISPATCH +#undef DECODE +#undef SET_PC_FROM_OFFSET +#undef ADVANCE +#undef BC_LABEL +#undef V8_USE_COMPUTED_GOTO } // namespace // static +void IrregexpInterpreter::Disassemble(ByteArray byte_array, + const std::string& pattern) { + DisallowHeapAllocation no_gc; + + PrintF("[generated bytecode for regexp pattern: '%s']\n", pattern.c_str()); + + const byte* const code_base = byte_array.GetDataStartAddress(); + const int byte_array_length = byte_array.length(); + ptrdiff_t offset = 0; + + while (offset < byte_array_length) { + const byte* const pc = code_base + offset; + PrintF("%p %4" V8PRIxPTRDIFF " ", pc, offset); + DisassembleSingleBytecode(code_base, pc); + offset += RegExpBytecodeLength(*pc); + } +} + +// static IrregexpInterpreter::Result IrregexpInterpreter::Match( - Isolate* isolate, Handle<ByteArray> code_array, - Handle<String> subject_string, int* registers, int start_position) { - DCHECK(subject_string->IsFlat()); + Isolate* isolate, JSRegExp regexp, String subject_string, int* 
registers, + int registers_length, int start_position, RegExp::CallOrigin call_origin) { + if (FLAG_regexp_tier_up) { + regexp.MarkTierUpForNextExec(); + } + + bool is_one_byte = String::IsOneByteRepresentationUnderneath(subject_string); + ByteArray code_array = ByteArray::cast(regexp.Bytecode(is_one_byte)); - // Note: Heap allocation *is* allowed in two situations: + return MatchInternal(isolate, code_array, subject_string, registers, + registers_length, start_position, call_origin); +} + +IrregexpInterpreter::Result IrregexpInterpreter::MatchInternal( + Isolate* isolate, ByteArray code_array, String subject_string, + int* registers, int registers_length, int start_position, + RegExp::CallOrigin call_origin) { + DCHECK(subject_string.IsFlat()); + + // Note: Heap allocation *is* allowed in two situations if calling from + // Runtime: // 1. When creating & throwing a stack overflow exception. The interpreter // aborts afterwards, and thus possible-moved objects are never used. // 2. When handling interrupts. We manually relocate unhandlified references // after interrupts have run. DisallowHeapAllocation no_gc; + // Reset registers to -1 (=undefined). + // This is necessary because registers are only written when a + // capture group matched. + // Resetting them ensures that previous matches are cleared. 
+ memset(registers, -1, sizeof(registers[0]) * registers_length); + uc16 previous_char = '\n'; - String::FlatContent subject_content = subject_string->GetFlatContent(no_gc); + String::FlatContent subject_content = subject_string.GetFlatContent(no_gc); if (subject_content.IsOneByte()) { Vector<const uint8_t> subject_vector = subject_content.ToOneByteVector(); if (start_position != 0) previous_char = subject_vector[start_position - 1]; return RawMatch(isolate, code_array, subject_string, subject_vector, - registers, start_position, previous_char); + registers, start_position, previous_char, call_origin); } else { DCHECK(subject_content.IsTwoByte()); Vector<const uc16> subject_vector = subject_content.ToUC16Vector(); if (start_position != 0) previous_char = subject_vector[start_position - 1]; return RawMatch(isolate, code_array, subject_string, subject_vector, - registers, start_position, previous_char); + registers, start_position, previous_char, call_origin); } } +// This method is called through an external reference from RegExpExecInternal +// builtin. 
+IrregexpInterpreter::Result IrregexpInterpreter::MatchForCallFromJs( + Address subject, int32_t start_position, Address, Address, int* registers, + int32_t registers_length, Address, RegExp::CallOrigin call_origin, + Isolate* isolate, Address regexp) { + DCHECK_NOT_NULL(isolate); + DCHECK_NOT_NULL(registers); + DCHECK(call_origin == RegExp::CallOrigin::kFromJs); + + DisallowHeapAllocation no_gc; + DisallowJavascriptExecution no_js(isolate); + + String subject_string = String::cast(Object(subject)); + JSRegExp regexp_obj = JSRegExp::cast(Object(regexp)); + + return Match(isolate, regexp_obj, subject_string, registers, registers_length, + start_position, call_origin); +} + +IrregexpInterpreter::Result IrregexpInterpreter::MatchForCallFromRuntime( + Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject_string, + int* registers, int registers_length, int start_position) { + return Match(isolate, *regexp, *subject_string, registers, registers_length, + start_position, RegExp::CallOrigin::kFromRuntime); +} + } // namespace internal } // namespace v8 diff --git a/deps/v8/src/regexp/regexp-interpreter.h b/deps/v8/src/regexp/regexp-interpreter.h index ad27dcd296..fbc5a3b290 100644 --- a/deps/v8/src/regexp/regexp-interpreter.h +++ b/deps/v8/src/regexp/regexp-interpreter.h @@ -12,7 +12,7 @@ namespace v8 { namespace internal { -class V8_EXPORT_PRIVATE IrregexpInterpreter { +class V8_EXPORT_PRIVATE IrregexpInterpreter : public AllStatic { public: enum Result { FAILURE = RegExp::kInternalRegExpFailure, @@ -21,10 +21,37 @@ class V8_EXPORT_PRIVATE IrregexpInterpreter { RETRY = RegExp::kInternalRegExpRetry, }; - // The caller is responsible for initializing registers before each call. - static Result Match(Isolate* isolate, Handle<ByteArray> code_array, - Handle<String> subject_string, int* registers, - int start_position); + // In case a StackOverflow occurs, a StackOverflowException is created and + // EXCEPTION is returned. 
+ static Result MatchForCallFromRuntime(Isolate* isolate, + Handle<JSRegExp> regexp, + Handle<String> subject_string, + int* registers, int registers_length, + int start_position); + + // In case a StackOverflow occurs, EXCEPTION is returned. The caller is + // responsible for creating the exception. + // Arguments input_start, input_end and backtrack_stack are + // unused. They are only passed to match the signature of the native irregex + // code. + static Result MatchForCallFromJs(Address subject, int32_t start_position, + Address input_start, Address input_end, + int* registers, int32_t registers_length, + Address backtrack_stack, + RegExp::CallOrigin call_origin, + Isolate* isolate, Address regexp); + + static Result MatchInternal(Isolate* isolate, ByteArray code_array, + String subject_string, int* registers, + int registers_length, int start_position, + RegExp::CallOrigin call_origin); + + static void Disassemble(ByteArray byte_array, const std::string& pattern); + + private: + static Result Match(Isolate* isolate, JSRegExp regexp, String subject_string, + int* registers, int registers_length, int start_position, + RegExp::CallOrigin call_origin); }; } // namespace internal diff --git a/deps/v8/src/regexp/regexp-macro-assembler-tracer.cc b/deps/v8/src/regexp/regexp-macro-assembler-tracer.cc index db9c5af569..5dca04a18c 100644 --- a/deps/v8/src/regexp/regexp-macro-assembler-tracer.cc +++ b/deps/v8/src/regexp/regexp-macro-assembler-tracer.cc @@ -162,24 +162,19 @@ void RegExpMacroAssemblerTracer::ReadStackPointerFromRegister(int reg) { assembler_->ReadStackPointerFromRegister(reg); } - -void RegExpMacroAssemblerTracer::LoadCurrentCharacter(int cp_offset, - Label* on_end_of_input, - bool check_bounds, - int characters) { +void RegExpMacroAssemblerTracer::LoadCurrentCharacterImpl( + int cp_offset, Label* on_end_of_input, bool check_bounds, int characters, + int eats_at_least) { const char* check_msg = check_bounds ? 
"" : " (unchecked)"; - PrintF(" LoadCurrentCharacter(cp_offset=%d, label[%08x]%s (%d chars));\n", - cp_offset, - LabelToInt(on_end_of_input), - check_msg, - characters); - assembler_->LoadCurrentCharacter(cp_offset, - on_end_of_input, - check_bounds, - characters); + PrintF( + " LoadCurrentCharacter(cp_offset=%d, label[%08x]%s (%d chars) (eats at " + "least %d));\n", + cp_offset, LabelToInt(on_end_of_input), check_msg, characters, + eats_at_least); + assembler_->LoadCurrentCharacter(cp_offset, on_end_of_input, check_bounds, + characters, eats_at_least); } - class PrintablePrinter { public: explicit PrintablePrinter(uc16 character) : character_(character) { } @@ -232,13 +227,13 @@ void RegExpMacroAssemblerTracer::CheckCharacter(unsigned c, Label* on_equal) { assembler_->CheckCharacter(c, on_equal); } - -void RegExpMacroAssemblerTracer::CheckAtStart(Label* on_at_start) { - PrintF(" CheckAtStart(label[%08x]);\n", LabelToInt(on_at_start)); - assembler_->CheckAtStart(on_at_start); +void RegExpMacroAssemblerTracer::CheckAtStart(int cp_offset, + Label* on_at_start) { + PrintF(" CheckAtStart(cp_offset=%d, label[%08x]);\n", cp_offset, + LabelToInt(on_at_start)); + assembler_->CheckAtStart(cp_offset, on_at_start); } - void RegExpMacroAssemblerTracer::CheckNotAtStart(int cp_offset, Label* on_not_at_start) { PrintF(" CheckNotAtStart(cp_offset=%d, label[%08x]);\n", cp_offset, diff --git a/deps/v8/src/regexp/regexp-macro-assembler-tracer.h b/deps/v8/src/regexp/regexp-macro-assembler-tracer.h index d0b68bd59d..2a44146e73 100644 --- a/deps/v8/src/regexp/regexp-macro-assembler-tracer.h +++ b/deps/v8/src/regexp/regexp-macro-assembler-tracer.h @@ -22,13 +22,13 @@ class RegExpMacroAssemblerTracer: public RegExpMacroAssembler { void AdvanceRegister(int reg, int by) override; // r[reg] += by. 
void Backtrack() override; void Bind(Label* label) override; - void CheckAtStart(Label* on_at_start) override; void CheckCharacter(unsigned c, Label* on_equal) override; void CheckCharacterAfterAnd(unsigned c, unsigned and_with, Label* on_equal) override; void CheckCharacterGT(uc16 limit, Label* on_greater) override; void CheckCharacterLT(uc16 limit, Label* on_less) override; void CheckGreedyLoop(Label* on_tos_equals_current_position) override; + void CheckAtStart(int cp_offset, Label* on_at_start) override; void CheckNotAtStart(int cp_offset, Label* on_not_at_start) override; void CheckNotBackReference(int start_reg, bool read_backward, Label* on_no_match) override; @@ -53,9 +53,9 @@ class RegExpMacroAssemblerTracer: public RegExpMacroAssembler { void IfRegisterLT(int reg, int comparand, Label* if_lt) override; void IfRegisterEqPos(int reg, Label* if_eq) override; IrregexpImplementation Implementation() override; - void LoadCurrentCharacter(int cp_offset, Label* on_end_of_input, - bool check_bounds = true, - int characters = 1) override; + void LoadCurrentCharacterImpl(int cp_offset, Label* on_end_of_input, + bool check_bounds, int characters, + int eats_at_least) override; void PopCurrentPosition() override; void PopRegister(int register_index) override; void PushBacktrack(Label* label) override; diff --git a/deps/v8/src/regexp/regexp-macro-assembler.cc b/deps/v8/src/regexp/regexp-macro-assembler.cc index 68fa16db61..96fb53d2a0 100644 --- a/deps/v8/src/regexp/regexp-macro-assembler.cc +++ b/deps/v8/src/regexp/regexp-macro-assembler.cc @@ -85,6 +85,20 @@ void RegExpMacroAssembler::CheckPosition(int cp_offset, LoadCurrentCharacter(cp_offset, on_outside_input, true); } +void RegExpMacroAssembler::LoadCurrentCharacter(int cp_offset, + Label* on_end_of_input, + bool check_bounds, + int characters, + int eats_at_least) { + // By default, eats_at_least = characters. 
+ if (eats_at_least == kUseCharactersValue) { + eats_at_least = characters; + } + + LoadCurrentCharacterImpl(cp_offset, on_end_of_input, check_bounds, characters, + eats_at_least); +} + bool RegExpMacroAssembler::CheckSpecialCharacterClass(uc16 type, Label* on_no_match) { return false; @@ -129,32 +143,46 @@ const byte* NativeRegExpMacroAssembler::StringCharacterPosition( } } +// This method may only be called after an interrupt. int NativeRegExpMacroAssembler::CheckStackGuardState( - Isolate* isolate, int start_index, bool is_direct_call, + Isolate* isolate, int start_index, RegExp::CallOrigin call_origin, Address* return_address, Code re_code, Address* subject, const byte** input_start, const byte** input_end) { DisallowHeapAllocation no_gc; DCHECK(re_code.raw_instruction_start() <= *return_address); DCHECK(*return_address <= re_code.raw_instruction_end()); - int return_value = 0; - // Prepare for possible GC. - HandleScope handles(isolate); - Handle<Code> code_handle(re_code, isolate); - Handle<String> subject_handle(String::cast(Object(*subject)), isolate); - bool is_one_byte = String::IsOneByteRepresentationUnderneath(*subject_handle); - StackLimitCheck check(isolate); bool js_has_overflowed = check.JsHasOverflowed(); - if (is_direct_call) { + if (call_origin == RegExp::CallOrigin::kFromJs) { // Direct calls from JavaScript can be interrupted in two ways: // 1. A real stack overflow, in which case we let the caller throw the // exception. // 2. The stack guard was used to interrupt execution for another purpose, // forcing the call through the runtime system. - return_value = js_has_overflowed ? EXCEPTION : RETRY; - } else if (js_has_overflowed) { + + // Bug(v8:9540) Investigate why this method is called from JS although no + // stackoverflow or interrupt is pending on ARM64. We return 0 in this case + // to continue execution normally. 
+ if (js_has_overflowed) { + return EXCEPTION; + } else if (check.InterruptRequested()) { + return RETRY; + } else { + return 0; + } + } + DCHECK(call_origin == RegExp::CallOrigin::kFromRuntime); + + // Prepare for possible GC. + HandleScope handles(isolate); + Handle<Code> code_handle(re_code, isolate); + Handle<String> subject_handle(String::cast(Object(*subject)), isolate); + bool is_one_byte = String::IsOneByteRepresentationUnderneath(*subject_handle); + int return_value = 0; + + if (js_has_overflowed) { AllowHeapAllocation yes_gc; isolate->StackOverflow(); return_value = EXCEPTION; @@ -191,7 +219,7 @@ int NativeRegExpMacroAssembler::CheckStackGuardState( } // Returns a {Result} sentinel, or the number of successful matches. -int NativeRegExpMacroAssembler::Match(Handle<Code> regexp_code, +int NativeRegExpMacroAssembler::Match(Handle<JSRegExp> regexp, Handle<String> subject, int* offsets_vector, int offsets_vector_length, @@ -234,31 +262,36 @@ int NativeRegExpMacroAssembler::Match(Handle<Code> regexp_code, StringCharacterPosition(subject_ptr, start_offset + slice_offset, no_gc); int byte_length = char_length << char_size_shift; const byte* input_end = input_start + byte_length; - return Execute(*regexp_code, *subject, start_offset, input_start, input_end, - offsets_vector, offsets_vector_length, isolate); + return Execute(*subject, start_offset, input_start, input_end, offsets_vector, + offsets_vector_length, isolate, *regexp); } // Returns a {Result} sentinel, or the number of successful matches. +// TODO(pthier): The JSRegExp object is passed to native irregexp code to match +// the signature of the interpreter. We should get rid of JS objects passed to +// internal methods. int NativeRegExpMacroAssembler::Execute( - Code code, String input, // This needs to be the unpacked (sliced, cons) string. 
int start_offset, const byte* input_start, const byte* input_end, - int* output, int output_size, Isolate* isolate) { + int* output, int output_size, Isolate* isolate, JSRegExp regexp) { // Ensure that the minimum stack has been allocated. RegExpStackScope stack_scope(isolate); Address stack_base = stack_scope.stack()->stack_base(); - int direct_call = 0; + bool is_one_byte = String::IsOneByteRepresentationUnderneath(input); + Code code = Code::cast(regexp.Code(is_one_byte)); + RegExp::CallOrigin call_origin = RegExp::CallOrigin::kFromRuntime; using RegexpMatcherSig = int( Address input_string, int start_offset, // NOLINT(readability/casting) const byte* input_start, const byte* input_end, int* output, - int output_size, Address stack_base, int direct_call, Isolate* isolate); + int output_size, Address stack_base, int call_origin, Isolate* isolate, + Address regexp); auto fn = GeneratedCode<RegexpMatcherSig>::FromCode(code); - int result = - fn.CallIrregexp(input.ptr(), start_offset, input_start, input_end, output, - output_size, stack_base, direct_call, isolate); + int result = fn.CallIrregexp(input.ptr(), start_offset, input_start, + input_end, output, output_size, stack_base, + call_origin, isolate, regexp.ptr()); DCHECK(result >= RETRY); if (result == EXCEPTION && !isolate->has_pending_exception()) { diff --git a/deps/v8/src/regexp/regexp-macro-assembler.h b/deps/v8/src/regexp/regexp-macro-assembler.h index b55ac13590..ccf19d3fb6 100644 --- a/deps/v8/src/regexp/regexp-macro-assembler.h +++ b/deps/v8/src/regexp/regexp-macro-assembler.h @@ -36,6 +36,8 @@ class RegExpMacroAssembler { static const int kTableSize = 1 << kTableSizeBits; static const int kTableMask = kTableSize - 1; + static constexpr int kUseCharactersValue = -1; + enum IrregexpImplementation { kIA32Implementation, kARMImplementation, @@ -69,7 +71,6 @@ class RegExpMacroAssembler { // stack by an earlier PushBacktrack(Label*). 
virtual void Backtrack() = 0; virtual void Bind(Label* label) = 0; - virtual void CheckAtStart(Label* on_at_start) = 0; // Dispatch after looking the current character up in a 2-bits-per-entry // map. The destinations vector has up to 4 labels. virtual void CheckCharacter(unsigned c, Label* on_equal) = 0; @@ -81,6 +82,7 @@ class RegExpMacroAssembler { virtual void CheckCharacterGT(uc16 limit, Label* on_greater) = 0; virtual void CheckCharacterLT(uc16 limit, Label* on_less) = 0; virtual void CheckGreedyLoop(Label* on_tos_equals_current_position) = 0; + virtual void CheckAtStart(int cp_offset, Label* on_at_start) = 0; virtual void CheckNotAtStart(int cp_offset, Label* on_not_at_start) = 0; virtual void CheckNotBackReference(int start_reg, bool read_backward, Label* on_no_match) = 0; @@ -133,10 +135,12 @@ class RegExpMacroAssembler { // label if it is. virtual void IfRegisterEqPos(int reg, Label* if_eq) = 0; virtual IrregexpImplementation Implementation() = 0; - virtual void LoadCurrentCharacter(int cp_offset, - Label* on_end_of_input, - bool check_bounds = true, - int characters = 1) = 0; + V8_EXPORT_PRIVATE void LoadCurrentCharacter( + int cp_offset, Label* on_end_of_input, bool check_bounds = true, + int characters = 1, int eats_at_least = kUseCharactersValue); + virtual void LoadCurrentCharacterImpl(int cp_offset, Label* on_end_of_input, + bool check_bounds, int characters, + int eats_at_least) = 0; virtual void PopCurrentPosition() = 0; virtual void PopRegister(int register_index) = 0; // Pushes the label on the backtrack stack, so that a following Backtrack @@ -219,7 +223,7 @@ class NativeRegExpMacroAssembler: public RegExpMacroAssembler { bool CanReadUnaligned() override; // Returns a {Result} sentinel, or the number of successful matches. 
- static int Match(Handle<Code> regexp, Handle<String> subject, + static int Match(Handle<JSRegExp> regexp, Handle<String> subject, int* offsets_vector, int offsets_vector_length, int previous_index, Isolate* isolate); @@ -235,9 +239,9 @@ class NativeRegExpMacroAssembler: public RegExpMacroAssembler { String subject, int start_index, const DisallowHeapAllocation& no_gc); static int CheckStackGuardState(Isolate* isolate, int start_index, - bool is_direct_call, Address* return_address, - Code re_code, Address* subject, - const byte** input_start, + RegExp::CallOrigin call_origin, + Address* return_address, Code re_code, + Address* subject, const byte** input_start, const byte** input_end); // Byte map of one byte characters with a 0xff if the character is a word @@ -250,11 +254,11 @@ class NativeRegExpMacroAssembler: public RegExpMacroAssembler { } // Returns a {Result} sentinel, or the number of successful matches. - V8_EXPORT_PRIVATE static int Execute(Code code, String input, - int start_offset, + V8_EXPORT_PRIVATE static int Execute(String input, int start_offset, const byte* input_start, const byte* input_end, int* output, - int output_size, Isolate* isolate); + int output_size, Isolate* isolate, + JSRegExp regexp); }; } // namespace internal diff --git a/deps/v8/src/regexp/regexp-nodes.h b/deps/v8/src/regexp/regexp-nodes.h index 4c13b74926..d618c9bb27 100644 --- a/deps/v8/src/regexp/regexp-nodes.h +++ b/deps/v8/src/regexp/regexp-nodes.h @@ -20,11 +20,14 @@ class QuickCheckDetails; class RegExpCompiler; class Trace; struct PreloadState; +class ChoiceNode; #define FOR_EACH_NODE_TYPE(VISIT) \ VISIT(End) \ VISIT(Action) \ VISIT(Choice) \ + VISIT(LoopChoice) \ + VISIT(NegativeLookaroundChoice) \ VISIT(BackReference) \ VISIT(Assertion) \ VISIT(Text) @@ -90,6 +93,34 @@ struct NodeInfo final { bool replacement_calculated : 1; }; +struct EatsAtLeastInfo final { + EatsAtLeastInfo() : EatsAtLeastInfo(0) {} + explicit EatsAtLeastInfo(uint8_t eats) + : 
eats_at_least_from_possibly_start(eats), + eats_at_least_from_not_start(eats) {} + void SetMin(const EatsAtLeastInfo& other) { + if (other.eats_at_least_from_possibly_start < + eats_at_least_from_possibly_start) { + eats_at_least_from_possibly_start = + other.eats_at_least_from_possibly_start; + } + if (other.eats_at_least_from_not_start < eats_at_least_from_not_start) { + eats_at_least_from_not_start = other.eats_at_least_from_not_start; + } + } + + // Any successful match starting from the current node will consume at least + // this many characters. This does not necessarily mean that there is a + // possible match with exactly this many characters, but we generally try to + // get this number as high as possible to allow for early exit on failure. + uint8_t eats_at_least_from_possibly_start; + + // Like eats_at_least_from_possibly_start, but with the additional assumption + // that start-of-string assertions (^) can't match. This value is greater than + // or equal to eats_at_least_from_possibly_start. + uint8_t eats_at_least_from_not_start; +}; + class RegExpNode : public ZoneObject { public: explicit RegExpNode(Zone* zone) @@ -104,13 +135,20 @@ class RegExpNode : public ZoneObject { // Generates a goto to this node or actually generates the code at this point. virtual void Emit(RegExpCompiler* compiler, Trace* trace) = 0; // How many characters must this node consume at a minimum in order to - // succeed. If we have found at least 'still_to_find' characters that - // must be consumed there is no need to ask any following nodes whether - // they are sure to eat any more characters. The not_at_start argument is - // used to indicate that we know we are not at the start of the input. In - // this case anchored branches will always fail and can be ignored when - // determining how many characters are consumed on success. - virtual int EatsAtLeast(int still_to_find, int budget, bool not_at_start) = 0; + // succeed. 
The not_at_start argument is used to indicate that we know we are + // not at the start of the input. In this case anchored branches will always + // fail and can be ignored when determining how many characters are consumed + // on success. If this node has not been analyzed yet, EatsAtLeast returns 0. + int EatsAtLeast(bool not_at_start); + // Returns how many characters this node must consume in order to succeed, + // given that this is a LoopChoiceNode whose counter register is in a + // newly-initialized state at the current position in the generated code. For + // example, consider /a{6,8}/. Absent any extra information, the + // LoopChoiceNode for the repetition must report that it consumes at least + // zero characters, because it may have already looped several times. However, + // with a newly-initialized counter, it can report that it consumes at least + // six characters. + virtual EatsAtLeastInfo EatsAtLeastFromLoopEntry(); // Emits some quick code that checks whether the preloaded characters match. // Falls through on certain failure, jumps to the label on possible success. // If the node cannot make a quick check it does nothing and returns false. @@ -118,7 +156,7 @@ class RegExpNode : public ZoneObject { Trace* trace, bool preload_has_checked_bounds, Label* on_possible_success, QuickCheckDetails* details_return, - bool fall_through_on_failure); + bool fall_through_on_failure, ChoiceNode* predecessor); // For a given number of characters this returns a mask and a value. The // next n characters are anded with the mask and compared with the value. // A comparison failure indicates the node cannot match the next n characters. @@ -127,6 +165,17 @@ class RegExpNode : public ZoneObject { RegExpCompiler* compiler, int characters_filled_in, bool not_at_start) = 0; + // Fills in quick check details for this node, given that this is a + // LoopChoiceNode whose counter register is in a newly-initialized state at + // the current position in the generated code. 
For example, consider /a{6,8}/. + // Absent any extra information, the LoopChoiceNode for the repetition cannot + // generate any useful quick check because a match might be the (empty) + // continuation node. However, with a newly-initialized counter, it can + // generate a quick check for several 'a' characters at once. + virtual void GetQuickCheckDetailsFromLoopEntry(QuickCheckDetails* details, + RegExpCompiler* compiler, + int characters_filled_in, + bool not_at_start); static const int kNodeIsTooComplexForGreedyLoops = kMinInt; virtual int GreedyLoopTextLength() { return kNodeIsTooComplexForGreedyLoops; } // Only returns the successor for a text node of length 1 that matches any @@ -183,6 +232,10 @@ class RegExpNode : public ZoneObject { void set_on_work_list(bool value) { on_work_list_ = value; } NodeInfo* info() { return &info_; } + const EatsAtLeastInfo* eats_at_least_info() const { return &eats_at_least_; } + void set_eats_at_least_info(const EatsAtLeastInfo& eats_at_least) { + eats_at_least_ = eats_at_least; + } BoyerMooreLookahead* bm_info(bool not_at_start) { return bm_info_[not_at_start ? 1 : 0]; @@ -205,6 +258,11 @@ class RegExpNode : public ZoneObject { Label label_; bool on_work_list_; NodeInfo info_; + + // Saved values for EatsAtLeast results, to avoid recomputation. Filled in + // during analysis (valid if info_.been_analyzed is true). + EatsAtLeastInfo eats_at_least_; + // This variable keeps track of how many times code has been generated for // this node (in different traces). 
We don't keep track of where the // generated code is located unless the code is generated at the start of @@ -239,7 +297,7 @@ class SeqRegExpNode : public RegExpNode { class ActionNode : public SeqRegExpNode { public: enum ActionType { - SET_REGISTER, + SET_REGISTER_FOR_LOOP, INCREMENT_REGISTER, STORE_POSITION, BEGIN_SUBMATCH, @@ -247,7 +305,8 @@ class ActionNode : public SeqRegExpNode { EMPTY_MATCH_CHECK, CLEAR_CAPTURES }; - static ActionNode* SetRegister(int reg, int val, RegExpNode* on_success); + static ActionNode* SetRegisterForLoop(int reg, int val, + RegExpNode* on_success); static ActionNode* IncrementRegister(int reg, RegExpNode* on_success); static ActionNode* StorePosition(int reg, bool is_capture, RegExpNode* on_success); @@ -265,13 +324,9 @@ class ActionNode : public SeqRegExpNode { RegExpNode* on_success); void Accept(NodeVisitor* visitor) override; void Emit(RegExpCompiler* compiler, Trace* trace) override; - int EatsAtLeast(int still_to_find, int budget, bool not_at_start) override; void GetQuickCheckDetails(QuickCheckDetails* details, RegExpCompiler* compiler, int filled_in, - bool not_at_start) override { - return on_success()->GetQuickCheckDetails(details, compiler, filled_in, - not_at_start); - } + bool not_at_start) override; void FillInBMInfo(Isolate* isolate, int offset, int budget, BoyerMooreLookahead* bm, bool not_at_start) override; ActionType action_type() { return action_type_; } @@ -342,7 +397,6 @@ class TextNode : public SeqRegExpNode { JSRegExp::Flags flags); void Accept(NodeVisitor* visitor) override; void Emit(RegExpCompiler* compiler, Trace* trace) override; - int EatsAtLeast(int still_to_find, int budget, bool not_at_start) override; void GetQuickCheckDetails(QuickCheckDetails* details, RegExpCompiler* compiler, int characters_filled_in, bool not_at_start) override; @@ -356,6 +410,7 @@ class TextNode : public SeqRegExpNode { BoyerMooreLookahead* bm, bool not_at_start) override; void CalculateOffsets(); RegExpNode* 
FilterOneByte(int depth) override; + int Length(); private: enum TextEmitPassType { @@ -371,7 +426,6 @@ class TextNode : public SeqRegExpNode { void TextEmitPass(RegExpCompiler* compiler, TextEmitPassType pass, bool preloaded, Trace* trace, bool first_element_checked, int* checked_up_to); - int Length(); ZoneList<TextElement>* elms_; bool read_backward_; }; @@ -402,7 +456,6 @@ class AssertionNode : public SeqRegExpNode { } void Accept(NodeVisitor* visitor) override; void Emit(RegExpCompiler* compiler, Trace* trace) override; - int EatsAtLeast(int still_to_find, int budget, bool not_at_start) override; void GetQuickCheckDetails(QuickCheckDetails* details, RegExpCompiler* compiler, int filled_in, bool not_at_start) override; @@ -434,8 +487,6 @@ class BackReferenceNode : public SeqRegExpNode { int end_register() { return end_reg_; } bool read_backward() { return read_backward_; } void Emit(RegExpCompiler* compiler, Trace* trace) override; - int EatsAtLeast(int still_to_find, int recursion_depth, - bool not_at_start) override; void GetQuickCheckDetails(QuickCheckDetails* details, RegExpCompiler* compiler, int characters_filled_in, bool not_at_start) override { @@ -457,10 +508,6 @@ class EndNode : public RegExpNode { EndNode(Action action, Zone* zone) : RegExpNode(zone), action_(action) {} void Accept(NodeVisitor* visitor) override; void Emit(RegExpCompiler* compiler, Trace* trace) override; - int EatsAtLeast(int still_to_find, int recursion_depth, - bool not_at_start) override { - return 0; - } void GetQuickCheckDetails(QuickCheckDetails* details, RegExpCompiler* compiler, int characters_filled_in, bool not_at_start) override { @@ -540,9 +587,6 @@ class ChoiceNode : public RegExpNode { } ZoneList<GuardedAlternative>* alternatives() { return alternatives_; } void Emit(RegExpCompiler* compiler, Trace* trace) override; - int EatsAtLeast(int still_to_find, int budget, bool not_at_start) override; - int EatsAtLeastHelper(int still_to_find, int budget, - RegExpNode* 
ignore_this_node, bool not_at_start); void GetQuickCheckDetails(QuickCheckDetails* details, RegExpCompiler* compiler, int characters_filled_in, bool not_at_start) override; @@ -564,6 +608,7 @@ class ChoiceNode : public RegExpNode { ZoneList<GuardedAlternative>* alternatives_; private: + template <typename...> friend class Analysis; void GenerateGuard(RegExpMacroAssembler* macro_assembler, Guard* guard, @@ -601,16 +646,23 @@ class NegativeLookaroundChoiceNode : public ChoiceNode { AddAlternative(this_must_fail); AddAlternative(then_do_this); } - int EatsAtLeast(int still_to_find, int budget, bool not_at_start) override; void GetQuickCheckDetails(QuickCheckDetails* details, RegExpCompiler* compiler, int characters_filled_in, bool not_at_start) override; void FillInBMInfo(Isolate* isolate, int offset, int budget, BoyerMooreLookahead* bm, bool not_at_start) override { - alternatives_->at(1).node()->FillInBMInfo(isolate, offset, budget - 1, bm, - not_at_start); + continue_node()->FillInBMInfo(isolate, offset, budget - 1, bm, + not_at_start); if (offset == 0) set_bm_info(not_at_start, bm); } + static constexpr int kLookaroundIndex = 0; + static constexpr int kContinueIndex = 1; + RegExpNode* lookaround_node() { + return alternatives()->at(kLookaroundIndex).node(); + } + RegExpNode* continue_node() { + return alternatives()->at(kContinueIndex).node(); + } // For a negative lookahead we don't emit the quick check for the // alternative that is expected to fail. 
This is because quick check code // starts by loading enough characters for the alternative that takes fewest @@ -619,29 +671,38 @@ class NegativeLookaroundChoiceNode : public ChoiceNode { bool try_to_emit_quick_check_for_alternative(bool is_first) override { return !is_first; } + void Accept(NodeVisitor* visitor) override; RegExpNode* FilterOneByte(int depth) override; }; class LoopChoiceNode : public ChoiceNode { public: - LoopChoiceNode(bool body_can_be_zero_length, bool read_backward, Zone* zone) + LoopChoiceNode(bool body_can_be_zero_length, bool read_backward, + int min_loop_iterations, Zone* zone) : ChoiceNode(2, zone), loop_node_(nullptr), continue_node_(nullptr), body_can_be_zero_length_(body_can_be_zero_length), - read_backward_(read_backward) {} + read_backward_(read_backward), + traversed_loop_initialization_node_(false), + min_loop_iterations_(min_loop_iterations) {} void AddLoopAlternative(GuardedAlternative alt); void AddContinueAlternative(GuardedAlternative alt); void Emit(RegExpCompiler* compiler, Trace* trace) override; - int EatsAtLeast(int still_to_find, int budget, bool not_at_start) override; void GetQuickCheckDetails(QuickCheckDetails* details, RegExpCompiler* compiler, int characters_filled_in, bool not_at_start) override; + void GetQuickCheckDetailsFromLoopEntry(QuickCheckDetails* details, + RegExpCompiler* compiler, + int characters_filled_in, + bool not_at_start) override; void FillInBMInfo(Isolate* isolate, int offset, int budget, BoyerMooreLookahead* bm, bool not_at_start) override; + EatsAtLeastInfo EatsAtLeastFromLoopEntry() override; RegExpNode* loop_node() { return loop_node_; } RegExpNode* continue_node() { return continue_node_; } bool body_can_be_zero_length() { return body_can_be_zero_length_; } + int min_loop_iterations() const { return min_loop_iterations_; } bool read_backward() override { return read_backward_; } void Accept(NodeVisitor* visitor) override; RegExpNode* FilterOneByte(int depth) override; @@ -658,6 +719,22 @@ 
class LoopChoiceNode : public ChoiceNode { RegExpNode* continue_node_; bool body_can_be_zero_length_; bool read_backward_; + + // Temporary marker set only while generating quick check details. Represents + // whether GetQuickCheckDetails traversed the initialization node for this + // loop's counter. If so, we may be able to generate stricter quick checks + // because we know the loop node must match at least min_loop_iterations_ + // times before the continuation node can match. + bool traversed_loop_initialization_node_; + + // The minimum number of times the loop_node_ must match before the + // continue_node_ might be considered. This value can be temporarily decreased + // while generating quick check details, to represent the remaining iterations + // after the completed portion of the quick check details. + int min_loop_iterations_; + + friend class IterationDecrementer; + friend class LoopInitializationMarker; }; class NodeVisitor { @@ -666,7 +743,6 @@ class NodeVisitor { #define DECLARE_VISIT(Type) virtual void Visit##Type(Type##Node* that) = 0; FOR_EACH_NODE_TYPE(DECLARE_VISIT) #undef DECLARE_VISIT - virtual void VisitLoopChoice(LoopChoiceNode* that) { VisitChoice(that); } }; } // namespace internal diff --git a/deps/v8/src/regexp/regexp-parser.cc b/deps/v8/src/regexp/regexp-parser.cc index 3647680969..d6e421cafa 100644 --- a/deps/v8/src/regexp/regexp-parser.cc +++ b/deps/v8/src/regexp/regexp-parser.cc @@ -692,7 +692,7 @@ RegExpParser::RegExpParserState* RegExpParser::ParseOpenParenthesis( } } if (subexpr_type == CAPTURE) { - if (captures_started_ >= kMaxCaptures) { + if (captures_started_ >= JSRegExp::kMaxCaptures) { ReportError(CStrVector("Too many captures")); return nullptr; } @@ -800,7 +800,7 @@ bool RegExpParser::ParseBackReferenceIndex(int* index_out) { uc32 c = current(); if (IsDecimalDigit(c)) { value = 10 * value + (c - '0'); - if (value > kMaxCaptures) { + if (value > JSRegExp::kMaxCaptures) { Reset(start); return false; } diff --git 
a/deps/v8/src/regexp/regexp-parser.h b/deps/v8/src/regexp/regexp-parser.h index 36cec7e984..cc1948b101 100644 --- a/deps/v8/src/regexp/regexp-parser.h +++ b/deps/v8/src/regexp/regexp-parser.h @@ -221,7 +221,6 @@ class V8_EXPORT_PRIVATE RegExpParser { static bool IsSyntaxCharacterOrSlash(uc32 c); - static const int kMaxCaptures = 1 << 16; static const uc32 kEndMarker = (1 << 21); private: diff --git a/deps/v8/src/regexp/regexp-stack.cc b/deps/v8/src/regexp/regexp-stack.cc index 3885fd8e8d..a6a128841f 100644 --- a/deps/v8/src/regexp/regexp-stack.cc +++ b/deps/v8/src/regexp/regexp-stack.cc @@ -72,12 +72,12 @@ Address RegExpStack::EnsureCapacity(size_t size) { DeleteArray(thread_local_.memory_); } thread_local_.memory_ = new_memory; + thread_local_.memory_top_ = new_memory + size; thread_local_.memory_size_ = size; thread_local_.limit_ = reinterpret_cast<Address>(new_memory) + kStackLimitSlack * kSystemPointerSize; } - return reinterpret_cast<Address>(thread_local_.memory_) + - thread_local_.memory_size_; + return reinterpret_cast<Address>(thread_local_.memory_top_); } diff --git a/deps/v8/src/regexp/regexp-stack.h b/deps/v8/src/regexp/regexp-stack.h index b1d4571760..7ecaa40b81 100644 --- a/deps/v8/src/regexp/regexp-stack.h +++ b/deps/v8/src/regexp/regexp-stack.h @@ -46,8 +46,9 @@ class RegExpStack { // Gives the top of the memory used as stack. Address stack_base() { DCHECK_NE(0, thread_local_.memory_size_); - return reinterpret_cast<Address>(thread_local_.memory_) + - thread_local_.memory_size_; + DCHECK_EQ(thread_local_.memory_top_, + thread_local_.memory_ + thread_local_.memory_size_); + return reinterpret_cast<Address>(thread_local_.memory_top_); } // The total size of the memory allocated for the stack. @@ -58,7 +59,7 @@ class RegExpStack { // There is only a limited number of locations below the stack limit, // so users of the stack should check the stack limit during any // sequence of pushes longer than this. 
- Address* limit_address() { return &(thread_local_.limit_); } + Address* limit_address_address() { return &(thread_local_.limit_); } // Ensures that there is a memory area with at least the specified size. // If passing zero, the default/minimum size buffer is allocated. @@ -89,12 +90,15 @@ class RegExpStack { // Structure holding the allocated memory, size and limit. struct ThreadLocal { ThreadLocal() { Clear(); } - // If memory_size_ > 0 then memory_ must be non-nullptr. + // If memory_size_ > 0 then memory_ and memory_top_ must be non-nullptr + // and memory_top_ = memory_ + memory_size_ byte* memory_; + byte* memory_top_; size_t memory_size_; Address limit_; void Clear() { memory_ = nullptr; + memory_top_ = nullptr; memory_size_ = 0; limit_ = kMemoryTop; } @@ -102,7 +106,7 @@ class RegExpStack { }; // Address of allocated memory. - Address memory_address() { + Address memory_address_address() { return reinterpret_cast<Address>(&thread_local_.memory_); } @@ -111,6 +115,11 @@ class RegExpStack { return reinterpret_cast<Address>(&thread_local_.memory_size_); } + // Address of top of memory used as stack. + Address memory_top_address_address() { + return reinterpret_cast<Address>(&thread_local_.memory_top_); + } + // Resets the buffer if it has grown beyond the default/minimum size. // After this, the buffer is either the default size, or it is empty, so // you have to call EnsureCapacity before using it again. 
diff --git a/deps/v8/src/regexp/regexp-utils.cc b/deps/v8/src/regexp/regexp-utils.cc index ad50270fdc..c9194d5170 100644 --- a/deps/v8/src/regexp/regexp-utils.cc +++ b/deps/v8/src/regexp/regexp-utils.cc @@ -5,6 +5,7 @@ #include "src/regexp/regexp-utils.h" #include "src/execution/isolate.h" +#include "src/execution/protectors-inl.h" #include "src/heap/factory.h" #include "src/objects/js-regexp-inl.h" #include "src/objects/objects-inl.h" @@ -179,7 +180,14 @@ bool RegExpUtils::IsUnmodifiedRegExp(Isolate* isolate, Handle<Object> obj) { return false; } - if (!isolate->IsRegExpSpeciesLookupChainIntact(isolate->native_context())) { + // Note: Unlike the more involved check in CSA (see BranchIfFastRegExp), this + // does not go on to check the actual value of the exec property. This would + // not be valid since this method is called from places that access the flags + // property. Similar spots in CSA would use BranchIfFastRegExp_Strict in this + // case. + + if (!Protectors::IsRegExpSpeciesLookupChainProtectorIntact( + recv.GetCreationContext())) { return false; } diff --git a/deps/v8/src/regexp/regexp-utils.h b/deps/v8/src/regexp/regexp-utils.h index 4b8714c55f..19f1f24039 100644 --- a/deps/v8/src/regexp/regexp-utils.h +++ b/deps/v8/src/regexp/regexp-utils.h @@ -38,6 +38,9 @@ class RegExpUtils : public AllStatic { // Checks whether the given object is an unmodified JSRegExp instance. // Neither the object's map, nor its prototype's map, nor any relevant // method on the prototype may be modified. + // + // Note: This check is limited and may only be used in situations where the only + // relevant property is 'exec'. 
static bool IsUnmodifiedRegExp(Isolate* isolate, Handle<Object> obj); // ES#sec-advancestringindex diff --git a/deps/v8/src/regexp/regexp.cc b/deps/v8/src/regexp/regexp.cc index 15b0321c46..e0bc4b8e32 100644 --- a/deps/v8/src/regexp/regexp.cc +++ b/deps/v8/src/regexp/regexp.cc @@ -5,6 +5,7 @@ #include "src/regexp/regexp.h" #include "src/codegen/compilation-cache.h" +#include "src/diagnostics/code-tracer.h" #include "src/heap/heap-inl.h" #include "src/objects/js-regexp-inl.h" #include "src/regexp/regexp-bytecode-generator.h" @@ -14,6 +15,7 @@ #include "src/regexp/regexp-macro-assembler-arch.h" #include "src/regexp/regexp-parser.h" #include "src/strings/string-search.h" +#include "src/utils/ostreams.h" namespace v8 { namespace internal { @@ -298,29 +300,72 @@ Handle<Object> RegExpImpl::AtomExec(Isolate* isolate, Handle<JSRegExp> re, bool RegExpImpl::EnsureCompiledIrregexp(Isolate* isolate, Handle<JSRegExp> re, Handle<String> sample_subject, bool is_one_byte) { - Object compiled_code = re->DataAt(JSRegExp::code_index(is_one_byte)); - if (compiled_code != Smi::FromInt(JSRegExp::kUninitializedValue)) { - DCHECK(FLAG_regexp_interpret_all ? compiled_code.IsByteArray() - : compiled_code.IsCode()); + Object compiled_code = re->Code(is_one_byte); + Object bytecode = re->Bytecode(is_one_byte); + bool needs_initial_compilation = + compiled_code == Smi::FromInt(JSRegExp::kUninitializedValue); + // Recompile is needed when we're dealing with the first execution of the + // regexp after the decision to tier up has been made. If the tiering up + // strategy is not in use, this value is always false. 
+ bool needs_tier_up_compilation = + re->MarkedForTierUp() && bytecode.IsByteArray(); + + if (FLAG_trace_regexp_tier_up && needs_tier_up_compilation) { + PrintF("JSRegExp object %p needs tier-up compilation\n", + reinterpret_cast<void*>(re->ptr())); + } + + if (!needs_initial_compilation && !needs_tier_up_compilation) { + DCHECK(compiled_code.IsCode()); + DCHECK_IMPLIES(FLAG_regexp_interpret_all, bytecode.IsByteArray()); return true; } + + DCHECK_IMPLIES(needs_tier_up_compilation, bytecode.IsByteArray()); + return CompileIrregexp(isolate, re, sample_subject, is_one_byte); } +#ifdef DEBUG +namespace { + +bool RegExpCodeIsValidForPreCompilation(Handle<JSRegExp> re, bool is_one_byte) { + Object entry = re->Code(is_one_byte); + Object bytecode = re->Bytecode(is_one_byte); + // If we're not using the tier-up strategy, entry can only be a smi + // representing an uncompiled regexp here. If we're using the tier-up + // strategy, entry can still be a smi representing an uncompiled regexp, when + // compiling the regexp before the tier-up, or it can contain a trampoline to + // the regexp interpreter, in which case the bytecode field contains compiled + // bytecode, when recompiling the regexp after the tier-up. If the + // tier-up was forced, which happens for global replaces, entry is a smi + // representing an uncompiled regexp, even though we're "recompiling" after + // the tier-up. + if (re->ShouldProduceBytecode()) { + DCHECK(entry.IsSmi()); + DCHECK(bytecode.IsSmi()); + int entry_value = Smi::ToInt(entry); + int bytecode_value = Smi::ToInt(bytecode); + DCHECK_EQ(JSRegExp::kUninitializedValue, entry_value); + DCHECK_EQ(JSRegExp::kUninitializedValue, bytecode_value); + } else { + DCHECK(entry.IsSmi() || (entry.IsCode() && bytecode.IsByteArray())); + } + + return true; +} + +} // namespace +#endif + bool RegExpImpl::CompileIrregexp(Isolate* isolate, Handle<JSRegExp> re, Handle<String> sample_subject, bool is_one_byte) { // Compile the RegExp. 
Zone zone(isolate->allocator(), ZONE_NAME); PostponeInterruptsScope postpone(isolate); -#ifdef DEBUG - Object entry = re->DataAt(JSRegExp::code_index(is_one_byte)); - // When arriving here entry can only be a smi representing an uncompiled - // regexp. - DCHECK(entry.IsSmi()); - int entry_value = Smi::ToInt(entry); - DCHECK_EQ(JSRegExp::kUninitializedValue, entry_value); -#endif + + DCHECK(RegExpCodeIsValidForPreCompilation(re, is_one_byte)); JSRegExp::Flags flags = re->GetFlags(); @@ -335,6 +380,14 @@ bool RegExpImpl::CompileIrregexp(Isolate* isolate, Handle<JSRegExp> re, USE(ThrowRegExpException(isolate, re, pattern, compile_data.error)); return false; } + // The compilation target is a kBytecode if we're interpreting all regexp + // objects, or if we're using the tier-up strategy but the tier-up hasn't + // happened yet. The compilation target is a kNative if we're using the + // tier-up strategy and we need to recompile to tier-up, or if we're producing + // native code for all regexp objects. + compile_data.compilation_target = re->ShouldProduceBytecode() + ? RegExpCompilationTarget::kBytecode + : RegExpCompilationTarget::kNative; const bool compilation_succeeded = Compile(isolate, &zone, &compile_data, flags, pattern, sample_subject, is_one_byte); @@ -346,13 +399,37 @@ bool RegExpImpl::CompileIrregexp(Isolate* isolate, Handle<JSRegExp> re, Handle<FixedArray> data = Handle<FixedArray>(FixedArray::cast(re->data()), isolate); - data->set(JSRegExp::code_index(is_one_byte), compile_data.code); + if (compile_data.compilation_target == RegExpCompilationTarget::kNative) { + data->set(JSRegExp::code_index(is_one_byte), compile_data.code); + // Reset bytecode to uninitialized. In case we use tier-up we know that + // tier-up has happened this way. 
+ data->set(JSRegExp::bytecode_index(is_one_byte), + Smi::FromInt(JSRegExp::kUninitializedValue)); + } else { + DCHECK_EQ(compile_data.compilation_target, + RegExpCompilationTarget::kBytecode); + // Store code generated by compiler in bytecode and trampoline to + // interpreter in code. + data->set(JSRegExp::bytecode_index(is_one_byte), compile_data.code); + Handle<Code> trampoline = + BUILTIN_CODE(isolate, RegExpInterpreterTrampoline); + data->set(JSRegExp::code_index(is_one_byte), *trampoline); + } SetIrregexpCaptureNameMap(*data, compile_data.capture_name_map); int register_max = IrregexpMaxRegisterCount(*data); if (compile_data.register_count > register_max) { SetIrregexpMaxRegisterCount(*data, compile_data.register_count); } + if (FLAG_trace_regexp_tier_up) { + PrintF("JSRegExp object %p %s size: %d\n", + reinterpret_cast<void*>(re->ptr()), + re->ShouldProduceBytecode() ? "bytecode" : "native code", + re->ShouldProduceBytecode() + ? IrregexpByteCode(*data, is_one_byte).Size() + : IrregexpNativeCode(*data, is_one_byte).Size()); + } + return true; } @@ -382,7 +459,7 @@ int RegExpImpl::IrregexpNumberOfRegisters(FixedArray re) { } ByteArray RegExpImpl::IrregexpByteCode(FixedArray re, bool is_one_byte) { - return ByteArray::cast(re.get(JSRegExp::code_index(is_one_byte))); + return ByteArray::cast(re.get(JSRegExp::bytecode_index(is_one_byte))); } Code RegExpImpl::IrregexpNativeCode(FixedArray re, bool is_one_byte) { @@ -411,7 +488,7 @@ int RegExp::IrregexpPrepare(Isolate* isolate, Handle<JSRegExp> regexp, DisallowHeapAllocation no_gc; FixedArray data = FixedArray::cast(regexp->data()); - if (FLAG_regexp_interpret_all) { + if (regexp->ShouldProduceBytecode()) { // Byte-code regexp needs space allocated for all its registers. // The result captures are copied to the start of the registers array // if the match succeeds. 
This way those registers are not clobbered @@ -436,16 +513,15 @@ int RegExpImpl::IrregexpExecRaw(Isolate* isolate, Handle<JSRegExp> regexp, bool is_one_byte = String::IsOneByteRepresentationUnderneath(*subject); - if (!FLAG_regexp_interpret_all) { + if (!regexp->ShouldProduceBytecode()) { DCHECK(output_size >= (IrregexpNumberOfCaptures(*irregexp) + 1) * 2); do { EnsureCompiledIrregexp(isolate, regexp, subject, is_one_byte); - Handle<Code> code(IrregexpNativeCode(*irregexp, is_one_byte), isolate); // The stack is used to allocate registers for the compiled regexp code. // This means that in case of failure, the output registers array is left // untouched and contains the capture results from the previous successful // match. We can use that to set the last match info lazily. - int res = NativeRegExpMacroAssembler::Match(code, subject, output, + int res = NativeRegExpMacroAssembler::Match(regexp, subject, output, output_size, index, isolate); if (res != NativeRegExpMacroAssembler::RETRY) { DCHECK(res != NativeRegExpMacroAssembler::EXCEPTION || @@ -464,12 +540,11 @@ int RegExpImpl::IrregexpExecRaw(Isolate* isolate, Handle<JSRegExp> regexp, // the, potentially, different subject (the string can switch between // being internal and external, and even between being Latin1 and UC16, // but the characters are always the same). - RegExp::IrregexpPrepare(isolate, regexp, subject); is_one_byte = String::IsOneByteRepresentationUnderneath(*subject); } while (true); UNREACHABLE(); } else { - DCHECK(FLAG_regexp_interpret_all); + DCHECK(regexp->ShouldProduceBytecode()); DCHECK(output_size >= IrregexpNumberOfRegisters(*irregexp)); // We must have done EnsureCompiledIrregexp, so we can get the number of // registers. 
@@ -478,17 +553,10 @@ int RegExpImpl::IrregexpExecRaw(Isolate* isolate, Handle<JSRegExp> regexp, int32_t* raw_output = &output[number_of_capture_registers]; do { - // We do not touch the actual capture result registers until we know there - // has been a match so that we can use those capture results to set the - // last match info. - for (int i = number_of_capture_registers - 1; i >= 0; i--) { - raw_output[i] = -1; - } - Handle<ByteArray> byte_codes(IrregexpByteCode(*irregexp, is_one_byte), - isolate); - - IrregexpInterpreter::Result result = IrregexpInterpreter::Match( - isolate, byte_codes, subject, raw_output, index); + IrregexpInterpreter::Result result = + IrregexpInterpreter::MatchForCallFromRuntime( + isolate, regexp, subject, raw_output, number_of_capture_registers, + index); DCHECK_IMPLIES(result == IrregexpInterpreter::EXCEPTION, isolate->has_pending_exception()); @@ -504,6 +572,10 @@ int RegExpImpl::IrregexpExecRaw(Isolate* isolate, Handle<JSRegExp> regexp, case IrregexpInterpreter::RETRY: // The string has changed representation, and we must restart the // match. + // We need to reset the tier up to start over with compilation. + if (FLAG_regexp_tier_up) { + regexp->ResetTierUp(); + } is_one_byte = String::IsOneByteRepresentationUnderneath(*subject); EnsureCompiledIrregexp(isolate, regexp, subject, is_one_byte); break; @@ -520,14 +592,15 @@ MaybeHandle<Object> RegExpImpl::IrregexpExec( subject = String::Flatten(isolate, subject); - // Prepare space for the return values. #ifdef DEBUG - if (FLAG_regexp_interpret_all && FLAG_trace_regexp_bytecodes) { + if (FLAG_trace_regexp_bytecodes && regexp->ShouldProduceBytecode()) { String pattern = regexp->Pattern(); PrintF("\n\nRegexp match: /%s/\n\n", pattern.ToCString().get()); PrintF("\n\nSubject string: '%s'\n\n", subject->ToCString().get()); } #endif + + // Prepare space for the return values. 
int required_registers = RegExp::IrregexpPrepare(isolate, regexp, subject); if (required_registers < 0) { // Compiling failed with an exception. @@ -547,6 +620,7 @@ MaybeHandle<Object> RegExpImpl::IrregexpExec( int res = RegExpImpl::IrregexpExecRaw(isolate, regexp, subject, previous_index, output_registers, required_registers); + if (res == RegExp::RE_SUCCESS) { int capture_count = IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())); @@ -706,17 +780,14 @@ bool RegExpImpl::Compile(Isolate* isolate, Zone* zone, RegExpCompileData* data, if (node == nullptr) node = new (zone) EndNode(EndNode::BACKTRACK, zone); data->node = node; - Analysis analysis(isolate, is_one_byte); - analysis.EnsureAnalyzed(node); - if (analysis.has_failed()) { - data->error = - isolate->factory()->NewStringFromAsciiChecked(analysis.error_message()); + if (const char* error_message = AnalyzeRegExp(isolate, is_one_byte, node)) { + data->error = isolate->factory()->NewStringFromAsciiChecked(error_message); return false; } // Create the correct assembler for the architecture. std::unique_ptr<RegExpMacroAssembler> macro_assembler; - if (!FLAG_regexp_interpret_all) { + if (data->compilation_target == RegExpCompilationTarget::kNative) { // Native regexp implementation. DCHECK(!FLAG_jitless); @@ -752,8 +823,7 @@ bool RegExpImpl::Compile(Isolate* isolate, Zone* zone, RegExpCompileData* data, #error "Unsupported architecture" #endif } else { - DCHECK(FLAG_regexp_interpret_all); - + DCHECK_EQ(data->compilation_target, RegExpCompilationTarget::kBytecode); // Interpreted regexp implementation. macro_assembler.reset(new RegExpBytecodeGenerator(isolate, zone)); } @@ -781,6 +851,26 @@ bool RegExpImpl::Compile(Isolate* isolate, Zone* zone, RegExpCompileData* data, RegExpCompiler::CompilationResult result = compiler.Assemble( isolate, macro_assembler.get(), node, data->capture_count, pattern); + // Code / bytecode printing. 
+ { +#ifdef ENABLE_DISASSEMBLER + if (FLAG_print_regexp_code && + data->compilation_target == RegExpCompilationTarget::kNative) { + CodeTracer::Scope trace_scope(isolate->GetCodeTracer()); + OFStream os(trace_scope.file()); + Handle<Code> c(Code::cast(result.code), isolate); + auto pattern_cstring = pattern->ToCString(); + c->Disassemble(pattern_cstring.get(), os); + } +#endif + if (FLAG_print_regexp_bytecode && + data->compilation_target == RegExpCompilationTarget::kBytecode) { + Handle<ByteArray> bytecode(ByteArray::cast(result.code), isolate); + auto pattern_cstring = pattern->ToCString(); + IrregexpInterpreter::Disassemble(*bytecode, pattern_cstring.get()); + } + } + if (FLAG_correctness_fuzzer_suppressions && strncmp(result.error_message, "Stack overflow", 15) == 0) { FATAL("Aborting on stack overflow"); @@ -790,6 +880,7 @@ bool RegExpImpl::Compile(Isolate* isolate, Zone* zone, RegExpCompileData* data, data->error = isolate->factory()->NewStringFromAsciiChecked(result.error_message); } + data->code = result.code; data->register_count = result.num_registers; @@ -803,7 +894,7 @@ RegExpGlobalCache::RegExpGlobalCache(Handle<JSRegExp> regexp, regexp_(regexp), subject_(subject), isolate_(isolate) { - bool interpreted = FLAG_regexp_interpret_all; + bool interpreted = regexp->ShouldProduceBytecode(); if (regexp_->TypeTag() == JSRegExp::ATOM) { static const int kAtomRegistersPerMatch = 2; @@ -868,6 +959,7 @@ int RegExpGlobalCache::AdvanceZeroLength(int last_index) { int32_t* RegExpGlobalCache::FetchNext() { current_match_index_++; + if (current_match_index_ >= num_matches_) { // Current batch of results exhausted. // Fail if last batch was not even fully filled. 
diff --git a/deps/v8/src/regexp/regexp.h b/deps/v8/src/regexp/regexp.h index 0f3ed463da..8ccc9789a3 100644 --- a/deps/v8/src/regexp/regexp.h +++ b/deps/v8/src/regexp/regexp.h @@ -13,6 +13,8 @@ namespace internal { class RegExpNode; class RegExpTree; +enum class RegExpCompilationTarget : int { kBytecode, kNative }; + // TODO(jgruber): Consider splitting between ParseData and CompileData. struct RegExpCompileData { // The parsed AST as produced by the RegExpParser. @@ -21,8 +23,8 @@ struct RegExpCompileData { // The compiled Node graph as produced by RegExpTree::ToNode methods. RegExpNode* node = nullptr; - // The generated code as produced by the compiler. Either a Code object (for - // irregexp native code) or a ByteArray (for irregexp bytecode). + // Either the generated code as produced by the compiler or a trampoline + // to the interpreter. Object code; // True, iff the pattern is a 'simple' atom with zero captures. In other @@ -46,12 +48,20 @@ struct RegExpCompileData { // The number of registers used by the generated code. int register_count = 0; + + // The compilation target (bytecode or native code). + RegExpCompilationTarget compilation_target; }; class RegExp final : public AllStatic { public: // Whether the irregexp engine generates native code or interpreter bytecode. - static bool GeneratesNativeCode() { return !FLAG_regexp_interpret_all; } + static bool CanGenerateNativeCode() { + return !FLAG_regexp_interpret_all || FLAG_regexp_tier_up; + } + static bool CanGenerateBytecode() { + return FLAG_regexp_interpret_all || FLAG_regexp_tier_up; + } // Parses the RegExp pattern and prepares the JSRegExp object with // generic data and choice of implementation - as well as what @@ -61,6 +71,11 @@ class RegExp final : public AllStatic { Isolate* isolate, Handle<JSRegExp> re, Handle<String> pattern, JSRegExp::Flags flags); + enum CallOrigin : int { + kFromRuntime = 0, + kFromJs = 1, + }; + // See ECMA-262 section 15.10.6.2. 
// This function calls the garbage collector if necessary. V8_EXPORT_PRIVATE V8_WARN_UNUSED_RESULT static MaybeHandle<Object> Exec( @@ -73,7 +88,7 @@ class RegExp final : public AllStatic { static constexpr int kInternalRegExpException = -1; static constexpr int kInternalRegExpRetry = -2; - enum IrregexpResult { + enum IrregexpResult : int32_t { RE_FAILURE = kInternalRegExpFailure, RE_SUCCESS = kInternalRegExpSuccess, RE_EXCEPTION = kInternalRegExpException, diff --git a/deps/v8/src/regexp/s390/regexp-macro-assembler-s390.cc b/deps/v8/src/regexp/s390/regexp-macro-assembler-s390.cc index 5ebdd6ce15..d4144e7e64 100644 --- a/deps/v8/src/regexp/s390/regexp-macro-assembler-s390.cc +++ b/deps/v8/src/regexp/s390/regexp-macro-assembler-s390.cc @@ -178,9 +178,10 @@ void RegExpMacroAssemblerS390::CheckCharacterGT(uc16 limit, Label* on_greater) { BranchOrBacktrack(gt, on_greater); } -void RegExpMacroAssemblerS390::CheckAtStart(Label* on_at_start) { +void RegExpMacroAssemblerS390::CheckAtStart(int cp_offset, Label* on_at_start) { __ LoadP(r3, MemOperand(frame_pointer(), kStringStartMinusOne)); - __ AddP(r2, current_input_offset(), Operand(-char_size())); + __ AddP(r2, current_input_offset(), + Operand(-char_size() + cp_offset * char_size())); __ CmpP(r2, r3); BranchOrBacktrack(eq, on_at_start); } @@ -663,7 +664,7 @@ Handle<HeapObject> RegExpMacroAssemblerS390::GetCode(Handle<String> source) { Label stack_ok; ExternalReference stack_limit = - ExternalReference::address_of_stack_limit(isolate()); + ExternalReference::address_of_jslimit(isolate()); __ mov(r2, Operand(stack_limit)); __ LoadP(r2, MemOperand(r2)); __ SubP(r2, sp, r2); @@ -965,14 +966,19 @@ RegExpMacroAssemblerS390::Implementation() { return kS390Implementation; } -void RegExpMacroAssemblerS390::LoadCurrentCharacter(int cp_offset, - Label* on_end_of_input, - bool check_bounds, - int characters) { +void RegExpMacroAssemblerS390::LoadCurrentCharacterImpl(int cp_offset, + Label* on_end_of_input, + bool check_bounds, + 
int characters, + int eats_at_least) { + // It's possible to preload a small number of characters when each success + // path requires a large number of characters, but not the reverse. + DCHECK_GE(eats_at_least, characters); + DCHECK(cp_offset < (1 << 30)); // Be sane! (And ensure negation works) if (check_bounds) { if (cp_offset >= 0) { - CheckPosition(cp_offset + characters - 1, on_end_of_input); + CheckPosition(cp_offset + eats_at_least - 1, on_end_of_input); } else { CheckPosition(cp_offset, on_end_of_input); } @@ -1120,8 +1126,10 @@ int RegExpMacroAssemblerS390::CheckStackGuardState(Address* return_address, return NativeRegExpMacroAssembler::CheckStackGuardState( frame_entry<Isolate*>(re_frame, kIsolate), frame_entry<intptr_t>(re_frame, kStartIndex), - frame_entry<intptr_t>(re_frame, kDirectCall) == 1, return_address, - re_code, frame_entry_address<Address>(re_frame, kInputString), + static_cast<RegExp::CallOrigin>( + frame_entry<intptr_t>(re_frame, kDirectCall)), + return_address, re_code, + frame_entry_address<Address>(re_frame, kInputString), frame_entry_address<const byte*>(re_frame, kInputStart), frame_entry_address<const byte*>(re_frame, kInputEnd)); } @@ -1206,7 +1214,7 @@ void RegExpMacroAssemblerS390::Pop(Register target) { void RegExpMacroAssemblerS390::CheckPreemption() { // Check for preemption. 
ExternalReference stack_limit = - ExternalReference::address_of_stack_limit(isolate()); + ExternalReference::address_of_jslimit(isolate()); __ mov(r2, Operand(stack_limit)); __ CmpLogicalP(sp, MemOperand(r2)); SafeCall(&check_preempt_label_, le); @@ -1214,7 +1222,7 @@ void RegExpMacroAssemblerS390::CheckPreemption() { void RegExpMacroAssemblerS390::CheckStackLimit() { ExternalReference stack_limit = - ExternalReference::address_of_regexp_stack_limit(isolate()); + ExternalReference::address_of_regexp_stack_limit_address(isolate()); __ mov(r2, Operand(stack_limit)); __ CmpLogicalP(backtrack_stackpointer(), MemOperand(r2)); SafeCall(&stack_overflow_label_, le); diff --git a/deps/v8/src/regexp/s390/regexp-macro-assembler-s390.h b/deps/v8/src/regexp/s390/regexp-macro-assembler-s390.h index 636ba76079..3a6a915263 100644 --- a/deps/v8/src/regexp/s390/regexp-macro-assembler-s390.h +++ b/deps/v8/src/regexp/s390/regexp-macro-assembler-s390.h @@ -23,7 +23,7 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerS390 virtual void AdvanceRegister(int reg, int by); virtual void Backtrack(); virtual void Bind(Label* label); - virtual void CheckAtStart(Label* on_at_start); + virtual void CheckAtStart(int cp_offset, Label* on_at_start); virtual void CheckCharacter(unsigned c, Label* on_equal); virtual void CheckCharacterAfterAnd(unsigned c, unsigned mask, Label* on_equal); @@ -59,9 +59,9 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerS390 virtual void IfRegisterLT(int reg, int comparand, Label* if_lt); virtual void IfRegisterEqPos(int reg, Label* if_eq); virtual IrregexpImplementation Implementation(); - virtual void LoadCurrentCharacter(int cp_offset, Label* on_end_of_input, - bool check_bounds = true, - int characters = 1); + virtual void LoadCurrentCharacterImpl(int cp_offset, Label* on_end_of_input, + bool check_bounds, int characters, + int eats_at_least); virtual void PopCurrentPosition(); virtual void PopRegister(int register_index); virtual void PushBacktrack(Label* label); diff 
--git a/deps/v8/src/regexp/x64/regexp-macro-assembler-x64.cc b/deps/v8/src/regexp/x64/regexp-macro-assembler-x64.cc index 798484d52f..42ba13c4ee 100644 --- a/deps/v8/src/regexp/x64/regexp-macro-assembler-x64.cc +++ b/deps/v8/src/regexp/x64/regexp-macro-assembler-x64.cc @@ -48,6 +48,8 @@ namespace internal { * * The stack will have the following content, in some order, indexable from the * frame pointer (see, e.g., kStackHighEnd): + * - Address regexp (address of the JSRegExp object; unused in native + * code, passed to match signature of interpreter) * - Isolate* isolate (address of the current isolate) * - direct_call (if 1, direct call from JavaScript code, if 0 call * through the runtime system) @@ -75,9 +77,8 @@ namespace internal { * "character -1" in the string (i.e., char_size() bytes before the first * character of the string). The remaining registers starts out uninitialized. * - * The first seven values must be provided by the calling code by - * calling the code's entry address cast to a function pointer with the - * following signature: + * The argument values must be provided by the calling code by calling the + * code's entry address cast to a function pointer with the following signature: * int (*match)(String input_string, * int start_index, * Address start, @@ -86,7 +87,8 @@ namespace internal { * int num_capture_registers, * byte* stack_area_base, * bool direct_call = false, - * Isolate* isolate); + * Isolate* isolate, + * Address regexp); */ #define __ ACCESS_MASM((&masm_)) @@ -172,14 +174,12 @@ void RegExpMacroAssemblerX64::CheckCharacterGT(uc16 limit, Label* on_greater) { BranchOrBacktrack(greater, on_greater); } - -void RegExpMacroAssemblerX64::CheckAtStart(Label* on_at_start) { - __ leaq(rax, Operand(rdi, -char_size())); +void RegExpMacroAssemblerX64::CheckAtStart(int cp_offset, Label* on_at_start) { + __ leaq(rax, Operand(rdi, -char_size() + cp_offset * char_size())); __ cmpq(rax, Operand(rbp, kStringStartMinusOne)); BranchOrBacktrack(equal, 
on_at_start); } - void RegExpMacroAssemblerX64::CheckNotAtStart(int cp_offset, Label* on_not_at_start) { __ leaq(rax, Operand(rdi, -char_size() + cp_offset * char_size())); @@ -721,7 +721,7 @@ Handle<HeapObject> RegExpMacroAssemblerX64::GetCode(Handle<String> source) { Label stack_ok; ExternalReference stack_limit = - ExternalReference::address_of_stack_limit(isolate()); + ExternalReference::address_of_jslimit(isolate()); __ movq(rcx, rsp); __ Move(kScratchRegister, stack_limit); __ subq(rcx, Operand(kScratchRegister, 0)); @@ -1035,15 +1035,19 @@ RegExpMacroAssembler::IrregexpImplementation return kX64Implementation; } +void RegExpMacroAssemblerX64::LoadCurrentCharacterImpl(int cp_offset, + Label* on_end_of_input, + bool check_bounds, + int characters, + int eats_at_least) { + // It's possible to preload a small number of characters when each success + // path requires a large number of characters, but not the reverse. + DCHECK_GE(eats_at_least, characters); -void RegExpMacroAssemblerX64::LoadCurrentCharacter(int cp_offset, - Label* on_end_of_input, - bool check_bounds, - int characters) { DCHECK(cp_offset < (1<<30)); // Be sane! 
(And ensure negation works) if (check_bounds) { if (cp_offset >= 0) { - CheckPosition(cp_offset + characters - 1, on_end_of_input); + CheckPosition(cp_offset + eats_at_least - 1, on_end_of_input); } else { CheckPosition(cp_offset, on_end_of_input); } @@ -1051,7 +1055,6 @@ void RegExpMacroAssemblerX64::LoadCurrentCharacter(int cp_offset, LoadCurrentCharacterUnchecked(cp_offset, characters); } - void RegExpMacroAssemblerX64::PopCurrentPosition() { Pop(rdi); } @@ -1198,7 +1201,8 @@ int RegExpMacroAssemblerX64::CheckStackGuardState(Address* return_address, return NativeRegExpMacroAssembler::CheckStackGuardState( frame_entry<Isolate*>(re_frame, kIsolate), frame_entry<int>(re_frame, kStartIndex), - frame_entry<int>(re_frame, kDirectCall) == 1, return_address, re_code, + static_cast<RegExp::CallOrigin>(frame_entry<int>(re_frame, kDirectCall)), + return_address, re_code, frame_entry_address<Address>(re_frame, kInputString), frame_entry_address<const byte*>(re_frame, kInputStart), frame_entry_address<const byte*>(re_frame, kInputEnd)); @@ -1318,7 +1322,7 @@ void RegExpMacroAssemblerX64::CheckPreemption() { // Check for preemption. 
Label no_preempt; ExternalReference stack_limit = - ExternalReference::address_of_stack_limit(isolate()); + ExternalReference::address_of_jslimit(isolate()); __ load_rax(stack_limit); __ cmpq(rsp, rax); __ j(above, &no_preempt); @@ -1332,7 +1336,7 @@ void RegExpMacroAssemblerX64::CheckPreemption() { void RegExpMacroAssemblerX64::CheckStackLimit() { Label no_stack_overflow; ExternalReference stack_limit = - ExternalReference::address_of_regexp_stack_limit(isolate()); + ExternalReference::address_of_regexp_stack_limit_address(isolate()); __ load_rax(stack_limit); __ cmpq(backtrack_stackpointer(), rax); __ j(above, &no_stack_overflow); diff --git a/deps/v8/src/regexp/x64/regexp-macro-assembler-x64.h b/deps/v8/src/regexp/x64/regexp-macro-assembler-x64.h index 59b80ef802..9d011dcd46 100644 --- a/deps/v8/src/regexp/x64/regexp-macro-assembler-x64.h +++ b/deps/v8/src/regexp/x64/regexp-macro-assembler-x64.h @@ -24,7 +24,7 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerX64 void AdvanceRegister(int reg, int by) override; void Backtrack() override; void Bind(Label* label) override; - void CheckAtStart(Label* on_at_start) override; + void CheckAtStart(int cp_offset, Label* on_at_start) override; void CheckCharacter(uint32_t c, Label* on_equal) override; void CheckCharacterAfterAnd(uint32_t c, uint32_t mask, Label* on_equal) override; @@ -60,9 +60,9 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerX64 void IfRegisterLT(int reg, int comparand, Label* if_lt) override; void IfRegisterEqPos(int reg, Label* if_eq) override; IrregexpImplementation Implementation() override; - void LoadCurrentCharacter(int cp_offset, Label* on_end_of_input, - bool check_bounds = true, - int characters = 1) override; + void LoadCurrentCharacterImpl(int cp_offset, Label* on_end_of_input, + bool check_bounds, int characters, + int eats_at_least) override; void PopCurrentPosition() override; void PopRegister(int register_index) override; void PushBacktrack(Label* label) override; |