diff options
Diffstat (limited to 'src/3rdparty/v8/src/x64/regexp-macro-assembler-x64.cc')
-rw-r--r-- | src/3rdparty/v8/src/x64/regexp-macro-assembler-x64.cc | 201 |
1 files changed, 138 insertions, 63 deletions
diff --git a/src/3rdparty/v8/src/x64/regexp-macro-assembler-x64.cc b/src/3rdparty/v8/src/x64/regexp-macro-assembler-x64.cc index bf232bf..86f7bfe 100644 --- a/src/3rdparty/v8/src/x64/regexp-macro-assembler-x64.cc +++ b/src/3rdparty/v8/src/x64/regexp-macro-assembler-x64.cc @@ -1,4 +1,4 @@ -// Copyright 2011 the V8 project authors. All rights reserved. +// Copyright 2012 the V8 project authors. All rights reserved. // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -44,21 +44,23 @@ namespace internal { /* * This assembler uses the following register assignment convention - * - rdx : currently loaded character(s) as ASCII or UC16. Must be loaded using - * LoadCurrentCharacter before using any of the dispatch methods. - * - rdi : current position in input, as negative offset from end of string. + * - rdx : Currently loaded character(s) as ASCII or UC16. Must be loaded + * using LoadCurrentCharacter before using any of the dispatch methods. + * Temporarily stores the index of capture start after a matching pass + * for a global regexp. + * - rdi : Current position in input, as negative offset from end of string. * Please notice that this is the byte offset, not the character - * offset! Is always a 32-bit signed (negative) offset, but must be + * offset! Is always a 32-bit signed (negative) offset, but must be * maintained sign-extended to 64 bits, since it is used as index. - * - rsi : end of input (points to byte after last character in input), + * - rsi : End of input (points to byte after last character in input), * so that rsi+rdi points to the current character. - * - rbp : frame pointer. Used to access arguments, local variables and + * - rbp : Frame pointer. Used to access arguments, local variables and * RegExp registers. - * - rsp : points to tip of C stack. - * - rcx : points to tip of backtrack stack. The backtrack stack contains - * only 32-bit values. Most are offsets from some base (e.g., character + * - rsp : Points to tip of C stack. + * - rcx : Points to tip of backtrack stack. The backtrack stack contains + * only 32-bit values. Most are offsets from some base (e.g., character * positions from end of string or code location from Code* pointer). - * - r8 : code object pointer. Used to convert between absolute and + * - r8 : Code object pointer. Used to convert between absolute and * code-object-relative addresses. * * The registers rax, rbx, r9 and r11 are free to use for computations. @@ -72,20 +74,22 @@ namespace internal { * * The stack will have the following content, in some order, indexable from the * frame pointer (see, e.g., kStackHighEnd): - * - Isolate* isolate (Address of the current isolate) + * - Isolate* isolate (address of the current isolate) * - direct_call (if 1, direct call from JavaScript code, if 0 call * through the runtime system) - * - stack_area_base (High end of the memory area to use as + * - stack_area_base (high end of the memory area to use as * backtracking stack) + * - capture array size (may fit multiple sets of matches) * - int* capture_array (int[num_saved_registers_], for output). - * - end of input (Address of end of string) - * - start of input (Address of first character in string) + * - end of input (address of end of string) + * - start of input (address of first character in string) * - start index (character index of start) * - String* input_string (input string) * - return address * - backup of callee save registers (rbx, possibly rsi and rdi). + * - success counter (only useful for global regexp to count matches) * - Offset of location before start of input (effectively character - * position -1). Used to initialize capture registers to a non-position. + * position -1). Used to initialize capture registers to a non-position. * - At start of string (if 1, we are starting at the start of the * string, otherwise 0) * - register 0 rbp[-n] (Only positions must be stored in the first @@ -94,7 +98,7 @@ namespace internal { * * The first num_saved_registers_ registers are initialized to point to * "character -1" in the string (i.e., char_size() bytes before the first - * character of the string). The remaining registers starts out uninitialized. + * character of the string). The remaining registers starts out uninitialized. * * The first seven values must be provided by the calling code by * calling the code's entry address cast to a function pointer with the @@ -113,10 +117,12 @@ namespace internal { RegExpMacroAssemblerX64::RegExpMacroAssemblerX64( Mode mode, - int registers_to_save) - : masm_(Isolate::Current(), NULL, kRegExpCodeSize), + int registers_to_save, + Zone* zone) + : NativeRegExpMacroAssembler(zone), + masm_(Isolate::Current(), NULL, kRegExpCodeSize), no_root_array_scope_(&masm_), - code_relative_fixup_positions_(4), + code_relative_fixup_positions_(4, zone), mode_(mode), num_registers_(registers_to_save), num_saved_registers_(registers_to_save), @@ -347,6 +353,14 @@ void RegExpMacroAssemblerX64::CheckNotBackReferenceIgnoreCase( // In either case succeed immediately. __ j(equal, &fallthrough); + // ----------------------- + // rdx - Start of capture + // rbx - length of capture + // Check that there are sufficient characters left in the input. + __ movl(rax, rdi); + __ addl(rax, rbx); + BranchOrBacktrack(greater, on_no_match); + if (mode_ == ASCII) { Label loop_increment; if (on_no_match == NULL) { @@ -523,15 +537,6 @@ void RegExpMacroAssemblerX64::CheckNotBackReference( } -void RegExpMacroAssemblerX64::CheckNotRegistersEqual(int reg1, - int reg2, - Label* on_not_equal) { - __ movq(rax, register_location(reg1)); - __ cmpq(rax, register_location(reg2)); - BranchOrBacktrack(not_equal, on_not_equal); -} - - void RegExpMacroAssemblerX64::CheckNotCharacter(uint32_t c, Label* on_not_equal) { __ cmpl(current_character(), Immediate(c)); @@ -744,13 +749,16 @@ bool RegExpMacroAssemblerX64::CheckSpecialCharacterClass(uc16 type, void RegExpMacroAssemblerX64::Fail() { - ASSERT(FAILURE == 0); // Return value for failure is zero. - __ Set(rax, 0); + STATIC_ASSERT(FAILURE == 0); // Return value for failure is zero. + if (!global()) { + __ Set(rax, FAILURE); + } __ jmp(&exit_label_); } Handle<HeapObject> RegExpMacroAssemblerX64::GetCode(Handle<String> source) { + Label return_rax; // Finalize code - write the entry point code now we know how many // registers we need. // Entry code: @@ -784,7 +792,7 @@ Handle<HeapObject> RegExpMacroAssemblerX64::GetCode(Handle<String> source) { ASSERT_EQ(kInputStart, -3 * kPointerSize); ASSERT_EQ(kInputEnd, -4 * kPointerSize); ASSERT_EQ(kRegisterOutput, -5 * kPointerSize); - ASSERT_EQ(kStackHighEnd, -6 * kPointerSize); + ASSERT_EQ(kNumOutputRegisters, -6 * kPointerSize); __ push(rdi); __ push(rsi); __ push(rdx); @@ -795,7 +803,8 @@ Handle<HeapObject> RegExpMacroAssemblerX64::GetCode(Handle<String> source) { __ push(rbx); // Callee-save #endif - __ push(Immediate(0)); // Make room for "at start" constant. + __ push(Immediate(0)); // Number of successful matches in a global regexp. + __ push(Immediate(0)); // Make room for "input start - 1" constant. // Check if we have space on the stack for registers. Label stack_limit_hit; @@ -815,14 +824,14 @@ Handle<HeapObject> RegExpMacroAssemblerX64::GetCode(Handle<String> source) { // Exit with OutOfMemory exception. There is not enough space on the stack // for our working registers. __ Set(rax, EXCEPTION); - __ jmp(&exit_label_); + __ jmp(&return_rax); __ bind(&stack_limit_hit); __ Move(code_object_pointer(), masm_.CodeObject()); CallCheckStackGuardState(); // Preserves no registers beside rbp and rsp. __ testq(rax, rax); // If returned value is non-zero, we exit with the returned value as result. - __ j(not_zero, &exit_label_); + __ j(not_zero, &return_rax); __ bind(&stack_ok); @@ -847,19 +856,7 @@ Handle<HeapObject> RegExpMacroAssemblerX64::GetCode(Handle<String> source) { // position registers. __ movq(Operand(rbp, kInputStartMinusOne), rax); - if (num_saved_registers_ > 0) { - // Fill saved registers with initial value = start offset - 1 - // Fill in stack push order, to avoid accessing across an unwritten - // page (a problem on Windows). - __ Set(rcx, kRegisterZero); - Label init_loop; - __ bind(&init_loop); - __ movq(Operand(rbp, rcx, times_1, 0), rax); - __ subq(rcx, Immediate(kPointerSize)); - __ cmpq(rcx, - Immediate(kRegisterZero - num_saved_registers_ * kPointerSize)); - __ j(greater, &init_loop); - } +#ifdef WIN32 // Ensure that we have written to each stack page, in order. Skipping a page // on Windows can cause segmentation faults. Assuming page size is 4k. const int kPageSize = 4096; @@ -869,21 +866,49 @@ Handle<HeapObject> RegExpMacroAssemblerX64::GetCode(Handle<String> source) { i += kRegistersPerPage) { __ movq(register_location(i), rax); // One write every page. } +#endif // WIN32 - // Initialize backtrack stack pointer. - __ movq(backtrack_stackpointer(), Operand(rbp, kStackHighEnd)); // Initialize code object pointer. __ Move(code_object_pointer(), masm_.CodeObject()); - // Load previous char as initial value of current-character. - Label at_start; - __ cmpb(Operand(rbp, kStartIndex), Immediate(0)); - __ j(equal, &at_start); - LoadCurrentCharacterUnchecked(-1, 1); // Load previous char. - __ jmp(&start_label_); - __ bind(&at_start); + + Label load_char_start_regexp, start_regexp; + // Load newline if index is at start, previous character otherwise. + __ cmpl(Operand(rbp, kStartIndex), Immediate(0)); + __ j(not_equal, &load_char_start_regexp, Label::kNear); __ Set(current_character(), '\n'); - __ jmp(&start_label_); + __ jmp(&start_regexp, Label::kNear); + + // Global regexp restarts matching here. + __ bind(&load_char_start_regexp); + // Load previous char as initial value of current character register. + LoadCurrentCharacterUnchecked(-1, 1); + __ bind(&start_regexp); + + // Initialize on-stack registers. + if (num_saved_registers_ > 0) { + // Fill saved registers with initial value = start offset - 1 + // Fill in stack push order, to avoid accessing across an unwritten + // page (a problem on Windows). + if (num_saved_registers_ > 8) { + __ Set(rcx, kRegisterZero); + Label init_loop; + __ bind(&init_loop); + __ movq(Operand(rbp, rcx, times_1, 0), rax); + __ subq(rcx, Immediate(kPointerSize)); + __ cmpq(rcx, + Immediate(kRegisterZero - num_saved_registers_ * kPointerSize)); + __ j(greater, &init_loop); + } else { // Unroll the loop. + for (int i = 0; i < num_saved_registers_; i++) { + __ movq(register_location(i), rax); + } + } + } + // Initialize backtrack stack pointer. + __ movq(backtrack_stackpointer(), Operand(rbp, kStackHighEnd)); + + __ jmp(&start_label_); // Exit code: if (success_label_.is_linked()) { @@ -902,6 +927,10 @@ Handle<HeapObject> RegExpMacroAssemblerX64::GetCode(Handle<String> source) { } for (int i = 0; i < num_saved_registers_; i++) { __ movq(rax, register_location(i)); + if (i == 0 && global_with_zero_length_check()) { + // Keep capture start in rdx for the zero-length check later. + __ movq(rdx, rax); + } __ addq(rax, rcx); // Convert to index from start, not end. if (mode_ == UC16) { __ sar(rax, Immediate(1)); // Convert byte index to character index. @@ -909,12 +938,57 @@ Handle<HeapObject> RegExpMacroAssemblerX64::GetCode(Handle<String> source) { __ movl(Operand(rbx, i * kIntSize), rax); } } - __ Set(rax, SUCCESS); + + if (global()) { + // Restart matching if the regular expression is flagged as global. + // Increment success counter. + __ incq(Operand(rbp, kSuccessfulCaptures)); + // Capture results have been stored, so the number of remaining global + // output registers is reduced by the number of stored captures. + __ movsxlq(rcx, Operand(rbp, kNumOutputRegisters)); + __ subq(rcx, Immediate(num_saved_registers_)); + // Check whether we have enough room for another set of capture results. + __ cmpq(rcx, Immediate(num_saved_registers_)); + __ j(less, &exit_label_); + + __ movq(Operand(rbp, kNumOutputRegisters), rcx); + // Advance the location for output. + __ addq(Operand(rbp, kRegisterOutput), + Immediate(num_saved_registers_ * kIntSize)); + + // Prepare rax to initialize registers with its value in the next run. + __ movq(rax, Operand(rbp, kInputStartMinusOne)); + + if (global_with_zero_length_check()) { + // Special case for zero-length matches. + // rdx: capture start index + __ cmpq(rdi, rdx); + // Not a zero-length match, restart. + __ j(not_equal, &load_char_start_regexp); + // rdi (offset from the end) is zero if we already reached the end. + __ testq(rdi, rdi); + __ j(zero, &exit_label_, Label::kNear); + // Advance current position after a zero-length match. + if (mode_ == UC16) { + __ addq(rdi, Immediate(2)); + } else { + __ incq(rdi); + } + } + + __ jmp(&load_char_start_regexp); + } else { + __ movq(rax, Immediate(SUCCESS)); + } } - // Exit and return rax __ bind(&exit_label_); + if (global()) { + // Return the number of successful captures. + __ movq(rax, Operand(rbp, kSuccessfulCaptures)); + } + __ bind(&return_rax); #ifdef _WIN64 // Restore callee save registers. __ lea(rsp, Operand(rbp, kLastCalleeSaveRegister)); @@ -951,7 +1025,7 @@ Handle<HeapObject> RegExpMacroAssemblerX64::GetCode(Handle<String> source) { __ testq(rax, rax); // If returning non-zero, we should end execution with the given // result as return value. - __ j(not_zero, &exit_label_); + __ j(not_zero, &return_rax); // Restore registers. __ Move(code_object_pointer(), masm_.CodeObject()); @@ -1012,7 +1086,7 @@ Handle<HeapObject> RegExpMacroAssemblerX64::GetCode(Handle<String> source) { __ bind(&exit_with_exception); // Exit with Result EXCEPTION(-1) to signal thrown exception. __ Set(rax, EXCEPTION); - __ jmp(&exit_label_); + __ jmp(&return_rax); } FixupCodeRelativePositions(); @@ -1135,8 +1209,9 @@ void RegExpMacroAssemblerX64::SetRegister(int register_index, int to) { } -void RegExpMacroAssemblerX64::Succeed() { +bool RegExpMacroAssemblerX64::Succeed() { __ jmp(&success_label_); + return global(); } |