summaryrefslogtreecommitdiff
path: root/src/3rdparty/v8/src/x64/regexp-macro-assembler-x64.cc
diff options
context:
space:
mode:
Diffstat (limited to 'src/3rdparty/v8/src/x64/regexp-macro-assembler-x64.cc')
-rw-r--r--src/3rdparty/v8/src/x64/regexp-macro-assembler-x64.cc201
1 files changed, 138 insertions, 63 deletions
diff --git a/src/3rdparty/v8/src/x64/regexp-macro-assembler-x64.cc b/src/3rdparty/v8/src/x64/regexp-macro-assembler-x64.cc
index bf232bf..86f7bfe 100644
--- a/src/3rdparty/v8/src/x64/regexp-macro-assembler-x64.cc
+++ b/src/3rdparty/v8/src/x64/regexp-macro-assembler-x64.cc
@@ -1,4 +1,4 @@
-// Copyright 2011 the V8 project authors. All rights reserved.
+// Copyright 2012 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
@@ -44,21 +44,23 @@ namespace internal {
/*
* This assembler uses the following register assignment convention
- * - rdx : currently loaded character(s) as ASCII or UC16. Must be loaded using
- * LoadCurrentCharacter before using any of the dispatch methods.
- * - rdi : current position in input, as negative offset from end of string.
+ * - rdx : Currently loaded character(s) as ASCII or UC16. Must be loaded
+ * using LoadCurrentCharacter before using any of the dispatch methods.
+ * Temporarily stores the index of capture start after a matching pass
+ * for a global regexp.
+ * - rdi : Current position in input, as negative offset from end of string.
* Please notice that this is the byte offset, not the character
- * offset! Is always a 32-bit signed (negative) offset, but must be
+ * offset! Is always a 32-bit signed (negative) offset, but must be
* maintained sign-extended to 64 bits, since it is used as index.
- * - rsi : end of input (points to byte after last character in input),
+ * - rsi : End of input (points to byte after last character in input),
* so that rsi+rdi points to the current character.
- * - rbp : frame pointer. Used to access arguments, local variables and
+ * - rbp : Frame pointer. Used to access arguments, local variables and
* RegExp registers.
- * - rsp : points to tip of C stack.
- * - rcx : points to tip of backtrack stack. The backtrack stack contains
- * only 32-bit values. Most are offsets from some base (e.g., character
+ * - rsp : Points to tip of C stack.
+ * - rcx : Points to tip of backtrack stack. The backtrack stack contains
+ * only 32-bit values. Most are offsets from some base (e.g., character
* positions from end of string or code location from Code* pointer).
- * - r8 : code object pointer. Used to convert between absolute and
+ * - r8 : Code object pointer. Used to convert between absolute and
* code-object-relative addresses.
*
* The registers rax, rbx, r9 and r11 are free to use for computations.
@@ -72,20 +74,22 @@ namespace internal {
*
* The stack will have the following content, in some order, indexable from the
* frame pointer (see, e.g., kStackHighEnd):
- * - Isolate* isolate (Address of the current isolate)
+ * - Isolate* isolate (address of the current isolate)
* - direct_call (if 1, direct call from JavaScript code, if 0 call
* through the runtime system)
- * - stack_area_base (High end of the memory area to use as
+ * - stack_area_base (high end of the memory area to use as
* backtracking stack)
+ * - capture array size (may fit multiple sets of matches)
* - int* capture_array (int[num_saved_registers_], for output).
- * - end of input (Address of end of string)
- * - start of input (Address of first character in string)
+ * - end of input (address of end of string)
+ * - start of input (address of first character in string)
* - start index (character index of start)
* - String* input_string (input string)
* - return address
* - backup of callee save registers (rbx, possibly rsi and rdi).
+ * - success counter (only useful for global regexp to count matches)
* - Offset of location before start of input (effectively character
- * position -1). Used to initialize capture registers to a non-position.
+ * position -1). Used to initialize capture registers to a non-position.
* - At start of string (if 1, we are starting at the start of the
* string, otherwise 0)
* - register 0 rbp[-n] (Only positions must be stored in the first
@@ -94,7 +98,7 @@ namespace internal {
*
* The first num_saved_registers_ registers are initialized to point to
* "character -1" in the string (i.e., char_size() bytes before the first
- * character of the string). The remaining registers starts out uninitialized.
+ * character of the string). The remaining registers starts out uninitialized.
*
* The first seven values must be provided by the calling code by
* calling the code's entry address cast to a function pointer with the
@@ -113,10 +117,12 @@ namespace internal {
RegExpMacroAssemblerX64::RegExpMacroAssemblerX64(
Mode mode,
- int registers_to_save)
- : masm_(Isolate::Current(), NULL, kRegExpCodeSize),
+ int registers_to_save,
+ Zone* zone)
+ : NativeRegExpMacroAssembler(zone),
+ masm_(Isolate::Current(), NULL, kRegExpCodeSize),
no_root_array_scope_(&masm_),
- code_relative_fixup_positions_(4),
+ code_relative_fixup_positions_(4, zone),
mode_(mode),
num_registers_(registers_to_save),
num_saved_registers_(registers_to_save),
@@ -347,6 +353,14 @@ void RegExpMacroAssemblerX64::CheckNotBackReferenceIgnoreCase(
// In either case succeed immediately.
__ j(equal, &fallthrough);
+ // -----------------------
+ // rdx - Start of capture
+ // rbx - length of capture
+ // Check that there are sufficient characters left in the input.
+ __ movl(rax, rdi);
+ __ addl(rax, rbx);
+ BranchOrBacktrack(greater, on_no_match);
+
if (mode_ == ASCII) {
Label loop_increment;
if (on_no_match == NULL) {
@@ -523,15 +537,6 @@ void RegExpMacroAssemblerX64::CheckNotBackReference(
}
-void RegExpMacroAssemblerX64::CheckNotRegistersEqual(int reg1,
- int reg2,
- Label* on_not_equal) {
- __ movq(rax, register_location(reg1));
- __ cmpq(rax, register_location(reg2));
- BranchOrBacktrack(not_equal, on_not_equal);
-}
-
-
void RegExpMacroAssemblerX64::CheckNotCharacter(uint32_t c,
Label* on_not_equal) {
__ cmpl(current_character(), Immediate(c));
@@ -744,13 +749,16 @@ bool RegExpMacroAssemblerX64::CheckSpecialCharacterClass(uc16 type,
void RegExpMacroAssemblerX64::Fail() {
- ASSERT(FAILURE == 0); // Return value for failure is zero.
- __ Set(rax, 0);
+ STATIC_ASSERT(FAILURE == 0); // Return value for failure is zero.
+ if (!global()) {
+ __ Set(rax, FAILURE);
+ }
__ jmp(&exit_label_);
}
Handle<HeapObject> RegExpMacroAssemblerX64::GetCode(Handle<String> source) {
+ Label return_rax;
// Finalize code - write the entry point code now we know how many
// registers we need.
// Entry code:
@@ -784,7 +792,7 @@ Handle<HeapObject> RegExpMacroAssemblerX64::GetCode(Handle<String> source) {
ASSERT_EQ(kInputStart, -3 * kPointerSize);
ASSERT_EQ(kInputEnd, -4 * kPointerSize);
ASSERT_EQ(kRegisterOutput, -5 * kPointerSize);
- ASSERT_EQ(kStackHighEnd, -6 * kPointerSize);
+ ASSERT_EQ(kNumOutputRegisters, -6 * kPointerSize);
__ push(rdi);
__ push(rsi);
__ push(rdx);
@@ -795,7 +803,8 @@ Handle<HeapObject> RegExpMacroAssemblerX64::GetCode(Handle<String> source) {
__ push(rbx); // Callee-save
#endif
- __ push(Immediate(0)); // Make room for "at start" constant.
+ __ push(Immediate(0)); // Number of successful matches in a global regexp.
+ __ push(Immediate(0)); // Make room for "input start - 1" constant.
// Check if we have space on the stack for registers.
Label stack_limit_hit;
@@ -815,14 +824,14 @@ Handle<HeapObject> RegExpMacroAssemblerX64::GetCode(Handle<String> source) {
// Exit with OutOfMemory exception. There is not enough space on the stack
// for our working registers.
__ Set(rax, EXCEPTION);
- __ jmp(&exit_label_);
+ __ jmp(&return_rax);
__ bind(&stack_limit_hit);
__ Move(code_object_pointer(), masm_.CodeObject());
CallCheckStackGuardState(); // Preserves no registers beside rbp and rsp.
__ testq(rax, rax);
// If returned value is non-zero, we exit with the returned value as result.
- __ j(not_zero, &exit_label_);
+ __ j(not_zero, &return_rax);
__ bind(&stack_ok);
@@ -847,19 +856,7 @@ Handle<HeapObject> RegExpMacroAssemblerX64::GetCode(Handle<String> source) {
// position registers.
__ movq(Operand(rbp, kInputStartMinusOne), rax);
- if (num_saved_registers_ > 0) {
- // Fill saved registers with initial value = start offset - 1
- // Fill in stack push order, to avoid accessing across an unwritten
- // page (a problem on Windows).
- __ Set(rcx, kRegisterZero);
- Label init_loop;
- __ bind(&init_loop);
- __ movq(Operand(rbp, rcx, times_1, 0), rax);
- __ subq(rcx, Immediate(kPointerSize));
- __ cmpq(rcx,
- Immediate(kRegisterZero - num_saved_registers_ * kPointerSize));
- __ j(greater, &init_loop);
- }
+#ifdef WIN32
// Ensure that we have written to each stack page, in order. Skipping a page
// on Windows can cause segmentation faults. Assuming page size is 4k.
const int kPageSize = 4096;
@@ -869,21 +866,49 @@ Handle<HeapObject> RegExpMacroAssemblerX64::GetCode(Handle<String> source) {
i += kRegistersPerPage) {
__ movq(register_location(i), rax); // One write every page.
}
+#endif // WIN32
- // Initialize backtrack stack pointer.
- __ movq(backtrack_stackpointer(), Operand(rbp, kStackHighEnd));
// Initialize code object pointer.
__ Move(code_object_pointer(), masm_.CodeObject());
- // Load previous char as initial value of current-character.
- Label at_start;
- __ cmpb(Operand(rbp, kStartIndex), Immediate(0));
- __ j(equal, &at_start);
- LoadCurrentCharacterUnchecked(-1, 1); // Load previous char.
- __ jmp(&start_label_);
- __ bind(&at_start);
+
+ Label load_char_start_regexp, start_regexp;
+ // Load newline if index is at start, previous character otherwise.
+ __ cmpl(Operand(rbp, kStartIndex), Immediate(0));
+ __ j(not_equal, &load_char_start_regexp, Label::kNear);
__ Set(current_character(), '\n');
- __ jmp(&start_label_);
+ __ jmp(&start_regexp, Label::kNear);
+
+ // Global regexp restarts matching here.
+ __ bind(&load_char_start_regexp);
+ // Load previous char as initial value of current character register.
+ LoadCurrentCharacterUnchecked(-1, 1);
+ __ bind(&start_regexp);
+
+ // Initialize on-stack registers.
+ if (num_saved_registers_ > 0) {
+ // Fill saved registers with initial value = start offset - 1
+ // Fill in stack push order, to avoid accessing across an unwritten
+ // page (a problem on Windows).
+ if (num_saved_registers_ > 8) {
+ __ Set(rcx, kRegisterZero);
+ Label init_loop;
+ __ bind(&init_loop);
+ __ movq(Operand(rbp, rcx, times_1, 0), rax);
+ __ subq(rcx, Immediate(kPointerSize));
+ __ cmpq(rcx,
+ Immediate(kRegisterZero - num_saved_registers_ * kPointerSize));
+ __ j(greater, &init_loop);
+ } else { // Unroll the loop.
+ for (int i = 0; i < num_saved_registers_; i++) {
+ __ movq(register_location(i), rax);
+ }
+ }
+ }
+ // Initialize backtrack stack pointer.
+ __ movq(backtrack_stackpointer(), Operand(rbp, kStackHighEnd));
+
+ __ jmp(&start_label_);
// Exit code:
if (success_label_.is_linked()) {
@@ -902,6 +927,10 @@ Handle<HeapObject> RegExpMacroAssemblerX64::GetCode(Handle<String> source) {
}
for (int i = 0; i < num_saved_registers_; i++) {
__ movq(rax, register_location(i));
+ if (i == 0 && global_with_zero_length_check()) {
+ // Keep capture start in rdx for the zero-length check later.
+ __ movq(rdx, rax);
+ }
__ addq(rax, rcx); // Convert to index from start, not end.
if (mode_ == UC16) {
__ sar(rax, Immediate(1)); // Convert byte index to character index.
@@ -909,12 +938,57 @@ Handle<HeapObject> RegExpMacroAssemblerX64::GetCode(Handle<String> source) {
__ movl(Operand(rbx, i * kIntSize), rax);
}
}
- __ Set(rax, SUCCESS);
+
+ if (global()) {
+ // Restart matching if the regular expression is flagged as global.
+ // Increment success counter.
+ __ incq(Operand(rbp, kSuccessfulCaptures));
+ // Capture results have been stored, so the number of remaining global
+ // output registers is reduced by the number of stored captures.
+ __ movsxlq(rcx, Operand(rbp, kNumOutputRegisters));
+ __ subq(rcx, Immediate(num_saved_registers_));
+ // Check whether we have enough room for another set of capture results.
+ __ cmpq(rcx, Immediate(num_saved_registers_));
+ __ j(less, &exit_label_);
+
+ __ movq(Operand(rbp, kNumOutputRegisters), rcx);
+ // Advance the location for output.
+ __ addq(Operand(rbp, kRegisterOutput),
+ Immediate(num_saved_registers_ * kIntSize));
+
+ // Prepare rax to initialize registers with its value in the next run.
+ __ movq(rax, Operand(rbp, kInputStartMinusOne));
+
+ if (global_with_zero_length_check()) {
+ // Special case for zero-length matches.
+ // rdx: capture start index
+ __ cmpq(rdi, rdx);
+ // Not a zero-length match, restart.
+ __ j(not_equal, &load_char_start_regexp);
+ // rdi (offset from the end) is zero if we already reached the end.
+ __ testq(rdi, rdi);
+ __ j(zero, &exit_label_, Label::kNear);
+ // Advance current position after a zero-length match.
+ if (mode_ == UC16) {
+ __ addq(rdi, Immediate(2));
+ } else {
+ __ incq(rdi);
+ }
+ }
+
+ __ jmp(&load_char_start_regexp);
+ } else {
+ __ movq(rax, Immediate(SUCCESS));
+ }
}
- // Exit and return rax
__ bind(&exit_label_);
+ if (global()) {
+ // Return the number of successful captures.
+ __ movq(rax, Operand(rbp, kSuccessfulCaptures));
+ }
+ __ bind(&return_rax);
#ifdef _WIN64
// Restore callee save registers.
__ lea(rsp, Operand(rbp, kLastCalleeSaveRegister));
@@ -951,7 +1025,7 @@ Handle<HeapObject> RegExpMacroAssemblerX64::GetCode(Handle<String> source) {
__ testq(rax, rax);
// If returning non-zero, we should end execution with the given
// result as return value.
- __ j(not_zero, &exit_label_);
+ __ j(not_zero, &return_rax);
// Restore registers.
__ Move(code_object_pointer(), masm_.CodeObject());
@@ -1012,7 +1086,7 @@ Handle<HeapObject> RegExpMacroAssemblerX64::GetCode(Handle<String> source) {
__ bind(&exit_with_exception);
// Exit with Result EXCEPTION(-1) to signal thrown exception.
__ Set(rax, EXCEPTION);
- __ jmp(&exit_label_);
+ __ jmp(&return_rax);
}
FixupCodeRelativePositions();
@@ -1135,8 +1209,9 @@ void RegExpMacroAssemblerX64::SetRegister(int register_index, int to) {
}
-void RegExpMacroAssemblerX64::Succeed() {
+bool RegExpMacroAssemblerX64::Succeed() {
__ jmp(&success_label_);
+ return global();
}