summaryrefslogtreecommitdiff
path: root/src/3rdparty/v8/src/regexp-macro-assembler.cc
diff options
context:
space:
mode:
Diffstat (limited to 'src/3rdparty/v8/src/regexp-macro-assembler.cc')
-rw-r--r--src/3rdparty/v8/src/regexp-macro-assembler.cc266
1 files changed, 266 insertions, 0 deletions
diff --git a/src/3rdparty/v8/src/regexp-macro-assembler.cc b/src/3rdparty/v8/src/regexp-macro-assembler.cc
new file mode 100644
index 0000000..ea41db6
--- /dev/null
+++ b/src/3rdparty/v8/src/regexp-macro-assembler.cc
@@ -0,0 +1,266 @@
+// Copyright 2008 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "v8.h"
+#include "ast.h"
+#include "assembler.h"
+#include "regexp-stack.h"
+#include "regexp-macro-assembler.h"
+#include "simulator.h"
+
+namespace v8 {
+namespace internal {
+
+RegExpMacroAssembler::RegExpMacroAssembler() {
+}
+
+
+RegExpMacroAssembler::~RegExpMacroAssembler() {
+}
+
+
+bool RegExpMacroAssembler::CanReadUnaligned() {
+#ifdef V8_HOST_CAN_READ_UNALIGNED
+ return true;
+#else
+ return false;
+#endif
+}
+
+
+#ifndef V8_INTERPRETED_REGEXP // Avoid unused code, e.g., on ARM.
+
+NativeRegExpMacroAssembler::NativeRegExpMacroAssembler() {
+}
+
+
+NativeRegExpMacroAssembler::~NativeRegExpMacroAssembler() {
+}
+
+
+bool NativeRegExpMacroAssembler::CanReadUnaligned() {
+#ifdef V8_TARGET_CAN_READ_UNALIGNED
+ return true;
+#else
+ return false;
+#endif
+}
+
+const byte* NativeRegExpMacroAssembler::StringCharacterPosition(
+ String* subject,
+ int start_index) {
+ // Not just flat, but ultra flat.
+ ASSERT(subject->IsExternalString() || subject->IsSeqString());
+ ASSERT(start_index >= 0);
+ ASSERT(start_index <= subject->length());
+ if (subject->IsAsciiRepresentation()) {
+ const byte* address;
+ if (StringShape(subject).IsExternal()) {
+ const char* data = ExternalAsciiString::cast(subject)->resource()->data();
+ address = reinterpret_cast<const byte*>(data);
+ } else {
+ ASSERT(subject->IsSeqAsciiString());
+ char* data = SeqAsciiString::cast(subject)->GetChars();
+ address = reinterpret_cast<const byte*>(data);
+ }
+ return address + start_index;
+ }
+ const uc16* data;
+ if (StringShape(subject).IsExternal()) {
+ data = ExternalTwoByteString::cast(subject)->resource()->data();
+ } else {
+ ASSERT(subject->IsSeqTwoByteString());
+ data = SeqTwoByteString::cast(subject)->GetChars();
+ }
+ return reinterpret_cast<const byte*>(data + start_index);
+}
+
+
+NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Match(
+ Handle<Code> regexp_code,
+ Handle<String> subject,
+ int* offsets_vector,
+ int offsets_vector_length,
+ int previous_index,
+ Isolate* isolate) {
+
+ ASSERT(subject->IsFlat());
+ ASSERT(previous_index >= 0);
+ ASSERT(previous_index <= subject->length());
+
+ // No allocations before calling the regexp, but we can't use
+ // AssertNoAllocation, since regexps might be preempted, and another thread
+ // might do allocation anyway.
+
+ String* subject_ptr = *subject;
+ // Character offsets into string.
+ int start_offset = previous_index;
+ int end_offset = subject_ptr->length();
+
+ // The string has been flattened, so it it is a cons string it contains the
+ // full string in the first part.
+ if (StringShape(subject_ptr).IsCons()) {
+ ASSERT_EQ(0, ConsString::cast(subject_ptr)->second()->length());
+ subject_ptr = ConsString::cast(subject_ptr)->first();
+ }
+ // Ensure that an underlying string has the same ascii-ness.
+ bool is_ascii = subject_ptr->IsAsciiRepresentation();
+ ASSERT(subject_ptr->IsExternalString() || subject_ptr->IsSeqString());
+ // String is now either Sequential or External
+ int char_size_shift = is_ascii ? 0 : 1;
+ int char_length = end_offset - start_offset;
+
+ const byte* input_start =
+ StringCharacterPosition(subject_ptr, start_offset);
+ int byte_length = char_length << char_size_shift;
+ const byte* input_end = input_start + byte_length;
+ Result res = Execute(*regexp_code,
+ subject_ptr,
+ start_offset,
+ input_start,
+ input_end,
+ offsets_vector,
+ isolate);
+ return res;
+}
+
+
+NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Execute(
+ Code* code,
+ String* input,
+ int start_offset,
+ const byte* input_start,
+ const byte* input_end,
+ int* output,
+ Isolate* isolate) {
+ ASSERT(isolate == Isolate::Current());
+ // Ensure that the minimum stack has been allocated.
+ RegExpStackScope stack_scope(isolate);
+ Address stack_base = stack_scope.stack()->stack_base();
+
+ int direct_call = 0;
+ int result = CALL_GENERATED_REGEXP_CODE(code->entry(),
+ input,
+ start_offset,
+ input_start,
+ input_end,
+ output,
+ stack_base,
+ direct_call,
+ isolate);
+ ASSERT(result <= SUCCESS);
+ ASSERT(result >= RETRY);
+
+ if (result == EXCEPTION && !isolate->has_pending_exception()) {
+ // We detected a stack overflow (on the backtrack stack) in RegExp code,
+ // but haven't created the exception yet.
+ isolate->StackOverflow();
+ }
+ return static_cast<Result>(result);
+}
+
+
+const byte NativeRegExpMacroAssembler::word_character_map[] = {
+ 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
+ 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
+ 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
+ 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
+
+ 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
+ 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
+ 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // '0' - '7'
+ 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, // '8' - '9'
+
+ 0x00u, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'A' - 'G'
+ 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'H' - 'O'
+ 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'P' - 'W'
+ 0xffu, 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0xffu, // 'X' - 'Z', '_'
+
+ 0x00u, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'a' - 'g'
+ 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'h' - 'o'
+ 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'p' - 'w'
+ 0xffu, 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, // 'x' - 'z'
+};
+
+
+int NativeRegExpMacroAssembler::CaseInsensitiveCompareUC16(
+ Address byte_offset1,
+ Address byte_offset2,
+ size_t byte_length,
+ Isolate* isolate) {
+ ASSERT(isolate == Isolate::Current());
+ unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize =
+ isolate->regexp_macro_assembler_canonicalize();
+ // This function is not allowed to cause a garbage collection.
+ // A GC might move the calling generated code and invalidate the
+ // return address on the stack.
+ ASSERT(byte_length % 2 == 0);
+ uc16* substring1 = reinterpret_cast<uc16*>(byte_offset1);
+ uc16* substring2 = reinterpret_cast<uc16*>(byte_offset2);
+ size_t length = byte_length >> 1;
+
+ for (size_t i = 0; i < length; i++) {
+ unibrow::uchar c1 = substring1[i];
+ unibrow::uchar c2 = substring2[i];
+ if (c1 != c2) {
+ unibrow::uchar s1[1] = { c1 };
+ canonicalize->get(c1, '\0', s1);
+ if (s1[0] != c2) {
+ unibrow::uchar s2[1] = { c2 };
+ canonicalize->get(c2, '\0', s2);
+ if (s1[0] != s2[0]) {
+ return 0;
+ }
+ }
+ }
+ }
+ return 1;
+}
+
+
+Address NativeRegExpMacroAssembler::GrowStack(Address stack_pointer,
+ Address* stack_base,
+ Isolate* isolate) {
+ ASSERT(isolate == Isolate::Current());
+ RegExpStack* regexp_stack = isolate->regexp_stack();
+ size_t size = regexp_stack->stack_capacity();
+ Address old_stack_base = regexp_stack->stack_base();
+ ASSERT(old_stack_base == *stack_base);
+ ASSERT(stack_pointer <= old_stack_base);
+ ASSERT(static_cast<size_t>(old_stack_base - stack_pointer) <= size);
+ Address new_stack_base = regexp_stack->EnsureCapacity(size * 2);
+ if (new_stack_base == NULL) {
+ return NULL;
+ }
+ *stack_base = new_stack_base;
+ intptr_t stack_content_size = old_stack_base - stack_pointer;
+ return new_stack_base - stack_content_size;
+}
+
+#endif // V8_INTERPRETED_REGEXP
+
+} } // namespace v8::internal