diff options
Diffstat (limited to 'src/3rdparty/v8/src/regexp-macro-assembler.cc')
-rw-r--r-- | src/3rdparty/v8/src/regexp-macro-assembler.cc | 266 |
1 files changed, 266 insertions, 0 deletions
diff --git a/src/3rdparty/v8/src/regexp-macro-assembler.cc b/src/3rdparty/v8/src/regexp-macro-assembler.cc new file mode 100644 index 0000000..ea41db6 --- /dev/null +++ b/src/3rdparty/v8/src/regexp-macro-assembler.cc @@ -0,0 +1,266 @@ +// Copyright 2008 the V8 project authors. All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "v8.h" +#include "ast.h" +#include "assembler.h" +#include "regexp-stack.h" +#include "regexp-macro-assembler.h" +#include "simulator.h" + +namespace v8 { +namespace internal { + +RegExpMacroAssembler::RegExpMacroAssembler() { +} + + +RegExpMacroAssembler::~RegExpMacroAssembler() { +} + + +bool RegExpMacroAssembler::CanReadUnaligned() { +#ifdef V8_HOST_CAN_READ_UNALIGNED + return true; +#else + return false; +#endif +} + + +#ifndef V8_INTERPRETED_REGEXP // Avoid unused code, e.g., on ARM. + +NativeRegExpMacroAssembler::NativeRegExpMacroAssembler() { +} + + +NativeRegExpMacroAssembler::~NativeRegExpMacroAssembler() { +} + + +bool NativeRegExpMacroAssembler::CanReadUnaligned() { +#ifdef V8_TARGET_CAN_READ_UNALIGNED + return true; +#else + return false; +#endif +} + +const byte* NativeRegExpMacroAssembler::StringCharacterPosition( + String* subject, + int start_index) { + // Not just flat, but ultra flat. + ASSERT(subject->IsExternalString() || subject->IsSeqString()); + ASSERT(start_index >= 0); + ASSERT(start_index <= subject->length()); + if (subject->IsAsciiRepresentation()) { + const byte* address; + if (StringShape(subject).IsExternal()) { + const char* data = ExternalAsciiString::cast(subject)->resource()->data(); + address = reinterpret_cast<const byte*>(data); + } else { + ASSERT(subject->IsSeqAsciiString()); + char* data = SeqAsciiString::cast(subject)->GetChars(); + address = reinterpret_cast<const byte*>(data); + } + return address + start_index; + } + const uc16* data; + if (StringShape(subject).IsExternal()) { + data = ExternalTwoByteString::cast(subject)->resource()->data(); + } else { + ASSERT(subject->IsSeqTwoByteString()); + data = SeqTwoByteString::cast(subject)->GetChars(); + } + return reinterpret_cast<const byte*>(data + start_index); +} + + +NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Match( + Handle<Code> regexp_code, + Handle<String> subject, + int* offsets_vector, + int offsets_vector_length, + int previous_index, + Isolate* isolate) { + + ASSERT(subject->IsFlat()); + ASSERT(previous_index >= 0); + ASSERT(previous_index <= subject->length()); + + // No allocations before calling the regexp, but we can't use + // AssertNoAllocation, since regexps might be preempted, and another thread + // might do allocation anyway. + + String* subject_ptr = *subject; + // Character offsets into string. + int start_offset = previous_index; + int end_offset = subject_ptr->length(); + + // The string has been flattened, so it it is a cons string it contains the + // full string in the first part. + if (StringShape(subject_ptr).IsCons()) { + ASSERT_EQ(0, ConsString::cast(subject_ptr)->second()->length()); + subject_ptr = ConsString::cast(subject_ptr)->first(); + } + // Ensure that an underlying string has the same ascii-ness. + bool is_ascii = subject_ptr->IsAsciiRepresentation(); + ASSERT(subject_ptr->IsExternalString() || subject_ptr->IsSeqString()); + // String is now either Sequential or External + int char_size_shift = is_ascii ? 0 : 1; + int char_length = end_offset - start_offset; + + const byte* input_start = + StringCharacterPosition(subject_ptr, start_offset); + int byte_length = char_length << char_size_shift; + const byte* input_end = input_start + byte_length; + Result res = Execute(*regexp_code, + subject_ptr, + start_offset, + input_start, + input_end, + offsets_vector, + isolate); + return res; +} + + +NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Execute( + Code* code, + String* input, + int start_offset, + const byte* input_start, + const byte* input_end, + int* output, + Isolate* isolate) { + ASSERT(isolate == Isolate::Current()); + // Ensure that the minimum stack has been allocated. + RegExpStackScope stack_scope(isolate); + Address stack_base = stack_scope.stack()->stack_base(); + + int direct_call = 0; + int result = CALL_GENERATED_REGEXP_CODE(code->entry(), + input, + start_offset, + input_start, + input_end, + output, + stack_base, + direct_call, + isolate); + ASSERT(result <= SUCCESS); + ASSERT(result >= RETRY); + + if (result == EXCEPTION && !isolate->has_pending_exception()) { + // We detected a stack overflow (on the backtrack stack) in RegExp code, + // but haven't created the exception yet. + isolate->StackOverflow(); + } + return static_cast<Result>(result); +} + + +const byte NativeRegExpMacroAssembler::word_character_map[] = { + 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, + 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, + 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, + 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, + + 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, + 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, + 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // '0' - '7' + 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, // '8' - '9' + + 0x00u, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'A' - 'G' + 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'H' - 'O' + 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'P' - 'W' + 0xffu, 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0xffu, // 'X' - 'Z', '_' + + 0x00u, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'a' - 'g' + 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'h' - 'o' + 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'p' - 'w' + 0xffu, 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, // 'x' - 'z' +}; + + +int NativeRegExpMacroAssembler::CaseInsensitiveCompareUC16( + Address byte_offset1, + Address byte_offset2, + size_t byte_length, + Isolate* isolate) { + ASSERT(isolate == Isolate::Current()); + unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize = + isolate->regexp_macro_assembler_canonicalize(); + // This function is not allowed to cause a garbage collection. + // A GC might move the calling generated code and invalidate the + // return address on the stack. + ASSERT(byte_length % 2 == 0); + uc16* substring1 = reinterpret_cast<uc16*>(byte_offset1); + uc16* substring2 = reinterpret_cast<uc16*>(byte_offset2); + size_t length = byte_length >> 1; + + for (size_t i = 0; i < length; i++) { + unibrow::uchar c1 = substring1[i]; + unibrow::uchar c2 = substring2[i]; + if (c1 != c2) { + unibrow::uchar s1[1] = { c1 }; + canonicalize->get(c1, '\0', s1); + if (s1[0] != c2) { + unibrow::uchar s2[1] = { c2 }; + canonicalize->get(c2, '\0', s2); + if (s1[0] != s2[0]) { + return 0; + } + } + } + } + return 1; +} + + +Address NativeRegExpMacroAssembler::GrowStack(Address stack_pointer, + Address* stack_base, + Isolate* isolate) { + ASSERT(isolate == Isolate::Current()); + RegExpStack* regexp_stack = isolate->regexp_stack(); + size_t size = regexp_stack->stack_capacity(); + Address old_stack_base = regexp_stack->stack_base(); + ASSERT(old_stack_base == *stack_base); + ASSERT(stack_pointer <= old_stack_base); + ASSERT(static_cast<size_t>(old_stack_base - stack_pointer) <= size); + Address new_stack_base = regexp_stack->EnsureCapacity(size * 2); + if (new_stack_base == NULL) { + return NULL; + } + *stack_base = new_stack_base; + intptr_t stack_content_size = old_stack_base - stack_pointer; + return new_stack_base - stack_content_size; +} + +#endif // V8_INTERPRETED_REGEXP + +} } // namespace v8::internal |