summary | refs | log | tree | commit | diff
path: root/deps/v8/src/regexp/interpreter-irregexp.cc
diff options
context:
space:
mode:
Diffstat (limited to 'deps/v8/src/regexp/interpreter-irregexp.cc')
-rw-r--r-- deps/v8/src/regexp/interpreter-irregexp.cc 60
1 files changed, 32 insertions, 28 deletions
diff --git a/deps/v8/src/regexp/interpreter-irregexp.cc b/deps/v8/src/regexp/interpreter-irregexp.cc
index ea748e4e55..14834d512a 100644
--- a/deps/v8/src/regexp/interpreter-irregexp.cc
+++ b/deps/v8/src/regexp/interpreter-irregexp.cc
@@ -4,6 +4,8 @@
// A simple interpreter for the Irregexp byte code.
+#ifdef V8_INTERPRETED_REGEXP
+
#include "src/regexp/interpreter-irregexp.h"
#include "src/ast/ast.h"
@@ -13,38 +15,32 @@
#include "src/unicode.h"
#include "src/utils.h"
+#ifdef V8_I18N_SUPPORT
+#include "unicode/uchar.h"
+#endif // V8_I18N_SUPPORT
+
namespace v8 {
namespace internal {
-
typedef unibrow::Mapping<unibrow::Ecma262Canonicalize> Canonicalize;
-static bool BackRefMatchesNoCase(Canonicalize* interp_canonicalize,
- int from,
- int current,
- int len,
- Vector<const uc16> subject) {
- for (int i = 0; i < len; i++) {
- unibrow::uchar old_char = subject[from++];
- unibrow::uchar new_char = subject[current++];
- if (old_char == new_char) continue;
- unibrow::uchar old_string[1] = { old_char };
- unibrow::uchar new_string[1] = { new_char };
- interp_canonicalize->get(old_char, '\0', old_string);
- interp_canonicalize->get(new_char, '\0', new_string);
- if (old_string[0] != new_string[0]) {
- return false;
- }
- }
- return true;
+static bool BackRefMatchesNoCase(Isolate* isolate, int from, int current,
+ int len, Vector<const uc16> subject,
+ bool unicode) {
+ Address offset_a =
+ reinterpret_cast<Address>(const_cast<uc16*>(&subject.at(from)));
+ Address offset_b =
+ reinterpret_cast<Address>(const_cast<uc16*>(&subject.at(current)));
+ size_t length = len * kUC16Size;
+ return RegExpMacroAssembler::CaseInsensitiveCompareUC16(
+ offset_a, offset_b, length, unicode ? nullptr : isolate) == 1;
}
-static bool BackRefMatchesNoCase(Canonicalize* interp_canonicalize,
- int from,
- int current,
- int len,
- Vector<const uint8_t> subject) {
+static bool BackRefMatchesNoCase(Isolate* isolate, int from, int current,
+ int len, Vector<const uint8_t> subject,
+ bool unicode) {
+ // For Latin1 characters the unicode flag makes no difference.
for (int i = 0; i < len; i++) {
unsigned int old_char = subject[from++];
unsigned int new_char = subject[current++];
@@ -522,13 +518,16 @@ static RegExpImpl::IrregexpResult RawMatch(Isolate* isolate,
pc += BC_CHECK_NOT_BACK_REF_BACKWARD_LENGTH;
break;
}
+ BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_UNICODE)
BYTECODE(CHECK_NOT_BACK_REF_NO_CASE) {
+ bool unicode =
+ (insn & BYTECODE_MASK) == BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE;
int from = registers[insn >> BYTECODE_SHIFT];
int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
if (from >= 0 && len > 0) {
if (current + len > subject.length() ||
- !BackRefMatchesNoCase(isolate->interp_canonicalize_mapping(),
- from, current, len, subject)) {
+ !BackRefMatchesNoCase(isolate, from, current, len, subject,
+ unicode)) {
pc = code_base + Load32Aligned(pc + 4);
break;
}
@@ -537,13 +536,16 @@ static RegExpImpl::IrregexpResult RawMatch(Isolate* isolate,
pc += BC_CHECK_NOT_BACK_REF_NO_CASE_LENGTH;
break;
}
+ BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_UNICODE_BACKWARD)
BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_BACKWARD) {
+ bool unicode = (insn & BYTECODE_MASK) ==
+ BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE_BACKWARD;
int from = registers[insn >> BYTECODE_SHIFT];
int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
if (from >= 0 && len > 0) {
if (current - len < 0 ||
- !BackRefMatchesNoCase(isolate->interp_canonicalize_mapping(),
- from, current - len, len, subject)) {
+ !BackRefMatchesNoCase(isolate, from, current - len, len, subject,
+ unicode)) {
pc = code_base + Load32Aligned(pc + 4);
break;
}
@@ -619,3 +621,5 @@ RegExpImpl::IrregexpResult IrregexpInterpreter::Match(
} // namespace internal
} // namespace v8
+
+#endif // V8_INTERPRETED_REGEXP