summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorzherczeg <zherczeg@2f5784b3-3f2a-0410-8824-cb99058d5e15>2016-02-17 10:06:38 +0000
committerzherczeg <zherczeg@2f5784b3-3f2a-0410-8824-cb99058d5e15>2016-02-17 10:06:38 +0000
commite0040a667ce28ac0345b449b53edea6613d3fd89 (patch)
tree9e940ab7de75440dad58c9c70b323030dcae683f
parenteda0027b5511441c9e0382365416ba6d779d8b71 (diff)
downloadpcre-e0040a667ce28ac0345b449b53edea6613d3fd89.tar.gz
Migrating fast forward skip optimization form PCRE2-JIT.
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1633 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r--pcre_jit_compile.c530
1 files changed, 307 insertions, 223 deletions
diff --git a/pcre_jit_compile.c b/pcre_jit_compile.c
index 0c570f1..ef33276 100644
--- a/pcre_jit_compile.c
+++ b/pcre_jit_compile.c
@@ -582,11 +582,6 @@ SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
return count;
}
-static int ones_in_half_byte[16] = {
- /* 0 */ 0, 1, 1, 2, /* 4 */ 1, 2, 2, 3,
- /* 8 */ 1, 2, 2, 3, /* 12 */ 2, 3, 3, 4
-};
-
/* Functions whose might need modification for all new supported opcodes:
next_opcode
check_opcode_types
@@ -986,6 +981,57 @@ switch(*cc)
return FALSE;
}
+static SLJIT_INLINE BOOL detect_fast_forward_skip(compiler_common *common, int *private_data_start)
+{
+pcre_uchar *cc = common->start;
+pcre_uchar *end;
+
+/* Skip not repeated brackets. */
+while (TRUE)
+ {
+ switch(*cc)
+ {
+ case OP_SOD:
+ case OP_SOM:
+ case OP_SET_SOM:
+ case OP_NOT_WORD_BOUNDARY:
+ case OP_WORD_BOUNDARY:
+ case OP_EODN:
+ case OP_EOD:
+ case OP_CIRC:
+ case OP_CIRCM:
+ case OP_DOLL:
+ case OP_DOLLM:
+ /* Zero width assertions. */
+ cc++;
+ continue;
+ }
+
+ if (*cc != OP_BRA && *cc != OP_CBRA)
+ break;
+
+ end = cc + GET(cc, 1);
+ if (*end != OP_KET || PRIVATE_DATA(end) != 0)
+ return FALSE;
+ if (*cc == OP_CBRA)
+ {
+ if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
+ return FALSE;
+ cc += IMM2_SIZE;
+ }
+ cc += 1 + LINK_SIZE;
+ }
+
+if (is_accelerated_repeat(cc))
+ {
+ common->fast_forward_bc_ptr = cc;
+ common->private_data_ptrs[(cc + 1) - common->start] = *private_data_start;
+ *private_data_start += sizeof(sljit_sw);
+ return TRUE;
+ }
+return FALSE;
+}
+
static SLJIT_INLINE void detect_fast_fail(compiler_common *common, pcre_uchar *cc, int *private_data_start, sljit_si depth)
{
pcre_uchar *next_alt;
@@ -3258,14 +3304,14 @@ sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
#endif
-static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline)
+static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf)
{
DEFINE_COMPILER;
struct sljit_label *mainloop;
struct sljit_label *newlinelabel = NULL;
struct sljit_jump *start;
struct sljit_jump *end = NULL;
-struct sljit_jump *nl = NULL;
+struct sljit_jump *end2 = NULL;
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
struct sljit_jump *singlechar;
#endif
@@ -3273,14 +3319,13 @@ jump_list *newline = NULL;
BOOL newlinecheck = FALSE;
BOOL readuchar = FALSE;
-if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
- common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
+if (!(hascrorlf || (common->first_line_end != 0)) &&
+ (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
newlinecheck = TRUE;
-if (firstline)
+if (common->first_line_end != 0)
{
/* Search for the end of the first line. */
- SLJIT_ASSERT(common->first_line_end != 0);
OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
if (common->nltype == NLTYPE_FIXED && common->newline > 255)
@@ -3326,7 +3371,7 @@ if (newlinecheck)
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
- nl = JUMP(SLJIT_JUMP);
+ end2 = JUMP(SLJIT_JUMP);
}
mainloop = LABEL();
@@ -3371,51 +3416,52 @@ JUMPHERE(start);
if (newlinecheck)
{
JUMPHERE(end);
- JUMPHERE(nl);
+ JUMPHERE(end2);
}
return mainloop;
}
#define MAX_N_CHARS 16
-#define MAX_N_BYTES 8
+#define MAX_DIFF_CHARS 6
-static SLJIT_INLINE void add_prefix_byte(pcre_uint8 byte, pcre_uint8 *bytes)
+static SLJIT_INLINE void add_prefix_char(pcre_uchar chr, pcre_uchar *chars)
{
-pcre_uint8 len = bytes[0];
-int i;
+pcre_uchar i, len;
+len = chars[0];
if (len == 255)
return;
if (len == 0)
{
- bytes[0] = 1;
- bytes[1] = byte;
+ chars[0] = 1;
+ chars[1] = chr;
return;
}
for (i = len; i > 0; i--)
- if (bytes[i] == byte)
+ if (chars[i] == chr)
return;
-if (len >= MAX_N_BYTES - 1)
+if (len >= MAX_DIFF_CHARS - 1)
{
- bytes[0] = 255;
+ chars[0] = 255;
return;
}
len++;
-bytes[len] = byte;
-bytes[0] = len;
+chars[len] = chr;
+chars[0] = len;
}
-static int scan_prefix(compiler_common *common, pcre_uchar *cc, pcre_uint32 *chars, pcre_uint8 *bytes, int max_chars, pcre_uint32 *rec_count)
+static int scan_prefix(compiler_common *common, pcre_uchar *cc, pcre_uchar *chars, int max_chars, pcre_uint32 *rec_count)
{
/* Recursive function, which scans prefix literals. */
-BOOL last, any, caseless;
+BOOL last, any, class, caseless;
int len, repeat, len_save, consumed = 0;
-pcre_uint32 chr, mask;
+sljit_ui chr;
+sljit_ub *bytes, *bytes_end, byte;
pcre_uchar *alternative, *cc_save, *oc;
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
pcre_uchar othercase[8];
@@ -3434,6 +3480,7 @@ while (TRUE)
last = TRUE;
any = FALSE;
+ class = FALSE;
caseless = FALSE;
switch (*cc)
@@ -3497,7 +3544,7 @@ while (TRUE)
#ifdef SUPPORT_UTF
if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
#endif
- max_chars = scan_prefix(common, cc + len, chars, bytes, max_chars, rec_count);
+ max_chars = scan_prefix(common, cc + len, chars, max_chars, rec_count);
if (max_chars == 0)
return consumed;
last = FALSE;
@@ -3520,7 +3567,7 @@ while (TRUE)
alternative = cc + GET(cc, 1);
while (*alternative == OP_ALT)
{
- max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, bytes, max_chars, rec_count);
+ max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, max_chars, rec_count);
if (max_chars == 0)
return consumed;
alternative += GET(alternative, 1);
@@ -3533,18 +3580,17 @@ while (TRUE)
case OP_CLASS:
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
- if (common->utf && !is_char7_bitset((const pcre_uint8 *)(cc + 1), FALSE)) return consumed;
+ if (common->utf && !is_char7_bitset((const sljit_ub *)(cc + 1), FALSE))
+ return consumed;
#endif
- any = TRUE;
- cc += 1 + 32 / sizeof(pcre_uchar);
+ class = TRUE;
break;
case OP_NCLASS:
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
if (common->utf) return consumed;
#endif
- any = TRUE;
- cc += 1 + 32 / sizeof(pcre_uchar);
+ class = TRUE;
break;
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
@@ -3559,7 +3605,7 @@ while (TRUE)
case OP_DIGIT:
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
- if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
+ if (common->utf && !is_char7_bitset((const sljit_ub *)common->ctypes - cbit_length + cbit_digit, FALSE))
return consumed;
#endif
any = TRUE;
@@ -3568,7 +3614,7 @@ while (TRUE)
case OP_WHITESPACE:
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
- if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_space, FALSE))
+ if (common->utf && !is_char7_bitset((const sljit_ub *)common->ctypes - cbit_length + cbit_space, FALSE))
return consumed;
#endif
any = TRUE;
@@ -3577,7 +3623,7 @@ while (TRUE)
case OP_WORDCHAR:
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
- if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_word, FALSE))
+ if (common->utf && !is_char7_bitset((const sljit_ub *)common->ctypes - cbit_length + cbit_word, FALSE))
return consumed;
#endif
any = TRUE;
@@ -3600,10 +3646,10 @@ while (TRUE)
cc++;
break;
-#ifdef SUPPORT_UCP
+#ifdef SUPPORT_UTF
case OP_NOTPROP:
case OP_PROP:
-#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
+#ifndef COMPILE_PCRE32
if (common->utf) return consumed;
#endif
any = TRUE;
@@ -3632,30 +3678,114 @@ while (TRUE)
if (any)
{
-#if defined COMPILE_PCRE8
- mask = 0xff;
-#elif defined COMPILE_PCRE16
- mask = 0xffff;
-#elif defined COMPILE_PCRE32
- mask = 0xffffffff;
-#else
- SLJIT_ASSERT_STOP();
-#endif
+ do
+ {
+ chars[0] = 255;
+
+ consumed++;
+ if (--max_chars == 0)
+ return consumed;
+ chars += MAX_DIFF_CHARS;
+ }
+ while (--repeat > 0);
+
+ repeat = 1;
+ continue;
+ }
+
+ if (class)
+ {
+ bytes = (sljit_ub*) (cc + 1);
+ cc += 1 + 32 / sizeof(pcre_uchar);
+
+ switch (*cc)
+ {
+ case OP_CRSTAR:
+ case OP_CRMINSTAR:
+ case OP_CRPOSSTAR:
+ case OP_CRQUERY:
+ case OP_CRMINQUERY:
+ case OP_CRPOSQUERY:
+ max_chars = scan_prefix(common, cc + 1, chars, max_chars, rec_count);
+ if (max_chars == 0)
+ return consumed;
+ break;
+
+ default:
+ case OP_CRPLUS:
+ case OP_CRMINPLUS:
+ case OP_CRPOSPLUS:
+ break;
+
+ case OP_CRRANGE:
+ case OP_CRMINRANGE:
+ case OP_CRPOSRANGE:
+ repeat = GET2(cc, 1);
+ if (repeat <= 0)
+ return consumed;
+ break;
+ }
do
{
- chars[0] = mask;
- chars[1] = mask;
- bytes[0] = 255;
+ if (bytes[31] & 0x80)
+ chars[0] = 255;
+ else if (chars[0] != 255)
+ {
+ bytes_end = bytes + 32;
+ chr = 0;
+ do
+ {
+ byte = *bytes++;
+ SLJIT_ASSERT((chr & 0x7) == 0);
+ if (byte == 0)
+ chr += 8;
+ else
+ {
+ do
+ {
+ if ((byte & 0x1) != 0)
+ add_prefix_char(chr, chars);
+ byte >>= 1;
+ chr++;
+ }
+ while (byte != 0);
+ chr = (chr + 7) & ~7;
+ }
+ }
+ while (chars[0] != 255 && bytes < bytes_end);
+ bytes = bytes_end - 32;
+ }
consumed++;
if (--max_chars == 0)
return consumed;
- chars += 2;
- bytes += MAX_N_BYTES;
+ chars += MAX_DIFF_CHARS;
}
while (--repeat > 0);
+ switch (*cc)
+ {
+ case OP_CRSTAR:
+ case OP_CRMINSTAR:
+ case OP_CRPOSSTAR:
+ return consumed;
+
+ case OP_CRQUERY:
+ case OP_CRMINQUERY:
+ case OP_CRPOSQUERY:
+ cc++;
+ break;
+
+ case OP_CRRANGE:
+ case OP_CRMINRANGE:
+ case OP_CRPOSRANGE:
+ if (GET2(cc, 1) != GET2(cc, 1 + IMM2_SIZE))
+ return consumed;
+ cc += 1 + 2 * IMM2_SIZE;
+ break;
+ }
+
repeat = 1;
continue;
}
@@ -3682,7 +3812,10 @@ while (TRUE)
}
}
else
+ {
caseless = FALSE;
+ othercase[0] = 0; /* Stops compiler warning - PH */
+ }
len_save = len;
cc_save = cc;
@@ -3692,43 +3825,16 @@ while (TRUE)
do
{
chr = *cc;
-#ifdef COMPILE_PCRE32
- if (SLJIT_UNLIKELY(chr == NOTACHAR))
- return consumed;
-#endif
- add_prefix_byte((pcre_uint8)chr, bytes);
+ add_prefix_char(*cc, chars);
- mask = 0;
if (caseless)
- {
- add_prefix_byte((pcre_uint8)*oc, bytes);
- mask = *cc ^ *oc;
- chr |= mask;
- }
-
-#ifdef COMPILE_PCRE32
- if (chars[0] == NOTACHAR && chars[1] == 0)
-#else
- if (chars[0] == NOTACHAR)
-#endif
- {
- chars[0] = chr;
- chars[1] = mask;
- }
- else
- {
- mask |= chars[0] ^ chr;
- chr |= mask;
- chars[0] = chr;
- chars[1] |= mask;
- }
+ add_prefix_char(*oc, chars);
len--;
consumed++;
if (--max_chars == 0)
return consumed;
- chars += 2;
- bytes += MAX_N_BYTES;
+ chars += MAX_DIFF_CHARS;
cc++;
oc++;
}
@@ -4196,67 +4302,55 @@ if (offset > 0)
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
}
-static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)
+static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_label *start;
struct sljit_jump *quit;
-pcre_uint32 chars[MAX_N_CHARS * 2];
-pcre_uint8 bytes[MAX_N_CHARS * MAX_N_BYTES];
-pcre_uint8 ones[MAX_N_CHARS];
-int offsets[3];
-pcre_uint32 mask;
-pcre_uint8 *byte_set, *byte_set_end;
+struct sljit_jump *match;
+/* bytes[0] represent the number of characters between 0
+and MAX_N_BYTES - 1, 255 represents any character. */
+pcre_uchar chars[MAX_N_CHARS * MAX_DIFF_CHARS];
+sljit_si offset;
+pcre_uchar mask;
+pcre_uchar *char_set, *char_set_end;
int i, max, from;
-int range_right = -1, range_len = 3 - 1;
+int range_right = -1, range_len;
sljit_ub *update_table = NULL;
BOOL in_range;
-pcre_uint32 rec_count;
+uint32_t rec_count;
for (i = 0; i < MAX_N_CHARS; i++)
- {
- chars[i << 1] = NOTACHAR;
- chars[(i << 1) + 1] = 0;
- bytes[i * MAX_N_BYTES] = 0;
- }
+ chars[i * MAX_DIFF_CHARS] = 0;
rec_count = 10000;
-max = scan_prefix(common, common->start, chars, bytes, MAX_N_CHARS, &rec_count);
+max = scan_prefix(common, common->start, chars, MAX_N_CHARS, &rec_count);
-if (max <= 1)
+if (max < 1)
return FALSE;
-for (i = 0; i < max; i++)
- {
- mask = chars[(i << 1) + 1];
- ones[i] = ones_in_half_byte[mask & 0xf];
- mask >>= 4;
- while (mask != 0)
- {
- ones[i] += ones_in_half_byte[mask & 0xf];
- mask >>= 4;
- }
- }
-
in_range = FALSE;
-from = 0; /* Prevent compiler "uninitialized" warning */
+/* Prevent compiler "uninitialized" warning */
+from = 0;
+range_len = 4 /* minimum length */ - 1;
for (i = 0; i <= max; i++)
{
- if (in_range && (i - from) > range_len && (bytes[(i - 1) * MAX_N_BYTES] <= 4))
+ if (in_range && (i - from) > range_len && (chars[(i - 1) * MAX_DIFF_CHARS] < 255))
{
range_len = i - from;
range_right = i - 1;
}
- if (i < max && bytes[i * MAX_N_BYTES] < 255)
+ if (i < max && chars[i * MAX_DIFF_CHARS] < 255)
{
+ SLJIT_ASSERT(chars[i * MAX_DIFF_CHARS] > 0);
if (!in_range)
{
in_range = TRUE;
from = i;
}
}
- else if (in_range)
+ else
in_range = FALSE;
}
@@ -4269,89 +4363,65 @@ if (range_right >= 0)
for (i = 0; i < range_len; i++)
{
- byte_set = bytes + ((range_right - i) * MAX_N_BYTES);
- SLJIT_ASSERT(byte_set[0] > 0 && byte_set[0] < 255);
- byte_set_end = byte_set + byte_set[0];
- byte_set++;
- while (byte_set <= byte_set_end)
+ char_set = chars + ((range_right - i) * MAX_DIFF_CHARS);
+ SLJIT_ASSERT(char_set[0] > 0 && char_set[0] < 255);
+ char_set_end = char_set + char_set[0];
+ char_set++;
+ while (char_set <= char_set_end)
{
- if (update_table[*byte_set] > IN_UCHARS(i))
- update_table[*byte_set] = IN_UCHARS(i);
- byte_set++;
+ if (update_table[(*char_set) & 0xff] > IN_UCHARS(i))
+ update_table[(*char_set) & 0xff] = IN_UCHARS(i);
+ char_set++;
}
}
}
-offsets[0] = -1;
+offset = -1;
/* Scan forward. */
for (i = 0; i < max; i++)
- if (ones[i] <= 2) {
- offsets[0] = i;
- break;
- }
-
-if (offsets[0] < 0 && range_right < 0)
- return FALSE;
-
-if (offsets[0] >= 0)
{
- /* Scan backward. */
- offsets[1] = -1;
- for (i = max - 1; i > offsets[0]; i--)
- if (ones[i] <= 2 && i != range_right)
- {
- offsets[1] = i;
- break;
- }
-
- /* This case is handled better by fast_forward_first_char. */
- if (offsets[1] == -1 && offsets[0] == 0 && range_right < 0)
- return FALSE;
-
- offsets[2] = -1;
- /* We only search for a middle character if there is no range check. */
- if (offsets[1] >= 0 && range_right == -1)
+ if (offset == -1)
+ {
+ if (chars[i * MAX_DIFF_CHARS] <= 2)
+ offset = i;
+ }
+ else if (chars[offset * MAX_DIFF_CHARS] == 2 && chars[i * MAX_DIFF_CHARS] <= 2)
{
- /* Scan from middle. */
- for (i = (offsets[0] + offsets[1]) / 2 + 1; i < offsets[1]; i++)
- if (ones[i] <= 2)
+ if (chars[i * MAX_DIFF_CHARS] == 1)
+ offset = i;
+ else
+ {
+ mask = chars[offset * MAX_DIFF_CHARS + 1] ^ chars[offset * MAX_DIFF_CHARS + 2];
+ if (!is_powerof2(mask))
{
- offsets[2] = i;
- break;
+ mask = chars[i * MAX_DIFF_CHARS + 1] ^ chars[i * MAX_DIFF_CHARS + 2];
+ if (is_powerof2(mask))
+ offset = i;
}
-
- if (offsets[2] == -1)
- {
- for (i = (offsets[0] + offsets[1]) / 2; i > offsets[0]; i--)
- if (ones[i] <= 2)
- {
- offsets[2] = i;
- break;
- }
}
}
+ }
- SLJIT_ASSERT(offsets[1] == -1 || (offsets[0] < offsets[1]));
- SLJIT_ASSERT(offsets[2] == -1 || (offsets[0] < offsets[2] && offsets[1] > offsets[2]));
-
- chars[0] = chars[offsets[0] << 1];
- chars[1] = chars[(offsets[0] << 1) + 1];
- if (offsets[2] >= 0)
- {
- chars[2] = chars[offsets[2] << 1];
- chars[3] = chars[(offsets[2] << 1) + 1];
- }
- if (offsets[1] >= 0)
- {
- chars[4] = chars[offsets[1] << 1];
- chars[5] = chars[(offsets[1] << 1) + 1];
- }
+if (range_right < 0)
+ {
+ if (offset < 0)
+ return FALSE;
+ SLJIT_ASSERT(chars[offset * MAX_DIFF_CHARS] >= 1 && chars[offset * MAX_DIFF_CHARS] <= 2);
+ /* Works regardless the value is 1 or 2. */
+ mask = chars[offset * MAX_DIFF_CHARS + chars[offset * MAX_DIFF_CHARS]];
+ fast_forward_first_char2(common, chars[offset * MAX_DIFF_CHARS + 1], mask, offset);
+ return TRUE;
}
+if (range_right == offset)
+ offset = -1;
+
+SLJIT_ASSERT(offset == -1 || (chars[offset * MAX_DIFF_CHARS] >= 1 && chars[offset * MAX_DIFF_CHARS] <= 2));
+
max -= 1;
-if (firstline)
+SLJIT_ASSERT(max > 0);
+if (common->first_line_end != 0)
{
- SLJIT_ASSERT(common->first_line_end != 0);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
@@ -4362,65 +4432,83 @@ if (firstline)
else
OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
+SLJIT_ASSERT(range_right >= 0);
+
#if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
-if (range_right >= 0)
- OP1(SLJIT_MOV, RETURN_ADDR, 0, SLJIT_IMM, (sljit_sw)update_table);
+OP1(SLJIT_MOV, RETURN_ADDR, 0, SLJIT_IMM, (sljit_sw)update_table);
#endif
start = LABEL();
quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
-SLJIT_ASSERT(range_right >= 0 || offsets[0] >= 0);
-
-if (range_right >= 0)
- {
#if defined COMPILE_PCRE8 || (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
- OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right));
+OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right));
#else
- OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right + 1) - 1);
+OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right + 1) - 1);
#endif
#if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
- OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(RETURN_ADDR, TMP1), 0);
+OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(RETURN_ADDR, TMP1), 0);
#else
- OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)update_table);
+OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)update_table);
#endif
- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
- CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, start);
- }
+OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
+CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, start);
-if (offsets[0] >= 0)
+if (offset >= 0)
{
- OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[0]));
- if (offsets[1] >= 0)
- OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[1]));
+ OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offset));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
- if (chars[1] != 0)
- OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]);
- CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start);
- if (offsets[2] >= 0)
- OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[2] - 1));
-
- if (offsets[1] >= 0)
+ if (chars[offset * MAX_DIFF_CHARS] == 1)
+ CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset * MAX_DIFF_CHARS + 1], start);
+ else
{
- if (chars[5] != 0)
- OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[5]);
- CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[4], start);
+ mask = chars[offset * MAX_DIFF_CHARS + 1] ^ chars[offset * MAX_DIFF_CHARS + 2];
+ if (is_powerof2(mask))
+ {
+ OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask);
+ CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset * MAX_DIFF_CHARS + 1] | mask, start);
+ }
+ else
+ {
+ match = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset * MAX_DIFF_CHARS + 1]);
+ CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset * MAX_DIFF_CHARS + 2], start);
+ JUMPHERE(match);
+ }
}
+ }
- if (offsets[2] >= 0)
+#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
+if (common->utf && offset != 0)
+ {
+ if (offset < 0)
{
- if (chars[3] != 0)
- OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[3]);
- CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[2], start);
+ OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
+ OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
}
- OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+ else
+ OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
+#if defined COMPILE_PCRE8
+ OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
+ CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, start);
+#elif defined COMPILE_PCRE16
+ OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
+ CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00, start);
+#else
+#error "Unknown code width"
+#endif
+ if (offset < 0)
+ OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
}
+#endif
+
+if (offset >= 0)
+ OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
JUMPHERE(quit);
-if (firstline)
+if (common->first_line_end != 0)
{
if (range_right >= 0)
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
@@ -4438,7 +4526,7 @@ return TRUE;
}
#undef MAX_N_CHARS
-#undef MAX_N_BYTES
+#undef MAX_DIFF_CHARS
static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless)
{
@@ -10950,12 +11038,12 @@ if (common->control_head_ptr != 0)
/* Main part of the matching */
if ((re->options & PCRE_ANCHORED) == 0)
{
- mainloop_label = mainloop_entry(common, (re->flags & PCRE_HASCRORLF) != 0, (re->options & PCRE_FIRSTLINE) != 0);
+ mainloop_label = mainloop_entry(common, (re->flags & PCRE_HASCRORLF) != 0);
continue_match_label = LABEL();
/* Forward search if possible. */
if ((re->options & PCRE_NO_START_OPTIMIZE) == 0)
{
- if (mode == JIT_COMPILE && fast_forward_first_n_chars(common, (re->options & PCRE_FIRSTLINE) != 0))
+ if (mode == JIT_COMPILE && fast_forward_first_n_chars(common))
;
else if ((re->flags & PCRE_FIRSTSET) != 0)
fast_forward_first_char(common, (pcre_uchar)re->first_char, (re->flags & PCRE_FCH_CASELESS) != 0);
@@ -11068,7 +11156,8 @@ if ((re->options & PCRE_ANCHORED) == 0 && (re->options & PCRE_FIRSTLINE) != 0)
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
}
-OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
+OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP),
+ (common->fast_forward_bc_ptr != NULL) ? (PRIVATE_DATA(common->fast_forward_bc_ptr + 1)) : common->start_ptr);
if ((re->options & PCRE_ANCHORED) == 0)
{
@@ -11079,12 +11168,7 @@ if ((re->options & PCRE_ANCHORED) == 0)
/* There cannot be more newlines here. */
}
else
- {
- if ((re->options & PCRE_FIRSTLINE) == 0)
- CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, mainloop_label);
- else
- CMPTO(SLJIT_LESS, STR_PTR, 0, TMP1, 0, mainloop_label);
- }
+ CMPTO(SLJIT_LESS, STR_PTR, 0, ((re->options & PCRE_FIRSTLINE) == 0) ? STR_END : TMP1, 0, mainloop_label);
}
/* No more remaining characters. */