summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorzherczeg <zherczeg@2f5784b3-3f2a-0410-8824-cb99058d5e15>2016-02-04 17:13:10 +0000
committerzherczeg <zherczeg@2f5784b3-3f2a-0410-8824-cb99058d5e15>2016-02-04 17:13:10 +0000
commitaeca4ee3da4ae1629b91651a30e3cb1f1a1fdad1 (patch)
treea9c8dbe5a60ee28e3035d181ccaf5450ed42a734
parent02a2ed8e1a119cbe10d2bf353bc54432020ce58f (diff)
downloadpcre-aeca4ee3da4ae1629b91651a30e3cb1f1a1fdad1.tar.gz
JIT code generator for assertion matching is refactored to a separat function.
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1623 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r--ChangeLog3
-rw-r--r--pcre_jit_compile.c507
2 files changed, 273 insertions, 237 deletions
diff --git a/ChangeLog b/ChangeLog
index 1b997a7..2c6315c 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -44,6 +44,9 @@ Version 8.39 xx-xxxxxx-201x
10. pcre_get_substring_list() crashed if the use of \K in a match caused the
start of the match to be earlier than the end.
+11. JIT code generator for assertion matching is refactored to a separate
+ function.
+
Version 8.38 23-November-2015
-----------------------------
diff --git a/pcre_jit_compile.c b/pcre_jit_compile.c
index 445de0c..163705d 100644
--- a/pcre_jit_compile.c
+++ b/pcre_jit_compile.c
@@ -4895,7 +4895,6 @@ while (*cc != XCL_END)
/* We are not necessary in utf mode even in 8 bit mode. */
cc = ccbegin;
-detect_partial_match(common, backtracks);
read_char_range(common, min, max, (cc[-1] & XCL_NOT) != 0);
if ((cc[-1] & XCL_HASPROP) == 0)
@@ -4903,7 +4902,7 @@ if ((cc[-1] & XCL_HASPROP) == 0)
if ((cc[-1] & XCL_MAP) != 0)
{
jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
- if (!check_class_ranges(common, (const pcre_uint8 *)cc, (((const pcre_uint8 *)cc)[31] & 0x80) != 0, TRUE, &found))
+ if (!check_class_ranges(common, (const sljit_ub *)cc, (((const sljit_ub *)cc)[31] & 0x80) != 0, TRUE, &found))
{
OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
@@ -5282,19 +5281,13 @@ if (found != NULL)
#endif
-static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks)
+static pcre_uchar *compile_simple_assertion_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks)
{
DEFINE_COMPILER;
int length;
-unsigned int c, oc, bit;
-compare_context context;
struct sljit_jump *jump[4];
-jump_list *end_list;
#ifdef SUPPORT_UTF
struct sljit_label *label;
-#ifdef SUPPORT_UCP
-pcre_uchar propdata[5];
-#endif
#endif /* SUPPORT_UTF */
switch(type)
@@ -5317,10 +5310,218 @@ switch(type)
add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_NOT_ZERO : SLJIT_ZERO));
return cc;
+ case OP_EODN:
+ /* Requires rather complex checks. */
+ jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
+ if (common->nltype == NLTYPE_FIXED && common->newline > 255)
+ {
+ OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
+ OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
+ if (common->mode == JIT_COMPILE)
+ add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
+ else
+ {
+ jump[1] = CMP(SLJIT_EQUAL, TMP2, 0, STR_END, 0);
+ OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
+ OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS);
+ OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
+ OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_NOT_EQUAL);
+ add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL));
+ check_partial(common, TRUE);
+ add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
+ JUMPHERE(jump[1]);
+ }
+ OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
+ add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
+ add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
+ }
+ else if (common->nltype == NLTYPE_FIXED)
+ {
+ OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+ OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
+ add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
+ add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
+ }
+ else
+ {
+ OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
+ jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
+ OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
+ OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
+ jump[2] = JUMP(SLJIT_GREATER);
+ add_jump(compiler, backtracks, JUMP(SLJIT_LESS));
+ /* Equal. */
+ OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
+ jump[3] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
+ add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
+
+ JUMPHERE(jump[1]);
+ if (common->nltype == NLTYPE_ANYCRLF)
+ {
+ OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+ add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, STR_END, 0));
+ add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
+ }
+ else
+ {
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, STR_PTR, 0);
+ read_char_range(common, common->nlmin, common->nlmax, TRUE);
+ add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
+ add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
+ add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
+ OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
+ }
+ JUMPHERE(jump[2]);
+ JUMPHERE(jump[3]);
+ }
+ JUMPHERE(jump[0]);
+ check_partial(common, FALSE);
+ return cc;
+
+ case OP_EOD:
+ add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
+ check_partial(common, FALSE);
+ return cc;
+
+ case OP_DOLL:
+ OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
+ OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
+ add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
+
+ if (!common->endonly)
+ compile_simple_assertion_matchingpath(common, OP_EODN, cc, backtracks);
+ else
+ {
+ add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
+ check_partial(common, FALSE);
+ }
+ return cc;
+
+ case OP_DOLLM:
+ jump[1] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
+ OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
+ OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
+ add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
+ check_partial(common, FALSE);
+ jump[0] = JUMP(SLJIT_JUMP);
+ JUMPHERE(jump[1]);
+
+ if (common->nltype == NLTYPE_FIXED && common->newline > 255)
+ {
+ OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
+ OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
+ if (common->mode == JIT_COMPILE)
+ add_jump(compiler, backtracks, CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0));
+ else
+ {
+ jump[1] = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
+ /* STR_PTR = STR_END - IN_UCHARS(1) */
+ add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
+ check_partial(common, TRUE);
+ add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
+ JUMPHERE(jump[1]);
+ }
+
+ OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
+ add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
+ add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
+ }
+ else
+ {
+ peek_char(common, common->nlmax);
+ check_newlinechar(common, common->nltype, backtracks, FALSE);
+ }
+ JUMPHERE(jump[0]);
+ return cc;
+
+ case OP_CIRC:
+ OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
+ add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0));
+ OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
+ add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
+ return cc;
+
+ case OP_CIRCM:
+ OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
+ jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0);
+ OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
+ add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
+ jump[0] = JUMP(SLJIT_JUMP);
+ JUMPHERE(jump[1]);
+
+ add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
+ if (common->nltype == NLTYPE_FIXED && common->newline > 255)
+ {
+ OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
+ add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, TMP1, 0));
+ OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
+ OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
+ add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
+ add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
+ }
+ else
+ {
+ skip_char_back(common);
+ read_char_range(common, common->nlmin, common->nlmax, TRUE);
+ check_newlinechar(common, common->nltype, backtracks, FALSE);
+ }
+ JUMPHERE(jump[0]);
+ return cc;
+
+ case OP_REVERSE:
+ length = GET(cc, 0);
+ if (length == 0)
+ return cc + LINK_SIZE;
+ OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
+#ifdef SUPPORT_UTF
+ if (common->utf)
+ {
+ OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
+ OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
+ label = LABEL();
+ add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
+ skip_char_back(common);
+ OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
+ JUMPTO(SLJIT_NOT_ZERO, label);
+ }
+ else
+#endif
+ {
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
+ OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
+ add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, TMP1, 0));
+ }
+ check_start_used_ptr(common);
+ return cc + LINK_SIZE;
+ }
+SLJIT_ASSERT_STOP();
+return cc;
+}
+
+static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks, BOOL check_str_ptr)
+{
+DEFINE_COMPILER;
+int length;
+unsigned int c, oc, bit;
+compare_context context;
+struct sljit_jump *jump[3];
+jump_list *end_list;
+#ifdef SUPPORT_UTF
+struct sljit_label *label;
+#ifdef SUPPORT_UCP
+pcre_uchar propdata[5];
+#endif
+#endif /* SUPPORT_UTF */
+
+switch(type)
+ {
case OP_NOT_DIGIT:
case OP_DIGIT:
/* Digits are usually 0-9, so it is worth to optimize them. */
- detect_partial_match(common, backtracks);
+ if (check_str_ptr)
+ detect_partial_match(common, backtracks);
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_digit, FALSE))
read_char7_type(common, type == OP_NOT_DIGIT);
@@ -5334,7 +5535,8 @@ switch(type)
case OP_NOT_WHITESPACE:
case OP_WHITESPACE:
- detect_partial_match(common, backtracks);
+ if (check_str_ptr)
+ detect_partial_match(common, backtracks);
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_space, FALSE))
read_char7_type(common, type == OP_NOT_WHITESPACE);
@@ -5347,7 +5549,8 @@ switch(type)
case OP_NOT_WORDCHAR:
case OP_WORDCHAR:
- detect_partial_match(common, backtracks);
+ if (check_str_ptr)
+ detect_partial_match(common, backtracks);
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_word, FALSE))
read_char7_type(common, type == OP_NOT_WORDCHAR);
@@ -5359,7 +5562,8 @@ switch(type)
return cc;
case OP_ANY:
- detect_partial_match(common, backtracks);
+ if (check_str_ptr)
+ detect_partial_match(common, backtracks);
read_char_range(common, common->nlmin, common->nlmax, TRUE);
if (common->nltype == NLTYPE_FIXED && common->newline > 255)
{
@@ -5380,7 +5584,8 @@ switch(type)
return cc;
case OP_ALLANY:
- detect_partial_match(common, backtracks);
+ if (check_str_ptr)
+ detect_partial_match(common, backtracks);
#ifdef SUPPORT_UTF
if (common->utf)
{
@@ -5408,7 +5613,8 @@ switch(type)
return cc;
case OP_ANYBYTE:
- detect_partial_match(common, backtracks);
+ if (check_str_ptr)
+ detect_partial_match(common, backtracks);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
return cc;
@@ -5421,13 +5627,16 @@ switch(type)
propdata[2] = cc[0];
propdata[3] = cc[1];
propdata[4] = XCL_END;
+ if (check_str_ptr)
+ detect_partial_match(common, backtracks);
compile_xclass_matchingpath(common, propdata, backtracks);
return cc + 2;
#endif
#endif
case OP_ANYNL:
- detect_partial_match(common, backtracks);
+ if (check_str_ptr)
+ detect_partial_match(common, backtracks);
read_char_range(common, common->bsr_nlmin, common->bsr_nlmax, FALSE);
jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
/* We don't need to handle soft partial matching case. */
@@ -5449,7 +5658,8 @@ switch(type)
case OP_NOT_HSPACE:
case OP_HSPACE:
- detect_partial_match(common, backtracks);
+ if (check_str_ptr)
+ detect_partial_match(common, backtracks);
read_char_range(common, 0x9, 0x3000, type == OP_NOT_HSPACE);
add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
@@ -5457,7 +5667,8 @@ switch(type)
case OP_NOT_VSPACE:
case OP_VSPACE:
- detect_partial_match(common, backtracks);
+ if (check_str_ptr)
+ detect_partial_match(common, backtracks);
read_char_range(common, 0xa, 0x2029, type == OP_NOT_VSPACE);
add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
@@ -5465,7 +5676,8 @@ switch(type)
#ifdef SUPPORT_UCP
case OP_EXTUNI:
- detect_partial_match(common, backtracks);
+ if (check_str_ptr)
+ detect_partial_match(common, backtracks);
read_char(common);
add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
@@ -5502,166 +5714,6 @@ switch(type)
return cc;
#endif
- case OP_EODN:
- /* Requires rather complex checks. */
- jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
- if (common->nltype == NLTYPE_FIXED && common->newline > 255)
- {
- OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
- OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
- if (common->mode == JIT_COMPILE)
- add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
- else
- {
- jump[1] = CMP(SLJIT_EQUAL, TMP2, 0, STR_END, 0);
- OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
- OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS);
- OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
- OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_NOT_EQUAL);
- add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL));
- check_partial(common, TRUE);
- add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
- JUMPHERE(jump[1]);
- }
- OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
- add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
- add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
- }
- else if (common->nltype == NLTYPE_FIXED)
- {
- OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
- OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
- add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
- add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
- }
- else
- {
- OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
- jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
- OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
- OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
- jump[2] = JUMP(SLJIT_GREATER);
- add_jump(compiler, backtracks, JUMP(SLJIT_LESS));
- /* Equal. */
- OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
- jump[3] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
- add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
-
- JUMPHERE(jump[1]);
- if (common->nltype == NLTYPE_ANYCRLF)
- {
- OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
- add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, STR_END, 0));
- add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
- }
- else
- {
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, STR_PTR, 0);
- read_char_range(common, common->nlmin, common->nlmax, TRUE);
- add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
- add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
- add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
- OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
- }
- JUMPHERE(jump[2]);
- JUMPHERE(jump[3]);
- }
- JUMPHERE(jump[0]);
- check_partial(common, FALSE);
- return cc;
-
- case OP_EOD:
- add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
- check_partial(common, FALSE);
- return cc;
-
- case OP_CIRC:
- OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
- add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0));
- OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
- add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
- return cc;
-
- case OP_CIRCM:
- OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
- jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0);
- OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
- add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
- jump[0] = JUMP(SLJIT_JUMP);
- JUMPHERE(jump[1]);
-
- add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
- if (common->nltype == NLTYPE_FIXED && common->newline > 255)
- {
- OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
- add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, TMP1, 0));
- OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
- OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
- add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
- add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
- }
- else
- {
- skip_char_back(common);
- read_char_range(common, common->nlmin, common->nlmax, TRUE);
- check_newlinechar(common, common->nltype, backtracks, FALSE);
- }
- JUMPHERE(jump[0]);
- return cc;
-
- case OP_DOLL:
- OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
- OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
- add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
-
- if (!common->endonly)
- compile_char1_matchingpath(common, OP_EODN, cc, backtracks);
- else
- {
- add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
- check_partial(common, FALSE);
- }
- return cc;
-
- case OP_DOLLM:
- jump[1] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
- OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
- OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
- add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
- check_partial(common, FALSE);
- jump[0] = JUMP(SLJIT_JUMP);
- JUMPHERE(jump[1]);
-
- if (common->nltype == NLTYPE_FIXED && common->newline > 255)
- {
- OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
- OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
- if (common->mode == JIT_COMPILE)
- add_jump(compiler, backtracks, CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0));
- else
- {
- jump[1] = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
- /* STR_PTR = STR_END - IN_UCHARS(1) */
- add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
- check_partial(common, TRUE);
- add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
- JUMPHERE(jump[1]);
- }
-
- OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
- add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
- add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
- }
- else
- {
- peek_char(common, common->nlmax);
- check_newlinechar(common, common->nltype, backtracks, FALSE);
- }
- JUMPHERE(jump[0]);
- return cc;
-
case OP_CHAR:
case OP_CHARI:
length = 1;
@@ -5681,7 +5733,8 @@ switch(type)
return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
}
- detect_partial_match(common, backtracks);
+ if (check_str_ptr)
+ detect_partial_match(common, backtracks);
#ifdef SUPPORT_UTF
if (common->utf)
{
@@ -5713,7 +5766,8 @@ switch(type)
case OP_NOT:
case OP_NOTI:
- detect_partial_match(common, backtracks);
+ if (check_str_ptr)
+ detect_partial_match(common, backtracks);
length = 1;
#ifdef SUPPORT_UTF
if (common->utf)
@@ -5774,16 +5828,17 @@ switch(type)
case OP_CLASS:
case OP_NCLASS:
- detect_partial_match(common, backtracks);
+ if (check_str_ptr)
+ detect_partial_match(common, backtracks);
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
- bit = (common->utf && is_char7_bitset((const pcre_uint8 *)cc, type == OP_NCLASS)) ? 127 : 255;
+ bit = (common->utf && is_char7_bitset((const sljit_ub *)cc, type == OP_NCLASS)) ? 127 : 255;
read_char_range(common, 0, bit, type == OP_NCLASS);
#else
read_char_range(common, 0, 255, type == OP_NCLASS);
#endif
- if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, FALSE, backtracks))
+ if (check_class_ranges(common, (const sljit_ub *)cc, type == OP_NCLASS, FALSE, backtracks))
return cc + 32 / sizeof(pcre_uchar);
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
@@ -5817,40 +5872,15 @@ switch(type)
if (jump[0] != NULL)
JUMPHERE(jump[0]);
#endif
-
return cc + 32 / sizeof(pcre_uchar);
#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
case OP_XCLASS:
+ if (check_str_ptr)
+ detect_partial_match(common, backtracks);
compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
return cc + GET(cc, 0) - 1;
#endif
-
- case OP_REVERSE:
- length = GET(cc, 0);
- if (length == 0)
- return cc + LINK_SIZE;
- OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
-#ifdef SUPPORT_UTF
- if (common->utf)
- {
- OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
- label = LABEL();
- add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
- skip_char_back(common);
- OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
- JUMPTO(SLJIT_NOT_ZERO, label);
- }
- else
-#endif
- {
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
- OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
- add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, TMP1, 0));
- }
- check_start_used_ptr(common);
- return cc + LINK_SIZE;
}
SLJIT_ASSERT_STOP();
return cc;
@@ -5919,7 +5949,7 @@ if (context.length > 0)
}
/* A non-fixed length character will be checked if length == 0. */
-return compile_char1_matchingpath(common, *cc, cc + 1, backtracks);
+return compile_char1_matchingpath(common, *cc, cc + 1, backtracks, TRUE);
}
/* Forward definitions. */
@@ -7985,7 +8015,7 @@ switch(opcode)
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, 0);
label = LABEL();
- compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
+ compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE);
if (opcode == OP_UPTO || opcode == OP_CRRANGE)
{
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
@@ -8007,7 +8037,7 @@ switch(opcode)
else
{
if (opcode == OP_PLUS)
- compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
+ compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE);
if (private_data_ptr == 0)
allocate_stack(common, 2);
OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
@@ -8016,7 +8046,7 @@ switch(opcode)
else
OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, 1);
label = LABEL();
- compile_char1_matchingpath(common, type, cc, &nomatch);
+ compile_char1_matchingpath(common, type, cc, &nomatch, TRUE);
OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
if (opcode <= OP_PLUS)
JUMPTO(SLJIT_JUMP, label);
@@ -8043,7 +8073,7 @@ switch(opcode)
case OP_MINSTAR:
case OP_MINPLUS:
if (opcode == OP_MINPLUS)
- compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
+ compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE);
if (private_data_ptr == 0)
allocate_stack(common, 1);
OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
@@ -8067,14 +8097,14 @@ switch(opcode)
allocate_stack(common, 1);
OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
if (opcode == OP_QUERY)
- compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
+ compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE);
BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
break;
case OP_EXACT:
OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
label = LABEL();
- compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
+ compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE);
OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
JUMPTO(SLJIT_NOT_ZERO, label);
break;
@@ -8083,12 +8113,12 @@ switch(opcode)
case OP_POSPLUS:
case OP_POSUPTO:
if (opcode == OP_POSPLUS)
- compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
+ compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE);
if (opcode == OP_POSUPTO)
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, SLJIT_IMM, max);
OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
label = LABEL();
- compile_char1_matchingpath(common, type, cc, &nomatch);
+ compile_char1_matchingpath(common, type, cc, &nomatch, TRUE);
OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
if (opcode != OP_POSUPTO)
JUMPTO(SLJIT_JUMP, label);
@@ -8103,7 +8133,7 @@ switch(opcode)
case OP_POSQUERY:
OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
- compile_char1_matchingpath(common, type, cc, &nomatch);
+ compile_char1_matchingpath(common, type, cc, &nomatch, TRUE);
OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
set_jumps(nomatch, LABEL());
OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
@@ -8113,7 +8143,7 @@ switch(opcode)
/* Combination of OP_EXACT and OP_POSSTAR or OP_POSUPTO */
OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, min);
label = LABEL();
- compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
+ compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE);
OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
JUMPTO(SLJIT_NOT_ZERO, label);
@@ -8124,7 +8154,7 @@ switch(opcode)
}
OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
label = LABEL();
- compile_char1_matchingpath(common, type, cc, &nomatch);
+ compile_char1_matchingpath(common, type, cc, &nomatch, TRUE);
OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
if (max == 0)
JUMPTO(SLJIT_JUMP, label);
@@ -8298,6 +8328,16 @@ while (cc < ccend)
case OP_SOM:
case OP_NOT_WORD_BOUNDARY:
case OP_WORD_BOUNDARY:
+ case OP_EODN:
+ case OP_EOD:
+ case OP_DOLL:
+ case OP_DOLLM:
+ case OP_CIRC:
+ case OP_CIRCM:
+ case OP_REVERSE:
+ cc = compile_simple_assertion_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
+ break;
+
case OP_NOT_DIGIT:
case OP_DIGIT:
case OP_NOT_WHITESPACE:
@@ -8315,16 +8355,9 @@ while (cc < ccend)
case OP_NOT_VSPACE:
case OP_VSPACE:
case OP_EXTUNI:
- case OP_EODN:
- case OP_EOD:
- case OP_CIRC:
- case OP_CIRCM:
- case OP_DOLL:
- case OP_DOLLM:
case OP_NOT:
case OP_NOTI:
- case OP_REVERSE:
- cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
+ cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
break;
case OP_SET_SOM:
@@ -8341,7 +8374,7 @@ while (cc < ccend)
if (common->mode == JIT_COMPILE)
cc = compile_charn_matchingpath(common, cc, ccend, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
else
- cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
+ cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
break;
case OP_STAR:
@@ -8417,7 +8450,7 @@ while (cc < ccend)
if (cc[1 + (32 / sizeof(pcre_uchar))] >= OP_CRSTAR && cc[1 + (32 / sizeof(pcre_uchar))] <= OP_CRPOSRANGE)
cc = compile_iterator_matchingpath(common, cc, parent);
else
- cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
+ cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
break;
#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
@@ -8425,7 +8458,7 @@ while (cc < ccend)
if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRPOSRANGE)
cc = compile_iterator_matchingpath(common, cc, parent);
else
- cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
+ cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
break;
#endif
@@ -8658,7 +8691,7 @@ switch(opcode)
case OP_MINSTAR:
case OP_MINPLUS:
OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
- compile_char1_matchingpath(common, type, cc, &jumplist);
+ compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath);
set_jumps(jumplist, LABEL());
@@ -8676,7 +8709,7 @@ switch(opcode)
set_jumps(current->topbacktracks, label);
}
OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
- compile_char1_matchingpath(common, type, cc, &jumplist);
+ compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
OP1(SLJIT_MOV, TMP1, 0, base, offset1);
OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
@@ -8714,7 +8747,7 @@ switch(opcode)
OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
jump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
- compile_char1_matchingpath(common, type, cc, &jumplist);
+ compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath);
set_jumps(jumplist, LABEL());
JUMPHERE(jump);