author     zherczeg <zherczeg@2f5784b3-3f2a-0410-8824-cb99058d5e15>  2018-01-05 09:51:02 +0000
committer  zherczeg <zherczeg@2f5784b3-3f2a-0410-8824-cb99058d5e15>  2018-01-05 09:51:02 +0000
commit     a13a9a71193edc53084f9e0f4a13dd8e1a925f41 (patch)
tree       2376bd64c0a51b80466b9d9ce8b72160b39980a9
parent     774be03b760cb4b56abf686f6dc56ac4de07fa67 (diff)
download   pcre-a13a9a71193edc53084f9e0f4a13dd8e1a925f41.tar.gz
JIT compiler update.
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1720 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r--  pcre_jit_compile.c               299
-rw-r--r--  sljit/sljitConfigInternal.h        4
-rw-r--r--  sljit/sljitLir.c                 432
-rw-r--r--  sljit/sljitLir.h                 208
-rw-r--r--  sljit/sljitNativeARM_32.c        250
-rw-r--r--  sljit/sljitNativeARM_64.c        683
-rw-r--r--  sljit/sljitNativeARM_T2_32.c     181
-rw-r--r--  sljit/sljitNativeMIPS_common.c   107
-rw-r--r--  sljit/sljitNativePPC_common.c    870
-rw-r--r--  sljit/sljitNativeSPARC_common.c   82
-rw-r--r--  sljit/sljitNativeX86_32.c         12
-rw-r--r--  sljit/sljitNativeX86_64.c         35
-rw-r--r--  sljit/sljitNativeX86_common.c     60
13 files changed, 1472 insertions, 1751 deletions
diff --git a/pcre_jit_compile.c b/pcre_jit_compile.c
index 7937be9..50fe50b 100644
--- a/pcre_jit_compile.c
+++ b/pcre_jit_compile.c
@@ -534,13 +534,10 @@ the start pointers when the end of the capturing group has not yet reached. */
#if defined COMPILE_PCRE8
#define MOV_UCHAR SLJIT_MOV_U8
-#define MOVU_UCHAR SLJIT_MOVU_U8
#elif defined COMPILE_PCRE16
#define MOV_UCHAR SLJIT_MOV_U16
-#define MOVU_UCHAR SLJIT_MOVU_U16
#elif defined COMPILE_PCRE32
#define MOV_UCHAR SLJIT_MOV_U32
-#define MOVU_UCHAR SLJIT_MOVU_U32
#else
#error Unsupported compiling mode
#endif
@@ -2397,12 +2394,25 @@ if (length < 8)
}
else
{
- GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START);
- OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
- loop = LABEL();
- OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw), SLJIT_R0, 0);
- OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
- JUMPTO(SLJIT_NOT_ZERO, loop);
+ if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw)) == SLJIT_SUCCESS)
+ {
+ GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START);
+ OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
+ loop = LABEL();
+ sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw));
+ OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
+ JUMPTO(SLJIT_NOT_ZERO, loop);
+ }
+ else
+ {
+ GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START + sizeof(sljit_sw));
+ OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
+ loop = LABEL();
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R0, 0);
+ OP2(SLJIT_ADD, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, sizeof(sljit_sw));
+ OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
+ JUMPTO(SLJIT_NOT_ZERO, loop);
+ }
}
}
@@ -2435,12 +2445,25 @@ if (length < 8)
}
else
{
- GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
- OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
- loop = LABEL();
- OP1(SLJIT_MOVU, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
- OP2(SLJIT_SUB | SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
- JUMPTO(SLJIT_NOT_ZERO, loop);
+ if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw)) == SLJIT_SUCCESS)
+ {
+ GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
+ OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
+ loop = LABEL();
+ sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
+ OP2(SLJIT_SUB | SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
+ JUMPTO(SLJIT_NOT_ZERO, loop);
+ }
+ else
+ {
+ GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + 2 * sizeof(sljit_sw));
+ OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
+ loop = LABEL();
+ OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP1, 0);
+ OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, sizeof(sljit_sw));
+ OP2(SLJIT_SUB | SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
+ JUMPTO(SLJIT_NOT_ZERO, loop);
+ }
}
OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
@@ -2482,6 +2505,7 @@ static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
DEFINE_COMPILER;
struct sljit_label *loop;
struct sljit_jump *early_quit;
+BOOL has_pre;
/* At this point we can freely use all registers. */
OP1(SLJIT_MOV, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
@@ -2495,17 +2519,30 @@ if (common->mark_ptr != 0)
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R2, 0);
OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, begin));
-GET_LOCAL_BASE(SLJIT_S0, 0, OVECTOR_START);
+
+has_pre = sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw)) == SLJIT_SUCCESS;
+GET_LOCAL_BASE(SLJIT_S0, 0, OVECTOR_START - (has_pre ? sizeof(sljit_sw) : 0));
+
/* Unlikely, but possible */
early_quit = CMP(SLJIT_EQUAL, SLJIT_R1, 0, SLJIT_IMM, 0);
loop = LABEL();
-OP2(SLJIT_SUB, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_R0, 0);
-OP2(SLJIT_ADD, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, sizeof(sljit_sw));
+
+if (has_pre)
+ sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw));
+else
+ {
+ OP1(SLJIT_MOV, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 0);
+ OP2(SLJIT_ADD, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, sizeof(sljit_sw));
+ }
+
+OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, sizeof(int));
+OP2(SLJIT_SUB, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_R0, 0);
/* Copy the integer value to the output buffer */
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
-OP1(SLJIT_MOVU_S32, SLJIT_MEM1(SLJIT_R2), sizeof(int), SLJIT_S1, 0);
+
+OP1(SLJIT_MOV_S32, SLJIT_MEM1(SLJIT_R2), 0, SLJIT_S1, 0);
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
JUMPTO(SLJIT_NOT_ZERO, loop);
JUMPHERE(early_quit);
@@ -2513,14 +2550,29 @@ JUMPHERE(early_quit);
/* Calculate the return value, which is the maximum ovector value. */
if (topbracket > 1)
{
- GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
- OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);
+ if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw))) == SLJIT_SUCCESS)
+ {
+ GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
+ OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);

- /* OVECTOR(0) is never equal to SLJIT_S2. */
- loop = LABEL();
- OP1(SLJIT_MOVU, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw)));
- OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
- CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
+ /* OVECTOR(0) is never equal to SLJIT_S2. */
+ loop = LABEL();
+ sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw)));
+ OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
+ CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
+ }
+ else
+ {
+ GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + (topbracket - 1) * 2 * sizeof(sljit_sw));
+ OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);
+
+ /* OVECTOR(0) is never equal to SLJIT_S2. */
+ loop = LABEL();
+ OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), 0);
+ OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 2 * (sljit_sw)sizeof(sljit_sw));
+ OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
+ CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
+ }
OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
}
else
@@ -5181,84 +5233,183 @@ OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
-#define CHAR1 STR_END
-#define CHAR2 STACK_TOP
-
static void do_casefulcmp(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *jump;
struct sljit_label *label;
+int char1_reg;
+int char2_reg;
-sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
+if (sljit_get_register_index(TMP3) < 0)
+ {
+ char1_reg = STR_END;
+ char2_reg = STACK_TOP;
+ }
+else
+ {
+ char1_reg = TMP3;
+ char2_reg = RETURN_ADDR;
+ }
+
+sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
-OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
-OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, CHAR2, 0);
-OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
-OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
-label = LABEL();
-OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
-OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
-jump = CMP(SLJIT_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
-OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
-JUMPTO(SLJIT_NOT_ZERO, label);
+if (char1_reg == STR_END)
+ {
+ OP1(SLJIT_MOV, TMP3, 0, char1_reg, 0);
+ OP1(SLJIT_MOV, RETURN_ADDR, 0, char2_reg, 0);
+ }
-JUMPHERE(jump);
-OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
-OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
-OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
-sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
-}
+if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
+ {
+ label = LABEL();
+ sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
+ sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
+ jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
+ OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
+ JUMPTO(SLJIT_NOT_ZERO, label);
+
+ JUMPHERE(jump);
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
+ }
+else if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
+ {
+ OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
+ OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+
+ label = LABEL();
+ sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
+ sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
+ jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
+ OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
+ JUMPTO(SLJIT_NOT_ZERO, label);
+
+ JUMPHERE(jump);
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
+ OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+ }
+else
+ {
+ label = LABEL();
+ OP1(MOV_UCHAR, char1_reg, 0, SLJIT_MEM1(TMP1), 0);
+ OP1(MOV_UCHAR, char2_reg, 0, SLJIT_MEM1(STR_PTR), 0);
+ OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
+ OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+ jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
+ OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
+ JUMPTO(SLJIT_NOT_ZERO, label);
+
+ JUMPHERE(jump);
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
+ }
+
+if (char1_reg == STR_END)
+ {
+ OP1(SLJIT_MOV, char1_reg, 0, TMP3, 0);
+ OP1(SLJIT_MOV, char2_reg, 0, RETURN_ADDR, 0);
+ }
-#define LCC_TABLE STACK_LIMIT
+sljit_emit_fast_return(compiler, TMP1, 0);
+}
static void do_caselesscmp(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *jump;
struct sljit_label *label;
+int char1_reg = STR_END;
+int char2_reg;
+int lcc_table;
+int opt_type = 0;
-sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
+if (sljit_get_register_index(TMP3) < 0)
+ {
+ char2_reg = STACK_TOP;
+ lcc_table = STACK_LIMIT;
+ }
+else
+ {
+ char2_reg = RETURN_ADDR;
+ lcc_table = TMP3;
+ }
+
+if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
+ opt_type = 1;
+else if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
+ opt_type = 2;
+
+sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
-OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
-OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, CHAR1, 0);
-OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, CHAR2, 0);
-OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
-OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
-OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, char1_reg, 0);
-label = LABEL();
-OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
-OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
-#ifndef COMPILE_PCRE8
-jump = CMP(SLJIT_GREATER, CHAR1, 0, SLJIT_IMM, 255);
+if (char2_reg == STACK_TOP)
+ {
+ OP1(SLJIT_MOV, TMP3, 0, char2_reg, 0);
+ OP1(SLJIT_MOV, RETURN_ADDR, 0, lcc_table, 0);
+ }
+
+OP1(SLJIT_MOV, lcc_table, 0, SLJIT_IMM, common->lcc);
+
+if (opt_type == 1)
+ {
+ label = LABEL();
+ sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
+ sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
+ }
+else if (opt_type == 2)
+ {
+ OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
+ OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+
+ label = LABEL();
+ sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
+ sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
+ }
+else
+ {
+ label = LABEL();
+ OP1(MOV_UCHAR, char1_reg, 0, SLJIT_MEM1(TMP1), 0);
+ OP1(MOV_UCHAR, char2_reg, 0, SLJIT_MEM1(STR_PTR), 0);
+ OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
+ }
+
+#if PCRE2_CODE_UNIT_WIDTH != 8
+jump = CMP(SLJIT_GREATER, char1_reg, 0, SLJIT_IMM, 255);
#endif
-OP1(SLJIT_MOV_U8, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
-#ifndef COMPILE_PCRE8
+OP1(SLJIT_MOV_U8, char1_reg, 0, SLJIT_MEM2(lcc_table, char1_reg), 0);
+#if PCRE2_CODE_UNIT_WIDTH != 8
JUMPHERE(jump);
-jump = CMP(SLJIT_GREATER, CHAR2, 0, SLJIT_IMM, 255);
+jump = CMP(SLJIT_GREATER, char2_reg, 0, SLJIT_IMM, 255);
#endif
-OP1(SLJIT_MOV_U8, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
-#ifndef COMPILE_PCRE8
+OP1(SLJIT_MOV_U8, char2_reg, 0, SLJIT_MEM2(lcc_table, char2_reg), 0);
+#if PCRE2_CODE_UNIT_WIDTH != 8
JUMPHERE(jump);
#endif
-jump = CMP(SLJIT_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
+
+if (opt_type == 0)
+ OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+
+jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
JUMPTO(SLJIT_NOT_ZERO, label);
JUMPHERE(jump);
-OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
-OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
-OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
-OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
-sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
-}
+OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
+
+if (opt_type == 2)
+ OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
-#undef LCC_TABLE
-#undef CHAR1
-#undef CHAR2
+if (char2_reg == STACK_TOP)
+ {
+ OP1(SLJIT_MOV, char2_reg, 0, TMP3, 0);
+ OP1(SLJIT_MOV, lcc_table, 0, RETURN_ADDR, 0);
+ }
+
+OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
+sljit_emit_fast_return(compiler, TMP1, 0);
+}
#if defined SUPPORT_UTF && defined SUPPORT_UCP
diff --git a/sljit/sljitConfigInternal.h b/sljit/sljitConfigInternal.h
index 12fe2c2..e13282c 100644
--- a/sljit/sljitConfigInternal.h
+++ b/sljit/sljitConfigInternal.h
@@ -588,13 +588,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr);
#elif (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64)
-#define SLJIT_NUMBER_OF_REGISTERS 25
+#define SLJIT_NUMBER_OF_REGISTERS 26
#define SLJIT_NUMBER_OF_SAVED_REGISTERS 10
#define SLJIT_LOCALS_OFFSET_BASE (2 * sizeof(sljit_sw))
#elif (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC)
-#define SLJIT_NUMBER_OF_REGISTERS 22
+#define SLJIT_NUMBER_OF_REGISTERS 23
#define SLJIT_NUMBER_OF_SAVED_REGISTERS 17
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) || (defined _AIX)
#define SLJIT_LOCALS_OFFSET_BASE ((6 + 8) * sizeof(sljit_sw))
diff --git a/sljit/sljitLir.c b/sljit/sljitLir.c
index 11a37a5..5e435f0 100644
--- a/sljit/sljitLir.c
+++ b/sljit/sljitLir.c
@@ -99,10 +99,10 @@
#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE)
#define TYPE_CAST_NEEDED(op) \
- (((op) >= SLJIT_MOV_U8 && (op) <= SLJIT_MOV_S32) || ((op) >= SLJIT_MOVU_U8 && (op) <= SLJIT_MOVU_S32))
+ ((op) >= SLJIT_MOV_U8 && (op) <= SLJIT_MOV_S32)
#else
#define TYPE_CAST_NEEDED(op) \
- (((op) >= SLJIT_MOV_U8 && (op) <= SLJIT_MOV_S16) || ((op) >= SLJIT_MOVU_U8 && (op) <= SLJIT_MOVU_S16))
+ ((op) >= SLJIT_MOV_U8 && (op) <= SLJIT_MOV_S16)
#endif
#define BUF_SIZE 4096
@@ -685,80 +685,106 @@ static SLJIT_INLINE void set_const(struct sljit_const *const_, struct sljit_comp
#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
#define FUNCTION_CHECK_IS_REG(r) \
- (((r) >= SLJIT_R0 && (r) < (SLJIT_R0 + compiler->scratches)) || \
- ((r) > (SLJIT_S0 - compiler->saveds) && (r) <= SLJIT_S0))
+ (((r) >= SLJIT_R0 && (r) < (SLJIT_R0 + compiler->scratches)) \
+ || ((r) > (SLJIT_S0 - compiler->saveds) && (r) <= SLJIT_S0))
-#define FUNCTION_CHECK_IS_REG_OR_UNUSED(r) \
- ((r) == SLJIT_UNUSED || \
- ((r) >= SLJIT_R0 && (r) < (SLJIT_R0 + compiler->scratches)) || \
- ((r) > (SLJIT_S0 - compiler->saveds) && (r) <= SLJIT_S0))
+#define FUNCTION_CHECK_IS_FREG(fr) \
+ (((fr) >= SLJIT_FR0 && (fr) < (SLJIT_FR0 + compiler->fscratches)) \
+ || ((fr) > (SLJIT_FS0 - compiler->fsaveds) && (fr) <= SLJIT_FS0))
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
-#define CHECK_NOT_VIRTUAL_REGISTER(p) \
- CHECK_ARGUMENT((p) < SLJIT_R3 || (p) > SLJIT_R6);
+#define CHECK_IF_VIRTUAL_REGISTER(p) ((p) <= SLJIT_S3 && (p) >= SLJIT_S8)
#else
-#define CHECK_NOT_VIRTUAL_REGISTER(p)
+#define CHECK_IF_VIRTUAL_REGISTER(p) 0
#endif
-#define FUNCTION_CHECK_SRC(p, i) \
- CHECK_ARGUMENT(compiler->scratches != -1 && compiler->saveds != -1); \
- if (FUNCTION_CHECK_IS_REG(p)) \
- CHECK_ARGUMENT((i) == 0); \
- else if ((p) == SLJIT_IMM) \
- ; \
- else if ((p) == (SLJIT_MEM1(SLJIT_SP))) \
- CHECK_ARGUMENT((i) >= 0 && (i) < compiler->logical_local_size); \
- else { \
- CHECK_ARGUMENT((p) & SLJIT_MEM); \
- CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG_OR_UNUSED((p) & REG_MASK)); \
- CHECK_NOT_VIRTUAL_REGISTER((p) & REG_MASK); \
- if ((p) & OFFS_REG_MASK) { \
- CHECK_ARGUMENT(((p) & REG_MASK) != SLJIT_UNUSED); \
- CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(OFFS_REG(p))); \
- CHECK_NOT_VIRTUAL_REGISTER(OFFS_REG(p)); \
- CHECK_ARGUMENT(!((i) & ~0x3)); \
- } \
- CHECK_ARGUMENT(!((p) & ~(SLJIT_MEM | REG_MASK | OFFS_REG_MASK))); \
+static sljit_s32 function_check_src_mem(struct sljit_compiler *compiler, sljit_s32 p, sljit_sw i)
+{
+ if (compiler->scratches == -1 || compiler->saveds == -1)
+ return 0;
+
+ if (!(p & SLJIT_MEM))
+ return 0;
+
+ if (!((p & REG_MASK) == SLJIT_UNUSED || FUNCTION_CHECK_IS_REG(p & REG_MASK)))
+ return 0;
+
+ if (CHECK_IF_VIRTUAL_REGISTER(p & REG_MASK))
+ return 0;
+
+ if (p & OFFS_REG_MASK) {
+ if ((p & REG_MASK) == SLJIT_UNUSED)
+ return 0;
+
+ if (!(FUNCTION_CHECK_IS_REG(OFFS_REG(p))))
+ return 0;
+
+ if (CHECK_IF_VIRTUAL_REGISTER(OFFS_REG(p)))
+ return 0;
+
+ if ((i & ~0x3) != 0)
+ return 0;
}
+ return (p & ~(SLJIT_MEM | REG_MASK | OFFS_REG_MASK)) == 0;
+}
+
+#define FUNCTION_CHECK_SRC_MEM(p, i) \
+ CHECK_ARGUMENT(function_check_src_mem(compiler, p, i));
+
+static sljit_s32 function_check_src(struct sljit_compiler *compiler, sljit_s32 p, sljit_sw i)
+{
+ if (compiler->scratches == -1 || compiler->saveds == -1)
+ return 0;
+
+ if (FUNCTION_CHECK_IS_REG(p))
+ return (i == 0);
+
+ if (p == SLJIT_IMM)
+ return 1;
+
+ if (p == SLJIT_MEM1(SLJIT_SP))
+ return (i >= 0 && i < compiler->logical_local_size);
+
+ return function_check_src_mem(compiler, p, i);
+}
+
+#define FUNCTION_CHECK_SRC(p, i) \
+ CHECK_ARGUMENT(function_check_src(compiler, p, i));
+
+static sljit_s32 function_check_dst(struct sljit_compiler *compiler, sljit_s32 p, sljit_sw i, sljit_s32 unused)
+{
+ if (compiler->scratches == -1 || compiler->saveds == -1)
+ return 0;
+
+ if (FUNCTION_CHECK_IS_REG(p) || ((unused) && (p) == SLJIT_UNUSED))
+ return (i == 0);
+
+ if (p == SLJIT_MEM1(SLJIT_SP))
+ return (i >= 0 && i < compiler->logical_local_size);
+
+ return function_check_src_mem(compiler, p, i);
+}
+
#define FUNCTION_CHECK_DST(p, i, unused) \
- CHECK_ARGUMENT(compiler->scratches != -1 && compiler->saveds != -1); \
- if (FUNCTION_CHECK_IS_REG(p) || ((unused) && (p) == SLJIT_UNUSED)) \
- CHECK_ARGUMENT((i) == 0); \
- else if ((p) == (SLJIT_MEM1(SLJIT_SP))) \
- CHECK_ARGUMENT((i) >= 0 && (i) < compiler->logical_local_size); \
- else { \
- CHECK_ARGUMENT((p) & SLJIT_MEM); \
- CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG_OR_UNUSED((p) & REG_MASK)); \
- CHECK_NOT_VIRTUAL_REGISTER((p) & REG_MASK); \
- if ((p) & OFFS_REG_MASK) { \
- CHECK_ARGUMENT(((p) & REG_MASK) != SLJIT_UNUSED); \
- CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(OFFS_REG(p))); \
- CHECK_NOT_VIRTUAL_REGISTER(OFFS_REG(p)); \
- CHECK_ARGUMENT(!((i) & ~0x3)); \
- } \
- CHECK_ARGUMENT(!((p) & ~(SLJIT_MEM | REG_MASK | OFFS_REG_MASK))); \
- }
+ CHECK_ARGUMENT(function_check_dst(compiler, p, i, unused));
+
+static sljit_s32 function_fcheck(struct sljit_compiler *compiler, sljit_s32 p, sljit_sw i)
+{
+ if (compiler->scratches == -1 || compiler->saveds == -1)
+ return 0;
+
+ if (FUNCTION_CHECK_IS_FREG(p))
+ return (i == 0);
+
+ if (p == SLJIT_MEM1(SLJIT_SP))
+ return (i >= 0 && i < compiler->logical_local_size);
+
+ return function_check_src_mem(compiler, p, i);
+}
#define FUNCTION_FCHECK(p, i) \
- CHECK_ARGUMENT(compiler->fscratches != -1 && compiler->fsaveds != -1); \
- if (((p) >= SLJIT_FR0 && (p) < (SLJIT_FR0 + compiler->fscratches)) || \
- ((p) > (SLJIT_FS0 - compiler->fsaveds) && (p) <= SLJIT_FS0)) \
- CHECK_ARGUMENT(i == 0); \
- else if ((p) == (SLJIT_MEM1(SLJIT_SP))) \
- CHECK_ARGUMENT((i) >= 0 && (i) < compiler->logical_local_size); \
- else { \
- CHECK_ARGUMENT((p) & SLJIT_MEM); \
- CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG_OR_UNUSED((p) & REG_MASK)); \
- CHECK_NOT_VIRTUAL_REGISTER((p) & REG_MASK); \
- if ((p) & OFFS_REG_MASK) { \
- CHECK_ARGUMENT(((p) & REG_MASK) != SLJIT_UNUSED); \
- CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(OFFS_REG(p))); \
- CHECK_NOT_VIRTUAL_REGISTER(OFFS_REG(p)); \
- CHECK_ARGUMENT(((p) & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_SP) && !(i & ~0x3)); \
- } \
- CHECK_ARGUMENT(!((p) & ~(SLJIT_MEM | REG_MASK | OFFS_REG_MASK))); \
- }
+ CHECK_ARGUMENT(function_fcheck(compiler, p, i));
#endif /* SLJIT_ARGUMENT_CHECKS */
@@ -779,64 +805,72 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_compiler_verbose(struct sljit_compiler *comp
# define SLJIT_PRINT_D ""
#endif
-#define sljit_verbose_reg(compiler, r) \
- do { \
- if ((r) < (SLJIT_R0 + compiler->scratches)) \
- fprintf(compiler->verbose, "r%d", (r) - SLJIT_R0); \
- else if ((r) != SLJIT_SP) \
- fprintf(compiler->verbose, "s%d", SLJIT_NUMBER_OF_REGISTERS - (r)); \
- else \
- fprintf(compiler->verbose, "sp"); \
- } while (0)
+static void sljit_verbose_reg(struct sljit_compiler *compiler, sljit_s32 r)
+{
+ if (r < (SLJIT_R0 + compiler->scratches))
+ fprintf(compiler->verbose, "r%d", r - SLJIT_R0);
+ else if (r != SLJIT_SP)
+ fprintf(compiler->verbose, "s%d", SLJIT_NUMBER_OF_REGISTERS - r);
+ else
+ fprintf(compiler->verbose, "sp");
+}
-#define sljit_verbose_param(compiler, p, i) \
- if ((p) & SLJIT_IMM) \
- fprintf(compiler->verbose, "#%" SLJIT_PRINT_D "d", (i)); \
- else if ((p) & SLJIT_MEM) { \
- if ((p) & REG_MASK) { \
- fputc('[', compiler->verbose); \
- sljit_verbose_reg(compiler, (p) & REG_MASK); \
- if ((p) & OFFS_REG_MASK) { \
- fprintf(compiler->verbose, " + "); \
- sljit_verbose_reg(compiler, OFFS_REG(p)); \
- if (i) \
- fprintf(compiler->verbose, " * %d", 1 << (i)); \
- } \
- else if (i) \
- fprintf(compiler->verbose, " + %" SLJIT_PRINT_D "d", (i)); \
- fputc(']', compiler->verbose); \
- } \
- else \
- fprintf(compiler->verbose, "[#%" SLJIT_PRINT_D "d]", (i)); \
- } else if (p) \
- sljit_verbose_reg(compiler, p); \
- else \
+static void sljit_verbose_freg(struct sljit_compiler *compiler, sljit_s32 r)
+{
+ if (r < (SLJIT_FR0 + compiler->fscratches))
+ fprintf(compiler->verbose, "fr%d", r - SLJIT_FR0);
+ else
+ fprintf(compiler->verbose, "fs%d", SLJIT_NUMBER_OF_FLOAT_REGISTERS - r);
+}
+
+static void sljit_verbose_param(struct sljit_compiler *compiler, sljit_s32 p, sljit_sw i)
+{
+ if ((p) & SLJIT_IMM)
+ fprintf(compiler->verbose, "#%" SLJIT_PRINT_D "d", (i));
+ else if ((p) & SLJIT_MEM) {
+ if ((p) & REG_MASK) {
+ fputc('[', compiler->verbose);
+ sljit_verbose_reg(compiler, (p) & REG_MASK);
+ if ((p) & OFFS_REG_MASK) {
+ fprintf(compiler->verbose, " + ");
+ sljit_verbose_reg(compiler, OFFS_REG(p));
+ if (i)
+ fprintf(compiler->verbose, " * %d", 1 << (i));
+ }
+ else if (i)
+ fprintf(compiler->verbose, " + %" SLJIT_PRINT_D "d", (i));
+ fputc(']', compiler->verbose);
+ }
+ else
+ fprintf(compiler->verbose, "[#%" SLJIT_PRINT_D "d]", (i));
+ } else if (p)
+ sljit_verbose_reg(compiler, p);
+ else
fprintf(compiler->verbose, "unused");
+}
-#define sljit_verbose_fparam(compiler, p, i) \
- if ((p) & SLJIT_MEM) { \
- if ((p) & REG_MASK) { \
- fputc('[', compiler->verbose); \
- sljit_verbose_reg(compiler, (p) & REG_MASK); \
- if ((p) & OFFS_REG_MASK) { \
- fprintf(compiler->verbose, " + "); \
- sljit_verbose_reg(compiler, OFFS_REG(p)); \
- if (i) \
- fprintf(compiler->verbose, "%d", 1 << (i)); \
- } \
- else if (i) \
- fprintf(compiler->verbose, "%" SLJIT_PRINT_D "d", (i)); \
- fputc(']', compiler->verbose); \
- } \
- else \
- fprintf(compiler->verbose, "[#%" SLJIT_PRINT_D "d]", (i)); \
- } \
- else { \
- if ((p) < (SLJIT_FR0 + compiler->fscratches)) \
- fprintf(compiler->verbose, "fr%d", (p) - SLJIT_FR0); \
- else \
- fprintf(compiler->verbose, "fs%d", SLJIT_NUMBER_OF_FLOAT_REGISTERS - (p)); \
+static void sljit_verbose_fparam(struct sljit_compiler *compiler, sljit_s32 p, sljit_sw i)
+{
+ if ((p) & SLJIT_MEM) {
+ if ((p) & REG_MASK) {
+ fputc('[', compiler->verbose);
+ sljit_verbose_reg(compiler, (p) & REG_MASK);
+ if ((p) & OFFS_REG_MASK) {
+ fprintf(compiler->verbose, " + ");
+ sljit_verbose_reg(compiler, OFFS_REG(p));
+ if (i)
+ fprintf(compiler->verbose, "%d", 1 << (i));
+ }
+ else if (i)
+ fprintf(compiler->verbose, " + %" SLJIT_PRINT_D "d", (i));
+ fputc(']', compiler->verbose);
+ }
+ else
+ fprintf(compiler->verbose, "[#%" SLJIT_PRINT_D "d]", (i));
}
+ else
+ sljit_verbose_freg(compiler, p);
+}
static const char* op0_names[] = {
(char*)"breakpoint", (char*)"nop", (char*)"lmul.uw", (char*)"lmul.sw",
@@ -1070,6 +1104,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fast_return(struct sljit_
{
#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
FUNCTION_CHECK_SRC(src, srcw);
+ CHECK_ARGUMENT(src != SLJIT_IMM);
compiler->last_flags = 0;
#endif
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
@@ -1128,9 +1163,6 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op1(struct sljit_compiler
case SLJIT_MOV:
case SLJIT_MOV_U32:
case SLJIT_MOV_P:
- case SLJIT_MOVU:
- case SLJIT_MOVU_U32:
- case SLJIT_MOVU_P:
/* Nothing allowed */
CHECK_ARGUMENT(!(op & (SLJIT_I32_OP | SLJIT_SET_Z | VARIABLE_FLAG_MASK)));
break;
@@ -1143,28 +1175,17 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op1(struct sljit_compiler
FUNCTION_CHECK_DST(dst, dstw, 1);
FUNCTION_CHECK_SRC(src, srcw);
- if (GET_OPCODE(op) >= SLJIT_NOT)
+ if (GET_OPCODE(op) >= SLJIT_NOT) {
+ CHECK_ARGUMENT(src != SLJIT_IMM);
compiler->last_flags = GET_FLAG_TYPE(op) | (op & (SLJIT_I32_OP | SLJIT_SET_Z));
- else if (GET_OPCODE(op) >= SLJIT_MOVU) {
- CHECK_ARGUMENT(!(src & SLJIT_MEM) || (src & REG_MASK) != SLJIT_SP);
- CHECK_ARGUMENT(!(dst & SLJIT_MEM) || (dst & REG_MASK) != SLJIT_SP);
- if ((src & REG_MASK) != SLJIT_UNUSED) {
- CHECK_ARGUMENT((src & REG_MASK) != (dst & REG_MASK) && (src & REG_MASK) != OFFS_REG(dst));
- CHECK_ARGUMENT((src & OFFS_REG_MASK) == SLJIT_UNUSED || srcw == 0);
- }
- if ((dst & REG_MASK) != SLJIT_UNUSED) {
- CHECK_ARGUMENT((dst & REG_MASK) != OFFS_REG(src));
- CHECK_ARGUMENT((dst & OFFS_REG_MASK) == SLJIT_UNUSED || dstw == 0);
- }
- compiler->last_flags = 0;
}
#endif
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
if (SLJIT_UNLIKELY(!!compiler->verbose)) {
- if (GET_OPCODE(op) <= SLJIT_MOVU_P)
+ if (GET_OPCODE(op) <= SLJIT_MOV_P)
{
- fprintf(compiler->verbose, " mov%s%s%s ", (GET_OPCODE(op) >= SLJIT_MOVU) ? "u" : "",
- !(op & SLJIT_I32_OP) ? "" : "32", (op != SLJIT_MOV32 && op != SLJIT_MOVU32) ? op1_names[GET_OPCODE(op) - SLJIT_OP1_BASE] : "");
+ fprintf(compiler->verbose, " mov%s%s ", !(op & SLJIT_I32_OP) ? "" : "32",
+ (op != SLJIT_MOV32) ? op1_names[GET_OPCODE(op) - SLJIT_OP1_BASE] : "");
}
else
{
@@ -1746,6 +1767,8 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_cmov(struct sljit_compile
#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_I32_OP)));
CHECK_ARGUMENT((type & 0xff) >= SLJIT_EQUAL && (type & 0xff) <= SLJIT_ORDERED_F64);
+
+ CHECK_ARGUMENT(compiler->scratches != -1 && compiler->saveds != -1);
CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(dst_reg & ~SLJIT_I32_OP));
if (src != SLJIT_IMM) {
CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(src));
@@ -1762,7 +1785,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_cmov(struct sljit_compile
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
if (SLJIT_UNLIKELY(!!compiler->verbose)) {
fprintf(compiler->verbose, " cmov%s %s%s, ",
- !(dst_reg & SLJIT_I32_OP) ? "" : ".i",
+ !(dst_reg & SLJIT_I32_OP) ? "" : "32",
jump_names[type & 0xff], JUMP_POSTFIX(type));
sljit_verbose_reg(compiler, dst_reg & ~SLJIT_I32_OP);
fprintf(compiler->verbose, ", ");
@@ -1773,6 +1796,72 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_cmov(struct sljit_compile
CHECK_RETURN_OK;
}
+static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 reg,
+ sljit_s32 mem, sljit_sw memw)
+{
+#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+ CHECK_ARGUMENT((type & 0xff) >= SLJIT_MOV && (type & 0xff) <= SLJIT_MOV_P);
+ CHECK_ARGUMENT(!(type & SLJIT_I32_OP) || ((type & 0xff) != SLJIT_MOV && (type & 0xff) != SLJIT_MOV_U32 && (type & 0xff) != SLJIT_MOV_P));
+ CHECK_ARGUMENT((type & SLJIT_MEM_PRE) || (type & SLJIT_MEM_POST));
+ CHECK_ARGUMENT((type & (SLJIT_MEM_PRE | SLJIT_MEM_POST)) != (SLJIT_MEM_PRE | SLJIT_MEM_POST));
+ CHECK_ARGUMENT((type & ~(0xff | SLJIT_I32_OP | SLJIT_MEM_STORE | SLJIT_MEM_SUPP | SLJIT_MEM_PRE | SLJIT_MEM_POST)) == 0);
+
+ FUNCTION_CHECK_SRC_MEM(mem, memw);
+ CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(reg));
+
+ CHECK_ARGUMENT((mem & REG_MASK) != SLJIT_UNUSED && (mem & REG_MASK) != reg);
+#endif
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
+ if (!(type & SLJIT_MEM_SUPP) && SLJIT_UNLIKELY(!!compiler->verbose)) {
+ if (sljit_emit_mem(compiler, type | SLJIT_MEM_SUPP, reg, mem, memw) == SLJIT_ERR_UNSUPPORTED)
+ fprintf(compiler->verbose, " //");
+
+ fprintf(compiler->verbose, " mem%s.%s%s%s ",
+ !(type & SLJIT_I32_OP) ? "" : "32",
+ (type & SLJIT_MEM_STORE) ? "st" : "ld",
+ op1_names[(type & 0xff) - SLJIT_OP1_BASE],
+ (type & SLJIT_MEM_PRE) ? ".pre" : ".post");
+ sljit_verbose_reg(compiler, reg);
+ fprintf(compiler->verbose, ", ");
+ sljit_verbose_param(compiler, mem, memw);
+ fprintf(compiler->verbose, "\n");
+ }
+#endif
+ CHECK_RETURN_OK;
+}
+
+static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fmem(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 freg,
+ sljit_s32 mem, sljit_sw memw)
+{
+#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+ CHECK_ARGUMENT((type & 0xff) == SLJIT_MOV_F64);
+ CHECK_ARGUMENT((type & SLJIT_MEM_PRE) || (type & SLJIT_MEM_POST));
+ CHECK_ARGUMENT((type & (SLJIT_MEM_PRE | SLJIT_MEM_POST)) != (SLJIT_MEM_PRE | SLJIT_MEM_POST));
+ CHECK_ARGUMENT((type & ~(0xff | SLJIT_I32_OP | SLJIT_MEM_STORE | SLJIT_MEM_SUPP | SLJIT_MEM_PRE | SLJIT_MEM_POST)) == 0);
+
+ FUNCTION_CHECK_SRC_MEM(mem, memw);
+ CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(freg));
+#endif
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
+ if (!(type & SLJIT_MEM_SUPP) && SLJIT_UNLIKELY(!!compiler->verbose)) {
+ if (sljit_emit_fmem(compiler, type | SLJIT_MEM_SUPP, freg, mem, memw) == SLJIT_ERR_UNSUPPORTED)
+ fprintf(compiler->verbose, " //");
+
+ fprintf(compiler->verbose, " fmem.%s%s%s ",
+ (type & SLJIT_MEM_STORE) ? "st" : "ld",
+ !(type & SLJIT_I32_OP) ? ".f64" : ".f32",
+ (type & SLJIT_MEM_PRE) ? ".pre" : ".post");
+ sljit_verbose_freg(compiler, freg);
+ fprintf(compiler->verbose, ", ");
+ sljit_verbose_param(compiler, mem, memw);
+ fprintf(compiler->verbose, "\n");
+ }
+#endif
+ CHECK_RETURN_OK;
+}
+
static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset)
{
/* Any offset is allowed. */
@@ -2046,6 +2135,49 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_fcmp(struct sljit_compile
return sljit_emit_jump(compiler, type);
}
+#if !(defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32) \
+ && !(defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) \
+ && !(defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC)
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 reg,
+ sljit_s32 mem, sljit_sw memw)
+{
+ SLJIT_UNUSED_ARG(compiler);
+ SLJIT_UNUSED_ARG(type);
+ SLJIT_UNUSED_ARG(reg);
+ SLJIT_UNUSED_ARG(mem);
+ SLJIT_UNUSED_ARG(memw);
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw));
+
+ return SLJIT_ERR_UNSUPPORTED;
+}
+
+#endif
+
+#if !(defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) \
+ && !(defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC)
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 freg,
+ sljit_s32 mem, sljit_sw memw)
+{
+ SLJIT_UNUSED_ARG(compiler);
+ SLJIT_UNUSED_ARG(type);
+ SLJIT_UNUSED_ARG(freg);
+ SLJIT_UNUSED_ARG(mem);
+ SLJIT_UNUSED_ARG(memw);
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_fmem(compiler, type, freg, mem, memw));
+
+ return SLJIT_ERR_UNSUPPORTED;
+}
+
+#endif
+
#if !(defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset)
@@ -2398,6 +2530,28 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil
return SLJIT_ERR_UNSUPPORTED;
}
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 reg, sljit_s32 mem, sljit_sw memw)
+{
+ SLJIT_UNUSED_ARG(compiler);
+ SLJIT_UNUSED_ARG(type);
+ SLJIT_UNUSED_ARG(reg);
+ SLJIT_UNUSED_ARG(mem);
+ SLJIT_UNUSED_ARG(memw);
+ SLJIT_UNREACHABLE();
+ return SLJIT_ERR_UNSUPPORTED;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 freg, sljit_s32 mem, sljit_sw memw)
+{
+ SLJIT_UNUSED_ARG(compiler);
+ SLJIT_UNUSED_ARG(type);
+ SLJIT_UNUSED_ARG(freg);
+ SLJIT_UNUSED_ARG(mem);
+ SLJIT_UNUSED_ARG(memw);
+ SLJIT_UNREACHABLE();
+ return SLJIT_ERR_UNSUPPORTED;
+}
+
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset)
{
SLJIT_UNUSED_ARG(compiler);
diff --git a/sljit/sljitLir.h b/sljit/sljitLir.h
index 925178f..874b9ea 100644
--- a/sljit/sljitLir.h
+++ b/sljit/sljitLir.h
@@ -153,8 +153,8 @@ of sljitConfigInternal.h */
is not available at all.
*/
-/* When SLJIT_UNUSED is specified as the destination of sljit_emit_op1 and
- and sljit_emit_op2 operations the result is discarded. If no status
+/* When SLJIT_UNUSED is specified as the destination of sljit_emit_op1
+ or sljit_emit_op2 operations the result is discarded. If no status
flags are set, no instructions are emitted for these operations. Data
prefetch is a special exception, see SLJIT_MOV operation. Other SLJIT
operations do not support SLJIT_UNUSED as a destination operand. */
@@ -422,15 +422,8 @@ struct sljit_compiler {
sljit_uw shift_imm;
#endif
-#if (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64)
- sljit_s32 cache_arg;
- sljit_sw cache_argw;
-#endif
-
#if (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC)
sljit_sw imm;
- sljit_s32 cache_arg;
- sljit_sw cache_argw;
#endif
#if (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS)
@@ -565,12 +558,10 @@ static SLJIT_INLINE sljit_uw sljit_get_generated_code_size(struct sljit_compiler
#define SLJIT_HAS_FPU 0
/* [Limitation] Some registers are virtual registers. */
#define SLJIT_HAS_VIRTUAL_REGISTERS 1
-/* [Emulated] Some forms of move with pre update is supported. */
-#define SLJIT_HAS_PRE_UPDATE 2
/* [Emulated] Count leading zero is supported. */
-#define SLJIT_HAS_CLZ 3
+#define SLJIT_HAS_CLZ 2
/* [Emulated] Conditional move is supported. */
-#define SLJIT_HAS_CMOV 4
+#define SLJIT_HAS_CMOV 3
#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
/* [Not emulated] SSE2 support is available on x86. */
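/* Illustrative sketch, not part of this change: the feature bits above are meant to be
   queried at run time with sljit_has_cpu_feature(). SLJIT_HAS_CMOV, for instance, lets
   a code generator choose between a real conditional move and a compare-and-branch
   fallback. Register choices below are arbitrary and a preceding operation is assumed
   to have set the Z flag. */
if (sljit_has_cpu_feature(SLJIT_HAS_CMOV))
  sljit_emit_cmov(compiler, SLJIT_NOT_EQUAL, SLJIT_R0, SLJIT_R1, 0);
else
  {
  struct sljit_jump *skip = sljit_emit_jump(compiler, SLJIT_EQUAL);
  sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_R1, 0);
  sljit_set_label(skip, sljit_emit_label(compiler));
  }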
@@ -657,26 +648,31 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *comp
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op,
sljit_s32 src, sljit_sw srcw);
-/* Fast calling mechanism for utility functions (see SLJIT_FAST_CALL). All registers and
- even the stack frame is passed to the callee. The return address is preserved in
- dst/dstw by sljit_emit_fast_enter (the type of the value stored by this function
- is sljit_p), and sljit_emit_fast_return can use this as a return value later. */
+/* Generating entry and exit points for fast call functions (see SLJIT_FAST_CALL).
+ Both sljit_emit_fast_enter and sljit_emit_fast_return functions preserve the
+ values of all registers and stack frame. The return address is stored in the
+ dst argument of sljit_emit_fast_enter, and this return address can be passed
+ to sljit_emit_fast_return to continue the execution after the fast call.
-/* Note: only for sljit specific, non ABI compilant calls. Fast, since only a few machine
- instructions are needed. Excellent for small uility functions, where saving registers
- and setting up a new stack frame would cost too much performance. However, it is still
- possible to return to the address of the caller (or anywhere else). */
+ Fast calls are cheap operations (usually only a single call instruction is
+ emitted) but they do not preserve any registers. However the callee function
+ can freely use / update any registers and stack values which can be
+ efficiently exploited by various optimizations. Registers can be saved
+ manually by the callee function if needed.
-/* Note: may destroy flags. */
+ Although returning to a different address via sljit_emit_fast_return is possible,
+ this address usually cannot be predicted by the return address predictor of
+ modern CPUs, which may reduce performance. Furthermore, using sljit_emit_ijump
+ to return is also inefficient since return address prediction is usually
+ triggered by a specific form of ijump.
-/* Note: although sljit_emit_fast_return could be replaced by an ijump, it is not suggested,
- since many architectures do clever branch prediction on call / return instruction pairs. */
+ Flags: - (does not modify flags). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw);
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw);
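/* Illustrative sketch, not part of this change: a tiny helper reached through a
   SLJIT_FAST_CALL jump. As in the reworked do_casefulcmp() above, the return address
   is spilled directly into a local stack slot by sljit_emit_fast_enter and later
   handed back to sljit_emit_fast_return in a scratch register. Offset 0 assumes at
   least one local word was reserved when the enclosing function was entered. */
sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), 0);
/* ... helper body; any scratch register may be clobbered here ... */
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_SP), 0);
sljit_emit_fast_return(compiler, SLJIT_R1, 0);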
/*
- Source and destination values for arithmetical instructions
+ Source and destination operands for arithmetical instructions
imm - a simple immediate value (cannot be used as a destination)
reg - any of the registers (immediate argument must be 0)
[imm] - absolute immediate memory address
@@ -717,6 +713,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler
arm-t2: [reg+imm], -255 <= imm <= 4095
[reg+(reg<<imm)] is supported
Write back is supported only for [reg+imm], where -255 <= imm <= 255
+ arm64: [reg+imm], -256 <= imm <= 255, 0 <= aligned imm <= 4095 * alignment
+ [reg+(reg<<imm)] is supported
+ Write back is supported only for [reg+imm], where -256 <= imm <= 255
ppc: [reg+imm], -65536 <= imm <= 65535. 64 bit loads/stores and 32 bit
signed load on 64 bit requires immediates divisible by 4.
[reg+imm] is not supported for signed 8 bit values.
@@ -728,8 +727,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler
[reg+reg] is supported
*/
-/* Register output: simply the name of the register.
- For destination, you can use SLJIT_UNUSED as well. */
+/* Macros for specifying operand types. */
#define SLJIT_MEM 0x80
#define SLJIT_MEM0() (SLJIT_MEM)
#define SLJIT_MEM1(r1) (SLJIT_MEM | (r1))
@@ -898,43 +896,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile
S32 - signed int (32 bit) data transfer
P - pointer (sljit_p) data transfer
- U = move with update (pre form). If source or destination defined as
- SLJIT_MEM1(r1) or SLJIT_MEM2(r1, r2), r1 is increased by the
- offset part of the address.
-
- Register arguments and base registers can only be used once for move
- with update instructions. The shift value of SLJIT_MEM2 addressing
- mode must also be 0. Reason: SLJIT_MOVU instructions are expected to
- be in high-performance loops where complex instruction emulation
- would be too costly.
-
- Examples for invalid move with update instructions:
-
- sljit_emit_op1(..., SLJIT_MOVU_U8,
- SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), 8);
- sljit_emit_op1(..., SLJIT_MOVU_U8,
- SLJIT_MEM2(SLJIT_R1, SLJIT_R0), 0, SLJIT_R0, 0);
- sljit_emit_op1(..., SLJIT_MOVU_U8,
- SLJIT_MEM2(SLJIT_R0, SLJIT_R1), 0, SLJIT_MEM1(SLJIT_R0), 8);
- sljit_emit_op1(..., SLJIT_MOVU_U8,
- SLJIT_MEM2(SLJIT_R0, SLJIT_R1), 0, SLJIT_MEM2(SLJIT_R1, SLJIT_R0), 0);
- sljit_emit_op1(..., SLJIT_MOVU_U8,
- SLJIT_R2, 0, SLJIT_MEM2(SLJIT_R0, SLJIT_R1), 1);
-
- The following example is valid, since only the offset register is
- used multiple times:
-
- sljit_emit_op1(..., SLJIT_MOVU_U8,
- SLJIT_MEM2(SLJIT_R0, SLJIT_R2), 0, SLJIT_MEM2(SLJIT_R1, SLJIT_R2), 0);
-
- If the destination of a MOV without update instruction is SLJIT_UNUSED
- and the source operand is a memory address the compiler emits a prefetch
- instruction if this instruction is supported by the current CPU.
- Higher data sizes bring the data closer to the core: a MOV with word
- size loads the data into a higher level cache than a byte size. Otherwise
- the type does not affect the prefetch instruction. Furthermore a prefetch
- instruction never fails, so it can be used to prefetch a data from an
- address and check whether that address is NULL afterwards.
+ If the destination of a MOV instruction is SLJIT_UNUSED and the source
+ operand is a memory address the compiler emits a prefetch instruction
+ if this instruction is supported by the current CPU. Higher data sizes
+ bring the data closer to the core: a MOV with word size loads the data
+ into a higher level cache than a byte size. Otherwise the type does not
+ affect the prefetch instruction. Furthermore a prefetch instruction
+ never fails, so it can be used to prefetch a data from an address and
+ check whether that address is NULL afterwards.
*/
/* Flags: - (does not modify flags) */
@@ -959,41 +928,23 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile
#define SLJIT_MOV_S32 (SLJIT_OP1_BASE + 6)
/* Flags: - (does not modify flags) */
#define SLJIT_MOV32 (SLJIT_MOV_S32 | SLJIT_I32_OP)
-/* Flags: - (does not modify flags) */
+/* Flags: - (does not modify flags)
+ Note: loads pointer-sized data, useful on x32 (a 32 bit mode on x86-64
+ where all x64 features are available, e.g. 16 registers) or similar
+ compiling modes */
#define SLJIT_MOV_P (SLJIT_OP1_BASE + 7)
-/* Flags: - (may destroy flags) */
-#define SLJIT_MOVU (SLJIT_OP1_BASE + 8)
-/* Flags: - (may destroy flags) */
-#define SLJIT_MOVU_U8 (SLJIT_OP1_BASE + 9)
-#define SLJIT_MOVU32_U8 (SLJIT_MOVU_U8 | SLJIT_I32_OP)
-/* Flags: - (may destroy flags) */
-#define SLJIT_MOVU_S8 (SLJIT_OP1_BASE + 10)
-#define SLJIT_MOVU32_S8 (SLJIT_MOVU_S8 | SLJIT_I32_OP)
-/* Flags: - (may destroy flags) */
-#define SLJIT_MOVU_U16 (SLJIT_OP1_BASE + 11)
-#define SLJIT_MOVU32_U16 (SLJIT_MOVU_U16 | SLJIT_I32_OP)
-/* Flags: - (may destroy flags) */
-#define SLJIT_MOVU_S16 (SLJIT_OP1_BASE + 12)
-#define SLJIT_MOVU32_S16 (SLJIT_MOVU_S16 | SLJIT_I32_OP)
-/* Flags: - (may destroy flags)
- Note: no SLJIT_MOVU32_U32 form, since it is the same as SLJIT_MOVU32 */
-#define SLJIT_MOVU_U32 (SLJIT_OP1_BASE + 13)
-/* Flags: - (may destroy flags)
- Note: no SLJIT_MOVU32_S32 form, since it is the same as SLJIT_MOVU32 */
-#define SLJIT_MOVU_S32 (SLJIT_OP1_BASE + 14)
-/* Flags: - (may destroy flags) */
-#define SLJIT_MOVU32 (SLJIT_MOVU_S32 | SLJIT_I32_OP)
-/* Flags: - (may destroy flags) */
-#define SLJIT_MOVU_P (SLJIT_OP1_BASE + 15)
-/* Flags: Z */
-#define SLJIT_NOT (SLJIT_OP1_BASE + 16)
+/* Flags: Z
+ Note: immediate source argument is not supported */
+#define SLJIT_NOT (SLJIT_OP1_BASE + 8)
#define SLJIT_NOT32 (SLJIT_NOT | SLJIT_I32_OP)
-/* Flags: Z | OVERFLOW */
-#define SLJIT_NEG (SLJIT_OP1_BASE + 17)
+/* Flags: Z | OVERFLOW
+ Note: immediate source argument is not supported */
+#define SLJIT_NEG (SLJIT_OP1_BASE + 9)
#define SLJIT_NEG32 (SLJIT_NEG | SLJIT_I32_OP)
/* Count leading zeroes
- Flags: - (may destroy flags) */
-#define SLJIT_CLZ (SLJIT_OP1_BASE + 18)
+ Flags: - (may destroy flags)
+ Note: immediate source argument is not supported */
+#define SLJIT_CLZ (SLJIT_OP1_BASE + 10)
#define SLJIT_CLZ32 (SLJIT_CLZ | SLJIT_I32_OP)
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
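/* Illustrative sketch, not part of this change: per the note above, a MOV whose
   destination is SLJIT_UNUSED and whose source is a memory operand is turned into a
   data prefetch on CPUs that support one; the access size only selects the cache
   level that the data is brought into. */
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_UNUSED, 0, SLJIT_MEM1(SLJIT_R0), 0);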
@@ -1294,7 +1245,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co
/* Emit a conditional mov instruction which moves source to destination,
if the condition is satisfied. Unlike other arithmetic operations this
- instruction does not support memory accesses.
+ instruction does not support memory access.
type must be between SLJIT_EQUAL and SLJIT_ORDERED_F64
dst_reg must be a valid register and it can be combined
@@ -1306,6 +1257,51 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil
sljit_s32 dst_reg,
sljit_s32 src, sljit_sw srcw);
+/* The following flags are used by sljit_emit_mem() and sljit_emit_fmem(). */
+
+/* When SLJIT_MEM_SUPP is passed, no instructions are emitted.
+ Instead the function returns with SLJIT_SUCCESS if the instruction
+ form is supported and SLJIT_ERR_UNSUPPORTED otherwise. This flag
+ allows runtime checking of available instruction forms. */
+#define SLJIT_MEM_SUPP 0x0200
+/* Memory load operation. This is the default. */
+#define SLJIT_MEM_LOAD 0x0000
+/* Memory store operation. */
+#define SLJIT_MEM_STORE 0x0400
+/* Base register is updated before the memory access. */
+#define SLJIT_MEM_PRE 0x0800
+/* Base register is updated after the memory access. */
+#define SLJIT_MEM_POST 0x1000
+
+/* Emit a single memory load or store with update instruction. When the
+ requested instruction form is not supported by the CPU, it returns
+ with SLJIT_ERR_UNSUPPORTED instead of emulating the instruction. This
+ allows specializing tight loops based on the supported instruction
+ forms (see SLJIT_MEM_SUPP flag).
+
+ type must be between SLJIT_MOV and SLJIT_MOV_P and can be
+ combined with SLJIT_MEM_* flags. Either SLJIT_MEM_PRE
+ or SLJIT_MEM_POST must be specified.
+ reg is the source or destination register, and must be
+ different from the base register of the mem operand
+ mem must be a SLJIT_MEM1() or SLJIT_MEM2() operand
+
+ Flags: - (does not modify flags) */
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 reg,
+ sljit_s32 mem, sljit_sw memw);
+
+/* Same as sljit_emit_mem except the following:
+
+ type must be SLJIT_MOV_F64 or SLJIT_MOV_F32 and can be
+ combined with SLJIT_MEM_* flags. Either SLJIT_MEM_PRE
+ or SLJIT_MEM_POST must be specified.
+ freg is the source or destination floating point register */
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 freg,
+ sljit_s32 mem, sljit_sw memw);
+
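/* Illustrative sketch, not part of this change: typical use of the new interface.
   A SLJIT_MEM_SUPP probe emits nothing and only reports whether a post-update word
   load exists on this CPU; when it does not, the same effect is obtained with an
   ordinary load followed by an explicit ADD (register choices are arbitrary). */
if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_POST,
    SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw)) == SLJIT_SUCCESS)
  sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_POST,
      SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw));
else
  {
  sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R1), 0);
  sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, sizeof(sljit_sw));
  }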
/* Copies the base address of SLJIT_SP + offset to dst. The offset can be
anything to negate the effect of relative addressing. For example if an
array of sljit_sw values is stored on the stack from offset 0x40, and R0
@@ -1358,9 +1354,9 @@ SLJIT_API_FUNC_ATTRIBUTE void SLJIT_FUNC sljit_release_lock(void);
#if (defined SLJIT_UTIL_STACK && SLJIT_UTIL_STACK)
/* The sljit_stack is a utility extension of sljit, which provides
- a top-down stack. The stack starts at base and goes down to
- max_limit, so the memory region for this stack is between
- max_limit (inclusive) and base (exclusive). However the
+ a top-down stack. The stack top is stored in base and the stack
+ goes down to max_limit, so the memory region for this stack is
+ between max_limit (inclusive) and base (exclusive). However the
application can only use the region between limit (inclusive)
and base (exclusive). The sljit_stack_resize can be used to
extend this region up to max_limit.
@@ -1368,8 +1364,8 @@ SLJIT_API_FUNC_ATTRIBUTE void SLJIT_FUNC sljit_release_lock(void);
This feature uses the "address space reserve" feature of modern
operating systems, so instead of allocating a huge memory block
applications can allocate a small region and extend it later
- without moving the memory area. Hence pointers can be stored
- in this area. */
+ without moving the memory area. Hence the region is never moved
+ so pointers are valid after resize. */
/* Note: base and max_limit fields are aligned to PAGE_SIZE bytes
(usually 4 Kbyte or more).
@@ -1389,9 +1385,13 @@ struct sljit_stack {
};
/* Returns NULL if unsuccessful.
- Note: max_limit contains the maximum stack size in bytes.
- Note: limit contains the starting stack size in bytes.
- Note: the top field is initialized to base.
+
+ Note:
+ max_limit field contains the lower bound address of the stack.
+ limit field contains the current starting address of the stack.
+ base field contains the end address of the stack.
+ top field is initialized to base.
+
Note: see sljit_create_compiler for the explanation of allocator_data. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_stack* SLJIT_FUNC sljit_allocate_stack(sljit_uw limit, sljit_uw max_limit, void *allocator_data);
SLJIT_API_FUNC_ATTRIBUTE void SLJIT_FUNC sljit_free_stack(struct sljit_stack *stack, void *allocator_data);
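The reworded stack notes above can be condensed into a short, illustrative snippet (not part of this change; the sizes are arbitrary): a stack is reserved with a small initial limit and a larger maximum, and because only address space is reserved up front, later resizes never move the region, so pointers into it stay valid.

struct sljit_stack *stack = sljit_allocate_stack(64 * 1024, 1024 * 1024, NULL);
if (stack != NULL)
  {
  /* Generated code may use the region between stack->limit (inclusive) and
     stack->base (exclusive); grow it later with sljit_stack_resize if needed. */
  sljit_free_stack(stack, NULL);
  }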
diff --git a/sljit/sljitNativeARM_32.c b/sljit/sljitNativeARM_32.c
index 441d3fa..6d61eed 100644
--- a/sljit/sljitNativeARM_32.c
+++ b/sljit/sljitNativeARM_32.c
@@ -837,7 +837,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
return 1;
#endif
- case SLJIT_HAS_PRE_UPDATE:
case SLJIT_HAS_CLZ:
case SLJIT_HAS_CMOV:
return 1;
@@ -852,18 +851,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
/* --------------------------------------------------------------------- */
/* Creates an index in data_transfer_insts array. */
-#define WORD_DATA 0x00
-#define BYTE_DATA 0x01
-#define HALF_DATA 0x02
-#define PRELOAD_DATA 0x03
-#define SIGNED_DATA 0x04
+#define WORD_SIZE 0x00
+#define BYTE_SIZE 0x01
+#define HALF_SIZE 0x02
+#define PRELOAD 0x03
+#define SIGNED 0x04
#define LOAD_DATA 0x08
-/* emit_op inp_flags.
- WRITE_BACK must be the first, since it is a flag. */
-#define WRITE_BACK 0x10
-#define ALLOW_IMM 0x20
-#define ALLOW_INV_IMM 0x40
+/* Flag bits for emit_op. */
+#define ALLOW_IMM 0x10
+#define ALLOW_INV_IMM 0x20
#define ALLOW_ANY_IMM (ALLOW_IMM | ALLOW_INV_IMM)
/* s/l - store/load (1 bit)
@@ -884,15 +881,15 @@ static const sljit_uw data_transfer_insts[16] = {
/* l u w */ 0xe5100000 /* ldr */,
/* l u b */ 0xe5500000 /* ldrb */,
/* l u h */ 0xe11000b0 /* ldrh */,
-/* l u p */ 0xf5500000 /* preload data */,
+/* l u p */ 0xf5500000 /* preload */,
/* l s w */ 0xe5100000 /* ldr */,
/* l s b */ 0xe11000d0 /* ldrsb */,
/* l s h */ 0xe11000f0 /* ldrsh */,
/* l s N */ 0x00000000 /* not allowed */,
};
-#define EMIT_DATA_TRANSFER(type, add, wb, target_reg, base_reg, arg) \
- (data_transfer_insts[(type) & 0xf] | ((add) << 23) | ((wb) << (21 - 4)) | RD(target_reg) | RN(base_reg) | (arg))
+#define EMIT_DATA_TRANSFER(type, add, target_reg, base_reg, arg) \
+ (data_transfer_insts[(type) & 0xf] | ((add) << 23) | RD(target_reg) | RN(base_reg) | (arg))
/* Normal ldr/str instruction.
Type2: ldrsb, ldrh, ldrsh */
@@ -1325,7 +1322,7 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 reg,
FAIL_IF(generate_int(compiler, reg, ~imm, 0));
/* Load integer. */
- return push_inst_with_literal(compiler, EMIT_DATA_TRANSFER(WORD_DATA | LOAD_DATA, 1, 0, reg, TMP_PC, 0), imm);
+ return push_inst_with_literal(compiler, EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, reg, TMP_PC, 0), imm);
#else
FAIL_IF(push_inst(compiler, MOVW | RD(reg) | ((imm << 4) & 0xf0000) | (imm & 0xfff)));
if (imm <= 0xffff)
@@ -1337,16 +1334,13 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 reg,
static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg,
sljit_s32 arg, sljit_sw argw, sljit_s32 tmp_reg)
{
- sljit_uw offset_reg, imm;
+ sljit_uw imm, offset_reg;
sljit_uw is_type1_transfer = IS_TYPE1_TRANSFER(flags);
SLJIT_ASSERT (arg & SLJIT_MEM);
SLJIT_ASSERT((arg & REG_MASK) != tmp_reg);
- SLJIT_COMPILE_ASSERT(WRITE_BACK == 0x10, optimized_for_emit_data_transfer);
-
if ((arg & REG_MASK) == SLJIT_UNUSED) {
- /* Write back is not used. */
if (is_type1_transfer) {
FAIL_IF(load_immediate(compiler, tmp_reg, argw & ~0xfff));
argw &= 0xfff;
@@ -1356,7 +1350,8 @@ static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit
argw &= 0xff;
}
- return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, 0, reg, tmp_reg, is_type1_transfer ? argw : TYPE2_TRANSFER_IMM(argw)));
+ return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, reg, tmp_reg,
+ is_type1_transfer ? argw : TYPE2_TRANSFER_IMM(argw)));
}
if (arg & OFFS_REG_MASK) {
@@ -1365,14 +1360,12 @@ static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit
argw &= 0x3;
if (argw != 0 && !is_type1_transfer) {
- SLJIT_ASSERT(!(flags & WRITE_BACK));
-
FAIL_IF(push_inst(compiler, ADD | RD(tmp_reg) | RN(arg) | RM(offset_reg) | (argw << 7)));
- return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, 0, reg, tmp_reg, TYPE2_TRANSFER_IMM(0)));
+ return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, reg, tmp_reg, TYPE2_TRANSFER_IMM(0)));
}
/* Bit 25: RM is offset. */
- return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, flags & WRITE_BACK, reg, arg,
+ return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, reg, arg,
RM(offset_reg) | (is_type1_transfer ? (1 << 25) : 0) | (argw << 7)));
}
@@ -1382,60 +1375,55 @@ static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit
if (argw > 0xfff) {
imm = get_imm(argw & ~0xfff);
if (imm) {
- offset_reg = (flags & WRITE_BACK) ? arg : tmp_reg;
- FAIL_IF(push_inst(compiler, ADD | RD(offset_reg) | RN(arg) | imm));
+ FAIL_IF(push_inst(compiler, ADD | RD(tmp_reg) | RN(arg) | imm));
argw = argw & 0xfff;
- arg = offset_reg;
+ arg = tmp_reg;
}
}
else if (argw < -0xfff) {
imm = get_imm(-argw & ~0xfff);
if (imm) {
- offset_reg = (flags & WRITE_BACK) ? arg : tmp_reg;
- FAIL_IF(push_inst(compiler, SUB | RD(offset_reg) | RN(arg) | imm));
+ FAIL_IF(push_inst(compiler, SUB | RD(tmp_reg) | RN(arg) | imm));
argw = -(-argw & 0xfff);
- arg = offset_reg;
+ arg = tmp_reg;
}
}
- if (argw >= 0 && argw <= 0xfff) {
- return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, flags & WRITE_BACK, reg, arg & REG_MASK, argw));
- }
- if (argw < 0 && argw >= -0xfff) {
- return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 0, flags & WRITE_BACK, reg, arg & REG_MASK, -argw));
- }
+ if (argw >= 0 && argw <= 0xfff)
+ return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, reg, arg, argw));
+
+ if (argw < 0 && argw >= -0xfff)
+ return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 0, reg, arg, -argw));
}
else {
if (argw > 0xff) {
imm = get_imm(argw & ~0xff);
if (imm) {
- offset_reg = (flags & WRITE_BACK) ? arg : tmp_reg;
- FAIL_IF(push_inst(compiler, ADD | RD(offset_reg) | RN(arg) | imm));
+ FAIL_IF(push_inst(compiler, ADD | RD(tmp_reg) | RN(arg) | imm));
argw = argw & 0xff;
- arg = offset_reg;
+ arg = tmp_reg;
}
}
else if (argw < -0xff) {
imm = get_imm(-argw & ~0xff);
if (imm) {
- offset_reg = (flags & WRITE_BACK) ? arg : tmp_reg;
- FAIL_IF(push_inst(compiler, SUB | RD(offset_reg) | RN(arg) | imm));
+ FAIL_IF(push_inst(compiler, SUB | RD(tmp_reg) | RN(arg) | imm));
argw = -(-argw & 0xff);
- arg = offset_reg;
+ arg = tmp_reg;
}
}
- if (argw >= 0 && argw <= 0xff) {
- return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, flags & WRITE_BACK, reg, arg, TYPE2_TRANSFER_IMM(argw)));
- }
+ if (argw >= 0 && argw <= 0xff)
+ return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, reg, arg, TYPE2_TRANSFER_IMM(argw)));
+
if (argw < 0 && argw >= -0xff) {
argw = -argw;
- return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 0, flags & WRITE_BACK, reg, arg, TYPE2_TRANSFER_IMM(argw)));
+ return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 0, reg, arg, TYPE2_TRANSFER_IMM(argw)));
}
}
FAIL_IF(load_immediate(compiler, tmp_reg, argw));
- return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, flags & WRITE_BACK, reg, arg,
+ return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, reg, arg,
RM(tmp_reg) | (is_type1_transfer ? (1 << 25) : 0)));
}
@@ -1538,10 +1526,10 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3
/* Destination. */
dst_reg = SLOW_IS_REG(dst) ? dst : TMP_REG2;
- if (op <= SLJIT_MOVU_P) {
+ if (op <= SLJIT_MOV_P) {
if (dst & SLJIT_MEM) {
- if (inp_flags & BYTE_DATA)
- inp_flags &= ~SIGNED_DATA;
+ if (inp_flags & BYTE_SIZE)
+ inp_flags &= ~SIGNED;
if (FAST_IS_REG(src2))
return emit_op_mem(compiler, inp_flags, src2, dst, dstw, TMP_REG2);
@@ -1553,7 +1541,7 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3
/* Source 2. */
if (src2_reg == 0) {
- src2_reg = (op <= SLJIT_MOVU_P) ? dst_reg : TMP_REG2;
+ src2_reg = (op <= SLJIT_MOV_P) ? dst_reg : TMP_REG2;
if (FAST_IS_REG(src2))
src2_reg = src2;
@@ -1674,7 +1662,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
if (dst == SLJIT_UNUSED && !HAS_FLAGS(op)) {
#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
if (op <= SLJIT_MOV_P && (src & SLJIT_MEM))
- return emit_op_mem(compiler, PRELOAD_DATA | LOAD_DATA, TMP_PC, src, srcw, TMP_REG1);
+ return emit_op_mem(compiler, PRELOAD | LOAD_DATA, TMP_PC, src, srcw, TMP_REG1);
#endif
return SLJIT_SUCCESS;
}
@@ -1687,34 +1675,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
return emit_op(compiler, SLJIT_MOV, ALLOW_ANY_IMM, dst, dstw, TMP_REG1, 0, src, srcw);
case SLJIT_MOV_U8:
- return emit_op(compiler, SLJIT_MOV_U8, ALLOW_ANY_IMM | BYTE_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u8)srcw : srcw);
+ return emit_op(compiler, SLJIT_MOV_U8, ALLOW_ANY_IMM | BYTE_SIZE, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u8)srcw : srcw);
case SLJIT_MOV_S8:
- return emit_op(compiler, SLJIT_MOV_S8, ALLOW_ANY_IMM | SIGNED_DATA | BYTE_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s8)srcw : srcw);
+ return emit_op(compiler, SLJIT_MOV_S8, ALLOW_ANY_IMM | SIGNED | BYTE_SIZE, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s8)srcw : srcw);
case SLJIT_MOV_U16:
- return emit_op(compiler, SLJIT_MOV_U16, ALLOW_ANY_IMM | HALF_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u16)srcw : srcw);
+ return emit_op(compiler, SLJIT_MOV_U16, ALLOW_ANY_IMM | HALF_SIZE, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u16)srcw : srcw);
case SLJIT_MOV_S16:
- return emit_op(compiler, SLJIT_MOV_S16, ALLOW_ANY_IMM | SIGNED_DATA | HALF_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s16)srcw : srcw);
-
- case SLJIT_MOVU:
- case SLJIT_MOVU_U32:
- case SLJIT_MOVU_S32:
- case SLJIT_MOVU_P:
- return emit_op(compiler, SLJIT_MOV, ALLOW_ANY_IMM | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw);
-
- case SLJIT_MOVU_U8:
- return emit_op(compiler, SLJIT_MOV_U8, ALLOW_ANY_IMM | BYTE_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u8)srcw : srcw);
-
- case SLJIT_MOVU_S8:
- return emit_op(compiler, SLJIT_MOV_S8, ALLOW_ANY_IMM | SIGNED_DATA | BYTE_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s8)srcw : srcw);
-
- case SLJIT_MOVU_U16:
- return emit_op(compiler, SLJIT_MOV_U16, ALLOW_ANY_IMM | HALF_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u16)srcw : srcw);
-
- case SLJIT_MOVU_S16:
- return emit_op(compiler, SLJIT_MOV_S16, ALLOW_ANY_IMM | SIGNED_DATA | HALF_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s16)srcw : srcw);
+ return emit_op(compiler, SLJIT_MOV_S16, ALLOW_ANY_IMM | SIGNED | HALF_SIZE, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s16)srcw : srcw);
case SLJIT_NOT:
return emit_op(compiler, op, ALLOW_ANY_IMM, dst, dstw, TMP_REG1, 0, src, srcw);
@@ -2037,7 +2007,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *
return push_inst(compiler, MOV | RD(dst) | RM(TMP_REG2));
/* Memory. */
- return emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw, TMP_REG1);
+ return emit_op_mem(compiler, WORD_SIZE, TMP_REG2, dst, dstw, TMP_REG1);
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw)
@@ -2050,10 +2020,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler
if (FAST_IS_REG(src))
FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG2) | RM(src)));
- else if (src & SLJIT_MEM)
- FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG2, src, srcw, TMP_REG1));
- else if (src & SLJIT_IMM)
- FAIL_IF(load_immediate(compiler, TMP_REG2, srcw));
+ else
+ FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG2, src, srcw, TMP_REG1));
return push_inst(compiler, BX | RM(TMP_REG2));
}
@@ -2150,7 +2118,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
if (type >= SLJIT_FAST_CALL)
PTR_FAIL_IF(prepare_blx(compiler));
- PTR_FAIL_IF(push_inst_with_unique_literal(compiler, ((EMIT_DATA_TRANSFER(WORD_DATA | LOAD_DATA, 1, 0,
+ PTR_FAIL_IF(push_inst_with_unique_literal(compiler, ((EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1,
type <= SLJIT_JUMP ? TMP_PC : TMP_REG1, TMP_PC, 0)) & ~COND_MASK) | get_cc(type), 0));
if (jump->flags & SLJIT_REWRITABLE_JUMP) {
@@ -2276,7 +2244,7 @@ static sljit_s32 softfloat_call_with_args(struct sljit_compiler *compiler, sljit
}
FAIL_IF(push_inst(compiler, MOV | (stack_offset << 10) | (word_arg_offset >> 2)));
} else
- FAIL_IF(push_inst(compiler, data_transfer_insts[WORD_DATA] | 0x800000 | RN(SLJIT_SP) | (word_arg_offset << 10) | (stack_offset - 16)));
+ FAIL_IF(push_inst(compiler, data_transfer_insts[WORD_SIZE] | 0x800000 | RN(SLJIT_SP) | (word_arg_offset << 10) | (stack_offset - 16)));
}
break;
}
@@ -2427,7 +2395,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi
}
SLJIT_ASSERT(src & SLJIT_MEM);
- FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw, TMP_REG1));
+ FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG1, src, srcw, TMP_REG1));
return push_inst(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RM(TMP_REG1));
}
@@ -2440,7 +2408,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
if (type >= SLJIT_FAST_CALL)
FAIL_IF(prepare_blx(compiler));
- FAIL_IF(push_inst_with_unique_literal(compiler, EMIT_DATA_TRANSFER(WORD_DATA | LOAD_DATA, 1, 0, type <= SLJIT_JUMP ? TMP_PC : TMP_REG1, TMP_PC, 0), 0));
+ FAIL_IF(push_inst_with_unique_literal(compiler, EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, type <= SLJIT_JUMP ? TMP_PC : TMP_REG1, TMP_PC, 0), 0));
if (type >= SLJIT_FAST_CALL)
FAIL_IF(emit_blx(compiler));
#else
@@ -2460,7 +2428,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi
#ifdef __SOFTFP__
if (src & SLJIT_MEM) {
- FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw, TMP_REG1));
+ FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG1, src, srcw, TMP_REG1));
src = TMP_REG1;
}
@@ -2505,14 +2473,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co
FAIL_IF(push_inst(compiler, MOV | RD(dst_reg) | SRC2_IMM | 0));
FAIL_IF(push_inst(compiler, ((MOV | RD(dst_reg) | SRC2_IMM | 1) & ~COND_MASK) | cc));
if (dst & SLJIT_MEM)
- return emit_op_mem(compiler, WORD_DATA, TMP_REG1, dst, dstw, TMP_REG2);
+ return emit_op_mem(compiler, WORD_SIZE, TMP_REG1, dst, dstw, TMP_REG2);
return SLJIT_SUCCESS;
}
ins = (op == SLJIT_AND ? AND : (op == SLJIT_OR ? ORR : EOR));
if (dst & SLJIT_MEM)
- FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, dst, dstw, TMP_REG2));
+ FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG1, dst, dstw, TMP_REG2));
FAIL_IF(push_inst(compiler, ((ins | RD(dst_reg) | RN(dst_reg) | SRC2_IMM | 1) & ~COND_MASK) | cc));
@@ -2520,7 +2488,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co
FAIL_IF(push_inst(compiler, ((ins | RD(dst_reg) | RN(dst_reg) | SRC2_IMM | 0) & ~COND_MASK) | (cc ^ 0x10000000)));
if (dst & SLJIT_MEM)
- FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_REG1, dst, dstw, TMP_REG2));
+ FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, dst, dstw, TMP_REG2));
if (flags & SLJIT_SET_Z)
return push_inst(compiler, MOV | SET_FLAGS | RD(TMP_REG2) | RM(dst_reg));
@@ -2564,6 +2532,110 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil
return push_inst(compiler, ((MOV | RD(dst_reg) | RM(src)) & ~COND_MASK) | cc);
}
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 reg,
+ sljit_s32 mem, sljit_sw memw)
+{
+ sljit_s32 flags;
+ sljit_uw is_type1_transfer, inst;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw));
+
+ is_type1_transfer = 1;
+
+ switch (type & 0xff) {
+ case SLJIT_MOV:
+ case SLJIT_MOV_U32:
+ case SLJIT_MOV_S32:
+ case SLJIT_MOV_P:
+ flags = WORD_SIZE;
+ break;
+ case SLJIT_MOV_U8:
+ flags = BYTE_SIZE;
+ break;
+ case SLJIT_MOV_S8:
+ if (!(type & SLJIT_MEM_STORE))
+ is_type1_transfer = 0;
+ flags = BYTE_SIZE | SIGNED;
+ break;
+ case SLJIT_MOV_U16:
+ is_type1_transfer = 0;
+ flags = HALF_SIZE;
+ break;
+ case SLJIT_MOV_S16:
+ is_type1_transfer = 0;
+ flags = HALF_SIZE | SIGNED;
+ break;
+ default:
+ SLJIT_UNREACHABLE();
+ flags = WORD_SIZE;
+ break;
+ }
+
+ if (!(type & SLJIT_MEM_STORE))
+ flags |= LOAD_DATA;
+
+ SLJIT_ASSERT(is_type1_transfer == !!IS_TYPE1_TRANSFER(flags));
+
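+ /* Type 1 (word/byte) transfers take a 12-bit immediate offset; type 2 (halfword/signed byte) transfers only take 8 bits. */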
+ if (SLJIT_UNLIKELY(mem & OFFS_REG_MASK)) {
+ if (!is_type1_transfer && memw != 0)
+ return SLJIT_ERR_UNSUPPORTED;
+ }
+ else {
+ if (is_type1_transfer) {
+ if (memw > 4095 || memw < -4095)
+ return SLJIT_ERR_UNSUPPORTED;
+ }
+ else {
+ if (memw > 255 || memw < -255)
+ return SLJIT_ERR_UNSUPPORTED;
+ }
+ }
+
+ if (type & SLJIT_MEM_SUPP)
+ return SLJIT_SUCCESS;
+
+ if (SLJIT_UNLIKELY(mem & OFFS_REG_MASK)) {
+ memw &= 0x3;
+
+ inst = EMIT_DATA_TRANSFER(flags, 1, reg, mem & REG_MASK, RM(OFFS_REG(mem)) | (memw << 7));
+
+ if (is_type1_transfer)
+ inst |= (1 << 25);
+
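+ /* Pre-indexing sets W (bit 21) for write-back; otherwise P (bit 24) is flipped to get the post-indexed form. */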
+ if (type & SLJIT_MEM_PRE)
+ inst |= (1 << 21);
+ else
+ inst ^= (1 << 24);
+
+ return push_inst(compiler, inst);
+ }
+
+ inst = EMIT_DATA_TRANSFER(flags, 0, reg, mem & REG_MASK, 0);
+
+ if (type & SLJIT_MEM_PRE)
+ inst |= (1 << 21);
+ else
+ inst ^= (1 << 24);
+
+ if (is_type1_transfer) {
+ if (memw >= 0)
+ inst |= (1 << 23);
+ else
+ memw = -memw;
+
+ return push_inst(compiler, inst | memw);
+ }
+
+ if (memw >= 0)
+ inst |= (1 << 23);
+ else
+ memw = -memw;
+
+ return push_inst(compiler, inst | TYPE2_TRANSFER_IMM(memw));
+}
+
SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
{
struct sljit_const *const_;
@@ -2579,7 +2651,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi
reg = SLOW_IS_REG(dst) ? dst : TMP_REG2;
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
- PTR_FAIL_IF(push_inst_with_unique_literal(compiler, EMIT_DATA_TRANSFER(WORD_DATA | LOAD_DATA, 1, 0, reg, TMP_PC, 0), init_value));
+ PTR_FAIL_IF(push_inst_with_unique_literal(compiler, EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, reg, TMP_PC, 0), init_value));
compiler->patches++;
#else
PTR_FAIL_IF(emit_imm(compiler, reg, init_value));
@@ -2587,7 +2659,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi
set_const(const_, compiler);
if (dst & SLJIT_MEM)
- PTR_FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw, TMP_REG1));
+ PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, dst, dstw, TMP_REG1));
return const_;
}
diff --git a/sljit/sljitNativeARM_64.c b/sljit/sljitNativeARM_64.c
index 8ceac4d..8a437bd 100644
--- a/sljit/sljitNativeARM_64.c
+++ b/sljit/sljitNativeARM_64.c
@@ -36,15 +36,15 @@ typedef sljit_u32 sljit_ins;
#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)
#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3)
-#define TMP_REG3 (SLJIT_NUMBER_OF_REGISTERS + 4)
-#define TMP_LR (SLJIT_NUMBER_OF_REGISTERS + 5)
-#define TMP_SP (SLJIT_NUMBER_OF_REGISTERS + 6)
+#define TMP_LR (SLJIT_NUMBER_OF_REGISTERS + 4)
+#define TMP_SP (SLJIT_NUMBER_OF_REGISTERS + 5)
#define TMP_FREG1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
#define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2)
+/* r18 - platform register, currently not used */
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 8] = {
- 31, 0, 1, 2, 3, 4, 5, 6, 7, 12, 13, 14, 15, 16, 17, 8, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 29, 9, 10, 11, 30, 31
+ 31, 0, 1, 2, 3, 4, 5, 6, 7, 11, 12, 13, 14, 15, 16, 17, 8, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 29, 9, 10, 30, 31
};
static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
@@ -116,10 +116,13 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
#define SMULH 0x9b403c00
#define STP 0xa9000000
#define STP_PRE 0xa9800000
+#define STRB 0x38206800
+#define STRBI 0x39000000
#define STRI 0xf9000000
#define STR_FI 0x3d000000
#define STR_FR 0x3c206800
#define STUR_FI 0x3c000000
+#define STURBI 0x38000000
#define SUB 0xcb000000
#define SUBI 0xd1000000
#define SUBS 0xeb000000
@@ -197,6 +200,7 @@ static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_in
code_ptr[-2] = code_ptr[0];
return 2;
}
+
if (target_addr <= 0xffffffffffffl) {
if (jump->flags & IS_COND)
code_ptr[-5] -= (1 << 5);
@@ -339,7 +343,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
return 1;
#endif
- case SLJIT_HAS_PRE_UPDATE:
case SLJIT_HAS_CLZ:
case SLJIT_HAS_CMOV:
return 1;
@@ -398,12 +401,14 @@ static sljit_ins logical_imm(sljit_sw imm, sljit_s32 len)
SLJIT_ASSERT((len == 32 && imm != 0 && imm != -1)
|| (len == 16 && (sljit_s32)imm != 0 && (sljit_s32)imm != -1));
+
uimm = (sljit_uw)imm;
while (1) {
if (len <= 0) {
SLJIT_UNREACHABLE();
return 0;
}
+
mask = ((sljit_uw)1 << len) - 1;
if ((uimm & mask) != ((uimm >> len) & mask))
break;
@@ -452,39 +457,42 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst,
sljit_s32 i, zeros, ones, first;
sljit_ins bitmask;
+ /* Handling simple immediates first. */
if (imm <= 0xffff)
return push_inst(compiler, MOVZ | RD(dst) | (imm << 5));
- if (simm >= -0x10000 && simm < 0)
+ if (simm < 0 && simm >= -0x10000)
return push_inst(compiler, MOVN | RD(dst) | ((~imm & 0xffff) << 5));
if (imm <= 0xffffffffl) {
+ if ((imm & 0xffff) == 0)
+ return push_inst(compiler, MOVZ | RD(dst) | ((imm >> 16) << 5) | (1 << 21));
if ((imm & 0xffff0000l) == 0xffff0000)
return push_inst(compiler, (MOVN ^ W_OP) | RD(dst) | ((~imm & 0xffff) << 5));
if ((imm & 0xffff) == 0xffff)
return push_inst(compiler, (MOVN ^ W_OP) | RD(dst) | ((~imm & 0xffff0000l) >> (16 - 5)) | (1 << 21));
+
bitmask = logical_imm(simm, 16);
if (bitmask != 0)
return push_inst(compiler, (ORRI ^ W_OP) | RD(dst) | RN(TMP_ZERO) | bitmask);
- }
- else {
- bitmask = logical_imm(simm, 32);
- if (bitmask != 0)
- return push_inst(compiler, ORRI | RD(dst) | RN(TMP_ZERO) | bitmask);
- }
- if (imm <= 0xffffffffl) {
FAIL_IF(push_inst(compiler, MOVZ | RD(dst) | ((imm & 0xffff) << 5)));
return push_inst(compiler, MOVK | RD(dst) | ((imm & 0xffff0000l) >> (16 - 5)) | (1 << 21));
}
- if (simm >= -0x100000000l && simm < 0) {
+ bitmask = logical_imm(simm, 32);
+ if (bitmask != 0)
+ return push_inst(compiler, ORRI | RD(dst) | RN(TMP_ZERO) | bitmask);
+
+ if (simm < 0 && simm >= -0x100000000l) {
+ if ((imm & 0xffff) == 0xffff)
+ return push_inst(compiler, MOVN | RD(dst) | ((~imm & 0xffff0000l) >> (16 - 5)) | (1 << 21));
+
FAIL_IF(push_inst(compiler, MOVN | RD(dst) | ((~imm & 0xffff) << 5)));
return push_inst(compiler, MOVK | RD(dst) | ((imm & 0xffff0000l) >> (16 - 5)) | (1 << 21));
}
- /* A large amount of number can be constructed from ORR and MOVx,
- but computing them is costly. We don't */
+ /* A large number of values can be constructed from ORR and MOVx, but computing them is costly. */
zeros = 0;
ones = 0;
@@ -537,9 +545,6 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst,
#define INT_OP 0x0040000
#define SET_FLAGS 0x0080000
#define UNUSED_RETURN 0x0100000
-#define SLOW_DEST 0x0200000
-#define SLOW_SRC1 0x0400000
-#define SLOW_SRC2 0x0800000
#define CHECK_FLAGS(flag_bits) \
if (flags & SET_FLAGS) { \
@@ -697,40 +702,32 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s
switch (op) {
case SLJIT_MOV:
case SLJIT_MOV_P:
- case SLJIT_MOVU:
- case SLJIT_MOVU_P:
SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
if (dst == arg2)
return SLJIT_SUCCESS;
return push_inst(compiler, ORR | RD(dst) | RN(TMP_ZERO) | RM(arg2));
case SLJIT_MOV_U8:
- case SLJIT_MOVU_U8:
SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
return push_inst(compiler, (UBFM ^ (1 << 31)) | RD(dst) | RN(arg2) | (7 << 10));
case SLJIT_MOV_S8:
- case SLJIT_MOVU_S8:
SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
if (!(flags & INT_OP))
inv_bits |= 1 << 22;
return push_inst(compiler, (SBFM ^ inv_bits) | RD(dst) | RN(arg2) | (7 << 10));
case SLJIT_MOV_U16:
- case SLJIT_MOVU_U16:
SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
return push_inst(compiler, (UBFM ^ (1 << 31)) | RD(dst) | RN(arg2) | (15 << 10));
case SLJIT_MOV_S16:
- case SLJIT_MOVU_S16:
SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
if (!(flags & INT_OP))
inv_bits |= 1 << 22;
return push_inst(compiler, (SBFM ^ inv_bits) | RD(dst) | RN(arg2) | (15 << 10));
case SLJIT_MOV_U32:
- case SLJIT_MOVU_U32:
SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
if ((flags & INT_OP) && dst == arg2)
return SLJIT_SUCCESS;
return push_inst(compiler, (ORR ^ (1 << 31)) | RD(dst) | RN(TMP_ZERO) | RM(arg2));
case SLJIT_MOV_S32:
- case SLJIT_MOVU_S32:
SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
if ((flags & INT_OP) && dst == arg2)
return SLJIT_SUCCESS;
@@ -799,292 +796,67 @@ set_flags:
return SLJIT_SUCCESS;
}
-#define STORE 0x01
-#define SIGNED 0x02
-
-#define UPDATE 0x04
-#define ARG_TEST 0x08
+#define STORE 0x10
+#define SIGNED 0x20
-#define BYTE_SIZE 0x000
-#define HALF_SIZE 0x100
-#define INT_SIZE 0x200
-#define WORD_SIZE 0x300
+#define BYTE_SIZE 0x0
+#define HALF_SIZE 0x1
+#define INT_SIZE 0x2
+#define WORD_SIZE 0x3
-#define MEM_SIZE_SHIFT(flags) ((flags) >> 8)
-
-static const sljit_ins sljit_mem_imm[4] = {
-/* u l */ 0x39400000 /* ldrb [reg,imm] */,
-/* u s */ 0x39000000 /* strb [reg,imm] */,
-/* s l */ 0x39800000 /* ldrsb [reg,imm] */,
-/* s s */ 0x39000000 /* strb [reg,imm] */,
-};
-
-static const sljit_ins sljit_mem_simm[4] = {
-/* u l */ 0x38400000 /* ldurb [reg,imm] */,
-/* u s */ 0x38000000 /* sturb [reg,imm] */,
-/* s l */ 0x38800000 /* ldursb [reg,imm] */,
-/* s s */ 0x38000000 /* sturb [reg,imm] */,
-};
-
-static const sljit_ins sljit_mem_pre_simm[4] = {
-/* u l */ 0x38400c00 /* ldrb [reg,imm]! */,
-/* u s */ 0x38000c00 /* strb [reg,imm]! */,
-/* s l */ 0x38800c00 /* ldrsb [reg,imm]! */,
-/* s s */ 0x38000c00 /* strb [reg,imm]! */,
-};
-
-static const sljit_ins sljit_mem_reg[4] = {
-/* u l */ 0x38606800 /* ldrb [reg,reg] */,
-/* u s */ 0x38206800 /* strb [reg,reg] */,
-/* s l */ 0x38a06800 /* ldrsb [reg,reg] */,
-/* s s */ 0x38206800 /* strb [reg,reg] */,
-};
-
-/* Helper function. Dst should be reg + value, using at most 1 instruction, flags does not set. */
-static sljit_s32 emit_set_delta(struct sljit_compiler *compiler, sljit_s32 dst, sljit_s32 reg, sljit_sw value)
-{
- if (value >= 0) {
- if (value <= 0xfff)
- return push_inst(compiler, ADDI | RD(dst) | RN(reg) | (value << 10));
- if (value <= 0xffffff && !(value & 0xfff))
- return push_inst(compiler, ADDI | (1 << 22) | RD(dst) | RN(reg) | (value >> 2));
- }
- else {
- value = -value;
- if (value <= 0xfff)
- return push_inst(compiler, SUBI | RD(dst) | RN(reg) | (value << 10));
- if (value <= 0xffffff && !(value & 0xfff))
- return push_inst(compiler, SUBI | (1 << 22) | RD(dst) | RN(reg) | (value >> 2));
- }
- return SLJIT_ERR_UNSUPPORTED;
-}
+#define MEM_SIZE_SHIFT(flags) ((flags) & 0x3)
-/* Can perform an operation using at most 1 instruction. */
-static sljit_s32 getput_arg_fast(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
+static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg,
+ sljit_s32 arg, sljit_sw argw, sljit_s32 tmp_reg)
{
sljit_u32 shift = MEM_SIZE_SHIFT(flags);
+ sljit_u32 type = (shift << 30);
- SLJIT_ASSERT(arg & SLJIT_MEM);
-
- if (SLJIT_UNLIKELY(flags & UPDATE)) {
- if ((arg & REG_MASK) && !(arg & OFFS_REG_MASK) && argw <= 255 && argw >= -256) {
- if (SLJIT_UNLIKELY(flags & ARG_TEST))
- return 1;
+ if (!(flags & STORE))
+ type |= (flags & SIGNED) ? 0x00800000 : 0x00400000;
- arg &= REG_MASK;
- argw &= 0x1ff;
- FAIL_IF(push_inst(compiler, sljit_mem_pre_simm[flags & 0x3]
- | (shift << 30) | RT(reg) | RN(arg) | (argw << 12)));
- return -1;
- }
- return 0;
- }
+ SLJIT_ASSERT(arg & SLJIT_MEM);
if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
argw &= 0x3;
- if (argw && argw != shift)
- return 0;
- if (SLJIT_UNLIKELY(flags & ARG_TEST))
- return 1;
+ if (argw == 0 || argw == shift)
+ return push_inst(compiler, STRB | type | RT(reg)
+ | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | (argw ? (1 << 12) : 0));
- FAIL_IF(push_inst(compiler, sljit_mem_reg[flags & 0x3] | (shift << 30) | RT(reg)
- | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | (argw ? (1 << 12) : 0)));
- return -1;
+ FAIL_IF(push_inst(compiler, ADD | RD(tmp_reg) | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | (argw << 10)));
+ return push_inst(compiler, STRBI | type | RT(reg) | RN(tmp_reg));
}
arg &= REG_MASK;
- if (arg == SLJIT_UNUSED)
- return 0;
-
- if (argw >= 0 && (argw >> shift) <= 0xfff && (argw & ((1 << shift) - 1)) == 0) {
- if (SLJIT_UNLIKELY(flags & ARG_TEST))
- return 1;
-
- FAIL_IF(push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30)
- | RT(reg) | RN(arg) | (argw << (10 - shift))));
- return -1;
- }
-
- if (argw > 255 || argw < -256)
- return 0;
-
- if (SLJIT_UNLIKELY(flags & ARG_TEST))
- return 1;
-
- FAIL_IF(push_inst(compiler, sljit_mem_simm[flags & 0x3] | (shift << 30)
- | RT(reg) | RN(arg) | ((argw & 0x1ff) << 12)));
- return -1;
-}
-
-/* see getput_arg below.
- Note: can_cache is called only for binary operators. Those
- operators always uses word arguments without write back. */
-static sljit_s32 can_cache(sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw)
-{
- sljit_sw diff;
- if ((arg & OFFS_REG_MASK) || !(next_arg & SLJIT_MEM))
- return 0;
-
- if (!(arg & REG_MASK)) {
- diff = argw - next_argw;
- if (diff <= 0xfff && diff >= -0xfff)
- return 1;
- return 0;
- }
-
- if (argw == next_argw)
- return 1;
-
- diff = argw - next_argw;
- if (arg == next_arg && diff <= 0xfff && diff >= -0xfff)
- return 1;
-
- return 0;
-}
-
-/* Emit the necessary instructions. See can_cache above. */
-static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg,
- sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw)
-{
- sljit_u32 shift = MEM_SIZE_SHIFT(flags);
- sljit_s32 tmp_r, other_r;
- sljit_sw diff;
-
- SLJIT_ASSERT(arg & SLJIT_MEM);
- if (!(next_arg & SLJIT_MEM)) {
- next_arg = 0;
- next_argw = 0;
- }
-
- tmp_r = ((flags & STORE) || (flags == (WORD_SIZE | SIGNED))) ? TMP_REG3 : reg;
-
- if (SLJIT_UNLIKELY((flags & UPDATE) && (arg & REG_MASK))) {
- /* Update only applies if a base register exists. */
- other_r = OFFS_REG(arg);
- if (!other_r) {
- other_r = arg & REG_MASK;
- SLJIT_ASSERT(other_r != reg);
-
- if (argw >= 0 && argw <= 0xffffff) {
- if ((argw & 0xfff) != 0)
- FAIL_IF(push_inst(compiler, ADDI | RD(other_r) | RN(other_r) | ((argw & 0xfff) << 10)));
- if (argw >> 12)
- FAIL_IF(push_inst(compiler, ADDI | (1 << 22) | RD(other_r) | RN(other_r) | ((argw >> 12) << 10)));
- return push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30) | RT(reg) | RN(other_r));
- }
- else if (argw < 0 && argw >= -0xffffff) {
- argw = -argw;
- if ((argw & 0xfff) != 0)
- FAIL_IF(push_inst(compiler, SUBI | RD(other_r) | RN(other_r) | ((argw & 0xfff) << 10)));
- if (argw >> 12)
- FAIL_IF(push_inst(compiler, SUBI | (1 << 22) | RD(other_r) | RN(other_r) | ((argw >> 12) << 10)));
- return push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30) | RT(reg) | RN(other_r));
- }
-
- if (compiler->cache_arg == SLJIT_MEM) {
- if (argw == compiler->cache_argw) {
- other_r = TMP_REG3;
- argw = 0;
- }
- else if (emit_set_delta(compiler, TMP_REG3, TMP_REG3, argw - compiler->cache_argw) != SLJIT_ERR_UNSUPPORTED) {
- FAIL_IF(compiler->error);
- compiler->cache_argw = argw;
- other_r = TMP_REG3;
- argw = 0;
- }
- }
-
- if (argw) {
- FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
- compiler->cache_arg = SLJIT_MEM;
- compiler->cache_argw = argw;
- other_r = TMP_REG3;
- argw = 0;
- }
- }
-
- /* No caching here. */
- arg &= REG_MASK;
- FAIL_IF(push_inst(compiler, sljit_mem_reg[flags & 0x3] | (shift << 30) | RT(reg) | RN(arg) | RM(other_r)));
- return push_inst(compiler, ADD | RD(arg) | RN(arg) | RM(other_r));
- }
-
- if (arg & OFFS_REG_MASK) {
- other_r = OFFS_REG(arg);
- arg &= REG_MASK;
- FAIL_IF(push_inst(compiler, ADD | RD(tmp_r) | RN(arg) | RM(other_r) | ((argw & 0x3) << 10)));
- return push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30) | RT(reg) | RN(tmp_r));
- }
+ if (arg == SLJIT_UNUSED) {
+ FAIL_IF(load_immediate(compiler, tmp_reg, argw & ~(0xfff << shift)));
- if (compiler->cache_arg == arg) {
- diff = argw - compiler->cache_argw;
- if (diff <= 255 && diff >= -256)
- return push_inst(compiler, sljit_mem_simm[flags & 0x3] | (shift << 30)
- | RT(reg) | RN(TMP_REG3) | ((diff & 0x1ff) << 12));
- if (emit_set_delta(compiler, TMP_REG3, TMP_REG3, diff) != SLJIT_ERR_UNSUPPORTED) {
- FAIL_IF(compiler->error);
- return push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30) | RT(reg) | RN(arg));
- }
- }
+ argw = (argw >> shift) & 0xfff;
- diff = argw - next_argw;
- next_arg = (arg & REG_MASK) && (arg == next_arg) && diff <= 0xfff && diff >= -0xfff && diff != 0;
- arg &= REG_MASK;
-
- if (arg != SLJIT_UNUSED && argw >= 0 && argw <= 0xffffff && (argw & ((1 << shift) - 1)) == 0) {
- FAIL_IF(push_inst(compiler, ADDI | (1 << 22) | RD(tmp_r) | RN(arg) | ((argw >> 12) << 10)));
- return push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30)
- | RT(reg) | RN(tmp_r) | ((argw & 0xfff) << (10 - shift)));
+ return push_inst(compiler, STRBI | type | RT(reg) | RN(tmp_reg) | (argw << 10));
}
- if (arg && compiler->cache_arg == SLJIT_MEM) {
- if (compiler->cache_argw == argw)
- return push_inst(compiler, sljit_mem_reg[flags & 0x3] | (shift << 30) | RT(reg) | RN(arg) | RM(TMP_REG3));
- if (emit_set_delta(compiler, TMP_REG3, TMP_REG3, argw - compiler->cache_argw) != SLJIT_ERR_UNSUPPORTED) {
- FAIL_IF(compiler->error);
- compiler->cache_argw = argw;
- return push_inst(compiler, sljit_mem_reg[flags & 0x3] | (shift << 30) | RT(reg) | RN(arg) | RM(TMP_REG3));
+ if (argw >= 0 && (argw & ((1 << shift) - 1)) == 0) {
+ if ((argw >> shift) <= 0xfff) {
+ return push_inst(compiler, STRBI | type | RT(reg) | RN(arg) | (argw << (10 - shift)));
}
- }
- compiler->cache_argw = argw;
- if (next_arg && emit_set_delta(compiler, TMP_REG3, arg, argw) != SLJIT_ERR_UNSUPPORTED) {
- FAIL_IF(compiler->error);
- compiler->cache_arg = SLJIT_MEM | arg;
- arg = 0;
- }
- else {
- FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
- compiler->cache_arg = SLJIT_MEM;
+ if (argw <= 0xffffff) {
+ FAIL_IF(push_inst(compiler, ADDI | (1 << 22) | RD(tmp_reg) | RN(arg) | ((argw >> 12) << 10)));
- if (next_arg) {
- FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG3) | RN(TMP_REG3) | RM(arg)));
- compiler->cache_arg = SLJIT_MEM | arg;
- arg = 0;
+ argw = ((argw & 0xfff) >> shift);
+ return push_inst(compiler, STRBI | type | RT(reg) | RN(tmp_reg) | (argw << 10));
}
}
- if (arg)
- return push_inst(compiler, sljit_mem_reg[flags & 0x3] | (shift << 30) | RT(reg) | RN(arg) | RM(TMP_REG3));
- return push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30) | RT(reg) | RN(TMP_REG3));
-}
+ if (argw <= 255 && argw >= -256)
+ return push_inst(compiler, STURBI | type | RT(reg) | RN(arg) | ((argw & 0x1ff) << 12));
-static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
-{
- if (getput_arg_fast(compiler, flags, reg, arg, argw))
- return compiler->error;
- compiler->cache_arg = 0;
- compiler->cache_argw = 0;
- return getput_arg(compiler, flags, reg, arg, argw, 0, 0);
-}
+ FAIL_IF(load_immediate(compiler, tmp_reg, argw));
-static SLJIT_INLINE sljit_s32 emit_op_mem2(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg1, sljit_sw arg1w, sljit_s32 arg2, sljit_sw arg2w)
-{
- if (getput_arg_fast(compiler, flags, reg, arg1, arg1w))
- return compiler->error;
- return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w);
+ return push_inst(compiler, STRB | type | RT(reg) | RN(arg) | RM(tmp_reg));
}
/* --------------------------------------------------------------------- */
@@ -1320,9 +1092,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
ADJUST_LOCAL_OFFSET(dst, dstw);
ADJUST_LOCAL_OFFSET(src, srcw);
- compiler->cache_arg = 0;
- compiler->cache_argw = 0;
-
if (dst == SLJIT_UNUSED && !HAS_FLAGS(op)) {
if (op <= SLJIT_MOV_P && (src & SLJIT_MEM)) {
SLJIT_ASSERT(reg_map[1] == 0 && reg_map[3] == 2 && reg_map[5] == 4);
@@ -1335,7 +1104,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
dst = 1;
/* Signed word sized load is the prefetch instruction. */
- return emit_op_mem(compiler, WORD_SIZE | SIGNED, dst, src, srcw);
+ return emit_op_mem(compiler, WORD_SIZE | SIGNED, dst, src, srcw, TMP_REG1);
}
return SLJIT_SUCCESS;
}
@@ -1343,106 +1112,67 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;
op = GET_OPCODE(op);
- if (op >= SLJIT_MOV && op <= SLJIT_MOVU_P) {
+ if (op >= SLJIT_MOV && op <= SLJIT_MOV_P) {
+ /* Both operands are registers. */
+ if (dst_r != TMP_REG1 && FAST_IS_REG(src))
+ return emit_op_imm(compiler, op | ((op_flags & SLJIT_I32_OP) ? INT_OP : 0), dst_r, TMP_REG1, src);
+
switch (op) {
case SLJIT_MOV:
case SLJIT_MOV_P:
- flags = WORD_SIZE;
+ mem_flags = WORD_SIZE;
break;
case SLJIT_MOV_U8:
- flags = BYTE_SIZE;
+ mem_flags = BYTE_SIZE;
if (src & SLJIT_IMM)
srcw = (sljit_u8)srcw;
break;
case SLJIT_MOV_S8:
- flags = BYTE_SIZE | SIGNED;
+ mem_flags = BYTE_SIZE | SIGNED;
if (src & SLJIT_IMM)
srcw = (sljit_s8)srcw;
break;
case SLJIT_MOV_U16:
- flags = HALF_SIZE;
+ mem_flags = HALF_SIZE;
if (src & SLJIT_IMM)
srcw = (sljit_u16)srcw;
break;
case SLJIT_MOV_S16:
- flags = HALF_SIZE | SIGNED;
+ mem_flags = HALF_SIZE | SIGNED;
if (src & SLJIT_IMM)
srcw = (sljit_s16)srcw;
break;
case SLJIT_MOV_U32:
- flags = INT_SIZE;
+ mem_flags = INT_SIZE;
if (src & SLJIT_IMM)
srcw = (sljit_u32)srcw;
break;
case SLJIT_MOV_S32:
- flags = INT_SIZE | SIGNED;
- if (src & SLJIT_IMM)
- srcw = (sljit_s32)srcw;
- break;
- case SLJIT_MOVU:
- case SLJIT_MOVU_P:
- flags = WORD_SIZE | UPDATE;
- break;
- case SLJIT_MOVU_U8:
- flags = BYTE_SIZE | UPDATE;
- if (src & SLJIT_IMM)
- srcw = (sljit_u8)srcw;
- break;
- case SLJIT_MOVU_S8:
- flags = BYTE_SIZE | SIGNED | UPDATE;
- if (src & SLJIT_IMM)
- srcw = (sljit_s8)srcw;
- break;
- case SLJIT_MOVU_U16:
- flags = HALF_SIZE | UPDATE;
- if (src & SLJIT_IMM)
- srcw = (sljit_u16)srcw;
- break;
- case SLJIT_MOVU_S16:
- flags = HALF_SIZE | SIGNED | UPDATE;
- if (src & SLJIT_IMM)
- srcw = (sljit_s16)srcw;
- break;
- case SLJIT_MOVU_U32:
- flags = INT_SIZE | UPDATE;
- if (src & SLJIT_IMM)
- srcw = (sljit_u32)srcw;
- break;
- case SLJIT_MOVU_S32:
- flags = INT_SIZE | SIGNED | UPDATE;
+ mem_flags = INT_SIZE | SIGNED;
if (src & SLJIT_IMM)
srcw = (sljit_s32)srcw;
break;
default:
SLJIT_UNREACHABLE();
- flags = 0;
+ mem_flags = 0;
break;
}
if (src & SLJIT_IMM)
FAIL_IF(emit_op_imm(compiler, SLJIT_MOV | ARG2_IMM, dst_r, TMP_REG1, srcw));
- else if (src & SLJIT_MEM) {
- if (getput_arg_fast(compiler, flags, dst_r, src, srcw))
- FAIL_IF(compiler->error);
- else
- FAIL_IF(getput_arg(compiler, flags, dst_r, src, srcw, dst, dstw));
- } else {
- if (dst_r != TMP_REG1)
- return emit_op_imm(compiler, op | ((op_flags & SLJIT_I32_OP) ? INT_OP : 0), dst_r, TMP_REG1, src);
+ else if (!(src & SLJIT_MEM))
dst_r = src;
- }
+ else
+ FAIL_IF(emit_op_mem(compiler, mem_flags, dst_r, src, srcw, TMP_REG1));
- if (dst & SLJIT_MEM) {
- if (getput_arg_fast(compiler, flags | STORE, dst_r, dst, dstw))
- return compiler->error;
- else
- return getput_arg(compiler, flags | STORE, dst_r, dst, dstw, 0, 0);
- }
+ if (dst & SLJIT_MEM)
+ return emit_op_mem(compiler, mem_flags | STORE, dst_r, dst, dstw, TMP_REG2);
return SLJIT_SUCCESS;
}
flags = HAS_FLAGS(op_flags) ? SET_FLAGS : 0;
mem_flags = WORD_SIZE;
+
if (op_flags & SLJIT_I32_OP) {
flags |= INT_OP;
mem_flags = INT_SIZE;
@@ -1452,28 +1182,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
flags |= UNUSED_RETURN;
if (src & SLJIT_MEM) {
- if (getput_arg_fast(compiler, mem_flags, TMP_REG2, src, srcw))
- FAIL_IF(compiler->error);
- else
- FAIL_IF(getput_arg(compiler, mem_flags, TMP_REG2, src, srcw, dst, dstw));
+ FAIL_IF(emit_op_mem(compiler, mem_flags, TMP_REG2, src, srcw, TMP_REG2));
src = TMP_REG2;
}
- if (src & SLJIT_IMM) {
- flags |= ARG2_IMM;
- if (op_flags & SLJIT_I32_OP)
- srcw = (sljit_s32)srcw;
- } else
- srcw = src;
+ emit_op_imm(compiler, flags | op, dst_r, TMP_REG1, src);
- emit_op_imm(compiler, flags | op, dst_r, TMP_REG1, srcw);
-
- if (dst & SLJIT_MEM) {
- if (getput_arg_fast(compiler, mem_flags | STORE, dst_r, dst, dstw))
- return compiler->error;
- else
- return getput_arg(compiler, mem_flags | STORE, dst_r, dst, dstw, 0, 0);
- }
+ if (SLJIT_UNLIKELY(dst & SLJIT_MEM))
+ return emit_op_mem(compiler, mem_flags | STORE, dst_r, dst, dstw, TMP_REG2);
return SLJIT_SUCCESS;
}
@@ -1490,15 +1206,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile
ADJUST_LOCAL_OFFSET(src1, src1w);
ADJUST_LOCAL_OFFSET(src2, src2w);
- compiler->cache_arg = 0;
- compiler->cache_argw = 0;
-
if (dst == SLJIT_UNUSED && !HAS_FLAGS(op))
return SLJIT_SUCCESS;
dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;
flags = HAS_FLAGS(op) ? SET_FLAGS : 0;
mem_flags = WORD_SIZE;
+
if (op & SLJIT_I32_OP) {
flags |= INT_OP;
mem_flags = INT_SIZE;
@@ -1507,46 +1221,21 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile
if (dst == SLJIT_UNUSED)
flags |= UNUSED_RETURN;
- if ((dst & SLJIT_MEM) && !getput_arg_fast(compiler, mem_flags | STORE | ARG_TEST, TMP_REG1, dst, dstw))
- flags |= SLOW_DEST;
-
if (src1 & SLJIT_MEM) {
- if (getput_arg_fast(compiler, mem_flags, TMP_REG1, src1, src1w))
- FAIL_IF(compiler->error);
- else
- flags |= SLOW_SRC1;
- }
- if (src2 & SLJIT_MEM) {
- if (getput_arg_fast(compiler, mem_flags, TMP_REG2, src2, src2w))
- FAIL_IF(compiler->error);
- else
- flags |= SLOW_SRC2;
- }
-
- if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) {
- if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) {
- FAIL_IF(getput_arg(compiler, mem_flags, TMP_REG2, src2, src2w, src1, src1w));
- FAIL_IF(getput_arg(compiler, mem_flags, TMP_REG1, src1, src1w, dst, dstw));
- }
- else {
- FAIL_IF(getput_arg(compiler, mem_flags, TMP_REG1, src1, src1w, src2, src2w));
- FAIL_IF(getput_arg(compiler, mem_flags, TMP_REG2, src2, src2w, dst, dstw));
- }
+ FAIL_IF(emit_op_mem(compiler, mem_flags, TMP_REG1, src1, src1w, TMP_REG1));
+ src1 = TMP_REG1;
}
- else if (flags & SLOW_SRC1)
- FAIL_IF(getput_arg(compiler, mem_flags, TMP_REG1, src1, src1w, dst, dstw));
- else if (flags & SLOW_SRC2)
- FAIL_IF(getput_arg(compiler, mem_flags, TMP_REG2, src2, src2w, dst, dstw));
- if (src1 & SLJIT_MEM)
- src1 = TMP_REG1;
- if (src2 & SLJIT_MEM)
+ if (src2 & SLJIT_MEM) {
+ FAIL_IF(emit_op_mem(compiler, mem_flags, TMP_REG2, src2, src2w, TMP_REG2));
src2 = TMP_REG2;
+ }
if (src1 & SLJIT_IMM)
flags |= ARG1_IMM;
else
src1w = src1;
+
if (src2 & SLJIT_IMM)
flags |= ARG2_IMM;
else
@@ -1554,14 +1243,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile
emit_op_imm(compiler, flags | GET_OPCODE(op), dst_r, src1w, src2w);
- if (dst & SLJIT_MEM) {
- if (!(flags & SLOW_DEST)) {
- getput_arg_fast(compiler, mem_flags | STORE, dst_r, dst, dstw);
- return compiler->error;
- }
- return getput_arg(compiler, mem_flags | STORE, TMP_REG1, dst, dstw, 0, 0);
- }
-
+ if (dst & SLJIT_MEM)
+ return emit_op_mem(compiler, mem_flags | STORE, dst_r, dst, dstw, TMP_REG2);
return SLJIT_SUCCESS;
}
@@ -1593,54 +1276,50 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *c
static sljit_s32 emit_fop_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
{
sljit_u32 shift = MEM_SIZE_SHIFT(flags);
- sljit_ins ins_bits = (shift << 30);
- sljit_s32 other_r;
- sljit_sw diff;
+ sljit_ins type = (shift << 30);
SLJIT_ASSERT(arg & SLJIT_MEM);
if (!(flags & STORE))
- ins_bits |= 1 << 22;
+ type |= 0x00400000;
if (arg & OFFS_REG_MASK) {
argw &= 3;
- if (!argw || argw == shift)
- return push_inst(compiler, STR_FR | ins_bits | VT(reg)
+ if (argw == 0 || argw == shift)
+ return push_inst(compiler, STR_FR | type | VT(reg)
| RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | (argw ? (1 << 12) : 0));
- other_r = OFFS_REG(arg);
- arg &= REG_MASK;
- FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG1) | RN(arg) | RM(other_r) | (argw << 10)));
- arg = TMP_REG1;
- argw = 0;
+
+ FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG1) | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | (argw << 10)));
+ return push_inst(compiler, STR_FI | type | VT(reg) | RN(TMP_REG1));
}
arg &= REG_MASK;
- if (arg && argw >= 0 && ((argw >> shift) <= 0xfff) && (argw & ((1 << shift) - 1)) == 0)
- return push_inst(compiler, STR_FI | ins_bits | VT(reg) | RN(arg) | (argw << (10 - shift)));
-
- if (arg && argw <= 255 && argw >= -256)
- return push_inst(compiler, STUR_FI | ins_bits | VT(reg) | RN(arg) | ((argw & 0x1ff) << 12));
-
- /* Slow cases */
- if (compiler->cache_arg == SLJIT_MEM && argw != compiler->cache_argw) {
- diff = argw - compiler->cache_argw;
- if (!arg && diff <= 255 && diff >= -256)
- return push_inst(compiler, STUR_FI | ins_bits | VT(reg) | RN(TMP_REG3) | ((diff & 0x1ff) << 12));
- if (emit_set_delta(compiler, TMP_REG3, TMP_REG3, argw - compiler->cache_argw) != SLJIT_ERR_UNSUPPORTED) {
- FAIL_IF(compiler->error);
- compiler->cache_argw = argw;
- }
+
+ if (arg == SLJIT_UNUSED) {
+ FAIL_IF(load_immediate(compiler, TMP_REG1, argw & ~(0xfff << shift)));
+
+ argw = (argw >> shift) & 0xfff;
+
+ return push_inst(compiler, STR_FI | type | VT(reg) | RN(TMP_REG1) | (argw << 10));
}
- if (compiler->cache_arg != SLJIT_MEM || argw != compiler->cache_argw) {
- compiler->cache_arg = SLJIT_MEM;
- compiler->cache_argw = argw;
- FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
+ if (argw >= 0 && (argw & ((1 << shift) - 1)) == 0) {
+ if ((argw >> shift) <= 0xfff)
+ return push_inst(compiler, STR_FI | type | VT(reg) | RN(arg) | (argw << (10 - shift)));
+
+ if (argw <= 0xffffff) {
+ FAIL_IF(push_inst(compiler, ADDI | (1 << 22) | RD(TMP_REG1) | RN(arg) | ((argw >> 12) << 10)));
+
+ argw = ((argw & 0xfff) >> shift);
+ return push_inst(compiler, STR_FI | type | VT(reg) | RN(TMP_REG1) | (argw << 10));
+ }
}
- if (arg & REG_MASK)
- return push_inst(compiler, STR_FR | ins_bits | VT(reg) | RN(arg) | RM(TMP_REG3));
- return push_inst(compiler, STR_FI | ins_bits | VT(reg) | RN(TMP_REG3));
+ if (argw <= 255 && argw >= -256)
+ return push_inst(compiler, STUR_FI | type | VT(reg) | RN(arg) | ((argw & 0x1ff) << 12));
+
+ FAIL_IF(load_immediate(compiler, TMP_REG1, argw));
+ return push_inst(compiler, STR_FR | type | VT(reg) | RN(arg) | RM(TMP_REG1));
}
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
@@ -1661,7 +1340,7 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_comp
FAIL_IF(push_inst(compiler, (FCVTZS ^ inv_bits) | RD(dst_r) | VN(src)));
if (dst & SLJIT_MEM)
- return emit_op_mem(compiler, ((GET_OPCODE(op) == SLJIT_CONV_S32_FROM_F64) ? INT_SIZE : WORD_SIZE) | STORE, TMP_REG1, dst, dstw);
+ return emit_op_mem(compiler, ((GET_OPCODE(op) == SLJIT_CONV_S32_FROM_F64) ? INT_SIZE : WORD_SIZE) | STORE, TMP_REG1, dst, dstw, TMP_REG2);
return SLJIT_SUCCESS;
}
@@ -1676,7 +1355,7 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_comp
inv_bits |= (1 << 31);
if (src & SLJIT_MEM) {
- emit_op_mem(compiler, ((GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) ? INT_SIZE : WORD_SIZE), TMP_REG1, src, srcw);
+ emit_op_mem(compiler, ((GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) ? INT_SIZE : WORD_SIZE), TMP_REG1, src, srcw, TMP_REG1);
src = TMP_REG1;
} else if (src & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
@@ -1722,17 +1401,15 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil
sljit_ins inv_bits;
CHECK_ERROR();
- compiler->cache_arg = 0;
- compiler->cache_argw = 0;
- SLJIT_COMPILE_ASSERT((INT_SIZE ^ 0x100) == WORD_SIZE, must_be_one_bit_difference);
+ SLJIT_COMPILE_ASSERT((INT_SIZE ^ 0x1) == WORD_SIZE, must_be_one_bit_difference);
SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);
inv_bits = (op & SLJIT_F32_OP) ? (1 << 22) : 0;
dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
if (src & SLJIT_MEM) {
- emit_fop_mem(compiler, (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32) ? (mem_flags ^ 0x100) : mem_flags, dst_r, src, srcw);
+ emit_fop_mem(compiler, (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32) ? (mem_flags ^ 0x1) : mem_flags, dst_r, src, srcw);
src = dst_r;
}
@@ -1775,9 +1452,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compil
ADJUST_LOCAL_OFFSET(src1, src1w);
ADJUST_LOCAL_OFFSET(src2, src2w);
- compiler->cache_arg = 0;
- compiler->cache_argw = 0;
-
dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
if (src1 & SLJIT_MEM) {
emit_fop_mem(compiler, mem_flags, TMP_FREG1, src1, src1w);
@@ -1822,7 +1496,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *
return push_inst(compiler, ORR | RD(dst) | RN(TMP_ZERO) | RM(TMP_LR));
/* Memory. */
- return emit_op_mem(compiler, WORD_SIZE | STORE, TMP_LR, dst, dstw);
+ return emit_op_mem(compiler, WORD_SIZE | STORE, TMP_LR, dst, dstw, TMP_REG1);
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw)
@@ -1833,10 +1507,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler
if (FAST_IS_REG(src))
FAIL_IF(push_inst(compiler, ORR | RD(TMP_LR) | RN(TMP_ZERO) | RM(src)));
- else if (src & SLJIT_MEM)
- FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_LR, src, srcw));
- else if (src & SLJIT_IMM)
- FAIL_IF(load_immediate(compiler, TMP_LR, srcw));
+ else
+ FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_LR, src, srcw, TMP_REG1));
return push_inst(compiler, RET | RN(TMP_LR));
}
@@ -1971,13 +1643,14 @@ static SLJIT_INLINE struct sljit_jump* emit_cmp_to0(struct sljit_compiler *compi
jump->flags |= IS_CBZ | IS_COND;
if (src & SLJIT_MEM) {
- PTR_FAIL_IF(emit_op_mem(compiler, inv_bits ? INT_SIZE : WORD_SIZE, TMP_REG1, src, srcw));
+ PTR_FAIL_IF(emit_op_mem(compiler, inv_bits ? INT_SIZE : WORD_SIZE, TMP_REG1, src, srcw, TMP_REG1));
src = TMP_REG1;
}
else if (src & SLJIT_IMM) {
PTR_FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
src = TMP_REG1;
}
+
SLJIT_ASSERT(FAST_IS_REG(src));
if ((type & 0xff) == SLJIT_EQUAL)
@@ -2000,7 +1673,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi
if (!(src & SLJIT_IMM)) {
if (src & SLJIT_MEM) {
- FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src, srcw));
+ FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src, srcw, TMP_REG1));
src = TMP_REG1;
}
return push_inst(compiler, ((type >= SLJIT_FAST_CALL) ? BLR : BR) | RN(src));
@@ -2048,15 +1721,18 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co
if (GET_OPCODE(op) < SLJIT_ADD) {
FAIL_IF(push_inst(compiler, CSINC | (cc << 12) | RD(dst_r) | RN(TMP_ZERO) | RM(TMP_ZERO)));
- if (dst_r != TMP_REG1)
- return SLJIT_SUCCESS;
- return emit_op_mem(compiler, (GET_OPCODE(op) == SLJIT_MOV ? WORD_SIZE : INT_SIZE) | STORE, TMP_REG1, dst, dstw);
+
+ if (dst_r == TMP_REG1) {
+ mem_flags = (GET_OPCODE(op) == SLJIT_MOV ? WORD_SIZE : INT_SIZE) | STORE;
+ return emit_op_mem(compiler, mem_flags, TMP_REG1, dst, dstw, TMP_REG2);
+ }
+
+ return SLJIT_SUCCESS;
}
- compiler->cache_arg = 0;
- compiler->cache_argw = 0;
flags = HAS_FLAGS(op) ? SET_FLAGS : 0;
mem_flags = WORD_SIZE;
+
if (op & SLJIT_I32_OP) {
flags |= INT_OP;
mem_flags = INT_SIZE;
@@ -2065,7 +1741,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co
src_r = dst;
if (dst & SLJIT_MEM) {
- FAIL_IF(emit_op_mem2(compiler, mem_flags, TMP_REG1, dst, dstw, dst, dstw));
+ FAIL_IF(emit_op_mem(compiler, mem_flags, TMP_REG1, dst, dstw, TMP_REG1));
src_r = TMP_REG1;
}
@@ -2073,7 +1749,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co
emit_op_imm(compiler, flags | GET_OPCODE(op), dst_r, src_r, TMP_REG2);
if (dst & SLJIT_MEM)
- return emit_op_mem2(compiler, mem_flags | STORE, TMP_REG1, dst, dstw, 0, 0);
+ return emit_op_mem(compiler, mem_flags | STORE, TMP_REG1, dst, dstw, TMP_REG2);
return SLJIT_SUCCESS;
}
@@ -2101,6 +1777,85 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil
return push_inst(compiler, (CSEL ^ inv_bits) | (cc << 12) | RD(dst_reg) | RN(dst_reg) | RM(src));
}
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 reg,
+ sljit_s32 mem, sljit_sw memw)
+{
+ sljit_u32 sign = 0, inst;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw));
+
+ if ((mem & OFFS_REG_MASK) || (memw > 255 || memw < -256))
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if (type & SLJIT_MEM_SUPP)
+ return SLJIT_SUCCESS;
+
+ switch (type & 0xff) {
+ case SLJIT_MOV:
+ case SLJIT_MOV_P:
+ inst = STURBI | (MEM_SIZE_SHIFT(WORD_SIZE) << 30) | 0x400;
+ break;
+ case SLJIT_MOV_S8:
+ sign = 1;
+ case SLJIT_MOV_U8:
+ inst = STURBI | (MEM_SIZE_SHIFT(BYTE_SIZE) << 30) | 0x400;
+ break;
+ case SLJIT_MOV_S16:
+ sign = 1;
+ case SLJIT_MOV_U16:
+ inst = STURBI | (MEM_SIZE_SHIFT(HALF_SIZE) << 30) | 0x400;
+ break;
+ case SLJIT_MOV_S32:
+ sign = 1;
+ case SLJIT_MOV_U32:
+ inst = STURBI | (MEM_SIZE_SHIFT(INT_SIZE) << 30) | 0x400;
+ break;
+ default:
+ SLJIT_UNREACHABLE();
+ inst = STURBI | (MEM_SIZE_SHIFT(WORD_SIZE) << 30) | 0x400;
+ break;
+ }
+
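+ /* Bit 22 turns the store encoding into a load; bit 23 selects the sign-extending load. */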
+ if (!(type & SLJIT_MEM_STORE))
+ inst |= sign ? 0x00800000 : 0x00400000;
+
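+ /* Bits 10-11 pick the indexing mode: 01 is post-index, 11 is pre-index. */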
+ if (type & SLJIT_MEM_PRE)
+ inst |= 0x800;
+
+ return push_inst(compiler, inst | RT(reg) | RN(mem & REG_MASK) | ((memw & 0x1ff) << 12));
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 freg,
+ sljit_s32 mem, sljit_sw memw)
+{
+ sljit_u32 inst;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_fmem(compiler, type, freg, mem, memw));
+
+ if ((mem & OFFS_REG_MASK) || (memw > 255 || memw < -256))
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if (type & SLJIT_MEM_SUPP)
+ return SLJIT_SUCCESS;
+
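+ /* Start from the single precision post-index form; bit 30 widens it to double precision below. */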
+ inst = STUR_FI | 0x80000400;
+
+ if (!(type & SLJIT_F32_OP))
+ inst |= 0x40000000;
+
+ if (!(type & SLJIT_MEM_STORE))
+ inst |= 0x00400000;
+
+ if (type & SLJIT_MEM_PRE)
+ inst |= 0x800;
+
+ return push_inst(compiler, inst | VT(freg) | RN(mem & REG_MASK) | ((memw & 0x1ff) << 12));
+}
+
SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
{
struct sljit_const *const_;
@@ -2118,7 +1873,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi
PTR_FAIL_IF(emit_imm64_const(compiler, dst_r, init_value));
if (dst & SLJIT_MEM)
- PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE | STORE, dst_r, dst, dstw));
+ PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE | STORE, dst_r, dst, dstw, TMP_REG2));
return const_;
}
diff --git a/sljit/sljitNativeARM_T2_32.c b/sljit/sljitNativeARM_T2_32.c
index db0d8a1..75e7a38 100644
--- a/sljit/sljitNativeARM_T2_32.c
+++ b/sljit/sljitNativeARM_T2_32.c
@@ -453,7 +453,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
return 1;
#endif
- case SLJIT_HAS_PRE_UPDATE:
case SLJIT_HAS_CLZ:
case SLJIT_HAS_CMOV:
return 1;
@@ -738,34 +737,26 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s
case SLJIT_MOV_U32:
case SLJIT_MOV_S32:
case SLJIT_MOV_P:
- case SLJIT_MOVU:
- case SLJIT_MOVU_U32:
- case SLJIT_MOVU_S32:
- case SLJIT_MOVU_P:
SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG2);
if (dst == arg2)
return SLJIT_SUCCESS;
return push_inst16(compiler, MOV | SET_REGS44(dst, arg2));
case SLJIT_MOV_U8:
- case SLJIT_MOVU_U8:
SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG2);
if (IS_2_LO_REGS(dst, arg2))
return push_inst16(compiler, UXTB | RD3(dst) | RN3(arg2));
return push_inst32(compiler, UXTB_W | RD4(dst) | RM4(arg2));
case SLJIT_MOV_S8:
- case SLJIT_MOVU_S8:
SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG2);
if (IS_2_LO_REGS(dst, arg2))
return push_inst16(compiler, SXTB | RD3(dst) | RN3(arg2));
return push_inst32(compiler, SXTB_W | RD4(dst) | RM4(arg2));
case SLJIT_MOV_U16:
- case SLJIT_MOVU_U16:
SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG2);
if (IS_2_LO_REGS(dst, arg2))
return push_inst16(compiler, UXTH | RD3(dst) | RN3(arg2));
return push_inst32(compiler, UXTH_W | RD4(dst) | RM4(arg2));
case SLJIT_MOV_S16:
- case SLJIT_MOVU_S16:
SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG2);
if (IS_2_LO_REGS(dst, arg2))
return push_inst16(compiler, SXTH | RD3(dst) | RN3(arg2));
@@ -849,8 +840,6 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s
#define HALF_SIZE 0x08
#define PRELOAD 0x0c
-#define UPDATE 0x10
-
#define IS_WORD_SIZE(flags) (!(flags & (BYTE_SIZE | HALF_SIZE)))
#define OFFSET_CHECK(imm, shift) (!(argw & ~(imm << shift)))
@@ -949,12 +938,10 @@ static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit
sljit_s32 arg, sljit_sw argw, sljit_s32 tmp_reg)
{
sljit_s32 other_r;
- sljit_s32 update = flags & UPDATE;
sljit_uw tmp;
SLJIT_ASSERT(arg & SLJIT_MEM);
SLJIT_ASSERT((arg & REG_MASK) != tmp_reg);
- flags &= ~UPDATE;
arg &= ~SLJIT_MEM;
if (SLJIT_UNLIKELY(!(arg & REG_MASK))) {
@@ -970,63 +957,6 @@ static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit
return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(tmp_reg));
}
- if (SLJIT_UNLIKELY(update)) {
- SLJIT_ASSERT(reg != arg);
-
- if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
- other_r = OFFS_REG(arg);
- arg &= 0xf;
-
- if (IS_3_LO_REGS(reg, arg, other_r))
- FAIL_IF(push_inst16(compiler, sljit_mem16[flags] | RD3(reg) | RN3(arg) | RM3(other_r)));
- else
- FAIL_IF(push_inst32(compiler, sljit_mem32[flags] | RT4(reg) | RN4(arg) | RM4(other_r)));
- return push_inst16(compiler, ADD | SET_REGS44(arg, other_r));
- }
-
- if (argw > 0xff) {
- tmp = get_imm(argw & ~0xff);
- if (tmp != INVALID_IMM) {
- push_inst32(compiler, ADD_WI | RD4(arg) | RN4(arg) | tmp);
- argw = argw & 0xff;
- }
- }
- else if (argw < -0xff) {
- tmp = get_imm(-argw & ~0xff);
- if (tmp != INVALID_IMM) {
- push_inst32(compiler, SUB_WI | RD4(arg) | RN4(arg) | tmp);
- argw = -(-argw & 0xff);
- }
- }
-
- if (argw == 0) {
- if (IS_2_LO_REGS(reg, arg) && sljit_mem16_imm5[flags])
- return push_inst16(compiler, sljit_mem16_imm5[flags] | RD3(reg) | RN3(arg));
- return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(arg));
- }
-
- if (argw <= 0xff && argw >= -0xff) {
- if (argw >= 0)
- argw |= 0x200;
- else {
- argw = -argw;
- }
-
- SLJIT_ASSERT(argw >= 0 && (argw & 0xff) <= 0xff);
- return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM8 | RT4(reg) | RN4(arg) | 0x100 | argw);
- }
-
- FAIL_IF(load_immediate(compiler, tmp_reg, argw));
-
- SLJIT_ASSERT(reg != tmp_reg);
-
- if (IS_3_LO_REGS(reg, arg, tmp_reg))
- FAIL_IF(push_inst16(compiler, sljit_mem16[flags] | RD3(reg) | RN3(arg) | RM3(tmp_reg)));
- else
- FAIL_IF(push_inst32(compiler, sljit_mem32[flags] | RT4(reg) | RN4(arg) | RM4(tmp_reg)));
- return push_inst16(compiler, ADD | SET_REGS44(arg, tmp_reg));
- }
-
if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
argw &= 0x3;
other_r = OFFS_REG(arg);
@@ -1300,7 +1230,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;
op = GET_OPCODE(op);
- if (op >= SLJIT_MOV && op <= SLJIT_MOVU_P) {
+ if (op >= SLJIT_MOV && op <= SLJIT_MOV_P) {
switch (op) {
case SLJIT_MOV:
case SLJIT_MOV_U32:
@@ -1328,32 +1258,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
if (src & SLJIT_IMM)
srcw = (sljit_s16)srcw;
break;
- case SLJIT_MOVU:
- case SLJIT_MOVU_U32:
- case SLJIT_MOVU_S32:
- case SLJIT_MOVU_P:
- flags = WORD_SIZE | UPDATE;
- break;
- case SLJIT_MOVU_U8:
- flags = BYTE_SIZE | UPDATE;
- if (src & SLJIT_IMM)
- srcw = (sljit_u8)srcw;
- break;
- case SLJIT_MOVU_S8:
- flags = BYTE_SIZE | SIGNED | UPDATE;
- if (src & SLJIT_IMM)
- srcw = (sljit_s8)srcw;
- break;
- case SLJIT_MOVU_U16:
- flags = HALF_SIZE | UPDATE;
- if (src & SLJIT_IMM)
- srcw = (sljit_u16)srcw;
- break;
- case SLJIT_MOVU_S16:
- flags = HALF_SIZE | SIGNED | UPDATE;
- if (src & SLJIT_IMM)
- srcw = (sljit_s16)srcw;
- break;
default:
SLJIT_UNREACHABLE();
flags = 0;
@@ -1363,7 +1267,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
if (src & SLJIT_IMM)
FAIL_IF(emit_op_imm(compiler, SLJIT_MOV | ARG2_IMM, dst_r, TMP_REG2, srcw));
else if (src & SLJIT_MEM) {
- FAIL_IF(emit_op_mem(compiler, flags, dst_r, src, srcw, ((flags & UPDATE) && dst_r == TMP_REG1) ? TMP_REG2 : TMP_REG1));
+ FAIL_IF(emit_op_mem(compiler, flags, dst_r, src, srcw, TMP_REG1));
} else {
if (dst_r != TMP_REG1)
return emit_op_imm(compiler, op, dst_r, TMP_REG2, src);
@@ -1373,7 +1277,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
if (!(dst & SLJIT_MEM))
return SLJIT_SUCCESS;
- return emit_op_mem(compiler, flags | STORE, dst_r, dst, dstw, (dst_r == TMP_REG1) ? TMP_REG2 : TMP_REG1);
+ return emit_op_mem(compiler, flags | STORE, dst_r, dst, dstw, TMP_REG2);
}
if (op == SLJIT_NEG) {
@@ -1386,20 +1290,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
flags = HAS_FLAGS(op_flags) ? SET_FLAGS : 0;
- if (src & SLJIT_IMM)
- flags |= ARG2_IMM;
- else if (src & SLJIT_MEM) {
+ if (src & SLJIT_MEM) {
FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src, srcw, TMP_REG1));
- srcw = TMP_REG1;
+ src = TMP_REG1;
}
- else
- srcw = src;
- emit_op_imm(compiler, flags | op, dst_r, TMP_REG2, srcw);
+ emit_op_imm(compiler, flags | op, dst_r, TMP_REG2, src);
- if (!(dst & SLJIT_MEM))
- return SLJIT_SUCCESS;
- return emit_op_mem(compiler, flags | STORE, dst_r, dst, dstw, TMP_REG2);
+ if (SLJIT_UNLIKELY(dst & SLJIT_MEM))
+ return emit_op_mem(compiler, flags | STORE, dst_r, dst, dstw, TMP_REG2);
+ return SLJIT_SUCCESS;
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
@@ -1713,11 +1613,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler
if (FAST_IS_REG(src))
FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG2, src)));
- else if (src & SLJIT_MEM) {
+ else
FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, src, srcw, TMP_REG2));
- }
- else if (src & SLJIT_IMM)
- FAIL_IF(load_immediate(compiler, TMP_REG2, srcw));
+
return push_inst16(compiler, BX | RN3(TMP_REG2));
}
@@ -2231,6 +2129,63 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil
| COPY_BITS(tmp, 12 + 16, 16, 4) | COPY_BITS(tmp, 11 + 16, 26, 1) | COPY_BITS(tmp, 8 + 16, 12, 3) | ((tmp & 0xff0000) >> 16));
}
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 reg,
+ sljit_s32 mem, sljit_sw memw)
+{
+ sljit_s32 flags;
+ sljit_ins inst;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw));
+
+ if ((mem & OFFS_REG_MASK) || (memw > 255 || memw < -255))
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if (type & SLJIT_MEM_SUPP)
+ return SLJIT_SUCCESS;
+
+ switch (type & 0xff) {
+ case SLJIT_MOV:
+ case SLJIT_MOV_U32:
+ case SLJIT_MOV_S32:
+ case SLJIT_MOV_P:
+ flags = WORD_SIZE;
+ break;
+ case SLJIT_MOV_U8:
+ flags = BYTE_SIZE;
+ break;
+ case SLJIT_MOV_S8:
+ flags = BYTE_SIZE | SIGNED;
+ break;
+ case SLJIT_MOV_U16:
+ flags = HALF_SIZE;
+ break;
+ case SLJIT_MOV_S16:
+ flags = HALF_SIZE | SIGNED;
+ break;
+ default:
+ SLJIT_UNREACHABLE();
+ flags = WORD_SIZE;
+ break;
+ }
+
+ if (type & SLJIT_MEM_STORE)
+ flags |= STORE;
+
+ inst = sljit_mem32[flags] | 0x900;
+
+ if (type & SLJIT_MEM_PRE)
+ inst |= 0x400;
+
+ if (memw >= 0)
+ inst |= 0x200;
+ else
+ memw = -memw;
+
+ return push_inst32(compiler, inst | RT4(reg) | RN4(mem & REG_MASK) | memw);
+}
+
SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
{
struct sljit_const *const_;
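For reference, a minimal caller-side sketch of the sljit_emit_mem interface implemented above (the helper name and the fallback path are illustrative assumptions, not code from this commit). The backend is probed first with SLJIT_MEM_SUPP, which emits nothing and only reports whether the requested addressing form is available; the combined pre-indexed access is emitted only when the probe returns SLJIT_SUCCESS, otherwise the pointer update and the load are issued separately:

/* Illustrative sketch only; assumes #include "sljitLir.h". */
static sljit_s32 load_next_word(struct sljit_compiler *compiler)
{
    /* Probe: with SLJIT_MEM_SUPP nothing is emitted, the return value only
       tells whether a pre-indexed load of this exact form is supported. */
    if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE,
            SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw)) == SLJIT_SUCCESS)
        /* Supported: R1 += sizeof(sljit_sw), then R0 = [R1], in one instruction. */
        return sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_PRE,
            SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw));

    /* Fallback: advance the base register explicitly, then do a plain load. */
    sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, sizeof(sljit_sw));
    return sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R1), 0);
}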
diff --git a/sljit/sljitNativeMIPS_common.c b/sljit/sljitNativeMIPS_common.c
index 588ca10..e108433 100644
--- a/sljit/sljitNativeMIPS_common.c
+++ b/sljit/sljitNativeMIPS_common.c
@@ -558,21 +558,20 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
#define MEM_MASK 0x1f
-#define WRITE_BACK 0x00020
-#define ARG_TEST 0x00040
-#define ALT_KEEP_CACHE 0x00080
-#define CUMULATIVE_OP 0x00100
-#define LOGICAL_OP 0x00200
-#define IMM_OP 0x00400
-#define SRC2_IMM 0x00800
-
-#define UNUSED_DEST 0x01000
-#define REG_DEST 0x02000
-#define REG1_SOURCE 0x04000
-#define REG2_SOURCE 0x08000
-#define SLOW_SRC1 0x10000
-#define SLOW_SRC2 0x20000
-#define SLOW_DEST 0x40000
+#define ARG_TEST 0x00020
+#define ALT_KEEP_CACHE 0x00040
+#define CUMULATIVE_OP 0x00080
+#define LOGICAL_OP 0x00100
+#define IMM_OP 0x00200
+#define SRC2_IMM 0x00400
+
+#define UNUSED_DEST 0x00800
+#define REG_DEST 0x01000
+#define REG1_SOURCE 0x02000
+#define REG2_SOURCE 0x04000
+#define SLOW_SRC1 0x08000
+#define SLOW_SRC2 0x10000
+#define SLOW_DEST 0x20000
#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
#define STACK_STORE SW
@@ -756,7 +755,7 @@ static sljit_s32 getput_arg_fast(struct sljit_compiler *compiler, sljit_s32 flag
{
SLJIT_ASSERT(arg & SLJIT_MEM);
- if ((!(flags & WRITE_BACK) || !(arg & REG_MASK)) && !(arg & OFFS_REG_MASK) && argw <= SIMM_MAX && argw >= SIMM_MIN) {
+ if (!(arg & OFFS_REG_MASK) && argw <= SIMM_MAX && argw >= SIMM_MIN) {
/* Works for both absolute and relative addresses. */
if (SLJIT_UNLIKELY(flags & ARG_TEST))
return 1;
@@ -813,12 +812,6 @@ static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sl
base = arg & REG_MASK;
if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
- if (SLJIT_UNLIKELY(flags & WRITE_BACK)) {
- SLJIT_ASSERT(argw == 0);
- FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(OFFS_REG(arg)) | D(base), DR(base)));
- return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(base) | TA(reg_ar), delay_slot);
- }
-
argw &= 0x3;
/* Using the cache. */
@@ -855,29 +848,6 @@ static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sl
return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | SA(tmp_ar) | TA(reg_ar), delay_slot);
}
- if (SLJIT_UNLIKELY(flags & WRITE_BACK) && base) {
- if (argw <= SIMM_MAX && argw >= SIMM_MIN) {
- if (argw)
- FAIL_IF(push_inst(compiler, ADDIU_W | S(base) | T(base) | IMM(argw), DR(base)));
- }
- else {
- if (compiler->cache_arg == SLJIT_MEM && argw - compiler->cache_argw <= SIMM_MAX && argw - compiler->cache_argw >= SIMM_MIN) {
- if (argw != compiler->cache_argw) {
- FAIL_IF(push_inst(compiler, ADDIU_W | S(TMP_REG3) | T(TMP_REG3) | IMM(argw - compiler->cache_argw), DR(TMP_REG3)));
- compiler->cache_argw = argw;
- }
- FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(TMP_REG3) | D(base), DR(base)));
- }
- else {
- compiler->cache_arg = SLJIT_MEM;
- compiler->cache_argw = argw;
- FAIL_IF(load_immediate(compiler, DR(TMP_REG3), argw));
- FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(TMP_REG3) | D(base), DR(base)));
- }
- }
- return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(base) | TA(reg_ar), delay_slot);
- }
-
if (compiler->cache_arg == arg && argw - compiler->cache_argw <= SIMM_MAX && argw - compiler->cache_argw >= SIMM_MIN) {
if (argw != compiler->cache_argw) {
FAIL_IF(push_inst(compiler, ADDIU_W | S(TMP_REG3) | T(TMP_REG3) | IMM(argw - compiler->cache_argw), DR(TMP_REG3)));
@@ -951,7 +921,7 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3
else if (FAST_IS_REG(dst)) {
dst_r = dst;
flags |= REG_DEST;
- if (op >= SLJIT_MOV && op <= SLJIT_MOVU_S32)
+ if (op >= SLJIT_MOV && op <= SLJIT_MOV_P)
sugg_src2_r = dst_r;
}
else if ((dst & SLJIT_MEM) && !getput_arg_fast(compiler, flags | ARG_TEST, DR(TMP_REG1), dst, dstw))
@@ -1005,7 +975,7 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3
if (FAST_IS_REG(src2)) {
src2_r = src2;
flags |= REG2_SOURCE;
- if (!(flags & REG_DEST) && op >= SLJIT_MOV && op <= SLJIT_MOVU_S32)
+ if (!(flags & REG_DEST) && op >= SLJIT_MOV && op <= SLJIT_MOV_P)
dst_r = src2_r;
}
else if (src2 & SLJIT_IMM) {
@@ -1016,7 +986,7 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3
}
else {
src2_r = 0;
- if ((op >= SLJIT_MOV && op <= SLJIT_MOVU_S32) && (dst & SLJIT_MEM))
+ if ((op >= SLJIT_MOV && op <= SLJIT_MOV_P) && (dst & SLJIT_MEM))
dst_r = 0;
}
}
@@ -1155,11 +1125,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
}
#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
- if ((op & SLJIT_I32_OP) && GET_OPCODE(op) >= SLJIT_NOT) {
+ if ((op & SLJIT_I32_OP) && GET_OPCODE(op) >= SLJIT_NOT)
flags |= INT_DATA | SIGNED_DATA;
- if (src & SLJIT_IMM)
- srcw = (sljit_s32)srcw;
- }
#endif
switch (GET_OPCODE(op)) {
@@ -1193,36 +1160,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
case SLJIT_MOV_S16:
return emit_op(compiler, SLJIT_MOV_S16, HALF_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s16)srcw : srcw);
- case SLJIT_MOVU:
- case SLJIT_MOVU_P:
- return emit_op(compiler, SLJIT_MOV, WORD_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw);
-
- case SLJIT_MOVU_U32:
-#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
- return emit_op(compiler, SLJIT_MOV_U32, INT_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw);
-#else
- return emit_op(compiler, SLJIT_MOV_U32, INT_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u32)srcw : srcw);
-#endif
-
- case SLJIT_MOVU_S32:
-#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
- return emit_op(compiler, SLJIT_MOV_S32, INT_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw);
-#else
- return emit_op(compiler, SLJIT_MOV_S32, INT_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s32)srcw : srcw);
-#endif
-
- case SLJIT_MOVU_U8:
- return emit_op(compiler, SLJIT_MOV_U8, BYTE_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u8)srcw : srcw);
-
- case SLJIT_MOVU_S8:
- return emit_op(compiler, SLJIT_MOV_S8, BYTE_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s8)srcw : srcw);
-
- case SLJIT_MOVU_U16:
- return emit_op(compiler, SLJIT_MOV_U16, HALF_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u16)srcw : srcw);
-
- case SLJIT_MOVU_S16:
- return emit_op(compiler, SLJIT_MOV_S16, HALF_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s16)srcw : srcw);
-
case SLJIT_NOT:
return emit_op(compiler, op, flags, dst, dstw, TMP_REG1, 0, src, srcw);
@@ -1233,6 +1170,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
return emit_op(compiler, op, flags, dst, dstw, TMP_REG1, 0, src, srcw);
}
+ SLJIT_UNREACHABLE();
return SLJIT_SUCCESS;
#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
@@ -1304,6 +1242,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile
return emit_op(compiler, op, flags | IMM_OP, dst, dstw, src1, src1w, src2, src2w);
}
+ SLJIT_UNREACHABLE();
return SLJIT_SUCCESS;
#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
@@ -1595,10 +1534,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler
if (FAST_IS_REG(src))
FAIL_IF(push_inst(compiler, ADDU_W | S(src) | TA(0) | DA(RETURN_ADDR_REG), RETURN_ADDR_REG));
- else if (src & SLJIT_MEM)
+ else
FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, RETURN_ADDR_REG, src, srcw));
- else if (src & SLJIT_IMM)
- FAIL_IF(load_immediate(compiler, RETURN_ADDR_REG, srcw));
FAIL_IF(push_inst(compiler, JR | SA(RETURN_ADDR_REG), UNMOVABLE_INS));
return push_inst(compiler, NOP, UNMOVABLE_INS);
diff --git a/sljit/sljitNativePPC_common.c b/sljit/sljitNativePPC_common.c
index e5fd42e..5ef4ac9 100644
--- a/sljit/sljitNativePPC_common.c
+++ b/sljit/sljitNativePPC_common.c
@@ -93,11 +93,10 @@ static void ppc_cache_flush(sljit_ins *from, sljit_ins *to)
#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)
#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3)
-#define TMP_REG3 (SLJIT_NUMBER_OF_REGISTERS + 4)
-#define TMP_ZERO (SLJIT_NUMBER_OF_REGISTERS + 5)
+#define TMP_ZERO (SLJIT_NUMBER_OF_REGISTERS + 4)
#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL)
-#define TMP_CALL_REG (SLJIT_NUMBER_OF_REGISTERS + 6)
+#define TMP_CALL_REG (SLJIT_NUMBER_OF_REGISTERS + 5)
#else
#define TMP_CALL_REG TMP_REG2
#endif
@@ -106,7 +105,7 @@ static void ppc_cache_flush(sljit_ins *from, sljit_ins *to)
#define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2)
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 7] = {
- 0, 3, 4, 5, 6, 7, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 1, 8, 9, 10, 31, 12
+ 0, 3, 4, 5, 6, 7, 8, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 1, 9, 10, 31, 12
};
static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
@@ -540,7 +539,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
return 1;
#endif
- case SLJIT_HAS_PRE_UPDATE:
case SLJIT_HAS_CLZ:
return 1;
@@ -558,46 +556,40 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
/* Creates an index in data_transfer_insts array. */
#define LOAD_DATA 0x01
#define INDEXED 0x02
-#define WRITE_BACK 0x04
+#define SIGNED_DATA 0x04
+
#define WORD_DATA 0x00
#define BYTE_DATA 0x08
#define HALF_DATA 0x10
#define INT_DATA 0x18
-#define SIGNED_DATA 0x20
/* Separates integer and floating point registers */
-#define GPR_REG 0x3f
-#define DOUBLE_DATA 0x40
+#define GPR_REG 0x1f
+#define DOUBLE_DATA 0x20
#define MEM_MASK 0x7f
/* Other inp_flags. */
-#define ARG_TEST 0x000100
/* Integer operation and set flags -> requires exts on 64 bit systems. */
-#define ALT_SIGN_EXT 0x000200
+#define ALT_SIGN_EXT 0x000100
/* This flag affects the RC() and OERC() macros. */
#define ALT_SET_FLAGS 0x000400
-#define ALT_KEEP_CACHE 0x000800
-#define ALT_FORM1 0x010000
-#define ALT_FORM2 0x020000
-#define ALT_FORM3 0x040000
-#define ALT_FORM4 0x080000
-#define ALT_FORM5 0x100000
+#define ALT_FORM1 0x001000
+#define ALT_FORM2 0x002000
+#define ALT_FORM3 0x004000
+#define ALT_FORM4 0x008000
+#define ALT_FORM5 0x010000
/* Source and destination is register. */
#define REG_DEST 0x000001
#define REG1_SOURCE 0x000002
#define REG2_SOURCE 0x000004
-/* getput_arg_fast returned true. */
-#define FAST_DEST 0x000008
-/* Multiple instructions are required. */
-#define SLOW_DEST 0x000010
/*
-ALT_SIGN_EXT 0x000200
-ALT_SET_FLAGS 0x000400
-ALT_FORM1 0x010000
+ALT_SIGN_EXT 0x000100
+ALT_SET_FLAGS 0x000200
+ALT_FORM1 0x001000
...
-ALT_FORM5 0x100000 */
+ALT_FORM5 0x010000 */
#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
#include "sljitNativePPC_32.c"
@@ -745,17 +737,17 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *comp
/* Operators */
/* --------------------------------------------------------------------- */
-/* i/x - immediate/indexed form
- n/w - no write-back / write-back (1 bit)
- s/l - store/load (1 bit)
+/* s/l - store/load (1 bit)
+ i/x - immediate/indexed form
u/s - signed/unsigned (1 bit)
w/b/h/i - word/byte/half/int allowed (2 bit)
- It contans 32 items, but not all are different. */
+
+ Some opcodes are repeated (e.g. store signed / unsigned byte is the same instruction). */
/* 64 bit only: [reg+imm] must be aligned to 4 bytes. */
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
#define INT_ALIGNED 0x10000
-/* 64-bit only: there is no lwau instruction. */
-#define UPDATE_REQ 0x20000
+#endif
#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
#define ARCH_32_64(a, b) a
@@ -764,406 +756,217 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *comp
#else
#define ARCH_32_64(a, b) b
#define INST_CODE_AND_DST(inst, flags, reg) \
- (((inst) & ~(INT_ALIGNED | UPDATE_REQ)) | (((flags) & MEM_MASK) <= GPR_REG ? D(reg) : FD(reg)))
+ (((inst) & ~INT_ALIGNED) | (((flags) & MEM_MASK) <= GPR_REG ? D(reg) : FD(reg)))
#endif
static const sljit_ins data_transfer_insts[64 + 16] = {
-/* -------- Unsigned -------- */
+/* -------- Integer -------- */
/* Word. */
-/* u w n i s */ ARCH_32_64(HI(36) /* stw */, HI(62) | INT_ALIGNED | 0x0 /* std */),
-/* u w n i l */ ARCH_32_64(HI(32) /* lwz */, HI(58) | INT_ALIGNED | 0x0 /* ld */),
-/* u w n x s */ ARCH_32_64(HI(31) | LO(151) /* stwx */, HI(31) | LO(149) /* stdx */),
-/* u w n x l */ ARCH_32_64(HI(31) | LO(23) /* lwzx */, HI(31) | LO(21) /* ldx */),
+/* w u i s */ ARCH_32_64(HI(36) /* stw */, HI(62) | INT_ALIGNED | 0x0 /* std */),
+/* w u i l */ ARCH_32_64(HI(32) /* lwz */, HI(58) | INT_ALIGNED | 0x0 /* ld */),
+/* w u x s */ ARCH_32_64(HI(31) | LO(151) /* stwx */, HI(31) | LO(149) /* stdx */),
+/* w u x l */ ARCH_32_64(HI(31) | LO(23) /* lwzx */, HI(31) | LO(21) /* ldx */),
-/* u w w i s */ ARCH_32_64(HI(37) /* stwu */, HI(62) | INT_ALIGNED | 0x1 /* stdu */),
-/* u w w i l */ ARCH_32_64(HI(33) /* lwzu */, HI(58) | INT_ALIGNED | 0x1 /* ldu */),
-/* u w w x s */ ARCH_32_64(HI(31) | LO(183) /* stwux */, HI(31) | LO(181) /* stdux */),
-/* u w w x l */ ARCH_32_64(HI(31) | LO(55) /* lwzux */, HI(31) | LO(53) /* ldux */),
+/* w s i s */ ARCH_32_64(HI(36) /* stw */, HI(62) | INT_ALIGNED | 0x0 /* std */),
+/* w s i l */ ARCH_32_64(HI(32) /* lwz */, HI(58) | INT_ALIGNED | 0x0 /* ld */),
+/* w s x s */ ARCH_32_64(HI(31) | LO(151) /* stwx */, HI(31) | LO(149) /* stdx */),
+/* w s x l */ ARCH_32_64(HI(31) | LO(23) /* lwzx */, HI(31) | LO(21) /* ldx */),
/* Byte. */
-/* u b n i s */ HI(38) /* stb */,
-/* u b n i l */ HI(34) /* lbz */,
-/* u b n x s */ HI(31) | LO(215) /* stbx */,
-/* u b n x l */ HI(31) | LO(87) /* lbzx */,
+/* b u i s */ HI(38) /* stb */,
+/* b u i l */ HI(34) /* lbz */,
+/* b u x s */ HI(31) | LO(215) /* stbx */,
+/* b u x l */ HI(31) | LO(87) /* lbzx */,
-/* u b w i s */ HI(39) /* stbu */,
-/* u b w i l */ HI(35) /* lbzu */,
-/* u b w x s */ HI(31) | LO(247) /* stbux */,
-/* u b w x l */ HI(31) | LO(119) /* lbzux */,
+/* b s i s */ HI(38) /* stb */,
+/* b s i l */ HI(34) /* lbz */ /* EXTS_REQ */,
+/* b s x s */ HI(31) | LO(215) /* stbx */,
+/* b s x l */ HI(31) | LO(87) /* lbzx */ /* EXTS_REQ */,
/* Half. */
-/* u h n i s */ HI(44) /* sth */,
-/* u h n i l */ HI(40) /* lhz */,
-/* u h n x s */ HI(31) | LO(407) /* sthx */,
-/* u h n x l */ HI(31) | LO(279) /* lhzx */,
+/* h u i s */ HI(44) /* sth */,
+/* h u i l */ HI(40) /* lhz */,
+/* h u x s */ HI(31) | LO(407) /* sthx */,
+/* h u x l */ HI(31) | LO(279) /* lhzx */,
-/* u h w i s */ HI(45) /* sthu */,
-/* u h w i l */ HI(41) /* lhzu */,
-/* u h w x s */ HI(31) | LO(439) /* sthux */,
-/* u h w x l */ HI(31) | LO(311) /* lhzux */,
+/* h s i s */ HI(44) /* sth */,
+/* h s i l */ HI(42) /* lha */,
+/* h s x s */ HI(31) | LO(407) /* sthx */,
+/* h s x l */ HI(31) | LO(343) /* lhax */,
/* Int. */
-/* u i n i s */ HI(36) /* stw */,
-/* u i n i l */ HI(32) /* lwz */,
-/* u i n x s */ HI(31) | LO(151) /* stwx */,
-/* u i n x l */ HI(31) | LO(23) /* lwzx */,
+/* i u i s */ HI(36) /* stw */,
+/* i u i l */ HI(32) /* lwz */,
+/* i u x s */ HI(31) | LO(151) /* stwx */,
+/* i u x l */ HI(31) | LO(23) /* lwzx */,
+
+/* i s i s */ HI(36) /* stw */,
+/* i s i l */ ARCH_32_64(HI(32) /* lwz */, HI(58) | INT_ALIGNED | 0x2 /* lwa */),
+/* i s x s */ HI(31) | LO(151) /* stwx */,
+/* i s x l */ ARCH_32_64(HI(31) | LO(23) /* lwzx */, HI(31) | LO(341) /* lwax */),
+
+/* -------- Floating point -------- */
+
+/* d i s */ HI(54) /* stfd */,
+/* d i l */ HI(50) /* lfd */,
+/* d x s */ HI(31) | LO(727) /* stfdx */,
+/* d x l */ HI(31) | LO(599) /* lfdx */,
+
+/* s i s */ HI(52) /* stfs */,
+/* s i l */ HI(48) /* lfs */,
+/* s x s */ HI(31) | LO(663) /* stfsx */,
+/* s x l */ HI(31) | LO(535) /* lfsx */,
+};
-/* u i w i s */ HI(37) /* stwu */,
-/* u i w i l */ HI(33) /* lwzu */,
-/* u i w x s */ HI(31) | LO(183) /* stwux */,
-/* u i w x l */ HI(31) | LO(55) /* lwzux */,
+static const sljit_ins updated_data_transfer_insts[64] = {
-/* -------- Signed -------- */
+/* -------- Integer -------- */
/* Word. */
-/* s w n i s */ ARCH_32_64(HI(36) /* stw */, HI(62) | INT_ALIGNED | 0x0 /* std */),
-/* s w n i l */ ARCH_32_64(HI(32) /* lwz */, HI(58) | INT_ALIGNED | 0x0 /* ld */),
-/* s w n x s */ ARCH_32_64(HI(31) | LO(151) /* stwx */, HI(31) | LO(149) /* stdx */),
-/* s w n x l */ ARCH_32_64(HI(31) | LO(23) /* lwzx */, HI(31) | LO(21) /* ldx */),
+/* w u i s */ ARCH_32_64(HI(37) /* stwu */, HI(62) | INT_ALIGNED | 0x1 /* stdu */),
+/* w u i l */ ARCH_32_64(HI(33) /* lwzu */, HI(58) | INT_ALIGNED | 0x1 /* ldu */),
+/* w u x s */ ARCH_32_64(HI(31) | LO(183) /* stwux */, HI(31) | LO(181) /* stdux */),
+/* w u x l */ ARCH_32_64(HI(31) | LO(55) /* lwzux */, HI(31) | LO(53) /* ldux */),
-/* s w w i s */ ARCH_32_64(HI(37) /* stwu */, HI(62) | INT_ALIGNED | 0x1 /* stdu */),
-/* s w w i l */ ARCH_32_64(HI(33) /* lwzu */, HI(58) | INT_ALIGNED | 0x1 /* ldu */),
-/* s w w x s */ ARCH_32_64(HI(31) | LO(183) /* stwux */, HI(31) | LO(181) /* stdux */),
-/* s w w x l */ ARCH_32_64(HI(31) | LO(55) /* lwzux */, HI(31) | LO(53) /* ldux */),
+/* w s i s */ ARCH_32_64(HI(37) /* stwu */, HI(62) | INT_ALIGNED | 0x1 /* stdu */),
+/* w s i l */ ARCH_32_64(HI(33) /* lwzu */, HI(58) | INT_ALIGNED | 0x1 /* ldu */),
+/* w s x s */ ARCH_32_64(HI(31) | LO(183) /* stwux */, HI(31) | LO(181) /* stdux */),
+/* w s x l */ ARCH_32_64(HI(31) | LO(55) /* lwzux */, HI(31) | LO(53) /* ldux */),
/* Byte. */
-/* s b n i s */ HI(38) /* stb */,
-/* s b n i l */ HI(34) /* lbz */ /* EXTS_REQ */,
-/* s b n x s */ HI(31) | LO(215) /* stbx */,
-/* s b n x l */ HI(31) | LO(87) /* lbzx */ /* EXTS_REQ */,
+/* b u i s */ HI(39) /* stbu */,
+/* b u i l */ HI(35) /* lbzu */,
+/* b u x s */ HI(31) | LO(247) /* stbux */,
+/* b u x l */ HI(31) | LO(119) /* lbzux */,
-/* s b w i s */ HI(39) /* stbu */,
-/* s b w i l */ HI(35) /* lbzu */ /* EXTS_REQ */,
-/* s b w x s */ HI(31) | LO(247) /* stbux */,
-/* s b w x l */ HI(31) | LO(119) /* lbzux */ /* EXTS_REQ */,
+/* b s i s */ HI(39) /* stbu */,
+/* b s i l */ 0 /* no such instruction */,
+/* b s x s */ HI(31) | LO(247) /* stbux */,
+/* b s x l */ 0 /* no such instruction */,
/* Half. */
-/* s h n i s */ HI(44) /* sth */,
-/* s h n i l */ HI(42) /* lha */,
-/* s h n x s */ HI(31) | LO(407) /* sthx */,
-/* s h n x l */ HI(31) | LO(343) /* lhax */,
+/* h u i s */ HI(45) /* sthu */,
+/* h u i l */ HI(41) /* lhzu */,
+/* h u x s */ HI(31) | LO(439) /* sthux */,
+/* h u x l */ HI(31) | LO(311) /* lhzux */,
-/* s h w i s */ HI(45) /* sthu */,
-/* s h w i l */ HI(43) /* lhau */,
-/* s h w x s */ HI(31) | LO(439) /* sthux */,
-/* s h w x l */ HI(31) | LO(375) /* lhaux */,
+/* h s i s */ HI(45) /* sthu */,
+/* h s i l */ HI(43) /* lhau */,
+/* h s x s */ HI(31) | LO(439) /* sthux */,
+/* h s x l */ HI(31) | LO(375) /* lhaux */,
/* Int. */
-/* s i n i s */ HI(36) /* stw */,
-/* s i n i l */ ARCH_32_64(HI(32) /* lwz */, HI(58) | INT_ALIGNED | 0x2 /* lwa */),
-/* s i n x s */ HI(31) | LO(151) /* stwx */,
-/* s i n x l */ ARCH_32_64(HI(31) | LO(23) /* lwzx */, HI(31) | LO(341) /* lwax */),
-
-/* s i w i s */ HI(37) /* stwu */,
-/* s i w i l */ ARCH_32_64(HI(33) /* lwzu */, HI(58) | INT_ALIGNED | UPDATE_REQ | 0x2 /* lwa */),
-/* s i w x s */ HI(31) | LO(183) /* stwux */,
-/* s i w x l */ ARCH_32_64(HI(31) | LO(55) /* lwzux */, HI(31) | LO(373) /* lwaux */),
-
-/* -------- Double -------- */
-
-/* d n i s */ HI(54) /* stfd */,
-/* d n i l */ HI(50) /* lfd */,
-/* d n x s */ HI(31) | LO(727) /* stfdx */,
-/* d n x l */ HI(31) | LO(599) /* lfdx */,
-
-/* d w i s */ HI(55) /* stfdu */,
-/* d w i l */ HI(51) /* lfdu */,
-/* d w x s */ HI(31) | LO(759) /* stfdux */,
-/* d w x l */ HI(31) | LO(631) /* lfdux */,
-
-/* s n i s */ HI(52) /* stfs */,
-/* s n i l */ HI(48) /* lfs */,
-/* s n x s */ HI(31) | LO(663) /* stfsx */,
-/* s n x l */ HI(31) | LO(535) /* lfsx */,
-
-/* s w i s */ HI(53) /* stfsu */,
-/* s w i l */ HI(49) /* lfsu */,
-/* s w x s */ HI(31) | LO(695) /* stfsux */,
-/* s w x l */ HI(31) | LO(567) /* lfsux */,
+/* i u i s */ HI(37) /* stwu */,
+/* i u i l */ HI(33) /* lwzu */,
+/* i u x s */ HI(31) | LO(183) /* stwux */,
+/* i u x l */ HI(31) | LO(55) /* lwzux */,
+
+/* i s i s */ HI(37) /* stwu */,
+/* i s i l */ ARCH_32_64(HI(33) /* lwzu */, 0 /* no such instruction */),
+/* i s x s */ HI(31) | LO(183) /* stwux */,
+/* i s x l */ ARCH_32_64(HI(31) | LO(55) /* lwzux */, HI(31) | LO(373) /* lwaux */),
+
+/* -------- Floating point -------- */
+
+/* d i s */ HI(55) /* stfdu */,
+/* d i l */ HI(51) /* lfdu */,
+/* d x s */ HI(31) | LO(759) /* stfdux */,
+/* d x l */ HI(31) | LO(631) /* lfdux */,
+
+/* s i s */ HI(53) /* stfsu */,
+/* s i l */ HI(49) /* lfsu */,
+/* s x s */ HI(31) | LO(695) /* stfsux */,
+/* s x l */ HI(31) | LO(567) /* lfsux */,
};
#undef ARCH_32_64
/* Simple cases, (no caching is required). */
-static sljit_s32 getput_arg_fast(struct sljit_compiler *compiler, sljit_s32 inp_flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
+static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 inp_flags, sljit_s32 reg,
+ sljit_s32 arg, sljit_sw argw, sljit_s32 tmp_reg)
{
sljit_ins inst;
+ sljit_s32 offs_reg;
+ sljit_sw high_short;
/* Should work when (arg & REG_MASK) == 0. */
SLJIT_ASSERT(A(0) == 0);
SLJIT_ASSERT(arg & SLJIT_MEM);
- if (arg & OFFS_REG_MASK) {
- if (argw & 0x3)
- return 0;
- if (inp_flags & ARG_TEST)
- return 1;
-
- inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK];
- SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ)));
- FAIL_IF(push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | B(OFFS_REG(arg))));
- return -1;
- }
-
- if (SLJIT_UNLIKELY(!(arg & REG_MASK)))
- inp_flags &= ~WRITE_BACK;
-
-#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
- inst = data_transfer_insts[inp_flags & MEM_MASK];
- SLJIT_ASSERT((arg & REG_MASK) || !(inst & UPDATE_REQ));
-
- if (argw > SIMM_MAX || argw < SIMM_MIN || ((inst & INT_ALIGNED) && (argw & 0x3)) || (inst & UPDATE_REQ))
- return 0;
- if (inp_flags & ARG_TEST)
- return 1;
-#endif
+ if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
+ argw &= 0x3;
+ offs_reg = OFFS_REG(arg);
+ if (argw != 0) {
#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
- if (argw > SIMM_MAX || argw < SIMM_MIN)
- return 0;
- if (inp_flags & ARG_TEST)
- return 1;
-
- inst = data_transfer_insts[inp_flags & MEM_MASK];
- SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ)));
+ FAIL_IF(push_inst(compiler, RLWINM | S(OFFS_REG(arg)) | A(tmp_reg) | (argw << 11) | ((31 - argw) << 1)));
+#else
+ FAIL_IF(push_inst(compiler, RLDI(tmp_reg, OFFS_REG(arg), argw, 63 - argw, 1)));
#endif
+ offs_reg = tmp_reg;
+ }
- FAIL_IF(push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | IMM(argw)));
- return -1;
-}
+ inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK];
-/* See getput_arg below.
- Note: can_cache is called only for binary operators. Those operator always
- uses word arguments without write back. */
-static sljit_s32 can_cache(sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw)
-{
- sljit_sw high_short, next_high_short;
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
- sljit_sw diff;
+ SLJIT_ASSERT(!(inst & INT_ALIGNED));
#endif
- SLJIT_ASSERT((arg & SLJIT_MEM) && (next_arg & SLJIT_MEM));
-
- if (arg & OFFS_REG_MASK)
- return ((arg & OFFS_REG_MASK) == (next_arg & OFFS_REG_MASK) && (argw & 0x3) == (next_argw & 0x3));
-
- if (next_arg & OFFS_REG_MASK)
- return 0;
-
-#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
- high_short = (argw + ((argw & 0x8000) << 1)) & ~0xffff;
- next_high_short = (next_argw + ((next_argw & 0x8000) << 1)) & ~0xffff;
- return high_short == next_high_short;
-#else
- if (argw <= 0x7fffffffl && argw >= -0x80000000l) {
- high_short = (argw + ((argw & 0x8000) << 1)) & ~0xffff;
- next_high_short = (next_argw + ((next_argw & 0x8000) << 1)) & ~0xffff;
- if (high_short == next_high_short)
- return 1;
+ return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | B(offs_reg));
}
- diff = argw - next_argw;
- if (!(arg & REG_MASK))
- return diff <= SIMM_MAX && diff >= SIMM_MIN;
-
- if (arg == next_arg && diff <= SIMM_MAX && diff >= SIMM_MIN)
- return 1;
-
- return 0;
-#endif
-}
-
-#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
-#define ADJUST_CACHED_IMM(imm) \
- if ((inst & INT_ALIGNED) && (imm & 0x3)) { \
- /* Adjust cached value. Fortunately this is really a rare case */ \
- compiler->cache_argw += imm & 0x3; \
- FAIL_IF(push_inst(compiler, ADDI | D(TMP_REG3) | A(TMP_REG3) | (imm & 0x3))); \
- imm &= ~0x3; \
- }
-#endif
+ inst = data_transfer_insts[inp_flags & MEM_MASK];
+ arg &= REG_MASK;
-/* Emit the necessary instructions. See can_cache above. */
-static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 inp_flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw)
-{
- sljit_s32 tmp_r;
- sljit_ins inst;
- sljit_sw high_short, next_high_short;
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
- sljit_sw diff;
-#endif
-
- SLJIT_ASSERT(arg & SLJIT_MEM);
-
- tmp_r = ((inp_flags & LOAD_DATA) && ((inp_flags) & MEM_MASK) <= GPR_REG) ? reg : TMP_REG1;
- /* Special case for "mov reg, [reg, ... ]". */
- if ((arg & REG_MASK) == tmp_r)
- tmp_r = TMP_REG1;
-
- if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
- argw &= 0x3;
- /* Otherwise getput_arg_fast would capture it. */
- SLJIT_ASSERT(argw);
+ if ((inst & INT_ALIGNED) && (argw & 0x3) != 0) {
+ FAIL_IF(load_immediate(compiler, tmp_reg, argw));
- if ((SLJIT_MEM | (arg & OFFS_REG_MASK)) == compiler->cache_arg && argw == compiler->cache_argw)
- tmp_r = TMP_REG3;
- else {
- if ((arg & OFFS_REG_MASK) == (next_arg & OFFS_REG_MASK) && argw == (next_argw & 0x3)) {
- compiler->cache_arg = SLJIT_MEM | (arg & OFFS_REG_MASK);
- compiler->cache_argw = argw;
- tmp_r = TMP_REG3;
- }
-#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
- FAIL_IF(push_inst(compiler, RLWINM | S(OFFS_REG(arg)) | A(tmp_r) | (argw << 11) | ((31 - argw) << 1)));
-#else
- FAIL_IF(push_inst(compiler, RLDI(tmp_r, OFFS_REG(arg), argw, 63 - argw, 1)));
-#endif
- }
inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK];
- SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ)));
- return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | B(tmp_r));
+ return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg) | B(tmp_reg));
}
+#endif
- if (SLJIT_UNLIKELY(!(arg & REG_MASK)))
- inp_flags &= ~WRITE_BACK;
-
- inst = data_transfer_insts[inp_flags & MEM_MASK];
- SLJIT_ASSERT((arg & REG_MASK) || !(inst & UPDATE_REQ));
+ if (argw <= SIMM_MAX && argw >= SIMM_MIN)
+ return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg) | IMM(argw));
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
- if (argw <= 0x7fff7fffl && argw >= -0x80000000l
- && (!(inst & INT_ALIGNED) || !(argw & 0x3)) && !(inst & UPDATE_REQ)) {
+ if (argw <= 0x7fff7fffl && argw >= -0x80000000l) {
#endif
- arg &= REG_MASK;
high_short = (sljit_s32)(argw + ((argw & 0x8000) << 1)) & ~0xffff;
- /* The getput_arg_fast should handle this otherwise. */
+
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
SLJIT_ASSERT(high_short && high_short <= 0x7fffffffl && high_short >= -0x80000000l);
#else
- SLJIT_ASSERT(high_short && !(inst & (INT_ALIGNED | UPDATE_REQ)));
+ SLJIT_ASSERT(high_short);
#endif
- if (inp_flags & WRITE_BACK) {
- tmp_r = arg;
- FAIL_IF(push_inst(compiler, ADDIS | D(arg) | A(arg) | IMM(high_short >> 16)));
- }
- else if (compiler->cache_arg != (SLJIT_MEM | arg) || high_short != compiler->cache_argw) {
- if ((next_arg & SLJIT_MEM) && !(next_arg & OFFS_REG_MASK)) {
- next_high_short = (sljit_s32)(next_argw + ((next_argw & 0x8000) << 1)) & ~0xffff;
- if (high_short == next_high_short) {
- compiler->cache_arg = SLJIT_MEM | arg;
- compiler->cache_argw = high_short;
- tmp_r = TMP_REG3;
- }
- }
- FAIL_IF(push_inst(compiler, ADDIS | D(tmp_r) | A(arg & REG_MASK) | IMM(high_short >> 16)));
- }
- else
- tmp_r = TMP_REG3;
-
- return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(tmp_r) | IMM(argw));
+ FAIL_IF(push_inst(compiler, ADDIS | D(tmp_reg) | A(arg) | IMM(high_short >> 16)));
+ return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(tmp_reg) | IMM(argw));
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
}
- /* Everything else is PPC-64 only. */
- if (SLJIT_UNLIKELY(!(arg & REG_MASK))) {
- diff = argw - compiler->cache_argw;
- if ((compiler->cache_arg & SLJIT_IMM) && diff <= SIMM_MAX && diff >= SIMM_MIN) {
- ADJUST_CACHED_IMM(diff);
- return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(TMP_REG3) | IMM(diff));
- }
-
- diff = argw - next_argw;
- if ((next_arg & SLJIT_MEM) && diff <= SIMM_MAX && diff >= SIMM_MIN) {
- SLJIT_ASSERT(inp_flags & LOAD_DATA);
-
- compiler->cache_arg = SLJIT_IMM;
- compiler->cache_argw = argw;
- tmp_r = TMP_REG3;
- }
-
- FAIL_IF(load_immediate(compiler, tmp_r, argw));
- return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(tmp_r));
- }
-
- diff = argw - compiler->cache_argw;
- if (compiler->cache_arg == arg && diff <= SIMM_MAX && diff >= SIMM_MIN) {
- SLJIT_ASSERT(!(inp_flags & WRITE_BACK) && !(inst & UPDATE_REQ));
- ADJUST_CACHED_IMM(diff);
- return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(TMP_REG3) | IMM(diff));
- }
-
- if ((compiler->cache_arg & SLJIT_IMM) && diff <= SIMM_MAX && diff >= SIMM_MIN) {
- inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK];
- SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ)));
- if (compiler->cache_argw != argw) {
- FAIL_IF(push_inst(compiler, ADDI | D(TMP_REG3) | A(TMP_REG3) | IMM(diff)));
- compiler->cache_argw = argw;
- }
- return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | B(TMP_REG3));
- }
-
- if (argw == next_argw && (next_arg & SLJIT_MEM)) {
- SLJIT_ASSERT(inp_flags & LOAD_DATA);
- FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
-
- compiler->cache_arg = SLJIT_IMM;
- compiler->cache_argw = argw;
-
- inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK];
- SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ)));
- return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | B(TMP_REG3));
- }
-
- diff = argw - next_argw;
- if (arg == next_arg && !(inp_flags & WRITE_BACK) && diff <= SIMM_MAX && diff >= SIMM_MIN) {
- SLJIT_ASSERT(inp_flags & LOAD_DATA);
- FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
- FAIL_IF(push_inst(compiler, ADD | D(TMP_REG3) | A(TMP_REG3) | B(arg & REG_MASK)));
-
- compiler->cache_arg = arg;
- compiler->cache_argw = argw;
-
- return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(TMP_REG3));
- }
-
- if ((next_arg & SLJIT_MEM) && !(next_arg & OFFS_REG_MASK) && diff <= SIMM_MAX && diff >= SIMM_MIN) {
- SLJIT_ASSERT(inp_flags & LOAD_DATA);
- FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
+ /* The rest is PPC-64 only. */
- compiler->cache_arg = SLJIT_IMM;
- compiler->cache_argw = argw;
- tmp_r = TMP_REG3;
- }
- else
- FAIL_IF(load_immediate(compiler, tmp_r, argw));
+ FAIL_IF(load_immediate(compiler, tmp_reg, argw));
- /* Get the indexed version instead of the normal one. */
inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK];
- SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ)));
- return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | B(tmp_r));
+ return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg) | B(tmp_reg));
#endif
}
-static SLJIT_INLINE sljit_s32 emit_op_mem2(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg1, sljit_sw arg1w, sljit_s32 arg2, sljit_sw arg2w)
-{
- if (getput_arg_fast(compiler, flags, reg, arg1, arg1w))
- return compiler->error;
- return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w);
-}
-
static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 input_flags,
sljit_s32 dst, sljit_sw dstw,
sljit_s32 src1, sljit_sw src1w,
@@ -1171,40 +974,21 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3
{
/* arg1 goes to TMP_REG1 or src reg
arg2 goes to TMP_REG2, imm or src reg
- TMP_REG3 can be used for caching
- result goes to TMP_REG2, so put result can use TMP_REG1 and TMP_REG3. */
- sljit_s32 dst_r;
+ result goes to TMP_REG2, so put result can use TMP_REG1. */
+ sljit_s32 dst_r = TMP_REG2;
sljit_s32 src1_r;
sljit_s32 src2_r;
sljit_s32 sugg_src2_r = TMP_REG2;
sljit_s32 flags = input_flags & (ALT_FORM1 | ALT_FORM2 | ALT_FORM3 | ALT_FORM4 | ALT_FORM5 | ALT_SIGN_EXT | ALT_SET_FLAGS);
- if (!(input_flags & ALT_KEEP_CACHE)) {
- compiler->cache_arg = 0;
- compiler->cache_argw = 0;
- }
-
/* Destination check. */
- if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) {
- dst_r = TMP_REG2;
- }
- else if (FAST_IS_REG(dst)) {
+ if (SLOW_IS_REG(dst)) {
dst_r = dst;
flags |= REG_DEST;
- if (op >= SLJIT_MOV && op <= SLJIT_MOVU_S32)
+
+ if (op >= SLJIT_MOV && op <= SLJIT_MOV_P)
sugg_src2_r = dst_r;
}
- else {
- SLJIT_ASSERT(dst & SLJIT_MEM);
- if (getput_arg_fast(compiler, input_flags | ARG_TEST, TMP_REG2, dst, dstw)) {
- flags |= FAST_DEST;
- dst_r = TMP_REG2;
- }
- else {
- flags |= SLOW_DEST;
- dst_r = 0;
- }
- }
/* Source 1. */
if (FAST_IS_REG(src1)) {
@@ -1215,80 +999,34 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3
FAIL_IF(load_immediate(compiler, TMP_REG1, src1w));
src1_r = TMP_REG1;
}
- else if (getput_arg_fast(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w)) {
- FAIL_IF(compiler->error);
+ else {
+ FAIL_IF(emit_op_mem(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, TMP_REG1));
src1_r = TMP_REG1;
}
- else
- src1_r = 0;
/* Source 2. */
if (FAST_IS_REG(src2)) {
src2_r = src2;
flags |= REG2_SOURCE;
- if (!(flags & REG_DEST) && op >= SLJIT_MOV && op <= SLJIT_MOVU_S32)
+
+ if (!(flags & REG_DEST) && op >= SLJIT_MOV && op <= SLJIT_MOV_P)
dst_r = src2_r;
}
else if (src2 & SLJIT_IMM) {
FAIL_IF(load_immediate(compiler, sugg_src2_r, src2w));
src2_r = sugg_src2_r;
}
- else if (getput_arg_fast(compiler, input_flags | LOAD_DATA, sugg_src2_r, src2, src2w)) {
- FAIL_IF(compiler->error);
- src2_r = sugg_src2_r;
- }
- else
- src2_r = 0;
-
- /* src1_r, src2_r and dst_r can be zero (=unprocessed).
- All arguments are complex addressing modes, and it is a binary operator. */
- if (src1_r == 0 && src2_r == 0 && dst_r == 0) {
- if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) {
- FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG2, src2, src2w, src1, src1w));
- FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw));
- }
- else {
- FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w));
- FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG2, src2, src2w, dst, dstw));
- }
- src1_r = TMP_REG1;
- src2_r = TMP_REG2;
- }
- else if (src1_r == 0 && src2_r == 0) {
- FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w));
- src1_r = TMP_REG1;
- }
- else if (src1_r == 0 && dst_r == 0) {
- FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw));
- src1_r = TMP_REG1;
- }
- else if (src2_r == 0 && dst_r == 0) {
- FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, sugg_src2_r, src2, src2w, dst, dstw));
- src2_r = sugg_src2_r;
- }
-
- if (dst_r == 0)
- dst_r = TMP_REG2;
-
- if (src1_r == 0) {
- FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, 0, 0));
- src1_r = TMP_REG1;
- }
-
- if (src2_r == 0) {
- FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, sugg_src2_r, src2, src2w, 0, 0));
+ else {
+ FAIL_IF(emit_op_mem(compiler, input_flags | LOAD_DATA, sugg_src2_r, src2, src2w, TMP_REG2));
src2_r = sugg_src2_r;
}
FAIL_IF(emit_single_op(compiler, op, flags, dst_r, src1_r, src2_r));
- if (flags & (FAST_DEST | SLOW_DEST)) {
- if (flags & FAST_DEST)
- FAIL_IF(getput_arg_fast(compiler, input_flags, dst_r, dst, dstw));
- else
- FAIL_IF(getput_arg(compiler, input_flags, dst_r, dst, dstw, 0, 0));
- }
- return SLJIT_SUCCESS;
+ if (!(dst & SLJIT_MEM))
+ return SLJIT_SUCCESS;
+
+ return emit_op_mem(compiler, input_flags, dst_r, dst, dstw, TMP_REG1);
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
@@ -1397,34 +1135,26 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
return SLJIT_SUCCESS;
}
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
if (op_flags & SLJIT_I32_OP) {
if (op < SLJIT_NOT) {
-#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
if (src & SLJIT_MEM) {
if (op == SLJIT_MOV_S32)
op = SLJIT_MOV_U32;
- if (op == SLJIT_MOVU_S32)
- op = SLJIT_MOVU_U32;
}
else if (src & SLJIT_IMM) {
if (op == SLJIT_MOV_U32)
op = SLJIT_MOV_S32;
- if (op == SLJIT_MOVU_U32)
- op = SLJIT_MOVU_S32;
}
-#endif
}
-#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
else {
/* Most operations expect sign extended arguments. */
flags |= INT_DATA | SIGNED_DATA;
- if (src & SLJIT_IMM)
- srcw = (sljit_s32)srcw;
if (HAS_FLAGS(op_flags))
flags |= ALT_SIGN_EXT;
}
-#endif
}
+#endif
switch (op) {
case SLJIT_MOV:
@@ -1455,34 +1185,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
case SLJIT_MOV_S16:
return EMIT_MOV(SLJIT_MOV_S16, HALF_DATA | SIGNED_DATA, (sljit_s16));
- case SLJIT_MOVU:
- case SLJIT_MOVU_P:
-#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
- case SLJIT_MOVU_U32:
- case SLJIT_MOVU_S32:
-#endif
- return emit_op(compiler, SLJIT_MOV, flags | WORD_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw);
-
-#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
- case SLJIT_MOVU_U32:
- return EMIT_MOV(SLJIT_MOV_U32, INT_DATA | WRITE_BACK, (sljit_u32));
-
- case SLJIT_MOVU_S32:
- return EMIT_MOV(SLJIT_MOV_S32, INT_DATA | SIGNED_DATA | WRITE_BACK, (sljit_s32));
-#endif
-
- case SLJIT_MOVU_U8:
- return EMIT_MOV(SLJIT_MOV_U8, BYTE_DATA | WRITE_BACK, (sljit_u8));
-
- case SLJIT_MOVU_S8:
- return EMIT_MOV(SLJIT_MOV_S8, BYTE_DATA | SIGNED_DATA | WRITE_BACK, (sljit_s8));
-
- case SLJIT_MOVU_U16:
- return EMIT_MOV(SLJIT_MOV_U16, HALF_DATA | WRITE_BACK, (sljit_u16));
-
- case SLJIT_MOVU_S16:
- return EMIT_MOV(SLJIT_MOV_S16, HALF_DATA | SIGNED_DATA | WRITE_BACK, (sljit_s16));
-
case SLJIT_NOT:
return emit_op(compiler, SLJIT_NOT, flags, dst, dstw, TMP_REG1, 0, src, srcw);
@@ -1570,8 +1272,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile
#endif
if (GET_FLAG_TYPE(op) == SLJIT_OVERFLOW)
FAIL_IF(push_inst(compiler, MTXER | S(TMP_ZERO)));
- if (src2 == TMP_REG2)
- flags |= ALT_KEEP_CACHE;
switch (GET_OPCODE(op)) {
case SLJIT_ADD:
@@ -1774,7 +1474,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *c
/* Floating point operators */
/* --------------------------------------------------------------------- */
-#define FLOAT_DATA(op) (DOUBLE_DATA | ((op & SLJIT_F32_OP) >> 5))
+#define FLOAT_DATA(op) (DOUBLE_DATA | ((op & SLJIT_F32_OP) >> 6))
#define SELECT_FOP(op, single, double) ((op & SLJIT_F32_OP) ? single : double)
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
@@ -1798,7 +1498,7 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_comp
{
if (src & SLJIT_MEM) {
/* We can ignore the temporary data store on the stack from caching point of view. */
- FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src, srcw, dst, dstw));
+ FAIL_IF(emit_op_mem(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src, srcw, TMP_REG1));
src = TMP_FREG1;
}
@@ -1808,10 +1508,10 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_comp
if (op == SLJIT_CONV_SW_FROM_F64) {
if (FAST_IS_REG(dst)) {
- FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, 0, 0));
- return emit_op_mem2(compiler, WORD_DATA | LOAD_DATA, dst, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, 0, 0);
+ FAIL_IF(emit_op_mem(compiler, DOUBLE_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, TMP_REG1));
+ return emit_op_mem(compiler, WORD_DATA | LOAD_DATA, dst, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, TMP_REG1);
}
- return emit_op_mem2(compiler, DOUBLE_DATA, TMP_FREG1, dst, dstw, 0, 0);
+ return emit_op_mem(compiler, DOUBLE_DATA, TMP_FREG1, dst, dstw, TMP_REG1);
}
#else
FAIL_IF(push_inst(compiler, FCTIWZ | FD(TMP_FREG1) | FB(src)));
@@ -1820,7 +1520,7 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_comp
if (FAST_IS_REG(dst)) {
FAIL_IF(load_immediate(compiler, TMP_REG1, FLOAT_TMP_MEM_OFFSET));
FAIL_IF(push_inst(compiler, STFIWX | FS(TMP_FREG1) | A(SLJIT_SP) | B(TMP_REG1)));
- return emit_op_mem2(compiler, INT_DATA | LOAD_DATA, dst, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, 0, 0);
+ return emit_op_mem(compiler, INT_DATA | LOAD_DATA, dst, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, TMP_REG1);
}
SLJIT_ASSERT(dst & SLJIT_MEM);
@@ -1871,21 +1571,21 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_comp
if (FAST_IS_REG(src))
FAIL_IF(push_inst(compiler, EXTSW | S(src) | A(TMP_REG1)));
else
- FAIL_IF(emit_op_mem2(compiler, INT_DATA | SIGNED_DATA | LOAD_DATA, TMP_REG1, src, srcw, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET));
+ FAIL_IF(emit_op_mem(compiler, INT_DATA | SIGNED_DATA | LOAD_DATA, TMP_REG1, src, srcw, TMP_REG1));
src = TMP_REG1;
}
if (FAST_IS_REG(src)) {
- FAIL_IF(emit_op_mem2(compiler, WORD_DATA, src, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET));
- FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, dst, dstw));
+ FAIL_IF(emit_op_mem(compiler, WORD_DATA, src, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, TMP_REG1));
+ FAIL_IF(emit_op_mem(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, TMP_REG1));
}
else
- FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, src, srcw, dst, dstw));
+ FAIL_IF(emit_op_mem(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, src, srcw, TMP_REG1));
FAIL_IF(push_inst(compiler, FCFID | FD(dst_r) | FB(TMP_FREG1)));
if (dst & SLJIT_MEM)
- return emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, 0, 0);
+ return emit_op_mem(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, TMP_REG1);
if (op & SLJIT_F32_OP)
return push_inst(compiler, FRSP | FD(dst_r) | FB(dst_r));
return SLJIT_SUCCESS;
@@ -1901,7 +1601,7 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_comp
invert_sign = 0;
}
else if (!FAST_IS_REG(src)) {
- FAIL_IF(emit_op_mem2(compiler, WORD_DATA | SIGNED_DATA | LOAD_DATA, TMP_REG1, src, srcw, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_LOW));
+ FAIL_IF(emit_op_mem(compiler, WORD_DATA | SIGNED_DATA | LOAD_DATA, TMP_REG1, src, srcw, TMP_REG1));
src = TMP_REG1;
}
@@ -1913,17 +1613,17 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_comp
FAIL_IF(push_inst(compiler, ADDIS | D(TMP_REG2) | A(0) | 0x4330));
if (invert_sign)
FAIL_IF(push_inst(compiler, XORIS | S(src) | A(TMP_REG1) | 0x8000));
- FAIL_IF(emit_op_mem2(compiler, WORD_DATA, TMP_REG2, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_HI, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET));
- FAIL_IF(emit_op_mem2(compiler, WORD_DATA, TMP_REG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_LOW, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_HI));
+ FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_REG2, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_HI, TMP_REG1));
+ FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_REG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_LOW, TMP_REG2));
FAIL_IF(push_inst(compiler, ADDIS | D(TMP_REG1) | A(0) | 0x8000));
- FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_LOW));
- FAIL_IF(emit_op_mem2(compiler, WORD_DATA, TMP_REG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_LOW, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET));
- FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG2, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_LOW));
+ FAIL_IF(emit_op_mem(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, TMP_REG1));
+ FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_REG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_LOW, TMP_REG2));
+ FAIL_IF(emit_op_mem(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG2, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, TMP_REG1));
FAIL_IF(push_inst(compiler, FSUB | FD(dst_r) | FA(TMP_FREG1) | FB(TMP_FREG2)));
if (dst & SLJIT_MEM)
- return emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, 0, 0);
+ return emit_op_mem(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, TMP_REG1);
if (op & SLJIT_F32_OP)
return push_inst(compiler, FRSP | FD(dst_r) | FB(dst_r));
return SLJIT_SUCCESS;
@@ -1936,12 +1636,12 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compile
sljit_s32 src2, sljit_sw src2w)
{
if (src1 & SLJIT_MEM) {
- FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w));
+ FAIL_IF(emit_op_mem(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, TMP_REG1));
src1 = TMP_FREG1;
}
if (src2 & SLJIT_MEM) {
- FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, 0, 0));
+ FAIL_IF(emit_op_mem(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, TMP_REG2));
src2 = TMP_FREG2;
}
@@ -1955,8 +1655,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil
sljit_s32 dst_r;
CHECK_ERROR();
- compiler->cache_arg = 0;
- compiler->cache_argw = 0;
SLJIT_COMPILE_ASSERT((SLJIT_F32_OP == 0x100) && !(DOUBLE_DATA & 0x4), float_transfer_bit_error);
SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);
@@ -1967,7 +1665,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil
dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
if (src & SLJIT_MEM) {
- FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, dst_r, src, srcw, dst, dstw));
+ FAIL_IF(emit_op_mem(compiler, FLOAT_DATA(op) | LOAD_DATA, dst_r, src, srcw, TMP_REG1));
src = dst_r;
}
@@ -1996,7 +1694,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil
}
if (dst & SLJIT_MEM)
- FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), dst_r, dst, dstw, 0, 0));
+ FAIL_IF(emit_op_mem(compiler, FLOAT_DATA(op), dst_r, dst, dstw, TMP_REG1));
return SLJIT_SUCCESS;
}
@@ -2005,7 +1703,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compil
sljit_s32 src1, sljit_sw src1w,
sljit_s32 src2, sljit_sw src2w)
{
- sljit_s32 dst_r, flags = 0;
+ sljit_s32 dst_r;
CHECK_ERROR();
CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
@@ -2013,46 +1711,17 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compil
ADJUST_LOCAL_OFFSET(src1, src1w);
ADJUST_LOCAL_OFFSET(src2, src2w);
- compiler->cache_arg = 0;
- compiler->cache_argw = 0;
-
dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG2;
if (src1 & SLJIT_MEM) {
- if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w)) {
- FAIL_IF(compiler->error);
- src1 = TMP_FREG1;
- } else
- flags |= ALT_FORM1;
+ FAIL_IF(emit_op_mem(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, TMP_REG1));
+ src1 = TMP_FREG1;
}
if (src2 & SLJIT_MEM) {
- if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w)) {
- FAIL_IF(compiler->error);
- src2 = TMP_FREG2;
- } else
- flags |= ALT_FORM2;
- }
-
- if ((flags & (ALT_FORM1 | ALT_FORM2)) == (ALT_FORM1 | ALT_FORM2)) {
- if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) {
- FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, src1, src1w));
- FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw));
- }
- else {
- FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w));
- FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw));
- }
- }
- else if (flags & ALT_FORM1)
- FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw));
- else if (flags & ALT_FORM2)
- FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw));
-
- if (flags & ALT_FORM1)
- src1 = TMP_FREG1;
- if (flags & ALT_FORM2)
+ FAIL_IF(emit_op_mem(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, TMP_REG2));
src2 = TMP_FREG2;
+ }
switch (GET_OPCODE(op)) {
case SLJIT_ADD_F64:
@@ -2072,13 +1741,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compil
break;
}
- if (dst_r == TMP_FREG2)
- FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG2, dst, dstw, 0, 0));
+ if (dst & SLJIT_MEM)
+ FAIL_IF(emit_op_mem(compiler, FLOAT_DATA(op), TMP_FREG2, dst, dstw, TMP_REG1));
return SLJIT_SUCCESS;
}
-#undef FLOAT_DATA
#undef SELECT_FOP
/* --------------------------------------------------------------------- */
@@ -2108,12 +1776,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler
if (FAST_IS_REG(src))
FAIL_IF(push_inst(compiler, MTLR | S(src)));
else {
- if (src & SLJIT_MEM)
- FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_REG2, 0, TMP_REG1, 0, src, srcw));
- else if (src & SLJIT_IMM)
- FAIL_IF(load_immediate(compiler, TMP_REG2, srcw));
+ FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_REG2, 0, TMP_REG1, 0, src, srcw));
FAIL_IF(push_inst(compiler, MTLR | S(TMP_REG2)));
}
+
return push_inst(compiler, BLR);
}
@@ -2340,11 +2006,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co
op = GET_OPCODE(op);
reg = (op < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG2;
- compiler->cache_arg = 0;
- compiler->cache_argw = 0;
-
if (op >= SLJIT_ADD && (dst & SLJIT_MEM))
- FAIL_IF(emit_op_mem2(compiler, input_flags | LOAD_DATA, TMP_REG1, dst, dstw, dst, dstw));
+ FAIL_IF(emit_op_mem(compiler, input_flags | LOAD_DATA, TMP_REG1, dst, dstw, TMP_REG1));
invert = 0;
cr_bit = 0;
@@ -2440,7 +2103,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co
if (op < SLJIT_ADD) {
if (!(dst & SLJIT_MEM))
return SLJIT_SUCCESS;
- return emit_op_mem2(compiler, input_flags, reg, dst, dstw, reg, 0);
+ return emit_op_mem(compiler, input_flags, reg, dst, dstw, TMP_REG1);
}
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
@@ -2462,6 +2125,139 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil
return sljit_emit_cmov_generic(compiler, type, dst_reg, src, srcw);
}
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 reg,
+ sljit_s32 mem, sljit_sw memw)
+{
+ sljit_s32 mem_flags;
+ sljit_ins inst;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw));
+
+ if (type & SLJIT_MEM_POST)
+ return SLJIT_ERR_UNSUPPORTED;
+
+ switch (type & 0xff) {
+ case SLJIT_MOV:
+ case SLJIT_MOV_P:
+#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
+ case SLJIT_MOV_U32:
+ case SLJIT_MOV_S32:
+#endif
+ mem_flags = WORD_DATA;
+ break;
+
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+ case SLJIT_MOV_U32:
+ mem_flags = INT_DATA;
+ break;
+
+ case SLJIT_MOV_S32:
+ mem_flags = INT_DATA;
+
+ if (!(type & SLJIT_MEM_STORE) && !(type & SLJIT_I32_OP)) {
+ if (mem & OFFS_REG_MASK)
+ mem_flags |= SIGNED_DATA;
+ else
+ return SLJIT_ERR_UNSUPPORTED;
+ }
+ break;
+#endif
+
+ case SLJIT_MOV_U8:
+ case SLJIT_MOV_S8:
+ mem_flags = BYTE_DATA;
+ break;
+
+ case SLJIT_MOV_U16:
+ mem_flags = HALF_DATA;
+ break;
+
+ case SLJIT_MOV_S16:
+ mem_flags = HALF_DATA | SIGNED_DATA;
+ break;
+
+ default:
+ SLJIT_UNREACHABLE();
+ mem_flags = WORD_DATA;
+ break;
+ }
+
+ if (!(type & SLJIT_MEM_STORE))
+ mem_flags |= LOAD_DATA;
+
+ if (SLJIT_UNLIKELY(mem & OFFS_REG_MASK)) {
+ if (memw != 0)
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if (type & SLJIT_MEM_SUPP)
+ return SLJIT_SUCCESS;
+
+ inst = updated_data_transfer_insts[mem_flags | INDEXED];
+ FAIL_IF(push_inst(compiler, INST_CODE_AND_DST(inst, 0, reg) | A(mem & REG_MASK) | B(OFFS_REG(mem))));
+ }
+ else {
+ if (memw > SIMM_MAX || memw < SIMM_MIN)
+ return SLJIT_ERR_UNSUPPORTED;
+
+ inst = updated_data_transfer_insts[mem_flags];
+
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+ if ((inst & INT_ALIGNED) && (memw & 0x3) != 0)
+ return SLJIT_ERR_UNSUPPORTED;
+#endif
+
+ if (type & SLJIT_MEM_SUPP)
+ return SLJIT_SUCCESS;
+
+ FAIL_IF(push_inst(compiler, INST_CODE_AND_DST(inst, 0, reg) | A(mem & REG_MASK) | IMM(memw)));
+ }
+
+ if ((mem_flags & LOAD_DATA) && (type & 0xff) == SLJIT_MOV_S8)
+ return push_inst(compiler, EXTSB | S(reg) | A(reg));
+ return SLJIT_SUCCESS;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 freg,
+ sljit_s32 mem, sljit_sw memw)
+{
+ sljit_s32 mem_flags;
+ sljit_ins inst;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_fmem(compiler, type, freg, mem, memw));
+
+ if (type & SLJIT_MEM_POST)
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if (SLJIT_UNLIKELY(mem & OFFS_REG_MASK)) {
+ if (memw != 0)
+ return SLJIT_ERR_UNSUPPORTED;
+ }
+ else {
+ if (memw > SIMM_MAX || memw < SIMM_MIN)
+ return SLJIT_ERR_UNSUPPORTED;
+ }
+
+ if (type & SLJIT_MEM_SUPP)
+ return SLJIT_SUCCESS;
+
+ mem_flags = FLOAT_DATA(type);
+
+ if (!(type & SLJIT_MEM_STORE))
+ mem_flags |= LOAD_DATA;
+
+ if (SLJIT_UNLIKELY(mem & OFFS_REG_MASK)) {
+ inst = updated_data_transfer_insts[mem_flags | INDEXED];
+ return push_inst(compiler, INST_CODE_AND_DST(inst, DOUBLE_DATA, freg) | A(mem & REG_MASK) | B(OFFS_REG(mem)));
+ }
+
+ inst = updated_data_transfer_insts[mem_flags];
+ return push_inst(compiler, INST_CODE_AND_DST(inst, DOUBLE_DATA, freg) | A(mem & REG_MASK) | IMM(memw));
+}
+
SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
{
struct sljit_const *const_;
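A short sketch of how these rewritten PPC tables are indexed, inferred from the flag definitions in this hunk (the helper below is hypothetical, not part of the commit): LOAD_DATA (0x01), INDEXED (0x02), SIGNED_DATA (0x04) and the size bits (WORD/BYTE/HALF/INT_DATA = 0x00/0x08/0x10/0x18, DOUBLE_DATA = 0x20) together form a single index under MEM_MASK, and the same index selects either the plain opcode or its base-updating variant depending on which table is consulted, which is what lets sljit_emit_mem and sljit_emit_fmem above reuse the layout of data_transfer_insts:

/* Hypothetical helper, not part of this commit; relies on the static tables
   and flag #defines declared earlier in this file. */
static sljit_ins pick_transfer_inst(sljit_s32 inp_flags, sljit_s32 update_base)
{
    const sljit_ins *table = update_base ? updated_data_transfer_insts : data_transfer_insts;
    /* MEM_MASK keeps only the bits that participate in the table index. */
    return table[inp_flags & MEM_MASK];
}

For example, pick_transfer_inst(HALF_DATA | SIGNED_DATA | LOAD_DATA, 1) lands on the "h s i l" row of updated_data_transfer_insts, i.e. HI(43) /* lhau */.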
diff --git a/sljit/sljitNativeSPARC_common.c b/sljit/sljitNativeSPARC_common.c
index 66e6c47..669ecd8 100644
--- a/sljit/sljitNativeSPARC_common.c
+++ b/sljit/sljitNativeSPARC_common.c
@@ -449,18 +449,17 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
#define MEM_MASK 0x1f
-#define WRITE_BACK 0x00020
-#define ARG_TEST 0x00040
-#define ALT_KEEP_CACHE 0x00080
-#define CUMULATIVE_OP 0x00100
-#define IMM_OP 0x00200
-#define SRC2_IMM 0x00400
-
-#define REG_DEST 0x00800
-#define REG2_SOURCE 0x01000
-#define SLOW_SRC1 0x02000
-#define SLOW_SRC2 0x04000
-#define SLOW_DEST 0x08000
+#define ARG_TEST 0x00020
+#define ALT_KEEP_CACHE 0x00040
+#define CUMULATIVE_OP 0x00080
+#define IMM_OP 0x00100
+#define SRC2_IMM 0x00200
+
+#define REG_DEST 0x00400
+#define REG2_SOURCE 0x00800
+#define SLOW_SRC1 0x01000
+#define SLOW_SRC2 0x02000
+#define SLOW_DEST 0x04000
/* SET_FLAGS (0x10 << 19) also belong here! */
@@ -562,18 +561,16 @@ static sljit_s32 getput_arg_fast(struct sljit_compiler *compiler, sljit_s32 flag
{
SLJIT_ASSERT(arg & SLJIT_MEM);
- if (!(flags & WRITE_BACK) || !(arg & REG_MASK)) {
- if ((!(arg & OFFS_REG_MASK) && argw <= SIMM_MAX && argw >= SIMM_MIN)
- || ((arg & OFFS_REG_MASK) && (argw & 0x3) == 0)) {
- /* Works for both absoulte and relative addresses (immediate case). */
- if (SLJIT_UNLIKELY(flags & ARG_TEST))
- return 1;
- FAIL_IF(push_inst(compiler, data_transfer_insts[flags & MEM_MASK]
- | ((flags & MEM_MASK) <= GPR_REG ? D(reg) : FD(reg))
- | S1(arg & REG_MASK) | ((arg & OFFS_REG_MASK) ? S2(OFFS_REG(arg)) : IMM(argw)),
- ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA)) ? DR(reg) : MOVABLE_INS));
- return -1;
- }
+ if ((!(arg & OFFS_REG_MASK) && argw <= SIMM_MAX && argw >= SIMM_MIN)
+ || ((arg & OFFS_REG_MASK) && (argw & 0x3) == 0)) {
+ /* Works for both absolute and relative addresses (immediate case). */
+ if (SLJIT_UNLIKELY(flags & ARG_TEST))
+ return 1;
+ FAIL_IF(push_inst(compiler, data_transfer_insts[flags & MEM_MASK]
+ | ((flags & MEM_MASK) <= GPR_REG ? D(reg) : FD(reg))
+ | S1(arg & REG_MASK) | ((arg & OFFS_REG_MASK) ? S2(OFFS_REG(arg)) : IMM(argw)),
+ ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA)) ? DR(reg) : MOVABLE_INS));
+ return -1;
}
return 0;
}
@@ -658,10 +655,7 @@ static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sl
delay_slot = ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA)) ? DR(reg) : MOVABLE_INS;
if (!base)
return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | dest | S1(arg2) | IMM(0), delay_slot);
- if (!(flags & WRITE_BACK))
- return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | dest | S1(base) | S2(arg2), delay_slot);
- FAIL_IF(push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | dest | S1(base) | S2(arg2), delay_slot));
- return push_inst(compiler, ADD | D(base) | S1(base) | S2(arg2), DR(base));
+ return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | dest | S1(base) | S2(arg2), delay_slot);
}
static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
@@ -703,7 +697,7 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3
if (FAST_IS_REG(dst)) {
dst_r = dst;
flags |= REG_DEST;
- if (op >= SLJIT_MOV && op <= SLJIT_MOVU_S32)
+ if (op >= SLJIT_MOV && op <= SLJIT_MOV_P)
sugg_src2_r = dst_r;
}
else if ((dst & SLJIT_MEM) && !getput_arg_fast(compiler, flags | ARG_TEST, TMP_REG1, dst, dstw))
@@ -754,7 +748,7 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3
if (FAST_IS_REG(src2)) {
src2_r = src2;
flags |= REG2_SOURCE;
- if (!(flags & REG_DEST) && op >= SLJIT_MOV && op <= SLJIT_MOVU_S32)
+ if (!(flags & REG_DEST) && op >= SLJIT_MOV && op <= SLJIT_MOV_P)
dst_r = src2_r;
}
else if (src2 & SLJIT_IMM) {
@@ -765,7 +759,7 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3
}
else {
src2_r = 0;
- if ((op >= SLJIT_MOV && op <= SLJIT_MOVU_S32) && (dst & SLJIT_MEM))
+ if ((op >= SLJIT_MOV && op <= SLJIT_MOV_P) && (dst & SLJIT_MEM))
dst_r = 0;
}
}
@@ -891,28 +885,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
case SLJIT_MOV_S16:
return emit_op(compiler, SLJIT_MOV_S16, flags | HALF_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s16)srcw : srcw);
- case SLJIT_MOVU:
- case SLJIT_MOVU_P:
- return emit_op(compiler, SLJIT_MOV, flags | WORD_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw);
-
- case SLJIT_MOVU_U32:
- return emit_op(compiler, SLJIT_MOV_U32, flags | INT_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw);
-
- case SLJIT_MOVU_S32:
- return emit_op(compiler, SLJIT_MOV_S32, flags | INT_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw);
-
- case SLJIT_MOVU_U8:
- return emit_op(compiler, SLJIT_MOV_U8, flags | BYTE_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u8)srcw : srcw);
-
- case SLJIT_MOVU_S8:
- return emit_op(compiler, SLJIT_MOV_S8, flags | BYTE_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s8)srcw : srcw);
-
- case SLJIT_MOVU_U16:
- return emit_op(compiler, SLJIT_MOV_U16, flags | HALF_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u16)srcw : srcw);
-
- case SLJIT_MOVU_S16:
- return emit_op(compiler, SLJIT_MOV_S16, flags | HALF_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s16)srcw : srcw);
-
case SLJIT_NOT:
case SLJIT_CLZ:
return emit_op(compiler, op, flags, dst, dstw, TMP_REG1, 0, src, srcw);
@@ -1227,10 +1199,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler
if (FAST_IS_REG(src))
FAIL_IF(push_inst(compiler, OR | D(TMP_LINK) | S1(0) | S2(src), DR(TMP_LINK)));
- else if (src & SLJIT_MEM)
+ else
FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_LINK, src, srcw));
- else if (src & SLJIT_IMM)
- FAIL_IF(load_immediate(compiler, TMP_LINK, srcw));
FAIL_IF(push_inst(compiler, JMPL | D(0) | S1(TMP_LINK) | IMM(8), UNMOVABLE_INS));
return push_inst(compiler, NOP, UNMOVABLE_INS);
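The SPARC hunks above drop the WRITE_BACK flag and the SLJIT_MOVU_* opcodes from sljit_emit_op1, and sljit_emit_fast_return no longer accepts an immediate source, so its SLJIT_IMM branch is removed as well. Callers that relied on the old write-back moves now either use sljit_emit_mem (where the backend reports support) or split the operation into a plain move plus an explicit base update. A hedged sketch of the latter, assuming the old pre-update semantics in which the base register is advanced before the access:

    /* Hypothetical replacement for the removed
       sljit_emit_op1(compiler, SLJIT_MOVU_U8, SLJIT_MEM1(SLJIT_R1), 1, SLJIT_R0, 0):
       advance the base first, then perform an ordinary byte store. */
    sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
    sljit_emit_op1(compiler, SLJIT_MOV_U8, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R0, 0);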
diff --git a/sljit/sljitNativeX86_32.c b/sljit/sljitNativeX86_32.c
index a6aa5c6..8a83e27 100644
--- a/sljit/sljitNativeX86_32.c
+++ b/sljit/sljitNativeX86_32.c
@@ -855,7 +855,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler
INC_SIZE(1 + 1);
PUSH_REG(reg_map[src]);
}
- else if (src & SLJIT_MEM) {
+ else {
inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
FAIL_IF(!inst);
*inst++ = GROUP_FF;
@@ -865,16 +865,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler
FAIL_IF(!inst);
INC_SIZE(1);
}
- else {
- /* SLJIT_IMM. */
- inst = (sljit_u8*)ensure_buf(compiler, 1 + 5 + 1);
- FAIL_IF(!inst);
-
- INC_SIZE(5 + 1);
- *inst++ = PUSH_i32;
- sljit_unaligned_store_sw(inst, srcw);
- inst += sizeof(sljit_sw);
- }
RET();
return SLJIT_SUCCESS;
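With the SLJIT_IMM branch gone here (the same path is removed from the 64-bit backend below), a caller that previously pushed a constant return address has to materialize it in a register or memory operand first. A minimal hedged sketch; return_addr is a purely illustrative value:

    /* Hypothetical caller: load the target into a scratch register and
       return through it instead of pushing an immediate. */
    sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, (sljit_sw)return_addr);
    sljit_emit_fast_return(compiler, SLJIT_R0, 0);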
diff --git a/sljit/sljitNativeX86_64.c b/sljit/sljitNativeX86_64.c
index 866beb2..5194428 100644
--- a/sljit/sljitNativeX86_64.c
+++ b/sljit/sljitNativeX86_64.c
@@ -41,24 +41,31 @@ static sljit_s32 emit_load_imm64(struct sljit_compiler *compiler, sljit_s32 reg,
static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_s32 type)
{
+ /* The relative jump below is specialized for this case. */
+ SLJIT_ASSERT(reg_map[TMP_REG2] >= 8);
+
+ int short_addr = !(jump->flags & SLJIT_REWRITABLE_JUMP) && !(jump->flags & JUMP_LABEL) && (jump->u.target <= 0xffffffff);
+
if (type < SLJIT_JUMP) {
/* Invert type. */
*code_ptr++ = get_jump_code(type ^ 0x1) - 0x10;
- *code_ptr++ = 10 + 3;
+ *code_ptr++ = short_addr ? (6 + 3) : (10 + 3);
}
- *code_ptr++ = REX_W | ((reg_map[TMP_REG2] <= 7) ? 0 : REX_B);
+ *code_ptr++ = short_addr ? REX_B : (REX_W | REX_B);
*code_ptr++ = MOV_r_i32 | reg_lmap[TMP_REG2];
jump->addr = (sljit_uw)code_ptr;
if (jump->flags & JUMP_LABEL)
jump->flags |= PATCH_MD;
+ else if (short_addr)
+ sljit_unaligned_store_s32(code_ptr, (sljit_s32)jump->u.target);
else
sljit_unaligned_store_sw(code_ptr, jump->u.target);
- code_ptr += sizeof(sljit_sw);
- if (reg_map[TMP_REG2] >= 8)
- *code_ptr++ = REX_B;
+ code_ptr += short_addr ? sizeof(sljit_s32) : sizeof(sljit_sw);
+
+ *code_ptr++ = REX_B;
*code_ptr++ = GROUP_FF;
*code_ptr++ = MOD_REG | (type >= SLJIT_FAST_CALL ? CALL_rm : JMP_rm) | reg_lmap[TMP_REG2];
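The short_addr path above shrinks the far-jump thunk whenever the target is a fixed, non-rewritable address that fits in 32 bits: a REX.B-only mov with a 32-bit immediate zero-extends into the 64-bit temporary register, so the longer REX.W movabs form is only needed for full 64-bit targets. The displacements patched into the inverted conditional jump follow from the encodings (a rough size note; the actual register depends on reg_map[TMP_REG2]):

    /* mov r32, imm32 : REX.B + opcode + imm32        = 6 bytes (zero-extends)
       mov r64, imm64 : REX.W|REX.B + opcode + imm64  = 10 bytes
       jmp/call reg   : REX.B + 0xFF + ModRM          = 3 bytes
       hence the 6 + 3 vs 10 + 3 offsets stored above. */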
@@ -755,11 +762,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler
CHECK(check_sljit_emit_fast_return(compiler, src, srcw));
ADJUST_LOCAL_OFFSET(src, srcw);
- if ((src & SLJIT_IMM) && NOT_HALFWORD(srcw)) {
- FAIL_IF(emit_load_imm64(compiler, TMP_REG1, srcw));
- src = TMP_REG1;
- }
-
if (FAST_IS_REG(src)) {
if (reg_map[src] < 8) {
inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 1);
@@ -777,7 +779,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler
PUSH_REG(reg_lmap[src]);
}
}
- else if (src & SLJIT_MEM) {
+ else {
/* REX_W is not necessary (src is not immediate). */
compiler->mode32 = 1;
inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
@@ -789,17 +791,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler
FAIL_IF(!inst);
INC_SIZE(1);
}
- else {
- SLJIT_ASSERT(IS_HALFWORD(srcw));
- /* SLJIT_IMM. */
- inst = (sljit_u8*)ensure_buf(compiler, 1 + 5 + 1);
- FAIL_IF(!inst);
-
- INC_SIZE(5 + 1);
- *inst++ = PUSH_i32;
- sljit_unaligned_store_s32(inst, srcw);
- inst += sizeof(sljit_s32);
- }
RET();
return SLJIT_SUCCESS;
diff --git a/sljit/sljitNativeX86_common.c b/sljit/sljitNativeX86_common.c
index 1ca8c43..ab7b36a 100644
--- a/sljit/sljitNativeX86_common.c
+++ b/sljit/sljitNativeX86_common.c
@@ -39,7 +39,7 @@ SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
1 - ECX
2 - EDX
3 - EBX
- 4 - none
+ 4 - ESP
5 - EBP
6 - ESI
7 - EDI
@@ -51,7 +51,7 @@ SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
1 - RCX
2 - RDX
3 - RBX
- 4 - none
+ 4 - RSP
5 - RBP
6 - RSI
7 - RDI
@@ -477,11 +477,7 @@ static sljit_u8* generate_near_jump_code(struct sljit_jump *jump, sljit_u8 *code
code_ptr += sizeof(sljit_s8);
} else {
jump->flags |= PATCH_MW;
-#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
- code_ptr += sizeof(sljit_sw);
-#else
code_ptr += sizeof(sljit_s32);
-#endif
}
return code_ptr;
@@ -1135,7 +1131,7 @@ static sljit_s32 emit_unary(struct sljit_compiler *compiler, sljit_u8 opcode,
return SLJIT_SUCCESS;
}
- if (dst == SLJIT_UNUSED)
+ if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED))
dst = TMP_REG1;
if (FAST_IS_REG(dst)) {
@@ -1202,12 +1198,6 @@ static sljit_s32 emit_clz(struct sljit_compiler *compiler, sljit_s32 op_flags,
SLJIT_UNUSED_ARG(op_flags);
- if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
- EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw);
- src = TMP_REG1;
- srcw = 0;
- }
-
if (cpu_has_cmov == -1)
get_cpu_features();
@@ -1262,13 +1252,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
sljit_s32 dst, sljit_sw dstw,
sljit_s32 src, sljit_sw srcw)
{
- sljit_s32 update = 0;
sljit_s32 op_flags = GET_ALL_FLAGS(op);
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
sljit_s32 dst_is_ereg = 0;
- sljit_s32 src_is_ereg = 0;
-#else
-# define src_is_ereg 0
#endif
CHECK_ERROR();
@@ -1277,7 +1263,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
ADJUST_LOCAL_OFFSET(src, srcw);
CHECK_EXTRA_REGS(dst, dstw, dst_is_ereg = 1);
- CHECK_EXTRA_REGS(src, srcw, src_is_ereg = 1);
+ CHECK_EXTRA_REGS(src, srcw, (void)0);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
compiler->mode32 = op_flags & SLJIT_I32_OP;
#endif
@@ -1290,7 +1276,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
op = GET_OPCODE(op);
- if (op >= SLJIT_MOV && op <= SLJIT_MOVU_P) {
+ if (op >= SLJIT_MOV && op <= SLJIT_MOV_P) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
compiler->mode32 = 0;
#endif
@@ -1305,24 +1291,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
if (src & SLJIT_MEM) {
if (op == SLJIT_MOV_S32)
op = SLJIT_MOV_U32;
- if (op == SLJIT_MOVU_S32)
- op = SLJIT_MOVU_U32;
}
else if (src & SLJIT_IMM) {
if (op == SLJIT_MOV_U32)
op = SLJIT_MOV_S32;
- if (op == SLJIT_MOVU_U32)
- op = SLJIT_MOVU_S32;
}
#endif
}
- SLJIT_COMPILE_ASSERT(SLJIT_MOV + 8 == SLJIT_MOVU, movu_offset);
- if (op >= SLJIT_MOVU) {
- update = 1;
- op -= 8;
- }
-
if (src & SLJIT_IMM) {
switch (op) {
case SLJIT_MOV_U8:
@@ -1394,28 +1370,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
if (SLJIT_UNLIKELY(dst_is_ereg) && dst == TMP_REG1)
return emit_mov(compiler, SLJIT_MEM1(SLJIT_SP), dstw, TMP_REG1, 0);
#endif
-
- if (SLJIT_UNLIKELY(update) && (src & SLJIT_MEM) && !src_is_ereg && (src & REG_MASK)) {
- if ((src & OFFS_REG_MASK) != 0) {
- FAIL_IF(emit_cum_binary(compiler, BINARY_OPCODE(ADD),
- (src & REG_MASK), 0, (src & REG_MASK), 0, OFFS_REG(dst), 0));
- }
- else if (srcw != 0) {
- FAIL_IF(emit_cum_binary(compiler, BINARY_OPCODE(ADD),
- (src & REG_MASK), 0, (src & REG_MASK), 0, SLJIT_IMM, srcw));
- }
- }
-
- if (SLJIT_UNLIKELY(update) && (dst & SLJIT_MEM) && (dst & REG_MASK)) {
- if ((dst & OFFS_REG_MASK) != 0) {
- FAIL_IF(emit_cum_binary(compiler, BINARY_OPCODE(ADD),
- (dst & REG_MASK), 0, (dst & REG_MASK), 0, OFFS_REG(dst), 0));
- }
- else if (dstw != 0) {
- FAIL_IF(emit_cum_binary(compiler, BINARY_OPCODE(ADD),
- (dst & REG_MASK), 0, (dst & REG_MASK), 0, SLJIT_IMM, dstw));
- }
- }
return SLJIT_SUCCESS;
}
@@ -1433,10 +1387,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
}
return SLJIT_SUCCESS;
-
-#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
-# undef src_is_ereg
-#endif
}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)