summaryrefslogtreecommitdiff
path: root/pcre/sljit/sljitNativeX86_common.c
diff options
context:
space:
mode:
Diffstat (limited to 'pcre/sljit/sljitNativeX86_common.c')
-rw-r--r--pcre/sljit/sljitNativeX86_common.c316
1 files changed, 177 insertions, 139 deletions
diff --git a/pcre/sljit/sljitNativeX86_common.c b/pcre/sljit/sljitNativeX86_common.c
index 653705f6ca6..71442fef254 100644
--- a/pcre/sljit/sljitNativeX86_common.c
+++ b/pcre/sljit/sljitNativeX86_common.c
@@ -64,51 +64,46 @@ SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void)
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
/* Last register + 1. */
-#define TMP_REG1 (SLJIT_NO_REGISTERS + 1)
+#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)
-static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 2] = {
- 0, 0, 2, 1, 0, 0, 3, 6, 7, 0, 0, 4, 5
+static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 3] = {
+ 0, 0, 2, 1, 0, 0, 0, 0, 7, 6, 3, 4, 5
};
#define CHECK_EXTRA_REGS(p, w, do) \
- if (p >= SLJIT_TEMPORARY_EREG1 && p <= SLJIT_TEMPORARY_EREG2) { \
- w = compiler->scratches_start + (p - SLJIT_TEMPORARY_EREG1) * sizeof(sljit_sw); \
- p = SLJIT_MEM1(SLJIT_LOCALS_REG); \
- do; \
- } \
- else if (p >= SLJIT_SAVED_EREG1 && p <= SLJIT_SAVED_EREG2) { \
- w = compiler->saveds_start + (p - SLJIT_SAVED_EREG1) * sizeof(sljit_sw); \
- p = SLJIT_MEM1(SLJIT_LOCALS_REG); \
+ if (p >= SLJIT_R3 && p <= SLJIT_R6) { \
+ w = FIXED_LOCALS_OFFSET + ((p) - (SLJIT_R3 + 4)) * sizeof(sljit_sw); \
+ p = SLJIT_MEM1(SLJIT_SP); \
do; \
}
#else /* SLJIT_CONFIG_X86_32 */
/* Last register + 1. */
-#define TMP_REG1 (SLJIT_NO_REGISTERS + 1)
-#define TMP_REG2 (SLJIT_NO_REGISTERS + 2)
-#define TMP_REG3 (SLJIT_NO_REGISTERS + 3)
+#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)
+#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3)
+#define TMP_REG3 (SLJIT_NUMBER_OF_REGISTERS + 4)
/* Note: r12 & 0x7 == 0b100, which decoded as SIB byte present
Note: avoid to use r12 and r13 for memory addessing
therefore r12 is better for SAVED_EREG than SAVED_REG. */
#ifndef _WIN64
/* 1st passed in rdi, 2nd argument passed in rsi, 3rd in rdx. */
-static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 4] = {
- 0, 0, 6, 1, 8, 11, 3, 15, 14, 13, 12, 4, 2, 7, 9
+static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
+ 0, 0, 6, 1, 8, 11, 10, 12, 5, 13, 14, 15, 3, 4, 2, 7, 9
};
/* low-map. reg_map & 0x7. */
-static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NO_REGISTERS + 4] = {
- 0, 0, 6, 1, 0, 3, 3, 7, 6, 5, 4, 4, 2, 7, 1
+static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 5] = {
+ 0, 0, 6, 1, 0, 3, 2, 4, 5, 5, 6, 7, 3, 4, 2, 7, 1
};
#else
/* 1st passed in rcx, 2nd argument passed in rdx, 3rd in r8. */
-static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 4] = {
- 0, 0, 2, 1, 11, 13, 3, 6, 7, 14, 15, 4, 10, 8, 9
+static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
+ 0, 0, 2, 1, 11, 12, 5, 13, 14, 15, 7, 6, 3, 4, 10, 8, 9
};
/* low-map. reg_map & 0x7. */
-static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NO_REGISTERS + 4] = {
- 0, 0, 2, 1, 3, 5, 3, 6, 7, 6, 7, 4, 2, 0, 1
+static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 5] = {
+ 0, 0, 2, 1, 3, 4, 5, 5, 6, 7, 7, 6, 3, 4, 2, 0, 1
};
#endif
@@ -133,9 +128,7 @@ static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NO_REGISTERS + 4] = {
#endif /* SLJIT_CONFIG_X86_32 */
-#if (defined SLJIT_SSE2 && SLJIT_SSE2)
#define TMP_FREG (0)
-#endif
/* Size flags for emit_x86_instruction: */
#define EX86_BIN_INS 0x0010
@@ -145,12 +138,11 @@ static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NO_REGISTERS + 4] = {
#define EX86_BYTE_ARG 0x0100
#define EX86_HALF_ARG 0x0200
#define EX86_PREF_66 0x0400
-
-#if (defined SLJIT_SSE2 && SLJIT_SSE2)
-#define EX86_SSE2 0x0800
-#define EX86_PREF_F2 0x1000
-#define EX86_PREF_F3 0x2000
-#endif
+#define EX86_PREF_F2 0x0800
+#define EX86_PREF_F3 0x1000
+#define EX86_SSE2_OP1 0x2000
+#define EX86_SSE2_OP2 0x4000
+#define EX86_SSE2 (EX86_SSE2_OP1 | EX86_SSE2_OP2)
/* --------------------------------------------------------------------- */
/* Instrucion forms */
@@ -179,6 +171,9 @@ static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NO_REGISTERS + 4] = {
#define CMP_EAX_i32 0x3d
#define CMP_r_rm 0x3b
#define CMP_rm_r 0x39
+#define CVTPD2PS_x_xm 0x5a
+#define CVTSI2SD_x_rm 0x2a
+#define CVTTSD2SI_r_xm 0x2c
#define DIV (/* GROUP_F7 */ 6 << 3)
#define DIVSD_x_xm 0x5e
#define INT3 0xcc
@@ -239,6 +234,7 @@ static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NO_REGISTERS + 4] = {
#define TEST_EAX_i32 0xa9
#define TEST_rm_r 0x85
#define UCOMISD_x_xm 0x2e
+#define UNPCKLPD_x_xm 0x14
#define XCHG_EAX_r 0x90
#define XCHG_r_rm 0x87
#define XOR (/* BINARY */ 6 << 3)
@@ -271,7 +267,7 @@ static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NO_REGISTERS + 4] = {
/* Multithreading does not affect these static variables, since they store
built-in CPU features. Therefore they can be overwritten by different threads
if they detect the CPU features in the same time. */
-#if (defined SLJIT_SSE2 && SLJIT_SSE2) && (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
+#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
static sljit_si cpu_has_sse2 = -1;
#endif
static sljit_si cpu_has_cmov = -1;
@@ -325,7 +321,7 @@ static void get_cpu_features(void)
#endif /* _MSC_VER && _MSC_VER >= 1400 */
-#if (defined SLJIT_SSE2 && SLJIT_SSE2) && (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
+#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
cpu_has_sse2 = (features >> 26) & 0x1;
#endif
cpu_has_cmov = (features >> 15) & 0x1;
@@ -751,14 +747,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
#ifdef _WIN64
SLJIT_COMPILE_ASSERT(
- reg_map[SLJIT_SCRATCH_REG1] == 0
- && reg_map[SLJIT_SCRATCH_REG2] == 2
+ reg_map[SLJIT_R0] == 0
+ && reg_map[SLJIT_R1] == 2
&& reg_map[TMP_REG1] > 7,
invalid_register_assignment_for_div_mul);
#else
SLJIT_COMPILE_ASSERT(
- reg_map[SLJIT_SCRATCH_REG1] == 0
- && reg_map[SLJIT_SCRATCH_REG2] < 7
+ reg_map[SLJIT_R0] == 0
+ && reg_map[SLJIT_R1] < 7
&& reg_map[TMP_REG1] == 2,
invalid_register_assignment_for_div_mul);
#endif
@@ -768,8 +764,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler
op = GET_OPCODE(op);
if (op == SLJIT_UDIV) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
- EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_SCRATCH_REG2, 0);
- inst = emit_x86_instruction(compiler, 1, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0);
+ EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0);
+ inst = emit_x86_instruction(compiler, 1, SLJIT_R1, 0, SLJIT_R1, 0);
#else
inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
#endif
@@ -779,7 +775,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler
if (op == SLJIT_SDIV) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
- EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_SCRATCH_REG2, 0);
+ EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0);
#endif
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
@@ -808,7 +804,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler
FAIL_IF(!inst);
INC_SIZE(2);
*inst++ = GROUP_F7;
- *inst = MOD_REG | ((op >= SLJIT_UDIV) ? reg_map[TMP_REG1] : reg_map[SLJIT_SCRATCH_REG2]);
+ *inst = MOD_REG | ((op >= SLJIT_UDIV) ? reg_map[TMP_REG1] : reg_map[SLJIT_R1]);
#else
#ifdef _WIN64
size = (!compiler->mode32 || op >= SLJIT_UDIV) ? 3 : 2;
@@ -824,12 +820,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler
else if (op >= SLJIT_UDIV)
*inst++ = REX_B;
*inst++ = GROUP_F7;
- *inst = MOD_REG | ((op >= SLJIT_UDIV) ? reg_lmap[TMP_REG1] : reg_lmap[SLJIT_SCRATCH_REG2]);
+ *inst = MOD_REG | ((op >= SLJIT_UDIV) ? reg_lmap[TMP_REG1] : reg_lmap[SLJIT_R1]);
#else
if (!compiler->mode32)
*inst++ = REX_W;
*inst++ = GROUP_F7;
- *inst = MOD_REG | reg_map[SLJIT_SCRATCH_REG2];
+ *inst = MOD_REG | reg_map[SLJIT_R1];
#endif
#endif
switch (op) {
@@ -847,7 +843,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler
break;
}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && !defined(_WIN64)
- EMIT_MOV(compiler, SLJIT_SCRATCH_REG2, 0, TMP_REG1, 0);
+ EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0);
#endif
break;
}
@@ -956,22 +952,22 @@ static sljit_si emit_mov_byte(struct sljit_compiler *compiler, sljit_si sign,
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
if (dst_r == TMP_REG1) {
/* Find a non-used register, whose reg_map[src] < 4. */
- if ((dst & REG_MASK) == SLJIT_SCRATCH_REG1) {
- if ((dst & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SCRATCH_REG2))
- work_r = SLJIT_SCRATCH_REG3;
+ if ((dst & REG_MASK) == SLJIT_R0) {
+ if ((dst & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_R1))
+ work_r = SLJIT_R2;
else
- work_r = SLJIT_SCRATCH_REG2;
+ work_r = SLJIT_R1;
}
else {
- if ((dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_SCRATCH_REG1))
- work_r = SLJIT_SCRATCH_REG1;
- else if ((dst & REG_MASK) == SLJIT_SCRATCH_REG2)
- work_r = SLJIT_SCRATCH_REG3;
+ if ((dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R0))
+ work_r = SLJIT_R0;
+ else if ((dst & REG_MASK) == SLJIT_R1)
+ work_r = SLJIT_R2;
else
- work_r = SLJIT_SCRATCH_REG2;
+ work_r = SLJIT_R1;
}
- if (work_r == SLJIT_SCRATCH_REG1) {
+ if (work_r == SLJIT_R0) {
ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]);
}
else {
@@ -984,7 +980,7 @@ static sljit_si emit_mov_byte(struct sljit_compiler *compiler, sljit_si sign,
FAIL_IF(!inst);
*inst = MOV_rm8_r8;
- if (work_r == SLJIT_SCRATCH_REG1) {
+ if (work_r == SLJIT_R0) {
ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]);
}
else {
@@ -1179,12 +1175,12 @@ static sljit_si emit_clz(struct sljit_compiler *compiler, sljit_si op_flags,
dst_r = dst;
else {
/* Find an unused temporary register. */
- if ((dst & REG_MASK) != SLJIT_SCRATCH_REG1 && (dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_SCRATCH_REG1))
- dst_r = SLJIT_SCRATCH_REG1;
- else if ((dst & REG_MASK) != SLJIT_SCRATCH_REG2 && (dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_SCRATCH_REG2))
- dst_r = SLJIT_SCRATCH_REG2;
+ if ((dst & REG_MASK) != SLJIT_R0 && (dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R0))
+ dst_r = SLJIT_R0;
+ else if ((dst & REG_MASK) != SLJIT_R1 && (dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R1))
+ dst_r = SLJIT_R1;
else
- dst_r = SLJIT_SCRATCH_REG3;
+ dst_r = SLJIT_R2;
EMIT_MOV(compiler, dst, dstw, dst_r, 0);
}
EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, 32 + 31);
@@ -1340,7 +1336,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
if (SLJIT_UNLIKELY(dst_is_ereg) && (!(op == SLJIT_MOV || op == SLJIT_MOV_UI || op == SLJIT_MOV_SI || op == SLJIT_MOV_P) || (src & SLJIT_MEM))) {
- SLJIT_ASSERT(dst == SLJIT_MEM1(SLJIT_LOCALS_REG));
+ SLJIT_ASSERT(dst == SLJIT_MEM1(SLJIT_SP));
dst = TMP_REG1;
}
#endif
@@ -1378,7 +1374,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
if (SLJIT_UNLIKELY(dst_is_ereg) && dst == TMP_REG1)
- return emit_mov(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), dstw, TMP_REG1, 0);
+ return emit_mov(compiler, SLJIT_MEM1(SLJIT_SP), dstw, TMP_REG1, 0);
#endif
if (SLJIT_UNLIKELY(update) && (dst & SLJIT_MEM) && (dst & REG_MASK) && (dstw != 0 || (dst & OFFS_REG_MASK) != 0)) {
@@ -1470,9 +1466,9 @@ static sljit_si emit_cum_binary(struct sljit_compiler *compiler,
if (dst == src1 && dstw == src1w) {
if (src2 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
- if ((dst == SLJIT_SCRATCH_REG1) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
+ if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
- if ((dst == SLJIT_SCRATCH_REG1) && (src2w > 127 || src2w < -128)) {
+ if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) {
#endif
BINARY_EAX_IMM(op_eax_imm, src2w);
}
@@ -1504,9 +1500,9 @@ static sljit_si emit_cum_binary(struct sljit_compiler *compiler,
if (dst == src2 && dstw == src2w) {
if (src1 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
- if ((dst == SLJIT_SCRATCH_REG1) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
+ if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
#else
- if ((dst == SLJIT_SCRATCH_REG1) && (src1w > 127 || src1w < -128)) {
+ if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128)) {
#endif
BINARY_EAX_IMM(op_eax_imm, src1w);
}
@@ -1586,9 +1582,9 @@ static sljit_si emit_non_cum_binary(struct sljit_compiler *compiler,
if (dst == src1 && dstw == src1w) {
if (src2 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
- if ((dst == SLJIT_SCRATCH_REG1) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
+ if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
- if ((dst == SLJIT_SCRATCH_REG1) && (src2w > 127 || src2w < -128)) {
+ if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) {
#endif
BINARY_EAX_IMM(op_eax_imm, src2w);
}
@@ -1840,9 +1836,9 @@ static sljit_si emit_cmp_binary(struct sljit_compiler *compiler,
sljit_ub* inst;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
- if (src1 == SLJIT_SCRATCH_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
+ if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
- if (src1 == SLJIT_SCRATCH_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
+ if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
#endif
BINARY_EAX_IMM(CMP_EAX_i32, src2w);
return SLJIT_SUCCESS;
@@ -1891,18 +1887,18 @@ static sljit_si emit_test_binary(struct sljit_compiler *compiler,
sljit_ub* inst;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
- if (src1 == SLJIT_SCRATCH_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
+ if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
- if (src1 == SLJIT_SCRATCH_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
+ if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
#endif
BINARY_EAX_IMM(TEST_EAX_i32, src2w);
return SLJIT_SUCCESS;
}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
- if (src2 == SLJIT_SCRATCH_REG1 && (src2 & SLJIT_IMM) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
+ if (src2 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
#else
- if (src2 == SLJIT_SCRATCH_REG1 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128)) {
+ if (src2 == SLJIT_R0 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128)) {
#endif
BINARY_EAX_IMM(TEST_EAX_i32, src1w);
return SLJIT_SUCCESS;
@@ -2064,7 +2060,7 @@ static sljit_si emit_shift(struct sljit_compiler *compiler,
EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_PREF_SHIFT_REG, 0);
#else
/* [esp+0] contains the flags. */
- EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), sizeof(sljit_sw), SLJIT_PREF_SHIFT_REG, 0);
+ EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_sw), SLJIT_PREF_SHIFT_REG, 0);
#endif
EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
@@ -2073,7 +2069,7 @@ static sljit_si emit_shift(struct sljit_compiler *compiler,
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0);
#else
- EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), sizeof(sljit_sw));
+ EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_sw));
#endif
EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
}
@@ -2215,8 +2211,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg)
{
check_sljit_get_register_index(reg);
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
- if (reg == SLJIT_TEMPORARY_EREG1 || reg == SLJIT_TEMPORARY_EREG2
- || reg == SLJIT_SAVED_EREG1 || reg == SLJIT_SAVED_EREG2)
+ if (reg >= SLJIT_R3 && reg <= SLJIT_R6)
return -1;
#endif
return reg_map[reg];
@@ -2248,8 +2243,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *co
/* Floating point operators */
/* --------------------------------------------------------------------- */
-#if (defined SLJIT_SSE2 && SLJIT_SSE2)
-
/* Alignment + 2 * 16 bytes. */
static sljit_si sse2_data[3 + (4 + 4) * 2];
static sljit_si *sse2_buffer;
@@ -2267,27 +2260,19 @@ static void init_compiler(void)
sse2_buffer[13] = 0x7fffffff;
}
-#endif
-
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void)
{
#ifdef SLJIT_IS_FPU_AVAILABLE
return SLJIT_IS_FPU_AVAILABLE;
-#elif (defined SLJIT_SSE2 && SLJIT_SSE2)
-#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
+#elif (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
if (cpu_has_sse2 == -1)
get_cpu_features();
return cpu_has_sse2;
#else /* SLJIT_DETECT_SSE2 */
return 1;
#endif /* SLJIT_DETECT_SSE2 */
-#else /* SLJIT_SSE2 */
- return 0;
-#endif
}
-#if (defined SLJIT_SSE2 && SLJIT_SSE2)
-
static sljit_si emit_sse2(struct sljit_compiler *compiler, sljit_ub opcode,
sljit_si single, sljit_si xmm1, sljit_si xmm2, sljit_sw xmm2w)
{
@@ -2324,31 +2309,89 @@ static SLJIT_INLINE sljit_si emit_sse2_store(struct sljit_compiler *compiler,
return emit_sse2(compiler, MOVSD_xm_x, single, src, dst, dstw);
}
-SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op,
+static SLJIT_INLINE sljit_si sljit_emit_fop1_convw_fromd(struct sljit_compiler *compiler, sljit_si op,
sljit_si dst, sljit_sw dstw,
sljit_si src, sljit_sw srcw)
{
- sljit_si dst_r;
+ sljit_si dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;
+ sljit_ub *inst;
- CHECK_ERROR();
- check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw);
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+ if (GET_OPCODE(op) == SLJIT_CONVW_FROMD)
+ compiler->mode32 = 0;
+#endif
+
+ inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_SINGLE_OP) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP2, dst_r, 0, src, srcw);
+ FAIL_IF(!inst);
+ *inst++ = GROUP_0F;
+ *inst = CVTTSD2SI_r_xm;
+
+ if (dst_r == TMP_REG1 && dst != SLJIT_UNUSED)
+ return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
+ return SLJIT_SUCCESS;
+}
+
+static SLJIT_INLINE sljit_si sljit_emit_fop1_convd_fromw(struct sljit_compiler *compiler, sljit_si op,
+ sljit_si dst, sljit_sw dstw,
+ sljit_si src, sljit_sw srcw)
+{
+ sljit_si dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;
+ sljit_ub *inst;
+
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+ if (GET_OPCODE(op) == SLJIT_CONVD_FROMW)
+ compiler->mode32 = 0;
+#endif
+
+ if (src & SLJIT_IMM) {
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+ if (GET_OPCODE(op) == SLJIT_CONVD_FROMI)
+ srcw = (sljit_si)srcw;
+#endif
+ EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
+ src = TMP_REG1;
+ srcw = 0;
+ }
+
+ inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_SINGLE_OP) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP1, dst_r, 0, src, srcw);
+ FAIL_IF(!inst);
+ *inst++ = GROUP_0F;
+ *inst = CVTSI2SD_x_rm;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
compiler->mode32 = 1;
#endif
+ if (dst_r == TMP_FREG)
+ return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG);
+ return SLJIT_SUCCESS;
+}
- if (GET_OPCODE(op) == SLJIT_CMPD) {
- compiler->flags_saved = 0;
- if (FAST_IS_REG(dst))
- dst_r = dst;
- else {
- dst_r = TMP_FREG;
- FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst_r, dst, dstw));
- }
- return emit_sse2_logic(compiler, UCOMISD_x_xm, !(op & SLJIT_SINGLE_OP), dst_r, src, srcw);
+static SLJIT_INLINE sljit_si sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_si op,
+ sljit_si src1, sljit_sw src1w,
+ sljit_si src2, sljit_sw src2w)
+{
+ compiler->flags_saved = 0;
+ if (!FAST_IS_REG(src1)) {
+ FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, TMP_FREG, src1, src1w));
+ src1 = TMP_FREG;
}
+ return emit_sse2_logic(compiler, UCOMISD_x_xm, !(op & SLJIT_SINGLE_OP), src1, src2, src2w);
+}
- if (op == SLJIT_MOVD) {
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op,
+ sljit_si dst, sljit_sw dstw,
+ sljit_si src, sljit_sw srcw)
+{
+ sljit_si dst_r;
+
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+ compiler->mode32 = 1;
+#endif
+
+ CHECK_ERROR();
+ SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);
+
+ if (GET_OPCODE(op) == SLJIT_MOVD) {
if (FAST_IS_REG(dst))
return emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst, src, srcw);
if (FAST_IS_REG(src))
@@ -2357,6 +2400,25 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compile
return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG);
}
+ if (GET_OPCODE(op) == SLJIT_CONVD_FROMS) {
+ dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;
+ if (FAST_IS_REG(src)) {
+ /* We overwrite the high bits of source. From SLJIT point of view,
+ this is not an issue.
+ Note: In SSE3, we could also use MOVDDUP and MOVSLDUP. */
+ FAIL_IF(emit_sse2_logic(compiler, UNPCKLPD_x_xm, op & SLJIT_SINGLE_OP, src, src, 0));
+ }
+ else {
+ FAIL_IF(emit_sse2_load(compiler, !(op & SLJIT_SINGLE_OP), TMP_FREG, src, srcw));
+ src = TMP_FREG;
+ }
+
+ FAIL_IF(emit_sse2_logic(compiler, CVTPD2PS_x_xm, op & SLJIT_SINGLE_OP, dst_r, src, 0));
+ if (dst_r == TMP_FREG)
+ return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG);
+ return SLJIT_SUCCESS;
+ }
+
if (SLOW_IS_REG(dst)) {
dst_r = dst;
if (dst != src)
@@ -2391,6 +2453,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compile
CHECK_ERROR();
check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
+ ADJUST_LOCAL_OFFSET(dst, dstw);
+ ADJUST_LOCAL_OFFSET(src1, src1w);
+ ADJUST_LOCAL_OFFSET(src2, src2w);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
compiler->mode32 = 1;
@@ -2440,33 +2505,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compile
return SLJIT_SUCCESS;
}
-#else
-
-SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op,
- sljit_si dst, sljit_sw dstw,
- sljit_si src, sljit_sw srcw)
-{
- CHECK_ERROR();
- /* Should cause an assertion fail. */
- check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw);
- compiler->error = SLJIT_ERR_UNSUPPORTED;
- return SLJIT_ERR_UNSUPPORTED;
-}
-
-SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op,
- sljit_si dst, sljit_sw dstw,
- sljit_si src1, sljit_sw src1w,
- sljit_si src2, sljit_sw src2w)
-{
- CHECK_ERROR();
- /* Should cause an assertion fail. */
- check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
- compiler->error = SLJIT_ERR_UNSUPPORTED;
- return SLJIT_ERR_UNSUPPORTED;
-}
-
-#endif
-
/* --------------------------------------------------------------------- */
/* Conditional instructions */
/* --------------------------------------------------------------------- */
@@ -2557,16 +2595,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compil
if (type >= SLJIT_CALL1) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
- if (src == SLJIT_SCRATCH_REG3) {
+ if (src == SLJIT_R2) {
EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
src = TMP_REG1;
}
- if (src == SLJIT_MEM1(SLJIT_LOCALS_REG) && type >= SLJIT_CALL3)
+ if (src == SLJIT_MEM1(SLJIT_SP) && type >= SLJIT_CALL3)
srcw += sizeof(sljit_sw);
#endif
#endif
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && defined(_WIN64)
- if (src == SLJIT_SCRATCH_REG3) {
+ if (src == SLJIT_R2) {
EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
src = TMP_REG1;
}
@@ -2732,8 +2770,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *com
}
if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && dst == src && reg_map[dst] <= 4) {
- SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SCRATCH_REG1] == 0, scratch_reg1_must_be_eax);
- if (dst != SLJIT_SCRATCH_REG1) {
+ SLJIT_COMPILE_ASSERT(reg_map[SLJIT_R0] == 0, scratch_reg1_must_be_eax);
+ if (dst != SLJIT_R0) {
inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 3 + 2 + 1);
FAIL_IF(!inst);
INC_SIZE(1 + 3 + 2 + 1);
@@ -2802,23 +2840,23 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_local_base(struct sljit_compiler *co
compiler->mode32 = 0;
#endif
- ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_LOCALS_REG), offset);
+ ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_SP), offset);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
if (NOT_HALFWORD(offset)) {
FAIL_IF(emit_load_imm64(compiler, TMP_REG1, offset));
#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
- SLJIT_ASSERT(emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_LOCALS_REG, 0, TMP_REG1, 0) != SLJIT_ERR_UNSUPPORTED);
+ SLJIT_ASSERT(emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0) != SLJIT_ERR_UNSUPPORTED);
return compiler->error;
#else
- return emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_LOCALS_REG, 0, TMP_REG1, 0);
+ return emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0);
#endif
}
#endif
if (offset != 0)
- return emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_LOCALS_REG, 0, SLJIT_IMM, offset);
- return emit_mov(compiler, dst, dstw, SLJIT_LOCALS_REG, 0);
+ return emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_SP, 0, SLJIT_IMM, offset);
+ return emit_mov(compiler, dst, dstw, SLJIT_SP, 0);
}
SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw init_value)