Improve process_ea and introduce -OL

Two fixes: 1. Optimization of [bx+0xFFFF] etc.: 0xFFFF is an sbyte under 16-bit semantics, so make sure it is checked correctly. 2. Don't optimize displacements in -O0: displacements that fit into an sbyte or can be removed should *not* be optimized in -O0. Implicit zero displacements are still optimized, e.g.: [eax] -> 0-bit displacement, [ebp] -> 8-bit displacement. However, explicit displacements are not optimized: [eax+0] -> 32-bit displacement, [ebp+0] -> 32-bit displacement. Because #2 breaks compatibility with 0.98, I introduced a new optimization level: -OL (legacy).
author: Victor van den Elzen <victor.vde@gmail.com> 2009-03-31 04:59:44 +0200
committer: Victor van den Elzen <victor.vde@gmail.com> 2010-07-24 22:00:12 +0200
commit: ac732cb6a599836bf4c988e59ac6de4498758c72 (patch)
tree: 9983e099a978cd40786f9529eb687a6d5a14d265 /assemble.c
parent: 088d151130b427367766057feadd8351e03ee19b (diff)
download: nasm-ac732cb6a599836bf4c988e59ac6de4498758c72.tar.gz
1 files changed, 59 insertions, 40 deletions
diff --git a/assemble.c b/assemble.c
index 3932ab02..dbda08da 100644
--- a/assemble.c
+++ b/assemble.c
@@ -751,36 +751,59 @@ int64_t insn_size(int32_t segment, int64_t offset, int bits, uint32_t cp,
     }
 }
 
-static bool possible_sbyte(operand *o)
+static bool possible_sbyte(operand *o, int min_optimizing)
 {
     return o->wrt == NO_SEG && o->segment == NO_SEG &&
 	!(o->opflags & OPFLAG_UNKNOWN) &&
-	optimizing >= 0 && !(o->type & STRICT);
+	optimizing >= min_optimizing && !(o->type & STRICT);
 }
 
 /* check that opn[op]  is a signed byte of size 16 or 32 */
-static bool is_sbyte16(operand *o)
+static bool is_sbyte16(operand *o, int min_optimizing)
 {
     int16_t v;
 
-    if (!possible_sbyte(o))
+    if (!possible_sbyte(o, min_optimizing))
 	return false;
 
     v = o->offset;
     return v >= -128 && v <= 127;
 }
 
-static bool is_sbyte32(operand *o)
+static bool is_sbyte32(operand *o, int min_optimizing)
 {
     int32_t v;
 
-    if (!possible_sbyte(o))
+    if (!possible_sbyte(o, min_optimizing))
 	return false;
 
     v = o->offset;
     return v >= -128 && v <= 127;
 }
 
+/* Check if o is zero of size 16 or 32 */
+static bool is_zero16(operand *o, int min_optimizing)
+{
+    int16_t v;
+
+    if (!possible_sbyte(o, min_optimizing))
+	return false;
+
+    v = o->offset;
+    return v == 0;
+}
+
+static bool is_zero32(operand *o, int min_optimizing)
+{
+    int32_t v;
+
+    if (!possible_sbyte(o, min_optimizing))
+	return false;
+
+    v = o->offset;
+    return v == 0;
+}
+
 /* Common construct */
 #define case4(x) case (x): case (x)+1: case (x)+2: case (x)+3
 
@@ -882,7 +905,7 @@ static int64_t calcsize(int32_t segment, int64_t offset, int bits,
             break;
 
 	case4(0140):
-            length += is_sbyte16(opx) ? 1 : 2;
+            length += is_sbyte16(opx, 0) ? 1 : 2;
             break;
 
 	case4(0144):
@@ -891,7 +914,7 @@ static int64_t calcsize(int32_t segment, int64_t offset, int bits,
             break;
 
 	case4(0150):
-            length += is_sbyte32(opx) ? 1 : 4;
+            length += is_sbyte32(opx, 0) ? 1 : 4;
             break;
 
 	case4(0154):
@@ -922,7 +945,7 @@ static int64_t calcsize(int32_t segment, int64_t offset, int bits,
 	    break;
 
 	case4(0250):
-            length += is_sbyte32(opx) ? 1 : 4;
+            length += is_sbyte32(opx, 0) ? 1 : 4;
             break;
 
 	case4(0254):
@@ -1418,7 +1441,7 @@ static void gencode(int32_t segment, int64_t offset, int bits,
 	case4(0140):
             data = opx->offset;
             warn_overflow_opd(opx, 2);
-            if (is_sbyte16(opx)) {
+            if (is_sbyte16(opx, 0)) {
                 bytes[0] = data;
                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
                     NO_SEG);
@@ -1433,7 +1456,7 @@ static void gencode(int32_t segment, int64_t offset, int bits,
 	case4(0144):
 	    EMIT_REX();
             bytes[0] = *codes++;
-            if (is_sbyte16(opx))
+            if (is_sbyte16(opx, 0))
                 bytes[0] |= 2;  /* s-bit */
             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
             offset++;
@@ -1442,7 +1465,7 @@ static void gencode(int32_t segment, int64_t offset, int bits,
 	case4(0150):
             data = opx->offset;
             warn_overflow_opd(opx, 4);
-            if (is_sbyte32(opx)) {
+            if (is_sbyte32(opx, 0)) {
                 bytes[0] = data;
                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
                     NO_SEG);
@@ -1457,7 +1480,7 @@ static void gencode(int32_t segment, int64_t offset, int bits,
 	case4(0154):
 	    EMIT_REX();
             bytes[0] = *codes++;
-            if (is_sbyte32(opx))
+            if (is_sbyte32(opx, 0))
                 bytes[0] |= 2;  /* s-bit */
             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
             offset++;
@@ -1521,7 +1544,7 @@ static void gencode(int32_t segment, int64_t offset, int bits,
 		errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
 			"signed dword immediate exceeds bounds");
 	    }
-            if (is_sbyte32(opx)) {
+            if (is_sbyte32(opx, 0)) {
                 bytes[0] = data;
                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
                     NO_SEG);
@@ -2206,7 +2229,9 @@ static enum match_result matches(const struct itemplate *itemp,
 static ea *process_ea(operand * input, ea * output, int bits,
 		      int addrbits, int rfield, opflags_t rflags)
 {
-    bool forw_ref = !!(input->opflags & OPFLAG_UNKNOWN);
+    bool byte_offs = !!(input->eaflags & EAF_BYTEOFFS);
+    bool word_offs = !!(input->eaflags & EAF_WORDOFFS);
+    bool no_offs   = !!(input->eaflags & EAF_NO_OFFS);
 
     output->rip = false;
 
@@ -2267,7 +2292,6 @@ static ea *process_ea(operand * input, ea * output, int bits,
             }
         } else {                /* it's an indirection */
             int i = input->indexreg, b = input->basereg, s = input->scale;
-            int32_t seg = input->segment;
             int hb = input->hintbase, ht = input->hinttype;
             int t, it, bt;	 	/* register numbers */
 	    opflags_t x, ix, bx;	/* register flags */
@@ -2295,7 +2319,7 @@ static ea *process_ea(operand * input, ea * output, int bits,
 	    if ((ix|bx) & (BITS32|BITS64)) {
                 /* it must be a 32/64-bit memory reference. Firstly we have
                  * to check that all registers involved are type E/Rxx. */
-		int32_t sok = BITS32|BITS64, o = input->offset;
+		int32_t sok = BITS32|BITS64;
 
                 if (it != -1) {
 		    if (!(REG64 & ~ix) || !(REG32 & ~ix))
@@ -2365,15 +2389,13 @@ static ea *process_ea(operand * input, ea * output, int bits,
                         mod = 0;
                     } else {
                         rm = (bt & 7);
-                        if (rm != REG_NUM_EBP && o == 0 &&
-                                seg == NO_SEG && !forw_ref &&
-                                !(input->eaflags &
-                                  (EAF_BYTEOFFS | EAF_WORDOFFS)))
+                        if (rm != REG_NUM_EBP &&
+                            (no_offs || is_zero32(input, -1)) &&
+                            !(byte_offs || word_offs))
                             mod = 0;
-                        else if (input->eaflags & EAF_BYTEOFFS ||
-                                 (o >= -128 && o <= 127 && seg == NO_SEG
-                                  && !forw_ref
-                                  && !(input->eaflags & EAF_WORDOFFS)))
+                        else if (byte_offs ||
+                                 (! word_offs && is_sbyte32(input, -1)) ||
+                                 (rm == REG_NUM_EBP && no_offs))
                             mod = 1;
                         else
                             mod = 2;
@@ -2413,15 +2435,13 @@ static ea *process_ea(operand * input, ea * output, int bits,
                         mod = 0;
                     } else {
                         base = (bt & 7);
-                        if (base != REG_NUM_EBP && o == 0 &&
-                                    seg == NO_SEG && !forw_ref &&
-                                    !(input->eaflags &
-                                      (EAF_BYTEOFFS | EAF_WORDOFFS)))
+                        if (base != REG_NUM_EBP &&
+                            (no_offs || is_zero32(input, -1)) &&
+                            !(byte_offs || word_offs))
                             mod = 0;
-                        else if (input->eaflags & EAF_BYTEOFFS ||
-                                 (o >= -128 && o <= 127 && seg == NO_SEG
-                                  && !forw_ref
-                                  && !(input->eaflags & EAF_WORDOFFS)))
+                        else if (byte_offs ||
+                                 (! word_offs && is_sbyte32(input, -1)) ||
+                                 (base == REG_NUM_EBP && no_offs))
                             mod = 1;
                         else
                             mod = 2;
@@ -2434,7 +2454,6 @@ static ea *process_ea(operand * input, ea * output, int bits,
                 }
             } else {            /* it's 16-bit */
                 int mod, rm;
-                int16_t o = input->offset;
 
                 /* check for 64-bit long mode */
                 if (addrbits == 64)
@@ -2504,13 +2523,13 @@ static ea *process_ea(operand * input, ea * output, int bits,
                 if (rm == -1)   /* can't happen, in theory */
                     return NULL;        /* so panic if it does */
 
-                if (o == 0 && seg == NO_SEG && !forw_ref && rm != 6 &&
-                    !(input->eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
+                if (rm != 6 &&
+                    (no_offs || is_zero16(input, -1)) &&
+                    !(byte_offs || word_offs))
                     mod = 0;
-                else if (input->eaflags & EAF_BYTEOFFS ||
-                         (o >= -128 && o <= 127 && seg == NO_SEG
-                          && !forw_ref
-                          && !(input->eaflags & EAF_WORDOFFS)))
+                else if (byte_offs ||
+                         (! word_offs && is_sbyte16(input, -1)) ||
+                         (rm == 6 && no_offs))
                     mod = 1;
                 else
                     mod = 2;
author	Victor van den Elzen <victor.vde@gmail.com>	2009-03-31 04:59:44 +0200
committer	Victor van den Elzen <victor.vde@gmail.com>	2010-07-24 22:00:12 +0200
commit	ac732cb6a599836bf4c988e59ac6de4498758c72 (patch)
tree	9983e099a978cd40786f9529eb687a6d5a14d265 /assemble.c
parent	088d151130b427367766057feadd8351e03ee19b (diff)
download	nasm-ac732cb6a599836bf4c988e59ac6de4498758c72.tar.gz