Allow underscores in numbers; better detection of FP

- Allow underscores as group separators in numbers; for example, 0x1234_5678 is now a legal number. The underscore is simply ignored and carries no meaning.
- Recognize dotless floating-point numbers, such as "1e30". This requires the scanner to distinguish hexadecimal numbers, since e.g. 0x1e30 is a perfectly legitimate hex constant.
author: H. Peter Anvin <hpa@zytor.com> 2007-10-19 13:10:46 -0700
committer: H. Peter Anvin <hpa@zytor.com> 2007-10-19 13:10:46 -0700
commit: 2ef4aac272ff0d2ac0bf630f11e6d8d3e19b27d5 (patch)
tree: 1fb0b1e1b0116950ba3836ea9a4fda9edba1f939
parent: a8eace2b79b1068e54c4af93c41b6e58ba879b83 (diff)
download: nasm-2ef4aac272ff0d2ac0bf630f11e6d8d3e19b27d5.tar.gz
3 files changed, 110 insertions, 78 deletions
diff --git a/float.c b/float.c
index ec37775e..3aa6e5f5 100644
--- a/float.c
+++ b/float.c
@@ -113,6 +113,53 @@ static int float_multiply(uint16_t * to, uint16_t * from)
 
 /*
  * ---------------------------------------------------------------------------
+ *  read an exponent; returns INT32_MAX on error
+ * ---------------------------------------------------------------------------
+ */
+int32_t read_exponent(const char *string, int32_t max)
+{
+    int32_t i = 0;
+    bool neg = false;
+    
+    if (*string == '+') {
+	string++;
+    } else if (*string == '-') {
+	neg = true;
+	string++;
+    }
+    while (*string) {
+	if (*string >= '0' && *string <= '9') {
+	    i = (i * 10) + (*string - '0');
+	    
+	    /*
+	     * To ensure that underflows and overflows are
+	     * handled properly we must avoid wraparounds of
+	     * the signed integer value that is used to hold
+	     * the exponent. Therefore we stop accumulating
+	     * digits as soon as the magnitude exceeds the
+	     * caller-supplied max; any remaining characters
+	     * are left unexamined, and the returned value
+	     * may be larger in magnitude than max.
+	     */
+	    if (i > max) {
+		break;
+	    }
+	} else if (*string == '_') {
+	    /* underscore is only a digit-group separator: skip it */
+	} else {
+	    error(ERR_NONFATAL,
+		  "invalid character in floating-point constant %s: '%c'",
+		  "exponent", *string);
+	    return INT32_MAX;
+	}
+	string++;
+    }
+
+    return neg ? -i : i;
+}
+
+/*
+ * ---------------------------------------------------------------------------
  *  convert
  * ---------------------------------------------------------------------------
  */
@@ -161,7 +208,6 @@ static bool ieee_flconvert(const char *string, uint16_t * mant,
                 }
             }
         } else if (*string == '_') {
-
             /* do nothing */
         } else {
             error(ERR_NONFATAL,
@@ -171,48 +217,15 @@ static bool ieee_flconvert(const char *string, uint16_t * mant,
         }
         string++;
     }
+    
     if (*string) {
-        int32_t i = 0;
-        bool neg = false;
-        string++;               /* eat the E */
-        if (*string == '+') {
-            string++;
-        } else if (*string == '-') {
-            neg = true;
-            string++;
-        }
-        while (*string) {
-            if (*string >= '0' && *string <= '9') {
-                i = (i * 10) + (*string - '0');
-
-                /*
-                 * To ensure that underflows and overflows are
-                 * handled properly we must avoid wraparounds of
-                 * the signed integer value that is used to hold
-                 * the exponent. Therefore we cap the exponent at
-                 * +/-5000, which is slightly more/less than
-                 * what's required for normal and denormal numbers
-                 * in single, double, and extended precision, but
-                 * sufficient to avoid signed integer wraparound.
-                 */
-                if (i > 5000) {
-                    break;
-                }
-            } else if (*string == '_') {
+	int32_t e;
 
-                /* do nothing */
-            } else {
-                error(ERR_NONFATAL,
-                      "invalid character in floating-point constant %s: '%c'",
-                      "exponent", *string);
-                return false;
-            }
-            string++;
-        }
-        if (neg) {
-            i = 0 - i;
-        }
-        tenpwr += i;
+        string++;               /* eat the E */
+	e = read_exponent(string, 5000);
+	if (e == INT32_MAX)
+	    return false;
+	tenpwr += e;
     }
 
     /*
@@ -480,8 +493,14 @@ static bool ieee_flconvert_hex(const char *string, uint16_t * mant,
                     twopwr -= 4;
             }
         } else if (c == 'p' || c == 'P') {
-            twopwr += atoi(string);
+	    int32_t e;
+	    e = read_exponent(string, 16384);
+	    if (e == INT32_MAX)
+		return false;
+	    twopwr += e;
             break;
+	} else if (c == '_') {
+	    /* ignore */
         } else {
             error(ERR_NONFATAL,
                   "floating-point constant: `%c' is invalid character", c);
diff --git a/nasmlib.c b/nasmlib.c
index 1951cffe..7f7fdef7 100644
--- a/nasmlib.c
+++ b/nasmlib.c
@@ -193,7 +193,7 @@ char *nasm_strsep(char **stringp, const char *delim)
 #endif
 
 
-#define lib_isnumchar(c)   ( isalnum(c) || (c) == '$')
+#define lib_isnumchar(c)   (isalnum(c) || (c) == '$' || (c) == '_')
 #define numvalue(c)  ((c)>='a' ? (c)-'a'+10 : (c)>='A' ? (c)-'A'+10 : (c)-'0')
 
 int64_t readnum(char *str, bool *error)
@@ -274,16 +274,19 @@ int64_t readnum(char *str, bool *error)
 
     result = 0;
     while (*r && r < q) {
-        if (*r < '0' || (*r > '9' && *r < 'A')
-            || (digit = numvalue(*r)) >= radix) {
-            *error = true;
-            return 0;
-        }
-        if (result > checklimit || (result == checklimit && digit >= last)) {
-            warn = true;
-        }
-
-        result = radix * result + digit;
+	if (*r != '_') {
+	    if (*r < '0' || (*r > '9' && *r < 'A')
+		|| (digit = numvalue(*r)) >= radix) {
+		*error = true;
+		return 0;
+	    }
+	    if (result > checklimit ||
+		(result == checklimit && digit >= last)) {
+		warn = true;
+	    }
+	    
+	    result = radix * result + digit;
+	}
         r++;
     }
 
diff --git a/stdscan.c b/stdscan.c
index e06fb843..3979144e 100644
--- a/stdscan.c
+++ b/stdscan.c
@@ -116,34 +116,44 @@ int stdscan(void *private_data, struct tokenval *tv)
         return tv->t_type = TOKEN_HERE;
     } else if (isnumstart(*stdscan_bufptr)) {   /* now we've got a number */
         bool rn_error;
+	bool is_hex = false;
+	bool is_float = false;
+	char c;
 
         r = stdscan_bufptr++;
-        while (isnumchar(*stdscan_bufptr))
-            stdscan_bufptr++;
 
-        if (*stdscan_bufptr == '.') {
-            /*
-             * a floating point constant
-             */
-            stdscan_bufptr++;
-            while (isnumchar(*stdscan_bufptr) ||
-                   ((stdscan_bufptr[-1] == 'e'
-                     || stdscan_bufptr[-1] == 'E'
-		     || stdscan_bufptr[-1] == 'p'
-		     || stdscan_bufptr[-1] == 'P')
-                    && (*stdscan_bufptr == '-' || *stdscan_bufptr == '+'))) {
-                stdscan_bufptr++;
-            }
-            tv->t_charptr = stdscan_copy(r, stdscan_bufptr - r);
-            return tv->t_type = TOKEN_FLOAT;
-        }
-        r = stdscan_copy(r, stdscan_bufptr - r);
-        tv->t_integer = readnum(r, &rn_error);
-        stdscan_pop();
-        if (rn_error)
-            return tv->t_type = TOKEN_ERRNUM;   /* some malformation occurred */
-        tv->t_charptr = NULL;
-        return tv->t_type = TOKEN_NUM;
+	if (r[0] == '$' || (r[0] == '0' || (r[1] == 'x' || r[1] == 'X')))
+	    is_hex = true;
+
+	for (;;) {
+	    c = *stdscan_bufptr++;
+
+	    if ((!is_hex && (c == 'e' || c == 'E')) ||
+		(c == 'P' || c == 'p')) {
+		is_float = true;
+		if (*stdscan_bufptr == '+' || *stdscan_bufptr == '-')
+		    stdscan_bufptr++;
+	    } else if (isnumchar(c) || c == '_')
+		; /* just advance */
+	    else if (c == '.')
+		is_float = true;
+	    else
+		break;
+	}
+	stdscan_bufptr--;	/* Point to first character beyond number */
+
+	if (is_float) {
+	    tv->t_charptr = stdscan_copy(r, stdscan_bufptr - r);
+	    return tv->t_type = TOKEN_FLOAT;
+	} else {
+	    r = stdscan_copy(r, stdscan_bufptr - r);
+	    tv->t_integer = readnum(r, &rn_error);
+	    stdscan_pop();
+	    if (rn_error)
+		return tv->t_type = TOKEN_ERRNUM;   /* some malformation occurred */
+	    tv->t_charptr = NULL;
+	    return tv->t_type = TOKEN_NUM;
+	}
     } else if (*stdscan_bufptr == '\'' || *stdscan_bufptr == '"') {     /* a char constant */
         char quote = *stdscan_bufptr++, *r;
         bool rn_warn;
author	H. Peter Anvin <hpa@zytor.com>	2007-10-19 13:10:46 -0700
committer	H. Peter Anvin <hpa@zytor.com>	2007-10-19 13:10:46 -0700
commit	2ef4aac272ff0d2ac0bf630f11e6d8d3e19b27d5 (patch)
tree	1fb0b1e1b0116950ba3836ea9a4fda9edba1f939
parent	a8eace2b79b1068e54c4af93c41b6e58ba879b83 (diff)
download	nasm-2ef4aac272ff0d2ac0bf630f11e6d8d3e19b27d5.tar.gz