qstring: first cut at full quoted string support in the preprocessor

First attempt at properly handling quoted strings in the preprocessor. This also adds range support in %substr. No support in the assembler yet.
author: H. Peter Anvin <hpa@zytor.com> 2008-06-01 17:23:51 -0700
committer: H. Peter Anvin <hpa@zytor.com> 2008-06-01 17:23:51 -0700
commit: 8cad14bbcf0b8c056e6f81dccf4af38537e0bac6 (patch)
tree: 454627762c5d12027c8877c7cdde360673cca0d0
parent: 7f2f8b35e6b8ea67fe620f3363b5eaa06f55a222 (diff)
download: nasm-8cad14bbcf0b8c056e6f81dccf4af38537e0bac6.tar.gz
6 files changed, 545 insertions, 267 deletions
diff --git a/Makefile.in b/Makefile.in
index bd037841..230bd069 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -65,7 +65,7 @@ NASM =	nasm.$(O) nasmlib.$(O) float.$(O) insnsa.$(O) insnsb.$(O) \
 	output/outelf32.$(O) output/outelf64.$(O) \
 	output/outobj.$(O) output/outas86.$(O) output/outrdf2.$(O) \
 	output/outdbg.$(O) output/outieee.$(O) output/outmacho.$(O) \
-	preproc.$(O) pptok.$(O) macros.$(O) \
+	preproc.$(O) quote.$(O) pptok.$(O) macros.$(O) \
 	listing.$(O) eval.$(O) exprlib.$(O) stdscan.$(O) tokhash.$(O) \
 	regvals.$(O) regflags.$(O)
 
diff --git a/nasmlib.c b/nasmlib.c
index 8cd41cf8..d74b8acf 100644
--- a/nasmlib.c
+++ b/nasmlib.c
@@ -930,231 +930,6 @@ int src_get(int32_t *xline, char **xname)
     return 0;
 }
 
-/* XXX: This is broken for strings which contain multiple quotes...
-   NASM doesn't have a sane syntax for dealing with those currently. */
-void nasm_quote(char **str)
-{
-    int ln = strlen(*str);
-    char q = (*str)[0];
-    char *p;
-    if (ln > 1 && (*str)[ln - 1] == q && (q == '"' || q == '\''))
-        return;
-    q = '"';
-    if (strchr(*str, q))
-        q = '\'';
-    p = nasm_malloc(ln + 3);
-    strcpy(p + 1, *str);
-    nasm_free(*str);
-    p[ln + 1] = p[0] = q;
-    p[ln + 2] = 0;
-    *str = p;
-}
-
-static char *emit_utf8(char *q, int32_t v)
-{
-    if (v < 0) {
-	/* Impossible - do nothing */
-    } else if (v <= 0x7f) {
-	*q++ = v;
-    } else if (v <= 0x000007ff) {
-	*q++ = 0xc0 | (v >> 6);
-	*q++ = 0x80 | (v & 63);
-    } else if (v <= 0x0000ffff) {
-	*q++ = 0xe0 | (v >> 12);
-	*q++ = 0x80 | ((v >> 6) & 63);
-	*q++ = 0x80 | (v & 63);
-    } else if (v <= 0x001fffff) {
-	*q++ = 0xf0 | (v >> 18);
-	*q++ = 0x80 | ((v >> 12) & 63);
-	*q++ = 0x80 | ((v >> 6) & 63);
-	*q++ = 0x80 | (v & 63);
-    } else if (v <= 0x03ffffff) {
-	*q++ = 0xf8 | (v >> 24);
-	*q++ = 0x80 | ((v >> 18) & 63);
-	*q++ = 0x80 | ((v >> 12) & 63);
-	*q++ = 0x80 | ((v >> 6) & 63);
-	*q++ = 0x80 | (v & 63);
-    } else {
-	*q++ = 0xfc | (v >> 30);
-	*q++ = 0x80 | ((v >> 24) & 63);
-	*q++ = 0x80 | ((v >> 18) & 63);
-	*q++ = 0x80 | ((v >> 12) & 63);
-	*q++ = 0x80 | ((v >> 6) & 63);
-	*q++ = 0x80 | (v & 63);
-    }
-    return q;
-}
-
-/*
- * Do an *in-place* dequoting of the specified string, returning the
- * resulting length (which may be containing embedded nulls.)
- *
- * In-place replacement is possible since the unquoted length is always
- * shorter than or equal to the quoted length.
- */
-size_t nasm_unquote(char *str)
-{
-    size_t ln;
-    char bq, eq;
-    char *p, *q, *ep, *escp;
-    char c;
-    enum unq_state {
-	st_start,
-	st_backslash,
-	st_hex,
-	st_oct,
-	st_ucs,
-    } state;
-    int ndig = 0;
-    int32_t nval = 0;
-
-    bq = str[0];
-    if (!bq)
-	return 0;
-    ln = strlen(str);
-    eq = str[ln-1];
-
-    if ((bq == '\'' || bq == '\"') && bq == eq) {
-	/* '...' or "..." string */
-	memmove(str, str+1, ln-2);
-	str[ln-2] = '\0';
-	return ln-2;
-    }
-    if (bq == '`' || eq == '`') {
-	/* `...` string */
-	q = str;
-	p = str+1;
-	ep = str+ln-1;
-	state = st_start;
-
-	while (p < ep) {
-	    c = *p++;
-	    switch (state) {
-	    case st_start:
-		if (c == '\\')
-		    state = st_backslash;
-		else
-		    *q++ = c;
-		break;
-
-	    case st_backslash:
-		state = st_start;
-		escp = p-1;
-		switch (c) {
-		case 'a':
-		    *q++ = 7;
-		    break;
-		case 'b':
-		    *q++ = 8;
-		    break;
-		case 'e':
-		    *q++ = 27;
-		    break;
-		case 'f':
-		    *q++ = 12;
-		    break;
-		case 'n':
-		    *q++ = 10;
-		    break;
-		case 'r':
-		    *q++ = 13;
-		    break;
-		case 't':
-		    *q++ = 9;
-		    break;
-		case 'u':
-		    state = st_ucs;
-		    ndig = 4;
-		    nval = 0;
-		    break;
-		case 'U':
-		    state = st_ucs;
-		    ndig = 8;
-		    nval = 0;
-		    break;
-		case 'v':
-		    *q++ = 11;
-		case 'x':
-		case 'X':
-		    state = st_hex;
-		    ndig = nval = 0;
-		    break;
-		case '0':
-		case '1':
-		case '2':
-		case '3':
-		case '4':
-		case '5':
-		case '6':
-		case '7':
-		    state = st_oct;
-		    ndig = 1;
-		    nval = c - '0';
-		    break;
-		default:
-		    *q++ = c;
-		    break;
-		}
-		break;
-
-	    case st_oct:
-		if (c >= '0' && c <= '7') {
-		    nval = (nval << 3) + (c - '0');
-		    if (++ndig >= 3) {
-			*q++ = nval;
-			state = st_start;
-		    }
-		} else {
-		    p--;	/* Process this character again */
-		    *q++ = nval;
-		    state = st_start;
-		}
-		break;
-
-	    case st_hex:
-		if ((c >= '0' && c <= '9') ||
-		    (c >= 'A' && c <= 'F') ||
-		    (c >= 'a' && c <= 'f')) {
-		    nval = (nval << 4) + numvalue(c);
-		    if (++ndig >= 2) {
-			*q++ = nval;
-			state = st_start;
-		    }
-		} else {
-		    p--;	/* Process this character again */
-		    *q++ = ndig ? nval : *escp;
-		    state = st_start;
-		}
-		break;
-
-	    case st_ucs:
-		if ((c >= '0' && c <= '9') ||
-		    (c >= 'A' && c <= 'F') ||
-		    (c >= 'a' && c <= 'f')) {
-		    nval = (nval << 4) + numvalue(c);
-		    if (!--ndig) {
-			q = emit_utf8(q, nval);
-			state = st_start;
-		    }
-		} else {
-		    p--;	/* Process this character again */
-		    if (p > escp+1)
-			q = emit_utf8(q, nval);
-		    else
-			*q++ = *escp;
-		    state = st_start;
-		}
-		break;
-	    }
-	}
-	*q = '\0';
-	return q-str;
-    }
-
-    /* Otherwise, just return the input... */
-    return ln;
-}
-
 char *nasm_strcat(char *one, char *two)
 {
     char *rslt;
diff --git a/nasmlib.h b/nasmlib.h
index 2f9f87dd..39137af9 100644
--- a/nasmlib.h
+++ b/nasmlib.h
@@ -395,7 +395,6 @@ int32_t src_get_linnum(void);
  */
 int src_get(int32_t *xline, char **xname);
 
-void nasm_quote(char **str);
 char *nasm_strcat(char *one, char *two);
 
 void null_debug_routine(const char *directive, const char *params);
diff --git a/preproc.c b/preproc.c
index 6f8eb0b5..46f8c6b6 100644
--- a/preproc.c
+++ b/preproc.c
@@ -48,6 +48,7 @@
 #include "nasmlib.h"
 #include "preproc.h"
 #include "hashtbl.h"
+#include "quote.h"
 #include "stdscan.h"
 #include "tokens.h"
 #include "tables.h"
@@ -795,15 +796,12 @@ static Token *tokenize(char *line)
             p++;
             while (*p && isidchar(*p))
                 p++;
-        } else if (*p == '\'' || *p == '"') {
+        } else if (*p == '\'' || *p == '"' || *p == '`') {
             /*
              * A string token.
              */
-            char c = *p;
-            p++;
             type = TOK_STRING;
-            while (*p && *p != c)
-                p++;
+	    p = nasm_skip_string(p);
 
             if (*p) {
                 p++;
@@ -1514,6 +1512,7 @@ static bool if_condition(Token * tline, enum preproc_token ct)
                 break;
             }
             /* Unify surrounding quotes for strings */
+	    /* XXX: this doesn't work anymore */
             if (t->type == TOK_STRING) {
                 tt->text[0] = t->text[0];
                 tt->text[strlen(tt->text) - 1] = t->text[0];
@@ -2079,11 +2078,9 @@ static int do_directive(Token * tline)
         if (tline->next)
             error(ERR_WARNING,
                   "trailing garbage after `%%depend' ignored");
-        if (tline->type != TOK_INTERNAL_STRING) {
-            p = tline->text + 1;        /* point past the quote to the name */
-            p[strlen(p) - 1] = '\0';    /* remove the trailing quote */
-        } else
-            p = tline->text;    /* internal_string is easier */
+	p = tline->text;
+        if (tline->type != TOK_INTERNAL_STRING)
+	    nasm_unquote(p);
 	if (dephead && !in_list(*dephead, p)) {
 	    StrList *sl = nasm_malloc(strlen(p)+1+sizeof sl->next);
 	    sl->next = NULL;
@@ -2107,11 +2104,9 @@ static int do_directive(Token * tline)
         if (tline->next)
             error(ERR_WARNING,
                   "trailing garbage after `%%include' ignored");
-        if (tline->type != TOK_INTERNAL_STRING) {
-            p = tline->text + 1;        /* point past the quote to the name */
-            p[strlen(p) - 1] = '\0';    /* remove the trailing quote */
-        } else
-            p = tline->text;    /* internal_string is easier */
+	p = tline->text;
+        if (tline->type != TOK_INTERNAL_STRING)
+	    nasm_unquote(p);
         inc = nasm_malloc(sizeof(Include));
         inc->next = istk;
         inc->conds = NULL;
@@ -2186,14 +2181,14 @@ static int do_directive(Token * tline)
         tline = tline->next;
         skip_white_(tline);
         if (tok_type_(tline, TOK_STRING)) {
-            p = tline->text + 1;        /* point past the quote to the name */
-            p[strlen(p) - 1] = '\0';    /* remove the trailing quote */
-	    expand_macros_in_string(&p);
+	    p = tline->text;
+	    nasm_unquote(p);
+	    expand_macros_in_string(&p); /* WHY? */
             error(ERR_NONFATAL, "%s", p);
             nasm_free(p);
         } else {
             p = detoken(tline, false);
-            error(ERR_WARNING, "%s", p);
+            error(ERR_WARNING, "%s", p); /* WARNING!??!! */
             nasm_free(p);
         }
         free_tlist(origline);
@@ -2670,11 +2665,9 @@ static int do_directive(Token * tline)
         if (t->next)
             error(ERR_WARNING,
                   "trailing garbage after `%%pathsearch' ignored");
-        if (t->type != TOK_INTERNAL_STRING) {
-            p = t->text + 1;        /* point past the quote to the name */
-            p[strlen(p) - 1] = '\0';    /* remove the trailing quote */
-        } else
-            p = t->text;    /* internal_string is easier */
+	p = tline->text;
+        if (tline->type != TOK_INTERNAL_STRING)
+	    nasm_unquote(p);
 
 	fp = inc_fopen(p, &xsl, &xsl, true);
 	if (fp) {
@@ -2683,8 +2676,7 @@ static int do_directive(Token * tline)
 	}
         macro_start = nasm_malloc(sizeof(*macro_start));
         macro_start->next = NULL;
-	macro_start->text = nasm_strdup(p);
-	nasm_quote(&macro_start->text);
+	macro_start->text = nasm_quote(p, strlen(p));
 	macro_start->type = TOK_STRING;
         macro_start->mac = NULL;
 	if (xsl)
@@ -2736,7 +2728,7 @@ static int do_directive(Token * tline)
 
         macro_start = nasm_malloc(sizeof(*macro_start));
         macro_start->next = NULL;
-        make_tok_num(macro_start, strlen(t->text) - 2);
+        make_tok_num(macro_start, nasm_unquote(t->text));
         macro_start->mac = NULL;
 
         /*
@@ -2750,6 +2742,10 @@ static int do_directive(Token * tline)
         return DIRECTIVE_FOUND;
 
     case PP_SUBSTR:
+    {
+	int64_t a1, a2;
+	size_t len;
+	
 	casesense = true;
 
         tline = tline->next;
@@ -2786,29 +2782,50 @@ static int do_directive(Token * tline)
         tt = t->next;
         tptr = &tt;
         tokval.t_type = TOKEN_INVALID;
-        evalresult =
-            evaluate(ppscan, tptr, &tokval, NULL, pass, error, NULL);
+        evalresult = evaluate(ppscan, tptr, &tokval, NULL,
+			      pass, error, NULL);
         if (!evalresult) {
             free_tlist(tline);
             free_tlist(origline);
             return DIRECTIVE_FOUND;
-        }
-        if (!is_simple(evalresult)) {
+        } else if (!is_simple(evalresult)) {
             error(ERR_NONFATAL, "non-constant value given to `%%substr`");
             free_tlist(tline);
             free_tlist(origline);
             return DIRECTIVE_FOUND;
         }
+	a1 = evalresult->value-1;
+
+        while (tok_type_(tt, TOK_WHITESPACE))
+            tt = tt->next;
+	if (!tt) {
+	    a2 = 1;		/* Backwards compatibility: one character */
+	} else {
+	    tokval.t_type = TOKEN_INVALID;
+	    evalresult = evaluate(ppscan, tptr, &tokval, NULL,
+				  pass, error, NULL);
+	    if (!evalresult) {
+		free_tlist(tline);
+		free_tlist(origline);
+		return DIRECTIVE_FOUND;
+	    } else if (!is_simple(evalresult)) {
+		error(ERR_NONFATAL, "non-constant value given to `%%substr`");
+		free_tlist(tline);
+		free_tlist(origline);
+		return DIRECTIVE_FOUND;
+	    }
+	    a2 = evalresult->value;
+	}
+
+	len = nasm_unquote(t->text);
+	if (a2 < 0)
+	    a2 = a2+1+len-a1;
+	if (a1+a2 > (int64_t)len)
+	    a2 = len-a1;
 
         macro_start = nasm_malloc(sizeof(*macro_start));
         macro_start->next = NULL;
-        macro_start->text = nasm_strdup("'''");
-        if (evalresult->value > 0
-            && evalresult->value < (int) strlen(t->text) - 1) {
-            macro_start->text[1] = t->text[evalresult->value];
-        } else {
-            macro_start->text[2] = '\0';
-        }
+        macro_start->text = nasm_quote((a1 < 0) ? "" : t->text+a1, a2);
         macro_start->type = TOK_STRING;
         macro_start->mac = NULL;
 
@@ -2821,6 +2838,7 @@ static int do_directive(Token * tline)
         free_tlist(tline);
         free_tlist(origline);
         return DIRECTIVE_FOUND;
+    }
 
     case PP_ASSIGN:
     case PP_IASSIGN:
@@ -3209,9 +3227,11 @@ again:
 		    if (!m->expansion) {
 			if (!strcmp("__FILE__", m->name)) {
 			    int32_t num = 0;
-			    src_get(&num, &(tline->text));
-			    nasm_quote(&(tline->text));
+			    char *file;
+			    src_get(&num, &file);
+			    tline->text = nasm_quote(file, strlen(file));
 			    tline->type = TOK_STRING;
+			    nasm_free(file);
 			    continue;
 			}
 			if (!strcmp("__LINE__", m->name)) {
diff --git a/quote.c b/quote.c
new file mode 100644
index 00000000..0d8ee2be
--- /dev/null
+++ b/quote.c
@@ -0,0 +1,473 @@
+/* quote.c	library routines for the Netwide Assembler
+ *
+ * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
+ * Julian Hall. All rights reserved. The software is
+ * redistributable under the license given in the file "LICENSE"
+ * distributed in the NASM archive.
+ */
+
+#include "compiler.h"
+
+#include <assert.h>
+#include <stdlib.h>
+
+#include "nasmlib.h"
+#include "quote.h"
+
+#define numvalue(c)  ((c)>='a' ? (c)-'a'+10 : (c)>='A' ? (c)-'A'+10 : (c)-'0')
+
+/*
+ * Return a nasm_malloc()'d, NUL-terminated quoted copy of the len bytes
+ * at str: '...' or "..." when the bytes can be stored verbatim, else a
+ * `...` string using backslash escapes.
+ */
+char *nasm_quote(char *str, size_t len)
+{
+    unsigned char c, c1;	/* Unsigned so bytes >= 0x80 compare as > '~' */
+    char esc, *p, *q, *nstr, *ep;
+    bool sq_ok = true, dq_ok = true;
+    size_t qlen = 0;		/* Length if we need `...` quotes */
+
+    ep = str+len;
+    for (p = str; p < ep; p++) {
+	c = *p;
+	switch (c) {
+	case '\'':
+	    sq_ok = false;
+	    qlen++;
+	    break;
+	case '\"':
+	    dq_ok = false;
+	    qlen++;
+	    break;
+	case '`':
+	case '\\':
+	    qlen += 2;
+	    break;
+	default:
+	    if (c < ' ' || c > '~') {
+		sq_ok = dq_ok = false;
+		switch (c) {
+		case '\a':
+		case '\b':
+		case '\t':
+		case '\n':
+		case '\v':
+		case '\f':
+		case '\r':
+		case 27:
+		    qlen += 2;
+		    break;
+		default:
+		    c1 = (p+1 < ep) ? p[1] : 0;
+		    if (c > 077 || (c1 >= '0' && c1 <= '7'))
+			qlen += 4; /* Must use the full form */
+		    else if (c > 07)
+			qlen += 3;
+		    else
+			qlen += 2;
+		    break;
+		}
+	    } else {
+		qlen++;
+	    }
+	    break;
+	}
+    }
+
+    if (sq_ok || dq_ok) {
+	/* Use '...' or "..." */
+	nstr = nasm_malloc(len+3);
+	nstr[0] = nstr[len+1] = sq_ok ? '\'' : '\"';
+	nstr[len+2] = '\0';
+	memcpy(nstr+1, str, len);
+    } else {
+	/* Need to use `...` quoted syntax */
+	nstr = nasm_malloc(qlen+3);
+	q = nstr;
+	*q++ = '`';
+	for (p = str; p < ep; p++) {
+	    c = *p;
+	    esc = 0;		/* Set for two-character escapes */
+	    switch (c) {
+	    case '`':
+	    case '\\':
+		esc = c;
+		break;
+	    case '\a':
+		esc = 'a';
+		break;
+	    case '\b':
+		esc = 'b';
+		break;
+	    case '\t':
+		esc = 't';
+		break;
+	    case '\n':
+		esc = 'n';
+		break;
+	    case '\v':
+		esc = 'v';
+		break;
+	    case '\f':
+		esc = 'f';
+		break;
+	    case '\r':
+		esc = 'r';
+		break;
+	    case 27:
+		esc = 'e';
+		break;
+	    default:
+		break;
+	    }
+	    if (esc) {
+		*q++ = '\\';
+		*q++ = esc;
+	    } else if (c < ' ' || c > '~') {
+		/* Octal escape; 3 digits if an octal digit follows */
+		c1 = (p+1 < ep) ? p[1] : 0;
+		if (c1 >= '0' && c1 <= '7')
+		    q += sprintf(q, "\\%03o", (unsigned int)c);
+		else
+		    q += sprintf(q, "\\%o", (unsigned int)c);
+	    } else {
+		*q++ = c;
+	    }
+	}
+	*q++ = '`';
+	*q++ = '\0';
+	assert((size_t)(q-nstr) == qlen+3);
+    }
+    return nstr;
+}
+
+/*
+ * Append the UTF-8 encoding of v at q and return the pointer just past
+ * the last byte written.  Negative values emit nothing; values above
+ * 0x1fffff use the 5- and 6-byte forms.
+ */
+static char *emit_utf8(char *q, int32_t v)
+{
+    int ncont;			/* Number of continuation bytes */
+
+    if (v < 0)
+	return q;		/* Impossible - do nothing */
+    if (v <= 0x7f) {
+	*q++ = v;
+	return q;
+    }
+
+    if (v <= 0x000007ff)
+	ncont = 1;
+    else if (v <= 0x0000ffff)
+	ncont = 2;
+    else if (v <= 0x001fffff)
+	ncont = 3;
+    else if (v <= 0x03ffffff)
+	ncont = 4;
+    else
+	ncont = 5;
+
+    /* Leading byte: ncont+1 high one-bits, then the top payload bits */
+    *q++ = ((0xff00 >> (ncont+1)) & 0xff) | (v >> (6*ncont));
+    /* Continuation bytes: 6 payload bits each, most significant first */
+    while (ncont--)
+	*q++ = 0x80 | ((v >> (6*ncont)) & 63);
+    return q;
+}
+
+/*
+ * Do an *in-place* dequoting of the specified string, returning the
+ * resulting length (the result may contain embedded nulls, and is also
+ * null-terminated.)  '...' and "..." strings are taken verbatim, while
+ * `...` strings have their backslash escapes expanded.
+ *
+ * In-place replacement is possible since the unquoted length is always
+ * shorter than or equal to the quoted length.
+ */
+size_t nasm_unquote(char *str)
+{
+    size_t ln;
+    char bq, eq, c;
+    char *p, *q, *ep, *escp = NULL;
+    enum unq_state { st_start, st_backslash, st_hex, st_oct, st_ucs } state;
+    int ndig = 0;
+    int32_t nval = 0;
+
+    bq = str[0];
+    if (!bq)
+	return 0;
+    ln = strlen(str);
+    eq = str[ln-1];
+
+    if (ln >= 2 && (bq == '\'' || bq == '\"') && bq == eq) {
+	/* '...' or "..." string; a lone quote (ln == 1) is not one */
+	memmove(str, str+1, ln-2);
+	str[ln-2] = '\0';
+	return ln-2;
+    }
+    if (bq == '`' || eq == '`') {
+	/* `...` string */
+	q = str;
+	p = str+1;
+	ep = str+ln-1;
+	state = st_start;
+
+	while (p < ep) {
+	    c = *p++;
+	    switch (state) {
+	    case st_start:
+		if (c == '\\')
+		    state = st_backslash;
+		else
+		    *q++ = c;
+		break;
+
+	    case st_backslash:
+		state = st_start;
+		escp = p-1;	/* The character after the backslash */
+		switch (c) {
+		case 'a':
+		    *q++ = 7;
+		    break;
+		case 'b':
+		    *q++ = 8;
+		    break;
+		case 'e':
+		    *q++ = 27;
+		    break;
+		case 'f':
+		    *q++ = 12;
+		    break;
+		case 'n':
+		    *q++ = 10;
+		    break;
+		case 'r':
+		    *q++ = 13;
+		    break;
+		case 't':
+		    *q++ = 9;
+		    break;
+		case 'u':
+		case 'U':
+		    state = st_ucs;
+		    ndig = (c == 'u') ? 4 : 8;
+		    nval = 0;
+		    break;
+		case 'v':
+		    *q++ = 11;
+		    break;
+		case 'x':
+		case 'X':
+		    state = st_hex;
+		    ndig = nval = 0;
+		    break;
+		default:
+		    if (c >= '0' && c <= '7') {
+			state = st_oct;
+			ndig = 1;
+			nval = c - '0';
+		    } else {
+			*q++ = c;
+		    }
+		    break;
+		}
+		break;
+
+	    case st_oct:
+		/* Octal escape: at most three digits in total */
+		if (c >= '0' && c <= '7') {
+		    nval = (nval << 3) + (c - '0');
+		    if (++ndig >= 3) {
+			*q++ = nval;
+			state = st_start;
+		    }
+		} else {
+		    p--;	/* Process this character again */
+		    *q++ = nval;
+		    state = st_start;
+		}
+		break;
+
+	    case st_hex:
+		/* \x escape: at most two hex digits */
+		if ((c >= '0' && c <= '9') ||
+		    (c >= 'A' && c <= 'F') ||
+		    (c >= 'a' && c <= 'f')) {
+		    nval = (nval << 4) + numvalue(c);
+		    if (++ndig >= 2) {
+			*q++ = nval;
+			state = st_start;
+		    }
+		} else {
+		    p--;	/* Process this character again */
+		    *q++ = ndig ? nval : *escp;
+		    state = st_start;
+		}
+		break;
+
+	    case st_ucs:
+		/* \u or \U escape: ndig counts the digits still allowed */
+		if ((c >= '0' && c <= '9') ||
+		    (c >= 'A' && c <= 'F') ||
+		    (c >= 'a' && c <= 'f')) {
+		    nval = (nval << 4) + numvalue(c);
+		    if (!--ndig) {
+			q = emit_utf8(q, nval);
+			state = st_start;
+		    }
+		} else {
+		    p--;	/* Process this character again */
+		    if (p > escp+1)
+			q = emit_utf8(q, nval);
+		    else
+			*q++ = *escp;
+		    state = st_start;
+		}
+		break;
+	    }
+	}
+
+	/*
+	 * The closing quote also ends an escape still in progress; emit
+	 * it just as the loop above does when it meets a non-digit.
+	 */
+	if (state == st_oct || (state == st_hex && ndig)) {
+	    *q++ = nval;
+	} else if (state == st_ucs && p > escp+1) {
+	    q = emit_utf8(q, nval);
+	} else if (state == st_hex || state == st_ucs) {
+	    *q++ = *escp;
+	}
+	*q = '\0';
+	return q-str;
+    }
+
+    /* Otherwise, just return the input... */
+    return ln;
+}
+
+/*
+ * Find the end of a quoted string; returns a pointer to the closing quote,
+ * to the null character if unterminated, or str itself if not a string.
+ */
+char *nasm_skip_string(char *str)
+{
+    char bq;
+    char *p;
+    char c;
+    enum unq_state {
+	st_start,
+	st_backslash,
+	st_hex,
+	st_oct,
+	st_ucs,
+    } state;
+    int ndig = 0;
+
+    bq = str[0];
+    if (bq == '\'' || bq == '\"') {
+	/* '...' or "..." string */
+	for (p = str+1; *p && *p != bq; p++)
+	    ;
+	return p;
+    } else if (bq == '`') {
+	/* `...` string */
+	p = str+1;
+	state = st_start;
+
+	while ((c = *p++)) {	/* From here on p is one past c */
+	    switch (state) {
+	    case st_start:
+		switch (c) {
+		case '\\':
+		    state = st_backslash;
+		    break;
+		case '`':
+		    return p-1;	/* Found the end */
+		default:
+		    break;
+		}
+		break;
+
+	    case st_backslash:
+		switch (c) {
+		case 'a':
+		case 'b':
+		case 'e':
+		case 'f':
+		case 'n':
+		case 'r':
+		case 't':
+		case 'v':
+		default:
+		    state = st_start;	/* One-character escape, e.g. \` */
+		    break;
+		case 'u':
+		    state = st_ucs;
+		    ndig = 4;
+		    break;
+		case 'U':
+		    state = st_ucs;
+		    ndig = 8;
+		    break;
+		case 'x':
+		case 'X':
+		    state = st_hex;
+		    ndig = 0;
+		    break;
+		case '0':
+		case '1':
+		case '2':
+		case '3':
+		case '4':
+		case '5':
+		case '6':
+		case '7':
+		    state = st_oct;
+		    ndig = 1;
+		    break;
+		}
+		break;
+
+	    case st_oct:
+		if (c >= '0' && c <= '7') {
+		    if (++ndig >= 3)
+			state = st_start;
+		} else {
+		    p--;	/* Process this character again */
+		    state = st_start;
+		}
+		break;
+
+	    case st_hex:
+		if ((c >= '0' && c <= '9') ||
+		    (c >= 'A' && c <= 'F') ||
+		    (c >= 'a' && c <= 'f')) {
+		    if (++ndig >= 2)
+			state = st_start;
+		} else {
+		    p--;	/* Process this character again */
+		    state = st_start;
+		}
+		break;
+
+	    case st_ucs:
+		if ((c >= '0' && c <= '9') ||
+		    (c >= 'A' && c <= 'F') ||
+		    (c >= 'a' && c <= 'f')) {
+		    if (!--ndig)
+			state = st_start;
+		} else {
+		    p--;	/* Process this character again */
+		    state = st_start;
+		}
+		break;
+	    }
+	}
+	return p-1;		/* Unterminated: p is one past the null */
+    } else {
+	return str;		/* Not a string... */
+    }
+}
diff --git a/quote.h b/quote.h
new file mode 100644
index 00000000..501f7350
--- /dev/null
+++ b/quote.h
@@ -0,0 +1,11 @@
+#ifndef NASM_QUOTE_H
+#define NASM_QUOTE_H
+
+#include "compiler.h"
+
+char *nasm_quote(char *str, size_t len);
+size_t nasm_unquote(char *str);
+char *nasm_skip_string(char *str);
+
+#endif /* NASM_QUOTE_H */
+
author	H. Peter Anvin <hpa@zytor.com>	2008-06-01 17:23:51 -0700
committer	H. Peter Anvin <hpa@zytor.com>	2008-06-01 17:23:51 -0700
commit	8cad14bbcf0b8c056e6f81dccf4af38537e0bac6 (patch)
tree	454627762c5d12027c8877c7cdde360673cca0d0
parent	7f2f8b35e6b8ea67fe620f3363b5eaa06f55a222 (diff)
download	nasm-8cad14bbcf0b8c056e6f81dccf4af38537e0bac6.tar.gz