summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author H. Peter Anvin <hpa@zytor.com> 2008-06-01 17:23:51 -0700
committer H. Peter Anvin <hpa@zytor.com> 2008-06-01 17:23:51 -0700
commit 8cad14bbcf0b8c056e6f81dccf4af38537e0bac6 (patch)
tree 454627762c5d12027c8877c7cdde360673cca0d0
parent 7f2f8b35e6b8ea67fe620f3363b5eaa06f55a222 (diff)
download nasm-8cad14bbcf0b8c056e6f81dccf4af38537e0bac6.tar.gz
qstring: first cut at full quoted string support in the preprocessor
First attempt at properly handling quoted strings in the preprocessor. This also adds range support in %substr. No support in the assembler yet.
-rw-r--r-- Makefile.in 2
-rw-r--r-- nasmlib.c 225
-rw-r--r-- nasmlib.h 1
-rw-r--r-- preproc.c 100
-rw-r--r-- quote.c 473
-rw-r--r-- quote.h 11
6 files changed, 545 insertions, 267 deletions
diff --git a/Makefile.in b/Makefile.in
index bd037841..230bd069 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -65,7 +65,7 @@ NASM = nasm.$(O) nasmlib.$(O) float.$(O) insnsa.$(O) insnsb.$(O) \
output/outelf32.$(O) output/outelf64.$(O) \
output/outobj.$(O) output/outas86.$(O) output/outrdf2.$(O) \
output/outdbg.$(O) output/outieee.$(O) output/outmacho.$(O) \
- preproc.$(O) pptok.$(O) macros.$(O) \
+ preproc.$(O) quote.$(O) pptok.$(O) macros.$(O) \
listing.$(O) eval.$(O) exprlib.$(O) stdscan.$(O) tokhash.$(O) \
regvals.$(O) regflags.$(O)
diff --git a/nasmlib.c b/nasmlib.c
index 8cd41cf8..d74b8acf 100644
--- a/nasmlib.c
+++ b/nasmlib.c
@@ -930,231 +930,6 @@ int src_get(int32_t *xline, char **xname)
return 0;
}
-/* XXX: This is broken for strings which contain multiple quotes...
- NASM doesn't have a sane syntax for dealing with those currently. */
-void nasm_quote(char **str)
-{
- int ln = strlen(*str);
- char q = (*str)[0];
- char *p;
- if (ln > 1 && (*str)[ln - 1] == q && (q == '"' || q == '\''))
- return;
- q = '"';
- if (strchr(*str, q))
- q = '\'';
- p = nasm_malloc(ln + 3);
- strcpy(p + 1, *str);
- nasm_free(*str);
- p[ln + 1] = p[0] = q;
- p[ln + 2] = 0;
- *str = p;
-}
-
-static char *emit_utf8(char *q, int32_t v)
-{
- if (v < 0) {
- /* Impossible - do nothing */
- } else if (v <= 0x7f) {
- *q++ = v;
- } else if (v <= 0x000007ff) {
- *q++ = 0xc0 | (v >> 6);
- *q++ = 0x80 | (v & 63);
- } else if (v <= 0x0000ffff) {
- *q++ = 0xe0 | (v >> 12);
- *q++ = 0x80 | ((v >> 6) & 63);
- *q++ = 0x80 | (v & 63);
- } else if (v <= 0x001fffff) {
- *q++ = 0xf0 | (v >> 18);
- *q++ = 0x80 | ((v >> 12) & 63);
- *q++ = 0x80 | ((v >> 6) & 63);
- *q++ = 0x80 | (v & 63);
- } else if (v <= 0x03ffffff) {
- *q++ = 0xf8 | (v >> 24);
- *q++ = 0x80 | ((v >> 18) & 63);
- *q++ = 0x80 | ((v >> 12) & 63);
- *q++ = 0x80 | ((v >> 6) & 63);
- *q++ = 0x80 | (v & 63);
- } else {
- *q++ = 0xfc | (v >> 30);
- *q++ = 0x80 | ((v >> 24) & 63);
- *q++ = 0x80 | ((v >> 18) & 63);
- *q++ = 0x80 | ((v >> 12) & 63);
- *q++ = 0x80 | ((v >> 6) & 63);
- *q++ = 0x80 | (v & 63);
- }
- return q;
-}
-
-/*
- * Do an *in-place* dequoting of the specified string, returning the
- * resulting length (which may be containing embedded nulls.)
- *
- * In-place replacement is possible since the unquoted length is always
- * shorter than or equal to the quoted length.
- */
-size_t nasm_unquote(char *str)
-{
- size_t ln;
- char bq, eq;
- char *p, *q, *ep, *escp;
- char c;
- enum unq_state {
- st_start,
- st_backslash,
- st_hex,
- st_oct,
- st_ucs,
- } state;
- int ndig = 0;
- int32_t nval = 0;
-
- bq = str[0];
- if (!bq)
- return 0;
- ln = strlen(str);
- eq = str[ln-1];
-
- if ((bq == '\'' || bq == '\"') && bq == eq) {
- /* '...' or "..." string */
- memmove(str, str+1, ln-2);
- str[ln-2] = '\0';
- return ln-2;
- }
- if (bq == '`' || eq == '`') {
- /* `...` string */
- q = str;
- p = str+1;
- ep = str+ln-1;
- state = st_start;
-
- while (p < ep) {
- c = *p++;
- switch (state) {
- case st_start:
- if (c == '\\')
- state = st_backslash;
- else
- *q++ = c;
- break;
-
- case st_backslash:
- state = st_start;
- escp = p-1;
- switch (c) {
- case 'a':
- *q++ = 7;
- break;
- case 'b':
- *q++ = 8;
- break;
- case 'e':
- *q++ = 27;
- break;
- case 'f':
- *q++ = 12;
- break;
- case 'n':
- *q++ = 10;
- break;
- case 'r':
- *q++ = 13;
- break;
- case 't':
- *q++ = 9;
- break;
- case 'u':
- state = st_ucs;
- ndig = 4;
- nval = 0;
- break;
- case 'U':
- state = st_ucs;
- ndig = 8;
- nval = 0;
- break;
- case 'v':
- *q++ = 11;
- case 'x':
- case 'X':
- state = st_hex;
- ndig = nval = 0;
- break;
- case '0':
- case '1':
- case '2':
- case '3':
- case '4':
- case '5':
- case '6':
- case '7':
- state = st_oct;
- ndig = 1;
- nval = c - '0';
- break;
- default:
- *q++ = c;
- break;
- }
- break;
-
- case st_oct:
- if (c >= '0' && c <= '7') {
- nval = (nval << 3) + (c - '0');
- if (++ndig >= 3) {
- *q++ = nval;
- state = st_start;
- }
- } else {
- p--; /* Process this character again */
- *q++ = nval;
- state = st_start;
- }
- break;
-
- case st_hex:
- if ((c >= '0' && c <= '9') ||
- (c >= 'A' && c <= 'F') ||
- (c >= 'a' && c <= 'f')) {
- nval = (nval << 4) + numvalue(c);
- if (++ndig >= 2) {
- *q++ = nval;
- state = st_start;
- }
- } else {
- p--; /* Process this character again */
- *q++ = ndig ? nval : *escp;
- state = st_start;
- }
- break;
-
- case st_ucs:
- if ((c >= '0' && c <= '9') ||
- (c >= 'A' && c <= 'F') ||
- (c >= 'a' && c <= 'f')) {
- nval = (nval << 4) + numvalue(c);
- if (!--ndig) {
- q = emit_utf8(q, nval);
- state = st_start;
- }
- } else {
- p--; /* Process this character again */
- if (p > escp+1)
- q = emit_utf8(q, nval);
- else
- *q++ = *escp;
- state = st_start;
- }
- break;
- }
- }
- *q = '\0';
- return q-str;
- }
-
- /* Otherwise, just return the input... */
- return ln;
-}
-
char *nasm_strcat(char *one, char *two)
{
char *rslt;
diff --git a/nasmlib.h b/nasmlib.h
index 2f9f87dd..39137af9 100644
--- a/nasmlib.h
+++ b/nasmlib.h
@@ -395,7 +395,6 @@ int32_t src_get_linnum(void);
*/
int src_get(int32_t *xline, char **xname);
-void nasm_quote(char **str);
char *nasm_strcat(char *one, char *two);
void null_debug_routine(const char *directive, const char *params);
diff --git a/preproc.c b/preproc.c
index 6f8eb0b5..46f8c6b6 100644
--- a/preproc.c
+++ b/preproc.c
@@ -48,6 +48,7 @@
#include "nasmlib.h"
#include "preproc.h"
#include "hashtbl.h"
+#include "quote.h"
#include "stdscan.h"
#include "tokens.h"
#include "tables.h"
@@ -795,15 +796,12 @@ static Token *tokenize(char *line)
p++;
while (*p && isidchar(*p))
p++;
- } else if (*p == '\'' || *p == '"') {
+ } else if (*p == '\'' || *p == '"' || *p == '`') {
/*
* A string token.
*/
- char c = *p;
- p++;
type = TOK_STRING;
- while (*p && *p != c)
- p++;
+ p = nasm_skip_string(p);
if (*p) {
p++;
@@ -1514,6 +1512,7 @@ static bool if_condition(Token * tline, enum preproc_token ct)
break;
}
/* Unify surrounding quotes for strings */
+ /* XXX: this doesn't work anymore */
if (t->type == TOK_STRING) {
tt->text[0] = t->text[0];
tt->text[strlen(tt->text) - 1] = t->text[0];
@@ -2079,11 +2078,9 @@ static int do_directive(Token * tline)
if (tline->next)
error(ERR_WARNING,
"trailing garbage after `%%depend' ignored");
- if (tline->type != TOK_INTERNAL_STRING) {
- p = tline->text + 1; /* point past the quote to the name */
- p[strlen(p) - 1] = '\0'; /* remove the trailing quote */
- } else
- p = tline->text; /* internal_string is easier */
+ p = tline->text;
+ if (tline->type != TOK_INTERNAL_STRING)
+ nasm_unquote(p);
if (dephead && !in_list(*dephead, p)) {
StrList *sl = nasm_malloc(strlen(p)+1+sizeof sl->next);
sl->next = NULL;
@@ -2107,11 +2104,9 @@ static int do_directive(Token * tline)
if (tline->next)
error(ERR_WARNING,
"trailing garbage after `%%include' ignored");
- if (tline->type != TOK_INTERNAL_STRING) {
- p = tline->text + 1; /* point past the quote to the name */
- p[strlen(p) - 1] = '\0'; /* remove the trailing quote */
- } else
- p = tline->text; /* internal_string is easier */
+ p = tline->text;
+ if (tline->type != TOK_INTERNAL_STRING)
+ nasm_unquote(p);
inc = nasm_malloc(sizeof(Include));
inc->next = istk;
inc->conds = NULL;
@@ -2186,14 +2181,14 @@ static int do_directive(Token * tline)
tline = tline->next;
skip_white_(tline);
if (tok_type_(tline, TOK_STRING)) {
- p = tline->text + 1; /* point past the quote to the name */
- p[strlen(p) - 1] = '\0'; /* remove the trailing quote */
- expand_macros_in_string(&p);
+ p = tline->text;
+ nasm_unquote(p);
+ expand_macros_in_string(&p); /* WHY? */
error(ERR_NONFATAL, "%s", p);
nasm_free(p);
} else {
p = detoken(tline, false);
- error(ERR_WARNING, "%s", p);
+ error(ERR_WARNING, "%s", p); /* WARNING!??!! */
nasm_free(p);
}
free_tlist(origline);
@@ -2670,11 +2665,9 @@ static int do_directive(Token * tline)
if (t->next)
error(ERR_WARNING,
"trailing garbage after `%%pathsearch' ignored");
- if (t->type != TOK_INTERNAL_STRING) {
- p = t->text + 1; /* point past the quote to the name */
- p[strlen(p) - 1] = '\0'; /* remove the trailing quote */
- } else
- p = t->text; /* internal_string is easier */
+ p = tline->text;
+ if (tline->type != TOK_INTERNAL_STRING)
+ nasm_unquote(p);
fp = inc_fopen(p, &xsl, &xsl, true);
if (fp) {
@@ -2683,8 +2676,7 @@ static int do_directive(Token * tline)
}
macro_start = nasm_malloc(sizeof(*macro_start));
macro_start->next = NULL;
- macro_start->text = nasm_strdup(p);
- nasm_quote(&macro_start->text);
+ macro_start->text = nasm_quote(p, strlen(p));
macro_start->type = TOK_STRING;
macro_start->mac = NULL;
if (xsl)
@@ -2736,7 +2728,7 @@ static int do_directive(Token * tline)
macro_start = nasm_malloc(sizeof(*macro_start));
macro_start->next = NULL;
- make_tok_num(macro_start, strlen(t->text) - 2);
+ make_tok_num(macro_start, nasm_unquote(t->text));
macro_start->mac = NULL;
/*
@@ -2750,6 +2742,10 @@ static int do_directive(Token * tline)
return DIRECTIVE_FOUND;
case PP_SUBSTR:
+ {
+ int64_t a1, a2;
+ size_t len;
+
casesense = true;
tline = tline->next;
@@ -2786,29 +2782,50 @@ static int do_directive(Token * tline)
tt = t->next;
tptr = &tt;
tokval.t_type = TOKEN_INVALID;
- evalresult =
- evaluate(ppscan, tptr, &tokval, NULL, pass, error, NULL);
+ evalresult = evaluate(ppscan, tptr, &tokval, NULL,
+ pass, error, NULL);
if (!evalresult) {
free_tlist(tline);
free_tlist(origline);
return DIRECTIVE_FOUND;
- }
- if (!is_simple(evalresult)) {
+ } else if (!is_simple(evalresult)) {
error(ERR_NONFATAL, "non-constant value given to `%%substr`");
free_tlist(tline);
free_tlist(origline);
return DIRECTIVE_FOUND;
}
+ a1 = evalresult->value-1;
+
+ while (tok_type_(tt, TOK_WHITESPACE))
+ tt = tt->next;
+ if (!tt) {
+ a2 = 1; /* Backwards compatibility: one character */
+ } else {
+ tokval.t_type = TOKEN_INVALID;
+ evalresult = evaluate(ppscan, tptr, &tokval, NULL,
+ pass, error, NULL);
+ if (!evalresult) {
+ free_tlist(tline);
+ free_tlist(origline);
+ return DIRECTIVE_FOUND;
+ } else if (!is_simple(evalresult)) {
+ error(ERR_NONFATAL, "non-constant value given to `%%substr`");
+ free_tlist(tline);
+ free_tlist(origline);
+ return DIRECTIVE_FOUND;
+ }
+ a2 = evalresult->value;
+ }
+
+ len = nasm_unquote(t->text);
+ if (a2 < 0)
+ a2 = a2+1+len-a1;
+ if (a1+a2 > (int64_t)len)
+ a2 = len-a1;
macro_start = nasm_malloc(sizeof(*macro_start));
macro_start->next = NULL;
- macro_start->text = nasm_strdup("'''");
- if (evalresult->value > 0
- && evalresult->value < (int) strlen(t->text) - 1) {
- macro_start->text[1] = t->text[evalresult->value];
- } else {
- macro_start->text[2] = '\0';
- }
+ macro_start->text = nasm_quote((a1 < 0) ? "" : t->text+a1, a2);
macro_start->type = TOK_STRING;
macro_start->mac = NULL;
@@ -2821,6 +2838,7 @@ static int do_directive(Token * tline)
free_tlist(tline);
free_tlist(origline);
return DIRECTIVE_FOUND;
+ }
case PP_ASSIGN:
case PP_IASSIGN:
@@ -3209,9 +3227,11 @@ again:
if (!m->expansion) {
if (!strcmp("__FILE__", m->name)) {
int32_t num = 0;
- src_get(&num, &(tline->text));
- nasm_quote(&(tline->text));
+ char *file;
+ src_get(&num, &file);
+ tline->text = nasm_quote(file, strlen(file));
tline->type = TOK_STRING;
+ nasm_free(file);
continue;
}
if (!strcmp("__LINE__", m->name)) {
diff --git a/quote.c b/quote.c
new file mode 100644
index 00000000..0d8ee2be
--- /dev/null
+++ b/quote.c
@@ -0,0 +1,473 @@
+/* quote.c library routines for the Netwide Assembler
+ *
+ * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
+ * Julian Hall. All rights reserved. The software is
+ * redistributable under the license given in the file "LICENSE"
+ * distributed in the NASM archive.
+ */
+
+#include "compiler.h"
+
+#include <assert.h>
+#include <stdlib.h>
+
+#include "nasmlib.h"
+#include "quote.h"
+
+#define numvalue(c) ((c)>='a' ? (c)-'a'+10 : (c)>='A' ? (c)-'A'+10 : (c)-'0')
+
+char *nasm_quote(char *str, size_t len) /* Return a newly nasm_malloc()ed, quoted copy of the len bytes starting at str */
+{
+ char c, c1, *p, *q, *nstr, *ep; /* NOTE(review): c and c1 are plain char; where char is signed, bytes >= 0x80 are negative in the comparisons below */
+ bool sq_ok, dq_ok; /* may the input be wrapped in '...' / "..." without escaping? */
+ size_t qlen;
+
+ sq_ok = dq_ok = true;
+ ep = str+len;
+ qlen = 0; /* Length if we need `...` quotes */
+ for (p = str; p < ep; p++) { /* Pass 1: rule out quote styles and size the escaped form */
+ c = *p;
+ switch (c) {
+ case '\'':
+ sq_ok = false;
+ qlen++;
+ break;
+ case '\"':
+ dq_ok = false;
+ qlen++;
+ break;
+ case '`':
+ case '\\':
+ qlen += 2; /* backslash plus the character itself */
+ break;
+ default:
+ if (c < ' ' || c > '~') { /* outside ' '..'~': forces the `...` form */
+ sq_ok = dq_ok = false;
+ switch (c) {
+ case '\a':
+ case '\b':
+ case '\t':
+ case '\n':
+ case '\v':
+ case '\f':
+ case '\r':
+ case 27:
+ qlen += 2; /* pass 2 writes these as backslash plus one letter */
+ break;
+ default:
+ c1 = (p+1 < ep) ? p[1] : 0; /* following byte, or 0 at the end of the input */
+ if (c > 077 || (c1 >= '0' && c1 <= '7')) /* NOTE(review): where char is signed, a byte >= 0x80 not followed by an octal digit fails both tests and is sized as 2, yet pass 2 prints (unsigned char)c with %o, which takes 4; the buffer is then too small */
+ qlen += 4; /* Must use the full form */
+ else if (c > 07)
+ qlen += 3; /* backslash plus two octal digits */
+ else
+ qlen += 2; /* backslash plus one octal digit */
+ break;
+ }
+ } else {
+ qlen++;
+ }
+ break;
+ }
+ }
+
+ if (sq_ok || dq_ok) {
+ /* Use '...' or "..." */
+ nstr = nasm_malloc(len+3); /* body plus two quote characters and the NUL */
+ nstr[0] = nstr[len+1] = sq_ok ? '\'' : '\"'; /* single quotes are preferred when both styles are possible */
+ nstr[len+2] = '\0';
+ memcpy(nstr+1, str, len); /* body is copied verbatim, no escaping */
+ } else {
+ /* Need to use `...` quoted syntax */
+ nstr = nasm_malloc(qlen+3); /* escaped body plus two backquotes and the NUL */
+ q = nstr;
+ *q++ = '`';
+ for (p = str; p < ep; p++) { /* Pass 2: write the escaped form sized by pass 1 */
+ c = *p;
+ switch (c) {
+ case '`':
+ case '\\':
+ *q++ = '\\';
+ *q++ = c;
+ break;
+ case '\a':
+ *q++ = '\\';
+ *q++ = 'a';
+ break;
+ case '\b':
+ *q++ = '\\';
+ *q++ = 'b';
+ break;
+ case '\t':
+ *q++ = '\\';
+ *q++ = 't';
+ break;
+ case '\n':
+ *q++ = '\\';
+ *q++ = 'n';
+ break;
+ case '\v':
+ *q++ = '\\';
+ *q++ = 'v';
+ break;
+ case '\f':
+ *q++ = '\\';
+ *q++ = 'f';
+ break;
+ case '\r':
+ *q++ = '\\';
+ *q++ = 'r';
+ break;
+ case 27:
+ *q++ = '\\';
+ *q++ = 'e';
+ break;
+ default:
+ if (c < ' ' || c > '~') {
+ c1 = (p+1 < ep) ? p[1] : 0;
+ if (c1 >= '0' && c1 <= '7')
+ q += sprintf(q, "\\%03o", (unsigned char)c); /* three digits, so a following octal digit is not read as part of the escape */
+ else
+ q += sprintf(q, "\\%o", (unsigned char)c); /* shortest octal form */
+ } else {
+ *q++ = c;
+ }
+ break;
+ }
+ }
+ *q++ = '`';
+ *q++ = '\0';
+ assert((size_t)(q-nstr) == qlen+3); /* both passes must agree on the escaped length */
+ }
+ return nstr;
+}
+
+static char *emit_utf8(char *q, int32_t v) /* Store v at q as a 1- to 6-byte UTF-8 style sequence; returns the pointer just past the last byte written */
+{
+ if (v < 0) {
+ /* Impossible - do nothing */
+ } else if (v <= 0x7f) { /* 1 byte, stored as-is */
+ *q++ = v;
+ } else if (v <= 0x000007ff) { /* 2 bytes: 0xc0 lead, then 6 payload bits per 0x80 byte */
+ *q++ = 0xc0 | (v >> 6);
+ *q++ = 0x80 | (v & 63);
+ } else if (v <= 0x0000ffff) { /* 3 bytes: 0xe0 lead */
+ *q++ = 0xe0 | (v >> 12);
+ *q++ = 0x80 | ((v >> 6) & 63);
+ *q++ = 0x80 | (v & 63);
+ } else if (v <= 0x001fffff) { /* 4 bytes: 0xf0 lead */
+ *q++ = 0xf0 | (v >> 18);
+ *q++ = 0x80 | ((v >> 12) & 63);
+ *q++ = 0x80 | ((v >> 6) & 63);
+ *q++ = 0x80 | (v & 63);
+ } else if (v <= 0x03ffffff) { /* 5 bytes: 0xf8 lead */
+ *q++ = 0xf8 | (v >> 24);
+ *q++ = 0x80 | ((v >> 18) & 63);
+ *q++ = 0x80 | ((v >> 12) & 63);
+ *q++ = 0x80 | ((v >> 6) & 63);
+ *q++ = 0x80 | (v & 63);
+ } else { /* 6 bytes: 0xfc lead, covers every remaining non-negative value */
+ *q++ = 0xfc | (v >> 30);
+ *q++ = 0x80 | ((v >> 24) & 63);
+ *q++ = 0x80 | ((v >> 18) & 63);
+ *q++ = 0x80 | ((v >> 12) & 63);
+ *q++ = 0x80 | ((v >> 6) & 63);
+ *q++ = 0x80 | (v & 63);
+ }
+ return q;
+}
+
+/*
+ * Do an *in-place* dequoting of the specified string, returning the
+ * resulting length (the result may contain embedded nulls.)
+ *
+ * In-place replacement is possible since the unquoted length is always
+ * shorter than or equal to the quoted length.
+ */
+size_t nasm_unquote(char *str)
+{
+ size_t ln;
+ char bq, eq; /* first and last character of the input */
+ char *p, *q, *ep; /* p reads, q writes, ep is the position of the closing quote */
+ char *escp = NULL;
+ char c;
+ enum unq_state {
+ st_start,
+ st_backslash,
+ st_hex,
+ st_oct,
+ st_ucs,
+ } state;
+ int ndig = 0;
+ int32_t nval = 0;
+
+ bq = str[0];
+ if (!bq)
+ return 0; /* empty input: nothing to unquote */
+ ln = strlen(str);
+ eq = str[ln-1];
+
+ if ((bq == '\'' || bq == '\"') && bq == eq) {
+ /* '...' or "..." string */
+ memmove(str, str+1, ln-2); /* NOTE(review): for a one-character input (a lone quote) bq and eq are the same byte and ln-2 wraps around */
+ str[ln-2] = '\0';
+ return ln-2;
+ }
+ if (bq == '`' || eq == '`') { /* NOTE(review): || also takes this path when only one end is a backquote */
+ /* `...` string */
+ q = str;
+ p = str+1;
+ ep = str+ln-1;
+ state = st_start;
+
+ while (p < ep) { /* walk the bytes between the two end characters */
+ c = *p++;
+ switch (state) {
+ case st_start:
+ if (c == '\\')
+ state = st_backslash;
+ else
+ *q++ = c;
+ break;
+
+ case st_backslash:
+ state = st_start;
+ escp = p-1; /* remember the escape letter in case no digits follow it */
+ switch (c) {
+ case 'a':
+ *q++ = 7;
+ break;
+ case 'b':
+ *q++ = 8;
+ break;
+ case 'e':
+ *q++ = 27;
+ break;
+ case 'f':
+ *q++ = 12;
+ break;
+ case 'n':
+ *q++ = 10;
+ break;
+ case 'r':
+ *q++ = 13;
+ break;
+ case 't':
+ *q++ = 9;
+ break;
+ case 'u':
+ state = st_ucs;
+ ndig = 4; /* counts down the hex digits still expected */
+ nval = 0;
+ break;
+ case 'U':
+ state = st_ucs;
+ ndig = 8;
+ nval = 0;
+ break;
+ case 'v':
+ *q++ = 11; /* NOTE(review): no break after this, so control falls into the \x case below and state becomes st_hex */
+ case 'x':
+ case 'X':
+ state = st_hex;
+ ndig = nval = 0; /* here ndig counts up the hex digits seen */
+ break;
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ state = st_oct;
+ ndig = 1; /* the digit after the backslash is already the first octal digit */
+ nval = c - '0';
+ break;
+ default:
+ *q++ = c; /* any other escaped character stands for itself */
+ break;
+ }
+ break;
+
+ case st_oct:
+ if (c >= '0' && c <= '7') {
+ nval = (nval << 3) + (c - '0');
+ if (++ndig >= 3) { /* at most three octal digits */
+ *q++ = nval;
+ state = st_start;
+ }
+ } else {
+ p--; /* Process this character again */
+ *q++ = nval;
+ state = st_start;
+ }
+ break;
+
+ case st_hex:
+ if ((c >= '0' && c <= '9') ||
+ (c >= 'A' && c <= 'F') ||
+ (c >= 'a' && c <= 'f')) {
+ nval = (nval << 4) + numvalue(c);
+ if (++ndig >= 2) { /* at most two hex digits */
+ *q++ = nval;
+ state = st_start;
+ }
+ } else {
+ p--; /* Process this character again */
+ *q++ = ndig ? nval : *escp; /* with no digits at all, the escape letter itself is stored */
+ state = st_start;
+ }
+ break;
+
+ case st_ucs:
+ if ((c >= '0' && c <= '9') ||
+ (c >= 'A' && c <= 'F') ||
+ (c >= 'a' && c <= 'f')) {
+ nval = (nval << 4) + numvalue(c);
+ if (!--ndig) { /* all 4 (after u) or 8 (after U) digits consumed */
+ q = emit_utf8(q, nval);
+ state = st_start;
+ }
+ } else {
+ p--; /* Process this character again */
+ if (p > escp+1) /* at least one hex digit followed the escape letter */
+ q = emit_utf8(q, nval);
+ else
+ *q++ = *escp;
+ state = st_start;
+ }
+ break;
+ }
+ }
+ *q = '\0'; /* NOTE(review): if the loop ended in st_oct, st_hex or st_ucs, the pending value was never stored */
+ return q-str;
+ }
+
+ /* Otherwise, just return the input... */
+ return ln;
+}
+
+/*
+ * Find the end of a quoted string; returns the pointer to the terminating
+ * character (either the ending quote or the null character, if unterminated.)
+ */
+char *nasm_skip_string(char *str)
+{
+ char bq; /* opening quote character */
+ char *p;
+ char c;
+ enum unq_state {
+ st_start,
+ st_backslash,
+ st_hex,
+ st_oct,
+ st_ucs,
+ } state;
+ int ndig = 0;
+
+ bq = str[0];
+ if (bq == '\'' || bq == '\"') {
+ /* '...' or "..." string */
+ for (p = str+1; *p && *p != bq; p++) /* no escapes in these forms: stop at the next matching quote or the NUL */
+ ;
+ return p;
+ } else if (bq == '`') {
+ /* `...` string */
+ p = str+1;
+ state = st_start;
+
+ while ((c = *p++)) { /* p is already one past c inside the loop body */
+ switch (state) {
+ case st_start:
+ switch (c) {
+ case '\\':
+ state = st_backslash;
+ break;
+ case '`':
+ return p-1; /* Found the end */
+ default:
+ break;
+ }
+ break;
+
+ case st_backslash:
+ switch (c) {
+ case 'a':
+ case 'b':
+ case 'e':
+ case 'f':
+ case 'n':
+ case 'r':
+ case 't':
+ case 'v':
+ default: /* any other escaped character, an escaped backquote included, is simply skipped */
+ state = st_start;
+ break;
+ case 'u':
+ state = st_ucs;
+ ndig = 4; /* counts down the hex digits still expected */
+ break;
+ case 'U':
+ state = st_ucs;
+ ndig = 8;
+ break;
+ case 'x':
+ case 'X':
+ state = st_hex;
+ ndig = 0; /* here ndig counts up the hex digits seen */
+ break;
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ state = st_oct;
+ ndig = 1; /* the digit after the backslash is already the first octal digit */
+ break;
+ }
+ break;
+
+ case st_oct:
+ if (c >= '0' && c <= '7') {
+ if (++ndig >= 3) /* at most three octal digits */
+ state = st_start;
+ } else {
+ p--; /* Process this character again */
+ state = st_start;
+ }
+ break;
+
+ case st_hex:
+ if ((c >= '0' && c <= '9') ||
+ (c >= 'A' && c <= 'F') ||
+ (c >= 'a' && c <= 'f')) {
+ if (++ndig >= 2) /* at most two hex digits */
+ state = st_start;
+ } else {
+ p--; /* Process this character again */
+ state = st_start;
+ }
+ break;
+
+ case st_ucs:
+ if ((c >= '0' && c <= '9') ||
+ (c >= 'A' && c <= 'F') ||
+ (c >= 'a' && c <= 'f')) {
+ if (!--ndig)
+ state = st_start;
+ } else {
+ p--; /* Process this character again */
+ state = st_start;
+ }
+ break;
+ }
+ }
+ return p; /* Unterminated string... NOTE(review): the *p++ in the loop condition has already stepped past the NUL, so p is one byte beyond the terminator, not on it as the header comment says */
+ } else {
+ return str; /* Not a string... */
+ }
+}
diff --git a/quote.h b/quote.h
new file mode 100644
index 00000000..501f7350
--- /dev/null
+++ b/quote.h
@@ -0,0 +1,11 @@
+#ifndef NASM_QUOTE_H
+#define NASM_QUOTE_H
+
+#include "compiler.h"
+
+char *nasm_quote(char *str, size_t len);
+size_t nasm_unquote(char *str);
+char *nasm_skip_string(char *str);
+
+#endif /* NASM_QUOTE_H */
+