summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Demi Marie Obenour <demiobenour@gmail.com> 2022-11-30 21:40:17 -0500
committer: Demi Marie Obenour <demiobenour@gmail.com> 2022-11-30 21:48:37 -0500
commit: fdee678bcdca6dde1398b21ce1ce93b1deea7ed0 (patch)
tree: c1f0734e421beae7cd8de066ab94bfddc8da1b68
parent: b381a4b7ff5f253893e39cb4fdd430646377b0aa (diff)
download: ocaml-fdee678bcdca6dde1398b21ce1ce93b1deea7ed0.tar.gz
ocamlyacc: Forbid quoted literals in tokens and types
They would result in invalid OCaml code being generated, and removing them allows for a bunch of code to be deleted.
-rw-r--r-- Changes 6
-rw-r--r-- yacc/defs.h 1
-rw-r--r-- yacc/error.c 6
-rw-r--r-- yacc/reader.c 159
4 files changed, 17 insertions, 155 deletions
diff --git a/Changes b/Changes
index 8f1883af12..8822e7ff1b 100644
--- a/Changes
+++ b/Changes
@@ -125,6 +125,12 @@ Working version
used in any context where a character set is expected.
(Nicolás Ojeda Bär, Martin Jambon, review by Sébastien Hinderer)
+- #11773: ocamlyacc: Do not allow quoted literals (such as 'a' or "bc")
+ in a token name or %type declaration. Previously such literals were
+ accepted by ocamlyacc, but produced malformed OCaml that was rejected
+ by the compiler.
+ (Demi Marie Obenour)
+
### Manual and documentation:
- #9430, #11291: Document the general desugaring rules for binding operators.
diff --git a/yacc/defs.h b/yacc/defs.h
index 6104a8217b..7933bfb73d 100644
--- a/yacc/defs.h
+++ b/yacc/defs.h
@@ -357,3 +357,4 @@ extern void unterminated_text (int t_lineno, char *t_line, char *t_cptr) Noretur
extern void used_reserved (char *s) Noreturn;
extern void verbose (void);
extern void write_section (char **section);
+extern void invalid_literal(int s_lineno, char *s_line, char *s_cptr) Noreturn;
diff --git a/yacc/error.c b/yacc/error.c
index 4b3db68929..f1a89835cc 100644
--- a/yacc/error.c
+++ b/yacc/error.c
@@ -98,6 +98,12 @@ void unterminated_comment(int c_lineno, char *c_line, char *c_cptr)
}
+void invalid_literal(int s_lineno, char *s_line, char *s_cptr)
+{
+ gen_error(s_lineno, s_line, s_cptr, "cannot use literal as token name");
+}
+
+
void unterminated_string(int s_lineno, char *s_line, char *s_cptr)
{
gen_error(s_lineno, s_line, s_cptr, "unterminated string");
diff --git a/yacc/reader.c b/yacc/reader.c
index 6e5f3550ad..25bd1277fd 100644
--- a/yacc/reader.c
+++ b/yacc/reader.c
@@ -641,157 +641,6 @@ loop:
}
}
-static int
-hexval(int c)
-{
- if (c >= '0' && c <= '9')
- return (c - '0');
- if (c >= 'A' && c <= 'F')
- return (c - 'A' + 10);
- if (c >= 'a' && c <= 'f')
- return (c - 'a' + 10);
- return (-1);
-}
-
-
-static bucket *
-get_literal(void)
-{
- int c, quote;
- int i;
- int n;
- char *s;
- bucket *bp;
- int s_lineno = lineno;
- char *s_line = dup_line();
- char *s_cptr = s_line + (cptr - line);
-
- quote = *cptr++;
- cinc = 0;
- for (;;)
- {
- c = *cptr++;
- if (c == quote) break;
- if (c == '\n') unterminated_string(s_lineno, s_line, s_cptr);
- if (c == '\\')
- {
- char *c_cptr = cptr - 1;
-
- c = *cptr++;
- switch (c)
- {
- case '\n':
- get_line();
- if (line == 0) unterminated_string(s_lineno, s_line, s_cptr);
- continue;
-
- case '0': case '1': case '2': case '3':
- case '4': case '5': case '6': case '7':
- n = c - '0';
- c = *cptr;
- if (IS_OCTAL(c))
- {
- n = (n << 3) + (c - '0');
- c = *++cptr;
- if (IS_OCTAL(c))
- {
- n = (n << 3) + (c - '0');
- ++cptr;
- }
- }
- if (n > MAXCHAR) illegal_character(c_cptr);
- c = n;
- break;
-
- case 'x':
- c = *cptr++;
- n = hexval(c);
- if (n < 0 || n >= 16)
- illegal_character(c_cptr);
- for (;;)
- {
- c = *cptr;
- i = hexval(c);
- if (i < 0 || i >= 16) break;
- ++cptr;
- n = (n << 4) + i;
- if (n > MAXCHAR) illegal_character(c_cptr);
- }
- c = n;
- break;
-
- case 'a': c = 7; break;
- case 'b': c = '\b'; break;
- case 'f': c = '\f'; break;
- case 'n': c = '\n'; break;
- case 'r': c = '\r'; break;
- case 't': c = '\t'; break;
- case 'v': c = '\v'; break;
- }
- }
- cachec(c);
- }
- FREE(s_line);
-
- n = cinc;
- s = MALLOC(n);
- if (s == 0) no_space();
-
- for (i = 0; i < n; ++i)
- s[i] = cache[i];
-
- cinc = 0;
- if (n == 1)
- cachec('\'');
- else
- cachec('"');
-
- for (i = 0; i < n; ++i)
- {
- c = ((unsigned char *)s)[i];
- if (c == '\\' || c == cache[0])
- {
- cachec('\\');
- cachec(c);
- }
- else if (isprint(c))
- cachec(c);
- else
- {
- cachec('\\');
- switch (c)
- {
- case 7: cachec('a'); break;
- case '\b': cachec('b'); break;
- case '\f': cachec('f'); break;
- case '\n': cachec('n'); break;
- case '\r': cachec('r'); break;
- case '\t': cachec('t'); break;
- case '\v': cachec('v'); break;
- default:
- cachec(((c >> 6) & 7) + '0');
- cachec(((c >> 3) & 7) + '0');
- cachec((c & 7) + '0');
- break;
- }
- }
- }
-
- if (n == 1)
- cachec('\'');
- else
- cachec('"');
-
- cachec(NUL);
- bp = lookup(cache);
- bp->class = TERM;
- if (n == 1 && bp->value == UNDEFINED)
- bp->value = *(unsigned char *)s;
- FREE(s);
-
- return (bp);
-}
-
static int
is_reserved(char *name)
@@ -915,7 +764,7 @@ declare_tokens(int assoc)
if (isalpha(c) || c == '_' || c == '.' || c == '$')
bp = get_name();
else if (c == '\'' || c == '"')
- bp = get_literal();
+ invalid_literal(lineno, line, cptr);
else
return;
@@ -977,7 +826,7 @@ declare_types(void)
if (isalpha(c) || c == '_' || c == '.' || c == '$')
bp = get_name();
else if (c == '\'' || c == '"')
- bp = get_literal();
+ invalid_literal(lineno, line, cptr);
else
return;
@@ -1221,7 +1070,7 @@ add_symbol(void)
c = *cptr;
if (c == '\'' || c == '"')
- bp = get_literal();
+ invalid_literal(s_lineno, line, cptr);
else
bp = get_name();
@@ -1409,7 +1258,7 @@ mark_symbol(void)
if (isalpha(c) || c == '_' || c == '.' || c == '$')
bp = get_name();
else if (c == '\'' || c == '"')
- bp = get_literal();
+ invalid_literal(lineno, line, cptr);
else
{
syntax_error(lineno, line, cptr);