summary | refs | log | tree | commit | diff
path: root/Parser
diff options
context:
space:
mode:
author: Guido van Rossum <guido@python.org> 1994-08-29 12:43:07 +0000
committer: Guido van Rossum <guido@python.org> 1994-08-29 12:43:07 +0000
commit: df833b0759c92cac9acadf9fb8d38f2df639e43a (patch)
tree: 8e5a6e8b384f50488e2e200290e05fa68e2ecc9e /Parser
parent: 534bf31af28b77b7197b3d0a1f0b75efb7522284 (diff)
download: cpython-df833b0759c92cac9acadf9fb8d38f2df639e43a.tar.gz
* Parser/tokenizer.c: backup over illegal newline in string
literal (for "completeness" test)
Diffstat (limited to 'Parser')
-rw-r--r-- Parser/tokenizer.c 211
1 files changed, 119 insertions, 92 deletions
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index 6504e0cec8..8b1b085961 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -1,5 +1,5 @@
/***********************************************************
-Copyright 1991, 1992, 1993 by Stichting Mathematisch Centrum,
+Copyright 1991, 1992, 1993, 1994 by Stichting Mathematisch Centrum,
Amsterdam, The Netherlands.
All Rights Reserved
@@ -24,19 +24,18 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
/* Tokenizer implementation */
-/* XXX This is rather old, should be restructured perhaps */
-/* XXX Need a better interface to report errors than writing to stderr */
-/* XXX Should use editor resource to fetch true tab size on Macintosh */
-
#include "pgenheaders.h"
#include <ctype.h>
-#include "string.h"
-#include "fgetsintr.h"
#include "tokenizer.h"
#include "errcode.h"
+extern char *my_readline PROTO((char *));
+/* Return malloc'ed string including trailing \n;
+ empty malloc'ed string for EOF;
+ NULL if interrupted */
+
/* Don't ever change this -- it would break the portability of Python code */
#define TABSIZE 8
@@ -99,7 +98,7 @@ tok_new()
struct tok_state *tok = NEW(struct tok_state, 1);
if (tok == NULL)
return NULL;
- tok->buf = tok->cur = tok->end = tok->inp = NULL;
+ tok->buf = tok->cur = tok->end = tok->inp = tok->start = NULL;
tok->done = E_OK;
tok->fp = NULL;
tok->tabsize = TABSIZE;
@@ -158,7 +157,6 @@ void
tok_free(tok)
struct tok_state *tok;
{
- /* XXX really need a separate flag to say 'my buffer' */
if (tok->fp != NULL && tok->buf != NULL)
DEL(tok->buf);
DEL(tok);
@@ -180,58 +178,78 @@ tok_nextc(tok)
tok->done = E_EOF;
return EOF;
}
-#ifdef USE_READLINE
if (tok->prompt != NULL) {
- extern char *readline PROTO((char *prompt));
- static int been_here;
- if (!been_here) {
- /* Force rebind of TAB to insert-tab */
- extern int rl_insert();
- rl_bind_key('\t', rl_insert);
- been_here++;
- }
- if (tok->buf != NULL)
- free(tok->buf);
- tok->buf = readline(tok->prompt);
- (void) intrcheck(); /* Clear pending interrupt */
+ char *new = my_readline(tok->prompt);
if (tok->nextprompt != NULL)
tok->prompt = tok->nextprompt;
- if (tok->buf == NULL) {
+ if (new == NULL)
+ tok->done = E_INTR;
+ else if (*new == '\0') {
+ free(new);
tok->done = E_EOF;
}
+ else if (tok->start != NULL) {
+ int start = tok->start - tok->buf;
+ int oldlen = tok->cur - tok->buf;
+ int newlen = oldlen + strlen(new);
+ char *buf = realloc(tok->buf, newlen+1);
+ tok->lineno++;
+ if (buf == NULL) {
+ free(tok->buf);
+ free(new);
+ tok->done = E_NOMEM;
+ return EOF;
+ }
+ tok->buf = buf;
+ tok->cur = tok->buf + oldlen;
+ strcpy(tok->buf + oldlen, new);
+ free(new);
+ tok->inp = tok->buf + newlen;
+ tok->end = tok->inp + 1;
+ tok->start = tok->buf + start;
+ }
else {
- tok->end = strchr(tok->buf, '\0');
- if (tok->end > tok->buf)
- add_history(tok->buf);
- /* Replace trailing '\n' by '\0'
- (we don't need a '\0', but the
- tokenizer wants a '\n'...) */
- *tok->end++ = '\n';
- tok->inp = tok->end;
+ tok->lineno++;
+ if (tok->buf != NULL)
+ free(tok->buf);
+ tok->buf = new;
tok->cur = tok->buf;
+ tok->inp = strchr(tok->buf, '\0');
+ tok->end = tok->inp + 1;
}
}
- else
-#endif
- {
- if (tok->prompt != NULL) {
- fprintf(stderr, "%s", tok->prompt);
- if (tok->nextprompt != NULL)
- tok->prompt = tok->nextprompt;
- }
- if (tok->buf == NULL) {
- tok->buf = NEW(char, BUFSIZ);
+ else {
+ int done = 0;
+ int cur = 0;
+ if (tok->start == NULL) {
if (tok->buf == NULL) {
- tok->done = E_NOMEM;
- return EOF;
+ tok->buf = NEW(char, BUFSIZ);
+ if (tok->buf == NULL) {
+ tok->done = E_NOMEM;
+ return EOF;
+ }
+ tok->end = tok->buf + BUFSIZ;
+ }
+ if (fgets(tok->buf, (int)(tok->end - tok->buf),
+ tok->fp) == NULL) {
+ tok->done = E_EOF;
+ done = 1;
+ }
+ else {
+ tok->done = E_OK;
+ tok->inp = strchr(tok->buf, '\0');
+ done = tok->inp[-1] == '\n';
}
- tok->end = tok->buf + BUFSIZ;
}
- tok->done = fgets_intr(tok->buf,
- (int)(tok->end - tok->buf), tok->fp);
- tok->inp = strchr(tok->buf, '\0');
+ else {
+ cur = tok->cur - tok->buf;
+ tok->done = E_OK;
+ }
+ tok->lineno++;
/* Read until '\n' or EOF */
- while (tok->inp+1==tok->end && tok->inp[-1]!='\n') {
+ while (!done) {
+ int curstart = tok->start == NULL ? -1 :
+ tok->start - tok->buf;
int curvalid = tok->inp - tok->buf;
int cursize = tok->end - tok->buf;
int newsize = cursize + BUFSIZ;
@@ -245,13 +263,19 @@ tok_nextc(tok)
tok->buf = newbuf;
tok->inp = tok->buf + curvalid;
tok->end = tok->buf + newsize;
- if (fgets_intr(tok->inp,
+ tok->start = curstart < 0 ? NULL :
+ tok->buf + curstart;
+ if (fgets(tok->inp,
(int)(tok->end - tok->inp),
- tok->fp) != E_OK)
- break;
+ tok->fp) == NULL) {
+ /* Last line does not end in \n,
+ fake one */
+ strcpy(tok->inp, "\n");
+ }
tok->inp = strchr(tok->inp, '\0');
+ done = tok->inp[-1] == '\n';
}
- tok->cur = tok->buf;
+ tok->cur = tok->buf + cur;
}
if (tok->done != E_OK) {
if (tok->prompt != NULL)
@@ -360,14 +384,15 @@ tok_get(tok, p_start, p_end)
register int c;
int blankline;
+ *p_start = *p_end = NULL;
nextline:
+ tok->start = NULL;
blankline = 0;
/* Get indentation level */
if (tok->atbol) {
register int col = 0;
tok->atbol = 0;
- tok->lineno++;
for (;;) {
c = tok_nextc(tok);
if (c == ' ')
@@ -423,7 +448,7 @@ tok_get(tok, p_start, p_end)
}
}
- *p_start = *p_end = tok->cur;
+ tok->start = tok->cur;
/* Return pending indents/dedents */
if (tok->pendin != 0) {
@@ -438,13 +463,14 @@ tok_get(tok, p_start, p_end)
}
again:
+ tok->start = NULL;
/* Skip spaces */
do {
c = tok_nextc(tok);
} while (c == ' ' || c == '\t');
/* Set start of current token */
- *p_start = tok->cur - 1;
+ tok->start = tok->cur - 1;
/* Skip comment */
if (c == '#') {
@@ -467,7 +493,6 @@ tok_get(tok, p_start, p_end)
/* Check for EOF and errors now */
if (c == EOF) {
- *p_start = *p_end = tok->cur;
return tok->done == E_EOF ? ENDMARKER : ERRORTOKEN;
}
@@ -477,6 +502,7 @@ tok_get(tok, p_start, p_end)
c = tok_nextc(tok);
} while (isalnum(c) || c == '_');
tok_backup(tok, c);
+ *p_start = tok->start;
*p_end = tok->cur;
return NAME;
}
@@ -486,6 +512,7 @@ tok_get(tok, p_start, p_end)
tok->atbol = 1;
if (blankline || tok->level > 0)
goto nextline;
+ *p_start = tok->start;
*p_end = tok->cur - 1; /* Leave '\n' out of the string */
return NEWLINE;
}
@@ -498,6 +525,7 @@ tok_get(tok, p_start, p_end)
}
else {
tok_backup(tok, c);
+ *p_start = tok->start;
*p_end = tok->cur;
return DOT;
}
@@ -538,9 +566,7 @@ tok_get(tok, p_start, p_end)
else {
/* Accept floating point numbers.
XXX This accepts incomplete things like
- XXX 12e or 1e+; worry run-time.
- XXX Doesn't accept numbers
- XXX starting with a dot */
+ XXX 12e or 1e+; worry run-time */
if (c == '.') {
fraction:
/* Fraction */
@@ -560,58 +586,58 @@ tok_get(tok, p_start, p_end)
}
}
tok_backup(tok, c);
+ *p_start = tok->start;
*p_end = tok->cur;
return NUMBER;
}
- /* String (single quotes) */
- if (c == '\'') {
+ /* String */
+ if (c == '\'' || c == '"') {
+ int quote = c;
+ int triple = 0;
+ int tripcount = 0;
for (;;) {
c = tok_nextc(tok);
- if (c == '\n' || c == EOF) {
- tok->done = E_TOKEN;
- tok->cur = tok->inp;
- return ERRORTOKEN;
- }
- if (c == '\\') {
- c = tok_nextc(tok);
- *p_end = tok->cur;
- if (c == '\n' || c == EOF) {
+ if (c == '\n') {
+ if (!triple) {
tok->done = E_TOKEN;
- tok->cur = tok->inp;
+ tok_backup(tok, c);
return ERRORTOKEN;
}
- continue;
+ tripcount = 0;
}
- if (c == '\'')
- break;
- }
- *p_end = tok->cur;
- return STRING;
- }
-
- /* String (double quotes) */
- if (c == '\"') {
- for (;;) {
- c = tok_nextc(tok);
- if (c == '\n' || c == EOF) {
+ else if (c == EOF) {
tok->done = E_TOKEN;
tok->cur = tok->inp;
return ERRORTOKEN;
}
- if (c == '\\') {
+ else if (c == quote) {
+ tripcount++;
+ if (tok->cur == tok->start+2) {
+ c = tok_nextc(tok);
+ if (c == quote) {
+ triple = 1;
+ tripcount = 0;
+ continue;
+ }
+ tok_backup(tok, c);
+ }
+ if (!triple || tripcount == 3)
+ break;
+ }
+ else if (c == '\\') {
+ tripcount = 0;
c = tok_nextc(tok);
- *p_end = tok->cur;
- if (c == '\n' || c == EOF) {
+ if (c == EOF) {
tok->done = E_TOKEN;
tok->cur = tok->inp;
return ERRORTOKEN;
}
- continue;
}
- if (c == '\"')
- break;
+ else
+ tripcount = 0;
}
+ *p_start = tok->start;
*p_end = tok->cur;
return STRING;
}
@@ -624,7 +650,6 @@ tok_get(tok, p_start, p_end)
tok->cur = tok->inp;
return ERRORTOKEN;
}
- tok->lineno++;
goto again; /* Read next line */
}
@@ -633,13 +658,14 @@ tok_get(tok, p_start, p_end)
int c2 = tok_nextc(tok);
int token = tok_2char(c, c2);
if (token != OP) {
+ *p_start = tok->start;
*p_end = tok->cur;
return token;
}
tok_backup(tok, c2);
}
- /* Keep track of parenteses nesting level */
+ /* Keep track of parentheses nesting level */
switch (c) {
case '(':
case '[':
@@ -654,6 +680,7 @@ tok_get(tok, p_start, p_end)
}
/* Punctuation character */
+ *p_start = tok->start;
*p_end = tok->cur;
return tok_1char(c);
}