diff options
Diffstat (limited to 'Parser')
-rw-r--r-- | Parser/Python.asdl | 7 | ||||
-rw-r--r-- | Parser/asdl.py | 28 | ||||
-rwxr-xr-x | Parser/asdl_c.py | 25 | ||||
-rw-r--r-- | Parser/grammar.mak | 45 | ||||
-rw-r--r-- | Parser/myreadline.c | 10 | ||||
-rw-r--r-- | Parser/parsetok.c | 16 | ||||
-rw-r--r-- | Parser/tokenizer.c | 117 | ||||
-rw-r--r-- | Parser/tokenizer.h | 3 |
8 files changed, 135 insertions, 116 deletions
diff --git a/Parser/Python.asdl b/Parser/Python.asdl index a4394c9adf..9a9b933143 100644 --- a/Parser/Python.asdl +++ b/Parser/Python.asdl @@ -11,7 +11,7 @@ module Python version "$Revision$" stmt = FunctionDef(identifier name, arguments args, stmt* body, expr* decorator_list) - | ClassDef(identifier name, expr* bases, stmt* body, expr *decorator_list) + | ClassDef(identifier name, expr* bases, stmt* body, expr* decorator_list) | Return(expr? value) | Delete(expr* targets) @@ -34,7 +34,7 @@ module Python version "$Revision$" | Assert(expr test, expr? msg) | Import(alias* names) - | ImportFrom(identifier module, alias* names, int? level) + | ImportFrom(identifier? module, alias* names, int? level) -- Doesn't capture requirement that locals must be -- defined if globals is @@ -56,7 +56,10 @@ module Python version "$Revision$" | Lambda(arguments args, expr body) | IfExp(expr test, expr body, expr orelse) | Dict(expr* keys, expr* values) + | Set(expr* elts) | ListComp(expr elt, comprehension* generators) + | SetComp(expr elt, comprehension* generators) + | DictComp(expr key, expr value, comprehension* generators) | GeneratorExp(expr elt, comprehension* generators) -- the grammar constrains where yield expressions can occur | Yield(expr? value) diff --git a/Parser/asdl.py b/Parser/asdl.py index 0cada2b0aa..7f5856b18a 100644 --- a/Parser/asdl.py +++ b/Parser/asdl.py @@ -10,14 +10,12 @@ browser. Changes for Python: Add support for module versions """ -#__metaclass__ = type - import os import traceback import spark -class Token: +class Token(object): # spark seems to dispatch in the parser based on a token's # type attribute def __init__(self, type, lineno): @@ -45,7 +43,7 @@ class String(Token): self.value = value self.lineno = lineno -class ASDLSyntaxError: +class ASDLSyntaxError(Exception): def __init__(self, lineno, token=None, msg=None): self.lineno = lineno @@ -206,19 +204,19 @@ class ASDLParser(spark.GenericParser, object): def p_field_2(self, (type, _, name)): " field ::= Id * Id " - return Field(type, name, seq=1) + return Field(type, name, seq=True) def p_field_3(self, (type, _, name)): " field ::= Id ? Id " - return Field(type, name, opt=1) + return Field(type, name, opt=True) def p_field_4(self, (type, _)): " field ::= Id * " - return Field(type, seq=1) + return Field(type, seq=True) def p_field_5(self, (type, _)): " field ::= Id ? " - return Field(type, opt=1) + return Field(type, opt=True) builtin_types = ("identifier", "string", "int", "bool", "object") @@ -226,7 +224,7 @@ builtin_types = ("identifier", "string", "int", "bool", "object") # not sure if any of the methods are useful yet, but I'm adding them # piecemeal as they seem helpful -class AST: +class AST(object): pass # a marker class class Module(AST): @@ -258,7 +256,7 @@ class Constructor(AST): return "Constructor(%s, %s)" % (self.name, self.fields) class Field(AST): - def __init__(self, type, name=None, seq=0, opt=0): + def __init__(self, type, name=None, seq=False, opt=False): self.type = type self.name = name self.seq = seq @@ -266,9 +264,9 @@ class Field(AST): def __repr__(self): if self.seq: - extra = ", seq=1" + extra = ", seq=True" elif self.opt: - extra = ", opt=1" + extra = ", opt=True" else: extra = "" if self.name is None: @@ -296,7 +294,7 @@ class Product(AST): class VisitorBase(object): - def __init__(self, skip=0): + def __init__(self, skip=False): self.cache = {} self.skip = skip @@ -331,7 +329,7 @@ class VisitorBase(object): class Check(VisitorBase): def __init__(self): - super(Check, self).__init__(skip=1) + super(Check, self).__init__(skip=True) self.cons = {} self.errors = 0 self.types = {} @@ -373,7 +371,7 @@ def check(mod): v.visit(mod) for t in v.types: - if not mod.types.has_key(t) and not t in builtin_types: + if t not in mod.types and not t in builtin_types: v.errors += 1 uses = ", ".join(v.types[t]) print "Undefined type %s, used in %s" % (t, uses) diff --git a/Parser/asdl_c.py b/Parser/asdl_c.py index 3772b129ec..634ad29bc5 100755 --- a/Parser/asdl_c.py +++ b/Parser/asdl_c.py @@ -86,7 +86,7 @@ class EmitVisitor(asdl.VisitorBase): self.file = file super(EmitVisitor, self).__init__() - def emit(self, s, depth, reflow=1): + def emit(self, s, depth, reflow=True): # XXX reflow long lines? if reflow: lines = reflow_lines(s, depth) @@ -255,7 +255,7 @@ class PrototypeVisitor(EmitVisitor): ctype = get_c_type(type) self.emit_function(cons.name, ctype, args, attrs) - def emit_function(self, name, ctype, args, attrs, union=1): + def emit_function(self, name, ctype, args, attrs, union=True): args = args + attrs if args: argstr = ", ".join(["%s %s" % (atype, aname) @@ -267,19 +267,19 @@ class PrototypeVisitor(EmitVisitor): for i in range(1, len(args)+1): margs += ", a%d" % i self.emit("#define %s(%s) _Py_%s(%s)" % (name, margs, name, margs), 0, - reflow = 0) - self.emit("%s _Py_%s(%s);" % (ctype, name, argstr), 0) + reflow=False) + self.emit("%s _Py_%s(%s);" % (ctype, name, argstr), False) def visitProduct(self, prod, name): self.emit_function(name, get_c_type(name), - self.get_args(prod.fields), [], union=0) + self.get_args(prod.fields), [], union=False) class FunctionVisitor(PrototypeVisitor): """Visitor to generate constructor functions for AST.""" - def emit_function(self, name, ctype, args, attrs, union=1): - def emit(s, depth=0, reflow=1): + def emit_function(self, name, ctype, args, attrs, union=True): + def emit(s, depth=0, reflow=True): self.emit(s, depth, reflow) argstr = ", ".join(["%s %s" % (atype, aname) for atype, aname, opt in args + attrs]) @@ -298,7 +298,7 @@ class FunctionVisitor(PrototypeVisitor): emit("PyErr_SetString(PyExc_ValueError,", 2) msg = "field %s is required for %s" % (argname, name) emit(' "%s");' % msg, - 2, reflow=0) + 2, reflow=False) emit('return NULL;', 2) emit('}', 1) @@ -314,7 +314,7 @@ class FunctionVisitor(PrototypeVisitor): emit("") def emit_body_union(self, name, args, attrs): - def emit(s, depth=0, reflow=1): + def emit(s, depth=0, reflow=True): self.emit(s, depth, reflow) emit("p->kind = %s_kind;" % name, 1) for argtype, argname, opt in args: @@ -323,7 +323,7 @@ class FunctionVisitor(PrototypeVisitor): emit("p->%s = %s;" % (argname, argname), 1) def emit_body_struct(self, name, args, attrs): - def emit(s, depth=0, reflow=1): + def emit(s, depth=0, reflow=True): self.emit(s, depth, reflow) for argtype, argname, opt in args: emit("p->%s = %s;" % (argname, argname), 1) @@ -733,8 +733,9 @@ static int add_attributes(PyTypeObject* type, char**attrs, int num_fields) { int i, result; PyObject *s, *l = PyTuple_New(num_fields); - if (!l) return 0; - for(i = 0; i < num_fields; i++) { + if (!l) + return 0; + for (i = 0; i < num_fields; i++) { s = PyString_FromString(attrs[i]); if (!s) { Py_DECREF(l); diff --git a/Parser/grammar.mak b/Parser/grammar.mak deleted file mode 100644 index 55f028ffb8..0000000000 --- a/Parser/grammar.mak +++ /dev/null @@ -1,45 +0,0 @@ -# This manages to rebuild graminit.{h, c} under MSVC 6 (Windows), via -# -# nmake /f grammar.mak -# -# You may also need to copy python23.dll into this directory, or get -# it on your search path. -# -# The intermediate files can be nuked afterwards: -# -# nmake /f grammar.mak clean -# -# I don't understand the maze of preprocessor #define's on Windows, and -# as a result this requires linking with python23.lib, so it's of no use -# for bootstrapping (the cause appears to be a useless-- in this -# particular case --pragma in PC\pyconfig.h, which demands that -# python23.lib get linked in). - -LIBS= ..\PCbuild\python25.lib - -CFLAGS= /I ..\Include /I ..\PC /D MS_NO_COREDLL /D PGEN /MD - -GRAMMAR_H= ..\Include\graminit.h -GRAMMAR_C= ..\Python\graminit.c -GRAMMAR_INPUT= ..\Grammar\Grammar - -PGEN= pgen.exe - -POBJS= acceler.obj grammar1.obj listnode.obj node.obj parser.obj \ - parsetok.obj tokenizer.obj bitset.obj metagrammar.obj - -PARSER_OBJS= $(POBJS) myreadline.obj - -PGOBJS= firstsets.obj grammar.obj pgen.obj printgrammar.obj pgenmain.obj - -PGENOBJS= $(POBJS) $(PGOBJS) - -$(GRAMMAR_H) $(GRAMMAR_C): $(PGEN) $(GRAMMAR_INPUT) - $(PGEN) $(GRAMMAR_INPUT) $(GRAMMAR_H) $(GRAMMAR_C) - -$(PGEN): $(PGENOBJS) - $(CC) $(PGENOBJS) $(LIBS) /Fe$(PGEN) - -clean: - del *.obj - del $(PGEN) diff --git a/Parser/myreadline.c b/Parser/myreadline.c index 122f896869..34fb45c932 100644 --- a/Parser/myreadline.c +++ b/Parser/myreadline.c @@ -40,7 +40,7 @@ static int my_fgets(char *buf, int len, FILE *fp) { char *p; - for (;;) { + while (1) { if (PyOS_InputHook != NULL) (void)(PyOS_InputHook)(); errno = 0; @@ -77,6 +77,7 @@ my_fgets(char *buf, int len, FILE *fp) } #endif /* MS_WINDOWS */ if (feof(fp)) { + clearerr(fp); return -1; /* EOF */ } #ifdef EINTR @@ -89,9 +90,10 @@ my_fgets(char *buf, int len, FILE *fp) #ifdef WITH_THREAD PyEval_SaveThread(); #endif - if (s < 0) { - return 1; - } + if (s < 0) + return 1; + /* try again */ + continue; } #endif if (PyOS_InterruptOccurred()) { diff --git a/Parser/parsetok.c b/Parser/parsetok.c index cad7a80765..e8d396a5f3 100644 --- a/Parser/parsetok.c +++ b/Parser/parsetok.c @@ -51,7 +51,7 @@ PyParser_ParseStringFlagsFilenameEx(const char *s, const char *filename, initerr(err_ret, filename); - if ((tok = PyTokenizer_FromString(s)) == NULL) { + if ((tok = PyTokenizer_FromString(s, start == file_input)) == NULL) { err_ret->error = PyErr_Occurred() ? E_DECODE : E_NOMEM; return NULL; } @@ -243,16 +243,24 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret, err_ret->text = text; } } else if (tok->encoding != NULL) { + /* 'nodes->n_str' uses PyObject_*, while 'tok->encoding' was + * allocated using PyMem_ + */ node* r = PyNode_New(encoding_decl); - if (!r) { + if (r) + r->n_str = PyObject_MALLOC(strlen(tok->encoding)+1); + if (!r || !r->n_str) { err_ret->error = E_NOMEM; + if (r) + PyObject_FREE(r); n = NULL; goto done; } - r->n_str = tok->encoding; + strcpy(r->n_str, tok->encoding); + PyMem_FREE(tok->encoding); + tok->encoding = NULL; r->n_nchildren = 1; r->n_child = n; - tok->encoding = NULL; n = r; } diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c index a08f1838af..ee6313b311 100644 --- a/Parser/tokenizer.c +++ b/Parser/tokenizer.c @@ -92,7 +92,6 @@ char *_PyParser_TokenNames[] = { "<N_TOKENS>" }; - /* Create and initialize a new tok_state structure */ static struct tok_state * @@ -105,6 +104,7 @@ tok_new(void) tok->buf = tok->cur = tok->end = tok->inp = tok->start = NULL; tok->done = E_OK; tok->fp = NULL; + tok->input = NULL; tok->tabsize = TABSIZE; tok->indent = 0; tok->indstack[0] = 0; @@ -130,6 +130,17 @@ tok_new(void) return tok; } +static char * +new_string(const char *s, Py_ssize_t len) +{ + char* result = (char *)PyMem_MALLOC(len + 1); + if (result != NULL) { + memcpy(result, s, len); + result[len] = '\0'; + } + return result; +} + #ifdef PGEN static char * @@ -144,10 +155,10 @@ decoding_feof(struct tok_state *tok) return feof(tok->fp); } -static const char * -decode_str(const char *str, struct tok_state *tok) +static char * +decode_str(const char *str, int exec_input, struct tok_state *tok) { - return str; + return new_string(str, strlen(str)); } #else /* PGEN */ @@ -162,16 +173,6 @@ error_ret(struct tok_state *tok) /* XXX */ return NULL; /* as if it were EOF */ } -static char * -new_string(const char *s, Py_ssize_t len) -{ - char* result = (char *)PyMem_MALLOC(len + 1); - if (result != NULL) { - memcpy(result, s, len); - result[len] = '\0'; - } - return result; -} static char * get_normal_name(char *s) /* for utf-8 and latin-1 */ @@ -180,20 +181,26 @@ get_normal_name(char *s) /* for utf-8 and latin-1 */ int i; for (i = 0; i < 12; i++) { int c = s[i]; - if (c == '\0') break; - else if (c == '_') buf[i] = '-'; - else buf[i] = tolower(c); + if (c == '\0') + break; + else if (c == '_') + buf[i] = '-'; + else + buf[i] = tolower(c); } buf[i] = '\0'; if (strcmp(buf, "utf-8") == 0 || - strncmp(buf, "utf-8-", 6) == 0) return "utf-8"; + strncmp(buf, "utf-8-", 6) == 0) + return "utf-8"; else if (strcmp(buf, "latin-1") == 0 || strcmp(buf, "iso-8859-1") == 0 || strcmp(buf, "iso-latin-1") == 0 || strncmp(buf, "latin-1-", 8) == 0 || strncmp(buf, "iso-8859-1-", 11) == 0 || - strncmp(buf, "iso-latin-1-", 12) == 0) return "iso-8859-1"; - else return s; + strncmp(buf, "iso-latin-1-", 12) == 0) + return "iso-8859-1"; + else + return s; } /* Return the coding spec in S, or NULL if none is found. */ @@ -222,7 +229,7 @@ get_coding_spec(const char *s, Py_ssize_t size) } while (t[0] == '\x20' || t[0] == '\t'); begin = t; - while (isalnum(Py_CHARMASK(t[0])) || + while (Py_ISALNUM(t[0]) || t[0] == '-' || t[0] == '_' || t[0] == '.') t++; @@ -417,7 +424,8 @@ fp_readl(char *s, int size, struct tok_state *tok) memcpy(s, str, utf8len); s[utf8len] = '\0'; Py_DECREF(utf8); - if (utf8len == 0) return NULL; /* EOF */ + if (utf8len == 0) + return NULL; /* EOF */ return s; #endif } @@ -589,17 +597,62 @@ translate_into_utf8(const char* str, const char* enc) { } #endif + +static char * +translate_newlines(const char *s, int exec_input, struct tok_state *tok) { + int skip_next_lf = 0, needed_length = strlen(s) + 2, final_length; + char *buf, *current; + char c = '\0'; + buf = PyMem_MALLOC(needed_length); + if (buf == NULL) { + tok->done = E_NOMEM; + return NULL; + } + for (current = buf; *s; s++, current++) { + c = *s; + if (skip_next_lf) { + skip_next_lf = 0; + if (c == '\n') { + c = *++s; + if (!c) + break; + } + } + if (c == '\r') { + skip_next_lf = 1; + c = '\n'; + } + *current = c; + } + /* If this is exec input, add a newline to the end of the string if + there isn't one already. */ + if (exec_input && c != '\n') { + *current = '\n'; + current++; + } + *current = '\0'; + final_length = current - buf + 1; + if (final_length < needed_length && final_length) + /* should never fail */ + buf = PyMem_REALLOC(buf, final_length); + return buf; +} + /* Decode a byte string STR for use as the buffer of TOK. Look for encoding declarations inside STR, and record them inside TOK. */ static const char * -decode_str(const char *str, struct tok_state *tok) +decode_str(const char *input, int single, struct tok_state *tok) { PyObject* utf8 = NULL; + const char *str; const char *s; const char *newl[2] = {NULL, NULL}; int lineno = 0; + tok->input = str = translate_newlines(input, single, tok); + if (str == NULL) + return NULL; tok->enc = NULL; tok->str = str; if (!check_bom(buf_getc, buf_ungetc, buf_setreadl, tok)) @@ -639,11 +692,8 @@ decode_str(const char *str, struct tok_state *tok) if (tok->enc != NULL) { assert(utf8 == NULL); utf8 = translate_into_utf8(str, tok->enc); - if (utf8 == NULL) { - PyErr_Format(PyExc_SyntaxError, - "unknown encoding: %s", tok->enc); + if (utf8 == NULL) return error_ret(tok); - } str = PyString_AsString(utf8); } #endif @@ -657,12 +707,12 @@ decode_str(const char *str, struct tok_state *tok) /* Set up tokenizer for string */ struct tok_state * -PyTokenizer_FromString(const char *str) +PyTokenizer_FromString(const char *str, int exec_input) { struct tok_state *tok = tok_new(); if (tok == NULL) return NULL; - str = (char *)decode_str(str, tok); + str = (char *)decode_str(str, exec_input, tok); if (str == NULL) { PyTokenizer_Free(tok); return NULL; @@ -708,6 +758,8 @@ PyTokenizer_Free(struct tok_state *tok) #endif if (tok->fp != NULL && tok->buf != NULL) PyMem_FREE(tok->buf); + if (tok->input) + PyMem_FREE((char *)tok->input); PyMem_FREE(tok); } @@ -953,7 +1005,7 @@ tok_backup(register struct tok_state *tok, register int c) { if (c != EOF) { if (--tok->cur < tok->buf) - Py_FatalError("tok_backup: begin of buffer"); + Py_FatalError("tok_backup: beginning of buffer"); if (*tok->cur != c) *tok->cur = c; } @@ -1132,7 +1184,6 @@ indenterror(struct tok_state *tok) return 0; } - /* Get next token, after space stripping etc. */ static int @@ -1288,7 +1339,7 @@ tok_get(register struct tok_state *tok, char **p_start, char **p_end) } /* Identifier (most frequent token!) */ - if (isalpha(c) || c == '_') { + if (Py_ISALPHA(c) || c == '_') { /* Process r"", u"" and ur"" */ switch (c) { case 'b': @@ -1314,7 +1365,7 @@ tok_get(register struct tok_state *tok, char **p_start, char **p_end) goto letter_quote; break; } - while (isalnum(c) || c == '_') { + while (c != EOF && (Py_ISALNUM(c) || c == '_')) { c = tok_nextc(tok); } tok_backup(tok, c); diff --git a/Parser/tokenizer.h b/Parser/tokenizer.h index 19aa702430..f15e2523f7 100644 --- a/Parser/tokenizer.h +++ b/Parser/tokenizer.h @@ -52,9 +52,10 @@ struct tok_state { #endif const char* enc; const char* str; + const char* input; /* Tokenizer's newline translated copy of the string. */ }; -extern struct tok_state *PyTokenizer_FromString(const char *); +extern struct tok_state *PyTokenizer_FromString(const char *, int); extern struct tok_state *PyTokenizer_FromFile(FILE *, char *, char *); extern void PyTokenizer_Free(struct tok_state *); extern int PyTokenizer_Get(struct tok_state *, char **, char **); |