Diffstat (limited to 'Parser')
-rw-r--r--  Parser/Python.asdl    7
-rw-r--r--  Parser/asdl.py       28
-rwxr-xr-x  Parser/asdl_c.py     25
-rw-r--r--  Parser/grammar.mak   45
-rw-r--r--  Parser/myreadline.c  10
-rw-r--r--  Parser/parsetok.c    16
-rw-r--r--  Parser/tokenizer.c  117
-rw-r--r--  Parser/tokenizer.h    3
8 files changed, 135 insertions(+), 116 deletions(-)
diff --git a/Parser/Python.asdl b/Parser/Python.asdl
index a4394c9adf..9a9b933143 100644
--- a/Parser/Python.asdl
+++ b/Parser/Python.asdl
@@ -11,7 +11,7 @@ module Python version "$Revision$"
stmt = FunctionDef(identifier name, arguments args,
stmt* body, expr* decorator_list)
- | ClassDef(identifier name, expr* bases, stmt* body, expr *decorator_list)
+ | ClassDef(identifier name, expr* bases, stmt* body, expr* decorator_list)
| Return(expr? value)
| Delete(expr* targets)
@@ -34,7 +34,7 @@ module Python version "$Revision$"
| Assert(expr test, expr? msg)
| Import(alias* names)
- | ImportFrom(identifier module, alias* names, int? level)
+ | ImportFrom(identifier? module, alias* names, int? level)
-- Doesn't capture requirement that locals must be
-- defined if globals is
@@ -56,7 +56,10 @@ module Python version "$Revision$"
| Lambda(arguments args, expr body)
| IfExp(expr test, expr body, expr orelse)
| Dict(expr* keys, expr* values)
+ | Set(expr* elts)
| ListComp(expr elt, comprehension* generators)
+ | SetComp(expr elt, comprehension* generators)
+ | DictComp(expr key, expr value, comprehension* generators)
| GeneratorExp(expr elt, comprehension* generators)
-- the grammar constrains where yield expressions can occur
| Yield(expr? value)
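
These ASDL additions surface directly in the ast module: Set, SetComp and
DictComp become real node types, and ImportFrom's module field becomes
optional. A quick interactive sketch, assuming a 2.7 interpreter built from
this tree (output abbreviated):

    import ast

    # New expression nodes from the grammar above:
    print ast.dump(ast.parse("{1, 2, 3}", mode="eval"))              # Set(...)
    print ast.dump(ast.parse("{x for x in xs}", mode="eval"))        # SetComp(...)
    print ast.dump(ast.parse("{k: v for k, v in kv}", mode="eval"))  # DictComp(...)

    # identifier? module: a bare relative import now parses with module=None:
    print ast.dump(ast.parse("from . import x").body[0])
    # ImportFrom(module=None, names=[alias(name='x', asname=None)], level=1)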
diff --git a/Parser/asdl.py b/Parser/asdl.py
index 0cada2b0aa..7f5856b18a 100644
--- a/Parser/asdl.py
+++ b/Parser/asdl.py
@@ -10,14 +10,12 @@ browser.
Changes for Python: Add support for module versions
"""
-#__metaclass__ = type
-
import os
import traceback
import spark
-class Token:
+class Token(object):
# spark seems to dispatch in the parser based on a token's
# type attribute
def __init__(self, type, lineno):
@@ -45,7 +43,7 @@ class String(Token):
self.value = value
self.lineno = lineno
-class ASDLSyntaxError:
+class ASDLSyntaxError(Exception):
def __init__(self, lineno, token=None, msg=None):
self.lineno = lineno
@@ -206,19 +204,19 @@ class ASDLParser(spark.GenericParser, object):
def p_field_2(self, (type, _, name)):
" field ::= Id * Id "
- return Field(type, name, seq=1)
+ return Field(type, name, seq=True)
def p_field_3(self, (type, _, name)):
" field ::= Id ? Id "
- return Field(type, name, opt=1)
+ return Field(type, name, opt=True)
def p_field_4(self, (type, _)):
" field ::= Id * "
- return Field(type, seq=1)
+ return Field(type, seq=True)
def p_field_5(self, (type, _)):
" field ::= Id ? "
- return Field(type, opt=1)
+ return Field(type, opt=True)
builtin_types = ("identifier", "string", "int", "bool", "object")
@@ -226,7 +224,7 @@ builtin_types = ("identifier", "string", "int", "bool", "object")
# not sure if any of the methods are useful yet, but I'm adding them
# piecemeal as they seem helpful
-class AST:
+class AST(object):
pass # a marker class
class Module(AST):
@@ -258,7 +256,7 @@ class Constructor(AST):
return "Constructor(%s, %s)" % (self.name, self.fields)
class Field(AST):
- def __init__(self, type, name=None, seq=0, opt=0):
+ def __init__(self, type, name=None, seq=False, opt=False):
self.type = type
self.name = name
self.seq = seq
@@ -266,9 +264,9 @@ class Field(AST):
def __repr__(self):
if self.seq:
- extra = ", seq=1"
+ extra = ", seq=True"
elif self.opt:
- extra = ", opt=1"
+ extra = ", opt=True"
else:
extra = ""
if self.name is None:
@@ -296,7 +294,7 @@ class Product(AST):
class VisitorBase(object):
- def __init__(self, skip=0):
+ def __init__(self, skip=False):
self.cache = {}
self.skip = skip
@@ -331,7 +329,7 @@ class VisitorBase(object):
class Check(VisitorBase):
def __init__(self):
- super(Check, self).__init__(skip=1)
+ super(Check, self).__init__(skip=True)
self.cons = {}
self.errors = 0
self.types = {}
@@ -373,7 +371,7 @@ def check(mod):
v.visit(mod)
for t in v.types:
- if not mod.types.has_key(t) and not t in builtin_types:
+ if t not in mod.types and not t in builtin_types:
v.errors += 1
uses = ", ".join(v.types[t])
print "Undefined type %s, used in %s" % (t, uses)
diff --git a/Parser/asdl_c.py b/Parser/asdl_c.py
index 3772b129ec..634ad29bc5 100755
--- a/Parser/asdl_c.py
+++ b/Parser/asdl_c.py
@@ -86,7 +86,7 @@ class EmitVisitor(asdl.VisitorBase):
self.file = file
super(EmitVisitor, self).__init__()
- def emit(self, s, depth, reflow=1):
+ def emit(self, s, depth, reflow=True):
# XXX reflow long lines?
if reflow:
lines = reflow_lines(s, depth)
@@ -255,7 +255,7 @@ class PrototypeVisitor(EmitVisitor):
ctype = get_c_type(type)
self.emit_function(cons.name, ctype, args, attrs)
- def emit_function(self, name, ctype, args, attrs, union=1):
+ def emit_function(self, name, ctype, args, attrs, union=True):
args = args + attrs
if args:
argstr = ", ".join(["%s %s" % (atype, aname)
@@ -267,19 +267,19 @@ class PrototypeVisitor(EmitVisitor):
for i in range(1, len(args)+1):
margs += ", a%d" % i
self.emit("#define %s(%s) _Py_%s(%s)" % (name, margs, name, margs), 0,
- reflow = 0)
- self.emit("%s _Py_%s(%s);" % (ctype, name, argstr), 0)
+ reflow=False)
+ self.emit("%s _Py_%s(%s);" % (ctype, name, argstr), False)
def visitProduct(self, prod, name):
self.emit_function(name, get_c_type(name),
- self.get_args(prod.fields), [], union=0)
+ self.get_args(prod.fields), [], union=False)
class FunctionVisitor(PrototypeVisitor):
"""Visitor to generate constructor functions for AST."""
- def emit_function(self, name, ctype, args, attrs, union=1):
- def emit(s, depth=0, reflow=1):
+ def emit_function(self, name, ctype, args, attrs, union=True):
+ def emit(s, depth=0, reflow=True):
self.emit(s, depth, reflow)
argstr = ", ".join(["%s %s" % (atype, aname)
for atype, aname, opt in args + attrs])
@@ -298,7 +298,7 @@ class FunctionVisitor(PrototypeVisitor):
emit("PyErr_SetString(PyExc_ValueError,", 2)
msg = "field %s is required for %s" % (argname, name)
emit(' "%s");' % msg,
- 2, reflow=0)
+ 2, reflow=False)
emit('return NULL;', 2)
emit('}', 1)
@@ -314,7 +314,7 @@ class FunctionVisitor(PrototypeVisitor):
emit("")
def emit_body_union(self, name, args, attrs):
- def emit(s, depth=0, reflow=1):
+ def emit(s, depth=0, reflow=True):
self.emit(s, depth, reflow)
emit("p->kind = %s_kind;" % name, 1)
for argtype, argname, opt in args:
@@ -323,7 +323,7 @@ class FunctionVisitor(PrototypeVisitor):
emit("p->%s = %s;" % (argname, argname), 1)
def emit_body_struct(self, name, args, attrs):
- def emit(s, depth=0, reflow=1):
+ def emit(s, depth=0, reflow=True):
self.emit(s, depth, reflow)
for argtype, argname, opt in args:
emit("p->%s = %s;" % (argname, argname), 1)
@@ -733,8 +733,9 @@ static int add_attributes(PyTypeObject* type, char**attrs, int num_fields)
{
int i, result;
PyObject *s, *l = PyTuple_New(num_fields);
- if (!l) return 0;
- for(i = 0; i < num_fields; i++) {
+ if (!l)
+ return 0;
+ for (i = 0; i < num_fields; i++) {
s = PyString_FromString(attrs[i]);
if (!s) {
Py_DECREF(l);
diff --git a/Parser/grammar.mak b/Parser/grammar.mak
deleted file mode 100644
index 55f028ffb8..0000000000
--- a/Parser/grammar.mak
+++ /dev/null
@@ -1,45 +0,0 @@
-# This manages to rebuild graminit.{h, c} under MSVC 6 (Windows), via
-#
-# nmake /f grammar.mak
-#
-# You may also need to copy python23.dll into this directory, or get
-# it on your search path.
-#
-# The intermediate files can be nuked afterwards:
-#
-# nmake /f grammar.mak clean
-#
-# I don't understand the maze of preprocessor #define's on Windows, and
-# as a result this requires linking with python23.lib, so it's of no use
-# for bootstrapping (the cause appears to be a useless-- in this
-# particular case --pragma in PC\pyconfig.h, which demands that
-# python23.lib get linked in).
-
-LIBS= ..\PCbuild\python25.lib
-
-CFLAGS= /I ..\Include /I ..\PC /D MS_NO_COREDLL /D PGEN /MD
-
-GRAMMAR_H= ..\Include\graminit.h
-GRAMMAR_C= ..\Python\graminit.c
-GRAMMAR_INPUT= ..\Grammar\Grammar
-
-PGEN= pgen.exe
-
-POBJS= acceler.obj grammar1.obj listnode.obj node.obj parser.obj \
- parsetok.obj tokenizer.obj bitset.obj metagrammar.obj
-
-PARSER_OBJS= $(POBJS) myreadline.obj
-
-PGOBJS= firstsets.obj grammar.obj pgen.obj printgrammar.obj pgenmain.obj
-
-PGENOBJS= $(POBJS) $(PGOBJS)
-
-$(GRAMMAR_H) $(GRAMMAR_C): $(PGEN) $(GRAMMAR_INPUT)
- $(PGEN) $(GRAMMAR_INPUT) $(GRAMMAR_H) $(GRAMMAR_C)
-
-$(PGEN): $(PGENOBJS)
- $(CC) $(PGENOBJS) $(LIBS) /Fe$(PGEN)
-
-clean:
- del *.obj
- del $(PGEN)
diff --git a/Parser/myreadline.c b/Parser/myreadline.c
index 122f896869..34fb45c932 100644
--- a/Parser/myreadline.c
+++ b/Parser/myreadline.c
@@ -40,7 +40,7 @@ static int
my_fgets(char *buf, int len, FILE *fp)
{
char *p;
- for (;;) {
+ while (1) {
if (PyOS_InputHook != NULL)
(void)(PyOS_InputHook)();
errno = 0;
@@ -77,6 +77,7 @@ my_fgets(char *buf, int len, FILE *fp)
}
#endif /* MS_WINDOWS */
if (feof(fp)) {
+ clearerr(fp);
return -1; /* EOF */
}
#ifdef EINTR
@@ -89,9 +90,10 @@ my_fgets(char *buf, int len, FILE *fp)
#ifdef WITH_THREAD
PyEval_SaveThread();
#endif
- if (s < 0) {
- return 1;
- }
+ if (s < 0)
+ return 1;
+ /* try again */
+ continue;
}
#endif
if (PyOS_InterruptOccurred()) {
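
The clearerr(fp) call is the substantive fix in this file: stdio's EOF flag is
sticky, so without it a single Ctrl-D at an interactive prompt could make every
subsequent read report EOF as well. Roughly, at the REPL of a patched build
(an illustrative sketch, not part of the diff):

    try:
        raw_input("first? ")    # user answers with Ctrl-D -> EOFError
    except EOFError:
        pass
    raw_input("second? ")       # reads normally; with the EOF flag left set,
                                # this could fail immediately with EOFError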
diff --git a/Parser/parsetok.c b/Parser/parsetok.c
index cad7a80765..e8d396a5f3 100644
--- a/Parser/parsetok.c
+++ b/Parser/parsetok.c
@@ -51,7 +51,7 @@ PyParser_ParseStringFlagsFilenameEx(const char *s, const char *filename,
initerr(err_ret, filename);
- if ((tok = PyTokenizer_FromString(s)) == NULL) {
+ if ((tok = PyTokenizer_FromString(s, start == file_input)) == NULL) {
err_ret->error = PyErr_Occurred() ? E_DECODE : E_NOMEM;
return NULL;
}
@@ -243,16 +243,24 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
err_ret->text = text;
}
} else if (tok->encoding != NULL) {
+ /* 'nodes->n_str' uses PyObject_*, while 'tok->encoding' was
+ * allocated using PyMem_
+ */
node* r = PyNode_New(encoding_decl);
- if (!r) {
+ if (r)
+ r->n_str = PyObject_MALLOC(strlen(tok->encoding)+1);
+ if (!r || !r->n_str) {
err_ret->error = E_NOMEM;
+ if (r)
+ PyObject_FREE(r);
n = NULL;
goto done;
}
- r->n_str = tok->encoding;
+ strcpy(r->n_str, tok->encoding);
+ PyMem_FREE(tok->encoding);
+ tok->encoding = NULL;
r->n_nchildren = 1;
r->n_child = n;
- tok->encoding = NULL;
n = r;
}
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index a08f1838af..ee6313b311 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -92,7 +92,6 @@ char *_PyParser_TokenNames[] = {
"<N_TOKENS>"
};
-
/* Create and initialize a new tok_state structure */
static struct tok_state *
@@ -105,6 +104,7 @@ tok_new(void)
tok->buf = tok->cur = tok->end = tok->inp = tok->start = NULL;
tok->done = E_OK;
tok->fp = NULL;
+ tok->input = NULL;
tok->tabsize = TABSIZE;
tok->indent = 0;
tok->indstack[0] = 0;
@@ -130,6 +130,17 @@ tok_new(void)
return tok;
}
+static char *
+new_string(const char *s, Py_ssize_t len)
+{
+ char* result = (char *)PyMem_MALLOC(len + 1);
+ if (result != NULL) {
+ memcpy(result, s, len);
+ result[len] = '\0';
+ }
+ return result;
+}
+
#ifdef PGEN
static char *
@@ -144,10 +155,10 @@ decoding_feof(struct tok_state *tok)
return feof(tok->fp);
}
-static const char *
-decode_str(const char *str, struct tok_state *tok)
+static char *
+decode_str(const char *str, int exec_input, struct tok_state *tok)
{
- return str;
+ return new_string(str, strlen(str));
}
#else /* PGEN */
@@ -162,16 +173,6 @@ error_ret(struct tok_state *tok) /* XXX */
return NULL; /* as if it were EOF */
}
-static char *
-new_string(const char *s, Py_ssize_t len)
-{
- char* result = (char *)PyMem_MALLOC(len + 1);
- if (result != NULL) {
- memcpy(result, s, len);
- result[len] = '\0';
- }
- return result;
-}
static char *
get_normal_name(char *s) /* for utf-8 and latin-1 */
@@ -180,20 +181,26 @@ get_normal_name(char *s) /* for utf-8 and latin-1 */
int i;
for (i = 0; i < 12; i++) {
int c = s[i];
- if (c == '\0') break;
- else if (c == '_') buf[i] = '-';
- else buf[i] = tolower(c);
+ if (c == '\0')
+ break;
+ else if (c == '_')
+ buf[i] = '-';
+ else
+ buf[i] = tolower(c);
}
buf[i] = '\0';
if (strcmp(buf, "utf-8") == 0 ||
- strncmp(buf, "utf-8-", 6) == 0) return "utf-8";
+ strncmp(buf, "utf-8-", 6) == 0)
+ return "utf-8";
else if (strcmp(buf, "latin-1") == 0 ||
strcmp(buf, "iso-8859-1") == 0 ||
strcmp(buf, "iso-latin-1") == 0 ||
strncmp(buf, "latin-1-", 8) == 0 ||
strncmp(buf, "iso-8859-1-", 11) == 0 ||
- strncmp(buf, "iso-latin-1-", 12) == 0) return "iso-8859-1";
- else return s;
+ strncmp(buf, "iso-latin-1-", 12) == 0)
+ return "iso-8859-1";
+ else
+ return s;
}
/* Return the coding spec in S, or NULL if none is found. */
@@ -222,7 +229,7 @@ get_coding_spec(const char *s, Py_ssize_t size)
} while (t[0] == '\x20' || t[0] == '\t');
begin = t;
- while (isalnum(Py_CHARMASK(t[0])) ||
+ while (Py_ISALNUM(t[0]) ||
t[0] == '-' || t[0] == '_' || t[0] == '.')
t++;
@@ -417,7 +424,8 @@ fp_readl(char *s, int size, struct tok_state *tok)
memcpy(s, str, utf8len);
s[utf8len] = '\0';
Py_DECREF(utf8);
- if (utf8len == 0) return NULL; /* EOF */
+ if (utf8len == 0)
+ return NULL; /* EOF */
return s;
#endif
}
@@ -589,17 +597,62 @@ translate_into_utf8(const char* str, const char* enc) {
}
#endif
+
+static char *
+translate_newlines(const char *s, int exec_input, struct tok_state *tok) {
+ int skip_next_lf = 0, needed_length = strlen(s) + 2, final_length;
+ char *buf, *current;
+ char c = '\0';
+ buf = PyMem_MALLOC(needed_length);
+ if (buf == NULL) {
+ tok->done = E_NOMEM;
+ return NULL;
+ }
+ for (current = buf; *s; s++, current++) {
+ c = *s;
+ if (skip_next_lf) {
+ skip_next_lf = 0;
+ if (c == '\n') {
+ c = *++s;
+ if (!c)
+ break;
+ }
+ }
+ if (c == '\r') {
+ skip_next_lf = 1;
+ c = '\n';
+ }
+ *current = c;
+ }
+ /* If this is exec input, add a newline to the end of the string if
+ there isn't one already. */
+ if (exec_input && c != '\n') {
+ *current = '\n';
+ current++;
+ }
+ *current = '\0';
+ final_length = current - buf + 1;
+ if (final_length < needed_length && final_length)
+ /* should never fail */
+ buf = PyMem_REALLOC(buf, final_length);
+ return buf;
+}
+
/* Decode a byte string STR for use as the buffer of TOK.
Look for encoding declarations inside STR, and record them
inside TOK. */
static const char *
-decode_str(const char *str, struct tok_state *tok)
+decode_str(const char *input, int single, struct tok_state *tok)
{
PyObject* utf8 = NULL;
+ const char *str;
const char *s;
const char *newl[2] = {NULL, NULL};
int lineno = 0;
+ tok->input = str = translate_newlines(input, single, tok);
+ if (str == NULL)
+ return NULL;
tok->enc = NULL;
tok->str = str;
if (!check_bom(buf_getc, buf_ungetc, buf_setreadl, tok))
@@ -639,11 +692,8 @@ decode_str(const char *str, struct tok_state *tok)
if (tok->enc != NULL) {
assert(utf8 == NULL);
utf8 = translate_into_utf8(str, tok->enc);
- if (utf8 == NULL) {
- PyErr_Format(PyExc_SyntaxError,
- "unknown encoding: %s", tok->enc);
+ if (utf8 == NULL)
return error_ret(tok);
- }
str = PyString_AsString(utf8);
}
#endif
@@ -657,12 +707,12 @@ decode_str(const char *str, struct tok_state *tok)
/* Set up tokenizer for string */
struct tok_state *
-PyTokenizer_FromString(const char *str)
+PyTokenizer_FromString(const char *str, int exec_input)
{
struct tok_state *tok = tok_new();
if (tok == NULL)
return NULL;
- str = (char *)decode_str(str, tok);
+ str = (char *)decode_str(str, exec_input, tok);
if (str == NULL) {
PyTokenizer_Free(tok);
return NULL;
@@ -708,6 +758,8 @@ PyTokenizer_Free(struct tok_state *tok)
#endif
if (tok->fp != NULL && tok->buf != NULL)
PyMem_FREE(tok->buf);
+ if (tok->input)
+ PyMem_FREE((char *)tok->input);
PyMem_FREE(tok);
}
@@ -953,7 +1005,7 @@ tok_backup(register struct tok_state *tok, register int c)
{
if (c != EOF) {
if (--tok->cur < tok->buf)
- Py_FatalError("tok_backup: begin of buffer");
+ Py_FatalError("tok_backup: beginning of buffer");
if (*tok->cur != c)
*tok->cur = c;
}
@@ -1132,7 +1184,6 @@ indenterror(struct tok_state *tok)
return 0;
}
-
/* Get next token, after space stripping etc. */
static int
@@ -1288,7 +1339,7 @@ tok_get(register struct tok_state *tok, char **p_start, char **p_end)
}
/* Identifier (most frequent token!) */
- if (isalpha(c) || c == '_') {
+ if (Py_ISALPHA(c) || c == '_') {
/* Process r"", u"" and ur"" */
switch (c) {
case 'b':
@@ -1314,7 +1365,7 @@ tok_get(register struct tok_state *tok, char **p_start, char **p_end)
goto letter_quote;
break;
}
- while (isalnum(c) || c == '_') {
+ while (c != EOF && (Py_ISALNUM(c) || c == '_')) {
c = tok_nextc(tok);
}
tok_backup(tok, c);
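
translate_newlines() gives string input the same universal-newline treatment
that file input already gets, and, for exec input only, appends the trailing
newline the grammar expects. The user-visible effect, sketched against a
patched interpreter:

    # \r\n and bare \r in string sources are normalized to \n:
    ns = {}
    exec compile("x = 1\r\ny = 2\r", "<test>", "exec") in ns
    print ns["x"], ns["y"]                            # 1 2

    # exec input no longer needs a trailing newline (eval input never did):
    compile("if flag:\n    run()", "<test>", "exec")  # no SyntaxError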
diff --git a/Parser/tokenizer.h b/Parser/tokenizer.h
index 19aa702430..f15e2523f7 100644
--- a/Parser/tokenizer.h
+++ b/Parser/tokenizer.h
@@ -52,9 +52,10 @@ struct tok_state {
#endif
const char* enc;
const char* str;
+ const char* input; /* Tokenizer's newline translated copy of the string. */
};
-extern struct tok_state *PyTokenizer_FromString(const char *);
+extern struct tok_state *PyTokenizer_FromString(const char *, int);
extern struct tok_state *PyTokenizer_FromFile(FILE *, char *, char *);
extern void PyTokenizer_Free(struct tok_state *);
extern int PyTokenizer_Get(struct tok_state *, char **, char **);