8 files changed, 135 insertions, 116 deletions
diff --git a/Parser/Python.asdl b/Parser/Python.asdl
index a4394c9adf..9a9b933143 100644
--- a/Parser/Python.asdl
+++ b/Parser/Python.asdl
@@ -11,7 +11,7 @@ module Python version "$Revision$"
 
 	stmt = FunctionDef(identifier name, arguments args, 
                             stmt* body, expr* decorator_list)
-	      | ClassDef(identifier name, expr* bases, stmt* body, expr *decorator_list)
+	      | ClassDef(identifier name, expr* bases, stmt* body, expr* decorator_list)
 	      | Return(expr? value)
 
 	      | Delete(expr* targets)
@@ -34,7 +34,7 @@ module Python version "$Revision$"
 	      | Assert(expr test, expr? msg)
 
 	      | Import(alias* names)
-	      | ImportFrom(identifier module, alias* names, int? level)
+	      | ImportFrom(identifier? module, alias* names, int? level)
 
 	      -- Doesn't capture requirement that locals must be
 	      -- defined if globals is
@@ -56,7 +56,10 @@ module Python version "$Revision$"
 	     | Lambda(arguments args, expr body)
 	     | IfExp(expr test, expr body, expr orelse)
 	     | Dict(expr* keys, expr* values)
+	     | Set(expr* elts)
 	     | ListComp(expr elt, comprehension* generators)
+	     | SetComp(expr elt, comprehension* generators)
+	     | DictComp(expr key, expr value, comprehension* generators)
 	     | GeneratorExp(expr elt, comprehension* generators)
 	     -- the grammar constrains where yield expressions can occur
 	     | Yield(expr? value)
diff --git a/Parser/asdl.py b/Parser/asdl.py
index 0cada2b0aa..7f5856b18a 100644
--- a/Parser/asdl.py
+++ b/Parser/asdl.py
@@ -10,14 +10,12 @@ browser.
 Changes for Python: Add support for module versions
 """
 
-#__metaclass__ = type
-
 import os
 import traceback
 
 import spark
 
-class Token:
+class Token(object):
     # spark seems to dispatch in the parser based on a token's
     # type attribute
     def __init__(self, type, lineno):
@@ -45,7 +43,7 @@ class String(Token):
         self.value = value
         self.lineno = lineno
 
-class ASDLSyntaxError:
+class ASDLSyntaxError(Exception):
 
     def __init__(self, lineno, token=None, msg=None):
         self.lineno = lineno
@@ -206,19 +204,19 @@ class ASDLParser(spark.GenericParser, object):
 
     def p_field_2(self, (type, _, name)):
         " field ::= Id * Id "
-        return Field(type, name, seq=1)
+        return Field(type, name, seq=True)
 
     def p_field_3(self, (type, _, name)):
         " field ::= Id ? Id "
-        return Field(type, name, opt=1)
+        return Field(type, name, opt=True)
 
     def p_field_4(self, (type, _)):
         " field ::= Id * "
-        return Field(type, seq=1)
+        return Field(type, seq=True)
 
     def p_field_5(self, (type, _)):
         " field ::= Id ? "
-        return Field(type, opt=1)
+        return Field(type, opt=True)
 
 builtin_types = ("identifier", "string", "int", "bool", "object")
 
@@ -226,7 +224,7 @@ builtin_types = ("identifier", "string", "int", "bool", "object")
 # not sure if any of the methods are useful yet, but I'm adding them
 # piecemeal as they seem helpful
 
-class AST:
+class AST(object):
     pass # a marker class
 
 class Module(AST):
@@ -258,7 +256,7 @@ class Constructor(AST):
         return "Constructor(%s, %s)" % (self.name, self.fields)
 
 class Field(AST):
-    def __init__(self, type, name=None, seq=0, opt=0):
+    def __init__(self, type, name=None, seq=False, opt=False):
         self.type = type
         self.name = name
         self.seq = seq
@@ -266,9 +264,9 @@ class Field(AST):
 
     def __repr__(self):
         if self.seq:
-            extra = ", seq=1"
+            extra = ", seq=True"
         elif self.opt:
-            extra = ", opt=1"
+            extra = ", opt=True"
         else:
             extra = ""
         if self.name is None:
@@ -296,7 +294,7 @@ class Product(AST):
 
 class VisitorBase(object):
 
-    def __init__(self, skip=0):
+    def __init__(self, skip=False):
         self.cache = {}
         self.skip = skip
 
@@ -331,7 +329,7 @@ class VisitorBase(object):
 class Check(VisitorBase):
 
     def __init__(self):
-        super(Check, self).__init__(skip=1)
+        super(Check, self).__init__(skip=True)
         self.cons = {}
         self.errors = 0
         self.types = {}
@@ -373,7 +371,7 @@ def check(mod):
     v.visit(mod)
 
     for t in v.types:
-        if not mod.types.has_key(t) and not t in builtin_types:
+        if t not in mod.types and t not in builtin_types:
             v.errors += 1
             uses = ", ".join(v.types[t])
             print "Undefined type %s, used in %s" % (t, uses)
diff --git a/Parser/asdl_c.py b/Parser/asdl_c.py
index 3772b129ec..634ad29bc5 100755
--- a/Parser/asdl_c.py
+++ b/Parser/asdl_c.py
@@ -86,7 +86,7 @@ class EmitVisitor(asdl.VisitorBase):
         self.file = file
         super(EmitVisitor, self).__init__()
 
-    def emit(self, s, depth, reflow=1):
+    def emit(self, s, depth, reflow=True):
         # XXX reflow long lines?
         if reflow:
             lines = reflow_lines(s, depth)
@@ -255,7 +255,7 @@ class PrototypeVisitor(EmitVisitor):
         ctype = get_c_type(type)
         self.emit_function(cons.name, ctype, args, attrs)
 
-    def emit_function(self, name, ctype, args, attrs, union=1):
+    def emit_function(self, name, ctype, args, attrs, union=True):
         args = args + attrs
         if args:
             argstr = ", ".join(["%s %s" % (atype, aname)
@@ -267,19 +267,19 @@ class PrototypeVisitor(EmitVisitor):
         for i in range(1, len(args)+1):
             margs += ", a%d" % i
         self.emit("#define %s(%s) _Py_%s(%s)" % (name, margs, name, margs), 0,
-                reflow = 0)
-        self.emit("%s _Py_%s(%s);" % (ctype, name, argstr), 0)
+                reflow=False)
+        self.emit("%s _Py_%s(%s);" % (ctype, name, argstr), 0)
 
     def visitProduct(self, prod, name):
         self.emit_function(name, get_c_type(name),
-                           self.get_args(prod.fields), [], union=0)
+                           self.get_args(prod.fields), [], union=False)
 
 
 class FunctionVisitor(PrototypeVisitor):
     """Visitor to generate constructor functions for AST."""
 
-    def emit_function(self, name, ctype, args, attrs, union=1):
-        def emit(s, depth=0, reflow=1):
+    def emit_function(self, name, ctype, args, attrs, union=True):
+        def emit(s, depth=0, reflow=True):
             self.emit(s, depth, reflow)
         argstr = ", ".join(["%s %s" % (atype, aname)
                             for atype, aname, opt in args + attrs])
@@ -298,7 +298,7 @@ class FunctionVisitor(PrototypeVisitor):
                 emit("PyErr_SetString(PyExc_ValueError,", 2)
                 msg = "field %s is required for %s" % (argname, name)
                 emit('                "%s");' % msg,
-                     2, reflow=0)
+                     2, reflow=False)
                 emit('return NULL;', 2)
                 emit('}', 1)
 
@@ -314,7 +314,7 @@ class FunctionVisitor(PrototypeVisitor):
         emit("")
 
     def emit_body_union(self, name, args, attrs):
-        def emit(s, depth=0, reflow=1):
+        def emit(s, depth=0, reflow=True):
             self.emit(s, depth, reflow)
         emit("p->kind = %s_kind;" % name, 1)
         for argtype, argname, opt in args:
@@ -323,7 +323,7 @@ class FunctionVisitor(PrototypeVisitor):
             emit("p->%s = %s;" % (argname, argname), 1)
 
     def emit_body_struct(self, name, args, attrs):
-        def emit(s, depth=0, reflow=1):
+        def emit(s, depth=0, reflow=True):
             self.emit(s, depth, reflow)
         for argtype, argname, opt in args:
             emit("p->%s = %s;" % (argname, argname), 1)
@@ -733,8 +733,9 @@ static int add_attributes(PyTypeObject* type, char**attrs, int num_fields)
 {
     int i, result;
     PyObject *s, *l = PyTuple_New(num_fields);
-    if (!l) return 0;
-    for(i = 0; i < num_fields; i++) {
+    if (!l)
+        return 0;
+    for (i = 0; i < num_fields; i++) {
         s = PyString_FromString(attrs[i]);
         if (!s) {
             Py_DECREF(l);
diff --git a/Parser/grammar.mak b/Parser/grammar.mak
deleted file mode 100644
index 55f028ffb8..0000000000
--- a/Parser/grammar.mak
+++ /dev/null
@@ -1,45 +0,0 @@
-# This manages to rebuild graminit.{h, c} under MSVC 6 (Windows), via
-#
-#     nmake /f grammar.mak
-#
-# You may also need to copy python23.dll into this directory, or get
-# it on your search path.
-#
-# The intermediate files can be nuked afterwards:
-#
-#     nmake /f grammar.mak clean
-#
-# I don't understand the maze of preprocessor #define's on Windows, and
-# as a result this requires linking with python23.lib, so it's of no use
-# for bootstrapping (the cause appears to be a useless-- in this
-# particular case --pragma in PC\pyconfig.h, which demands that
-# python23.lib get linked in).
-
-LIBS= ..\PCbuild\python25.lib
-
-CFLAGS= /I ..\Include /I ..\PC /D MS_NO_COREDLL /D PGEN /MD
-
-GRAMMAR_H= ..\Include\graminit.h
-GRAMMAR_C= ..\Python\graminit.c
-GRAMMAR_INPUT= ..\Grammar\Grammar
-
-PGEN= pgen.exe
-
-POBJS= acceler.obj grammar1.obj listnode.obj node.obj parser.obj \
-       parsetok.obj tokenizer.obj bitset.obj metagrammar.obj
-
-PARSER_OBJS= $(POBJS) myreadline.obj
-
-PGOBJS= firstsets.obj grammar.obj pgen.obj printgrammar.obj pgenmain.obj
-
-PGENOBJS= $(POBJS) $(PGOBJS)
-
-$(GRAMMAR_H) $(GRAMMAR_C): $(PGEN) $(GRAMMAR_INPUT)
-		$(PGEN) $(GRAMMAR_INPUT) $(GRAMMAR_H) $(GRAMMAR_C)
-
-$(PGEN):	$(PGENOBJS)
-		$(CC) $(PGENOBJS) $(LIBS) /Fe$(PGEN)
-
-clean:
-        del *.obj
-        del $(PGEN)
diff --git a/Parser/myreadline.c b/Parser/myreadline.c
index 122f896869..34fb45c932 100644
--- a/Parser/myreadline.c
+++ b/Parser/myreadline.c
@@ -40,7 +40,7 @@ static int
 my_fgets(char *buf, int len, FILE *fp)
 {
     char *p;
-    for (;;) {
+    while (1) {
         if (PyOS_InputHook != NULL)
             (void)(PyOS_InputHook)();
         errno = 0;
@@ -77,6 +77,7 @@ my_fgets(char *buf, int len, FILE *fp)
         }
 #endif /* MS_WINDOWS */
         if (feof(fp)) {
+            clearerr(fp);
             return -1; /* EOF */
         }
 #ifdef EINTR
@@ -89,9 +90,10 @@ my_fgets(char *buf, int len, FILE *fp)
 #ifdef WITH_THREAD
             PyEval_SaveThread();
 #endif
-            if (s < 0) {
-                return 1;
-            }
+            if (s < 0)
+                return 1;
+            /* try again */
+            continue;
         }
 #endif
         if (PyOS_InterruptOccurred()) {
diff --git a/Parser/parsetok.c b/Parser/parsetok.c
index cad7a80765..e8d396a5f3 100644
--- a/Parser/parsetok.c
+++ b/Parser/parsetok.c
@@ -51,7 +51,7 @@ PyParser_ParseStringFlagsFilenameEx(const char *s, const char *filename,
 
     initerr(err_ret, filename);
 
-    if ((tok = PyTokenizer_FromString(s)) == NULL) {
+    if ((tok = PyTokenizer_FromString(s, start == file_input)) == NULL) {
         err_ret->error = PyErr_Occurred() ? E_DECODE : E_NOMEM;
         return NULL;
     }
@@ -243,16 +243,24 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
             err_ret->text = text;
         }
     } else if (tok->encoding != NULL) {
+        /* 'nodes->n_str' uses PyObject_*, while 'tok->encoding' was
+         * allocated using PyMem_
+         */
         node* r = PyNode_New(encoding_decl);
-        if (!r) {
+        if (r)
+            r->n_str = PyObject_MALLOC(strlen(tok->encoding)+1);
+        if (!r || !r->n_str) {
             err_ret->error = E_NOMEM;
+            if (r)
+                PyObject_FREE(r);
             n = NULL;
             goto done;
         }
-        r->n_str = tok->encoding;
+        strcpy(r->n_str, tok->encoding);
+        PyMem_FREE(tok->encoding);
+        tok->encoding = NULL;
         r->n_nchildren = 1;
         r->n_child = n;
-        tok->encoding = NULL;
         n = r;
     }
 
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index a08f1838af..ee6313b311 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -92,7 +92,6 @@ char *_PyParser_TokenNames[] = {
     "<N_TOKENS>"
 };
 
-
 /* Create and initialize a new tok_state structure */
 
 static struct tok_state *
@@ -105,6 +104,7 @@ tok_new(void)
     tok->buf = tok->cur = tok->end = tok->inp = tok->start = NULL;
     tok->done = E_OK;
     tok->fp = NULL;
+    tok->input = NULL;
     tok->tabsize = TABSIZE;
     tok->indent = 0;
     tok->indstack[0] = 0;
@@ -130,6 +130,17 @@ tok_new(void)
     return tok;
 }
 
+static char *  /* PyMem copy of the first len bytes of s, NUL-terminated */
+new_string(const char *s, Py_ssize_t len)
+{
+    char* result = (char *)PyMem_MALLOC(len + 1);  /* +1 for the trailing NUL */
+    if (result != NULL) {
+        memcpy(result, s, len);
+        result[len] = '\0';
+    }
+    return result;  /* NULL if the allocation failed */
+}
+
 #ifdef PGEN
 
 static char *
@@ -144,10 +155,10 @@ decoding_feof(struct tok_state *tok)
     return feof(tok->fp);
 }
 
-static const char *
-decode_str(const char *str, struct tok_state *tok)
+static char *  /* copy is recorded in tok->input; PyTokenizer_Free frees it */
+decode_str(const char *str, int exec_input, struct tok_state *tok)
 {
-    return str;
+    return (char *)(tok->input = new_string(str, strlen(str)));
 }
 
 #else /* PGEN */
@@ -162,16 +173,6 @@ error_ret(struct tok_state *tok) /* XXX */
     return NULL;                /* as if it were EOF */
 }
 
-static char *
-new_string(const char *s, Py_ssize_t len)
-{
-    char* result = (char *)PyMem_MALLOC(len + 1);
-    if (result != NULL) {
-        memcpy(result, s, len);
-        result[len] = '\0';
-    }
-    return result;
-}
 
 static char *
 get_normal_name(char *s)        /* for utf-8 and latin-1 */
@@ -180,20 +181,26 @@ get_normal_name(char *s)        /* for utf-8 and latin-1 */
     int i;
     for (i = 0; i < 12; i++) {
         int c = s[i];
-        if (c == '\0') break;
-        else if (c == '_') buf[i] = '-';
-        else buf[i] = tolower(c);
+        if (c == '\0')
+            break;
+        else if (c == '_')
+            buf[i] = '-';
+        else
+            buf[i] = tolower(c);
     }
     buf[i] = '\0';
     if (strcmp(buf, "utf-8") == 0 ||
-        strncmp(buf, "utf-8-", 6) == 0) return "utf-8";
+        strncmp(buf, "utf-8-", 6) == 0)
+        return "utf-8";
     else if (strcmp(buf, "latin-1") == 0 ||
              strcmp(buf, "iso-8859-1") == 0 ||
              strcmp(buf, "iso-latin-1") == 0 ||
              strncmp(buf, "latin-1-", 8) == 0 ||
              strncmp(buf, "iso-8859-1-", 11) == 0 ||
-             strncmp(buf, "iso-latin-1-", 12) == 0) return "iso-8859-1";
-    else return s;
+             strncmp(buf, "iso-latin-1-", 12) == 0)
+        return "iso-8859-1";
+    else
+        return s;
 }
 
 /* Return the coding spec in S, or NULL if none is found.  */
@@ -222,7 +229,7 @@ get_coding_spec(const char *s, Py_ssize_t size)
             } while (t[0] == '\x20' || t[0] == '\t');
 
             begin = t;
-            while (isalnum(Py_CHARMASK(t[0])) ||
+            while (Py_ISALNUM(t[0]) ||
                    t[0] == '-' || t[0] == '_' || t[0] == '.')
                 t++;
 
@@ -417,7 +424,8 @@ fp_readl(char *s, int size, struct tok_state *tok)
     memcpy(s, str, utf8len);
     s[utf8len] = '\0';
     Py_DECREF(utf8);
-    if (utf8len == 0) return NULL; /* EOF */
+    if (utf8len == 0)
+        return NULL; /* EOF */
     return s;
 #endif
 }
@@ -589,17 +597,62 @@ translate_into_utf8(const char* str, const char* enc) {
 }
 #endif
 
+
+static char *
+translate_newlines(const char *s, int exec_input, struct tok_state *tok) {
+    /* Return a PyMem copy of s with every "\r\n" or "\r" replaced by "\n". */
+    int skip_next_lf = 0;
+    size_t needed_length = strlen(s) + 2, final_length; /* +2: '\n' and NUL */
+    char *buf, *current, *shrunk, c;
+    buf = PyMem_MALLOC(needed_length);
+    if (buf == NULL) {
+        tok->done = E_NOMEM;
+        return NULL;
+    }
+    for (current = buf; *s; s++, current++) {
+        c = *s;
+        if (skip_next_lf) {
+            skip_next_lf = 0;
+            if (c == '\n') {
+                c = *++s;
+                if (!c)
+                    break;
+            }
+        }
+        if (c == '\r') {
+            skip_next_lf = 1;
+            c = '\n';
+        }
+        *current = c;
+    }
+    /* If this is exec input, add a newline to the end of the string if
+       there isn't one already (c is 0 after a final CRLF, so test buf). */
+    if (exec_input && (current == buf || current[-1] != '\n'))
+        *current++ = '\n';
+    *current = '\0';
+    final_length = current - buf + 1;
+    /* Shrinking should never fail; if it does, keep the (still valid) buf. */
+    if (final_length < needed_length &&
+        (shrunk = PyMem_REALLOC(buf, final_length)) != NULL)
+        buf = shrunk;
+    return buf;
+}
+
 /* Decode a byte string STR for use as the buffer of TOK.
    Look for encoding declarations inside STR, and record them
    inside TOK.  */
 
 static const char *
-decode_str(const char *str, struct tok_state *tok)
+decode_str(const char *input, int exec_input, struct tok_state *tok)
 {
     PyObject* utf8 = NULL;
+    const char *str;  /* newline-translated copy of input, kept in tok->input */
     const char *s;
     const char *newl[2] = {NULL, NULL};
     int lineno = 0;
+    tok->input = str = translate_newlines(input, exec_input, tok);
+    if (str == NULL)
+        return NULL;
     tok->enc = NULL;
     tok->str = str;
     if (!check_bom(buf_getc, buf_ungetc, buf_setreadl, tok))
@@ -639,11 +692,8 @@ decode_str(const char *str, struct tok_state *tok)
     if (tok->enc != NULL) {
         assert(utf8 == NULL);
         utf8 = translate_into_utf8(str, tok->enc);
-        if (utf8 == NULL) {
-            PyErr_Format(PyExc_SyntaxError,
-                "unknown encoding: %s", tok->enc);
+        if (utf8 == NULL)
             return error_ret(tok);
-        }
         str = PyString_AsString(utf8);
     }
 #endif
@@ -657,12 +707,12 @@ decode_str(const char *str, struct tok_state *tok)
 /* Set up tokenizer for string */
 
 struct tok_state *
-PyTokenizer_FromString(const char *str)
+PyTokenizer_FromString(const char *str, int exec_input)
 {
     struct tok_state *tok = tok_new();
     if (tok == NULL)
         return NULL;
-    str = (char *)decode_str(str, tok);
+    str = (char *)decode_str(str, exec_input, tok);
     if (str == NULL) {
         PyTokenizer_Free(tok);
         return NULL;
@@ -708,6 +758,8 @@ PyTokenizer_Free(struct tok_state *tok)
 #endif
     if (tok->fp != NULL && tok->buf != NULL)
         PyMem_FREE(tok->buf);
+    if (tok->input)
+        PyMem_FREE((char *)tok->input);
     PyMem_FREE(tok);
 }
 
@@ -953,7 +1005,7 @@ tok_backup(register struct tok_state *tok, register int c)
 {
     if (c != EOF) {
         if (--tok->cur < tok->buf)
-            Py_FatalError("tok_backup: begin of buffer");
+            Py_FatalError("tok_backup: beginning of buffer");
         if (*tok->cur != c)
             *tok->cur = c;
     }
@@ -1132,7 +1184,6 @@ indenterror(struct tok_state *tok)
     return 0;
 }
 
-
 /* Get next token, after space stripping etc. */
 
 static int
@@ -1288,7 +1339,7 @@ tok_get(register struct tok_state *tok, char **p_start, char **p_end)
     }
 
     /* Identifier (most frequent token!) */
-    if (isalpha(c) || c == '_') {
+    if (Py_ISALPHA(c) || c == '_') {
         /* Process r"", u"" and ur"" */
         switch (c) {
         case 'b':
@@ -1314,7 +1365,7 @@ tok_get(register struct tok_state *tok, char **p_start, char **p_end)
                 goto letter_quote;
             break;
         }
-        while (isalnum(c) || c == '_') {
+        while (c != EOF && (Py_ISALNUM(c) || c == '_')) {
             c = tok_nextc(tok);
         }
         tok_backup(tok, c);
diff --git a/Parser/tokenizer.h b/Parser/tokenizer.h
index 19aa702430..f15e2523f7 100644
--- a/Parser/tokenizer.h
+++ b/Parser/tokenizer.h
@@ -52,9 +52,10 @@ struct tok_state {
 #endif
     const char* enc;
     const char* str;
+    const char* input; /* Tokenizer's newline translated copy of the string. */
 };
 
-extern struct tok_state *PyTokenizer_FromString(const char *);
+extern struct tok_state *PyTokenizer_FromString(const char *, int);
 extern struct tok_state *PyTokenizer_FromFile(FILE *, char *, char *);
 extern void PyTokenizer_Free(struct tok_state *);
 extern int PyTokenizer_Get(struct tok_state *, char **, char **);