%{
/* This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "ansidecl.h"
#include "coretypes.h"
#include "opts.h"
#include "tree.h"
#include "gimple.h"
#include "toplev.h"
#include "debug.h"
#include "options.h"
#include "flags.h"
#include "convert.h"
#include "diagnostic-core.h"
#include "langhooks.h"
#include "langhooks-def.h"
#include "target.h"

/* NOTE(review): the two system header names below were missing from the
   text this file was recovered from.  mpfr.h is required by the mpfr_*
   calls in the integer rule and gmp.h is its usual prerequisite --
   confirm against the original file.  */
#include <gmp.h>
#include <mpfr.h>

#include "vec.h"
#include "hashtab.h"

#include "gpython.h"
#include "py-dot.h"
#include "py-vec.h"
#include "py-tree.h"

#include "y.py.h"

extern int yyparse( void );
int lang_token_action (char *yytext);

/* Stack required for INDENT and DEDENT tokens.
 * @see http://docs.python.org/reference/lexical_analysis.html
 * gpy_lex_parse pushes a base entry of 0 before scanning starts.
 */
static VEC(gpy_int,gc) * gpy_indent_stack;

/* Whenever we pass a [, { or (, we may not generate a
 * NEWLINE token until we have passed the closing ], } or ).
 * implicit_lining is a counter which keeps track of the
 * nesting level.
 */
static unsigned int implicit_lining = 0;

/* The indentation level of the current line, calculated according to
 * the Python indentation rules.
 * @see http://docs.python.org/reference/lexical_analysis.html
 */
static unsigned int indentation = 0;

/* The top entry of the indentation stack gpy_indent_stack. */
static unsigned int top_len = 0;

/* We may not generate a NEWLINE token unless we saw a real
 * language token.  Empty lines consisting solely of spaces,
 * tabs, formfeeds and comments must be ignored.
 */
static unsigned int token_count = 0;

/* maybe_dedent is true if we must potentially generate DEDENT tokens
 * in lang_token_action.  It is true by default once we start a new
 * line, because we might directly see a token without any leading
 * whitespace, in which case DEDENT tokens must be generated.
 *
 * It is false if we know for sure that an INDENT token will be generated.
 */
static bool maybe_dedent = true;

/* indentation_mode is true only at the start of a new line.  It tells
 * the scanner to calculate the indentation value.  If false, no
 * indentation value is calculated.  This is the state after the first
 * real language token of a line has been scanned.
 */
static bool indentation_mode = true;

/* This flag is used in lang_token_action to control the generation
 * of INDENT and DEDENT tokens.  INDENT and DEDENT tokens are only
 * generated at the beginning of a line and BEFORE the first real
 * language token is returned to the parser.  Once the first language
 * token is recognized, gen_indentation is set to false.
 */
static bool gen_indentation = true;

/* Returned by lang_token_action when no INDENT/DEDENT has to be emitted. */
#define NO_TOKEN -1

/* The LANG_TOKEN_ACTION macro defines the common action run for every
 * real language token (one that is really returned to the parser)
 * before the token itself is returned.  If lang_token_action yields an
 * INDENT or DEDENT, that token is returned from yylex instead.
 *
 * This MUST NOT be used for input which is not passed to the parser,
 * i.e. input that the scanner merely skips.
 *
 * The expansion deliberately has no trailing semicolon; the use site
 * supplies it.
 */
#define LANG_TOKEN_ACTION                               \
  do {                                                  \
    int tk;                                             \
    fprintf(stdout, "LANG_TOKEN_ACTION\n");             \
    tk = lang_token_action(yytext);                     \
    if (tk != NO_TOKEN)                                 \
      return tk;                                        \
  } while (0)

/* Push the whole current match back so that it is scanned again. */
#define UNPUT_TOKEN() yyless(0)
%}

%option yylineno

%x xLINE_CONT xINDENTATION

DIGIT       [0-9]
ID          [_a-zA-Z][a-zA-Z0-9_$]*
QSTRING     \"[^\"\n]*[\"\n]
NEWLINES    (\r\n|\r|\n)
FORMFEED    [\f]
WHITESPACE  [ \t]

%%

#.*         ; /* comment */

class       { LANG_TOKEN_ACTION; return CLASS; }
def         { LANG_TOKEN_ACTION; return DEF; }
except      { LANG_TOKEN_ACTION; return EXCEPT; }
finally     { LANG_TOKEN_ACTION; return FINALLY; }
try         { LANG_TOKEN_ACTION; return TRY; }
as          { LANG_TOKEN_ACTION; return AS; }
assert      { LANG_TOKEN_ACTION; return ASSERT; }
del         { LANG_TOKEN_ACTION; return DEL; }
exec        { LANG_TOKEN_ACTION; return EXEC; }
from        { LANG_TOKEN_ACTION; return FROM; }
global      { LANG_TOKEN_ACTION; return GLOBAL; }
import      { LANG_TOKEN_ACTION; return IMPORT; }
in          { LANG_TOKEN_ACTION; return IN; }
is          { LANG_TOKEN_ACTION; return IS; }
lambda      { LANG_TOKEN_ACTION; return LAMBDA; }
pass        { LANG_TOKEN_ACTION; return PASS; }
raise       { LANG_TOKEN_ACTION; return RAISE; }
with        { LANG_TOKEN_ACTION; return WITH; }
yield       { LANG_TOKEN_ACTION; return YIELD; }
break       { LANG_TOKEN_ACTION; return BREAK; }
continue    { LANG_TOKEN_ACTION; return CONTINUE; }
return      { LANG_TOKEN_ACTION; return RETURN; }
for         { LANG_TOKEN_ACTION; return FOR; }
while       { LANG_TOKEN_ACTION; return WHILE; }
print       { LANG_TOKEN_ACTION; fprintf(stderr, "REALLY: print\n"); return PRINT; }
if          { LANG_TOKEN_ACTION; return IF; }
elif        { LANG_TOKEN_ACTION; return ELIF; }
else        { LANG_TOKEN_ACTION; return ELSE; }

    /* Opening brackets raise the implicit line joining level, closing
       brackets lower it.  The decrement is guarded so that a stray
       closing bracket cannot wrap the unsigned counter and suppress
       every following NEWLINE.  */
\[          { LANG_TOKEN_ACTION; implicit_lining++; return '['; }
\]          { LANG_TOKEN_ACTION; if (implicit_lining > 0) implicit_lining--; return ']'; }
\(          { LANG_TOKEN_ACTION; implicit_lining++; return '('; }
\)          { LANG_TOKEN_ACTION; if (implicit_lining > 0) implicit_lining--; return ')'; }
\{          { LANG_TOKEN_ACTION; implicit_lining++; return '{'; }
\}          { LANG_TOKEN_ACTION; if (implicit_lining > 0) implicit_lining--; return '}'; }

";"         { LANG_TOKEN_ACTION; return ';'; }
","         { LANG_TOKEN_ACTION; return ','; }
"."         { LANG_TOKEN_ACTION; return '.'; }
":"         { LANG_TOKEN_ACTION; return ':'; }
"="         { LANG_TOKEN_ACTION; return '='; }
"+"         { LANG_TOKEN_ACTION; return '+'; }
"-"         { LANG_TOKEN_ACTION; return '-'; }
"/"         { LANG_TOKEN_ACTION; return '/'; }
"*"         { LANG_TOKEN_ACTION; return '*'; }
"|"         { LANG_TOKEN_ACTION; return '|'; }
"=="        { LANG_TOKEN_ACTION; return EQUAL_EQUAL; }
"!="        { LANG_TOKEN_ACTION; return NOT_EQUAL; }
"<"         { LANG_TOKEN_ACTION; return LESS; }
"<="        { LANG_TOKEN_ACTION; return LESS_EQUAL; }
">"         { LANG_TOKEN_ACTION; return GREATER; }
">="        { LANG_TOKEN_ACTION; return GREATER_EQUAL; }
"or"        { LANG_TOKEN_ACTION; return OR; }
"and"       { LANG_TOKEN_ACTION; return AND; }
"not"       { LANG_TOKEN_ACTION; return NOT; }
"True"      { LANG_TOKEN_ACTION; return V_TRUE; }
"False"     { LANG_TOKEN_ACTION; return V_FALSE; }

{QSTRING}   {
              LANG_TOKEN_ACTION;
              /* Copy the match without its opening quote; the copy is
                 yyleng-1 characters long, so index yyleng-2 is its last
                 character (closing quote, or a newline if unterminated).  */
              yylval.string= xstrdup( (yytext+1) );
              if( yylval.string[ yyleng-2 ] != '\"' )
                {
                  error("Un-termintated character string!\n");
                }
              else
                {
                  yylval.string[yyleng-2] = '\0';
                }
              return STRING;
            }

{DIGIT}+    {
              LANG_TOKEN_ACTION;
              {
                /* NOTE(review): the value is parsed with 32 bits of
                   precision and rounded up, so literals that need more
                   bits are not represented exactly -- confirm intended.  */
                mpfr_t x;
                mpfr_init2( x, 32 );
                if( mpfr_set_str( x, yytext, 10, GMP_RNDU) )
                  {
                    fatal_error("error initilizing integer value <%s>!\n", yytext );
                  }
                yylval.integer = mpfr_get_si( x, GMP_RNDU );
                mpfr_clear( x );
              }
              return INTEGER;
            }

{ID}        {
              LANG_TOKEN_ACTION;
              yylval.string = xstrdup( yytext );
              return IDENTIFIER;
            }

    /* Explicit line joining: a backslash must be followed directly by
       the end of the line.  */
\\                      { BEGIN xLINE_CONT; }
<xLINE_CONT>{NEWLINES}  { BEGIN INITIAL; }
<xLINE_CONT>.           {
                          /* Leave the start condition first; the call
                             below is a fatal diagnostic.  */
                          BEGIN INITIAL;
                          fatal_error("non newline character after line continuation character!\n");
                        }

{NEWLINES}  {
              if (implicit_lining == 0 && token_count > 0)
                {
                  /* End of a logical line: re-arm the indentation
                     machinery for the next line.  */
                  indentation = 0;
                  indentation_mode = true;
                  gen_indentation = true;
                  maybe_dedent = true;
                  token_count = 0;
                  fprintf(stdout, "DEBUG: NEWLINE\n");
                  return NEWLINE;
                }
              else if (token_count == 0)
                {
                  /* Blank or comment-only line: no NEWLINE token, but any
                     indentation measured on it must be discarded so that
                     the next line is measured from scratch.  */
                  indentation = 0;
                  indentation_mode = true;
                  maybe_dedent = true;
                }
            }

{FORMFEED}  { /* can be ignored or undefined behavior, so just ignored */ }

{WHITESPACE}+ {
              if (indentation_mode == true)
                {
                  const unsigned int len = (unsigned int) yyleng;
                  unsigned int i;

                  fprintf(stdout, "DEBUG: determine INDENT: '%s' (%i) (%d)\n",
                          yytext, yytext[0], (int) len);
                  for (i = 0; i < len; i++)
                    {
                      if (yytext[i] == ' ')
                        indentation++;
                      else
                        /* tabs fill up the indentation to a multiple of 8 chars */
                        indentation = (indentation + 0x8) & ~((unsigned int)0x7);
                    }
                  fprintf(stdout, "INDENTATION: %u\n", indentation);
                  if (indentation > top_len)
                    maybe_dedent = false;
                  indentation_mode = false;
                }
            }

<<EOF>>     {
              /* Close every indentation level that is still open, one
                 DEDENT per call, before reporting end of input.  */
              if( top_len != 0 )
                {
                  printf("DEDENT EOF!\n");
                  VEC_pop( gpy_int, gpy_indent_stack );
                  top_len = VEC_index( gpy_int, gpy_indent_stack,
                                       VEC_length( gpy_int, gpy_indent_stack )-1 );
                  return DEDENT;
                }
              else
                {
                  return 0;
                }
            }

%%

/* Tell the scanner that there is no further input after end of file. */
int yywrap( void )
{
  return 1;
}

/* Scan and parse the file named GPY_IN.
 *
 * Pushes the base indentation level 0, opens the file, runs yyparse on
 * it and releases the scanner state afterwards.
 *
 * Returns the result of yyparse, or 0 if the file could not be opened.
 * NOTE(review): 0 is therefore returned for an open failure as well --
 * confirm that callers do not rely on it to mean success.
 */
int gpy_lex_parse( const char * gpy_in )
{
  int retval = 1;
  FILE * fd;

  VEC_safe_push( gpy_int, gc, gpy_indent_stack, 0 );
  top_len = 0;

  fd = fopen( gpy_in, "rb" );
  debug("trying to open <%s>!\n", gpy_in);
  if( fd )
    {
      yyin = fd;
      retval = yyparse( );
      fclose( fd );
      yylex_destroy( );
    }
  else
    {
      fprintf(stderr, "error opening <%s>!\n", gpy_in );
      retval = 0;
    }
  return retval;
}

/* Common action run before each real language token is returned.
 *
 * Decides whether an INDENT or DEDENT token has to be delivered first.
 * If so, the current match is pushed back with UNPUT_TOKEN so that it
 * is scanned again on the next yylex call, and INDENT or DEDENT is
 * returned.  Otherwise NO_TOKEN is returned and the caller goes on to
 * return the language token itself.
 *
 * The parameter is named yytext on purpose: UNPUT_TOKEN expands to
 * yyless, which refers to that name.
 */
int lang_token_action (char *yytext)
{
  /* A real token was seen, so whitespace no longer counts as indentation. */
  token_count++;
  indentation_mode = false;

  fprintf(stdout, "TOKENCOUNT: %u\n", token_count);

  if (gen_indentation == true)
    {
      if (maybe_dedent == true)
        {
          if (indentation < top_len)
            {
              /* Re-scan this token; further DEDENTs may follow. */
              UNPUT_TOKEN();
              fprintf(stdout, "DEDENT: top(%u) - %u\n", top_len, indentation);
              VEC_pop(gpy_int, gpy_indent_stack);
              top_len = VEC_index( gpy_int, gpy_indent_stack,
                                   VEC_length( gpy_int, gpy_indent_stack )-1 );
              fprintf(stdout, "DEBUG: DEDENT\n");
              return DEDENT;
            }
          else if (indentation == top_len)
            {
              maybe_dedent = false;
            }
          else
            {
              /* Dedented to a level that was never pushed. */
              fatal_error("unmatched indentation!\n");
            }
        }
      else if (indentation > top_len)
        {
          VEC_safe_push( gpy_int, gc, gpy_indent_stack, indentation );
          top_len = indentation;
          gen_indentation = false;
          UNPUT_TOKEN();
          fprintf(stdout, "DEBUG: INDENT\n");
          return INDENT;
        }
    }

  gen_indentation = false;
  return NO_TOKEN;
}