%{
/* This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
#include "config.h"
#include "system.h"
#include "ansidecl.h"
#include "coretypes.h"
#include "opts.h"
#include "tree.h"
#include "gimple.h"
#include "toplev.h"
#include "debug.h"
#include "options.h"
#include "flags.h"
#include "convert.h"
#include "diagnostic-core.h"
#include "langhooks.h"
#include "langhooks-def.h"
#include "target.h"
#include <gmp.h>
#include <mpfr.h>
#include "vec.h"
#include "hashtab.h"
#include "gpython.h"
#include "py-dot.h"
#include "py-vec.h"
#include "py-tree.h"
#include "y.py.h"
/* Parser entry point; gpy_lex_parse calls it after pointing yyin at the
   input file.  Presumably generated from the grammar that also produces
   y.py.h -- confirm against the build rules.  */
extern int yyparse( void );
/* Bookkeeping hook run by LANG_TOKEN_ACTION before a language token is
   handed to the parser.  Returns INDENT or DEDENT when such a token has to
   be emitted first, and NO_TOKEN otherwise.  */
int lang_token_action (char *yytext);
/* Stack of indentation widths needed to generate INDENT and DEDENT
 * tokens, see
 *  - http://docs.python.org/reference/lexical_analysis.html
 * gpy_lex_parse pushes the initial 0 entry; lang_token_action pushes a
 * new width for every INDENT and pops one for every DEDENT.
 */
static VEC(gpy_int,gc) * gpy_indent_stack;
/* Whenever we pass a [, { or (, we may not generate a
 * NEWLINE token until we have passed the closing ], } or ).
 * implicit_lining is a counter which keeps track of the
 * nesting level; a NEWLINE is only emitted while it is 0.
 */
static unsigned int implicit_lining = 0;
/* The indentation width of the current line, calculated according to
 * the Python indentation rules, see
 *  - http://docs.python.org/reference/lexical_analysis.html
 * It is reset to 0 whenever a NEWLINE token is emitted.
 */
static unsigned int indentation = 0;
/* Cached copy of the top entry of the indentation stack
 * gpy_indent_stack. */
static unsigned int top_len = 0;
/* Number of language tokens seen on the current logical line.  We may
 * not generate a NEWLINE token unless we saw a real language token:
 * empty lines consisting solely of spaces, tabs, formfeeds and comments
 * must be ignored.
 */
static unsigned int token_count = 0;
/* maybe_dedent is true if we must potentially generate DEDENT tokens
 * in lang_token_action.  This is true by default once we start a
 * new line, because we might directly see a token without any leading
 * whitespace, in which case DEDENT tokens must be generated.
 *
 * It is false if we know for sure that INDENT tokens will be generated,
 * i.e. once the measured indentation exceeds top_len.
 */
static bool maybe_dedent = true;
/* indentation_mode is true only at the start of a new line.  It tells
 * the scanner to calculate the indentation value from the next run of
 * whitespace.  If false, whitespace does not contribute to the
 * indentation; this is the state after the first whitespace run or the
 * first real language token of a line.
 */
static bool indentation_mode = true;
/* This flag is used in lang_token_action to control the generation
 * of INDENT and DEDENT tokens.  INDENT and DEDENT tokens are only ever
 * generated at the beginning of a line and BEFORE the first real
 * language token is returned to the parser.  Once the first language
 * token is recognized, gen_indentation is set to false.
 */
static bool gen_indentation = true;
/* Value returned by lang_token_action when no INDENT/DEDENT token has to
 * be emitted ahead of the current token.  Parenthesized so the negative
 * literal stays a single operand wherever the macro is expanded.
 */
#define NO_TOKEN (-1)
/* The LANG_TOKEN_ACTION macro defines the common action that must run for
 * every real language token (one that is really returned to the parser)
 * before that token is returned.  If lang_token_action asks for an INDENT
 * or DEDENT first, the macro returns that token from yylex instead.
 *
 * This MUST NOT be used for input that is consumed by the scanner without
 * being passed to the parser (whitespace, comments, ...).
 *
 * The expansion deliberately has no trailing semicolon: the caller writes
 * `LANG_TOKEN_ACTION;`, which keeps the do/while(0) idiom usable as a
 * single statement (e.g. in an unbraced if/else).
 */
#define LANG_TOKEN_ACTION do { \
    fprintf(stdout, "LANG_TOKEN_ACTION\n"); \
    int tk = lang_token_action(yytext); \
    if (tk != NO_TOKEN) \
      return tk; \
  } while (0)
/* Give the whole current match back to the input so that it is scanned
 * again after an INDENT/DEDENT token has been delivered.
 */
#define UNPUT_TOKEN() yyless(0)
%}
%option yylineno
%x xLINE_CONT xINDENTATION
/* xLINE_CONT is entered after a backslash line continuation.
 * NOTE(review): xINDENTATION is declared but no visible rule uses it. */
DIGIT [0-9]
/* Identifiers: a letter or underscore followed by letters, digits,
 * underscores or '$'.  The upper-case range must be written A-Z; the
 * former A_Z only admitted 'A', '_' and 'Z', which split identifiers
 * such as fooBar into several tokens. */
ID [_a-zA-Z][a-zA-Z0-9_$]*
/* Double-quoted string without escape handling.  It also ends at a
 * newline so the rule action can diagnose an unterminated string. */
QSTRING \"[^\"\n]*[\"\n]
NEWLINES (\r\n|\r|\n)
FORMFEED [\f]
WHITESPACE [ \t]
%%
#.*         ; /* comment: skipped up to the end of the line */
    /* Keywords.  Each rule first runs LANG_TOKEN_ACTION so that pending
       INDENT/DEDENT tokens are delivered before the keyword itself.  */
"class"     { LANG_TOKEN_ACTION; return CLASS; }
"def"       { LANG_TOKEN_ACTION; return DEF; }
"except"    { LANG_TOKEN_ACTION; return EXCEPT; }
"finally"   { LANG_TOKEN_ACTION; return FINALLY; }
"try"       { LANG_TOKEN_ACTION; return TRY; }
"as"        { LANG_TOKEN_ACTION; return AS; }
"assert"    { LANG_TOKEN_ACTION; return ASSERT; }
"del"       { LANG_TOKEN_ACTION; return DEL; }
"exec"      { LANG_TOKEN_ACTION; return EXEC; }
"from"      { LANG_TOKEN_ACTION; return FROM; }
"global"    { LANG_TOKEN_ACTION; return GLOBAL; }
"import"    { LANG_TOKEN_ACTION; return IMPORT; }
"in"        { LANG_TOKEN_ACTION; return IN; }
"is"        { LANG_TOKEN_ACTION; return IS; }
"lambda"    { LANG_TOKEN_ACTION; return LAMBDA; }
"pass"      { LANG_TOKEN_ACTION; return PASS; }
"raise"     { LANG_TOKEN_ACTION; return RAISE; }
"with"      { LANG_TOKEN_ACTION; return WITH; }
"yield"     { LANG_TOKEN_ACTION; return YIELD; }
"break"     { LANG_TOKEN_ACTION; return BREAK; }
"continue"  { LANG_TOKEN_ACTION; return CONTINUE; }
"return"    { LANG_TOKEN_ACTION; return RETURN; }
"for"       { LANG_TOKEN_ACTION; return FOR; }
"while"     { LANG_TOKEN_ACTION; return WHILE; }
"print"     { LANG_TOKEN_ACTION; fprintf(stderr, "REALLY: print\n"); return PRINT; }
"if"        { LANG_TOKEN_ACTION; return IF; }
"elif"      { LANG_TOKEN_ACTION; return ELIF; }
"else"      { LANG_TOKEN_ACTION; return ELSE; }
\[ { LANG_TOKEN_ACTION; implicit_lining++; return '['; }
    /* Brackets drive implicit line joining: every opener raises
       implicit_lining and every closer lowers it, so NEWLINE tokens are
       suppressed exactly while a bracket is open.  The counter is
       unsigned, so a stray closer must not wrap it around; the parser is
       left to reject the unbalanced bracket.  */
\] { LANG_TOKEN_ACTION; if (implicit_lining > 0) implicit_lining--; return ']'; }
\( { LANG_TOKEN_ACTION; implicit_lining++; return '('; }
\) { LANG_TOKEN_ACTION; if (implicit_lining > 0) implicit_lining--; return ')'; }
\{ { LANG_TOKEN_ACTION; implicit_lining++; return '{'; }
\} { LANG_TOKEN_ACTION; if (implicit_lining > 0) implicit_lining--; return '}'; }
    /* Single-character punctuation and operators are returned as their
       own character code.  */
";" { LANG_TOKEN_ACTION; return ';'; }
"," { LANG_TOKEN_ACTION; return ','; }
"." { LANG_TOKEN_ACTION; return '.'; }
":" { LANG_TOKEN_ACTION; return ':'; }
"=" { LANG_TOKEN_ACTION; return '='; }
"+" { LANG_TOKEN_ACTION; return '+'; }
"-" { LANG_TOKEN_ACTION; return '-'; }
"/" { LANG_TOKEN_ACTION; return '/'; }
"*" { LANG_TOKEN_ACTION; return '*'; }
"|" { LANG_TOKEN_ACTION; return '|'; }
    /* Comparison operators; the two-character forms win over their
       one-character prefixes because the scanner prefers the longest
       match.  */
"==" { LANG_TOKEN_ACTION; return EQUAL_EQUAL; }
"!=" { LANG_TOKEN_ACTION; return NOT_EQUAL; }
"<" { LANG_TOKEN_ACTION; return LESS; }
"<=" { LANG_TOKEN_ACTION; return LESS_EQUAL; }
">" { LANG_TOKEN_ACTION; return GREATER; }
">=" { LANG_TOKEN_ACTION; return GREATER_EQUAL; }
    /* Word operators and boolean literals; they have to stay ahead of
       the {ID} rule, which matches the same text with equal length.  */
"or" { LANG_TOKEN_ACTION; return OR; }
"and" { LANG_TOKEN_ACTION; return AND; }
"not" { LANG_TOKEN_ACTION; return NOT; }
"True" { LANG_TOKEN_ACTION; return V_TRUE; }
"False" { LANG_TOKEN_ACTION; return V_FALSE; }
{QSTRING} {
    LANG_TOKEN_ACTION;
    /* Duplicate the lexeme without its opening quote.  The copy is one
       character shorter than the match, so its last character sits at
       index yyleng - 2.  */
    char *text = xstrdup (yytext + 1);
    const int last = yyleng - 2;
    yylval.string = text;
    if (text[last] == '\"')
      {
        /* Properly terminated: cut off the closing quote.  */
        text[last] = '\0';
      }
    else
      {
        /* The pattern stopped at a newline instead of a quote.  */
        error("Un-termintated character string!\n");
      }
    return STRING;
}
{DIGIT}+ {
    LANG_TOKEN_ACTION;
    /* Parse the decimal lexeme through a 32-bit precision MPFR value and
       hand the parser the resulting signed integer.  */
    mpfr_t parsed;
    mpfr_init2 (parsed, 32);
    const int failed = mpfr_set_str (parsed, yytext, 10, GMP_RNDU);
    if (failed != 0)
      fatal_error("error initilizing integer value <%s>!\n", yytext );
    yylval.integer = mpfr_get_si (parsed, GMP_RNDU);
    mpfr_clear (parsed);
    return INTEGER;
}
{ID} {
    LANG_TOKEN_ACTION;
    /* The parser gets its own duplicate of the identifier text rather
       than a pointer into the scanner's match buffer.  */
    char *name = xstrdup (yytext);
    yylval.string = name;
    return IDENTIFIER;
}
\\ {
    /* Explicit line joining: a backslash switches to the exclusive
       xLINE_CONT state, in which only a line break is acceptable.  */
    BEGIN xLINE_CONT;
}
<xLINE_CONT>{NEWLINES} {
    /* The line break right after the backslash is swallowed without
       producing a NEWLINE token; scanning resumes normally.  */
    BEGIN INITIAL;
}
<xLINE_CONT>. {
    /* Anything else after the backslash is an error.  The rules must be
       restricted to xLINE_CONT: without the start condition they would
       shadow the regular NEWLINE rule and turn every otherwise unmatched
       character into this fatal error.  */
    fatal_error("non newline character after line continuation character!\n");
    /* Only reached if fatal_error returns.  */
    BEGIN INITIAL;
}
{NEWLINES} {
    /* Inside brackets (implicit_lining != 0) a line break is plain
       whitespace.  Otherwise it ends the physical line and the per-line
       indentation state must start over -- also for lines that held no
       language token.  Before, a whitespace-only or indented
       comment-only line left `indentation' and `indentation_mode'
       untouched, so its width leaked into the next line and produced a
       bogus INDENT or "unmatched indentation".  */
    if (implicit_lining == 0)
      {
        const bool line_had_token = (token_count > 0);
        indentation = 0;
        indentation_mode = true;
        gen_indentation = true;
        maybe_dedent = true;
        token_count = 0;
        /* Blank lines are ignored: only a line that carried at least one
           language token yields a NEWLINE.  */
        if (line_had_token)
          {
            fprintf(stdout, "DEBUG: NEWLINE\n");
            return NEWLINE;
          }
      }
}
{FORMFEED} { /* can be ignored or undefined behavior, so just ignored */}
{WHITESPACE}+ {
    /* Only the first whitespace run of a line (indentation_mode set)
       counts as indentation; later runs merely separate tokens.  */
    if (indentation_mode == true)
      {
        /* yyleng is the length of the match; it replaces the strlen()
           call whose size_t result did not fit the %d conversion and
           which was re-evaluated on every loop iteration.  */
        const int run_len = (int) yyleng;
        fprintf(stdout, "DEBUG: determine INDENT: '%s' (%i) (%d)\n",
                yytext, yytext[0], run_len);
        for (int i = 0; i < run_len; i++)
          {
            if (yytext[i] == ' ')
              indentation++;
            else
              /* tabs fill up the indentation to a multiple of 8 chars */
              indentation = (indentation + 0x8) & ~((unsigned int)0x7);
          }
        fprintf(stdout, "INDENTATION: %u\n", indentation);
        /* Deeper than the enclosing block: this line can only open a new
           block, never close one.  */
        if (indentation > top_len)
          maybe_dedent = false;
        indentation_mode = false;
      }
}
<<EOF>> {
    /* At end of input every block that is still open must be closed.
       One DEDENT is returned per call; the rule triggers again on the
       next call until only the base entry (0) remains on the stack, and
       then 0 is returned to signal end of input to the parser.  */
    if( top_len != 0 )
      {
        printf("DEDENT EOF!\n");
        VEC_pop( gpy_int, gpy_indent_stack );
        top_len = VEC_index( gpy_int, gpy_indent_stack,
                             VEC_length( gpy_int, gpy_indent_stack )-1 );
        return DEDENT;
      }
    else
      {
        return 0;
      }
}
%%
/* Called by the scanner when the current input is exhausted.  Always
   answers 1, i.e. there is no further input to continue with.  */
int
yywrap (void)
{
  return 1;
}
/* Scan and parse the file named GPY_IN.

   The indentation stack gets its base entry (0) first, then the file is
   opened and handed to the scanner via yyin before yyparse runs.

   Returns the result of yyparse when the file could be opened, and 0 when
   it could not (after reporting the failure on stderr).
   NOTE(review): if yyparse follows the usual convention of returning 0 on
   success, an unopenable file is indistinguishable from a successful
   parse -- confirm what the callers expect.  */
int gpy_lex_parse( const char * gpy_in )
{
  FILE *input;
  int status;

  VEC_safe_push( gpy_int, gc, gpy_indent_stack, 0 );
  top_len = 0;

  input = fopen( gpy_in, "rb" );
  debug("trying to open <%s>!\n", gpy_in);
  if( input == NULL )
    {
      fprintf(stderr, "error opening <%s>!\n", gpy_in );
      return 0;
    }

  yyin = input;
  status = yyparse( );
  fclose( input );
  /* Release the scanner's internal buffers once parsing is done.  */
  yylex_destroy( );
  return status;
}
/* Common bookkeeping for every real language token, run (through
   LANG_TOKEN_ACTION) before the token is returned to the parser.

   YYTEXT is the text of the current match.  It is not read directly here;
   presumably the yyless expansion behind UNPUT_TOKEN refers to it by name
   -- confirm before renaming or dropping the parameter.

   Returns DEDENT or INDENT when such a token has to be delivered ahead of
   the current one; in that case the match has been pushed back so that it
   is scanned again afterwards.  Returns NO_TOKEN when the current token
   may be returned as is.  */
int lang_token_action (char *yytext)
{
  /* A real token ends the indentation measurement for this line.  */
  if (++token_count > 0)
    {
      indentation_mode = false;
    }
  /* token_count is unsigned, hence %u.  */
  fprintf(stdout, "TOKENCOUNT: %u\n", token_count);

  /* INDENT/DEDENT tokens are only considered ahead of the first token of
     a line.  */
  if (gen_indentation == true)
    {
      if (maybe_dedent == true)
        {
          if (indentation < top_len)
            {
              /* Close one block per call.  maybe_dedent and
                 gen_indentation are left set, so the re-scanned token
                 lands here again and closes the next block if needed.  */
              UNPUT_TOKEN();
              fprintf(stdout, "DEDENT: top(%u) - %u\n", top_len, indentation);
              VEC_pop(gpy_int, gpy_indent_stack);
              top_len = VEC_index( gpy_int, gpy_indent_stack,
                                   VEC_length( gpy_int, gpy_indent_stack )-1 );
              fprintf(stdout, "DEBUG: DEDENT\n");
              return DEDENT;
            }

          if (indentation == top_len)
            {
              /* Back on an enclosing block's level: nothing to close.  */
              maybe_dedent = false;
            }
          else
            {
              /* The width matches no enclosing block.  */
              fatal_error("unmatched indentation!\n");
            }
        }
      else if (indentation > top_len)
        {
          /* Deeper than the current block: open a new one, then re-scan
             the token that triggered it.  */
          VEC_safe_push( gpy_int, gc, gpy_indent_stack, indentation );
          top_len = indentation;
          gen_indentation = false;
          UNPUT_TOKEN();
          fprintf(stdout, "DEBUG: INDENT\n");
          return INDENT;
        }
    }

  gen_indentation = false;
  return NO_TOKEN;
}