diff options
author | vishalgupta97 <vishalgupta7972@gmail.com> | 2018-05-26 00:08:56 +0530 |
---|---|---|
committer | vishalgupta97 <vishalgupta7972@gmail.com> | 2018-05-26 00:08:56 +0530 |
commit | cf633fb61fd64b0b49c3d99ea9cb881ebc8cd1ac (patch) | |
tree | 53b383792d593b41ddbfa9395e89c6f54c97190b | |
parent | 596e9e130f38ae068ac6150b2d62eafd544900f6 (diff) | |
download | automake-cf633fb61fd64b0b49c3d99ea9cb881ebc8cd1ac.tar.gz |
Implemented Lexer, Parser and AST module.
The grammar seperates the automake rule into left side and right side. It
divides the left side according to the primaries like PROGRAMS, LIBRARIES.
* Lexer.pm: Lexer module. Converts a string into tokens.
* ParserTable.pm: Contains the parsing table.
* automake.y: Contains the grammar.
* automake.png: Contains the state transition diagram generated by the
grammar.
* Tree.pm: Contains submodules for creating tree nodes.
* parser.pl: Implements the parser.
-rw-r--r-- | lib/Automake/Parser/.gitignore | 3 | ||||
-rw-r--r-- | lib/Automake/Parser/Lexer.pm | 51 | ||||
-rw-r--r-- | lib/Automake/Parser/ParserTable.pm | 48 | ||||
-rw-r--r-- | lib/Automake/Parser/Tree.pm | 164 | ||||
-rw-r--r-- | lib/Automake/Parser/ast.png | bin | 0 -> 338893 bytes | |||
-rw-r--r-- | lib/Automake/Parser/automake.png | bin | 0 -> 244817 bytes | |||
-rw-r--r-- | lib/Automake/Parser/automake.y | 24 | ||||
-rw-r--r-- | lib/Automake/Parser/input.txt | 8 | ||||
-rw-r--r-- | lib/Automake/Parser/parser.pl | 70 |
9 files changed, 368 insertions, 0 deletions
diff --git a/lib/Automake/Parser/.gitignore b/lib/Automake/Parser/.gitignore new file mode 100644 index 000000000..4253bbeb5 --- /dev/null +++ b/lib/Automake/Parser/.gitignore @@ -0,0 +1,3 @@ +*.gv +*.dot +*.tab.c
\ No newline at end of file diff --git a/lib/Automake/Parser/Lexer.pm b/lib/Automake/Parser/Lexer.pm new file mode 100644 index 000000000..ed8129a5d --- /dev/null +++ b/lib/Automake/Parser/Lexer.pm @@ -0,0 +1,51 @@ +package Lexer; + +use Exporter; + +our @ISA = qw(Exporter); +our @EXPORT = qw(lex); + +# lex(string) +# Takes as input a string of line. Divides it into tokens as specified +# by Regex and outputs an array of Tokens. Every Token is an array having +# two values: token name and its value. If its an operator, it has only +# one value. +sub lex($) +{ + my @tokens; + my $rhs = 0; + while( $_ ) + { + if( $rhs && s/^(.+)//o ) + { + push @tokens, ["rhs",$1]; + $rhs=0; + } + elsif(s/^(PROGRAMS|LIBRARIES|LTLIBRARIES|LISP|PYTHON|JAVA|SCRIPTS|DATA|HEADERS|MASN|TEXINFOS)//o) + { + push @tokens, [$1]; + } + elsif(s/^([a-zA-Z0-9]+)//o) + { + push @tokens, ["value",$1]; + } + elsif(s/^(=)//o) + { + push @tokens, [$1]; + $rhs = 1; + } + elsif(s/^(:|\n|_)//o) + { + push @tokens, [$1]; + } + elsif(s/^(\r| )//o) + { + } + else + { + die "Incorrect input $_"; + } + } + return @tokens; +} + diff --git a/lib/Automake/Parser/ParserTable.pm b/lib/Automake/Parser/ParserTable.pm new file mode 100644 index 000000000..f918f0519 --- /dev/null +++ b/lib/Automake/Parser/ParserTable.pm @@ -0,0 +1,48 @@ +package ParserTable; + +use Exporter; +use Tree; + +our @ISA = qw(Exporter); +our @Export = qw(@table $accept); + +#Stores the state number where the input is accepted +our $accept=9; + +# Stores the state diagram. Its an array of hashes. Each index corresponds +# to ith state. Every key in hash corresponds to a token, value corresponds +# to next state. reduce key specifies the reduction of token. Its an array +# consisting of number of elements to be reduced and a reference to a function +# to create a node. +our @table=( + {value => 1, input => 2, stmts => 3, stmt => 4, lhs => 5, optionlist => 6}, + {":" => 7, "_" => 8}, + {end => 9}, + {value => 1, lhs => 5, optionlist => 6, stmt => 10, reduce => [1, \&input]}, #input : stmts + {"\n" => 11}, + {"=" => 12}, + {value => 13, PROGRAMS => 14, LIBRARIES => 15, LTLIBRARIES => 16, LISP => 17, PYTHON => 18, JAVA => 19, SCRIPTS => 20, DATA => 21, HEADERS => 22, MASN => 23, TEXINFOS => 24, primaries => 25}, + {rhs => 26}, + {reduce => [2, \&optionlist]}, #optionlist : value '_' + {}, + {"\n" => 27}, + {reduce => [2, \&stmts]}, #stmts : stmt '\n' + {rhs => 28}, + {"_" =>29, reduce => [1, \&primaries]}, #primaries : value + {reduce => [1, \&primaries]}, #primaries : PROGRAMS + {reduce => [1, \&primaries]}, #primaries : LIBRARIES + {reduce => [1, \&primaries]}, #primaries : LTLIBRARIES + {reduce => [1, \&primaries]}, #primaries : LISP + {reduce => [1, \&primaries]}, #primaries : PYTHON + {reduce => [1, \&primaries]}, #primaries : JAVA + {reduce => [1, \&primaries]}, #primaries : SCRIPTS + {reduce => [1, \&primaries]}, #primaries : DATA + {reduce => [1, \&primaries]}, #primaries : HEADERS + {reduce => [1, \&primaries]}, #primaries : MASN + {reduce => [1, \&primaries]}, #primaries : TEXINFOS + {reduce => [2, \&lhs]}, #lhs : optionlist primaries + {reduce => [3, \&stmt]}, #stmt : value ':' rhs + {reduce => [3, \&stmts]}, #stmts : stmts stmt '\n' + {reduce => [3, \&stmt]}, #stmt : lhs '=' rhs + {reduce => [3, \&optionlist]} #optionlist : optionlist value '_' + );
\ No newline at end of file diff --git a/lib/Automake/Parser/Tree.pm b/lib/Automake/Parser/Tree.pm new file mode 100644 index 000000000..cad08fcde --- /dev/null +++ b/lib/Automake/Parser/Tree.pm @@ -0,0 +1,164 @@ +package Tree; + +use Exporter; + +our @ISA = qw(Exporter); +our @EXPORT = qw(input stmt stmts lhs primaries optionlist traverse printgraph); + +# Grammar Rule : (1) input => stmts +# Create a node having child as stmts. +sub input($) +{ + my ($val) = @_; + my %node = (name => input, childs => [$val]); + return \%node; +} + +# Grammar Rule : (1) stmt => lhs '=' rhs +# Create a node for Automake rule. It has lhs and rhs as childs. +# (2) stmt => value ':' rhs +# Create a node for Make rule. It has value and rhs as childs. +sub stmt($$$) +{ + my ($lhs, $sym, $rhs) = @_; + my %node; + if($sym -> [0] eq '=') + { + my %rhsnode = (name => rhs, val => $rhs -> [1]); + %node = (name => stmt, childs => [$lhs, \%rhsnode],type => automake); + } + else + { + %node = (name => stmt, lhs => $lhs, rhs =>$rhs->[1],type => make); + } + return \%node; +} + +# Grammar Rule : (1) stmts=> stmt '\n' +# Creates a node having a child as stmt +# (2) stmts=> stmts stmt '\n' +# Creates a node having a child as stmt. Insert the created node into +# the childs array of the stmts(First Argument). +sub stmts($$;$) +{ + my ($val1,$val2,$val3) = @_; + if($val3 == undef) + { + my %node = (name => stmts, childs => [$val1]); + my %nodeval = (name => stmts, childs => [\%node]); + return \%nodeval; + } + else + { + my %node = (name => stmts,childs => [$val2]); + push @{$val1->{childs}},\%node; + return $val1; + } +} + +# Grammar Rule : (1) lhs => optionlist primaries +# Create a node for left hand side of variable defination consisting of +# option list and primary. +sub lhs($$) +{ + my ($val1, $val2) = @_; + my %node = (name => lhs, childs => [$val1, $val2]); + return \%node; +} + +# Grammar Rule : (1) primaries : PROGRAMS +# (2) primaries : LIBRARIES +# (3) primaries : LTLIBRARIES +# (4) primaries : LISP +# (5) primaries : PYTHON +# (6) primaries : JAVA +# (7) primaries : SCRIPTS +# (8) primaries : DATA +# (9) primaries : HEADERS +# (10) primaries : MASN +# (11) primaries : TEXINFOS +# (12) primaries : value +# Creates a node corresponding to the given primary. +sub primaries($) +{ + my ($val) = @_; + my %node; + if( $val -> [0] eq 'value') + { + %node = ( name => primaries, val=> $val -> [1]); + } + else + { + %node = ( name => primaries, val => $val1); + } + return \%node; +} + +# Grammar Rule : (1) optionlist : value '_' +# Create a node having data value in val key. +# (2) optionlist : optionlist value '_' +# Add the data value to val key in the node pointed by optionlist(First Argument). +sub optionlist($$;$) +{ + my ($val1, $val2, $val3) = @_; + if($val3 == undef) + { + my %node = (name => optionlist, val => [$val1 -> [1]]); + return \%node; + } + else + { + push @{$val1 -> {val}},$val2 -> [1]; + return $val1; + } +} + +# printgraph(Hash) +# prints the AST by traversing the tree starting at node pointed by hash. +sub printgraph($) +{ + my $FH; + open($FH, '>', 'ast.gv') or die $!; + print $FH "graph graphname {\n"; + my ($ref) = @_; + print $FH "0 [label=\"Root\"];"; + traverse($ref, $FH, 0, 1); + print $FH "}\n"; + close $FH; +} +# traverse(Hash, File Handle, Parent Id, Node Id) +# Traverses the tree recursively. Prints the information about the current +# node to file. Call all its child with Parent Id equal to current Node Id +# and Node Id equal to (Parent Id*2+i) where i is the ith Child. +sub traverse($$$$) +{ + my ($ref,$FH,$parent,$id)=@_; + my %node = %$ref; + #print $level," ",$pos," ",$node{name}," "; + print $FH "$parent--$id;\n"; + my $label = ""; + @keys = sort grep {!/^childs/} keys %node; + foreach $key (@keys) + { + $label .= $key."=>"; + if(ref($node{$key}) eq 'ARRAY') + { + $label .= join(" ",@{$node{$key}})."\n"; + } + else + { + $label .= $node{$key}." "; + } + } + print $FH "$id [label=\"$label\"];"; + if( $node{childs} ) + { + my $val1 = $node{childs}; + my $i = 1; + foreach $child (@$val1) + { + traverse($child,$FH,$id,2*$id+$i); + $i++; + } + } +}
\ No newline at end of file diff --git a/lib/Automake/Parser/ast.png b/lib/Automake/Parser/ast.png Binary files differnew file mode 100644 index 000000000..2d81044d6 --- /dev/null +++ b/lib/Automake/Parser/ast.png diff --git a/lib/Automake/Parser/automake.png b/lib/Automake/Parser/automake.png Binary files differnew file mode 100644 index 000000000..c95bbb20a --- /dev/null +++ b/lib/Automake/Parser/automake.png diff --git a/lib/Automake/Parser/automake.y b/lib/Automake/Parser/automake.y new file mode 100644 index 000000000..9d18adfad --- /dev/null +++ b/lib/Automake/Parser/automake.y @@ -0,0 +1,24 @@ +%token value rhs PROGRAMS LIBRARIES LTLIBRARIES LISP PYTHON JAVA SCRIPTS DATA HEADERS MASN TEXINFOS +%% + +input : stmts ; +stmts : stmt '\n' + | stmts stmt '\n' +stmt : lhs '=' rhs + | value ':' rhs +lhs : optionlist primaries +primaries : PROGRAMS + | LIBRARIES + | LTLIBRARIES + | LISP + | PYTHON + | JAVA + | SCRIPTS + | DATA + | HEADERS + | MASN + | TEXINFOS + | value +optionlist : value '_' + | optionlist value '_' +%%
\ No newline at end of file diff --git a/lib/Automake/Parser/input.txt b/lib/Automake/Parser/input.txt new file mode 100644 index 000000000..c65f5d013 --- /dev/null +++ b/lib/Automake/Parser/input.txt @@ -0,0 +1,8 @@ +dist_bin_PROGRAMS = server client +server_SOURCES = server.c db.c +client_SOURCES = client.c dep.c +noinst_LIBRARIES = libfoo.a +noinst_LTLIBRARIES = foolib.a +files_JAVA = a.java b.java +files_PYTHON = chk.py app.py test.py +test_SCRIPTS = t1.sh t2.sh diff --git a/lib/Automake/Parser/parser.pl b/lib/Automake/Parser/parser.pl new file mode 100644 index 000000000..0163224de --- /dev/null +++ b/lib/Automake/Parser/parser.pl @@ -0,0 +1,70 @@ +#!/usr/bin/perl +use strict; +use Lexer; +use Tree; +use ParserTable; + +my $debug = 0; + +open ( data, "<input.txt" ); + +#Stores the list of tokens generated by lexer. +my @tokens; + +while ( <data> ) +{ + push @tokens, lex($_); +} +if( $debug ) +{ + print "Lexer Output\n"; + foreach my $token ( @tokens ) + { + print join(" ", @{$token}), "\n"; + } +} + +push @tokens, ["end"]; +my @stack = (0); +print "Parser Output\n" if $debug; + +while ( @stack ) +{ + if($stack[-1] == $ParserTable::accept) + { + print "Complete\n"; + printgraph( $stack[-4] ); + last; + } + my @curr_token = @{ $tokens[0] }; + if(my $val = $ParserTable::table[ $stack[-1] ]{ $curr_token[0] }) + { + push @stack, \@curr_token, $val; + shift @tokens; + } + elsif(my $val = $ParserTable::table[ $stack[-1] ]{ reduce }) + { + my @val1 = @$val; + my @param; + for(my $i = 1; $i <= 2 * $val1[0]; $i++) + { + if($i%2 == 0) + { + $val = pop @stack; + push @param,$val; + } + else + { + pop @stack; + } + } + @param = reverse @param; + push @stack, $val1[1]->( @param ); + push @stack, $ParserTable::table[ $stack[-2] ]{ $stack[-1]->{ name }}; + } + else + { + die "Unexpected Token ". @curr_token."\n"; + } + print @stack, "\n" if $debug; +}
\ No newline at end of file |