summaryrefslogtreecommitdiff
path: root/src/pdabuild.cc
diff options
context:
space:
mode:
Diffstat (limited to 'src/pdabuild.cc')
-rw-r--r--src/pdabuild.cc2099
1 files changed, 2099 insertions, 0 deletions
diff --git a/src/pdabuild.cc b/src/pdabuild.cc
new file mode 100644
index 0000000..7978edf
--- /dev/null
+++ b/src/pdabuild.cc
@@ -0,0 +1,2099 @@
+/*
+ * Copyright 2006-2012 Adrian Thurston <thurston@complang.org>
+ */
+
+/* This file is part of Colm.
+ *
+ * Colm is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Colm is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Colm; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#define EOF_REGION 0
+
+#include <iostream>
+#include <iomanip>
+#include <errno.h>
+#include <stdlib.h>
+
+/* Parsing. */
+#include "global.h"
+#include "parsedata.h"
+#include "pdacodegen.h"
+#include "pdarun.h"
+#include "redfsm.h"
+#include "fsmcodegen.h"
+#include "redbuild.h"
+
+/* Dumping the fsm. */
+#include "mergesort.h"
+
+using std::endl;
+using std::cerr;
+using std::cout;
+
+char startDefName[] = "start";
+
+/* Count the transitions in the fsm by walking the state list. */
+int countTransitions( PdaGraph *fsm )
+{
+ int numTrans = 0;
+ PdaState *state = fsm->stateList.head;
+ while ( state != 0 ) {
+ numTrans += state->transMap.length();
+ state = state->next;
+ }
+ return numTrans;
+}
+
+LangEl::LangEl( Namespace *nspace, const String &name, Type type )
+:
+ nspace(nspace),
+ name(name),
+ lit(name),
+ type(type),
+ id(-1),
+ isContext(false),
+ //displayString(0),
+ numAppearances(0),
+ commit(false),
+ isIgnore(false),
+ reduceFirst(false),
+ isLiteral(false),
+ isRepeat(false),
+ isList(false),
+ isOpt(false),
+ parseStop(false),
+ isEOF(false),
+ repeatOf(0),
+ tokenDef(0),
+ rootDef(0),
+ termDup(0),
+ eofLel(0),
+ pdaGraph(0),
+ pdaTables(0),
+ transBlock(0),
+ objectDef(0),
+ thisSize(0),
+ ofiOffset(0),
+ generic(0),
+ parserId(-1),
+ predType(PredNone),
+ predValue(0),
+ contextDef(0),
+ contextIn(0),
+ noPreIgnore(false),
+ noPostIgnore(false),
+ isZero(false)
+{
+}
+
+PdaGraph *ProdElList::walk( Compiler *pd, Production *prod )
+{
+ PdaGraph *prodFsm = new PdaGraph();
+ PdaState *last = prodFsm->addState();
+ prodFsm->setStartState( last );
+
+ int prodLength = 0;
+ for ( Iter prodEl = first(); prodEl.lte(); prodEl++, prodLength++ ) {
+ //PdaGraph *itemFsm = prodEl->walk( pd );
+ long value = prodEl->langEl->id;
+
+ PdaState *newState = prodFsm->addState();
+ PdaTrans *newTrans = prodFsm->appendNewTrans( last, newState, value, value );
+
+ newTrans->isShift = true;
+ newTrans->shiftPrior = prodEl->priorVal;
+ //cerr << "PRIOR VAL: " << newTrans->shiftPrior << endl;
+
+ if ( prodEl->commit ) {
+ //cout << "COMMIT: inserting commit of length: " << pd->prodLength << endl;
+ /* Insert the commit into transitions out of last */
+ for ( TransMap::Iter trans = last->transMap; trans.lte(); trans++ )
+ trans->value->commits.insert( prodLength );
+ }
+
+ last = newState;
+ }
+
+ /* Make the last state the final state. */
+ prodFsm->setFinState( last );
+ return prodFsm;
+}
+
+
+ProdElList *Compiler::makeProdElList( LangEl *langEl )
+{
+ ProdElList *prodElList = new ProdElList();
+ UniqueType *uniqueType = findUniqueType( TYPE_TREE, langEl );
+ TypeRef *typeRef = TypeRef::cons( internal, uniqueType );
+ prodElList->append( new ProdEl( internal, typeRef ) );
+ prodElList->tail->langEl = langEl;
+ return prodElList;
+}
+
+void Compiler::makeDefinitionNames()
+{
+ for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) {
+ int prodNum = 1;
+ for ( LelDefList::Iter def = lel->defList; def.lte(); def++ ) {
+ def->data.setAs( lel->name.length() + 32, "%s-%i",
+ lel->name.data, prodNum++ );
+ }
+ }
+}
+
+/* Make sure there there are no language elements whose type is unkonwn. This
+ * can happen when an id is used on the rhs of a definition but is not defined
+ * as anything. */
+void Compiler::noUndefindLangEls()
+{
+ for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) {
+ if ( lel->type == LangEl::Unknown )
+ error() << "'" << lel->name << "' was not defined as anything" << endp;
+ }
+}
+
+void Compiler::makeLangElIds()
+{
+ /* The first id 0 is reserved for the stack sentinal. A negative id means
+ * error to the parsing function, inducing backtracking. */
+ nextSymbolId = 1;
+
+ /* First pass assigns to the user terminals. */
+ for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) {
+ /* Must be a term, and not any of the special reserved terminals.
+ * Remember if the non terminal is a user non terminal. */
+ if ( lel->type == LangEl::Term &&
+ !lel->isEOF &&
+ lel != errorLangEl &&
+ lel != noTokenLangEl )
+ {
+ lel->id = nextSymbolId++;
+ }
+ }
+
+ //eofLangEl->id = nextSymbolId++;
+ for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) {
+ /* Must be a term, and not any of the special reserved terminals.
+ * Remember if the non terminal is a user non terminal. */
+ if ( lel->isEOF )
+ lel->id = nextSymbolId++;
+ }
+
+ /* Next assign to the eof notoken, which we always create. */
+ noTokenLangEl->id = nextSymbolId++;
+
+ /* Possibly assign to the error language element. */
+ if ( errorLangEl != 0 )
+ errorLangEl->id = nextSymbolId++;
+
+ /* Save this for the code generation. */
+ firstNonTermId = nextSymbolId;
+
+ /* A third and final pass assigns to everything else. */
+ for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) {
+ /* Anything else not yet assigned gets assigned now. */
+ if ( lel->id < 0 )
+ lel->id = nextSymbolId++;
+ }
+
+ assert( ptrLangEl->id == LEL_ID_PTR );
+ assert( boolLangEl->id == LEL_ID_BOOL );
+ assert( intLangEl->id == LEL_ID_INT );
+ assert( strLangEl->id == LEL_ID_STR );
+ assert( streamLangEl->id == LEL_ID_STREAM );
+ assert( ignoreLangEl->id == LEL_ID_IGNORE );
+}
+
+void Compiler::refNameSpace( LangEl *lel, Namespace *nspace )
+{
+ if ( nspace == rootNamespace ) {
+ lel->refName = "::" + lel->refName;
+ return;
+ }
+
+ lel->refName = nspace->name + "::" + lel->refName;
+ lel->declName = nspace->name + "::" + lel->declName;
+ lel->xmlTag = nspace->name + "::" + lel->xmlTag;
+ refNameSpace( lel, nspace->parentNamespace );
+}
+
+void Compiler::makeLangElNames()
+{
+ for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) {
+ if ( lel->id == LEL_ID_VOID ) {
+ lel->fullName = "_void";
+ lel->fullLit = "_void";
+ lel->refName = "_void";
+ lel->declName = "_void";
+ lel->xmlTag = "void";
+
+ }
+ else if ( lel->id == LEL_ID_INT ) {
+ lel->fullName = "_int";
+ lel->fullLit = "_int";
+ lel->refName = "_int";
+ lel->declName = "_int";
+ lel->xmlTag = "int";
+ }
+ else if ( lel->id == LEL_ID_BOOL ) {
+ lel->fullName = "_bool";
+ lel->fullLit = "_bool";
+ lel->refName = "_bool";
+ lel->declName = "_bool";
+ lel->xmlTag = "bool";
+ }
+ else {
+ lel->fullName = lel->name;
+ lel->fullLit = lel->lit;
+ lel->refName = lel->lit;
+ lel->declName = lel->lit;
+ lel->xmlTag = lel->name;
+ }
+
+ /* If there is also a namespace next to the type, we add a prefix to
+ * the type. It's not convenient to name C++ classes the same as a
+ * namespace in the same scope. We don't want to restrict colm, so we
+ * add a workaround for the least-common case. The type gets t_ prefix.
+ * */
+ Namespace *nspace = lel->nspace->findNamespace( lel->name );
+ if ( nspace != 0 ) {
+ lel->refName = "t_" + lel->refName;
+ lel->fullName = "t_" + lel->fullName;
+ lel->declName = "t_" + lel->declName;
+ lel->xmlTag = "t_" + lel->xmlTag;
+ }
+
+ refNameSpace( lel, lel->nspace );
+ }
+}
+
+/* Set up dot sets, shift info, and prod sets. */
+void Compiler::makeProdFsms()
+{
+ /* There are two items in the index for each production (high and low). */
+ int indexLen = prodList.length() * 2;
+ dotItemIndex.setAsNew( indexLen );
+ int dsiLow = 0, indexPos = 0;
+
+ /* Build FSMs for all production language elements. */
+ for ( DefList::Iter prod = prodList; prod.lte(); prod++ )
+ prod->fsm = prod->prodElList->walk( this, prod );
+
+ makeNonTermFirstSets();
+ makeFirstSets();
+
+ /* Build FSMs for all production language elements. */
+ for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) {
+ if ( addUniqueEmptyProductions ) {
+ /* This must be re-implemented. */
+ assert( false );
+ //if ( !prod->isLeftRec && prod->uniqueEmptyLeader != 0 ) {
+ // PdaGraph *emptyLeader = prod->uniqueEmptyLeader->walk( this );
+ // emptyLeader->concatOp( prod->fsm );
+ // prod->fsm = emptyLeader;
+ //}
+ }
+
+ /* Compute the machine's length. */
+ prod->fsmLength = prod->fsm->fsmLength( );
+
+ /* Productions have a unique production id for each final state.
+ * This lets us use a production length specific to each final state.
+ * Start states are always isolated therefore if the start state is
+ * final then reductions from it will always have a fixed production
+ * length. This is a simple method for determining the length
+ * of zero-length derivations when reducing. */
+
+ /* Number of dot items needed for the production is elements + 1
+ * because the dot can be before the first and after the last element. */
+ int numForProd = prod->fsm->stateList.length() + 1;
+
+ /* Set up the low and high values in the index for this production. */
+ dotItemIndex.data[indexPos].key = dsiLow;
+ dotItemIndex.data[indexPos].value = prod;
+ dotItemIndex.data[indexPos+1].key = dsiLow + numForProd - 1;
+ dotItemIndex.data[indexPos+1].value = prod;
+
+ int dsi = dsiLow;
+ for ( PdaStateList::Iter state = prod->fsm->stateList; state.lte(); state++, dsi++ ) {
+ /* All transitions are shifts. */
+ for ( TransMap::Iter out = state->transMap; out.lte(); out++ )
+ assert( out->value->isShift );
+
+ state->dotSet.insert( dsi );
+ }
+
+ /* Move over the production. */
+ dsiLow += numForProd;
+ indexPos += 2;
+
+ if ( prod->prodCommit ) {
+ for ( PdaStateSet::Iter fin = prod->fsm->finStateSet; fin.lte(); fin++ ) {
+ int length = prod->fsmLength;
+ //cerr << "PENDING COMMIT IN FINAL STATE of " << prod->prodId <<
+ // " with len: " << length << endl;
+ (*fin)->pendingCommits.insert( ProdIdPair( prod->prodId, length ) );
+ }
+ }
+ }
+
+ /* Make the final state specific prod id to prod id mapping. */
+ prodIdIndex = new Production*[prodList.length()];
+ for ( DefList::Iter prod = prodList; prod.lte(); prod++ )
+ prodIdIndex[prod->prodId] = prod;
+}
+
+/* Want the first set of over src. If the first set contains epsilon, go over
+ * it and over tab. If overSrc is the end of the production, find the follow
+ * from the table, taking only the characters on which the parent is reduced.
+ * */
+void Compiler::findFollow( AlphSet &result, PdaState *overTab,
+ PdaState *overSrc, Production *parentDef )
+{
+ if ( overSrc->isFinState() ) {
+ assert( overSrc->transMap.length() == 0 );
+
+ /* At the end of the production. Turn to the table. */
+ long redCode = makeReduceCode( parentDef->prodId, false );
+ for ( TransMap::Iter tabTrans = overTab->transMap; tabTrans.lte(); tabTrans++ ) {
+ for ( ActDataList::Iter adl = tabTrans->value->actions; adl.lte(); adl++ ) {
+ if ( *adl == redCode )
+ result.insert( tabTrans->key );
+ }
+ }
+ }
+ else {
+ /* Get the first set of the item. If the first set contains epsilon
+ * then move over overSrc and overTab and recurse. */
+ assert( overSrc->transMap.length() == 1 );
+ TransMap::Iter pastTrans = overSrc->transMap;
+
+ LangEl *langEl = langElIndex[pastTrans->key];
+ if ( langEl != 0 && langEl->type == LangEl::NonTerm ) {
+ bool hasEpsilon = false;
+ for ( LelDefList::Iter def = langEl->defList; def.lte(); def++ ) {
+ result.insert( def->firstSet );
+
+ if ( def->firstSet.find( -1 ) )
+ hasEpsilon = true;
+ }
+
+ /* Find the equivalent state in the parser. */
+ if ( hasEpsilon ) {
+ PdaTrans *tabTrans = overTab->findTrans( pastTrans->key );
+ findFollow( result, tabTrans->toState,
+ pastTrans->value->toState, parentDef );
+ }
+
+ /* Now possibly the dup. */
+ if ( langEl->termDup != 0 )
+ result.insert( langEl->termDup->id );
+ }
+ else {
+ result.insert( pastTrans->key );
+ }
+ }
+}
+
+PdaState *Compiler::followProd( PdaState *tabState, PdaState *prodState )
+{
+ while ( prodState->transMap.length() == 1 ) {
+ TransMap::Iter prodTrans = prodState->transMap;
+ PdaTrans *tabTrans = tabState->findTrans( prodTrans->key );
+ prodState = prodTrans->value->toState;
+ tabState = tabTrans->toState;
+ }
+ return tabState;
+}
+
+void Compiler::trySetTime( PdaTrans *trans, long code, long &time )
+{
+ /* Find the item. */
+ for ( ActDataList::Iter adl = trans->actions; adl.lte(); adl++ ) {
+ if ( *adl == code ) {
+ /* If the time of the shift is not already set, set it. */
+ if ( trans->actOrds[adl.pos()] == 0 ) {
+ //cerr << "setting time: state = " << tabState->stateNum
+ // << ", trans = " << tabTrans->lowKey
+ // << ", time = " << time << endl;
+ trans->actOrds[adl.pos()] = time++;
+ }
+ break;
+ }
+ }
+}
+
+/* Go down a defintiion and then handle the follow actions. */
+void Compiler::pdaOrderFollow( LangEl *rootEl, PdaState *tabState,
+ PdaTrans *tabTrans, PdaTrans *srcTrans, Production *parentDef,
+ Production *definition, long &time )
+{
+ /* We need the follow from tabState/srcState over the defintion we are
+ * currently processing. */
+ PdaState *overTab = tabTrans->toState;
+ PdaState *overSrc = srcTrans->toState;
+
+ AlphSet alphSet;
+ if ( parentDef == rootEl->rootDef )
+ alphSet.insert( rootEl->eofLel->id );
+ else
+ findFollow( alphSet, overTab, overSrc, parentDef );
+
+ /* Now follow the production to find out where it expands to. */
+ PdaState *expandToState = followProd( tabState, definition->fsm->startState );
+
+ /* Find the reduce item. */
+ long redCode = makeReduceCode( definition->prodId, false );
+
+ for ( TransMap::Iter tt = expandToState->transMap; tt.lte(); tt++ ) {
+ if ( alphSet.find( tt->key ) ) {
+ trySetTime( tt->value, redCode, time );
+
+ /* If the items token region is not recorded in the state, do it now. */
+ addRegion( expandToState, tt->value, tt->key,
+ tt->value->noPreIgnore, tt->value->noPostIgnore );
+ }
+ }
+}
+
+bool regionVectHas( RegionVect &regVect, TokenRegion *region )
+{
+ for ( RegionVect::Iter trvi = regVect; trvi.lte(); trvi++ ) {
+ if ( *trvi == region )
+ return true;
+ }
+ return false;
+}
+
+void Compiler::addRegion( PdaState *tabState, PdaTrans *tabTrans,
+ long pdaKey, bool noPreIgnore, bool noPostIgnore )
+{
+ LangEl *langEl = langElIndex[pdaKey];
+ if ( langEl != 0 && langEl->type == LangEl::Term ) {
+ TokenRegion *region = 0;
+ RegionSet *regionSet = 0;
+
+ /* If it is not the eof, then use the region associated
+ * with the token definition. */
+ if ( langEl->isZero ) {
+ region = langEl->tokenDef->regionSet->collectIgnore;
+ regionSet = langEl->tokenDef->regionSet;
+ }
+ else if ( !langEl->isEOF && langEl->tokenDef != 0 ) {
+ region = langEl->tokenDef->regionSet->tokenIgnore;
+ regionSet = langEl->tokenDef->regionSet;
+ }
+
+ if ( region != 0 ) {
+ /* region. */
+ TokenRegion *scanRegion = region;
+
+ if ( langEl->noPreIgnore )
+ scanRegion = regionSet->tokenOnly;
+
+ if ( !regionVectHas( tabState->regions, scanRegion ) )
+ tabState->regions.append( scanRegion );
+
+ /* Pre-region of to state */
+ PdaState *toState = tabTrans->toState;
+ if ( !langEl->noPostIgnore &&
+ regionSet->ignoreOnly != 0 &&
+ !regionVectHas( toState->preRegions, regionSet->ignoreOnly ) )
+ {
+ toState->preRegions.append( regionSet->ignoreOnly );
+ }
+ }
+ }
+}
+
+#if 0
+ orderState( tabState, prodState, time ):
+ if not tabState.dotSet.find( prodState.dotID )
+ tabState.dotSet.insert( prodState.dotID )
+ tabTrans = tabState.findMatchingTransition( prodState.getTransition() )
+
+ if tabTrans is NonTerminal:
+ for production in tabTrans.nonTerm.prodList:
+ orderState( tabState, production.startState, time )
+
+ for all expandToState in tabTrans.expandToStates:
+ for all followTrans in expandToState.transList
+ reduceAction = findAction( production.reduction )
+ if reduceAction.time is unset:
+ reduceAction.time = time++
+ end
+ end
+ end
+ end
+ end
+
+ shiftAction = tabTrans.findAction( shift )
+ if shiftAction.time is unset:
+ shiftAction.time = time++
+ end
+
+ orderState( tabTrans.toState, prodTrans.toState, time )
+ end
+ end
+
+ orderState( parseTable.startState, startProduction.startState, 1 )
+#endif
+
+void Compiler::pdaOrderProd( LangEl *rootEl, PdaState *tabState,
+ PdaState *srcState, Production *parentDef, long &time )
+{
+ assert( srcState->dotSet.length() == 1 );
+ if ( tabState->dotSet2.find( srcState->dotSet[0] ) )
+ return;
+ tabState->dotSet2.insert( srcState->dotSet[0] );
+
+ assert( srcState->transMap.length() == 0 || srcState->transMap.length() == 1 );
+
+ if ( srcState->transMap.length() == 1 ) {
+ TransMap::Iter srcTrans = srcState->transMap;
+
+ /* Find the equivalent state in the parser. */
+ PdaTrans *tabTrans = tabState->findTrans( srcTrans->key );
+
+ /* Recurse into the transition if it is a non-terminal. */
+ LangEl *langEl = langElIndex[srcTrans->key];
+ if ( langEl != 0 ) {
+ if ( langEl->reduceFirst ) {
+ /* Use a shortest match ordering for the contents of this
+ * nonterminal. Does follows for all productions first, then
+ * goes down the productions. */
+ for ( LelDefList::Iter expDef = langEl->defList; expDef.lte(); expDef++ ) {
+ pdaOrderFollow( rootEl, tabState, tabTrans, srcTrans->value,
+ parentDef, expDef, time );
+ }
+ for ( LelDefList::Iter expDef = langEl->defList; expDef.lte(); expDef++ )
+ pdaOrderProd( rootEl, tabState, expDef->fsm->startState, expDef, time );
+
+ }
+ else {
+ /* The default action ordering. For each prod, goes down the
+ * prod then sets the follow before going to the next prod. */
+ for ( LelDefList::Iter expDef = langEl->defList; expDef.lte(); expDef++ ) {
+ pdaOrderProd( rootEl, tabState, expDef->fsm->startState, expDef, time );
+
+ pdaOrderFollow( rootEl, tabState, tabTrans, srcTrans->value,
+ parentDef, expDef, time );
+ }
+ }
+ }
+
+ trySetTime( tabTrans, SHIFT_CODE, time );
+
+ /* Now possibly for the dup. */
+ if ( langEl != 0 && langEl->termDup != 0 ) {
+ PdaTrans *dupTrans = tabState->findTrans( langEl->termDup->id );
+ trySetTime( dupTrans, SHIFT_CODE, time );
+ }
+
+ /* If the items token region is not recorded in the state, do it now. */
+ addRegion( tabState, tabTrans, srcTrans->key,
+ srcTrans->value->noPreIgnore, srcTrans->value->noPostIgnore );
+
+ /* Go over one in the production. */
+ pdaOrderProd( rootEl, tabTrans->toState,
+ srcTrans->value->toState, parentDef, time );
+ }
+}
+
+void Compiler::pdaActionOrder( PdaGraph *pdaGraph, LangElSet &parserEls )
+{
+ for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) {
+ assert( (state->stateBits & SB_ISMARKED) == 0 );
+
+ /* Traverse the src state's transitions. */
+ long last = 0;
+ for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) {
+ if ( ! trans.first() )
+ assert( last < trans->key );
+ last = trans->key;
+ }
+ }
+
+ /* Compute the action orderings, record the max value. */
+ long time = 1;
+ for ( LangElSet::Iter pe = parserEls; pe.lte(); pe++ ) {
+ PdaState *startState = (*pe)->rootDef->fsm->startState;
+ pdaOrderProd( *pe, (*pe)->startState, startState, (*pe)->rootDef, time );
+
+ /* Walk over the start lang el and set the time for shift of
+ * the eof action that completes the parse. */
+ PdaTrans *overStart = (*pe)->startState->findTrans( (*pe)->id );
+ PdaTrans *eofTrans = overStart->toState->findTrans( (*pe)->eofLel->id );
+ eofTrans->actOrds[0] = time++;
+ }
+
+ for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) {
+ if ( state->regions.length() == 0 ) {
+ for ( TransMap::Iter tel = state->transMap; tel.lte(); tel++ ) {
+ /* There are no regions and EOF leaves the state. Add the eof
+ * token region. */
+ PdaTrans *trans = tel->value;
+ LangEl *lel = langElIndex[trans->lowKey];
+ if ( lel != 0 && lel->isEOF )
+ state->regions.append( EOF_REGION );
+ }
+ }
+ }
+
+ ///* Warn about states with empty token region lists. */
+ //for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) {
+ // if ( state->regions.length() == 0 ) {
+ // warning() << "state has an empty token region, state: " <<
+ // state->stateNum << endl;
+ // }
+ //}
+
+ /* Some actions may not have an ordering. I believe these to be actions
+ * that result in a parse error and they arise because the state tables
+ * are LALR(1) but the action ordering is LR(1). LALR(1) causes some
+ * reductions that lead nowhere. */
+ for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) {
+ assert( CmpDotSet::compare( state->dotSet, state->dotSet2 ) == 0 );
+ for ( TransMap::Iter tel = state->transMap; tel.lte(); tel++ ) {
+ PdaTrans *trans = tel->value;
+ /* Check every action has an ordering. */
+ for ( ActDataList::Iter adl = trans->actOrds; adl.lte(); adl++ ) {
+ if ( *adl == 0 )
+ *adl = time++;
+ }
+ }
+ }
+}
+
+void Compiler::advanceReductions( PdaGraph *pdaGraph )
+{
+ /* Loop all states. */
+ for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) {
+ if ( !state->advanceReductions )
+ continue;
+
+ bool outHasShift = false;
+ ReductionMap outReds;
+ LongSet outCommits;
+ for ( TransMap::Iter out = state->transMap; out.lte(); out++ ) {
+ /* Get the transition from the trans el. */
+ if ( out->value->isShift )
+ outHasShift = true;
+ outReds.insert( out->value->reductions );
+ outCommits.insert( out->value->commits );
+ }
+
+ bool inHasShift = false;
+ ReductionMap inReds;
+ for ( PdaTransInList::Iter in = state->inRange; in.lte(); in++ ) {
+ /* Get the transition from the trans el. */
+ if ( in->isShift )
+ inHasShift = true;
+ inReds.insert( in->reductions );
+ }
+
+ if ( !outHasShift && outReds.length() == 1 &&
+ inHasShift && inReds.length() == 0 )
+ {
+ //cerr << "moving reduction to shift" << endl;
+
+ /* Move the reduction to all in transitions. */
+ for ( PdaTransInList::Iter in = state->inRange; in.lte(); in++ ) {
+ assert( in->actions.length() == 1 );
+ assert( in->actions[0] == SHIFT_CODE );
+ in->actions[0] = makeReduceCode( outReds[0].key, true );
+ in->afterShiftCommits.insert( outCommits );
+ }
+
+ /*
+ * Remove all transitions out of the state.
+ */
+
+ /* Detach out range transitions. */
+ for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) {
+ pdaGraph->detachTrans( state, trans->value->toState, trans->value );
+ delete trans->value;
+ }
+ state->transMap.empty();
+
+ /* Redirect all the in transitions to the actionDestState. */
+ pdaGraph->inTransMove( actionDestState, state );
+ }
+ }
+
+ pdaGraph->removeUnreachableStates();
+}
+
+void Compiler::sortActions( PdaGraph *pdaGraph )
+{
+ /* Sort the actions. */
+ for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) {
+ assert( CmpDotSet::compare( state->dotSet, state->dotSet2 ) == 0 );
+ for ( TransMap::Iter tel = state->transMap; tel.lte(); tel++ ) {
+ PdaTrans *trans = tel->value;
+
+ /* Sort by the action ords. */
+ ActDataList actions( trans->actions );
+ ActDataList actOrds( trans->actOrds );
+ ActDataList actPriors( trans->actPriors );
+ trans->actions.empty();
+ trans->actOrds.empty();
+ trans->actPriors.empty();
+ while ( actOrds.length() > 0 ) {
+ int min = 0;
+ for ( int i = 1; i < actOrds.length(); i++ ) {
+ if ( actPriors[i] > actPriors[min] ||
+ (actPriors[i] == actPriors[min] &&
+ actOrds[i] < actOrds[min] ) )
+ {
+ min = i;
+ }
+ }
+ trans->actions.append( actions[min] );
+ trans->actOrds.append( actOrds[min] );
+ trans->actPriors.append( actPriors[min] );
+ actions.remove(min);
+ actOrds.remove(min);
+ actPriors.remove(min);
+ }
+
+ if ( branchPointInfo && trans->actions.length() > 1 ) {
+ cerr << "info: branch point"
+ << " state: " << state->stateNum
+ << " trans: ";
+ LangEl *lel = langElIndex[trans->lowKey];
+ if ( lel == 0 )
+ cerr << (char)trans->lowKey << endl;
+ else
+ cerr << lel->lit << endl;
+
+ for ( ActDataList::Iter act = trans->actions; act.lte(); act++ ) {
+ switch ( *act & 0x3 ) {
+ case 1:
+ cerr << " shift" << endl;
+ break;
+ case 2:
+ cerr << " reduce " <<
+ prodIdIndex[(*act >> 2)]->data << endl;
+ break;
+ case 3:
+ cerr << " shift-reduce" << endl;
+ break;
+ }
+ }
+ }
+
+ /* Verify that shifts of nonterminals don't have any branch
+ * points or commits. */
+ if ( trans->lowKey >= firstNonTermId ) {
+ if ( trans->actions.length() != 1 ||
+ (trans->actions[0] & 0x3) != 1 )
+ {
+ error() << "TRANS ON NONTERMINAL is something "
+ "other than a shift" << endl;
+ }
+ if ( trans->commits.length() > 0 )
+ error() << "TRANS ON NONTERMINAL has a commit" << endl;
+ }
+
+ /* TODO: Shift-reduces are optimizations. Verify that
+ * shift-reduces exist only if they don't entail a conflict. */
+ }
+ }
+}
+
+void Compiler::reduceActions( PdaGraph *pdaGraph )
+{
+ /* Reduce the actions. */
+ for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) {
+ for ( TransMap::Iter tel = state->transMap; tel.lte(); tel++ ) {
+ PdaTrans *trans = tel->value;
+ PdaActionSetEl *inSet;
+
+ int commitLen = trans->commits.length() > 0 ?
+ trans->commits[trans->commits.length()-1] : 0;
+
+ if ( trans->afterShiftCommits.length() > 0 ) {
+ int afterShiftCommit = trans->afterShiftCommits[
+ trans->afterShiftCommits.length()-1];
+
+ if ( commitLen > 0 && commitLen+1 > afterShiftCommit )
+ commitLen = ( commitLen + 1 );
+ else
+ commitLen = afterShiftCommit;
+ }
+ else {
+ commitLen = commitLen * -1;
+ }
+
+ //if ( commitLen != 0 ) {
+ // cerr << "FINAL ACTION COMMIT LEN: " << commitLen << endl;
+ //}
+
+ pdaGraph->actionSet.insert( ActionData( trans->toState->stateNum,
+ trans->actions, commitLen ), &inSet );
+ trans->actionSetEl = inSet;
+ }
+ }
+}
+
+void Compiler::computeAdvanceReductions( LangEl *langEl, PdaGraph *pdaGraph )
+{
+ /* Get the entry into the graph and traverse over the root. The resulting
+ * state can have eof, nothing else can. */
+ PdaState *overStart = pdaGraph->followFsm(
+ langEl->startState,
+ langEl->rootDef->fsm );
+
+ /* The graph must reduce to root all on it's own. It cannot depend on
+ * require EOF. */
+ for ( PdaStateList::Iter st = pdaGraph->stateList; st.lte(); st++ ) {
+ if ( st == overStart )
+ continue;
+
+ for ( TransMap::Iter tr = st->transMap; tr.lte(); tr++ ) {
+ if ( tr->value->lowKey == langEl->eofLel->id )
+ st->advanceReductions = true;
+ }
+ }
+}
+
+void Compiler::verifyParseStopGrammar( LangEl *langEl, PdaGraph *pdaGraph )
+{
+ /* Get the entry into the graph and traverse over the root. The resulting
+ * state can have eof, nothing else can. */
+ PdaState *overStart = pdaGraph->followFsm(
+ langEl->startState,
+ langEl->rootDef->fsm );
+
+ /* The graph must reduce to root all on it's own. It cannot depend on
+ * require EOF. */
+ for ( PdaStateList::Iter st = pdaGraph->stateList; st.lte(); st++ ) {
+ if ( st == overStart )
+ continue;
+
+ for ( TransMap::Iter tr = st->transMap; tr.lte(); tr++ ) {
+ if ( tr->value->lowKey == langEl->eofLel->id ) {
+ /* This needs a better error message. Appears to be voodoo. */
+ error() << "grammar is not usable with parse_stop" << endp;
+ }
+ }
+ }
+}
+
+LangEl *Compiler::predOf( PdaTrans *trans, long action )
+{
+ LangEl *lel;
+ if ( action == SHIFT_CODE )
+ lel = langElIndex[trans->lowKey];
+ else
+ lel = prodIdIndex[action >> 2]->predOf;
+ return lel;
+}
+
+
+bool Compiler::precedenceSwap( long action1, long action2, LangEl *l1, LangEl *l2 )
+{
+ bool swap = false;
+ if ( l2->predValue > l1->predValue )
+ swap = true;
+ else if ( l1->predValue == l2->predValue ) {
+ if ( l1->predType == PredLeft && action1 == SHIFT_CODE )
+ swap = true;
+ else if ( l1->predType == PredRight && action2 == SHIFT_CODE )
+ swap = true;
+ }
+ return swap;
+}
+
+bool Compiler::precedenceRemoveBoth( LangEl *l1, LangEl *l2 )
+{
+ if ( l1->predValue == l2->predValue && l1->predType == PredNonassoc )
+ return true;
+ return false;
+}
+
+void Compiler::resolvePrecedence( PdaGraph *pdaGraph )
+{
+ for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) {
+ assert( CmpDotSet::compare( state->dotSet, state->dotSet2 ) == 0 );
+
+ for ( long t = 0; t < state->transMap.length(); /* increment at end */ ) {
+ PdaTrans *trans = state->transMap[t].value;
+
+again:
+ /* Find action with precedence. */
+ for ( int i = 0; i < trans->actions.length(); i++ ) {
+ LangEl *li = predOf( trans, trans->actions[i] );
+
+ if ( li != 0 && li->predType != PredNone ) {
+ /* Find another action with precedence. */
+ for ( int j = i+1; j < trans->actions.length(); j++ ) {
+ LangEl *lj = predOf( trans, trans->actions[j] );
+
+ if ( lj != 0 && lj->predType != PredNone ) {
+ /* Conflict to check. */
+ bool swap = precedenceSwap( trans->actions[i],
+ trans->actions[j], li, lj );
+
+ if ( swap ) {
+ long t = trans->actions[i];
+ trans->actions[i] = trans->actions[j];
+ trans->actions[j] = t;
+ }
+
+ trans->actions.remove( j );
+ if ( precedenceRemoveBoth( li, lj ) )
+ trans->actions.remove( i );
+
+ goto again;
+ }
+ }
+ }
+ }
+
+ /* If there are still actions then move to the next one. If not,
+ * (due to nonassoc) then remove the transition. */
+ if ( trans->actions.length() > 0 )
+ t += 1;
+ else
+ state->transMap.vremove( t );
+ }
+ }
+}
+
+void Compiler::analyzeMachine( PdaGraph *pdaGraph, LangElSet &parserEls )
+{
+ pdaGraph->maxState = pdaGraph->stateList.length() - 1;
+ pdaGraph->maxLelId = nextSymbolId - 1;
+ pdaGraph->maxOffset = pdaGraph->stateList.length() * pdaGraph->maxLelId;
+
+ for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) {
+ for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) {
+ if ( trans->value->isShift ) {
+ trans->value->actions.append( SHIFT_CODE );
+ trans->value->actPriors.append( trans->value->shiftPrior );
+ }
+ for ( ReductionMap::Iter red = trans->value->reductions; red.lte(); red++ ) {
+ trans->value->actions.append( makeReduceCode( red->key, false ) );
+ trans->value->actPriors.append( red->value );
+ }
+ trans->value->actOrds.appendDup( 0, trans->value->actions.length() );
+ }
+ }
+
+ pdaActionOrder( pdaGraph, parserEls );
+ sortActions( pdaGraph );
+ resolvePrecedence( pdaGraph );
+
+ /* Verify that any type we parse_stop can actually be parsed that way. */
+ for ( LangElSet::Iter pe = parserEls; pe.lte(); pe++ ) {
+ LangEl *lel = *pe;
+ if ( lel->parseStop )
+ computeAdvanceReductions(lel , pdaGraph);
+ }
+
+ advanceReductions( pdaGraph );
+ pdaGraph->setStateNumbers();
+ reduceActions( pdaGraph );
+
+ /* Set the action ids. */
+ int actionSetId = 0;
+ for ( PdaActionSet::Iter asi = pdaGraph->actionSet; asi.lte(); asi++ )
+ asi->key.id = actionSetId++;
+
+ /* Get the max index. */
+ pdaGraph->maxIndex = actionSetId - 1;
+
+ /* Compute the max prod length. */
+ pdaGraph->maxProdLen = 0;
+ for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) {
+ if ( (unsigned)prod->fsmLength > pdaGraph->maxProdLen )
+ pdaGraph->maxProdLen = prod->fsmLength;
+ }
+
+ /* Asserts that any transition with a nonterminal has a single action
+ * which is either a shift or a shift-reduce. */
+ for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) {
+ for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) {
+ LangEl *langEl = langElIndex[trans->value->lowKey];
+ if ( langEl != 0 && langEl->type == LangEl::NonTerm ) {
+ assert( trans->value->actions.length() == 1 );
+ assert( trans->value->actions[0] == SHIFT_CODE ||
+ (trans->value->actions[0] & 0x3) == SHIFT_REDUCE_CODE );
+ }
+ }
+ }
+
+ /* Assert that shift reduces always appear on their own. */
+ for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) {
+ for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) {
+ for ( ActDataList::Iter act = trans->value->actions; act.lte(); act++ ) {
+ if ( (*act & 0x3) == SHIFT_REDUCE_CODE )
+ assert( trans->value->actions.length() == 1 );
+ }
+ }
+ }
+
+ /* Verify that any type we parse_stop can actually be parsed that way. */
+ for ( LangElSet::Iter pe = parserEls; pe.lte(); pe++ ) {
+ LangEl *lel = *pe;
+ if ( lel->parseStop )
+ verifyParseStopGrammar(lel , pdaGraph);
+ }
+}
+
+void Compiler::wrapNonTerminals()
+{
+ /* Make a language element that will be used to make the root productions.
+ * These are used for making parsers rooted at any production (including
+ * the start symbol). */
+ rootLangEl = declareLangEl( this, rootNamespace, "_root", LangEl::NonTerm );
+
+ for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) {
+ /* Make a single production used when the lel is a root. */
+ ProdElList *prodElList = makeProdElList( lel );
+ lel->rootDef = Production::cons( InputLoc(), rootLangEl,
+ prodElList, String(), false, 0,
+ prodList.length(), rootLangEl->defList.length() );
+ prodList.append( lel->rootDef );
+ rootLangEl->defList.append( lel->rootDef );
+
+ /* First resolve. */
+ for ( ProdElList::Iter prodEl = *prodElList; prodEl.lte(); prodEl++ )
+ resolveProdEl( prodEl );
+ }
+}
+
+bool Compiler::makeNonTermFirstSetProd( Production *prod, PdaState *state )
+{
+ bool modified = false;
+ for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) {
+ if ( trans->key >= firstNonTermId ) {
+ long *inserted = prod->nonTermFirstSet.insert( trans->key );
+ if ( inserted != 0 )
+ modified = true;
+
+ bool hasEpsilon = false;
+ LangEl *lel = langElIndex[trans->key];
+ for ( LelDefList::Iter ldef = lel->defList; ldef.lte(); ldef++ ) {
+ for ( ProdIdSet::Iter pid = ldef->nonTermFirstSet;
+ pid.lte(); pid++ )
+ {
+ if ( *pid == -1 )
+ hasEpsilon = true;
+ else {
+ long *inserted = prod->nonTermFirstSet.insert( *pid );
+ if ( inserted != 0 )
+ modified = true;
+ }
+ }
+ }
+
+ if ( hasEpsilon ) {
+ if ( trans->value->toState->isFinState() ) {
+ long *inserted = prod->nonTermFirstSet.insert( -1 );
+ if ( inserted != 0 )
+ modified = true;
+ }
+
+ bool lmod = makeNonTermFirstSetProd( prod, trans->value->toState );
+ if ( lmod )
+ modified = true;
+ }
+ }
+ }
+ return modified;
+}
+
+
+void Compiler::makeNonTermFirstSets()
+{
+ bool modified = true;
+ while ( modified ) {
+ modified = false;
+ for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) {
+ if ( prod->fsm->startState->isFinState() ) {
+ long *inserted = prod->nonTermFirstSet.insert( -1 );
+ if ( inserted != 0 )
+ modified = true;
+ }
+
+ bool lmod = makeNonTermFirstSetProd( prod, prod->fsm->startState );
+ if ( lmod )
+ modified = true;
+ }
+ }
+
+ for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) {
+ if ( prod->nonTermFirstSet.find( prod->prodName->id ) )
+ prod->isLeftRec = true;
+ }
+}
+
+void Compiler::printNonTermFirstSets()
+{
+ for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) {
+ cerr << prod->data << ": ";
+ for ( ProdIdSet::Iter pid = prod->nonTermFirstSet; pid.lte(); pid++ )
+ {
+ if ( *pid < 0 )
+ cerr << " <EPSILON>";
+ else {
+ LangEl *lel = langElIndex[*pid];
+ cerr << " " << lel->name;
+ }
+ }
+ cerr << endl;
+
+ if ( prod->isLeftRec )
+ cerr << "PROD IS LEFT REC: " << prod->data << endl;
+ }
+}
+
+bool Compiler::makeFirstSetProd( Production *prod, PdaState *state )
+{
+ bool modified = false;
+ for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) {
+ if ( trans->key < firstNonTermId ) {
+ long *inserted = prod->firstSet.insert( trans->key );
+ if ( inserted != 0 )
+ modified = true;
+ }
+ else {
+ long *inserted = prod->firstSet.insert( trans->key );
+ if ( inserted != 0 )
+ modified = true;
+
+ LangEl *klangEl = langElIndex[trans->key];
+ if ( klangEl != 0 && klangEl->termDup != 0 ) {
+ long *inserted2 = prod->firstSet.insert( klangEl->termDup->id );
+ if ( inserted2 != 0 )
+ modified = true;
+ }
+
+ bool hasEpsilon = false;
+ LangEl *lel = langElIndex[trans->key];
+ for ( LelDefList::Iter ldef = lel->defList; ldef.lte(); ldef++ ) {
+ for ( ProdIdSet::Iter pid = ldef->firstSet;
+ pid.lte(); pid++ )
+ {
+ if ( *pid == -1 )
+ hasEpsilon = true;
+ else {
+ long *inserted = prod->firstSet.insert( *pid );
+ if ( inserted != 0 )
+ modified = true;
+ }
+ }
+ }
+
+ if ( hasEpsilon ) {
+ if ( trans->value->toState->isFinState() ) {
+ long *inserted = prod->firstSet.insert( -1 );
+ if ( inserted != 0 )
+ modified = true;
+ }
+
+ bool lmod = makeFirstSetProd( prod, trans->value->toState );
+ if ( lmod )
+ modified = true;
+ }
+ }
+ }
+ return modified;
+}
+
+
+void Compiler::makeFirstSets()
+{
+ bool modified = true;
+ while ( modified ) {
+ modified = false;
+ for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) {
+ if ( prod->fsm->startState->isFinState() ) {
+ long *inserted = prod->firstSet.insert( -1 );
+ if ( inserted != 0 )
+ modified = true;
+ }
+
+ bool lmod = makeFirstSetProd( prod, prod->fsm->startState );
+ if ( lmod )
+ modified = true;
+ }
+ }
+}
+
+void Compiler::printFirstSets()
+{
+ for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) {
+ cerr << prod->data << ": ";
+ for ( ProdIdSet::Iter pid = prod->firstSet; pid.lte(); pid++ )
+ {
+ if ( *pid < 0 )
+ cerr << " <EPSILON>";
+ else {
+ LangEl *lel = langElIndex[*pid];
+ if ( lel != 0 )
+ cerr << endl << " " << lel->name;
+ else
+ cerr << endl << " " << *pid;
+ }
+ }
+ cerr << endl;
+ }
+}
+
+void Compiler::insertUniqueEmptyProductions()
+{
+ int limit = prodList.length();
+ for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) {
+ if ( prod->prodId == limit )
+ break;
+
+ /* Get a language element. */
+ char name[20];
+ sprintf(name, "U%li", prodList.length());
+ LangEl *prodName = addLangEl( this, rootNamespace, name, LangEl::NonTerm );
+ Production *newDef = Production::cons( InputLoc(), prodName,
+ 0, String(), false, 0, prodList.length(), prodName->defList.length() );
+ prodName->defList.append( newDef );
+ prodList.append( newDef );
+
+ prod->uniqueEmptyLeader = prodName;
+ }
+}
+
+LocalInfo *Compiler::makeLocalInfo( Locals &locals )
+{
+ LocalInfo *localInfo = new LocalInfo[locals.locals.length()];
+ memset( localInfo, 0, sizeof(LocalInfo) * locals.locals.length() );
+
+ for ( Vector<LocalLoc>::Iter l = locals.locals; l.lte(); l++ ) {
+ localInfo[l.pos()].type = (int) l->type;
+ localInfo[l.pos()].offset = l->offset;
+ }
+ return localInfo;
+}
+
+void Compiler::makeRuntimeData()
+{
+ long count = 0;
+
+ /*
+ * ProdLengths
+ * ProdLhsIs
+ * ProdNames
+ * ProdCodeBlocks
+ * ProdCodeBlockLens
+ */
+
+ runtimeData->frameInfo = new FrameInfo[nextFrameId];
+ runtimeData->numFrames = nextFrameId;
+ memset( runtimeData->frameInfo, 0, sizeof(FrameInfo) * nextFrameId );
+
+ /*
+ * Init code block.
+ */
+ if ( rootCodeBlock == 0 ) {
+ runtimeData->rootCode = 0;
+ runtimeData->rootCodeLen = 0;
+ runtimeData->rootFrameId = 0;
+ }
+ else {
+ runtimeData->rootCode = rootCodeBlock->codeWC.data;
+ runtimeData->rootCodeLen = rootCodeBlock->codeWC.length();
+ runtimeData->rootFrameId = rootCodeBlock->frameId;
+ }
+
+ runtimeData->frameInfo[rootCodeBlock->frameId].codeWV = 0;
+ runtimeData->frameInfo[rootCodeBlock->frameId].codeLenWV = 0;
+
+ runtimeData->frameInfo[rootCodeBlock->frameId].locals = makeLocalInfo( rootCodeBlock->locals );
+ runtimeData->frameInfo[rootCodeBlock->frameId].localsLen = rootCodeBlock->locals.locals.length();
+
+ runtimeData->frameInfo[rootCodeBlock->frameId].frameSize = rootLocalFrame->size();
+ runtimeData->frameInfo[rootCodeBlock->frameId].argSize = 0;
+
+ /*
+ * prodInfo
+ */
+ count = prodList.length();
+ runtimeData->prodInfo = new ProdInfo[count];
+ runtimeData->numProds = count;
+
+ count = 0;
+ for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) {
+ runtimeData->prodInfo[count].lhsId = prod->prodName->id;
+ runtimeData->prodInfo[count].prodNum = prod->prodNum;
+ runtimeData->prodInfo[count].length = prod->fsmLength;
+ runtimeData->prodInfo[count].name = prod->data;
+ runtimeData->prodInfo[count].frameId = -1;
+
+ CodeBlock *block = prod->redBlock;
+ if ( block != 0 ) {
+ runtimeData->prodInfo[count].frameId = block->frameId;
+ runtimeData->frameInfo[block->frameId].codeWV = block->codeWV.data;
+ runtimeData->frameInfo[block->frameId].codeLenWV = block->codeWV.length();
+
+ runtimeData->frameInfo[block->frameId].locals = makeLocalInfo( block->locals );
+ runtimeData->frameInfo[block->frameId].localsLen = block->locals.locals.length();
+
+ runtimeData->frameInfo[block->frameId].frameSize = block->localFrame->size();
+ runtimeData->frameInfo[block->frameId].argSize = 0;
+ }
+
+ runtimeData->prodInfo[count].lhsUpref = true;
+ runtimeData->prodInfo[count].copy = prod->copy.data;
+ runtimeData->prodInfo[count].copyLen = prod->copy.length() / 2;
+ count += 1;
+ }
+
+ /*
+ * regionInfo
+ */
+ runtimeData->numRegions = regionList.length()+1;
+ runtimeData->regionInfo = new RegionInfo[runtimeData->numRegions];
+ memset( runtimeData->regionInfo, 0, sizeof(RegionInfo) * runtimeData->numRegions );
+
+ runtimeData->regionInfo[0].defaultToken = -1;
+ runtimeData->regionInfo[0].eofFrameId = -1;
+ runtimeData->regionInfo[0].ciLelId = 0;
+
+ for ( RegionList::Iter reg = regionList; reg.lte(); reg++ ) {
+ long regId = reg->id+1;
+ runtimeData->regionInfo[regId].defaultToken =
+ reg->impl->defaultTokenInstance == 0 ?
+ -1 :
+ reg->impl->defaultTokenInstance->tokenDef->tdLangEl->id;
+ runtimeData->regionInfo[regId].eofFrameId = -1;
+ runtimeData->regionInfo[regId].ciLelId = reg->zeroLel != 0 ? reg->zeroLel->id : 0;
+
+ CodeBlock *block = reg->preEofBlock;
+ if ( block != 0 ) {
+ runtimeData->regionInfo[regId].eofFrameId = block->frameId;
+ runtimeData->frameInfo[block->frameId].codeWV = block->codeWV.data;
+ runtimeData->frameInfo[block->frameId].codeLenWV = block->codeWV.length();
+
+ runtimeData->frameInfo[block->frameId].locals = makeLocalInfo( block->locals );
+ runtimeData->frameInfo[block->frameId].localsLen = block->locals.locals.length();
+
+ runtimeData->frameInfo[block->frameId].frameSize = block->localFrame->size();
+ runtimeData->frameInfo[block->frameId].argSize = 0;
+ }
+ }
+
+ /*
+ * lelInfo
+ */
+
+ count = nextSymbolId;
+ runtimeData->lelInfo = new LangElInfo[count];
+ runtimeData->numLangEls = count;
+ memset( runtimeData->lelInfo, 0, sizeof(LangElInfo)*count );
+
+ for ( int i = 0; i < nextSymbolId; i++ ) {
+ LangEl *lel = langElIndex[i];
+ if ( lel != 0 ) {
+ runtimeData->lelInfo[i].name = lel->fullLit;
+ runtimeData->lelInfo[i].xmlTag = lel->xmlTag;
+ runtimeData->lelInfo[i].repeat = lel->isRepeat;
+ runtimeData->lelInfo[i].list = lel->isList;
+ runtimeData->lelInfo[i].literal = lel->isLiteral;
+ runtimeData->lelInfo[i].ignore = lel->isIgnore;
+ runtimeData->lelInfo[i].frameId = -1;
+
+ CodeBlock *block = lel->transBlock;
+ if ( block != 0 ) {
+ runtimeData->lelInfo[i].frameId = block->frameId;
+ runtimeData->frameInfo[block->frameId].codeWV = block->codeWV.data;
+ runtimeData->frameInfo[block->frameId].codeLenWV = block->codeWV.length();
+
+ runtimeData->frameInfo[block->frameId].locals = makeLocalInfo( block->locals );
+ runtimeData->frameInfo[block->frameId].localsLen = block->locals.locals.length();
+
+ runtimeData->frameInfo[block->frameId].frameSize = block->localFrame->size();
+ runtimeData->frameInfo[block->frameId].argSize = 0;
+ }
+
+ runtimeData->lelInfo[i].objectTypeId =
+ lel->objectDef == 0 ? 0 : lel->objectDef->id;
+ runtimeData->lelInfo[i].ofiOffset = lel->ofiOffset;
+ runtimeData->lelInfo[i].objectLength =
+ lel->objectDef != 0 ? lel->objectDef->size() : 0;
+
+// runtimeData->lelInfo[i].contextTypeId = 0;
+// lel->context == 0 ? 0 : lel->context->contextObjDef->id;
+// runtimeData->lelInfo[i].contextLength = 0; //lel->context == 0 ? 0 :
+// lel->context->contextObjDef->size();
+// if ( lel->context != 0 ) {
+// cout << "type: " << runtimeData->lelInfo[i].contextTypeId << " length: " <<
+// runtimeData->lelInfo[i].contextLength << endl;
+// }
+
+ runtimeData->lelInfo[i].termDupId = lel->termDup == 0 ? 0 : lel->termDup->id;
+ runtimeData->lelInfo[i].genericId = lel->generic == 0 ? 0 : lel->generic->id;
+
+ if ( lel->tokenDef != 0 && lel->tokenDef->join != 0 &&
+ lel->tokenDef->join->context != 0 )
+ runtimeData->lelInfo[i].markId = lel->tokenDef->join->mark->markId;
+ else
+ runtimeData->lelInfo[i].markId = -1;
+
+ runtimeData->lelInfo[i].numCaptureAttr = 0;
+ }
+ else {
+ memset(&runtimeData->lelInfo[i], 0, sizeof(LangElInfo) );
+ runtimeData->lelInfo[i].name = "__UNUSED";
+ runtimeData->lelInfo[i].xmlTag = "__UNUSED";
+ runtimeData->lelInfo[i].frameId = -1;
+ }
+ }
+
+ /*
+ * FunctionInfo
+ */
+ count = functionList.length();
+
+ runtimeData->functionInfo = new FunctionInfo[count];
+ runtimeData->numFunctions = count;
+ memset( runtimeData->functionInfo, 0, sizeof(FunctionInfo)*count );
+ for ( FunctionList::Iter func = functionList; func.lte(); func++ ) {
+ runtimeData->functionInfo[func->funcId].name = func->name;
+ runtimeData->functionInfo[func->funcId].frameId = -1;
+
+ CodeBlock *block = func->codeBlock;
+ if ( block != 0 ) {
+ runtimeData->functionInfo[func->funcId].frameId = block->frameId;
+
+ runtimeData->frameInfo[block->frameId].codeWV = block->codeWV.data;
+ runtimeData->frameInfo[block->frameId].codeLenWV = block->codeWV.length();
+
+ runtimeData->frameInfo[block->frameId].codeWC = block->codeWC.data;
+ runtimeData->frameInfo[block->frameId].codeLenWC = block->codeWC.length();
+
+ runtimeData->frameInfo[block->frameId].locals = makeLocalInfo( block->locals );
+ runtimeData->frameInfo[block->frameId].localsLen = block->locals.locals.length();
+
+ runtimeData->frameInfo[block->frameId].frameSize = func->localFrame->size();
+ runtimeData->frameInfo[block->frameId].argSize = func->paramListSize;
+ }
+
+ runtimeData->functionInfo[func->funcId].frameSize = func->localFrame->size();
+ runtimeData->functionInfo[func->funcId].argSize = func->paramListSize;
+ }
+
+ /*
+ * PatConsInfo
+ */
+
+ /* Filled in later after patterns are parsed. */
+ runtimeData->patReplInfo = new PatConsInfo[nextPatConsId];
+ memset( runtimeData->patReplInfo, 0, sizeof(PatConsInfo) * nextPatConsId );
+ runtimeData->numPatterns = nextPatConsId;
+ runtimeData->patReplNodes = 0;
+ runtimeData->numPatternNodes = 0;
+
+
+ /*
+ * GenericInfo
+ */
+ count = 1;
+ for ( NamespaceList::Iter nspace = namespaceList; nspace.lte(); nspace++ )
+ count += nspace->genericList.length();
+ assert( count == nextGenericId );
+
+ runtimeData->genericInfo = new GenericInfo[count];
+ runtimeData->numGenerics = count;
+ memset( &runtimeData->genericInfo[0], 0, sizeof(GenericInfo) );
+ for ( NamespaceList::Iter nspace = namespaceList; nspace.lte(); nspace++ ) {
+ for ( GenericList::Iter gen = nspace->genericList; gen.lte(); gen++ ) {
+ runtimeData->genericInfo[gen->id].type = gen->typeId;
+ runtimeData->genericInfo[gen->id].typeArg = gen->utArg->typeId;
+ runtimeData->genericInfo[gen->id].keyType = gen->keyUT != 0 ?
+ gen->keyUT->typeId : 0;
+ runtimeData->genericInfo[gen->id].keyOffset = 0;
+ runtimeData->genericInfo[gen->id].langElId = gen->langEl->id;
+ runtimeData->genericInfo[gen->id].parserId = gen->utArg->langEl->parserId;
+ }
+ }
+
+ runtimeData->argvGenericId = argvTypeRef->generic->id;
+
+ /*
+ * Literals
+ */
+ runtimeData->numLiterals = literalStrings.length();
+ runtimeData->litdata = new const char *[literalStrings.length()];
+ runtimeData->litlen = new long [literalStrings.length()];
+ runtimeData->literals = 0;
+ for ( StringMap::Iter el = literalStrings; el.lte(); el++ ) {
+ /* Data. */
+ char *data = new char[el->key.length()+1];
+ memcpy( data, el->key.data, el->key.length() );
+ data[el->key.length()] = 0;
+ runtimeData->litdata[el->value] = data;
+
+ /* Length. */
+ runtimeData->litlen[el->value] = el->key.length();
+ }
+
+ /* Captured attributes. Loop over tokens and count first. */
+ long numCapturedAttr = 0;
+// for ( RegionList::Iter reg = regionList; reg.lte(); reg++ ) {
+// for ( TokenInstanceListReg::Iter td = reg->tokenInstanceList; td.lte(); td++ )
+// numCapturedAttr += td->reCaptureVect.length();
+// }
+ runtimeData->captureAttr = new CaptureAttr[numCapturedAttr];
+ runtimeData->numCapturedAttr = numCapturedAttr;
+ memset( runtimeData->captureAttr, 0, sizeof( CaptureAttr ) * numCapturedAttr );
+
+ count = 0;
+// for ( RegionList::Iter reg = regionList; reg.lte(); reg++ ) {
+// for ( TokenInstanceListReg::Iter td = reg->tokenInstanceList; td.lte(); td++ ) {
+// runtimeData->lelInfo[td->token->id].captureAttr = count;
+// runtimeData->lelInfo[td->token->id].numCaptureAttr = td->reCaptureVect.length();
+// for ( ReCaptureVect::Iter c = td->reCaptureVect; c.lte(); c++ ) {
+// runtimeData->captureAttr[count].mark_enter = c->markEnter->markId;
+// runtimeData->captureAttr[count].mark_leave = c->markLeave->markId;
+// runtimeData->captureAttr[count].offset = c->objField->offset;
+//
+// count += 1;
+// }
+// }
+// }
+
+ runtimeData->fsmTables = fsmTables;
+ runtimeData->pdaTables = pdaTables;
+
+ /* FIXME: need a parser descriptor. */
+ runtimeData->startStates = new int[nextParserId];
+ runtimeData->eofLelIds = new int[nextParserId];
+ runtimeData->parserLelIds = new int[nextParserId];
+ runtimeData->numParsers = nextParserId;
+ for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) {
+ if ( lel->parserId >= 0 ) {
+ runtimeData->startStates[lel->parserId] = lel->startState->stateNum;
+ runtimeData->eofLelIds[lel->parserId] = lel->eofLel->id;
+ runtimeData->parserLelIds[lel->parserId] = lel->id;
+ }
+ }
+
+ runtimeData->globalSize = globalObjectDef->size();
+
+ /*
+ * firstNonTermId
+ */
+ runtimeData->firstNonTermId = firstNonTermId;
+
+ /* Special trees. */
+ runtimeData->integerId = intLangEl->id;
+ runtimeData->stringId = strLangEl->id;
+ runtimeData->anyId = anyLangEl->id;
+ runtimeData->eofId = 0; //eofLangEl->id;
+ runtimeData->noTokenId = noTokenLangEl->id;
+
+ runtimeData->fsmExecute = &internalFsmExecute;
+ runtimeData->sendNamedLangEl = &internalSendNamedLangEl;
+ runtimeData->initBindings = &internalInitBindings;
+ runtimeData->popBinding = &internalPopBinding;
+}
+
+/* Borrow alg->state for mapsTo. */
+void countNodes( Program *prg, int &count, ParseTree *parseTree, Kid *kid )
+{
+ if ( kid != 0 ) {
+ count += 1;
+
+ /* Should't have to recurse here. */
+ Tree *ignoreList = treeLeftIgnore( prg, kid->tree );
+ if ( ignoreList != 0 ) {
+ Kid *ignore = ignoreList->child;
+ while ( ignore != 0 ) {
+ count += 1;
+ ignore = ignore->next;
+ }
+ }
+
+ ignoreList = treeRightIgnore( prg, kid->tree );
+ if ( ignoreList != 0 ) {
+ Kid *ignore = ignoreList->child;
+ while ( ignore != 0 ) {
+ count += 1;
+ ignore = ignore->next;
+ }
+ }
+
+ //count += prg->rtd->lelInfo[kid->tree->id].numCaptureAttr;
+
+ if ( !( parseTree->flags & PF_NAMED ) &&
+ !( parseTree->flags & PF_ARTIFICIAL ) &&
+ treeChild( prg, kid->tree ) != 0 )
+ {
+ countNodes( prg, count, parseTree->child, treeChild( prg, kid->tree ) );
+ }
+ countNodes( prg, count, parseTree->next, kid->next );
+ }
+}
+
+void fillNodes( Program *prg, int &nextAvail, Bindings *bindings, long &bindId,
+ PatConsNode *nodes, ParseTree *parseTree, Kid *kid, int ind )
+{
+ if ( kid != 0 ) {
+ PatConsNode &node = nodes[ind];
+
+ Kid *child =
+ !( parseTree->flags & PF_NAMED ) &&
+ !( parseTree->flags & PF_ARTIFICIAL ) &&
+ treeChild( prg, kid->tree ) != 0
+ ?
+ treeChild( prg, kid->tree ) : 0;
+
+ ParseTree *ptChild =
+ !( parseTree->flags & PF_NAMED ) &&
+ !( parseTree->flags & PF_ARTIFICIAL ) &&
+ treeChild( prg, kid->tree ) != 0
+ ?
+ parseTree->child : 0;
+
+ /* Set up the fields. */
+ node.id = kid->tree->id;
+ node.prodNum = kid->tree->prodNum;
+ node.length = stringLength( kid->tree->tokdata );
+ node.data = stringData( kid->tree->tokdata );
+
+ /* Ignore items. */
+ Tree *ignoreList = treeLeftIgnore( prg, kid->tree );
+ Kid *ignore = ignoreList == 0 ? 0 : ignoreList->child;
+ node.leftIgnore = ignore == 0 ? -1 : nextAvail;
+
+ while ( ignore != 0 ) {
+ PatConsNode &node = nodes[nextAvail++];
+
+ memset( &node, 0, sizeof(PatConsNode) );
+ node.id = ignore->tree->id;
+ node.prodNum = ignore->tree->prodNum;
+ node.next = ignore->next == 0 ? -1 : nextAvail;
+
+ node.length = stringLength( ignore->tree->tokdata );
+ node.data = stringData( ignore->tree->tokdata );
+
+ ignore = ignore->next;
+ }
+
+ /* Ignore items. */
+ ignoreList = treeRightIgnore( prg, kid->tree );
+ ignore = ignoreList == 0 ? 0 : ignoreList->child;
+ node.rightIgnore = ignore == 0 ? -1 : nextAvail;
+
+ while ( ignore != 0 ) {
+ PatConsNode &node = nodes[nextAvail++];
+
+ memset( &node, 0, sizeof(PatConsNode) );
+ node.id = ignore->tree->id;
+ node.prodNum = ignore->tree->prodNum;
+ node.next = ignore->next == 0 ? -1 : nextAvail;
+
+ node.length = stringLength( ignore->tree->tokdata );
+ node.data = stringData( ignore->tree->tokdata );
+
+ ignore = ignore->next;
+ }
+
+ ///* The captured attributes. */
+ //for ( int i = 0; i < prg->rtd->lelInfo[kid->tree->id].numCaptureAttr; i++ ) {
+ // CaptureAttr *cap = prg->rtd->captureAttr +
+ // prg->rtd->lelInfo[kid->tree->id].captureAttr + i;
+ //
+ // Tree *attr = colm_get_attr( kid->tree, cap->offset );
+ //
+ // PatConsNode &node = nodes[nextAvail++];
+ // memset( &node, 0, sizeof(PatConsNode) );
+ //
+ // node.id = attr->id;
+ // node.prodNum = attr->prodNum;
+ // node.length = stringLength( attr->tokdata );
+ // node.data = stringData( attr->tokdata );
+ //}
+
+ node.stop = parseTree->flags & PF_TERM_DUP;
+
+ node.child = child == 0 ? -1 : nextAvail++;
+
+ /* Recurse. */
+ fillNodes( prg, nextAvail, bindings, bindId, nodes, ptChild, child, node.child );
+
+ /* Since the parser is bottom up the bindings are in a bottom up
+ * traversal order. Check after recursing. */
+ node.bindId = 0;
+ if ( bindId < bindings->length() && bindings->data[bindId] == parseTree ) {
+ /* Remember that binding ids are indexed from one. */
+ node.bindId = bindId++;
+
+ //cout << "binding match in " << __PRETTY_FUNCTION__ << endl;
+ //cout << "bindId: " << node.bindId << endl;
+ }
+
+ node.next = kid->next == 0 ? -1 : nextAvail++;
+
+ /* Move to the next child. */
+ fillNodes( prg, nextAvail, bindings, bindId, nodes, parseTree->next, kid->next, node.next );
+ }
+}
+
+void Compiler::fillInPatterns( Program *prg )
+{
+ /*
+ * patReplNodes
+ */
+
+ /* Count is referenced and computed by mapNode. */
+ int count = 0;
+ for ( PatList::Iter pat = patternList; pat.lte(); pat++ ) {
+ countNodes( prg, count,
+ pat->pdaRun->stackTop->next,
+ pat->pdaRun->stackTop->next->shadow );
+ }
+
+ for ( ConsList::Iter repl = replList; repl.lte(); repl++ ) {
+ countNodes( prg, count,
+ repl->pdaRun->stackTop->next,
+ repl->pdaRun->stackTop->next->shadow );
+ }
+
+ runtimeData->patReplNodes = new PatConsNode[count];
+ runtimeData->numPatternNodes = count;
+
+ int nextAvail = 0;
+
+ for ( PatList::Iter pat = patternList; pat.lte(); pat++ ) {
+ int ind = nextAvail++;
+ runtimeData->patReplInfo[pat->patRepId].offset = ind;
+
+ /* BindIds are indexed base one. */
+ runtimeData->patReplInfo[pat->patRepId].numBindings =
+ pat->pdaRun->bindings->length() - 1;
+
+ /* Init the bind */
+ long bindId = 1;
+ fillNodes( prg, nextAvail, pat->pdaRun->bindings, bindId,
+ runtimeData->patReplNodes,
+ pat->pdaRun->stackTop->next,
+ pat->pdaRun->stackTop->next->shadow,
+ ind );
+ }
+
+ for ( ConsList::Iter repl = replList; repl.lte(); repl++ ) {
+ int ind = nextAvail++;
+ runtimeData->patReplInfo[repl->patRepId].offset = ind;
+
+ /* BindIds are indexed base one. */
+ runtimeData->patReplInfo[repl->patRepId].numBindings =
+ repl->pdaRun->bindings->length() - 1;
+
+ long bindId = 1;
+ fillNodes( prg, nextAvail, repl->pdaRun->bindings, bindId,
+ runtimeData->patReplNodes,
+ repl->pdaRun->stackTop->next,
+ repl->pdaRun->stackTop->next->shadow,
+ ind );
+ }
+
+ assert( nextAvail == count );
+}
+
+
+int Compiler::findIndexOff( PdaTables *pdaTables, PdaGraph *pdaGraph, PdaState *state, int &curLen )
+{
+ for ( int start = 0; start < curLen; ) {
+ int offset = start;
+ for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) {
+ if ( pdaTables->owners[offset] != -1 )
+ goto next_start;
+
+ offset++;
+ if ( ! trans.last() ) {
+ TransMap::Iter next = trans.next();
+ offset += next->key - trans->key - 1;
+ }
+ }
+
+ /* Got though the whole list without a conflict. */
+ return start;
+
+next_start:
+ start++;
+ }
+
+ return curLen;
+}
+
+struct CmpSpan
+{
+ static int compare( PdaState *state1, PdaState *state2 )
+ {
+ int dist1 = 0, dist2 = 0;
+
+ if ( state1->transMap.length() > 0 ) {
+ TransMap::Iter first1 = state1->transMap.first();
+ TransMap::Iter last1 = state1->transMap.last();
+ dist1 = last1->key - first1->key;
+ }
+
+ if ( state2->transMap.length() > 0 ) {
+ TransMap::Iter first2 = state2->transMap.first();
+ TransMap::Iter last2 = state2->transMap.last();
+ dist2 = last2->key - first2->key;
+ }
+
+ if ( dist1 < dist2 )
+ return 1;
+ else if ( dist2 < dist1 )
+ return -1;
+ return 0;
+ }
+};
+
+PdaGraph *Compiler::makePdaGraph( LangElSet &parserEls )
+{
+ //for ( DefList::Iter prod = prodList; prod.lte(); prod++ )
+ // cerr << prod->prodId << " " << prod->data << endl;
+
+ PdaGraph *pdaGraph = new PdaGraph();
+ lalr1GenerateParser( pdaGraph, parserEls );
+ pdaGraph->setStateNumbers();
+ analyzeMachine( pdaGraph, parserEls );
+
+ //cerr << "NUMBER OF STATES: " << pdaGraph->stateList.length() << endl;
+
+ return pdaGraph;
+}
+
+PdaTables *Compiler::makePdaTables( PdaGraph *pdaGraph )
+{
+ int count, pos;
+ PdaTables *pdaTables = new PdaTables;
+
+ /*
+ * Counting max indices.
+ */
+ count = 0;
+ for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) {
+ for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) {
+ count++;
+ if ( ! trans.last() ) {
+ TransMap::Iter next = trans.next();
+ count += next->key - trans->key - 1;
+ }
+ }
+ }
+
+
+ /* Allocate indicies and owners. */
+ pdaTables->numIndicies = count;
+ pdaTables->indicies = new int[count];
+ pdaTables->owners = new int[count];
+ for ( long i = 0; i < count; i++ ) {
+ pdaTables->indicies[i] = -1;
+ pdaTables->owners[i] = -1;
+ }
+
+ /* Allocate offsets. */
+ int numStates = pdaGraph->stateList.length();
+ pdaTables->offsets = new unsigned int[numStates];
+ pdaTables->numStates = numStates;
+
+ /* Place transitions into indicies/owners */
+ PdaState **states = new PdaState*[numStates];
+ long ds = 0;
+ for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ )
+ states[ds++] = state;
+
+ /* Sorting baseded on span length. Gives an improvement, but incures a
+ * cost. Off for now. */
+ //MergeSort< PdaState*, CmpSpan > mergeSort;
+ //mergeSort.sort( states, numStates );
+
+ int indLen = 0;
+ for ( int s = 0; s < numStates; s++ ) {
+ PdaState *state = states[s];
+
+ int indOff = findIndexOff( pdaTables, pdaGraph, state, indLen );
+ pdaTables->offsets[state->stateNum] = indOff;
+
+ for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) {
+ pdaTables->indicies[indOff] = trans->value->actionSetEl->key.id;
+ pdaTables->owners[indOff] = state->stateNum;
+ indOff++;
+
+ if ( ! trans.last() ) {
+ TransMap::Iter next = trans.next();
+ indOff += next->key - trans->key - 1;
+ }
+ }
+
+ if ( indOff > indLen )
+ indLen = indOff;
+ }
+
+ /* We allocated the max, but cmpression gives us less. */
+ pdaTables->numIndicies = indLen;
+ delete[] states;
+
+
+ /*
+ * Keys
+ */
+ count = pdaGraph->stateList.length() * 2;;
+ pdaTables->keys = new int[count];
+ pdaTables->numKeys = count;
+
+ count = 0;
+ for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) {
+ if ( state->transMap.length() == 0 ) {
+ pdaTables->keys[count+0] = 0;
+ pdaTables->keys[count+1] = 0;
+ }
+ else {
+ TransMap::Iter first = state->transMap.first();
+ TransMap::Iter last = state->transMap.last();
+ pdaTables->keys[count+0] = first->key;
+ pdaTables->keys[count+1] = last->key;
+ }
+ count += 2;
+ }
+
+ /*
+ * Targs
+ */
+ count = pdaGraph->actionSet.length();
+ pdaTables->targs = new unsigned int[count];
+ pdaTables->numTargs = count;
+
+ count = 0;
+ for ( PdaActionSet::Iter asi = pdaGraph->actionSet; asi.lte(); asi++ )
+ pdaTables->targs[count++] = asi->key.targ;
+
+ /*
+ * ActInds
+ */
+ count = pdaGraph->actionSet.length();
+ pdaTables->actInds = new unsigned int[count];
+ pdaTables->numActInds = count;
+
+ count = pos = 0;
+ for ( PdaActionSet::Iter asi = pdaGraph->actionSet; asi.lte(); asi++ ) {
+ pdaTables->actInds[count++] = pos;
+ pos += asi->key.actions.length() + 1;
+ }
+
+ /*
+ * Actions
+ */
+ count = 0;
+ for ( PdaActionSet::Iter asi = pdaGraph->actionSet; asi.lte(); asi++ )
+ count += asi->key.actions.length() + 1;
+
+ pdaTables->actions = new unsigned int[count];
+ pdaTables->numActions = count;
+
+ count = 0;
+ for ( PdaActionSet::Iter asi = pdaGraph->actionSet; asi.lte(); asi++ ) {
+ for ( ActDataList::Iter ali = asi->key.actions; ali.lte(); ali++ )
+ pdaTables->actions[count++] = *ali;
+
+ pdaTables->actions[count++] = 0;
+ }
+
+ /*
+ * CommitLen
+ */
+ count = pdaGraph->actionSet.length();
+ pdaTables->commitLen = new int[count];
+ pdaTables->numCommitLen = count;
+
+ count = 0;
+ for ( PdaActionSet::Iter asi = pdaGraph->actionSet; asi.lte(); asi++ )
+ pdaTables->commitLen[count++] = asi->key.commitLen;
+
+ /*
+ * tokenRegionInds. Start at one so region index 0 is null (unset).
+ */
+ count = 0;
+ pos = 1;
+ pdaTables->tokenRegionInds = new int[pdaTables->numStates];
+ for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) {
+ pdaTables->tokenRegionInds[count++] = pos;
+ pos += state->regions.length() + 1;
+ }
+
+
+ /*
+ * tokenRegions. Build in a null at the beginning.
+ */
+
+ count = 1;
+ for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ )
+ count += state->regions.length() + 1;
+
+ pdaTables->numRegionItems = count;
+ pdaTables->tokenRegions = new int[pdaTables->numRegionItems];
+
+ count = 0;
+ pdaTables->tokenRegions[count++] = 0;
+ for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) {
+ for ( RegionVect::Iter reg = state->regions; reg.lte(); reg++ ) {
+ int id = ( *reg == EOF_REGION ) ? 0 : (*reg)->id + 1;
+ pdaTables->tokenRegions[count++] = id;
+ }
+
+ pdaTables->tokenRegions[count++] = 0;
+ }
+
+ /*
+ * tokenPreRegions. Build in a null at the beginning.
+ */
+
+ count = 1;
+ for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ )
+ count += state->regions.length() + 1;
+
+ pdaTables->numPreRegionItems = count;
+ pdaTables->tokenPreRegions = new int[pdaTables->numPreRegionItems];
+
+ count = 0;
+ pdaTables->tokenPreRegions[count++] = 0;
+ for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) {
+ for ( RegionVect::Iter reg = state->regions; reg.lte(); reg++ ) {
+ assert( state->preRegions.length() <= 1 );
+ if ( state->preRegions.length() == 0 || state->preRegions[0]->impl->wasEmpty )
+ pdaTables->tokenPreRegions[count++] = -1;
+ else
+ pdaTables->tokenPreRegions[count++] = state->preRegions[0]->id + 1;
+ }
+
+ pdaTables->tokenPreRegions[count++] = 0;
+ }
+
+
+ return pdaTables;
+}
+
+void Compiler::makeParser( LangElSet &parserEls )
+{
+ pdaGraph = makePdaGraph( parserEls );
+ pdaTables = makePdaTables( pdaGraph );
+}
+