1 files changed, 1170 insertions, 0 deletions
diff --git a/src/compiler.cc b/src/compiler.cc
new file mode 100644
index 0000000..d60519e
--- /dev/null
+++ b/src/compiler.cc
@@ -0,0 +1,1170 @@
+/*
+ *  Copyright 2006-2012 Adrian Thurston <thurston@complang.org>
+ */
+
+/*  This file is part of Colm.
+ *
+ *  Colm is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ * 
+ *  Colm is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ * 
+ *  You should have received a copy of the GNU General Public License
+ *  along with Colm; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
+ */
+
+#include <iostream>
+#include <iomanip>
+#include <errno.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <sstream>
+
+#include "global.h"
+#include "avltree.h"
+#include "parsedata.h"
+#include "parser.h"
+#include "parsedata.h"
+#include "parsetree.h"
+#include "mergesort.h"
+#include "redbuild.h"
+#include "pdacodegen.h"
+#include "fsmcodegen.h"
+#include "pdarun.h"
+#include "colm.h"
+#include "pool.h"
+
+using std::ostringstream;
+using std::cout;
+using std::cerr;
+using std::endl;
+
+char machineMain[] = "main";
+exit_object endp;
+void operator<<( ostream &out, exit_object & )
+{
+	out << endl;
+	exit(1);
+}
+
+/* Perform minimization after an operation according 
+ * to the command line args. */
+void afterOpMinimize( FsmGraph *fsm, bool lastInSeq )
+{
+	/* Switch on the prefered minimization algorithm. */
+	if ( lastInSeq ) {
+		/* First clean up the graph. FsmGraph operations may leave these
+		 * lying around. There should be no dead end states. The subtract
+		 * intersection operators are the only places where they may be
+		 * created and those operators clean them up. */
+		fsm->removeUnreachableStates();
+		fsm->minimizePartition2();
+	}
+}
+
+/* Count the transitions in the fsm by walking the state list. */
+int countTransitions( FsmGraph *fsm )
+{
+	int numTrans = 0;
+	FsmState *state = fsm->stateList.head;
+	while ( state != 0 ) {
+		numTrans += state->outList.length();
+		state = state->next;
+	}
+	return numTrans;
+}
+
+Key makeFsmKeyHex( char *str, const InputLoc &loc, Compiler *pd )
+{
+	/* Reset errno so we can check for overflow or underflow. In the event of
+	 * an error, sets the return val to the upper or lower bound being tested
+	 * against. */
+	errno = 0;
+	unsigned int size = keyOps->alphType->size;
+	bool unusedBits = size < sizeof(unsigned long);
+
+	unsigned long ul = strtoul( str, 0, 16 );
+
+	if ( errno == ERANGE || (unusedBits && ul >> (size * 8)) ) {
+		error(loc) << "literal " << str << " overflows the alphabet type" << endl;
+		ul = 1 << (size * 8);
+	}
+
+	if ( unusedBits && ul >> (size * 8 - 1) )
+		ul |= (ULONG_MAX >> (size*8 ) ) << (size*8);
+
+	return Key( (long)ul );
+}
+
+Key makeFsmKeyDec( char *str, const InputLoc &loc, Compiler *pd )
+{
+	/* Convert the number to a decimal. First reset errno so we can check
+	 * for overflow or underflow. */
+	errno = 0;
+	long long minVal = keyOps->alphType->minVal;
+	long long maxVal = keyOps->alphType->maxVal;
+
+	long long ll = strtoll( str, 0, 10 );
+
+	/* Check for underflow. */
+	if ( (errno == ERANGE && ll < 0) || ll < minVal) {
+		error(loc) << "literal " << str << " underflows the alphabet type" << endl;
+		ll = minVal;
+	}
+	/* Check for overflow. */
+	else if ( (errno == ERANGE && ll > 0) || ll > maxVal ) {
+		error(loc) << "literal " << str << " overflows the alphabet type" << endl;
+		ll = maxVal;
+	}
+
+	return Key( (long)ll );
+}
+
+/* Make an fsm key in int format (what the fsm graph uses) from an alphabet
+ * number returned by the parser. Validates that the number doesn't overflow
+ * the alphabet type. */
+Key makeFsmKeyNum( char *str, const InputLoc &loc, Compiler *pd )
+{
+	/* Switch on hex/decimal format. */
+	if ( str[0] == '0' && str[1] == 'x' )
+		return makeFsmKeyHex( str, loc, pd );
+	else
+		return makeFsmKeyDec( str, loc, pd );
+}
+
+/* Make an fsm int format (what the fsm graph uses) from a single character.
+ * Performs proper conversion depending on signed/unsigned property of the
+ * alphabet. */
+Key makeFsmKeyChar( char c, Compiler *pd )
+{
+	/* Copy from a char type. */
+	return Key( c );
+}
+
+/* Make an fsm key array in int format (what the fsm graph uses) from a string
+ * of characters. Performs proper conversion depending on signed/unsigned
+ * property of the alphabet. */
+void makeFsmKeyArray( Key *result, char *data, int len, Compiler *pd )
+{
+	/* Copy from a char star type. */
+	char *src = data;
+	for ( int i = 0; i < len; i++ )
+		result[i] = Key(src[i]);
+}
+
+/* Like makeFsmKeyArray except the result has only unique keys. They ordering
+ * will be changed. */
+void makeFsmUniqueKeyArray( KeySet &result, char *data, int len, 
+		bool caseInsensitive, Compiler *pd )
+{
+	/* Copy from a char star type. */
+	char *src = data;
+	for ( int si = 0; si < len; si++ ) {
+		Key key( src[si] );
+		result.insert( key );
+		if ( caseInsensitive ) {
+			if ( key.isLower() )
+				result.insert( key.toUpper() );
+			else if ( key.isUpper() )
+				result.insert( key.toLower() );
+		}
+	}
+}
+
+FsmGraph *dotFsm( Compiler *pd )
+{
+	FsmGraph *retFsm = new FsmGraph();
+	retFsm->rangeFsm( keyOps->minKey, keyOps->maxKey );
+	return retFsm;
+}
+
+FsmGraph *dotStarFsm( Compiler *pd )
+{
+	FsmGraph *retFsm = new FsmGraph();
+	retFsm->rangeStarFsm( keyOps->minKey, keyOps->maxKey );
+	return retFsm;
+}
+
+/* Make a builtin type. Depends on the signed nature of the alphabet type. */
+FsmGraph *makeBuiltin( BuiltinMachine builtin, Compiler *pd )
+{
+	/* FsmGraph created to return. */
+	FsmGraph *retFsm = 0;
+
+	switch ( builtin ) {
+	case BT_Any: {
+		/* All characters. */
+		retFsm = dotFsm( pd );
+		break;
+	}
+	case BT_Ascii: {
+		/* Ascii characters 0 to 127. */
+		retFsm = new FsmGraph();
+		retFsm->rangeFsm( 0, 127 );
+		break;
+	}
+	case BT_Extend: {
+		/* Ascii extended characters. This is the full byte range. Dependent
+		 * on signed, vs no signed. If the alphabet is one byte then just use
+		 * dot fsm. */
+		retFsm = new FsmGraph();
+		retFsm->rangeFsm( -128, 127 );
+		break;
+	}
+	case BT_Alpha: {
+		/* Alpha [A-Za-z]. */
+		FsmGraph *upper = new FsmGraph(), *lower = new FsmGraph();
+		upper->rangeFsm( 'A', 'Z' );
+		lower->rangeFsm( 'a', 'z' );
+		upper->unionOp( lower );
+		upper->minimizePartition2();
+		retFsm = upper;
+		break;
+	}
+	case BT_Digit: {
+		/* Digits [0-9]. */
+		retFsm = new FsmGraph();
+		retFsm->rangeFsm( '0', '9' );
+		break;
+	}
+	case BT_Alnum: {
+		/* Alpha numerics [0-9A-Za-z]. */
+		FsmGraph *digit = new FsmGraph(), *lower = new FsmGraph();
+		FsmGraph *upper = new FsmGraph();
+		digit->rangeFsm( '0', '9' );
+		upper->rangeFsm( 'A', 'Z' );
+		lower->rangeFsm( 'a', 'z' );
+		digit->unionOp( upper );
+		digit->unionOp( lower );
+		digit->minimizePartition2();
+		retFsm = digit;
+		break;
+	}
+	case BT_Lower: {
+		/* Lower case characters. */
+		retFsm = new FsmGraph();
+		retFsm->rangeFsm( 'a', 'z' );
+		break;
+	}
+	case BT_Upper: {
+		/* Upper case characters. */
+		retFsm = new FsmGraph();
+		retFsm->rangeFsm( 'A', 'Z' );
+		break;
+	}
+	case BT_Cntrl: {
+		/* Control characters. */
+		FsmGraph *cntrl = new FsmGraph();
+		FsmGraph *highChar = new FsmGraph();
+		cntrl->rangeFsm( 0, 31 );
+		highChar->concatFsm( 127 );
+		cntrl->unionOp( highChar );
+		cntrl->minimizePartition2();
+		retFsm = cntrl;
+		break;
+	}
+	case BT_Graph: {
+		/* Graphical ascii characters [!-~]. */
+		retFsm = new FsmGraph();
+		retFsm->rangeFsm( '!', '~' );
+		break;
+	}
+	case BT_Print: {
+		/* Printable characters. Same as graph except includes space. */
+		retFsm = new FsmGraph();
+		retFsm->rangeFsm( ' ', '~' );
+		break;
+	}
+	case BT_Punct: {
+		/* Punctuation. */
+		FsmGraph *range1 = new FsmGraph();
+		FsmGraph *range2 = new FsmGraph();
+		FsmGraph *range3 = new FsmGraph(); 
+		FsmGraph *range4 = new FsmGraph();
+		range1->rangeFsm( '!', '/' );
+		range2->rangeFsm( ':', '@' );
+		range3->rangeFsm( '[', '`' );
+		range4->rangeFsm( '{', '~' );
+		range1->unionOp( range2 );
+		range1->unionOp( range3 );
+		range1->unionOp( range4 );
+		range1->minimizePartition2();
+		retFsm = range1;
+		break;
+	}
+	case BT_Space: {
+		/* Whitespace: [\t\v\f\n\r ]. */
+		FsmGraph *cntrl = new FsmGraph();
+		FsmGraph *space = new FsmGraph();
+		cntrl->rangeFsm( '\t', '\r' );
+		space->concatFsm( ' ' );
+		cntrl->unionOp( space );
+		cntrl->minimizePartition2();
+		retFsm = cntrl;
+		break;
+	}
+	case BT_Xdigit: {
+		/* Hex digits [0-9A-Fa-f]. */
+		FsmGraph *digit = new FsmGraph();
+		FsmGraph *upper = new FsmGraph();
+		FsmGraph *lower = new FsmGraph();
+		digit->rangeFsm( '0', '9' );
+		upper->rangeFsm( 'A', 'F' );
+		lower->rangeFsm( 'a', 'f' );
+		digit->unionOp( upper );
+		digit->unionOp( lower );
+		digit->minimizePartition2();
+		retFsm = digit;
+		break;
+	}
+	case BT_Lambda: {
+		retFsm = new FsmGraph();
+		retFsm->lambdaFsm();
+		break;
+	}
+	case BT_Empty: {
+		retFsm = new FsmGraph();
+		retFsm->emptyFsm();
+		break;
+	}}
+
+	return retFsm;
+}
+
+/*
+ * Compiler
+ */
+
+/* Initialize the structure that will collect info during the parse of a
+ * machine. */
+Compiler::Compiler( )
+:	
+	nextPriorKey(0),
+	nextNameId(0),
+	alphTypeSet(false),
+	getKeyExpr(0),
+	accessExpr(0),
+	curStateExpr(0),
+	lowerNum(0),
+	upperNum(0),
+	errorCount(0),
+	curActionOrd(0),
+	curPriorOrd(0),
+	nextEpsilonResolvedLink(0),
+	nextTokenId(1),
+	rootCodeBlock(0),
+	mainReturnUT(0),
+	//access(0),
+	//tokenStruct(0),
+
+	ptrLangEl(0),
+	boolLangEl(0),
+	intLangEl(0),
+	strLangEl(0),
+	streamLangEl(0),
+	anyLangEl(0),
+	rootLangEl(0),
+	noTokenLangEl(0),
+	eofLangEl(0),
+	errorLangEl(0),
+	ignoreLangEl(0),
+
+	firstNonTermId(0),
+	prodIdIndex(0),
+
+	uniqueTypeNil(0),
+	uniqueTypePtr(0),
+	uniqueTypeBool(0),
+	uniqueTypeInt(0),
+	uniqueTypeStr(0),
+	uniqueTypeStream(0),
+	uniqueTypeIgnore(0),
+	uniqueTypeAny(0),
+	nextPatConsId(0),
+	nextGenericId(1),
+	nextFuncId(0),
+	loopCleanup(0),
+	nextObjectId(1),     /* 0 is  reserved for no object. */
+	nextFrameId(0),
+	nextParserId(0),
+	revertOn(true),
+	predValue(0),
+	nextMatchEndNum(0),
+	argvTypeRef(0),
+	inContiguous(false),
+	contiguousOffset(0),
+	contiguousStretch(0)
+{
+}
+
+/* Clean up the data collected during a parse. */
+Compiler::~Compiler()
+{
+	/* Delete all the nodes in the action list. Will cause all the
+	 * string data that represents the actions to be deallocated. */
+	actionList.empty();
+}
+
+ostream &operator<<( ostream &out, const Token &token )
+{
+	out << token.data;
+	return out;
+}
+
+/* Write out a name reference. */
+ostream &operator<<( ostream &out, const NameRef &nameRef )
+{
+	int pos = 0;
+	if ( nameRef[pos] == 0 ) {
+		out << "::";
+		pos += 1;
+	}
+	out << nameRef[pos++];
+	for ( ; pos < nameRef.length(); pos++ )
+		out << "::" << nameRef[pos];
+	return out;
+}
+
+NameInst **Compiler::makeNameIndex()
+{
+	/* The number of nodes in the tree can now be given by nextNameId. Put a
+	 * null pointer on the end of the list to terminate it. */
+	NameInst **nameIndex = new NameInst*[nextNameId+1];
+	memset( nameIndex, 0, sizeof(NameInst*)*(nextNameId+1) );
+
+	for ( NameInstList::Iter ni = nameInstList; ni.lte(); ni++ )
+		nameIndex[ni->id] = ni;
+
+	return nameIndex;
+}
+
+void Compiler::createBuiltin( const char *name, BuiltinMachine builtin )
+{
+	LexExpression *expression = LexExpression::cons( builtin );
+	LexJoin *join = LexJoin::cons( expression );
+	LexDefinition *varDef = new LexDefinition( name, join );
+	GraphDictEl *graphDictEl = new GraphDictEl( name, varDef );
+	rootNamespace->rlMap.insert( graphDictEl );
+}
+
+/* Initialize the graph dict with builtin types. */
+void Compiler::initGraphDict( )
+{
+	createBuiltin( "any", BT_Any );
+	createBuiltin( "ascii", BT_Ascii );
+	createBuiltin( "extend", BT_Extend );
+	createBuiltin( "alpha", BT_Alpha );
+	createBuiltin( "digit", BT_Digit );
+	createBuiltin( "alnum", BT_Alnum );
+	createBuiltin( "lower", BT_Lower );
+	createBuiltin( "upper", BT_Upper );
+	createBuiltin( "cntrl", BT_Cntrl );
+	createBuiltin( "graph", BT_Graph );
+	createBuiltin( "print", BT_Print );
+	createBuiltin( "punct", BT_Punct );
+	createBuiltin( "space", BT_Space );
+	createBuiltin( "xdigit", BT_Xdigit );
+	createBuiltin( "null", BT_Lambda );
+	createBuiltin( "zlen", BT_Lambda );
+	createBuiltin( "empty", BT_Empty );
+}
+
+/* Initialize the key operators object that will be referenced by all fsms
+ * created. */
+void Compiler::initKeyOps( )
+{
+	/* Signedness and bounds. */
+	HostType *alphType = alphTypeSet ? userAlphType : hostLang->defaultAlphType;
+	thisKeyOps.setAlphType( alphType );
+
+	if ( lowerNum != 0 ) {
+		/* If ranges are given then interpret the alphabet type. */
+		thisKeyOps.minKey = makeFsmKeyNum( lowerNum, rangeLowLoc, this );
+		thisKeyOps.maxKey = makeFsmKeyNum( upperNum, rangeHighLoc, this );
+	}
+}
+
+/* Remove duplicates of unique actions from an action table. */
+void Compiler::removeDups( ActionTable &table )
+{
+	/* Scan through the table looking for unique actions to 
+	 * remove duplicates of. */
+	for ( int i = 0; i < table.length(); i++ ) {
+		/* Remove any duplicates ahead of i. */
+		for ( int r = i+1; r < table.length(); ) {
+			if ( table[r].value == table[i].value )
+				table.vremove(r);
+			else
+				r += 1;
+		}
+	}
+}
+
+/* Remove duplicates from action lists. This operates only on transition and
+ * eof action lists and so should be called once all actions have been
+ * transfered to their final resting place. */
+void Compiler::removeActionDups( FsmGraph *graph )
+{
+	/* Loop all states. */
+	for ( StateList::Iter state = graph->stateList; state.lte(); state++ ) {
+		/* Loop all transitions. */
+		for ( TransList::Iter trans = state->outList; trans.lte(); trans++ )
+			removeDups( trans->actionTable );
+		removeDups( state->toStateActionTable );
+		removeDups( state->fromStateActionTable );
+		removeDups( state->eofActionTable );
+	}
+}
+
+Action *Compiler::newAction( const String &name, InlineList *inlineList )
+{
+	InputLoc loc;
+	loc.line = 1;
+	loc.col = 1;
+	loc.fileName = 0;
+
+	Action *action = Action::cons( loc, name, inlineList );
+	actionList.append( action );
+	return action;
+}
+
+void Compiler::initLongestMatchData()
+{
+	if ( regionSetList.length() > 0 ) {
+		/* The initActId action gives act a default value. */
+		InlineList *il4 = InlineList::cons();
+		il4->append( InlineItem::cons( InputLoc(), InlineItem::LmInitAct ) );
+		initActId = newAction( "initact", il4 );
+		initActId->isLmAction = true;
+
+		/* The setTokStart action sets tokstart. */
+		InlineList *il5 = InlineList::cons();
+		il5->append( InlineItem::cons( InputLoc(), InlineItem::LmSetTokStart ) );
+		setTokStart = newAction( "tokstart", il5 );
+		setTokStart->isLmAction = true;
+
+		/* The setTokEnd action sets tokend. */
+		InlineList *il3 = InlineList::cons();
+		il3->append( InlineItem::cons( InputLoc(), InlineItem::LmSetTokEnd ) );
+		setTokEnd = newAction( "tokend", il3 );
+		setTokEnd->isLmAction = true;
+
+		/* The action will also need an ordering: ahead of all user action
+		 * embeddings. */
+		initActIdOrd = curActionOrd++;
+		setTokStartOrd = curActionOrd++;
+		setTokEndOrd = curActionOrd++;
+	}
+}
+
+void Compiler::finishGraphBuild( FsmGraph *graph )
+{
+	/* Resolve any labels that point to multiple states. Any labels that are
+	 * still around are referenced only by gotos and calls and they need to be
+	 * made into deterministic entry points. */
+	graph->deterministicEntry();
+
+	/*
+	 * All state construction is now complete.
+	 */
+
+	/* Transfer global error actions. */
+	for ( StateList::Iter state = graph->stateList; state.lte(); state++ )
+		graph->transferErrorActions( state, 0 );
+	
+	removeActionDups( graph );
+
+	/* Remove unreachable states. There should be no dead end states. The
+	 * subtract and intersection operators are the only places where they may
+	 * be created and those operators clean them up. */
+	graph->removeUnreachableStates();
+
+	/* No more fsm operations are to be done. Action ordering numbers are
+	 * no longer of use and will just hinder minimization. Clear them. */
+	graph->nullActionKeys();
+
+	/* Transition priorities are no longer of use. We can clear them
+	 * because they will just hinder minimization as well. Clear them. */
+	graph->clearAllPriorities();
+
+	/* Minimize here even if we minimized at every op. Now that function
+	 * keys have been cleared we may get a more minimal fsm. */
+	graph->minimizePartition2();
+	graph->compressTransitions();
+}
+
+/* Build the name tree and supporting data structures. */
+NameInst *Compiler::makeNameTree()
+{
+	/* Create the root name. */
+	nextNameId = 1;
+
+	/* First make the name tree. */
+	for ( RegionImplList::Iter rel = regionImplList; rel.lte(); rel++ ) {
+		/* Recurse on the instance. */
+		rel->makeNameTree( rel->loc, this );
+	}
+
+	return 0;
+}
+
+FsmGraph *Compiler::makeAllRegions()
+{
+	/* Build the name tree and supporting data structures. */
+	makeNameTree();
+	NameInst **nameIndex = makeNameIndex();
+
+	int numGraphs = 0;
+	FsmGraph **graphs = new FsmGraph*[regionImplList.length()];
+
+	/* Make all the instantiations, we know that main exists in this list. */
+	for ( RegionImplList::Iter rel = regionImplList; rel.lte(); rel++ ) {
+		/* Build the graph from a walk of the parse tree. */
+		FsmGraph *newGraph = rel->walk( this );
+
+		/* Wrap up the construction. */
+		finishGraphBuild( newGraph );
+
+		/* Save off the new graph. */
+		graphs[numGraphs++] = newGraph;
+	}
+
+	/* NOTE: If putting in minimization here we need to include eofTarget
+	 * into the minimization algorithm. It is currently set by the longest
+	 * match operator and not considered anywhere else. */
+
+	FsmGraph *all;
+	if ( numGraphs == 0 ) {
+		all = new FsmGraph;
+		all->lambdaFsm();
+	}
+	else {
+		/* Add all the other graphs into the first. */
+		all = graphs[0];
+		all->globOp( graphs+1, numGraphs-1 );
+		delete[] graphs;
+	}
+
+	/* Go through all the token regions and check for lmRequiresErrorState. */
+	for ( RegionImplList::Iter reg = regionImplList; reg.lte(); reg++ ) {
+		if ( reg->lmSwitchHandlesError )
+			all->lmRequiresErrorState = true;
+	}
+
+	all->nameIndex = nameIndex;
+
+	return all;
+}
+
+void Compiler::analyzeAction( Action *action, InlineList *inlineList )
+{
+	/* FIXME: Actions used as conditions should be very constrained. */
+	for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) {
+		//if ( item->type == InlineItem::Call || item->type == InlineItem::CallExpr )
+		//	action->anyCall = true;
+
+		/* Need to recurse into longest match items. */
+		if ( item->type == InlineItem::LmSwitch ) {
+			RegionImpl *lm = item->tokenRegion;
+			for ( TokenInstanceListReg::Iter lmi = lm->tokenInstanceList; lmi.lte(); lmi++ ) {
+				if ( lmi->action != 0 )
+					analyzeAction( action, lmi->action->inlineList );
+			}
+		}
+
+		if ( item->type == InlineItem::LmOnLast || 
+				item->type == InlineItem::LmOnNext ||
+				item->type == InlineItem::LmOnLagBehind )
+		{
+			TokenInstance *lmi = item->longestMatchPart;
+			if ( lmi->action != 0 )
+				analyzeAction( action, lmi->action->inlineList );
+		}
+
+		if ( item->children != 0 )
+			analyzeAction( action, item->children );
+	}
+}
+
+void Compiler::analyzeGraph( FsmGraph *graph )
+{
+	for ( ActionList::Iter act = actionList; act.lte(); act++ )
+		analyzeAction( act, act->inlineList );
+
+	for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
+		/* The transition list. */
+		for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) {
+			for ( ActionTable::Iter at = trans->actionTable; at.lte(); at++ )
+				at->value->numTransRefs += 1;
+		}
+
+		for ( ActionTable::Iter at = st->toStateActionTable; at.lte(); at++ )
+			at->value->numToStateRefs += 1;
+
+		for ( ActionTable::Iter at = st->fromStateActionTable; at.lte(); at++ )
+			at->value->numFromStateRefs += 1;
+
+		for ( ActionTable::Iter at = st->eofActionTable; at.lte(); at++ )
+			at->value->numEofRefs += 1;
+	}
+}
+
+FsmGraph *Compiler::makeScanner()
+{
+	/* Make the graph, do minimization. */
+	FsmGraph *fsmGraph = makeAllRegions();
+
+	/* If any errors have occured in the input file then don't write anything. */
+	if ( gblErrorCount > 0 )
+		return 0;
+
+	analyzeGraph( fsmGraph );
+
+	/* Decide if an error state is necessary.
+	 *  1. There is an error transition
+	 *  2. There is a gap in the transitions
+	 *  3. The longest match operator requires it. */
+	if ( fsmGraph->lmRequiresErrorState || fsmGraph->hasErrorTrans() )
+		fsmGraph->errState = fsmGraph->addState();
+
+	/* State numbers need to be assigned such that all final states have a
+	 * larger state id number than all non-final states. This enables the
+	 * first_final mechanism to function correctly. We also want states to be
+	 * ordered in a predictable fashion. So we first apply a depth-first
+	 * search, then do a stable sort by final state status, then assign
+	 * numbers. */
+
+	fsmGraph->depthFirstOrdering();
+	fsmGraph->sortStatesByFinal();
+	fsmGraph->setStateNumbers( 0 );
+
+	return fsmGraph;
+}
+
+LangEl *Compiler::makeRepeatProd( const InputLoc &loc, Namespace *nspace,
+		const String &repeatName, UniqueType *ut )
+{
+	LangEl *prodName = addLangEl( this, nspace, repeatName, LangEl::NonTerm );
+	prodName->isRepeat = true;
+
+	ProdElList *prodElList1 = new ProdElList;
+
+	/* Build the first production of the repeat. */
+	TypeRef *typeRef1 = TypeRef::cons( loc, ut );
+	ProdEl *factor1 = new ProdEl( ProdEl::ReferenceType, InputLoc(), 0, false, typeRef1, 0 );
+
+	UniqueType *prodNameUT = findUniqueType( TYPE_TREE, prodName );
+	TypeRef *typeRef2 = TypeRef::cons( loc, prodNameUT );
+	ProdEl *factor2 = new ProdEl( ProdEl::ReferenceType, InputLoc(), 0, false, typeRef2, 0 );
+
+	prodElList1->append( factor1 );
+	prodElList1->append( factor2 );
+
+	Production *newDef1 = Production::cons( InputLoc(),
+			prodName, prodElList1, String(), false, 0,
+			prodList.length(), prodName->defList.length() );
+
+	prodName->defList.append( newDef1 );
+	prodList.append( newDef1 );
+
+	/* Build the second production of the repeat. */
+	ProdElList *prodElList2 = new ProdElList;
+
+	Production *newDef2 = Production::cons( InputLoc(),
+			prodName, prodElList2, String(), false, 0,
+			prodList.length(), prodName->defList.length() );
+
+	prodName->defList.append( newDef2 );
+	prodList.append( newDef2 );
+
+	return prodName;
+}
+
+LangEl *Compiler::makeListProd( const InputLoc &loc, Namespace *nspace,
+		const String &listName, UniqueType *ut )
+{
+	LangEl *prodName = addLangEl( this, nspace, listName, LangEl::NonTerm );
+	prodName->isList = true;
+
+	/* Build the first production of the list. */
+	TypeRef *typeRef1 = TypeRef::cons( loc, ut );
+	ProdEl *factor1 = new ProdEl( ProdEl::ReferenceType, InputLoc(), 0, false, typeRef1, 0 );
+
+	UniqueType *prodNameUT = findUniqueType( TYPE_TREE, prodName );
+	TypeRef *typeRef2 = TypeRef::cons( loc, prodNameUT );
+	ProdEl *factor2 = new ProdEl( ProdEl::ReferenceType, loc, 0, false, typeRef2, 0 );
+
+	ProdElList *prodElList1 = new ProdElList;
+	prodElList1->append( factor1 );
+	prodElList1->append( factor2 );
+
+	Production *newDef1 = Production::cons( loc,
+			prodName, prodElList1, String(), false, 0,
+			prodList.length(), prodName->defList.length() );
+
+	prodName->defList.append( newDef1 );
+	prodList.append( newDef1 );
+
+	/* Build the second production of the list. */
+	TypeRef *typeRef3 = TypeRef::cons( loc, ut );
+	ProdEl *factor3 = new ProdEl( ProdEl::ReferenceType, loc, 0, false, typeRef3, 0 );
+
+	ProdElList *prodElList2 = new ProdElList;
+	prodElList2->append( factor3 );
+
+	Production *newDef2 = Production::cons( loc,
+			prodName, prodElList2, String(), false, 0,
+			prodList.length(), prodName->defList.length() );
+
+	prodName->defList.append( newDef2 );
+	prodList.append( newDef2 );
+
+	return prodName;
+}
+
+LangEl *Compiler::makeOptProd( const InputLoc &loc, Namespace *nspace,
+		const String &optName, UniqueType *ut )
+{
+	LangEl *prodName = addLangEl( this, nspace, optName, LangEl::NonTerm );
+	prodName->isOpt = true;
+
+	ProdElList *prodElList1 = new ProdElList;
+
+	/* Build the first production of the repeat. */
+	TypeRef *typeRef1 = TypeRef::cons( loc, ut );
+	ProdEl *factor1 = new ProdEl( ProdEl::ReferenceType, loc, 0, false, typeRef1, 0 );
+	prodElList1->append( factor1 );
+
+	Production *newDef1 = Production::cons( loc,
+			prodName, prodElList1, String(), false, 0,
+			prodList.length(), prodName->defList.length() );
+
+	prodName->defList.append( newDef1 );
+	prodList.append( newDef1 );
+
+	/* Build the second production of the repeat. */
+	ProdElList *prodElList2 = new ProdElList;
+
+	Production *newDef2 = Production::cons( loc,
+			prodName, prodElList2, String(), false, 0,
+			prodList.length(), prodName->defList.length() );
+
+	prodName->defList.append( newDef2 );
+	prodList.append( newDef2 );
+
+	return prodName;
+}
+
+Namespace *Namespace::findNamespace( const String &name )
+{
+	for ( NamespaceVect::Iter c = childNamespaces; c.lte(); c++ ) {
+		if ( strcmp( name, (*c)->name ) == 0 )
+			return *c;
+	}
+	return 0;
+}
+
+/* Search from a previously resolved qualification. (name 1+ in a qual list). */
+Namespace *NamespaceQual::searchFrom( Namespace *from, StringVect::Iter &qualPart )
+{
+	/* While there are still parts in the qualification.  */
+	while ( qualPart.lte() ) {
+		Namespace *child = from->findNamespace( *qualPart );
+		if ( child == 0 )
+			return 0;
+
+		from = child;
+		qualPart.increment();
+	}
+
+	return from;
+}
+
+Namespace *NamespaceQual::getQual( Compiler *pd )
+{
+	/* Do the search only once. */
+	if ( cachedNspaceQual != 0 )
+		return cachedNspaceQual;
+	
+	if ( qualNames.length() == 0 ) {
+		/* No qualification, use the region the qualification was 
+		 * declared in. */
+		cachedNspaceQual = declInNspace;
+	}
+	else if ( strcmp( qualNames[0], "root" ) == 0 ) {
+		/* First item is "root." Start the downward search from there. */
+		StringVect::Iter qualPart = qualNames;
+		qualPart.increment();
+		cachedNspaceQual = searchFrom( pd->rootNamespace, qualPart );
+		return cachedNspaceQual;
+	}
+	else {
+		/* Have a qualification. Move upwards through the declared
+		 * regions looking for the first part. */
+		StringVect::Iter qualPart = qualNames;
+		Namespace *parentNamespace = declInNspace;
+		while ( parentNamespace != 0 ) {
+			/* Search for the first part underneath the current parent. */
+			Namespace *child = parentNamespace->findNamespace( *qualPart );
+
+			if ( child != 0 ) {
+				/* Found the first part. Start going below the result.  */
+				qualPart.increment();
+				cachedNspaceQual = searchFrom( child, qualPart );
+				return cachedNspaceQual;
+			}
+
+			/* Not found, move up to the parent. */
+			parentNamespace = parentNamespace->parentNamespace;
+		}
+
+		/* Failed to find the place to start from. */
+		cachedNspaceQual = 0;
+	}
+
+	return cachedNspaceQual;
+}
+
+void Compiler::initEmptyScanner( RegionSet *regionSet, TokenRegion *reg )
+{
+	if ( reg != 0 && reg->impl->tokenInstanceList.length() == 0 ) {
+		reg->impl->wasEmpty = true;
+
+		static int def = 1;
+		String name( 64, "__%p_DEF_PAT_%d", reg, def++ );
+
+		LexJoin *join = LexJoin::cons( LexExpression::cons( BT_Any ) );
+
+		TokenDef *tokenDef = TokenDef::cons( name, String(), false, false,
+				join, 0, internal, nextTokenId++, rootNamespace, 
+				regionSet, 0, 0 );
+			
+		TokenInstance *tokenInstance = TokenInstance::cons( tokenDef,
+				join, internal, nextTokenId++,
+				rootNamespace, reg );
+
+		reg->impl->tokenInstanceList.append( tokenInstance );
+
+		/* These do not go in the namespace so so they cannot get declared
+		 * in the declare pass. */
+		LangEl *lel = addLangEl( this, rootNamespace, name, LangEl::Term );
+
+		tokenInstance->tokenDef->tdLangEl = lel;
+		lel->tokenDef = tokenDef;
+	}
+}
+
+void Compiler::initEmptyScanners()
+{
+	for ( RegionSetList::Iter regionSet = regionSetList; regionSet.lte(); regionSet++ ) {
+		initEmptyScanner( regionSet, regionSet->tokenIgnore );
+		initEmptyScanner( regionSet, regionSet->tokenOnly );
+		initEmptyScanner( regionSet, regionSet->ignoreOnly );
+		initEmptyScanner( regionSet, regionSet->collectIgnore );
+	}
+}
+
+PdaRun *Compiler::parsePattern( Program *prg, Tree **sp, const InputLoc &loc, 
+		int parserId, StreamImpl *sourceStream )
+{
+	StreamImpl *in = newSourceStreamGeneric( "<internal>" );
+
+	PdaRun *pdaRun = new PdaRun;
+	initPdaRun( prg, pdaRun, pdaTables, parserId, 0, false, 0 );
+
+	Stream *res = streamAllocate( prg );
+	res->id = LEL_ID_STREAM;
+	res->in = sourceStream;
+	in->funcs->appendStream( in, (Tree*)res );
+	in->funcs->setEof( in );
+
+	long pcr = parseLoop( prg, sp, pdaRun, in, PcrStart );
+	assert( pcr == PcrDone );
+	if ( pdaRun->parseError ) {
+		cerr << ( loc.fileName != 0 ? loc.fileName : "<input>" ) <<
+				":" << loc.line << ":" << loc.col;
+
+		if ( pdaRun->parseErrorText != 0 ) {
+			cerr << ": relative error: " << 
+					pdaRun->parseErrorText->tokdata->data;
+		}
+		else {
+			cerr << ": parse error";
+		}
+
+		cerr << endl;
+		gblErrorCount += 1;
+	}
+
+	return pdaRun;
+}
+
+
+void Compiler::parsePatterns()
+{
+	Program *prg = colm_new_program( runtimeData );
+
+	/* Turn off context-dependent parsing. */
+	prg->ctxDepParsing = 0;
+
+	Tree **sp = prg->stackRoot;
+
+	for ( ConsList::Iter cons = replList; cons.lte(); cons++ ) {
+		StreamImpl *in = newSourceStreamCons( "<internal>", cons );
+		cons->pdaRun = parsePattern( prg, sp, cons->loc, cons->langEl->parserId, in );
+	}
+
+	for ( PatList::Iter pat = patternList; pat.lte(); pat++ ) {
+		StreamImpl *in = newSourceStreamPat( "<internal>", pat );
+		pat->pdaRun = parsePattern( prg, sp, pat->loc, pat->langEl->parserId, in );
+	}
+
+	/* Bail on above errors. */
+	if ( gblErrorCount > 0 )
+		exit(1);
+
+	fillInPatterns( prg );
+}
+
+void Compiler::collectParserEls( BstSet<LangEl*> &parserEls )
+{
+	for ( PatList::Iter pat = patternList; pat.lte(); pat++ ) {
+		/* We assume the reduction action compilation phase was run before
+		 * pattern parsing and it decorated the pattern with the target type. */
+		assert( pat->langEl != 0 );
+		if ( pat->langEl->type != LangEl::NonTerm )
+			error(pat->loc) << "pattern type is not a non-terminal" << endp;
+
+		if ( pat->langEl->parserId < 0 ) {
+			/* Make a parser for the language element. */
+			parserEls.insert( pat->langEl );
+			pat->langEl->parserId = nextParserId++;
+		}
+	}
+
+	for ( ConsList::Iter repl = replList; repl.lte(); repl++ ) {
+		/* We assume the reduction action compilation phase was run before
+		 * replacement parsing decorated the replacement with the target type. */
+		assert( repl->langEl != 0 );
+
+		if ( repl->langEl->parserId < 0 ) {
+			/* Make a parser for the language element. */
+			parserEls.insert( repl->langEl );
+			repl->langEl->parserId = nextParserId++;
+		}
+	}
+
+	/* Make parsers that we need. */
+	for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) {
+		if ( lel->parserId >= 0 )
+			parserEls.insert( lel );
+	}
+}
+
+void Compiler::generateOutput( long activeRealm )
+{
+	FsmCodeGen *fsmGen = new FsmCodeGen( *outStream, redFsm, fsmTables );
+
+	PdaCodeGen *pdaGen = new PdaCodeGen( *outStream );
+
+	fsmGen->writeIncludes();
+	pdaGen->defineRuntime();
+	fsmGen->writeCode();
+
+	/* Make parsers that we need. */
+	pdaGen->writeParserData( 0, pdaTables );
+
+	/* Write the runtime data. */
+	pdaGen->writeRuntimeData( runtimeData, pdaTables );
+
+	if ( !gblLibrary ) 
+		fsmGen->writeMain( activeRealm );
+
+	outStream->flush();
+}
+
+
+void Compiler::prepGrammar()
+{
+	/* This will create language elements. */
+	wrapNonTerminals();
+
+	makeLangElIds();
+	makeLangElNames();
+	makeDefinitionNames();
+	noUndefindLangEls();
+
+	/* Put the language elements in an index by language element id. */
+	langElIndex = new LangEl*[nextSymbolId+1];
+	memset( langElIndex, 0, sizeof(LangEl*)*(nextSymbolId+1) );
+	for ( LelList::Iter lel = langEls; lel.lte(); lel++ )
+		langElIndex[lel->id] = lel;
+
+	makeProdFsms();
+
+	/* Allocate the Runtime data now. Every PdaTable that we make 
+	 * will reference it, but it will be filled in after all the tables are
+	 * built. */
+	runtimeData = new RuntimeData;
+}
+
+void Compiler::compile()
+{
+	beginProcessing();
+	initKeyOps();
+
+	declarePass();
+
+	resolvePass();
+
+	makeTerminalWrappers();
+	makeEofElements();
+
+	/*
+	 * Parsers
+	 */
+
+	/* Init the longest match data */
+	initLongestMatchData();
+	FsmGraph *fsmGraph = makeScanner();
+
+	prepGrammar();
+
+	initAllLanguageObjects();
+	initAllFrameObjects();
+
+	for ( FunctionList::Iter f = functionList; f.lte(); f++ )
+		initUserFunctions( f, f->isUserIter );
+
+	/* Compile bytecode. */
+	compileByteCode();
+
+	/* Make the reduced fsm. */
+	RedFsmBuild reduce( this, fsmGraph );
+	redFsm = reduce.reduceMachine();
+
+	BstSet<LangEl*> parserEls;
+	collectParserEls( parserEls );
+
+	makeParser( parserEls );
+
+	/* Make the scanner tables. */
+	fsmTables = redFsm->makeFsmTables();
+
+	/* Now that all parsers are built, make the global runtimeData. */
+	makeRuntimeData();
+
+	/* 
+	 * All compilation is now complete.
+	 */
+	
+	/* Parse constructors and patterns. */
+	parsePatterns();
+}
+