diff options
author | Adrian Thurston <thurston@colm.net> | 2020-03-27 00:10:15 +0200 |
---|---|---|
committer | Adrian Thurston <thurston@colm.net> | 2020-03-27 00:10:15 +0200 |
commit | 6a3871964c4eee0984a5e5be3f09e3bc5082ddce (patch) | |
tree | 203104fe30b206a9c8b5de1cafb5b74f5c77167f | |
parent | d8296ea1c38c13bbdb67adc94c2af2fa67f57dbd (diff) | |
download | colm-6a3871964c4eee0984a5e5be3f09e3bc5082ddce.tar.gz |
split longest-match structs into components that live here and in ragel
-rw-r--r-- | libfsm/action.h | 12 | ||||
-rw-r--r-- | libfsm/asm.h | 4 | ||||
-rw-r--r-- | libfsm/codegen.h | 2 | ||||
-rw-r--r-- | libfsm/fsmap.cc | 6 | ||||
-rw-r--r-- | libfsm/fsmgraph.h | 166 | ||||
-rw-r--r-- | libfsm/gendata.cc | 4 | ||||
-rw-r--r-- | libfsm/idbase.cc | 6 | ||||
-rw-r--r-- | libfsm/parsedata.h | 216 |
8 files changed, 177 insertions, 239 deletions
diff --git a/libfsm/action.h b/libfsm/action.h index 5aea6f7f..5c79befe 100644 --- a/libfsm/action.h +++ b/libfsm/action.h @@ -27,9 +27,9 @@ struct NameInst; struct NameRef; -struct LongestMatch; +struct FsmLongestMatch; struct InlineList; -struct LongestMatchPart; +struct FsmLongestMatchPart; struct Action; struct CondSpace; @@ -53,8 +53,8 @@ struct InlineItem InlineItem( const InputLoc &loc, NameRef *nameRef, Type type ) : loc(loc), nameRef(nameRef), children(0), type(type) { } - InlineItem( const InputLoc &loc, LongestMatch *longestMatch, - LongestMatchPart *longestMatchPart, Type type ) : loc(loc), + InlineItem( const InputLoc &loc, FsmLongestMatch *longestMatch, + FsmLongestMatchPart *longestMatchPart, Type type ) : loc(loc), nameRef(0), children(0), longestMatch(longestMatch), longestMatchPart(longestMatchPart), type(type) { } @@ -86,8 +86,8 @@ struct InlineItem NameRef *nameRef; NameInst *nameTarg; InlineList *children; - LongestMatch *longestMatch; - LongestMatchPart *longestMatchPart; + FsmLongestMatch *longestMatch; + FsmLongestMatchPart *longestMatchPart; long substPos; Action *wrappedAction; CondSpace *condSpace; diff --git a/libfsm/asm.h b/libfsm/asm.h index 3b4229d4..2506e582 100644 --- a/libfsm/asm.h +++ b/libfsm/asm.h @@ -48,8 +48,8 @@ struct NameInst; struct GenInlineItem; struct GenInlineList; struct RedAction; -struct LongestMatch; -struct LongestMatchPart; +struct FsmLongestMatch; +struct FsmLongestMatchPart; class AsmCodeGen; struct RedTransAp; struct RedStateAp; diff --git a/libfsm/codegen.h b/libfsm/codegen.h index e1a7877b..dcc24e3b 100644 --- a/libfsm/codegen.h +++ b/libfsm/codegen.h @@ -50,7 +50,7 @@ struct NameInst; struct GenInlineItem; struct GenInlineList; struct RedAction; -struct LongestMatch; +struct FsmLongestMatch; struct LongestMatchPart; string itoa( int i ); diff --git a/libfsm/fsmap.cc b/libfsm/fsmap.cc index e38680f3..98863f52 100644 --- a/libfsm/fsmap.cc +++ b/libfsm/fsmap.cc @@ -55,7 +55,7 @@ bool ActionTable::hasAction( Action *action ) } /* Insert an action into an action table. */ -void LmActionTable::setAction( int ordering, LongestMatchPart *action ) +void LmActionTable::setAction( int ordering, FsmLongestMatchPart *action ) { /* Multi-insert in case specific instances of an action appear in a * transition more than once. */ @@ -314,7 +314,7 @@ void FsmAp::leaveFsmAction( int ordering, Action *action ) } /* Add functions to the longest match action table for constructing scanners. */ -void FsmAp::longMatchAction( int ordering, LongestMatchPart *lmPart ) +void FsmAp::longMatchAction( int ordering, FsmLongestMatchPart *lmPart ) { /* Walk all final states. */ for ( StateSet::Iter state = finStateSet; state.lte(); state++ ) { @@ -1125,7 +1125,7 @@ int FsmAp::compareStateData( const StateAp *state1, const StateAp *state2 ) if ( cmpRes != 0 ) return cmpRes; - return CmpTable<LongestMatchPart*>::compare( + return CmpTable<FsmLongestMatchPart*>::compare( state1->lmNfaParts, state2->lmNfaParts ); } diff --git a/libfsm/fsmgraph.h b/libfsm/fsmgraph.h index 4af13101..aee6f718 100644 --- a/libfsm/fsmgraph.h +++ b/libfsm/fsmgraph.h @@ -60,7 +60,7 @@ struct TransAp; struct StateAp; struct FsmAp; struct Action; -struct LongestMatchPart; +struct FsmLongestMatchPart; struct CondSpace; struct FsmCtx; struct InlineBlock; @@ -279,13 +279,13 @@ typedef SBstSet< Action*, CmpOrd<Action*> > ActionSet; typedef CmpSTable< Action*, CmpOrd<Action*> > CmpActionSet; /* Transistion Action Element. */ -typedef SBstMapEl< int, LongestMatchPart* > LmActionTableEl; +typedef SBstMapEl< int, FsmLongestMatchPart* > LmActionTableEl; /* Transition Action Table. */ struct LmActionTable - : public SBstMap< int, LongestMatchPart*, CmpOrd<int> > + : public SBstMap< int, FsmLongestMatchPart*, CmpOrd<int> > { - void setAction( int ordering, LongestMatchPart *action ); + void setAction( int ordering, FsmLongestMatchPart *action ); void setActions( const LmActionTable &other ); }; @@ -950,7 +950,7 @@ typedef Vector<EptVectEl> EptVect; typedef BstSet<int> EntryIdSet; /* Set of longest match items that may be active in a given state. */ -typedef BstSet<LongestMatchPart*> LmItemSet; +typedef BstSet<FsmLongestMatchPart*> LmItemSet; /* A Conditions which is to be * transfered on pending out transitions. */ @@ -1961,7 +1961,7 @@ struct FsmAp void allTransAction( int ordering, Action *action ); void finishFsmAction( int ordering, Action *action ); void leaveFsmAction( int ordering, Action *action ); - void longMatchAction( int ordering, LongestMatchPart *lmPart ); + void longMatchAction( int ordering, FsmLongestMatchPart *lmPart ); /* Set conditions. */ CondSpace *addCondSpace( const CondSet &condSet ); @@ -2537,4 +2537,158 @@ template< class Trans > int FsmAp::compareCondBitElimPtr( Trans *trans1, Trans * return 0; } +struct NameInst; + +/* Tree nodes. */ + +struct NfaUnion; +struct MachineDef; +struct FsmLongestMatch; +struct FsmLongestMatchPart; +struct FsmLmPartList; +struct Range; +struct LengthDef; +struct Action; +struct InlineList; + +/* Reference to a named state. */ +struct NameRef : public Vector<std::string> {}; +typedef Vector<NameRef*> NameRefList; +typedef Vector<NameInst*> NameTargList; + +/* + * FsmLongestMatch + * + * Wherever possible the item match will execute on the character. If not + * possible the item match will execute on a lookahead character and either + * hold the current char (if one away) or backup. + * + * How to handle the problem of backing up over a buffer break? + * + * Don't want to use pending out transitions for embedding item match because + * the role of item match action is different: it may sometimes match on the + * final transition, or may match on a lookahead character. + * + * Don't want to invent a new operator just for this. So just trail action + * after machine, this means we can only use literal actions. + * + * The item action may + * + * What states of the machine will be final. The item actions that wrap around + * on the last character will go straight to the start state. + * + * Some transitions will be lookahead transitions, they will hold the current + * character. Crossing them with regular transitions must be restricted + * because it does not make sense. The transition cannot simultaneously hold + * and consume the current character. + */ +struct FsmLongestMatchPart +{ + FsmLongestMatchPart( Action *action, int longestMatchId ) + : + action(action), + longestMatchId(longestMatchId), + inLmSelect(false) + { } + + Action *action; + Action *setActId; + Action *actOnLast; + Action *actOnNext; + Action *actLagBehind; + Action *actNfaOnLast; + Action *actNfaOnNext; + Action *actNfaOnEof; + int longestMatchId; + bool inLmSelect; + FsmLongestMatch *longestMatch; + + FsmLongestMatchPart *prev, *next; +}; + +/* Declare a new type so that ptreetypes.h need not include dlist.h. */ +struct FsmLmPartList + : DList<FsmLongestMatchPart> {}; + +struct FsmLongestMatch +{ + /* Construct with a list of joins */ + FsmLongestMatch( FsmLmPartList *longestMatchList ) + : + longestMatchList(longestMatchList), + lmSwitchHandlesError(false) + { + } + + FsmLmPartList *longestMatchList; + bool lmSwitchHandlesError; + + void restart( FsmAp *graph, TransAp *trans ); + void restart( FsmAp *graph, CondAp *cond ); +}; + +struct NameMapVal +{ + Vector<NameInst*> vals; +}; + +/* Tree of instantiated names. */ +typedef AvlMapEl<std::string, NameMapVal*> NameMapEl; +typedef AvlMap<std::string, NameMapVal*, CmpString> NameMap; +typedef Vector<NameInst*> NameVect; + +/* Node in the tree of instantiated names. */ +struct NameInst +{ + NameInst( const InputLoc &loc, NameInst *parent, std::string name, int id, bool isLabel ) : + loc(loc), parent(parent), name(name), id(id), isLabel(isLabel), + isLongestMatch(false), numRefs(0), numUses(0), start(0), final(0) {} + + ~NameInst(); + + InputLoc loc; + + /* Keep parent pointers in the name tree to retrieve + * fully qulified names. */ + NameInst *parent; + + std::string name; + int id; + bool isLabel; + bool isLongestMatch; + + int numRefs; + int numUses; + + /* Names underneath us, excludes anonymous names. */ + NameMap children; + + /* All names underneath us in order of appearance. */ + NameVect childVect; + + /* Join scopes need an implicit "final" target. */ + NameInst *start, *final; + + /* During a fsm generation walk, lists the names that are referenced by + * epsilon operations in the current scope. After the link is made by the + * epsilon reference and the join operation is complete, the label can + * have its refcount decremented. Once there are no more references the + * entry point can be removed from the fsm returned. */ + NameVect referencedNames; + + /* Pointers for the name search queue. */ + NameInst *prev, *next; + + /* Check if this name inst or any name inst below is referenced. */ + bool anyRefsRec(); +}; + +typedef DList<NameInst> NameInstList; + +extern const int ORD_PUSH; +extern const int ORD_RESTORE; +extern const int ORD_COND; +extern const int ORD_COND2; +extern const int ORD_TEST; + #endif diff --git a/libfsm/gendata.cc b/libfsm/gendata.cc index f0be11fa..4e3253ad 100644 --- a/libfsm/gendata.cc +++ b/libfsm/gendata.cc @@ -225,7 +225,7 @@ void Reducer::makeLmSwitch( GenInlineList *outList, InlineItem *item ) { GenInlineItem *lmSwitch = new GenInlineItem( InputLoc(), GenInlineItem::LmSwitch ); GenInlineList *lmList = lmSwitch->children = new GenInlineList; - LongestMatch *longestMatch = item->longestMatch; + FsmLongestMatch *longestMatch = item->longestMatch; /* We can't put the <exec> here because we may need to handle the error * case and in that case p should not be changed. Instead use a default @@ -255,7 +255,7 @@ void Reducer::makeLmSwitch( GenInlineList *outList, InlineItem *item ) } bool needDefault = false; - for ( LmPartList::Iter lmi = *longestMatch->longestMatchList; lmi.lte(); lmi++ ) { + for ( FsmLmPartList::Iter lmi = *longestMatch->longestMatchList; lmi.lte(); lmi++ ) { if ( lmi->inLmSelect ) { if ( lmi->action == 0 ) needDefault = true; diff --git a/libfsm/idbase.cc b/libfsm/idbase.cc index 21eca5c3..38850b56 100644 --- a/libfsm/idbase.cc +++ b/libfsm/idbase.cc @@ -103,8 +103,8 @@ void FsmCtx::analyzeAction( Action *action, InlineList *inlineList ) /* Need to recurse into longest match items. */ if ( item->type == InlineItem::LmSwitch ) { - LongestMatch *lm = item->longestMatch; - for ( LmPartList::Iter lmi = *lm->longestMatchList; lmi.lte(); lmi++ ) { + FsmLongestMatch *lm = item->longestMatch; + for ( FsmLmPartList::Iter lmi = *lm->longestMatchList; lmi.lte(); lmi++ ) { if ( lmi->action != 0 ) analyzeAction( action, lmi->action->inlineList ); } @@ -114,7 +114,7 @@ void FsmCtx::analyzeAction( Action *action, InlineList *inlineList ) item->type == InlineItem::LmOnNext || item->type == InlineItem::LmOnLagBehind ) { - LongestMatchPart *lmi = item->longestMatchPart; + FsmLongestMatchPart *lmi = item->longestMatchPart; if ( lmi->action != 0 ) analyzeAction( action, lmi->action->inlineList ); } diff --git a/libfsm/parsedata.h b/libfsm/parsedata.h index 5fe21af8..4d11b1c7 100644 --- a/libfsm/parsedata.h +++ b/libfsm/parsedata.h @@ -23,221 +23,5 @@ #ifndef _PARSEDATA_H #define _PARSEDATA_H -#include <iostream> -#include <limits.h> - -#include "avlmap.h" -#include "bstmap.h" -#include "dlist.h" -#include "fsmgraph.h" -#include "compare.h" -#include "common.h" - -#include "ragel.h" -#include "avlmap.h" -#include "bstmap.h" -#include "vector.h" -#include "dlist.h" -#include "fsmgraph.h" - -struct NameInst; -struct Join; -struct ParseData; - -/* Tree nodes. */ - -struct NfaUnion; -struct MachineDef; -struct LongestMatch; -struct LongestMatchPart; -struct LmPartList; -struct Range; -struct LengthDef; -struct Action; -struct InlineList; - -/* Reference to a named state. */ -struct NameRef : public Vector<std::string> {}; -typedef Vector<NameRef*> NameRefList; -typedef Vector<NameInst*> NameTargList; - -/* - * LongestMatch - * - * Wherever possible the item match will execute on the character. If not - * possible the item match will execute on a lookahead character and either - * hold the current char (if one away) or backup. - * - * How to handle the problem of backing up over a buffer break? - * - * Don't want to use pending out transitions for embedding item match because - * the role of item match action is different: it may sometimes match on the - * final transition, or may match on a lookahead character. - * - * Don't want to invent a new operator just for this. So just trail action - * after machine, this means we can only use literal actions. - * - * The item action may - * - * What states of the machine will be final. The item actions that wrap around - * on the last character will go straight to the start state. - * - * Some transitions will be lookahead transitions, they will hold the current - * character. Crossing them with regular transitions must be restricted - * because it does not make sense. The transition cannot simultaneously hold - * and consume the current character. - */ -struct LongestMatchPart -{ - LongestMatchPart( Join *join, Action *action, - const InputLoc &semiLoc, int longestMatchId ) - : - join(join), action(action), semiLoc(semiLoc), - longestMatchId(longestMatchId), inLmSelect(false) { } - - InputLoc getLoc(); - - Join *join; - Action *action; - InputLoc semiLoc; - - Action *setActId; - Action *actOnLast; - Action *actOnNext; - Action *actLagBehind; - Action *actNfaOnLast; - Action *actNfaOnNext; - Action *actNfaOnEof; - int longestMatchId; - bool inLmSelect; - LongestMatch *longestMatch; - - LongestMatchPart *prev, *next; -}; - -/* Declare a new type so that ptreetypes.h need not include dlist.h. */ -struct LmPartList - : DList<LongestMatchPart> {}; - -struct LongestMatch -{ - /* Construct with a list of joins */ - LongestMatch( const InputLoc &loc, LmPartList *longestMatchList ) - : - loc(loc), - longestMatchList(longestMatchList), - lmSwitchHandlesError(false), - nfaConstruction(false) - { } - - InputLoc loc; - LmPartList *longestMatchList; - std::string name; - Action *lmActSelect; - bool lmSwitchHandlesError; - bool nfaConstruction; - - LongestMatch *next, *prev; - - /* Tree traversal. */ - FsmRes walkClassic( ParseData *pd ); - FsmRes walk( ParseData *pd ); - - FsmRes mergeNfaStates( ParseData *pd, FsmAp *fsm ); - bool onlyOneNfa( ParseData *pd, FsmAp *fsm, StateAp *st, NfaTrans *in ); - bool matchCanFail( ParseData *pd, FsmAp *fsm, StateAp *st ); - void eliminateNfaActions( ParseData *pd, FsmAp *fsm ); - void advanceNfaActions( ParseData *pd, FsmAp *fsm ); - FsmRes buildBaseNfa( ParseData *pd ); - FsmRes walkNfa( ParseData *pd ); - - void makeNameTree( ParseData *pd ); - void resolveNameRefs( ParseData *pd ); - void transferScannerLeavingActions( FsmAp *graph ); - void runLongestMatch( ParseData *pd, FsmAp *graph ); - Action *newLmAction( ParseData *pd, const InputLoc &loc, const char *name, - InlineList *inlineList ); - void makeActions( ParseData *pd ); - void findName( ParseData *pd ); - void restart( FsmAp *graph, TransAp *trans ); - void restart( FsmAp *graph, CondAp *cond ); -}; - - -/* Priority name dictionary. */ -typedef AvlMapEl<std::string, int> PriorDictEl; -typedef AvlMap<std::string, int, CmpString> PriorDict; - -struct NameMapVal -{ - Vector<NameInst*> vals; -}; - -/* Tree of instantiated names. */ -typedef AvlMapEl<std::string, NameMapVal*> NameMapEl; -typedef AvlMap<std::string, NameMapVal*, CmpString> NameMap; -typedef Vector<NameInst*> NameVect; - -/* Node in the tree of instantiated names. */ -struct NameInst -{ - NameInst( const InputLoc &loc, NameInst *parent, std::string name, int id, bool isLabel ) : - loc(loc), parent(parent), name(name), id(id), isLabel(isLabel), - isLongestMatch(false), numRefs(0), numUses(0), start(0), final(0) {} - - ~NameInst(); - - InputLoc loc; - - /* Keep parent pointers in the name tree to retrieve - * fully qulified names. */ - NameInst *parent; - - std::string name; - int id; - bool isLabel; - bool isLongestMatch; - - int numRefs; - int numUses; - - /* Names underneath us, excludes anonymous names. */ - NameMap children; - - /* All names underneath us in order of appearance. */ - NameVect childVect; - - /* Join scopes need an implicit "final" target. */ - NameInst *start, *final; - - /* During a fsm generation walk, lists the names that are referenced by - * epsilon operations in the current scope. After the link is made by the - * epsilon reference and the join operation is complete, the label can - * have its refcount decremented. Once there are no more references the - * entry point can be removed from the fsm returned. */ - NameVect referencedNames; - - /* Pointers for the name search queue. */ - NameInst *prev, *next; - - /* Check if this name inst or any name inst below is referenced. */ - bool anyRefsRec(); -}; - -typedef DList<NameInst> NameInstList; - -/* Stack frame used in walking the name tree. */ -struct NameFrame -{ - NameInst *prevNameInst; - int prevNameChild; - NameInst *prevLocalScope; -}; - -extern const int ORD_PUSH; -extern const int ORD_RESTORE; -extern const int ORD_COND; -extern const int ORD_COND2; -extern const int ORD_TEST; #endif |