summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAdrian Thurston <thurston@colm.net>2020-03-27 00:10:15 +0200
committerAdrian Thurston <thurston@colm.net>2020-03-27 00:10:15 +0200
commit6a3871964c4eee0984a5e5be3f09e3bc5082ddce (patch)
tree203104fe30b206a9c8b5de1cafb5b74f5c77167f
parentd8296ea1c38c13bbdb67adc94c2af2fa67f57dbd (diff)
downloadcolm-6a3871964c4eee0984a5e5be3f09e3bc5082ddce.tar.gz
split longest-match structs into components that live here and in ragel
-rw-r--r--libfsm/action.h12
-rw-r--r--libfsm/asm.h4
-rw-r--r--libfsm/codegen.h2
-rw-r--r--libfsm/fsmap.cc6
-rw-r--r--libfsm/fsmgraph.h166
-rw-r--r--libfsm/gendata.cc4
-rw-r--r--libfsm/idbase.cc6
-rw-r--r--libfsm/parsedata.h216
8 files changed, 177 insertions, 239 deletions
diff --git a/libfsm/action.h b/libfsm/action.h
index 5aea6f7f..5c79befe 100644
--- a/libfsm/action.h
+++ b/libfsm/action.h
@@ -27,9 +27,9 @@
struct NameInst;
struct NameRef;
-struct LongestMatch;
+struct FsmLongestMatch;
struct InlineList;
-struct LongestMatchPart;
+struct FsmLongestMatchPart;
struct Action;
struct CondSpace;
@@ -53,8 +53,8 @@ struct InlineItem
InlineItem( const InputLoc &loc, NameRef *nameRef, Type type ) :
loc(loc), nameRef(nameRef), children(0), type(type) { }
- InlineItem( const InputLoc &loc, LongestMatch *longestMatch,
- LongestMatchPart *longestMatchPart, Type type ) : loc(loc),
+ InlineItem( const InputLoc &loc, FsmLongestMatch *longestMatch,
+ FsmLongestMatchPart *longestMatchPart, Type type ) : loc(loc),
nameRef(0), children(0), longestMatch(longestMatch),
longestMatchPart(longestMatchPart), type(type) { }
@@ -86,8 +86,8 @@ struct InlineItem
NameRef *nameRef;
NameInst *nameTarg;
InlineList *children;
- LongestMatch *longestMatch;
- LongestMatchPart *longestMatchPart;
+ FsmLongestMatch *longestMatch;
+ FsmLongestMatchPart *longestMatchPart;
long substPos;
Action *wrappedAction;
CondSpace *condSpace;
diff --git a/libfsm/asm.h b/libfsm/asm.h
index 3b4229d4..2506e582 100644
--- a/libfsm/asm.h
+++ b/libfsm/asm.h
@@ -48,8 +48,8 @@ struct NameInst;
struct GenInlineItem;
struct GenInlineList;
struct RedAction;
-struct LongestMatch;
-struct LongestMatchPart;
+struct FsmLongestMatch;
+struct FsmLongestMatchPart;
class AsmCodeGen;
struct RedTransAp;
struct RedStateAp;
diff --git a/libfsm/codegen.h b/libfsm/codegen.h
index e1a7877b..dcc24e3b 100644
--- a/libfsm/codegen.h
+++ b/libfsm/codegen.h
@@ -50,7 +50,7 @@ struct NameInst;
struct GenInlineItem;
struct GenInlineList;
struct RedAction;
-struct LongestMatch;
+struct FsmLongestMatch;
struct LongestMatchPart;
string itoa( int i );
diff --git a/libfsm/fsmap.cc b/libfsm/fsmap.cc
index e38680f3..98863f52 100644
--- a/libfsm/fsmap.cc
+++ b/libfsm/fsmap.cc
@@ -55,7 +55,7 @@ bool ActionTable::hasAction( Action *action )
}
/* Insert an action into an action table. */
-void LmActionTable::setAction( int ordering, LongestMatchPart *action )
+void LmActionTable::setAction( int ordering, FsmLongestMatchPart *action )
{
/* Multi-insert in case specific instances of an action appear in a
* transition more than once. */
@@ -314,7 +314,7 @@ void FsmAp::leaveFsmAction( int ordering, Action *action )
}
/* Add functions to the longest match action table for constructing scanners. */
-void FsmAp::longMatchAction( int ordering, LongestMatchPart *lmPart )
+void FsmAp::longMatchAction( int ordering, FsmLongestMatchPart *lmPart )
{
/* Walk all final states. */
for ( StateSet::Iter state = finStateSet; state.lte(); state++ ) {
@@ -1125,7 +1125,7 @@ int FsmAp::compareStateData( const StateAp *state1, const StateAp *state2 )
if ( cmpRes != 0 )
return cmpRes;
- return CmpTable<LongestMatchPart*>::compare(
+ return CmpTable<FsmLongestMatchPart*>::compare(
state1->lmNfaParts, state2->lmNfaParts );
}
diff --git a/libfsm/fsmgraph.h b/libfsm/fsmgraph.h
index 4af13101..aee6f718 100644
--- a/libfsm/fsmgraph.h
+++ b/libfsm/fsmgraph.h
@@ -60,7 +60,7 @@ struct TransAp;
struct StateAp;
struct FsmAp;
struct Action;
-struct LongestMatchPart;
+struct FsmLongestMatchPart;
struct CondSpace;
struct FsmCtx;
struct InlineBlock;
@@ -279,13 +279,13 @@ typedef SBstSet< Action*, CmpOrd<Action*> > ActionSet;
typedef CmpSTable< Action*, CmpOrd<Action*> > CmpActionSet;
/* Transistion Action Element. */
-typedef SBstMapEl< int, LongestMatchPart* > LmActionTableEl;
+typedef SBstMapEl< int, FsmLongestMatchPart* > LmActionTableEl;
/* Transition Action Table. */
struct LmActionTable
- : public SBstMap< int, LongestMatchPart*, CmpOrd<int> >
+ : public SBstMap< int, FsmLongestMatchPart*, CmpOrd<int> >
{
- void setAction( int ordering, LongestMatchPart *action );
+ void setAction( int ordering, FsmLongestMatchPart *action );
void setActions( const LmActionTable &other );
};
@@ -950,7 +950,7 @@ typedef Vector<EptVectEl> EptVect;
typedef BstSet<int> EntryIdSet;
/* Set of longest match items that may be active in a given state. */
-typedef BstSet<LongestMatchPart*> LmItemSet;
+typedef BstSet<FsmLongestMatchPart*> LmItemSet;
/* A Conditions which is to be
* transfered on pending out transitions. */
@@ -1961,7 +1961,7 @@ struct FsmAp
void allTransAction( int ordering, Action *action );
void finishFsmAction( int ordering, Action *action );
void leaveFsmAction( int ordering, Action *action );
- void longMatchAction( int ordering, LongestMatchPart *lmPart );
+ void longMatchAction( int ordering, FsmLongestMatchPart *lmPart );
/* Set conditions. */
CondSpace *addCondSpace( const CondSet &condSet );
@@ -2537,4 +2537,158 @@ template< class Trans > int FsmAp::compareCondBitElimPtr( Trans *trans1, Trans *
return 0;
}
+struct NameInst;
+
+/* Tree nodes. */
+
+struct NfaUnion;
+struct MachineDef;
+struct FsmLongestMatch;
+struct FsmLongestMatchPart;
+struct FsmLmPartList;
+struct Range;
+struct LengthDef;
+struct Action;
+struct InlineList;
+
+/* Reference to a named state. */
+struct NameRef : public Vector<std::string> {};
+typedef Vector<NameRef*> NameRefList;
+typedef Vector<NameInst*> NameTargList;
+
+/*
+ * FsmLongestMatch
+ *
+ * Wherever possible the item match will execute on the character. If not
+ * possible the item match will execute on a lookahead character and either
+ * hold the current char (if one away) or backup.
+ *
+ * How to handle the problem of backing up over a buffer break?
+ *
+ * Don't want to use pending out transitions for embedding item match because
+ * the role of item match action is different: it may sometimes match on the
+ * final transition, or may match on a lookahead character.
+ *
+ * Don't want to invent a new operator just for this. So just trail action
+ * after machine, this means we can only use literal actions.
+ *
+ * The item action may
+ *
+ * What states of the machine will be final. The item actions that wrap around
+ * on the last character will go straight to the start state.
+ *
+ * Some transitions will be lookahead transitions, they will hold the current
+ * character. Crossing them with regular transitions must be restricted
+ * because it does not make sense. The transition cannot simultaneously hold
+ * and consume the current character.
+ */
+struct FsmLongestMatchPart
+{
+ FsmLongestMatchPart( Action *action, int longestMatchId )
+ :
+ action(action),
+ longestMatchId(longestMatchId),
+ inLmSelect(false)
+ { }
+
+ Action *action;
+ Action *setActId;
+ Action *actOnLast;
+ Action *actOnNext;
+ Action *actLagBehind;
+ Action *actNfaOnLast;
+ Action *actNfaOnNext;
+ Action *actNfaOnEof;
+ int longestMatchId;
+ bool inLmSelect;
+ FsmLongestMatch *longestMatch;
+
+ FsmLongestMatchPart *prev, *next;
+};
+
+/* Declare a new type so that ptreetypes.h need not include dlist.h. */
+struct FsmLmPartList
+ : DList<FsmLongestMatchPart> {};
+
+struct FsmLongestMatch
+{
+ /* Construct with a list of joins */
+ FsmLongestMatch( FsmLmPartList *longestMatchList )
+ :
+ longestMatchList(longestMatchList),
+ lmSwitchHandlesError(false)
+ {
+ }
+
+ FsmLmPartList *longestMatchList;
+ bool lmSwitchHandlesError;
+
+ void restart( FsmAp *graph, TransAp *trans );
+ void restart( FsmAp *graph, CondAp *cond );
+};
+
+struct NameMapVal
+{
+ Vector<NameInst*> vals;
+};
+
+/* Tree of instantiated names. */
+typedef AvlMapEl<std::string, NameMapVal*> NameMapEl;
+typedef AvlMap<std::string, NameMapVal*, CmpString> NameMap;
+typedef Vector<NameInst*> NameVect;
+
+/* Node in the tree of instantiated names. */
+struct NameInst
+{
+ NameInst( const InputLoc &loc, NameInst *parent, std::string name, int id, bool isLabel ) :
+ loc(loc), parent(parent), name(name), id(id), isLabel(isLabel),
+ isLongestMatch(false), numRefs(0), numUses(0), start(0), final(0) {}
+
+ ~NameInst();
+
+ InputLoc loc;
+
+ /* Keep parent pointers in the name tree to retrieve
+ * fully qulified names. */
+ NameInst *parent;
+
+ std::string name;
+ int id;
+ bool isLabel;
+ bool isLongestMatch;
+
+ int numRefs;
+ int numUses;
+
+ /* Names underneath us, excludes anonymous names. */
+ NameMap children;
+
+ /* All names underneath us in order of appearance. */
+ NameVect childVect;
+
+ /* Join scopes need an implicit "final" target. */
+ NameInst *start, *final;
+
+ /* During a fsm generation walk, lists the names that are referenced by
+ * epsilon operations in the current scope. After the link is made by the
+ * epsilon reference and the join operation is complete, the label can
+ * have its refcount decremented. Once there are no more references the
+ * entry point can be removed from the fsm returned. */
+ NameVect referencedNames;
+
+ /* Pointers for the name search queue. */
+ NameInst *prev, *next;
+
+ /* Check if this name inst or any name inst below is referenced. */
+ bool anyRefsRec();
+};
+
+typedef DList<NameInst> NameInstList;
+
+extern const int ORD_PUSH;
+extern const int ORD_RESTORE;
+extern const int ORD_COND;
+extern const int ORD_COND2;
+extern const int ORD_TEST;
+
#endif
diff --git a/libfsm/gendata.cc b/libfsm/gendata.cc
index f0be11fa..4e3253ad 100644
--- a/libfsm/gendata.cc
+++ b/libfsm/gendata.cc
@@ -225,7 +225,7 @@ void Reducer::makeLmSwitch( GenInlineList *outList, InlineItem *item )
{
GenInlineItem *lmSwitch = new GenInlineItem( InputLoc(), GenInlineItem::LmSwitch );
GenInlineList *lmList = lmSwitch->children = new GenInlineList;
- LongestMatch *longestMatch = item->longestMatch;
+ FsmLongestMatch *longestMatch = item->longestMatch;
/* We can't put the <exec> here because we may need to handle the error
* case and in that case p should not be changed. Instead use a default
@@ -255,7 +255,7 @@ void Reducer::makeLmSwitch( GenInlineList *outList, InlineItem *item )
}
bool needDefault = false;
- for ( LmPartList::Iter lmi = *longestMatch->longestMatchList; lmi.lte(); lmi++ ) {
+ for ( FsmLmPartList::Iter lmi = *longestMatch->longestMatchList; lmi.lte(); lmi++ ) {
if ( lmi->inLmSelect ) {
if ( lmi->action == 0 )
needDefault = true;
diff --git a/libfsm/idbase.cc b/libfsm/idbase.cc
index 21eca5c3..38850b56 100644
--- a/libfsm/idbase.cc
+++ b/libfsm/idbase.cc
@@ -103,8 +103,8 @@ void FsmCtx::analyzeAction( Action *action, InlineList *inlineList )
/* Need to recurse into longest match items. */
if ( item->type == InlineItem::LmSwitch ) {
- LongestMatch *lm = item->longestMatch;
- for ( LmPartList::Iter lmi = *lm->longestMatchList; lmi.lte(); lmi++ ) {
+ FsmLongestMatch *lm = item->longestMatch;
+ for ( FsmLmPartList::Iter lmi = *lm->longestMatchList; lmi.lte(); lmi++ ) {
if ( lmi->action != 0 )
analyzeAction( action, lmi->action->inlineList );
}
@@ -114,7 +114,7 @@ void FsmCtx::analyzeAction( Action *action, InlineList *inlineList )
item->type == InlineItem::LmOnNext ||
item->type == InlineItem::LmOnLagBehind )
{
- LongestMatchPart *lmi = item->longestMatchPart;
+ FsmLongestMatchPart *lmi = item->longestMatchPart;
if ( lmi->action != 0 )
analyzeAction( action, lmi->action->inlineList );
}
diff --git a/libfsm/parsedata.h b/libfsm/parsedata.h
index 5fe21af8..4d11b1c7 100644
--- a/libfsm/parsedata.h
+++ b/libfsm/parsedata.h
@@ -23,221 +23,5 @@
#ifndef _PARSEDATA_H
#define _PARSEDATA_H
-#include <iostream>
-#include <limits.h>
-
-#include "avlmap.h"
-#include "bstmap.h"
-#include "dlist.h"
-#include "fsmgraph.h"
-#include "compare.h"
-#include "common.h"
-
-#include "ragel.h"
-#include "avlmap.h"
-#include "bstmap.h"
-#include "vector.h"
-#include "dlist.h"
-#include "fsmgraph.h"
-
-struct NameInst;
-struct Join;
-struct ParseData;
-
-/* Tree nodes. */
-
-struct NfaUnion;
-struct MachineDef;
-struct LongestMatch;
-struct LongestMatchPart;
-struct LmPartList;
-struct Range;
-struct LengthDef;
-struct Action;
-struct InlineList;
-
-/* Reference to a named state. */
-struct NameRef : public Vector<std::string> {};
-typedef Vector<NameRef*> NameRefList;
-typedef Vector<NameInst*> NameTargList;
-
-/*
- * LongestMatch
- *
- * Wherever possible the item match will execute on the character. If not
- * possible the item match will execute on a lookahead character and either
- * hold the current char (if one away) or backup.
- *
- * How to handle the problem of backing up over a buffer break?
- *
- * Don't want to use pending out transitions for embedding item match because
- * the role of item match action is different: it may sometimes match on the
- * final transition, or may match on a lookahead character.
- *
- * Don't want to invent a new operator just for this. So just trail action
- * after machine, this means we can only use literal actions.
- *
- * The item action may
- *
- * What states of the machine will be final. The item actions that wrap around
- * on the last character will go straight to the start state.
- *
- * Some transitions will be lookahead transitions, they will hold the current
- * character. Crossing them with regular transitions must be restricted
- * because it does not make sense. The transition cannot simultaneously hold
- * and consume the current character.
- */
-struct LongestMatchPart
-{
- LongestMatchPart( Join *join, Action *action,
- const InputLoc &semiLoc, int longestMatchId )
- :
- join(join), action(action), semiLoc(semiLoc),
- longestMatchId(longestMatchId), inLmSelect(false) { }
-
- InputLoc getLoc();
-
- Join *join;
- Action *action;
- InputLoc semiLoc;
-
- Action *setActId;
- Action *actOnLast;
- Action *actOnNext;
- Action *actLagBehind;
- Action *actNfaOnLast;
- Action *actNfaOnNext;
- Action *actNfaOnEof;
- int longestMatchId;
- bool inLmSelect;
- LongestMatch *longestMatch;
-
- LongestMatchPart *prev, *next;
-};
-
-/* Declare a new type so that ptreetypes.h need not include dlist.h. */
-struct LmPartList
- : DList<LongestMatchPart> {};
-
-struct LongestMatch
-{
- /* Construct with a list of joins */
- LongestMatch( const InputLoc &loc, LmPartList *longestMatchList )
- :
- loc(loc),
- longestMatchList(longestMatchList),
- lmSwitchHandlesError(false),
- nfaConstruction(false)
- { }
-
- InputLoc loc;
- LmPartList *longestMatchList;
- std::string name;
- Action *lmActSelect;
- bool lmSwitchHandlesError;
- bool nfaConstruction;
-
- LongestMatch *next, *prev;
-
- /* Tree traversal. */
- FsmRes walkClassic( ParseData *pd );
- FsmRes walk( ParseData *pd );
-
- FsmRes mergeNfaStates( ParseData *pd, FsmAp *fsm );
- bool onlyOneNfa( ParseData *pd, FsmAp *fsm, StateAp *st, NfaTrans *in );
- bool matchCanFail( ParseData *pd, FsmAp *fsm, StateAp *st );
- void eliminateNfaActions( ParseData *pd, FsmAp *fsm );
- void advanceNfaActions( ParseData *pd, FsmAp *fsm );
- FsmRes buildBaseNfa( ParseData *pd );
- FsmRes walkNfa( ParseData *pd );
-
- void makeNameTree( ParseData *pd );
- void resolveNameRefs( ParseData *pd );
- void transferScannerLeavingActions( FsmAp *graph );
- void runLongestMatch( ParseData *pd, FsmAp *graph );
- Action *newLmAction( ParseData *pd, const InputLoc &loc, const char *name,
- InlineList *inlineList );
- void makeActions( ParseData *pd );
- void findName( ParseData *pd );
- void restart( FsmAp *graph, TransAp *trans );
- void restart( FsmAp *graph, CondAp *cond );
-};
-
-
-/* Priority name dictionary. */
-typedef AvlMapEl<std::string, int> PriorDictEl;
-typedef AvlMap<std::string, int, CmpString> PriorDict;
-
-struct NameMapVal
-{
- Vector<NameInst*> vals;
-};
-
-/* Tree of instantiated names. */
-typedef AvlMapEl<std::string, NameMapVal*> NameMapEl;
-typedef AvlMap<std::string, NameMapVal*, CmpString> NameMap;
-typedef Vector<NameInst*> NameVect;
-
-/* Node in the tree of instantiated names. */
-struct NameInst
-{
- NameInst( const InputLoc &loc, NameInst *parent, std::string name, int id, bool isLabel ) :
- loc(loc), parent(parent), name(name), id(id), isLabel(isLabel),
- isLongestMatch(false), numRefs(0), numUses(0), start(0), final(0) {}
-
- ~NameInst();
-
- InputLoc loc;
-
- /* Keep parent pointers in the name tree to retrieve
- * fully qulified names. */
- NameInst *parent;
-
- std::string name;
- int id;
- bool isLabel;
- bool isLongestMatch;
-
- int numRefs;
- int numUses;
-
- /* Names underneath us, excludes anonymous names. */
- NameMap children;
-
- /* All names underneath us in order of appearance. */
- NameVect childVect;
-
- /* Join scopes need an implicit "final" target. */
- NameInst *start, *final;
-
- /* During a fsm generation walk, lists the names that are referenced by
- * epsilon operations in the current scope. After the link is made by the
- * epsilon reference and the join operation is complete, the label can
- * have its refcount decremented. Once there are no more references the
- * entry point can be removed from the fsm returned. */
- NameVect referencedNames;
-
- /* Pointers for the name search queue. */
- NameInst *prev, *next;
-
- /* Check if this name inst or any name inst below is referenced. */
- bool anyRefsRec();
-};
-
-typedef DList<NameInst> NameInstList;
-
-/* Stack frame used in walking the name tree. */
-struct NameFrame
-{
- NameInst *prevNameInst;
- int prevNameChild;
- NameInst *prevLocalScope;
-};
-
-extern const int ORD_PUSH;
-extern const int ORD_RESTORE;
-extern const int ORD_COND;
-extern const int ORD_COND2;
-extern const int ORD_TEST;
#endif