summaryrefslogtreecommitdiff
path: root/src/pdarun.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/pdarun.h')
-rw-r--r--src/pdarun.h467
1 files changed, 467 insertions, 0 deletions
diff --git a/src/pdarun.h b/src/pdarun.h
new file mode 100644
index 0000000..5feb683
--- /dev/null
+++ b/src/pdarun.h
@@ -0,0 +1,467 @@
+/*
+ * Copyright 2007-2012 Adrian Thurston <thurston@complang.org>
+ */
+
+/* This file is part of Colm.
+ *
+ * Colm is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Colm is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Colm; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef __COLM_PDARUN_H
+#define __COLM_PDARUN_H
+
+#include <colm/input.h>
+#include <colm/defs.h>
+#include <colm/tree.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct colm_program;
+
+#define MARK_SLOTS 32
+
+typedef struct _FsmTables
+{
+ long *actions;
+ long *keyOffsets;
+ char *transKeys;
+ long *singleLengths;
+ long *rangeLengths;
+ long *indexOffsets;
+ long *transTargsWI;
+ long *transActionsWI;
+ long *toStateActions;
+ long *fromStateActions;
+ long *eofActions;
+ long *eofTargs;
+ long *entryByRegion;
+
+ long numStates;
+ long numActions;
+ long numTransKeys;
+ long numSingleLengths;
+ long numRangeLengths;
+ long numIndexOffsets;
+ long numTransTargsWI;
+ long numTransActionsWI;
+ long numRegions;
+
+ long startState;
+ long firstFinal;
+ long errorState;
+
+ struct GenAction **actionSwitch;
+ long numActionSwitch;
+} FsmTables;
+
+typedef struct _FsmRun
+{
+ FsmTables *tables;
+
+ RunBuf *consumeBuf;
+
+ /* FsmRun State. */
+ long region, preRegion;
+ long cs, ncs, act;
+ char *start;
+ char *tokstart;
+ long tokend;
+ long toklen;
+ char *p, *pe;
+
+ /* Bits. */
+ char eof;
+ char returnResult;
+ char skipToklen;
+
+ char *mark[MARK_SLOTS];
+ long matchedToken;
+} FsmRun;
+
+void undoStreamPull( StreamImpl *inputStream, const char *data, long length );
+
+#if SIZEOF_LONG != 4 && SIZEOF_LONG != 8
+ #error "SIZEOF_LONG contained an unexpected value"
+#endif
+
+struct _Execution;
+
+typedef struct _RtCodeVect
+{
+ Code *data;
+ long tabLen;
+ long allocLen;
+
+ /* FIXME: leak when freed. */
+} RtCodeVect;
+
+void listAddAfter( List *list, ListEl *prev_el, ListEl *new_el );
+void listAddBefore( List *list, ListEl *next_el, ListEl *new_el );
+
+void listPrepend( List *list, ListEl *new_el );
+void listAppend( List *list, ListEl *new_el );
+
+ListEl *listDetach( List *list, ListEl *el );
+ListEl *listDetachFirst(List *list );
+ListEl *listDetachLast(List *list );
+
+long listLength(List *list);
+
+typedef struct _FunctionInfo
+{
+ const char *name;
+ long frameId;
+ long argSize;
+ long frameSize;
+} FunctionInfo;
+
+/*
+ * Program Data.
+ */
+
+typedef struct _PatConsInfo
+{
+ long offset;
+ long numBindings;
+} PatConsInfo;
+
+typedef struct _PatConsNode
+{
+ long id;
+ long prodNum;
+ long next;
+ long child;
+ long bindId;
+ const char *data;
+ long length;
+ long leftIgnore;
+ long rightIgnore;
+
+ /* Just match nonterminal, don't go inside. */
+ unsigned char stop;
+} PatConsNode;
+
+/* FIXME: should have a descriptor for object types to give the length. */
+
+typedef struct _LangElInfo
+{
+ const char *name;
+ const char *xmlTag;
+ unsigned char repeat;
+ unsigned char list;
+ unsigned char literal;
+ unsigned char ignore;
+
+ long frameId;
+
+ long objectTypeId;
+ long ofiOffset;
+ long objectLength;
+
+// long contextTypeId;
+// long contextLength;
+
+ long termDupId;
+ long genericId;
+ long markId;
+ long captureAttr;
+ long numCaptureAttr;
+} LangElInfo;
+
+typedef struct _ObjFieldInfo
+{
+ int typeId;
+} ObjFieldInfo;
+
+typedef struct _ProdInfo
+{
+ unsigned long lhsId;
+ short prodNum;
+ long length;
+ const char *name;
+ long frameId;
+ unsigned char lhsUpref;
+ unsigned char *copy;
+ long copyLen;
+} ProdInfo;
+
+/* Must match the LocalType enum. */
+#define LI_Tree 1
+#define LI_Iter 2
+#define LI_RevIter 3
+#define LI_UserIter 4
+
+typedef struct _LocalInfo
+{
+ char type;
+ short offset;
+} LocalInfo;
+
+typedef struct _FrameInfo
+{
+ Code *codeWV;
+ long codeLenWV;
+ Code *codeWC;
+ long codeLenWC;
+ LocalInfo *locals;
+ long localsLen;
+ long argSize;
+ long frameSize;
+} FrameInfo;
+
+typedef struct _RegionInfo
+{
+ long defaultToken;
+ long eofFrameId;
+ int ciLelId;
+} RegionInfo;
+
+typedef struct _CaptureAttr
+{
+ long mark_enter;
+ long mark_leave;
+ long offset;
+} CaptureAttr;
+
+typedef struct _PdaTables
+{
+ /* Parser table data. */
+ int *indicies;
+ int *owners;
+ int *keys;
+ unsigned int *offsets;
+ unsigned int *targs;
+ unsigned int *actInds;
+ unsigned int *actions;
+ int *commitLen;
+ int *tokenRegionInds;
+ int *tokenRegions;
+ int *tokenPreRegions;
+
+ int numIndicies;
+ int numKeys;
+ int numStates;
+ int numTargs;
+ int numActInds;
+ int numActions;
+ int numCommitLen;
+ int numRegionItems;
+ int numPreRegionItems;
+} PdaTables;
+
+typedef struct _PoolBlock
+{
+ void *data;
+ struct _PoolBlock *next;
+} PoolBlock;
+
+typedef struct _PoolItem
+{
+ struct _PoolItem *next;
+} PoolItem;
+
+typedef struct _PoolAlloc
+{
+ PoolBlock *head;
+ long nextel;
+ PoolItem *pool;
+ int sizeofT;
+} PoolAlloc;
+
+typedef struct _PdaRun
+{
+ int numRetry;
+ ParseTree *stackTop;
+ Ref *tokenList;
+ int cs;
+ int nextRegionInd;
+
+ PdaTables *tables;
+ int parserId;
+
+ /* Reused. */
+ RtCodeVect rcodeCollect;
+ RtCodeVect reverseCode;
+
+ int stopParsing;
+ long stopTarget;
+
+ ParseTree *accumIgnore;
+
+ Kid *btPoint;
+
+ struct Bindings *bindings;
+
+ int revertOn;
+
+ Tree *context;
+
+ int stop;
+ int parseError;
+
+ long steps;
+ long targetSteps;
+
+ int onDeck;
+
+ /*
+ * Data we added when refactoring the parsing engine into a coroutine.
+ */
+
+ ParseTree *parseInput;
+ FrameInfo *fi;
+ int reduction;
+ ParseTree *redLel;
+ int curState;
+ ParseTree *lel;
+ int triggerUndo;
+
+ int tokenId;
+ Head *tokdata;
+ int frameId;
+ int next;
+ ParseTree *undoLel;
+
+ int checkNext;
+ int checkStop;
+
+ /* The lhs is sometimes saved before reduction actions in case it is
+ * replaced and we need to restore it on backtracking */
+ Tree *parsed;
+
+ int reject;
+
+ /* Instruction pointer to use when we stop parsing and execute code. */
+ Code *code;
+
+ int rcBlockCount;
+
+ Tree *parseErrorText;
+
+ FsmRun *fsmRun;
+ FsmRun _fsmRun;
+} PdaRun;
+
+void initPdaRun( struct colm_program *prg, PdaRun *pdaRun, PdaTables *tables,
+ int parserId, long stopTarget, int revertOn, Tree *context );
+void clearPdaRun( struct colm_program *prg, struct colm_tree **sp, PdaRun *pdaRun );
+void rtCodeVectReplace( RtCodeVect *vect, long pos, const Code *val, long len );
+void rtCodeVectEmpty( RtCodeVect *vect );
+void rtCodeVectRemove( RtCodeVect *vect, long pos, long len );
+
+void initRtCodeVect( RtCodeVect *codeVect );
+
+//inline static void remove( RtCodeVect *vect, long pos );
+inline static void appendCode( RtCodeVect *vect, const Code val );
+inline static void appendCode2( RtCodeVect *vect, const Code *val, long len );
+inline static void appendHalf( RtCodeVect *vect, Half half );
+inline static void appendWord( RtCodeVect *vect, Word word );
+
+inline static void appendCode2( RtCodeVect *vect, const Code *val, long len )
+{
+ rtCodeVectReplace( vect, vect->tabLen, val, len );
+}
+
+inline static void appendCode( RtCodeVect *vect, const Code val )
+{
+ rtCodeVectReplace( vect, vect->tabLen, &val, 1 );
+}
+
+inline static void appendHalf( RtCodeVect *vect, Half half )
+{
+ /* not optimal. */
+ appendCode( vect, half & 0xff );
+ appendCode( vect, (half>>8) & 0xff );
+}
+
+inline static void appendWord( RtCodeVect *vect, Word word )
+{
+ /* not optimal. */
+ appendCode( vect, word & 0xff );
+ appendCode( vect, (word>>8) & 0xff );
+ appendCode( vect, (word>>16) & 0xff );
+ appendCode( vect, (word>>24) & 0xff );
+ #if SIZEOF_LONG == 8
+ appendCode( vect, (word>>32) & 0xff );
+ appendCode( vect, (word>>40) & 0xff );
+ appendCode( vect, (word>>48) & 0xff );
+ appendCode( vect, (word>>56) & 0xff );
+ #endif
+}
+
+void incrementSteps( PdaRun *pdaRun );
+void decrementSteps( PdaRun *pdaRun );
+
+int makeReverseCode( PdaRun *pdaRun );
+void transferReverseCode( PdaRun *pdaRun, ParseTree *tree );
+
+void clearStreamImpl( struct colm_program *prg, Tree **sp, StreamImpl *inputStream );
+void initSourceStream( StreamImpl *in );
+void clearSourceStream( struct colm_program *prg, Tree **sp, StreamImpl *sourceStream );
+
+
+void clearContext( PdaRun *pdaRun, Tree **sp );
+Kid *extractIgnore( PdaRun *pdaRun );
+long stackTopTarget( struct colm_program *prg, PdaRun *pdaRun );
+void runCommit( PdaRun *pdaRun );
+int isParserStopFinished( PdaRun *pdaRun );
+void pdaRunMatch( PdaRun *pdaRun, Kid *tree, Kid *pattern );
+
+/* Offset can be used to look at the next nextRegionInd. */
+int pdaRunGetNextRegion( PdaRun *pdaRun, int offset );
+int pdaRunGetNextPreRegion( PdaRun *pdaRun );
+
+#define PcrStart 1
+#define PcrDone 2
+#define PcrReduction 3
+#define PcrGeneration 4
+#define PcrPreEof 5
+#define PcrReverse 6
+
+long parseToken( struct colm_program *prg, Tree **sp, PdaRun *pdaRun,
+ FsmRun *fsmRun, StreamImpl *inputStream, long entry );
+
+long undoParse( Tree **sp, PdaRun *pdaRun, FsmRun *fsmRun, StreamImpl *inputStream, Tree *tree );
+
+Head *streamPull( struct colm_program *prg, PdaRun *pdaRun, StreamImpl *is, long length );
+Head *stringAllocPointer( struct colm_program *prg, const char *data, long length );
+
+void streamPushText( StreamImpl *inputStream, const char *data, long length );
+void streamPushTree( StreamImpl *inputStream, Tree *tree, int ignore );
+void streamPushStream( StreamImpl *inputStream, Tree *tree );
+void undoStreamPush( struct colm_program *prg, Tree **sp, StreamImpl *inputStream, long length );
+void undoStreamAppend( struct colm_program *prg, Tree **sp, StreamImpl *inputStream, struct colm_tree *tree, long length );
+Kid *makeTokenWithData( struct colm_program *prg, PdaRun *pdaRun, FsmRun *fsmRun,
+ StreamImpl *inputStream, int id, Head *tokdata );
+
+void pushBinding( PdaRun *pdaRun, ParseTree *parseTree );
+
+void executeGenerationAction( struct colm_program *prg, Tree **sp, FsmRun *fsmRun, PdaRun *pdaRun,
+ StreamImpl *inputStream, int frameId, Code *code, long id, Head *tokdata );
+Kid *extractIgnore( PdaRun *pdaRun );
+void clearIgnoreList( struct colm_program *prg, Tree **sp, Kid *kid );
+long parseLoop( struct colm_program *prg, Tree **sp, PdaRun *pdaRun,
+ StreamImpl *inputStream, long entry );
+Tree *getParsedRoot( PdaRun *pdaRun, int stop );
+
+void resetToken( PdaRun *pdaRun );
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif