diff options
Diffstat (limited to 'src/bytecode.h')
-rw-r--r-- | src/bytecode.h | 513 |
1 files changed, 513 insertions, 0 deletions
diff --git a/src/bytecode.h b/src/bytecode.h new file mode 100644 index 0000000..bfc7eb4 --- /dev/null +++ b/src/bytecode.h @@ -0,0 +1,513 @@ +/* + * Copyright 2007-2012 Adrian Thurston <thurston@complang.org> + */ + +/* This file is part of Colm. + * + * Colm is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Colm is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Colm; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + /* Declares the instruction opcode numbers, tree type and flag constants, call-frame layout, VM stack macros and runtime function prototypes. */ +#ifndef _BYTECODE_H +#define _BYTECODE_H + +#include <colm/pdarun.h> +#include <colm/tree.h> + +#ifdef __cplusplus +extern "C" { +#endif + /* SIZEOF_LONG is not defined in this header. If it is undefined, or has any value other than 4 or 8, the error directive below stops the build. */ +#if SIZEOF_LONG != 4 && SIZEOF_LONG != 8 + #error "SIZEOF_LONG contained an unexpected value" +#endif + /* Short aliases for unsigned long and unsigned char. */ +typedef unsigned long ulong; +typedef unsigned char uchar; + /* Instruction opcodes (IN_*). The values in this first part of the list run consecutively from 0x01 to 0x1b. */ +#define IN_LOAD_INT 0x01 +#define IN_LOAD_STR 0x02 +#define IN_LOAD_NIL 0x03 +#define IN_LOAD_TRUE 0x04 +#define IN_LOAD_FALSE 0x05 +#define IN_LOAD_TREE 0x06 +#define IN_LOAD_WORD 0x07 + +#define IN_ADD_INT 0x08 +#define IN_SUB_INT 0x09 +#define IN_MULT_INT 0x0a +#define IN_DIV_INT 0x0b + +#define IN_TST_EQL 0x0c +#define IN_TST_NOT_EQL 0x0d +#define IN_TST_LESS 0x0e +#define IN_TST_GRTR 0x0f +#define IN_TST_LESS_EQL 0x10 +#define IN_TST_GRTR_EQL 0x11 +#define IN_TST_LOGICAL_AND 0x12 +#define IN_TST_LOGICAL_OR 0x13 + +#define IN_NOT 0x14 + +#define IN_JMP 0x15 +#define IN_JMP_FALSE 0x16 +#define IN_JMP_TRUE 0x17 + +#define IN_STR_ATOI 0x18 +#define IN_STR_LENGTH 0x19 +#define IN_CONCAT_STR 0x1a +#define IN_TREE_TRIM 0x1b + 
+#define IN_INIT_LOCALS 0x1c +#define IN_POP 0x1d +#define IN_POP_N_WORDS 0x1e +#define IN_DUP_TOP 0x1f +#define IN_TOP_SWAP 0x20 + /* Instruction opcodes, continued. IN_TREE_CAST in the next group takes 0xe4 instead of the next consecutive value. */ +#define IN_REJECT 0x21 +#define IN_MATCH 0x22 +#define IN_CONSTRUCT 0x23 +#define IN_TREE_NEW 0x24 +#define IN_TREE_CAST 0xe4 + /* NOTE(review): the _R, _WC, _WV and _BKT suffixes used from here on are not explained in this header; presumably read, write-commit, write-reversible and backtrack variants of one operation. Confirm against the interpreter source. */ +#define IN_GET_LOCAL_R 0x25 +#define IN_GET_LOCAL_WC 0x26 +#define IN_SET_LOCAL_WC 0x27 + +#define IN_GET_LOCAL_REF_R 0x28 +#define IN_GET_LOCAL_REF_WC 0x29 +#define IN_SET_LOCAL_REF_WC 0x2a + +#define IN_SAVE_RET 0x2b + +#define IN_GET_FIELD_R 0x2c +#define IN_GET_FIELD_WC 0x2d +#define IN_GET_FIELD_WV 0x2e +#define IN_GET_FIELD_BKT 0x2f + +#define IN_SET_FIELD_WV 0x30 +#define IN_SET_FIELD_WC 0x31 +#define IN_SET_FIELD_BKT 0x32 +#define IN_SET_FIELD_LEAVE_WC 0x33 + +#define IN_GET_MATCH_LENGTH_R 0x34 +#define IN_GET_MATCH_TEXT_R 0x35 + +#define IN_GET_TOKEN_DATA_R 0x36 +#define IN_SET_TOKEN_DATA_WC 0x37 +#define IN_SET_TOKEN_DATA_WV 0x38 +#define IN_SET_TOKEN_DATA_BKT 0x39 + +#define IN_GET_TOKEN_POS_R 0x3a +#define IN_GET_TOKEN_LINE_R 0x3b + +#define IN_INIT_RHS_EL 0x3c +#define IN_INIT_LHS_EL 0x3d +#define IN_INIT_CAPTURES 0x3e +#define IN_STORE_LHS_EL 0x3f +#define IN_RESTORE_LHS 0x40 + +#define IN_TRITER_FROM_REF 0x41 +#define IN_TRITER_ADVANCE 0x42 +#define IN_TRITER_NEXT_CHILD 0x43 +#define IN_TRITER_GET_CUR_R 0x44 +#define IN_TRITER_GET_CUR_WC 0x45 +#define IN_TRITER_SET_CUR_WC 0x46 +#define IN_TRITER_DESTROY 0x47 +#define IN_TRITER_NEXT_REPEAT 0x48 +#define IN_TRITER_PREV_REPEAT 0x49 + +#define IN_REV_TRITER_FROM_REF 0x4a +#define IN_REV_TRITER_DESTROY 0x4b +#define IN_REV_TRITER_PREV_CHILD 0x4c + +#define IN_UITER_DESTROY 0x4d +#define IN_UITER_CREATE_WV 0x4e +#define IN_UITER_CREATE_WC 0x4f +#define IN_UITER_ADVANCE 0x50 +#define IN_UITER_GET_CUR_R 0x51 +#define IN_UITER_GET_CUR_WC 0x52 +#define IN_UITER_SET_CUR_WC 0x53 + +#define IN_TREE_SEARCH 0x54 + +#define IN_LOAD_GLOBAL_R 0x55 +#define IN_LOAD_GLOBAL_WV 0x56 +#define IN_LOAD_GLOBAL_WC 0x57 +#define IN_LOAD_GLOBAL_BKT 0x58 + /* The IN_PTR_DEREF_* values jump from 0x5a to 0x60. Of the skipped values, 0x5b and 0x5c are assigned to IN_GET_PARSER_MEM_R and IN_CONTIGUOUS elsewhere in this header; 0x5d to 0x5f are not assigned here. */ +#define 
IN_PTR_DEREF_R 0x59 +#define IN_PTR_DEREF_WV 0x5a +#define IN_PTR_DEREF_WC 0x60 +#define IN_PTR_DEREF_BKT 0x61 + /* IN_REF_FROM_BACK in the next group takes 0xe3 instead of a consecutive value. */ +#define IN_REF_FROM_LOCAL 0x62 +#define IN_REF_FROM_REF 0x63 +#define IN_REF_FROM_QUAL_REF 0x64 +#define IN_REF_FROM_BACK 0xe3 +#define IN_TRITER_REF_FROM_CUR 0x65 +#define IN_UITER_REF_FROM_CUR 0x66 + +#define IN_MAP_LENGTH 0x67 +#define IN_MAP_FIND 0x68 +#define IN_MAP_INSERT_WV 0x69 +#define IN_MAP_INSERT_WC 0x6a +#define IN_MAP_INSERT_BKT 0x6b +#define IN_MAP_STORE_WV 0x6c +#define IN_MAP_STORE_WC 0x6d +#define IN_MAP_STORE_BKT 0x6e +#define IN_MAP_REMOVE_WV 0x6f +#define IN_MAP_REMOVE_WC 0x70 +#define IN_MAP_REMOVE_BKT 0x71 + +#define IN_LIST_LENGTH 0x72 +#define IN_LIST_APPEND_WV 0x73 +#define IN_LIST_APPEND_WC 0x74 +#define IN_LIST_APPEND_BKT 0x75 +#define IN_LIST_REMOVE_END_WV 0x76 +#define IN_LIST_REMOVE_END_WC 0x77 +#define IN_LIST_REMOVE_END_BKT 0x78 + +#define IN_GET_LIST_MEM_R 0x79 +#define IN_GET_LIST_MEM_WC 0x7a +#define IN_GET_LIST_MEM_WV 0x7b +#define IN_GET_LIST_MEM_BKT 0x7c +#define IN_SET_LIST_MEM_WV 0x7d +#define IN_SET_LIST_MEM_WC 0x7e +#define IN_SET_LIST_MEM_BKT 0x7f + +#define IN_VECTOR_LENGTH 0x80 +#define IN_VECTOR_APPEND_WV 0x81 +#define IN_VECTOR_APPEND_WC 0x82 +#define IN_VECTOR_APPEND_BKT 0x83 +#define IN_VECTOR_INSERT_WV 0x84 +#define IN_VECTOR_INSERT_WC 0x85 +#define IN_VECTOR_INSERT_BKT 0x86 + +#define IN_PRINT 0x87 +#define IN_PRINT_XML_AC 0x88 +#define IN_PRINT_XML 0x89 +#define IN_PRINT_STREAM 0x8a + +#define IN_HALT 0x8b + +#define IN_CALL_WC 0x8c +#define IN_CALL_WV 0x8d +#define IN_RET 0x8e +#define IN_YIELD 0x8f +#define IN_STOP 0x90 + +#define IN_STR_UORD8 0x91 +#define IN_STR_SORD8 0x92 +#define IN_STR_UORD16 0x93 +#define IN_STR_SORD16 0x94 +#define IN_STR_UORD32 0x95 +#define IN_STR_SORD32 0x96 + +#define IN_INT_TO_STR 0x97 +#define IN_TREE_TO_STR 0x98 +#define IN_TREE_TO_STR_TRIM 0x99 + +#define IN_CREATE_TOKEN 0x9a +#define IN_MAKE_TOKEN 0x9b +#define IN_MAKE_TREE 0x9c +#define IN_CONSTRUCT_TERM 0x9d + 
+#define IN_INPUT_PULL_WV 0x9e +#define IN_INPUT_PULL_WC 0xe1 +#define IN_INPUT_PULL_BKT 0x9f + /* Instruction opcodes, continued. In the group above, IN_INPUT_PULL_WC takes 0xe1 while its _WV and _BKT siblings take the consecutive values 0x9e and 0x9f. */ +#define IN_PARSE_SAVE_STEPS 0xa0 +#define IN_PARSE_INIT_BKT 0xa1 + +#define IN_PARSE_FRAG_WC 0xa2 +#define IN_PARSE_FRAG_EXIT_WC 0xa3 + +#define IN_PARSE_FRAG_WV 0xa4 +#define IN_PARSE_FRAG_EXIT_WV 0xa5 + +#define IN_PARSE_FRAG_BKT 0xa6 +#define IN_PARSE_FRAG_EXIT_BKT 0xa7 + +#define IN_INPUT_APPEND_WC 0xa8 +#define IN_INPUT_APPEND_WV 0xa9 +#define IN_INPUT_APPEND_BKT 0xaa + +#define IN_PARSE_FINISH_WC 0xab +#define IN_PARSE_FINISH_EXIT_WC 0xac + +#define IN_PARSE_FINISH_WV 0xad +#define IN_PARSE_FINISH_EXIT_WV 0xae + +#define IN_PARSE_FINISH_BKT 0xaf +#define IN_PARSE_FINISH_EXIT_BKT 0xb0 + /* IN_CONTIGUOUS in the next group takes 0x5c instead of a consecutive value. */ +#define IN_PCR_CALL 0xb1 +#define IN_PCR_RET 0xb2 +#define IN_PCR_END_DECK 0xb3 +#define IN_CONTIGUOUS 0x5c + +#define IN_OPEN_FILE 0xb4 +#define IN_GET_STDIN 0xb5 +#define IN_GET_STDOUT 0xb6 +#define IN_GET_STDERR 0xb7 +#define IN_LOAD_ARGV 0xb8 +#define IN_TO_UPPER 0xb9 +#define IN_TO_LOWER 0xba +#define IN_EXIT 0xbb + /* The value 0xbc is skipped here and is not assigned to any opcode in this header. */ +#define IN_LOAD_PARSER_R 0xbd +#define IN_LOAD_PARSER_WV 0xbe +#define IN_LOAD_PARSER_WC 0xbf +#define IN_LOAD_PARSER_BKT 0xc0 + +#define IN_LOAD_INPUT_R 0xc1 +#define IN_LOAD_INPUT_WV 0xc2 +#define IN_LOAD_INPUT_WC 0xc3 +#define IN_LOAD_INPUT_BKT 0xc4 + +#define IN_INPUT_PUSH_WV 0xc5 +#define IN_INPUT_PUSH_BKT 0xc6 +#define IN_INPUT_PUSH_IGNORE_WV 0xc7 + +#define IN_LOAD_CONTEXT_R 0xc8 +#define IN_LOAD_CONTEXT_WV 0xc9 +#define IN_LOAD_CONTEXT_WC 0xca +#define IN_LOAD_CONTEXT_BKT 0xcb + /* The value 0xcc is skipped here because it is assigned to IN_GET_ERROR elsewhere in this header. */ +#define IN_GET_PARSER_CTX_R 0xcd +#define IN_GET_PARSER_CTX_WC 0xce +#define IN_GET_PARSER_CTX_WV 0xcf +#define IN_SET_PARSER_CTX_WC 0xd0 +#define IN_SET_PARSER_CTX_WV 0xd1 + +#define IN_LOAD_CTX_R 0xd2 +#define IN_LOAD_CTX_WC 0xd3 +#define IN_LOAD_CTX_WV 0xd4 +#define IN_LOAD_CTX_BKT 0xd5 + +#define IN_SPRINTF 0xd6 + +#define IN_GET_RHS_VAL_R 0xd7 +#define IN_GET_RHS_VAL_WC 0xd8 +#define IN_GET_RHS_VAL_WV 0xd9 +#define IN_GET_RHS_VAL_BKT 0xda +#define IN_SET_RHS_VAL_WC 0xdb 
+#define IN_SET_RHS_VAL_WV 0xdc +#define IN_SET_RHS_VAL_BKT 0xdd + +#define IN_CONSTRUCT_INPUT 0xde +#define IN_SET_INPUT 0xdf +#define IN_GET_INPUT 0xe0 + /* NOTE(review): only IN_GET_PARSER_MEM_R has its own value (0x5b). The other six IN_GET/SET_PARSER_MEM_* opcodes are all defined as 0x00, so they cannot be told apart from one another. They look like unassigned placeholders; confirm before relying on them. */ +#define IN_GET_PARSER_MEM_R 0x5b +#define IN_GET_PARSER_MEM_WC 0x00 +#define IN_GET_PARSER_MEM_WV 0x00 +#define IN_GET_PARSER_MEM_BKT 0x00 +#define IN_SET_PARSER_MEM_WC 0x00 +#define IN_SET_PARSER_MEM_WV 0x00 +#define IN_SET_PARSER_MEM_BKT 0x00 + +#define IN_GET_ERROR 0xcc +#define IN_SET_ERROR 0xe2 + /* IN_SYSTEM (0xe5) is the highest opcode value defined in this header. */ +#define IN_SYSTEM 0xe5 + + +/* Types */ +#define TYPE_NIL 0x01 +#define TYPE_TREE 0x02 +#define TYPE_REF 0x03 +#define TYPE_PTR 0x04 +#define TYPE_ITER 0x05 +#define TYPE_IGNORE_LIST 0x06 + +/* Types of Generics. */ +#define GEN_LIST 0x10 +#define GEN_MAP 0x11 +#define GEN_VECTOR 0x12 +#define GEN_PARSER 0x13 + +/* Known language element ids. */ +#define LEL_ID_PTR 1 +#define LEL_ID_VOID 2 +#define LEL_ID_BOOL 3 +#define LEL_ID_INT 4 +#define LEL_ID_STR 5 +#define LEL_ID_STREAM 6 +#define LEL_ID_IGNORE 7 + +/* + * Flags + */ + +/* A tree that has been generated by a termDup. */ +#define PF_TERM_DUP 0x0001 + +/* Has been processed by the commit function. All children have also been + * processed. */ +#define PF_COMMITTED 0x0002 + +/* Created by a token generation action, not made from the input. */ +#define PF_ARTIFICIAL 0x0004 + +/* Named node from a pattern or constructor. */ +#define PF_NAMED 0x0008 + +/* There is reverse code associated with this tree node. */ +#define PF_HAS_RCODE 0x0010 + /* Every PF_* and AF_* mask in this section is a single, distinct bit, so no two of these flag values overlap. */ +#define PF_RIGHT_IGNORE 0x0020 + +#define PF_LEFT_IL_ATTACHED 0x0400 +#define PF_RIGHT_IL_ATTACHED 0x0800 + +#define AF_LEFT_IGNORE 0x0100 +#define AF_RIGHT_IGNORE 0x0200 + +#define AF_SUPPRESS_LEFT 0x4000 +#define AF_SUPPRESS_RIGHT 0x8000 + +/* + * Call stack. + */ + +/* Number of spots in the frame, after the args. */ +#define FR_AA 4 + +/* Positions relative to the frame pointer. 
*/ +#define FR_RV 3 /* return value */ +#define FR_RI 2 /* return instruction */ +#define FR_RFP 1 /* return frame pointer */ +#define FR_RFD 0 /* return frame id. */ + +/* + * Calling Convention: + * a1 + * a2 + * a3 + * ... + * return value FR_RV + * return instr FR_RI + * return frame ptr FR_RFP + * return frame id FR_RFD + */ + +/* + * User iterator call stack. + * Adds an iframe pointer, removes the return value. + */ + +/* Number of spots in the frame, after the args. */ +#define IFR_AA 3 + +/* Positions relative to the frame pointer. */ +#define IFR_RIN 2 /* return instruction */ +#define IFR_RIF 1 /* return iframe pointer */ +#define IFR_RFR 0 /* return frame pointer */ + /* The vm_* stack macros below take no stack argument: they expand to code that reads and assigns variables named sp and prg, which must be in scope where the macro is used. Pushing pre-decrements sp and popping increments it, so the stack grows toward lower addresses. When sp would leave the range bounded by prg->sb_beg and prg->sb_end, the macros call vm_bs_add() or vm_bs_pop() and assign the result back to sp. */ +/* Exported to modules other than bytecode.c */ +#define vm_push(i) ( ( sp == prg->sb_beg ? (sp = vm_bs_add(prg, sp, 1)) : 0 ), (*(--sp) = (i)) ) +#define vm_pushn(n) ( ( (sp-(n)) < prg->sb_beg ? (sp = vm_bs_add(prg, sp, n)) : 0 ), (sp -= (n)) ) + /* The three pop macros are written as ({ ... }) statement expressions, which is a GNU C extension. vm_pop() yields the value that was on top of the stack; it names the type SW, which is typedef'd further down in this header. */ +#define vm_pop() ({ SW r = *sp; (sp+1) >= prg->sb_end ? (sp = vm_bs_pop(prg, sp, 1)) : (sp += 1); r; }) +#define vm_pop_ignore() ({ (sp+1) >= prg->sb_end ? (sp = vm_bs_pop(prg, sp, 1)) : (sp += 1); }) +#define vm_popn(n) ({ (sp+(n)) >= prg->sb_end ? (sp = vm_bs_pop(prg, sp, n)) : (sp += (n)); }) + /* vm_contiguous(n) performs the same room check as vm_pushn(n) but does not move sp. */ +#define vm_contiguous(n) ( ( (sp-(n)) < prg->sb_beg ? (sp = vm_bs_add(prg, sp, n)) : 0 ) ) + +#define vm_top() (*sp) +#define vm_ptop() (sp) + +#define vm_ssize() ( prg->sb_total + (prg->sb_end - sp) ) + /* The vm_local* macros index the frame pointers of a variable named exec, which must be in scope where the macro is used. */ +#define vm_local(o) (exec->framePtr[o]) +#define vm_plocal(o) (&exec->framePtr[o]) +#define vm_local_iframe(o) (exec->iframePtr[o]) +#define vm_plocal_iframe(o) (&exec->iframePtr[o]) + +void vm_init( struct colm_program * ); +Tree** vm_bs_add( struct colm_program *, Tree **, int ); +Tree** vm_bs_pop( struct colm_program *, Tree **, int ); +void vm_clear( struct colm_program * ); + /* SW is the type of one stack slot as used by vm_pop(); StackPtr is a pointer to such a slot. */ +typedef Tree *SW; +typedef Tree **StackPtr; + +/* Can't use sizeof() because we have used types that are bigger than the + * serial representation. 
*/ +#define SIZEOF_CODE 1 +#define SIZEOF_HALF 2 +#define SIZEOF_WORD sizeof(Word) + /* State handed to the *Execution() functions and executeCode() declared below. framePtr and iframePtr are the arrays indexed by vm_local() and vm_local_iframe(). */ +typedef struct _Execution +{ + Tree **framePtr; + Tree **iframePtr; + long frameId; + + long rcodeUnitLen; + + Parser *parser; + long steps; + long pcr; +} Execution; + +long stringLength( Head *str ); +const char *stringData( Head *str ); +Head *stringAllocFull( struct colm_program *prg, const char *data, long length ); +Head *initStrSpace( long length ); +Head *stringCopy( struct colm_program *prg, Head *head ); +void stringFree( struct colm_program *prg, Head *head ); +void stringShorten( Head *tokdata, long newlen ); +Head *concatStr( Head *s1, Head *s2 ); +Word strAtoi( Head *str ); +Word strUord16( Head *head ); +Word strUord8( Head *head ); +Word cmpString( Head *s1, Head *s2 ); +Head *stringToUpper( Head *s ); +Head *stringToLower( Head *s ); +Head *stringSprintf( struct colm_program *prg, Str *format, Int *integer ); + +Head *makeLiteral( struct colm_program *prg, long litoffset ); +Head *intToStr( struct colm_program *prg, Word i ); + +Tree *constructString( struct colm_program *prg, Head *s ); + +void mainExecution( struct colm_program *prg, Execution *exec, Code *code ); +void reductionExecution( Execution *exec, Tree **sp ); +void generationExecution( Execution *exec, Tree **sp ); +void reverseExecution( Execution *exec, Tree **sp, RtCodeVect *allRev ); + +Kid *allocAttrs( struct colm_program *prg, long length ); +void freeAttrs( struct colm_program *prg, Kid *attrs ); +void setAttr( Tree *tree, long pos, Tree *val ); +Kid *getAttrKid( Tree *tree, long pos ); + +Tree *splitTree( struct colm_program *prg, Tree *t ); +void rcodeDownrefAll( struct colm_program *prg, Tree **sp, RtCodeVect *cv ); +void commitFull( struct colm_program *prg, Tree **sp, PdaRun *pdaRun, long commitReduce ); +Tree *getParsedRoot( PdaRun *pdaRun, int stop ); +void splitRef( struct colm_program *prg, Tree ***sp, Ref *fromRef ); + +void allocGlobal( struct colm_program *prg ); +Tree **executeCode( 
struct colm_program *prg, Execution *exec, Tree **sp, Code *instr ); +void rcodeDownref( struct colm_program *prg, Tree **sp, Code *instr ); +Code *popReverseCode( RtCodeVect *allRev ); + +#ifdef __cplusplus +} +#endif + +#endif |