diff options
author | Xavier Leroy <xavier.leroy@inria.fr> | 1995-05-04 10:15:53 +0000 |
---|---|---|
committer | Xavier Leroy <xavier.leroy@inria.fr> | 1995-05-04 10:15:53 +0000 |
commit | 61bd8ace6bdb2652f4d51d64e3239a7105f56c26 (patch) | |
tree | e8b957df0957c1b483d41d68973824e280445548 | |
parent | 8f9ea2a7b886e3e0a5cfd76b11fe79d083a7f20c (diff) | |
download | ocaml-61bd8ace6bdb2652f4d51d64e3239a7105f56c26.tar.gz |
Passage a la version bootstrappee (franchissement du Rubicon)
git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@2 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02
277 files changed, 34415 insertions, 0 deletions
diff --git a/.depend b/.depend new file mode 100644 index 0000000000..351690b705 --- /dev/null +++ b/.depend @@ -0,0 +1,211 @@ +bytecomp/codegen.cmi: bytecomp/lambda.cmi bytecomp/instruct.cmi +bytecomp/dectree.cmi: bytecomp/lambda.cmi +bytecomp/emitcode.cmi: typing/ident.cmi bytecomp/lambda.cmi \ + bytecomp/instruct.cmi utils/config.cmi +bytecomp/instruct.cmi: typing/ident.cmi bytecomp/lambda.cmi +bytecomp/lambda.cmi: typing/ident.cmi utils/misc.cmi typing/path.cmi \ + parsing/asttypes.cmi +bytecomp/librarian.cmi: utils/config.cmi +bytecomp/linker.cmi: bytecomp/symtable.cmi bytecomp/emitcode.cmi +bytecomp/matching.cmi: typing/ident.cmi bytecomp/lambda.cmi \ + typing/typedtree.cmi parsing/location.cmi +bytecomp/printinstr.cmi: bytecomp/instruct.cmi +bytecomp/printlambda.cmi: bytecomp/lambda.cmi +bytecomp/symtable.cmi: typing/ident.cmi bytecomp/emitcode.cmi +bytecomp/translcore.cmi: typing/ident.cmi bytecomp/lambda.cmi \ + typing/typedtree.cmi parsing/location.cmi parsing/asttypes.cmi +bytecomp/translmod.cmi: bytecomp/lambda.cmi typing/typedtree.cmi +driver/compile.cmi: typing/env.cmi +parsing/lexer.cmi: parsing/parser.cmi +parsing/location.cmi: utils/misc.cmi +parsing/parse.cmi: parsing/parsetree.cmi +parsing/parser.cmi: parsing/parsetree.cmi +parsing/parsetree.cmi: utils/misc.cmi parsing/location.cmi \ + parsing/longident.cmi parsing/asttypes.cmi +toplevel/printval.cmi: typing/path.cmi typing/typedtree.cmi typing/env.cmi +toplevel/topdirs.cmi: parsing/longident.cmi +toplevel/toploop.cmi: parsing/parsetree.cmi typing/env.cmi \ + parsing/longident.cmi +typing/ctype.cmi: typing/ident.cmi typing/typedtree.cmi typing/env.cmi +typing/env.cmi: typing/ident.cmi typing/path.cmi typing/typedtree.cmi \ + parsing/longident.cmi +typing/includecore.cmi: typing/ident.cmi typing/typedtree.cmi \ + typing/env.cmi +typing/includemod.cmi: typing/ident.cmi typing/typedtree.cmi typing/env.cmi +typing/mtype.cmi: typing/ident.cmi typing/path.cmi typing/typedtree.cmi \ + typing/env.cmi +typing/parmatch.cmi: typing/typedtree.cmi parsing/location.cmi +typing/path.cmi: typing/ident.cmi +typing/predef.cmi: typing/ident.cmi typing/path.cmi typing/typedtree.cmi \ + typing/env.cmi +typing/printtyp.cmi: typing/ident.cmi typing/path.cmi typing/typedtree.cmi \ + parsing/longident.cmi +typing/subst.cmi: typing/ident.cmi typing/path.cmi typing/typedtree.cmi +typing/typecore.cmi: parsing/parsetree.cmi typing/typedtree.cmi \ + parsing/location.cmi typing/env.cmi parsing/longident.cmi \ + parsing/asttypes.cmi +typing/typedecl.cmi: typing/ident.cmi parsing/parsetree.cmi \ + typing/typedtree.cmi parsing/location.cmi typing/env.cmi +typing/typedtree.cmi: typing/ident.cmi utils/misc.cmi typing/path.cmi \ + parsing/location.cmi parsing/asttypes.cmi +typing/typemod.cmi: parsing/parsetree.cmi typing/includemod.cmi \ + typing/typedtree.cmi parsing/location.cmi typing/env.cmi \ + parsing/longident.cmi +typing/typetexp.cmi: parsing/parsetree.cmi typing/typedtree.cmi \ + parsing/location.cmi typing/env.cmi parsing/longident.cmi +bytecomp/codegen.cmo: bytecomp/codegen.cmi bytecomp/codegen.cmi \ + bytecomp/lambda.cmi utils/misc.cmi typing/ident.cmi \ + bytecomp/dectree.cmi bytecomp/instruct.cmi parsing/asttypes.cmi +bytecomp/dectree.cmo: bytecomp/dectree.cmi bytecomp/lambda.cmi +bytecomp/emitcode.cmo: bytecomp/emitcode.cmi utils/meta.cmi \ + bytecomp/lambda.cmi utils/config.cmi utils/misc.cmi typing/ident.cmi \ + bytecomp/emitcode.cmi bytecomp/instruct.cmi typing/env.cmi \ + parsing/asttypes.cmi bytecomp/opcodes.cmo +bytecomp/instruct.cmo: bytecomp/instruct.cmi typing/ident.cmi \ + bytecomp/lambda.cmi +bytecomp/lambda.cmo: bytecomp/lambda.cmi typing/ident.cmi utils/misc.cmi \ + utils/cset.cmi typing/path.cmi parsing/asttypes.cmi +bytecomp/librarian.cmo: bytecomp/librarian.cmi utils/misc.cmi \ + bytecomp/emitcode.cmi utils/config.cmi +bytecomp/linker.cmo: bytecomp/linker.cmi utils/cset.cmi utils/config.cmi \ + utils/misc.cmi typing/ident.cmi utils/clflags.cmo bytecomp/symtable.cmi \ + bytecomp/emitcode.cmi bytecomp/opcodes.cmo +bytecomp/matching.cmo: bytecomp/matching.cmi typing/predef.cmi \ + bytecomp/lambda.cmi typing/typedtree.cmi parsing/location.cmi \ + typing/ctype.cmi parsing/asttypes.cmi +bytecomp/printinstr.cmo: bytecomp/printinstr.cmi bytecomp/printlambda.cmi \ + typing/ident.cmi bytecomp/lambda.cmi bytecomp/instruct.cmi +bytecomp/printlambda.cmo: bytecomp/printlambda.cmi bytecomp/lambda.cmi \ + typing/ident.cmi utils/misc.cmi parsing/asttypes.cmi +bytecomp/runtimedef.cmo: bytecomp/runtimedef.cmi +bytecomp/symtable.cmo: bytecomp/symtable.cmi typing/predef.cmi \ + utils/meta.cmi bytecomp/runtimedef.cmi bytecomp/lambda.cmi \ + utils/config.cmi utils/tbl.cmi utils/misc.cmi typing/ident.cmi \ + utils/clflags.cmo bytecomp/symtable.cmi bytecomp/emitcode.cmi \ + parsing/asttypes.cmi +bytecomp/translcore.cmo: bytecomp/translcore.cmi typing/predef.cmi \ + bytecomp/lambda.cmi typing/typedtree.cmi bytecomp/matching.cmi \ + parsing/location.cmi typing/ident.cmi utils/misc.cmi \ + bytecomp/translcore.cmi typing/ctype.cmi typing/path.cmi \ + parsing/asttypes.cmi +bytecomp/translmod.cmo: bytecomp/translmod.cmi bytecomp/translmod.cmi \ + typing/ident.cmi utils/misc.cmi bytecomp/translcore.cmi \ + bytecomp/lambda.cmi typing/typedtree.cmi +driver/compile.cmo: driver/compile.cmi typing/typemod.cmi \ + typing/printtyp.cmi bytecomp/codegen.cmi typing/typedtree.cmi \ + utils/config.cmi bytecomp/translmod.cmi utils/misc.cmi \ + typing/includemod.cmi utils/crc.cmi bytecomp/printinstr.cmi \ + bytecomp/printlambda.cmi parsing/location.cmi parsing/parse.cmi \ + utils/clflags.cmo bytecomp/emitcode.cmi typing/env.cmi +driver/errors.cmo: driver/errors.cmi typing/typemod.cmi typing/typedecl.cmi \ + parsing/parse.cmi parsing/location.cmi bytecomp/librarian.cmi \ + bytecomp/linker.cmi bytecomp/symtable.cmi bytecomp/translcore.cmi \ + parsing/lexer.cmi typing/includemod.cmi typing/env.cmi \ + typing/typecore.cmi typing/typetexp.cmi +driver/main.cmo: utils/clflags.cmo driver/errors.cmi bytecomp/linker.cmi \ + driver/compile.cmi bytecomp/librarian.cmi utils/config.cmi +lex/grammar.cmo: lex/grammar.cmi +parsing/lexer.cmo: parsing/lexer.cmi utils/misc.cmi parsing/parser.cmi +parsing/location.cmo: parsing/location.cmi utils/misc.cmi \ + utils/terminfo.cmi +parsing/parse.cmo: parsing/parse.cmi parsing/parser.cmi parsing/lexer.cmi \ + parsing/location.cmi +parsing/parser.cmo: parsing/parser.cmi parsing/parsetree.cmi \ + parsing/location.cmi utils/misc.cmi utils/clflags.cmo \ + parsing/longident.cmi parsing/asttypes.cmi +stdlib/arg.cmo: stdlib/arg.cmi +stdlib/array.cmo: stdlib/array.cmi +stdlib/baltree.cmo: stdlib/baltree.cmi +stdlib/char.cmo: stdlib/char.cmi +stdlib/filename.cmo: stdlib/filename.cmi +stdlib/format.cmo: stdlib/format.cmi +stdlib/gc.cmo: stdlib/gc.cmi +stdlib/hashtbl.cmo: stdlib/hashtbl.cmi +stdlib/lexing.cmo: stdlib/lexing.cmi +stdlib/list.cmo: stdlib/list.cmi +stdlib/obj.cmo: stdlib/obj.cmi +stdlib/parsing.cmo: stdlib/parsing.cmi +stdlib/pervasives.cmo: stdlib/pervasives.cmi +stdlib/printexc.cmo: stdlib/printexc.cmi +stdlib/printf.cmo: stdlib/printf.cmi +stdlib/queue.cmo: stdlib/queue.cmi +stdlib/set.cmo: stdlib/set.cmi +stdlib/sort.cmo: stdlib/sort.cmi +stdlib/stack.cmo: stdlib/stack.cmi +stdlib/string.cmo: stdlib/string.cmi +stdlib/sys.cmo: stdlib/sys.cmi +tools/dumpobj.cmo: bytecomp/runtimedef.cmi bytecomp/lambda.cmi \ + utils/tbl.cmi utils/config.cmi typing/ident.cmi bytecomp/emitcode.cmi \ + parsing/asttypes.cmi bytecomp/opcodes.cmo +toplevel/expunge.cmo: utils/cset.cmi bytecomp/runtimedef.cmi \ + utils/config.cmi utils/misc.cmi typing/ident.cmi bytecomp/symtable.cmi +toplevel/printval.cmo: toplevel/printval.cmi typing/predef.cmi \ + typing/printtyp.cmi typing/typedtree.cmi typing/ident.cmi \ + typing/ctype.cmi typing/path.cmi typing/env.cmi parsing/longident.cmi +toplevel/topdirs.cmo: toplevel/topdirs.cmi typing/predef.cmi \ + typing/printtyp.cmi utils/meta.cmi typing/typedtree.cmi \ + toplevel/toploop.cmi utils/config.cmi utils/misc.cmi \ + toplevel/printval.cmi bytecomp/linker.cmi bytecomp/symtable.cmi \ + parsing/longident.cmi parsing/parse.cmi parsing/location.cmi \ + typing/ctype.cmi typing/path.cmi bytecomp/emitcode.cmi typing/env.cmi \ + bytecomp/opcodes.cmo +toplevel/toploop.cmo: toplevel/toploop.cmi typing/typemod.cmi \ + typing/printtyp.cmi utils/meta.cmi bytecomp/codegen.cmi \ + parsing/parsetree.cmi typing/typedtree.cmi utils/config.cmi \ + bytecomp/translmod.cmi utils/misc.cmi toplevel/printval.cmi \ + bytecomp/symtable.cmi parsing/longident.cmi bytecomp/printinstr.cmi \ + bytecomp/printlambda.cmi parsing/parse.cmi parsing/location.cmi \ + utils/clflags.cmo driver/errors.cmi bytecomp/emitcode.cmi \ + driver/compile.cmi typing/env.cmi +toplevel/topmain.cmo: utils/clflags.cmo toplevel/toploop.cmi +typing/ctype.cmo: typing/ctype.cmi utils/misc.cmi typing/ctype.cmi \ + typing/path.cmi typing/typedtree.cmi typing/env.cmi +typing/env.cmo: typing/env.cmi typing/predef.cmi typing/typedtree.cmi \ + utils/tbl.cmi utils/config.cmi utils/misc.cmi typing/ident.cmi \ + typing/path.cmi typing/subst.cmi typing/env.cmi parsing/longident.cmi \ + parsing/asttypes.cmi +typing/ident.cmo: typing/ident.cmi utils/misc.cmi +typing/includecore.cmo: typing/includecore.cmi utils/misc.cmi \ + typing/ctype.cmi typing/path.cmi typing/typedtree.cmi +typing/includemod.cmo: typing/includemod.cmi typing/includecore.cmi \ + typing/printtyp.cmi typing/typedtree.cmi utils/tbl.cmi utils/misc.cmi \ + typing/ident.cmi typing/path.cmi typing/includemod.cmi typing/env.cmi +typing/mtype.cmo: typing/mtype.cmi typing/ident.cmi typing/ctype.cmi \ + typing/path.cmi typing/typedtree.cmi typing/env.cmi +typing/parmatch.cmo: typing/parmatch.cmi typing/typedtree.cmi \ + parsing/location.cmi typing/parmatch.cmi utils/misc.cmi \ + typing/ctype.cmi parsing/asttypes.cmi +typing/path.cmo: typing/path.cmi typing/ident.cmi +typing/predef.cmo: typing/predef.cmi typing/ident.cmi utils/misc.cmi \ + typing/ctype.cmi typing/path.cmi typing/typedtree.cmi +typing/printtyp.cmo: typing/printtyp.cmi typing/typedtree.cmi \ + utils/misc.cmi typing/ident.cmi typing/path.cmi parsing/longident.cmi \ + parsing/asttypes.cmi +typing/subst.cmo: typing/subst.cmi typing/ident.cmi utils/misc.cmi \ + typing/path.cmi typing/typedtree.cmi +typing/typecore.cmo: typing/typecore.cmi typing/predef.cmi \ + typing/printtyp.cmi parsing/parsetree.cmi typing/typedtree.cmi \ + parsing/location.cmi typing/parmatch.cmi typing/ident.cmi \ + utils/misc.cmi typing/path.cmi typing/ctype.cmi typing/env.cmi \ + parsing/longident.cmi typing/typetexp.cmi parsing/asttypes.cmi +typing/typedecl.cmo: typing/typedecl.cmi utils/cset.cmi \ + parsing/parsetree.cmi typing/typedtree.cmi parsing/location.cmi \ + typing/ctype.cmi typing/env.cmi typing/typetexp.cmi +typing/typedtree.cmo: typing/typedtree.cmi typing/ident.cmi utils/misc.cmi \ + typing/path.cmi parsing/location.cmi parsing/asttypes.cmi +typing/typemod.cmo: typing/typemod.cmi typing/printtyp.cmi \ + typing/typedecl.cmi parsing/parsetree.cmi typing/typedtree.cmi \ + typing/mtype.cmi parsing/location.cmi utils/misc.cmi typing/ident.cmi \ + typing/ctype.cmi typing/path.cmi typing/includemod.cmi typing/subst.cmi \ + typing/env.cmi parsing/longident.cmi typing/typecore.cmi \ + typing/typetexp.cmi +typing/typetexp.cmo: typing/typetexp.cmi typing/printtyp.cmi \ + parsing/parsetree.cmi typing/typedtree.cmi parsing/location.cmi \ + utils/tbl.cmi typing/ctype.cmi typing/env.cmi parsing/longident.cmi +utils/config.cmo: utils/config.cmi +utils/crc.cmo: utils/crc.cmi utils/crc.cmi +utils/cset.cmo: utils/cset.cmi utils/cset.cmi +utils/meta.cmo: utils/meta.cmi +utils/misc.cmo: utils/misc.cmi +utils/tbl.cmo: utils/tbl.cmi +utils/terminfo.cmo: utils/terminfo.cmi diff --git a/Makefile b/Makefile new file mode 100644 index 0000000000..d0e2cb20fd --- /dev/null +++ b/Makefile @@ -0,0 +1,267 @@ +# The main Makefile + +include config/Makefile.h +include Makefile.config + +CAMLC=boot/camlrun boot/camlc -I boot +COMPFLAGS=$(INCLUDES) +LINKFLAGS= +CAMLYACC=boot/camlyacc +YACCFLAGS= +CAMLLEX=boot/camlrun boot/camllex +CAMLDEP=tools/camldep +DEPFLAGS=$(INCLUDES) +CAMLRUN=byterun/camlrun + +INCLUDES=-I utils -I parsing -I typing -I bytecomp -I driver -I toplevel + +UTILS=utils/misc.cmo utils/tbl.cmo utils/cset.cmo utils/config.cmo \ + utils/clflags.cmo utils/meta.cmo utils/terminfo.cmo utils/crc.cmo + +PARSING=parsing/location.cmo parsing/parser.cmo parsing/lexer.cmo parsing/parse.cmo + +TYPING=typing/ident.cmo typing/path.cmo typing/typedtree.cmo \ + typing/subst.cmo typing/printtyp.cmo \ + typing/predef.cmo typing/env.cmo \ + typing/ctype.cmo typing/mtype.cmo \ + typing/includecore.cmo typing/includemod.cmo typing/parmatch.cmo \ + typing/typetexp.cmo typing/typecore.cmo \ + typing/typedecl.cmo typing/typemod.cmo + +BYTECOMP=bytecomp/lambda.cmo bytecomp/printlambda.cmo \ + bytecomp/matching.cmo bytecomp/translcore.cmo bytecomp/translmod.cmo \ + bytecomp/instruct.cmo bytecomp/dectree.cmo bytecomp/codegen.cmo \ + bytecomp/printinstr.cmo bytecomp/opcodes.cmo bytecomp/emitcode.cmo \ + bytecomp/runtimedef.cmo bytecomp/symtable.cmo \ + bytecomp/librarian.cmo bytecomp/linker.cmo + +DRIVER=driver/errors.cmo driver/compile.cmo driver/main.cmo + +TOPLEVEL=driver/errors.cmo driver/compile.cmo \ + toplevel/printval.cmo toplevel/toploop.cmo toplevel/topdirs.cmo \ + toplevel/topmain.cmo + +COMPOBJS=$(UTILS) $(PARSING) $(TYPING) $(BYTECOMP) $(DRIVER) + +TOPOBJS=$(UTILS) $(PARSING) $(TYPING) $(BYTECOMP) $(TOPLEVEL) + +EXPUNGEOBJS=utils/misc.cmo utils/cset.cmo utils/tbl.cmo \ + utils/config.cmo utils/clflags.cmo \ + typing/ident.cmo typing/predef.cmo \ + bytecomp/runtimedef.cmo bytecomp/symtable.cmo \ + toplevel/expunge.cmo + +PERVASIVES= arg array baltree char filename format hashtbl lexing list \ + obj parsing pervasives printexc printf queue set sort stack string sys \ + topfuncs + +# Recompile the system using the bootstrap compiler +all: runtime camlc camltop lex/camllex yacc/camlyacc library + +# Compile everything the first time +world: coldstart all + +# Start up the system from the distribution compiler +coldstart: + cd byterun; $(MAKE) all + cp byterun/camlrun boot/camlrun + cd yacc; $(MAKE) all + cp yacc/camlyacc boot/camlyacc + cd stdlib; $(MAKE) COMPILER=../boot/camlc all + cp stdlib/stdlib.cma stdlib/*.cmi stdlib/header.exe boot + +# Promote the newly compiled system to the rank of bootstrap compiler +promote: + test -d boot/Saved || mkdir boot/Saved + mv boot/Saved boot/Saved.prev + mkdir boot/Saved + mv boot/Saved.prev boot/Saved/Saved.prev + mv boot/camlrun boot/camlc boot/camllex boot/camlyacc boot/Saved + mv boot/*.cmi boot/stdlib.cma boot/header.exe boot/Saved + cp byterun/camlrun boot/camlrun + cp camlc boot/camlc + cp lex/camllex boot/camllex + cp yacc/camlyacc boot/camlyacc + cp stdlib/stdlib.cma stdlib/*.cmi stdlib/header.exe boot + +# Restore the saved bootstrap compiler if a problem arises +restore: + mv boot/Saved/* boot + rmdir boot/Saved + mv boot/Saved.prev boot/Saved + +# Check if fixpoint reached +compare: + @if cmp -s boot/camlc camlc && cmp -s boot/camllex lex/camllex; \ + then echo "Fixpoint reached, bootstrap succeeded."; \ + else echo "Fixpoint not reached, try one more bootstrapping cycle."; \ + fi + +# Complete bootstrapping cycle +bootstrap: promote clean all compare + +# Installation +install: + test -d $(BINDIR) || mkdir $(BINDIR) + test -d $(LIBDIR) || mkdir $(LIBDIR) + test -d $(MANDIR) || mkdir $(MANDIR) + cd byterun; $(MAKE) install + cp camlc $(BINDIR)/cslc + cp camltop $(BINDIR)/csltop + cd stdlib; $(MAKE) install + cp lex/camllex $(BINDIR)/csllex + cp yacc/camlyacc $(BINDIR)/cslyacc + +realclean:: clean + +# The compiler + +camlc: $(COMPOBJS) + $(CAMLC) $(LINKFLAGS) -o camlc $(COMPOBJS) + +clean:: + rm -f camlc + +# The toplevel + +camltop: $(TOPOBJS) expunge + $(CAMLC) $(LINKFLAGS) -linkall -o camltop.tmp $(TOPOBJS) + $(CAMLRUN) ./expunge camltop.tmp camltop $(PERVASIVES) + rm -f camltop.tmp + +clean:: + rm -f camltop + +# The configuration file + +utils/config.ml: utils/config.mlp Makefile.config + sed -e 's|%%LIBDIR%%|$(LIBDIR)|' \ + -e 's|%%CC%%|$(CC) $(CCLINKFLAGS) $(LOWADDRESSES)|' \ + -e 's|%%CCLIBS%%|$(CCLIBS)|' \ + utils/config.mlp > utils/config.ml + +clean:: + rm -f utils/config.ml + +# The parser + +parsing/parser.mli parsing/parser.ml: parsing/parser.mly + $(CAMLYACC) $(YACCFLAGS) parsing/parser.mly + +clean:: + rm -f parsing/parser.mli parsing/parser.ml parsing/parser.output + +beforedepend:: parsing/parser.mli parsing/parser.ml + +# The lexer + +parsing/lexer.ml: parsing/lexer.mll + $(CAMLLEX) parsing/lexer.mll + +clean:: + rm -f parsing/lexer.ml + +beforedepend:: parsing/lexer.ml + +# The numeric opcodes + +bytecomp/opcodes.ml: byterun/instruct.h + sed -n -e '/^enum/p' -e 's/,//g' -e '/^ /p' byterun/instruct.h | \ + awk -f tools/make-opcodes > bytecomp/opcodes.ml + +clean:: + rm -f bytecomp/opcodes.ml + +beforedepend:: bytecomp/opcodes.ml + +# The predefined exceptions and primitives + +runtime/primitives: + cd runtime; make primitives + +bytecomp/runtimedef.ml: byterun/primitives byterun/fail.h + (echo 'let builtin_exceptions = [|'; \ + sed -n -e 's|.*/\* \("[A-Za-z_]*"\) \*/$$| \1;|p' byterun/fail.h | \ + sed -e '$$s/;$$//'; \ + echo '|]'; \ + echo 'let builtin_primitives = [|'; \ + sed -e 's/.*/ "&";/' -e '$$s/;$$//' byterun/primitives; \ + echo '|]') > bytecomp/runtimedef.ml + +clean:: + rm -f bytecomp/runtimedef.ml + +beforedepend:: bytecomp/runtimedef.ml + +# The "expunge" utility + +expunge: $(EXPUNGEOBJS) + $(CAMLC) $(LINKFLAGS) -o expunge $(EXPUNGEOBJS) + +clean:: + rm -f expunge + +# The runtime system + +runtime: + cd byterun; $(MAKE) all +realclean:: + cd byterun; $(MAKE) clean +alldepend:: + cd byterun; $(MAKE) depend + +# The library + +library: + cd stdlib; $(MAKE) all +clean:: + cd stdlib; $(MAKE) clean +alldepend:: + cd stdlib; $(MAKE) depend + +# The lexer and parser generators + +lex/camllex: + cd lex; $(MAKE) +clean:: + cd lex; $(MAKE) clean +alldepend:: + cd lex; $(MAKE) depend + +yacc/camlyacc: + cd yacc; $(MAKE) +realclean:: + cd yacc; $(MAKE) clean + +# Utilities + +realclean:: + cd tools; $(MAKE) clean +alldepend:: + cd tools; $(MAKE) depend + +# Default rules + +.SUFFIXES: .ml .mli .cmo .cmi + +.ml.cmo: + $(CAMLC) $(COMPFLAGS) -c $< + +.mli.cmi: + $(CAMLC) $(COMPFLAGS) -c $< + +clean:: + rm -f utils/*.cm[io] utils/*~ + rm -f parsing/*.cm[io] parsing/*~ + rm -f typing/*.cm[io] typing/*~ + rm -f bytecomp/*.cm[io] bytecomp/*~ + rm -f driver/*.cm[io] driver/*~ + rm -f toplevel/*.cm[io] toplevel/*~ + rm -f *~ + +depend: beforedepend + $(CAMLDEP) $(DEPFLAGS) */*.mli */*.ml > .depend + +alldepend:: depend + +include .depend diff --git a/Makefile.config b/Makefile.config new file mode 100644 index 0000000000..d070d82feb --- /dev/null +++ b/Makefile.config @@ -0,0 +1,47 @@ +### Compile-time configuration + +### Which C compiler to use. +### Performance is *much* better if Gnu CC 2 is used. +CC=gcc +#CC=cc + +### Additional compile-time options +# If using gcc on Intel 386 or Motorola 68k: +# CCCOMPOPTS=-fno-defer-pop +# If using gcc and being superstitious: +CCCOMPOPTS=-Wall +# Otherwise: +# CCCOMPOPTS= + +### Additional link-time options +CCLINKOPTS= + +### If using GCC on a Dec Alpha under OSF1: +LOWADDRESSES=-Xlinker -taso +# Otherwise: +# LOWADDRESSES= + +### Libraries needed +CCLIBS=$(TERMINFOLIBS) -lm + +### How to invoke ranlib (if needed) +# BSD-style: +RANLIB=ranlib +# System V-style: +# RANLIB=ar -rs +# If ranlib is not needed at all: +# RANLIB=true + +### Do #! scripts work on your system? +SHARPBANGSCRIPTS=true +# SHARPBANGSCRIPTS=false + +### Where to install the binaries +BINDIR=/usr/local/bin + +### Where to install the standard library +LIBDIR=/usr/local/lib/camlsl + +### Where to install the man pages +MANDIR=/usr/local/man/man1 +MANEXT=1 diff --git a/bytecomp/codegen.ml b/bytecomp/codegen.ml new file mode 100644 index 0000000000..f19ca96143 --- /dev/null +++ b/bytecomp/codegen.ml @@ -0,0 +1,443 @@ +(* codegen.ml : translation of lambda terms to lists of instructions. *) + +open Misc +open Asttypes +open Lambda +open Instruct + + +(**** Label generation ****) + +let label_counter = ref 0 + +let new_label () = + incr label_counter; !label_counter + +(**** Structure of the compilation environment. ****) + +type compilation_env = + { ce_stack: int Ident.tbl; (* Positions of variables in the stack *) + ce_heap: int Ident.tbl } (* Structure of the heap-allocated env *) + +(* The ce_stack component gives locations of variables residing + in the stack. The locations are offsets w.r.t. the origin of the + stack frame. + The ce_heap component gives the positions of variables residing in the + heap-allocated environment. *) + +let empty_env = + { ce_stack = Ident.empty; ce_heap = Ident.empty } + +(* Add a stack-allocated variable *) + +let add_var id pos env = + { ce_stack = Ident.add id pos env.ce_stack; + ce_heap = env.ce_heap } + +(**** Examination of the continuation ****) + +(* Return a label to the beginning of the given continuation. + If the sequence starts with a branch, use the target of that branch + as the label, thus avoiding a jump to a jump. *) + +let label_code = function + Kbranch lbl :: _ as cont -> (lbl, cont) + | Klabel lbl :: _ as cont -> (lbl, cont) + | cont -> let lbl = new_label() in (lbl, Klabel lbl :: cont) + +(* Return a branch to the continuation. That is, an instruction that, + when executed, branches to the continuation or performs what the + continuation performs. We avoid generating branches to branches and + branches to returns. *) + +let make_branch cont = + match cont with + (Kbranch _ as branch) :: _ -> (branch, cont) + | (Kreturn _ as return) :: _ -> (return, cont) + | Kraise :: _ -> (Kraise, cont) + | Klabel lbl :: _ -> (Kbranch lbl, cont) + | _ -> let lbl = new_label() in (Kbranch lbl, Klabel lbl :: cont) + +(* Discard all instructions up to the next label. + This function is to be applied to the continuation before adding a + non-terminating instruction (branch, raise, return) in front of it. *) + +let rec discard_dead_code = function + [] -> [] + | (Klabel _ | Krestart) :: _ as cont -> cont + | _ :: cont -> discard_dead_code cont + +(* Check if we're in tailcall position *) + +let rec is_tailcall = function + Kreturn _ :: _ -> true + | Klabel _ :: c -> is_tailcall c + | _ -> false + +(* Add a Kpop N instruction in front of a continuation *) + +let rec add_pop n cont = + if n = 0 then cont else + match cont with + Kpop m :: cont -> add_pop (n + m) cont + | Kreturn m :: cont -> Kreturn(n + m) :: cont + | Kraise :: _ -> cont + | _ -> Kpop n :: cont + +(* Add the constant "unit" in front of a continuation *) + +let add_const_unit = function + (Kacc _ | Kconst _ | Kgetglobal _ | Kpush_retaddr _) :: _ as cont -> cont + | cont -> Kconst const_unit :: cont + +(**** Compilation of a lambda expression ****) + +(* The label to which Lstaticfail branches, and the stack size at that point.*) + +let lbl_staticfail = ref 0 +and sz_staticfail = ref 0 + +(* Function bodies that remain to be compiled *) + +let functions_to_compile = + (Stack.new () : (Ident.t * lambda * label * Ident.t list) Stack.t) + +(* Compile an expression. + The val of the expression is left in the accumulator. + env = compilation environment + exp = the lambda expression to compile + sz = current size of the stack frame + cont = list of instructions to execute afterwards + Result = list of instructions that evaluate exp, then perform cont. *) + +open Format + +let rec comp_expr env exp sz cont = + match exp with + Lvar id -> + begin try + let pos = Ident.find_same id env.ce_stack in + Kacc(sz - pos) :: cont + with Not_found -> + try + let pos = Ident.find_same id env.ce_heap in + Kenvacc(pos) :: cont + with Not_found -> + Ident.print id; print_newline(); + fatal_error "Codegen.comp_expr: var" + end + | Lconst cst -> + Kconst cst :: cont + | Lapply(func, args) -> + let nargs = List.length args in + if is_tailcall cont then + comp_args env args sz + (Kpush :: comp_expr env func (sz + nargs) + (Kappterm(nargs, sz + nargs) :: discard_dead_code cont)) + else + if nargs < 4 then + comp_args env args sz + (Kpush :: comp_expr env func (sz + nargs) (Kapply nargs :: cont)) + else begin + let (lbl, cont1) = label_code cont in + Kpush_retaddr lbl :: + comp_args env args (sz + 3) + (Kpush :: comp_expr env func (sz + 3 + nargs) + (Kapply nargs :: cont1)) + end + | Lfunction(param, body) -> + let lbl = new_label() in + let fv = free_variables exp in + Stack.push (param, body, lbl, fv) functions_to_compile; + comp_args env (List.map (fun n -> Lvar n) fv) sz + (Kclosure(lbl, List.length fv) :: cont) + | Llet(id, arg, body) -> + comp_expr env arg sz + (Kpush :: comp_expr (add_var id (sz+1) env) body (sz+1) + (add_pop 1 cont)) + | Lletrec(([id, Lfunction(param, funct_body), _] as decl), let_body) -> + let lbl = new_label() in + let fv = free_variables (Lletrec(decl, lambda_unit)) in + Stack.push (param, funct_body, lbl, id :: fv) functions_to_compile; + comp_args env (List.map (fun n -> Lvar n) fv) sz + (Kclosurerec(lbl, List.length fv) :: Kpush :: + (comp_expr (add_var id (sz+1) env) let_body (sz+1) + (add_pop 1 cont))) + | Lletrec(decl, body) -> + let ndecl = List.length decl in + let rec comp_decl new_env sz i = function + [] -> + comp_expr new_env body sz (add_pop ndecl cont) + | (id, exp, blocksize) :: rem -> + comp_expr new_env exp sz + (Kpush :: Kacc i :: Kupdate :: comp_decl new_env sz (i-1) rem) in + let rec comp_init new_env sz = function + [] -> + comp_decl new_env sz ndecl decl + | (id, exp, blocksize) :: rem -> + Kdummy blocksize :: Kpush :: + comp_init (add_var id (sz+1) new_env) (sz+1) rem in + comp_init env sz decl + | Lprim(Pidentity, [arg]) -> + comp_expr env arg sz cont + | Lprim(Pnot, [arg]) -> + let newcont = + match cont with + Kbranchif lbl :: cont1 -> Kbranchifnot lbl :: cont1 + | Kbranchifnot lbl :: cont1 -> Kbranchif lbl :: cont1 + | _ -> Kboolnot :: cont in + comp_expr env arg sz newcont + | Lprim(Psequand, [exp1; exp2]) -> + begin match cont with + Kbranchifnot lbl :: _ -> + comp_expr env exp1 sz (Kbranchifnot lbl :: + comp_expr env exp2 sz cont) + | Kbranchif lbl :: cont1 -> + let (lbl2, cont2) = label_code cont1 in + comp_expr env exp1 sz (Kbranchifnot lbl2 :: + comp_expr env exp2 sz (Kbranchif lbl :: cont2)) + | _ -> + let (lbl, cont1) = label_code cont in + comp_expr env exp1 sz (Kstrictbranchifnot lbl :: + comp_expr env exp2 sz cont1) + end + | Lprim(Psequor, [exp1; exp2]) -> + begin match cont with + Kbranchif lbl :: _ -> + comp_expr env exp1 sz (Kbranchif lbl :: + comp_expr env exp2 sz cont) + | Kbranchifnot lbl :: cont1 -> + let (lbl2, cont2) = label_code cont1 in + comp_expr env exp1 sz (Kbranchif lbl2 :: + comp_expr env exp2 sz (Kbranchifnot lbl :: cont2)) + | _ -> + let (lbl, cont1) = label_code cont in + comp_expr env exp1 sz (Kstrictbranchif lbl :: + comp_expr env exp2 sz cont1) + end + | Lprim(Praise, [arg]) -> + comp_expr env arg sz (Kraise :: discard_dead_code cont) + | Lprim((Paddint | Psubint as prim), [arg; Lconst(Const_base(Const_int n))]) + when n >= immed_min & n <= immed_max -> + let ofs = if prim == Paddint then n else -n in + comp_expr env arg sz (Koffsetint ofs :: cont) + | Lprim(p, args) -> + let instr = + match p with + Pgetglobal id -> Kgetglobal id + | Psetglobal id -> Ksetglobal id + | Pupdate -> Kupdate + | Pcomp cmp -> Kintcomp cmp + | Pmakeblock tag -> Kmakeblock(List.length args, tag) + | Ptagof -> Ktagof + | Pfield n -> Kgetfield n + | Psetfield n -> Ksetfield n + | Pccall(name, n) -> Kccall(name, n) + | Pnegint -> Knegint + | Paddint -> Kaddint + | Psubint -> Ksubint + | Pmulint -> Kmulint + | Pdivint -> Kdivint + | Pmodint -> Kmodint + | Pandint -> Kandint + | Porint -> Korint + | Pxorint -> Kxorint + | Plslint -> Klslint + | Plsrint -> Klsrint + | Pasrint -> Kasrint + | Poffsetint n -> Koffsetint n + | Poffsetref n -> Koffsetref n + | Pgetstringchar -> Kgetstringchar + | Psetstringchar -> Ksetstringchar + | Pvectlength -> Kvectlength + | Pgetvectitem -> Kgetvectitem + | Psetvectitem -> Ksetvectitem + | _ -> fatal_error "Codegen.comp_expr: prim" in + comp_args env args sz (instr :: cont) + | Lcatch(body, Lstaticfail) -> + comp_expr env body sz cont + | Lcatch(body, handler) -> + let (branch1, cont1) = make_branch cont in + let (lbl_handler, cont2) = label_code (comp_expr env handler sz cont1) in + let saved_lbl_staticfail = !lbl_staticfail + and saved_sz_staticfail = !sz_staticfail in + lbl_staticfail := lbl_handler; + sz_staticfail := sz; + let cont3 = comp_expr env body sz (branch1 :: cont2) in + lbl_staticfail := saved_lbl_staticfail; + sz_staticfail := saved_sz_staticfail; + cont3 + | Lstaticfail -> + add_pop (sz - !sz_staticfail) + (Kbranch !lbl_staticfail :: discard_dead_code cont) + | Ltrywith(body, id, handler) -> + let (branch1, cont1) = make_branch cont in + let lbl_handler = new_label() in + Kpushtrap lbl_handler :: + comp_expr env body (sz+4) (Kpoptrap :: branch1 :: + Klabel lbl_handler :: Kpush :: + comp_expr (add_var id (sz+1) env) handler (sz+1) (add_pop 1 cont1)) + | Lifthenelse(cond, ifso, ifnot) -> + comp_binary_test env cond ifso ifnot sz cont + | Lsequence(exp1, exp2) -> + comp_expr env exp1 sz (comp_expr env exp2 sz cont) + | Lwhile(cond, body) -> + let lbl_loop = new_label() in + let lbl_test = new_label() in + Kbranch lbl_test :: Klabel lbl_loop :: Kcheck_signals :: + comp_expr env body sz + (Klabel lbl_test :: + comp_expr env cond sz (Kbranchif lbl_loop :: cont)) + | Lfor(param, start, stop, dir, body) -> + let lbl_loop = new_label() in + let lbl_test = new_label() in + let offset = match dir with Upto -> 1 | Downto -> -1 in + let comp = match dir with Upto -> Cle | Downto -> Cge in + comp_expr env start sz + (Kpush :: comp_expr env stop (sz+1) + (Kpush :: Kbranch lbl_test :: + Klabel lbl_loop :: Kcheck_signals :: + comp_expr (add_var param (sz+1) env) body (sz+2) + (Kacc 1 :: Koffsetint offset :: Kassign 1 :: + Klabel lbl_test :: + Kacc 0 :: Kpush :: Kacc 2 :: Kintcomp comp :: + Kbranchif lbl_loop :: + add_const_unit (add_pop 2 cont)))) + | Lswitch(arg, lo, hi, casel) -> + let numcases = List.length casel in + let cont1 = + if lo = 0 & numcases >= hi - 8 then (* Always true if hi <= 8... *) + comp_direct_switch env hi casel sz cont + else begin + let (transl_table, actions) = Dectree.make_decision_tree casel in + Ktranslate transl_table :: comp_switch env actions sz cont + end in + comp_expr env arg sz cont1 + | Lshared(expr, lblref) -> + begin match !lblref with + None -> + let (lbl, cont1) = label_code(comp_expr env expr sz cont) in + lblref := Some lbl; + cont1 + | Some lbl -> + Kbranch lbl :: discard_dead_code cont + end + +(* Compile a list of arguments [e1; ...; eN] to a primitive operation. + The values of eN ... e2 are pushed on the stack, e2 at top of stack, + then e3, then ... The val of e1 is left in the accumulator. *) + +and comp_args env argl sz cont = + comp_expr_list env (List.rev argl) sz cont + +and comp_expr_list env exprl sz cont = + match exprl with + [] -> cont + | [exp] -> comp_expr env exp sz cont + | exp :: rem -> + comp_expr env exp sz (Kpush :: comp_expr_list env rem (sz+1) cont) + +(* Compile an if-then-else test. *) + +and comp_binary_test env cond ifso ifnot sz cont = + let cont_cond = + if ifnot = Lconst const_unit then begin + let (lbl_end, cont1) = label_code cont in + Kbranchifnot lbl_end :: comp_expr env ifso sz cont1 + end else + if ifso = Lstaticfail & sz = !sz_staticfail then + Kbranchif !lbl_staticfail :: comp_expr env ifnot sz cont + else + if ifnot = Lstaticfail & sz = !sz_staticfail then + Kbranchifnot !lbl_staticfail :: comp_expr env ifso sz cont + else begin + let (branch_end, cont1) = make_branch cont in + let (lbl_not, cont2) = label_code(comp_expr env ifnot sz cont1) in + Kbranchifnot lbl_not :: comp_expr env ifso sz (branch_end :: cont2) + end in + comp_expr env cond sz cont_cond + +(* Compile a Lswitch directly, without breaking the array of cases into + dense enough components *) + +and comp_direct_switch env range casel sz cont = + let actv = Array.new range Lstaticfail in + List.iter (fun (n, act) -> actv.(n) <- act) casel; + comp_switch env actv sz cont + +(* Compile a switch instruction *) + +and comp_switch env actv sz cont = + (* To ensure stack balancing, we must have either sz = !sz_staticfail + or none of the actv.(i) contains an unguarded Lstaticfail. *) + let lblv = Array.new (Array.length actv) !lbl_staticfail in + let (branch, cont1) = make_branch cont in + let c = ref (discard_dead_code cont1) in + for i = Array.length actv - 1 downto 0 do + let (lbl, c1) = label_code(comp_expr env actv.(i) sz (branch :: !c)) in + lblv.(i) <- lbl; + c := discard_dead_code c1 + done; + Kswitch lblv :: !c + +(**** Compilation of functions ****) + +let comp_function (param, body, entry_lbl, free_vars) cont = + (* Uncurry the function body *) + let rec uncurry = function + Lfunction(param, body) -> + let (params, final) = uncurry body in (param :: params, final) + | Lshared(exp, lblref) -> + uncurry exp + | exp -> + ([], exp) in + let (params, fun_body) = + uncurry (Lfunction(param, body)) in + let arity = List.length params in + let rec pos_args pos delta = function + [] -> Ident.empty + | id :: rem -> Ident.add id pos (pos_args (pos+delta) delta rem) in + let env = + { ce_stack = pos_args arity (-1) params; + ce_heap = pos_args 0 1 free_vars } in + let cont1 = + comp_expr env fun_body arity (Kreturn arity :: cont) in + if arity > 1 then + Krestart :: Klabel entry_lbl :: Kgrab(arity - 1) :: cont1 + else + Klabel entry_lbl :: cont1 + +let comp_remainder cont = + let c = ref cont in + begin try + while true do + c := comp_function (Stack.pop functions_to_compile) !c + done + with Stack.Empty -> + () + end; + !c + +(**** Compilation of a lambda phrase ****) + +let compile_implementation expr = + Stack.clear functions_to_compile; + label_counter := 0; + lbl_staticfail := 0; + sz_staticfail := 0; + let init_code = comp_expr empty_env expr 0 [] in + if Stack.length functions_to_compile > 0 then begin + let lbl_init = new_label() in + Kbranch lbl_init :: comp_remainder (Klabel lbl_init :: init_code) + end else + init_code + +let compile_phrase expr = + Stack.clear functions_to_compile; + label_counter := 0; + lbl_staticfail := 0; + sz_staticfail := 0; + let init_code = comp_expr empty_env expr 0 [Kstop] in + let fun_code = comp_remainder [] in + (init_code, fun_code) + diff --git a/bytecomp/codegen.mli b/bytecomp/codegen.mli new file mode 100644 index 0000000000..97cb863e37 --- /dev/null +++ b/bytecomp/codegen.mli @@ -0,0 +1,8 @@ +(* Generation of bytecode from lambda terms *) + +open Lambda +open Instruct + +val compile_implementation: lambda -> instruction list +val compile_phrase: lambda -> instruction list * instruction list + diff --git a/bytecomp/dectree.ml b/bytecomp/dectree.ml new file mode 100644 index 0000000000..d847fac70c --- /dev/null +++ b/bytecomp/dectree.ml @@ -0,0 +1,50 @@ +open Lambda + + +(* Input: a list of (key, action) pairs, where keys are integers. *) +(* Output: a table of (low, high, offset) triples for Ktranslate + an array of actions for Kswitch *) + +let make_decision_tree casei = + (* Sort the cases by increasing keys *) + let cases = + Sort.list (fun (key1,act1) (key2,act2) -> key1 <= key2) casei in + (* Extract the keys and the actions *) + let keyv = Array.of_list (List.map fst cases) + and actv = Array.of_list (List.map snd cases) in + let n = Array.length keyv in + (* Partition the set of keys keyv into maximal dense enough segments. + A segment is dense enough if its span (max point - min point) is + less than four times its size (number of points). *) + let rec partition start = + if start >= n then [] else + let stop = ref (n-1) in + while keyv.(!stop) - keyv.(start) > 4 * (!stop - start) do + decr stop + done; + (* We've found a dense enough segment. + In the worst case, !stop = start and the segment is a single point *) + (* Record the segment and continue *) + (start, !stop) :: partition (!stop + 1) in + let part = partition 0 in + (* Compute the length of the switch table. + Slot 0 is reserved and always contains Lstaticfail. *) + let switchl = ref 1 in + List.iter + (fun (start, stop) -> switchl := !switchl + keyv.(stop) - keyv.(start) + 1) + part; + (* Build the two tables *) + let transl = Array.new (List.length part) (0, 0, 0) + and switch = Array.new !switchl Lstaticfail in + let tr_pos = ref 0 + and sw_ind = ref 1 in + List.iter + (fun (start, stop) -> + transl.(!tr_pos) <- (keyv.(start), keyv.(stop), !sw_ind); + for i = start to stop do + switch.(!sw_ind + keyv.(i) - keyv.(start)) <- actv.(i) + done; + incr tr_pos; + sw_ind := !sw_ind + keyv.(stop) - keyv.(start) + 1) + part; + (transl, switch) diff --git a/bytecomp/dectree.mli b/bytecomp/dectree.mli new file mode 100644 index 0000000000..a22ef611ce --- /dev/null +++ b/bytecomp/dectree.mli @@ -0,0 +1,10 @@ +(* Transformation of N-way integer branches *) + +open Lambda + +(* Input: a list of (key, action) pairs, where keys are integers. *) +(* Output: a table of (low, high, offset) triples for Ktranslate + an array of actions for Kswitch *) + +val make_decision_tree: + (int * lambda) list -> (int * int * int) array * lambda array diff --git a/bytecomp/emitcode.ml b/bytecomp/emitcode.ml new file mode 100644 index 0000000000..df97932a7d --- /dev/null +++ b/bytecomp/emitcode.ml @@ -0,0 +1,285 @@ +(* Generation of bytecode + relocation information *) + +open Config +open Misc +open Asttypes +open Lambda +open Instruct +open Opcodes + + +(* Relocation information *) + +type reloc_info = + Reloc_literal of structured_constant (* structured constant *) + | Reloc_getglobal of Ident.t (* reference to a global *) + | Reloc_setglobal of Ident.t (* definition of a global *) + | Reloc_primitive of string (* C primitive number *) + +(* Descriptor for compilation units *) + +type compilation_unit = + { mutable cu_pos: int; (* Absolute position in file *) + cu_codesize: int; (* Size of code block *) + cu_reloc: (reloc_info * int) list; (* Relocation information *) + cu_interfaces: (string * int) list } (* Names and CRC of intfs imported *) + +(* Format of a .cmo file: + Obj.magic number (Config.cmo_magic_number) + absolute offset of compilation unit descriptor + block of relocatable bytecode + compilation unit descriptor *) + +(* Buffering of bytecode *) + +let out_buffer = ref(String.create 1024) +and out_position = ref 0 + +let out_word b1 b2 b3 b4 = + let p = !out_position in + if p >= String.length !out_buffer then begin + let len = String.length !out_buffer in + let new_buffer = String.create (2 * len) in + String.blit !out_buffer 0 new_buffer 0 len; + out_buffer := new_buffer + end; + String.unsafe_set !out_buffer p (Char.unsafe_chr b1); + String.unsafe_set !out_buffer (p+1) (Char.unsafe_chr b2); + String.unsafe_set !out_buffer (p+2) (Char.unsafe_chr b3); + String.unsafe_set !out_buffer (p+3) (Char.unsafe_chr b4); + out_position := p + 4 + +let out opcode = + out_word opcode 0 0 0 + +let out_int n = + out_word n (n asr 8) (n asr 16) (n asr 24) + +(* Handling of local labels and backpatching *) + +type label_definition = + Label_defined of int + | Label_undefined of (int * int) list + +let label_table = ref ([| |] : label_definition array) + +let extend_label_table needed = + let new_size = ref(Array.length !label_table) in + while needed >= !new_size do new_size := 2 * !new_size done; + let new_table = Array.new !new_size (Label_undefined []) in + Array.blit !label_table 0 new_table 0 (Array.length !label_table); + label_table := new_table + +let backpatch (pos, orig) = + let displ = (!out_position - orig) / 4 in + !out_buffer.[pos] <- Char.unsafe_chr displ; + !out_buffer.[pos+1] <- Char.unsafe_chr (displ lsr 8); + !out_buffer.[pos+2] <- Char.unsafe_chr (displ lsr 16); + !out_buffer.[pos+3] <- Char.unsafe_chr (displ lsr 24) + +let define_label lbl = + if lbl >= Array.length !label_table then extend_label_table lbl; + match (!label_table).(lbl) with + Label_defined _ -> + fatal_error "Emitcode.define_label" + | Label_undefined patchlist -> + List.iter backpatch patchlist; + (!label_table).(lbl) <- Label_defined !out_position + +let out_label_with_orig orig lbl = + if lbl >= Array.length !label_table then extend_label_table lbl; + match (!label_table).(lbl) with + Label_defined def -> + out_int((def - orig) / 4) + | Label_undefined patchlist -> + (!label_table).(lbl) <- + Label_undefined((!out_position, orig) :: patchlist); + out_int 0 + +let out_label l = out_label_with_orig !out_position l + +(* Relocation information *) + +let reloc_info = ref ([] : (reloc_info * int) list) + +let enter info = + reloc_info := (info, !out_position) :: !reloc_info + +let slot_for_literal sc = + enter (Reloc_literal sc); + out_int 0 +and slot_for_getglobal id = + enter (Reloc_getglobal id); + out_int 0 +and slot_for_setglobal id = + enter (Reloc_setglobal id); + out_int 0 +and slot_for_c_prim name = + enter (Reloc_primitive name); + out_int 0 + +(* Initialization *) + +let init () = + out_position := 0; + label_table := Array.new 16 (Label_undefined []); + reloc_info := [] + +(* Emission of one instruction *) + +let emit_instr = function + Klabel lbl -> define_label lbl + | Kacc n -> + if n < 8 then out(opACC0 + n) else (out opACC; out_int n) + | Kenvacc n -> + if n < 4 then out(opENVACC0 + n) else (out opENVACC; out_int n) + | Kpush -> + out opPUSH + | Kpop n -> + out opPOP; out_int n + | Kassign n -> + out opASSIGN; out_int n + | Kpush_retaddr lbl -> out opPUSH_RETADDR; out_label lbl + | Kapply n -> + if n < 4 then out(opAPPLY1 + n - 1) else (out opAPPLY; out_int n) + | Kappterm(n, sz) -> + if n < 4 then (out(opAPPTERM1 + n - 1); out_int sz) + else (out opAPPTERM; out_int n; out_int sz) + | Kreturn n -> out opRETURN; out_int n + | Krestart -> out opRESTART + | Kgrab n -> out opGRAB; out_int n + | Kclosure(lbl, n) -> out opCLOSURE; out_int n; out_label lbl + | Kclosurerec(lbl, n) -> out opCLOSUREREC; out_int n; out_label lbl + | Kgetglobal q -> out opGETGLOBAL; slot_for_getglobal q + | Ksetglobal q -> out opSETGLOBAL; slot_for_setglobal q + | Kconst sc -> + begin match sc with + Const_base(Const_int i) when i >= immed_min & i <= immed_max -> + out opCONSTINT; out_int i + | Const_base(Const_char c) -> + out opCONSTINT; out_int (Char.code c) + | Const_block(t, []) -> + if t < 4 then out (opATOM0 + t) else (out opATOM; out_int t) + | _ -> + out opGETGLOBAL; slot_for_literal sc + end + | Kmakeblock(n, t) -> + if n = 0 then + if t < 4 then out (opATOM0 + t) else (out opATOM; out_int t) + else if n < 4 then (out(opMAKEBLOCK1 + n - 1); out_int t) + else (out opMAKEBLOCK; out_int n; out_int t) + | Kgetfield n -> + if n < 4 then out(opGETFIELD0 + n) else (out opGETFIELD; out_int n) + | Ksetfield n -> + if n < 4 then out(opSETFIELD0 + n) else (out opSETFIELD; out_int n) + | Ktagof -> out opTAGOF + | Kdummy n -> out opDUMMY; out_int n + | Kupdate -> out opUPDATE + | Kvectlength -> out opVECTLENGTH + | Kgetvectitem -> out opGETVECTITEM + | Ksetvectitem -> out opSETVECTITEM + | Kgetstringchar -> out opGETSTRINGCHAR + | Ksetstringchar -> out opSETSTRINGCHAR + | Kbranch lbl -> out opBRANCH; out_label lbl + | Kbranchif lbl -> out opBRANCHIF; out_label lbl + | Kbranchifnot lbl -> out opBRANCHIFNOT; out_label lbl + | Kstrictbranchif lbl -> out opBRANCHIF; out_label lbl + | Kstrictbranchifnot lbl -> out opBRANCHIFNOT; out_label lbl + | Kswitch lblv -> + out opSWITCH; out_int (Array.length lblv); + let org = !out_position in + Array.iter (out_label_with_orig org) lblv + | Ktranslate tbl -> + out opTRANSLATE; out_int (Array.length tbl); + Array.iter + (fun (lo, hi, ofs) -> out_int (lo + (hi lsl 8) + (ofs lsl 16))) + tbl + | Kboolnot -> out opBOOLNOT + | Kpushtrap lbl -> out opPUSHTRAP; out_label lbl + | Kpoptrap -> out opPOPTRAP + | Kraise -> out opRAISE + | Kcheck_signals -> out opCHECK_SIGNALS + | Kccall(name, n) -> + if n <= 4 + then (out (opC_CALL1 + n - 1); slot_for_c_prim name) + else (out opC_CALLN; out_int n; slot_for_c_prim name) + | Knegint -> out opNEGINT | Kaddint -> out opADDINT + | Ksubint -> out opSUBINT | Kmulint -> out opMULINT + | Kdivint -> out opDIVINT | Kmodint -> out opMODINT + | Kandint -> out opANDINT | Korint -> out opORINT + | Kxorint -> out opXORINT | Klslint -> out opLSLINT + | Klsrint -> out opLSRINT | Kasrint -> out opASRINT + | Kintcomp Ceq -> out opEQ | Kintcomp Cneq -> out opNEQ + | Kintcomp Clt -> out opLTINT | Kintcomp Cle -> out opLEINT + | Kintcomp Cgt -> out opGTINT | Kintcomp Cge -> out opGEINT + | Koffsetint n -> out opOFFSETINT; out_int n + | Koffsetref n -> out opOFFSETREF; out_int n + | Kstop -> out opSTOP + +(* Emission of a list of instructions. Include some peephole optimization. *) + +let rec emit = function + [] -> () + (* Peephole optimizations *) + | Kpush :: Kacc n :: c -> + if n < 8 then out(opPUSHACC0 + n) else (out opPUSHACC; out_int n); + emit c + | Kpush :: Kenvacc n :: c -> + if n < 4 then out(opPUSHENVACC0 + n) else (out opPUSHENVACC; out_int n); + emit c + | Kpush :: Kgetglobal id :: Kgetfield n :: c -> + out opPUSHGETGLOBALFIELD; slot_for_getglobal id; out n; emit c + | Kpush :: Kgetglobal q :: c -> + out opPUSHGETGLOBAL; slot_for_getglobal q; emit c + | Kpush :: Kconst sc :: c -> + begin match sc with + Const_base(Const_int i) when i >= immed_min & i <= immed_max -> + out opPUSHCONSTINT; out_int i + | Const_base(Const_char c) -> + out opPUSHCONSTINT; out_int(Char.code c) + | Const_block(t, []) -> + if t < 4 then out (opPUSHATOM0 + t) else (out opPUSHATOM; out_int t) + | _ -> + out opPUSHGETGLOBAL; slot_for_literal sc + end; + emit c + | Kgetglobal id :: Kgetfield n :: c -> + out opGETGLOBALFIELD; slot_for_getglobal id; out n; emit c + (* Default case *) + | instr :: c -> + emit_instr instr; emit c + +(* Emission to a file *) + +let to_file outchan unit_name crc_interface code = + init(); + output_string outchan cmo_magic_number; + let pos_depl = pos_out outchan in + output_binary_int outchan 0; + let pos_code = pos_out outchan in + emit code; + output outchan !out_buffer 0 !out_position; + let compunit = + { cu_pos = pos_code; + cu_codesize = !out_position; + cu_reloc = List.rev !reloc_info; + cu_interfaces = (unit_name, crc_interface) :: Env.imported_units() } in + init(); (* Free out_buffer and reloc_info *) + let pos_compunit = pos_out outchan in + output_value outchan compunit; + seek_out outchan pos_depl; + output_binary_int outchan pos_compunit + +(* Emission to a memory block *) + +let to_memory init_code fun_code = + init(); + emit init_code; + emit fun_code; + let code = Meta.static_alloc !out_position in + String.unsafe_blit !out_buffer 0 code 0 !out_position; + let reloc = List.rev !reloc_info + and code_size = !out_position in + init(); + (code, code_size, reloc) + diff --git a/bytecomp/emitcode.mli b/bytecomp/emitcode.mli new file mode 100644 index 0000000000..288e779f53 --- /dev/null +++ b/bytecomp/emitcode.mli @@ -0,0 +1,43 @@ +(* Generation of bytecode for .cmo files *) + +open Lambda +open Instruct + +(* Relocation information *) + +type reloc_info = + Reloc_literal of structured_constant (* structured constant *) + | Reloc_getglobal of Ident.t (* reference to a global *) + | Reloc_setglobal of Ident.t (* definition of a global *) + | Reloc_primitive of string (* C primitive number *) + +(* Descriptor for compilation units *) + +type compilation_unit = + { mutable cu_pos: int; (* Absolute position in file *) + cu_codesize: int; (* Size of code block *) + cu_reloc: (reloc_info * int) list; (* Relocation information *) + cu_interfaces: (string * int) list } (* Names and CRC of intfs imported *) + +(* Format of a .cmo file: + Obj.magic number (Config.cmo_magic_number) + absolute offset of compilation unit descriptor + block of relocatable bytecode + compilation unit descriptor *) + +val to_file: out_channel -> string -> int -> instruction list -> unit + (* Arguments: + channel on output file + name of compilation unit implemented + CRC of interface implemented + list of instructions to emit *) +val to_memory: instruction list -> instruction list -> + string * int * (reloc_info * int) list + (* Arguments: + initialization code (terminated by STOP) + function code + Results: + block of relocatable bytecode + size of this block + relocation information *) + diff --git a/bytecomp/instruct.ml b/bytecomp/instruct.ml new file mode 100644 index 0000000000..f312cbf746 --- /dev/null +++ b/bytecomp/instruct.ml @@ -0,0 +1,59 @@ +open Lambda + +type label = int (* Symbolic code labels *) + +type instruction = + Klabel of label + | Kacc of int + | Kenvacc of int + | Kpush + | Kpop of int + | Kassign of int + | Kpush_retaddr of label + | Kapply of int (* number of arguments *) + | Kappterm of int * int (* number of arguments, slot size *) + | Kreturn of int (* slot size *) + | Krestart + | Kgrab of int (* number of arguments *) + | Kclosure of label * int + | Kclosurerec of label * int + | Kgetglobal of Ident.t + | Ksetglobal of Ident.t + | Kconst of structured_constant + | Kmakeblock of int * int (* size, tag *) + | Kgetfield of int + | Ksetfield of int + | Ktagof + | Kdummy of int + | Kupdate + | Kvectlength + | Kgetvectitem + | Ksetvectitem + | Kgetstringchar + | Ksetstringchar + | Kbranch of label + | Kbranchif of label + | Kbranchifnot of label + | Kstrictbranchif of label + | Kstrictbranchifnot of label + | Kswitch of label array + | Ktranslate of (int * int * int) array + | Kboolnot + | Kpushtrap of label + | Kpoptrap + | Kraise + | Kcheck_signals + | Kccall of string * int + | Knegint | Kaddint | Ksubint | Kmulint | Kdivint | Kmodint + | Kandint | Korint | Kxorint | Klslint | Klsrint | Kasrint + | Kintcomp of comparison + | Koffsetint of int + | Koffsetref of int + | Kstop + +let immed_min = -0x40000000 +and immed_max = 0x3FFFFFFF + +(* Actually the abstract machine accomodates -0x80000000 to 0x7FFFFFFF, + but these numbers overflow the Caml type int if the compiler runs on + a 32-bit processor. *) diff --git a/bytecomp/instruct.mli b/bytecomp/instruct.mli new file mode 100644 index 0000000000..b2412029e1 --- /dev/null +++ b/bytecomp/instruct.mli @@ -0,0 +1,57 @@ +(* The type of the instructions of the abstract machine *) + +open Lambda + +type label = int (* Symbolic code labels *) + +type instruction = + Klabel of label + | Kacc of int + | Kenvacc of int + | Kpush + | Kpop of int + | Kassign of int + | Kpush_retaddr of label + | Kapply of int (* number of arguments *) + | Kappterm of int * int (* number of arguments, slot size *) + | Kreturn of int (* slot size *) + | Krestart + | Kgrab of int (* number of arguments *) + | Kclosure of label * int + | Kclosurerec of label * int + | Kgetglobal of Ident.t + | Ksetglobal of Ident.t + | Kconst of structured_constant + | Kmakeblock of int * int (* size, tag *) + | Kgetfield of int + | Ksetfield of int + | Ktagof + | Kdummy of int + | Kupdate + | Kvectlength + | Kgetvectitem + | Ksetvectitem + | Kgetstringchar + | Ksetstringchar + | Kbranch of label + | Kbranchif of label + | Kbranchifnot of label + | Kstrictbranchif of label + | Kstrictbranchifnot of label + | Kswitch of label array + | Ktranslate of (int * int * int) array + | Kboolnot + | Kpushtrap of label + | Kpoptrap + | Kraise + | Kcheck_signals + | Kccall of string * int + | Knegint | Kaddint | Ksubint | Kmulint | Kdivint | Kmodint + | Kandint | Korint | Kxorint | Klslint | Klsrint | Kasrint + | Kintcomp of comparison + | Koffsetint of int + | Koffsetref of int + | Kstop + +val immed_min: int +val immed_max: int diff --git a/bytecomp/lambda.ml b/bytecomp/lambda.ml new file mode 100644 index 0000000000..5fab0d2da7 --- /dev/null +++ b/bytecomp/lambda.ml @@ -0,0 +1,129 @@ +open Misc +open Path + +open Asttypes + +type primitive = + Pidentity + | Pgetglobal of Ident.t + | Psetglobal of Ident.t + | Pmakeblock of int + | Ptagof + | Pfield of int + | Psetfield of int + | Pccall of string * int + | Pupdate + | Praise + | Psequand | Psequor | Pnot + | Pnegint | Paddint | Psubint | Pmulint | Pdivint | Pmodint + | Pandint | Porint | Pxorint + | Plslint | Plsrint | Pasrint + | Pcomp of comparison + | Poffsetint of int + | Poffsetref of int + | Pgetstringchar | Psetstringchar + | Pvectlength | Pgetvectitem | Psetvectitem + +and comparison = + Ceq | Cneq | Clt | Cgt | Cle | Cge + +type structured_constant = + Const_base of constant + | Const_block of int * structured_constant list + +type lambda = + Lvar of Ident.t + | Lconst of structured_constant + | Lapply of lambda * lambda list + | Lfunction of Ident.t * lambda + | Llet of Ident.t * lambda * lambda + | Lletrec of (Ident.t * lambda * int) list * lambda + | Lprim of primitive * lambda list + | Lswitch of lambda * int * int * (int * lambda) list + | Lstaticfail + | Lcatch of lambda * lambda + | Ltrywith of lambda * Ident.t * lambda + | Lifthenelse of lambda * lambda * lambda + | Lsequence of lambda * lambda + | Lwhile of lambda * lambda + | Lfor of Ident.t * lambda * lambda * direction_flag * lambda + | Lshared of lambda * int option ref + +let const_unit = Const_block(0, []) + +let lambda_unit = Lconst const_unit + +let share_lambda = function + Lshared(_, _) as l -> l + | l -> Lshared(l, ref None) + +let name_lambda arg fn = + match arg with + Lvar id -> fn id + | _ -> let id = Ident.new "let" in Llet(id, arg, fn id) + +let free_variables l = + let fv = ref Cset.empty in + let rec freevars = function + Lvar id -> + fv := Cset.add id !fv + | Lconst sc -> () + | Lapply(fn, args) -> + freevars fn; List.iter freevars args + | Lfunction(param, body) -> + freevars body; fv := Cset.remove param !fv + | Llet(id, arg, body) -> + freevars arg; freevars body; fv := Cset.remove id !fv + | Lletrec(decl, body) -> + freevars body; + List.iter (fun (id, exp, sz) -> freevars exp) decl; + List.iter (fun (id, exp, sz) -> fv := Cset.remove id !fv) decl + | Lprim(p, args) -> + List.iter freevars args + | Lswitch(arg, lo, hi, cases) -> + freevars arg; List.iter (fun (key, case) -> freevars case) cases + | Lstaticfail -> () + | Lcatch(e1, e2) -> + freevars e1; freevars e2 + | Ltrywith(e1, exn, e2) -> + freevars e1; freevars e2; fv := Cset.remove exn !fv + | Lifthenelse(e1, e2, e3) -> + freevars e1; freevars e2; freevars e3 + | Lsequence(e1, e2) -> + freevars e1; freevars e2 + | Lwhile(e1, e2) -> + freevars e1; freevars e2 + | Lfor(v, e1, e2, dir, e3) -> + freevars e1; freevars e2; freevars e3; fv := Cset.remove v !fv + | Lshared(e, lblref) -> + freevars e + in freevars l; Cset.elements !fv + +(* Check if an action has a "when" guard *) + +let rec is_guarded = function + Lifthenelse(cond, body, Lstaticfail) -> true + | Lshared(lam, lbl) -> is_guarded lam + | Llet(id, lam, body) -> is_guarded body + | _ -> false + +type compilenv = lambda Ident.tbl + +let empty_env = Ident.empty + +let add_env = Ident.add + +let find_env = Ident.find_same + +let transl_access env id = + try + find_env id env + with Not_found -> + if Ident.global id then Lprim(Pgetglobal id, []) else Lvar id + +let rec transl_path = function + Pident id -> + if Ident.global id then Lprim(Pgetglobal id, []) else Lvar id + | Pdot(p, s, pos) -> + Lprim(Pfield pos, [transl_path p]) + diff --git a/bytecomp/lambda.mli b/bytecomp/lambda.mli new file mode 100644 index 0000000000..9aea3dbb9c --- /dev/null +++ b/bytecomp/lambda.mli @@ -0,0 +1,65 @@ +(* The "lambda" intermediate code *) + +open Misc +open Asttypes + +type primitive = + Pidentity + | Pgetglobal of Ident.t + | Psetglobal of Ident.t + | Pmakeblock of int + | Ptagof + | Pfield of int + | Psetfield of int + | Pccall of string * int + | Pupdate + | Praise + | Psequand | Psequor | Pnot + | Pnegint | Paddint | Psubint | Pmulint | Pdivint | Pmodint + | Pandint | Porint | Pxorint + | Plslint | Plsrint | Pasrint + | Pcomp of comparison + | Poffsetint of int + | Poffsetref of int + | Pgetstringchar | Psetstringchar + | Pvectlength | Pgetvectitem | Psetvectitem + +and comparison = + Ceq | Cneq | Clt | Cgt | Cle | Cge + +type structured_constant = + Const_base of constant + | Const_block of int * structured_constant list + +type lambda = + Lvar of Ident.t + | Lconst of structured_constant + | Lapply of lambda * lambda list + | Lfunction of Ident.t * lambda + | Llet of Ident.t * lambda * lambda + | Lletrec of (Ident.t * lambda * int) list * lambda + | Lprim of primitive * lambda list + | Lswitch of lambda * int * int * (int * lambda) list + | Lstaticfail + | Lcatch of lambda * lambda + | Ltrywith of lambda * Ident.t * lambda + | Lifthenelse of lambda * lambda * lambda + | Lsequence of lambda * lambda + | Lwhile of lambda * lambda + | Lfor of Ident.t * lambda * lambda * direction_flag * lambda + | Lshared of lambda * int option ref + +val const_unit: structured_constant +val lambda_unit: lambda +val share_lambda: lambda -> lambda +val name_lambda: lambda -> (Ident.t -> lambda) -> lambda +val free_variables: lambda -> Ident.t list +val is_guarded: lambda -> bool + +type compilenv + +val empty_env: compilenv +val add_env: Ident.t -> lambda -> compilenv -> compilenv +val transl_access: compilenv -> Ident.t -> lambda + +val transl_path: Path.t -> lambda diff --git a/bytecomp/librarian.ml b/bytecomp/librarian.ml new file mode 100644 index 0000000000..156896e1ae --- /dev/null +++ b/bytecomp/librarian.ml @@ -0,0 +1,62 @@ +(* Build libraries of .cmo files *) + +open Misc +open Config +open Emitcode + +type error = + File_not_found of string + | Not_an_object_file of string + +exception Error of error + +let copy_object_file outchan toc name = + let file_name = + try + find_in_path !load_path name + with Not_found -> + raise(Error(File_not_found name)) in + let ic = open_in_bin file_name in + try + let buffer = String.create (String.length cmo_magic_number) in + really_input ic buffer 0 (String.length cmo_magic_number); + if buffer <> cmo_magic_number then + raise(Error(Not_an_object_file file_name)); + let compunit_pos = input_binary_int ic in + seek_in ic compunit_pos; + let compunit = (input_value ic : compilation_unit) in + seek_in ic compunit.cu_pos; + compunit.cu_pos <- pos_out outchan; + copy_file_chunk ic outchan compunit.cu_codesize; + close_in ic; + compunit :: toc + with x -> + close_in ic; + raise x + +let create_archive file_list lib_name = + let outchan = open_out_bin lib_name in + try + output_string outchan cma_magic_number; + let ofs_pos_toc = pos_out outchan in + output_binary_int outchan 0; + let toc = List.fold_left (copy_object_file outchan) [] file_list in + let pos_toc = pos_out outchan in + output_value outchan toc; + seek_out outchan ofs_pos_toc; + output_binary_int outchan pos_toc; + close_out outchan + with x -> + close_out outchan; + remove_file lib_name; + raise x + +open Format + +let report_error = function + File_not_found name -> + print_string "Cannot find file "; print_string name + | Not_an_object_file name -> + print_string "The file "; print_string name; + print_string " is not a bytecode object file" + diff --git a/bytecomp/librarian.mli b/bytecomp/librarian.mli new file mode 100644 index 0000000000..ee9c9f378e --- /dev/null +++ b/bytecomp/librarian.mli @@ -0,0 +1,18 @@ +(* Build libraries of .cmo files *) + +(* Format of a library file: + Obj.magic number (Config.cma_magic_number) + absolute offset of content table + blocks of relocatable bytecode + content table = list of compilation units +*) + +val create_archive: string list -> string -> unit + +type error = + File_not_found of string + | Not_an_object_file of string + +exception Error of error + +val report_error: error -> unit diff --git a/bytecomp/linker.ml b/bytecomp/linker.ml new file mode 100644 index 0000000000..93d5ae124f --- /dev/null +++ b/bytecomp/linker.ml @@ -0,0 +1,251 @@ +(* Link a set of .cmo files and produce a bytecode executable. *) + +open Sys +open Misc +open Config +open Emitcode + +type error = + File_not_found of string + | Not_an_object_file of string + | Symbol_error of string * Symtable.error + | Inconsistent_import of string * string * string + | Custom_runtime + +exception Error of error + +type link_action = + Link_object of string * compilation_unit + (* Name of .cmo file and descriptor of the unit *) + | Link_archive of string * compilation_unit list + (* Name of .cma file and descriptors of the units to be linked. *) + +(* First pass: determine which units are needed *) + +let missing_globals = ref (Cset.empty : Ident.t Cset.t) + +let is_required (rel, pos) = + match rel with + Reloc_setglobal id -> Cset.mem id !missing_globals + | _ -> false + +let add_required (rel, pos) = + match rel with + Reloc_getglobal id -> missing_globals := Cset.add id !missing_globals + | _ -> () + +let remove_required (rel, pos) = + match rel with + Reloc_setglobal id -> missing_globals := Cset.remove id !missing_globals + | _ -> () + +let scan_file tolink obj_name = + let file_name = + try + find_in_path !load_path obj_name + with Not_found -> + raise(Error(File_not_found obj_name)) in + let ic = open_in_bin file_name in + try + let buffer = String.create (String.length cmo_magic_number) in + really_input ic buffer 0 (String.length cmo_magic_number); + if buffer = cmo_magic_number then begin + (* This is a .cmo file. It must be linked in any case. + Read the relocation information to see which modules it + requires. *) + let compunit_pos = input_binary_int ic in (* Go to descriptor *) + seek_in ic compunit_pos; + let compunit = (input_value ic : compilation_unit) in + List.iter add_required compunit.cu_reloc; + Link_object(file_name, compunit) :: tolink + end + else if buffer = cma_magic_number then begin + (* This is an archive file. Each unit contained in it will be linked + in only if needed. *) + let pos_toc = input_binary_int ic in (* Go to table of contents *) + seek_in ic pos_toc; + let toc = (input_value ic : compilation_unit list) in + let required = + List.fold_left + (fun reqd compunit -> + if List.exists is_required compunit.cu_reloc + or !Clflags.link_everything + then begin + List.iter remove_required compunit.cu_reloc; + List.iter add_required compunit.cu_reloc; + compunit :: reqd + end else + reqd) + [] toc in + Link_archive(file_name, required) :: tolink + end + else raise(Error(Not_an_object_file file_name)) + with x -> + close_in ic; raise x + +(* Second pass: link in the required units *) + +(* Consistency check between interfaces *) + +let crc_interfaces = (Hashtbl.new 17 : (string, string * int) Hashtbl.t) + +let check_consistency file_name cu = + List.iter + (fun (name, crc) -> + try + let (auth_name, auth_crc) = Hashtbl.find crc_interfaces name in + if crc <> auth_crc then + raise(Error(Inconsistent_import(name, file_name, auth_name))) + with Not_found -> + Hashtbl.add crc_interfaces name (file_name, crc)) + cu.cu_interfaces + +(* Link in a compilation unit *) + +let link_compunit outchan inchan file_name compunit = + check_consistency file_name compunit; + seek_in inchan compunit.cu_pos; + let code_block = String.create compunit.cu_codesize in + really_input inchan code_block 0 compunit.cu_codesize; + Symtable.patch_object code_block compunit.cu_reloc; + output outchan code_block 0 compunit.cu_codesize + +(* Link in a .cmo file *) + +let link_object outchan file_name compunit = + let inchan = open_in_bin file_name in + try + link_compunit outchan inchan file_name compunit; + close_in inchan + with + Symtable.Error msg -> + close_in inchan; raise(Error(Symbol_error(file_name, msg))) + | x -> + close_in inchan; raise x + +(* Link in a .cma file *) + +let link_archive outchan file_name units_required = + let inchan = open_in_bin file_name in + try + List.iter (link_compunit outchan inchan file_name) units_required; + close_in inchan + with + Symtable.Error msg -> + close_in inchan; raise(Error(Symbol_error(file_name, msg))) + | x -> + close_in inchan; raise x + +(* Link in a .cmo or .cma file *) + +let link_file outchan = function + Link_object(file_name, unit) -> link_object outchan file_name unit + | Link_archive(file_name, units) -> link_archive outchan file_name units + +(* Create a bytecode executable file *) + +let link_bytecode objfiles exec_name copy_header = + let objfiles = "stdlib.cma" :: objfiles in + let tolink = + List.fold_left scan_file [] (List.rev objfiles) in + let outchan = + open_out_gen [Sys.Open_wronly; Sys.Open_trunc; Sys.Open_creat; Sys.Open_binary] 0o777 exec_name in + try + (* Copy the header *) + if copy_header then begin + try + let inchan = open_in_bin (find_in_path !load_path "header_exe") in + copy_file inchan outchan; + close_in inchan + with Not_found | Sys_error _ -> () + end; + (* The bytecode *) + let pos1 = pos_out outchan in + Symtable.init(); + Hashtbl.clear crc_interfaces; + List.iter (link_file outchan) tolink; + (* The final STOP instruction *) + output_byte outchan Opcodes.opSTOP; + output_byte outchan 0; output_byte outchan 0; output_byte outchan 0; + (* The table of global data *) + let pos2 = pos_out outchan in + output_compact_value outchan (Symtable.initial_global_table()); + (* The List.map of global identifiers *) + let pos3 = pos_out outchan in + Symtable.output_global_map outchan; + (* The trailer *) + let pos4 = pos_out outchan in + output_binary_int outchan (pos2 - pos1); + output_binary_int outchan (pos3 - pos2); + output_binary_int outchan (pos4 - pos3); + output_binary_int outchan 0; + output_string outchan exec_magic_number; + close_out outchan + with x -> + close_out outchan; + remove_file exec_name; + raise x + +(* Main entry point (build a custom runtime if needed) *) + +let link objfiles = + if not !Clflags.custom_runtime then + link_bytecode objfiles !Clflags.exec_name true + else begin + let bytecode_name = temp_file "camlcode" "" in + let prim_name = temp_file "camlprim" ".c" in + try + link_bytecode objfiles bytecode_name false; + Symtable.output_primitives prim_name; + if Sys.command + (concat_strings " " ( + Config.c_compiler :: + ("-I" ^ Config.standard_library) :: + "-o" :: !Clflags.exec_name :: + List.rev !Clflags.ccopts @ + prim_name :: + ("-L" ^ Config.standard_library) :: + List.rev !Clflags.ccobjs @ + "-lcamlrun" :: + Config.c_libraries :: + [])) <> 0 + or Sys.command ("strip " ^ !Clflags.exec_name) <> 0 + then raise(Error Custom_runtime); + let oc = + open_out_gen [Sys.Open_wronly; Sys.Open_append; Sys.Open_binary] 0 !Clflags.exec_name in + let ic = open_in_bin bytecode_name in + copy_file ic oc; + close_in ic; + close_out oc; + remove_file bytecode_name; + remove_file prim_name + with x -> + remove_file bytecode_name; + remove_file prim_name; + raise x + end + +(* Error report *) + +open Format + +let report_error = function + File_not_found name -> + print_string "Cannot find file "; print_string name + | Not_an_object_file name -> + print_string "The file "; print_string name; + print_string " is not a bytecode object file" + | Symbol_error(name, err) -> + print_string "Error while linking "; print_string name; print_string ":"; + print_space(); + Symtable.report_error err + | Inconsistent_import(intf, file1, file2) -> + open_hvbox 0; + print_string "Files "; print_string file1; print_string " and "; + print_string file2; print_space(); + print_string "make inconsistent assumptions over interface "; + print_string intf; + close_box() + | Custom_runtime -> + print_string "Error while building custom runtime system" + diff --git a/bytecomp/linker.mli b/bytecomp/linker.mli new file mode 100644 index 0000000000..b4c57e632c --- /dev/null +++ b/bytecomp/linker.mli @@ -0,0 +1,16 @@ +(* Link .cmo files and produce a bytecode executable. *) + +val link: string list -> unit + +val check_consistency: string -> Emitcode.compilation_unit -> unit + +type error = + File_not_found of string + | Not_an_object_file of string + | Symbol_error of string * Symtable.error + | Inconsistent_import of string * string * string + | Custom_runtime + +exception Error of error + +val report_error: error -> unit diff --git a/bytecomp/matching.ml b/bytecomp/matching.ml new file mode 100644 index 0000000000..ace2a0c5be --- /dev/null +++ b/bytecomp/matching.ml @@ -0,0 +1,260 @@ +(* Compilation of pattern matching *) + +open Location +open Asttypes +open Typedtree +open Lambda + + +(* See Peyton-Jones, "The Implementation of functional programming + languages", chapter 5. *) + +type pattern_matching = + { mutable cases : (pattern list * lambda) list; + args : lambda list } + +(* To group lines of patterns with identical keys *) + +let add_line patl_action pm = + pm.cases <- patl_action :: pm.cases; pm + +let add make_matching_fun division key patl_action args = + try + let pm = List.assoc key division in + pm.cases <- patl_action :: pm.cases; + division + with Not_found -> + let pm = make_matching_fun args in + pm.cases <- patl_action :: pm.cases; + (key, pm) :: division + +(* To expand "or" patterns and remove aliases *) + +let rec simplify = function + ({pat_desc = Tpat_alias(p, id)} :: patl, action) :: rem -> + simplify((p :: patl, action) :: rem) + | ({pat_desc = Tpat_or(p1, p2)} :: patl, action) :: rem -> + let shared_action = share_lambda action in + simplify((p1 :: patl, shared_action) :: + (p2 :: patl, shared_action) :: rem) + | cases -> + cases + +(* Matching against a constant *) + +let make_constant_matching (arg :: argl) = + {cases = []; args = argl} + +let divide_constant {cases = cl; args = al} = + let rec divide cl = + match simplify cl with + ({pat_desc = Tpat_constant cst} :: patl, action) :: rem -> + let (constants, others) = divide rem in + (add make_constant_matching constants cst (patl, action) al, others) + | cl -> + ([], {cases = cl; args = al}) + in divide cl + +(* Matching against a constructor *) + +let make_constr_matching cstr (arg :: argl) = + let (first_pos, last_pos) = + match cstr.cstr_tag with + Cstr_tag n -> (0, cstr.cstr_arity - 1) + | Cstr_exception p -> (1, cstr.cstr_arity) in + let rec make_args pos = + if pos > last_pos + then argl + else Lprim(Pfield pos, [arg]) :: make_args (pos + 1) in + {cases = []; args = make_args first_pos} + +let divide_constructor {cases = cl; args = al} = + let rec divide cl = + match simplify cl with + ({pat_desc = Tpat_construct(cstr, args)} :: patl, action) :: rem -> + let (constructs, others) = divide rem in + (add (make_constr_matching cstr) constructs + cstr.cstr_tag (args @ patl, action) al, + others) + | cl -> + ([], {cases = cl; args = al}) + in divide cl + +(* Matching against a variable *) + +let divide_var {cases = cl; args = al} = + let rec divide cl = + match simplify cl with + ({pat_desc = (Tpat_any | Tpat_var _)} :: patl, action) :: rem -> + let (vars, others) = divide rem in + (add_line (patl, action) vars, others) + | cl -> + (make_constant_matching al, {cases = cl; args = al}) + in divide cl + +(* Matching against a tuple pattern *) + +let make_tuple_matching num_comps (arg :: argl) = + let rec make_args pos = + if pos >= num_comps + then argl + else Lprim(Pfield pos, [arg]) :: make_args (pos + 1) in + {cases = []; args = make_args 0} + +let any_pat = + {pat_desc = Tpat_any; pat_loc = Location.none; pat_type = Ctype.none} + +let divide_tuple arity {cases = cl; args = al} = + let rec divide cl = + match simplify cl with + ({pat_desc = Tpat_tuple args} :: patl, action) :: rem -> + add_line (args @ patl, action) (divide rem) + | ({pat_desc = (Tpat_any | Tpat_var _)} :: patl, action) + :: rem -> + let rec make_args n = + if n >= arity then patl else any_pat :: make_args (n-1) in + add_line (make_args arity, action) (divide rem) + | [] -> + make_tuple_matching arity al + in divide cl + +(* Matching against a record pattern *) + +let divide_record num_fields {cases = cl; args = al} = + let record_matching_line lbl_pat_list = + let patv = Array.new num_fields any_pat in + List.iter (fun (lbl, pat) -> patv.(lbl.lbl_pos) <- pat) lbl_pat_list; + Array.to_list patv in + let rec divide cl = + match simplify cl with + ({pat_desc = Tpat_record lbl_pat_list} :: patl, action) :: rem -> + add_line (record_matching_line lbl_pat_list @ patl, action) + (divide rem) + | ({pat_desc = (Tpat_any | Tpat_var _)} :: patl, action) :: rem -> + add_line (record_matching_line [] @ patl, action) (divide rem) + | [] -> + make_tuple_matching num_fields al + in divide cl + +(* To List.combine sub-matchings together *) + +let combine_var (lambda1, total1) (lambda2, total2) = + if total1 then (lambda1, true) else (Lcatch(lambda1, lambda2), total2) + +let combine_constant arg cst (const_lambda_list, total1) (lambda2, total2) = + let lambda1 = + match cst with + Const_int _ -> + List.fold_right + (fun (c, act) rem -> + Lifthenelse( + Lprim(Pcomp Ceq, [arg; Lconst(Const_base c)]), act, rem)) + const_lambda_list Lstaticfail + | Const_char _ -> + Lswitch(arg, 0, 256, + List.map (fun (Const_char c, l) -> (Char.code c, l)) + const_lambda_list) + | Const_string _ | Const_float _ -> + List.fold_right + (fun (c, act) rem -> + Lifthenelse( + Lprim(Pccall("equal", 2), [arg; Lconst(Const_base c)]), + act, rem)) + const_lambda_list Lstaticfail + in (Lcatch(lambda1, lambda2), total2) + +let combine_constructor arg cstr (tag_lambda_list, total1) (lambda2, total2) = + if cstr.cstr_span < 0 then begin + (* Special cases for exceptions *) + let lambda1 = + List.fold_right + (fun (Cstr_exception path, act) rem -> + Lifthenelse(Lprim(Pcomp Ceq, [Lprim(Pfield 0, [arg]); + transl_path path]), act, rem)) + tag_lambda_list Lstaticfail + in (Lcatch(lambda1, lambda2), total2) + end else begin + (* Regular concrete type *) + let caselist = + List.map (fun (Cstr_tag n, act) -> (n, act)) tag_lambda_list in + let lambda1 = + match (caselist, cstr.cstr_span) with + ([0, act], 1) -> act + | ([0, act], 2) -> Lifthenelse(arg, Lstaticfail, act) + | ([1, act], 2) -> Lifthenelse(arg, act, Lstaticfail) + | ([0, act0; 1, act1], 2) -> Lifthenelse(arg, act1, act0) + | ([1, act1; 0, act0], 2) -> Lifthenelse(arg, act1, act0) + | _ -> Lswitch(Lprim(Ptagof, [arg]), 0, cstr.cstr_span, caselist) in + if total1 & List.length tag_lambda_list = cstr.cstr_span + then (lambda1, true) + else (Lcatch(lambda1, lambda2), total2) + end + +(* The main compilation function. + Input: a pattern matching. + Output: a lambda term, a "total" flag (true if we're sure that the + matching covers all cases; this is an approximation). *) + +let rec compile_match m = + + let rec compile_list = function + [] -> ([], true) + | (key, pm) :: rem -> + let (lambda1, total1) = compile_match pm in + let (list2, total2) = compile_list rem in + ((key, lambda1) :: list2, total1 & total2) in + + match { cases = simplify m.cases; args = m.args } with + { cases = [] } -> + (Lstaticfail, false) + | { cases = ([], action) :: rem; args = argl } -> + if is_guarded action then begin + let (lambda, total) = compile_match { cases = rem; args = argl } in + (Lcatch(action, lambda), total) + end else + (action, true) + | { cases = (pat :: patl, action) :: _; args = arg :: _ } as pm -> + match pat.pat_desc with + Tpat_any | Tpat_var _ -> + let (vars, others) = divide_var pm in + combine_var (compile_match vars) (compile_match others) + | Tpat_constant cst -> + let (constants, others) = divide_constant pm in + combine_constant arg cst + (compile_list constants) (compile_match others) + | Tpat_tuple patl -> + compile_match (divide_tuple (List.length patl) pm) + | Tpat_construct(cstr, patl) -> + let (constrs, others) = divide_constructor pm in + combine_constructor arg cstr + (compile_list constrs) (compile_match others) + | Tpat_record((lbl, _) :: _) -> + compile_match (divide_record (Array.length lbl.lbl_all) pm) + +(* The entry points *) + +let compile_matching handler_fun arg pat_act_list = + let pm = + { cases = List.map (fun (pat, act) -> ([pat], act)) pat_act_list; + args = [arg] } in + let (lambda, total) = compile_match pm in + if total then lambda else Lcatch(lambda, handler_fun()) + +let partial_function loc () = + Lprim(Praise, [Lprim(Pmakeblock 0, + [transl_path Predef.path_match_failure; + Lconst(Const_block(0, + [Const_base(Const_string !Location.input_name); + Const_base(Const_int loc.loc_start); + Const_base(Const_int loc.loc_end)]))])]) + +let for_function loc param pat_act_list = + compile_matching (partial_function loc) (Lvar param) pat_act_list + +let for_trywith param pat_act_list = + compile_matching (fun () -> Lprim(Praise, [Lvar param])) + (Lvar param) pat_act_list + +let for_let loc param pat body = + compile_matching (partial_function loc) (Lvar param) [pat, body] + diff --git a/bytecomp/matching.mli b/bytecomp/matching.mli new file mode 100644 index 0000000000..21b6208019 --- /dev/null +++ b/bytecomp/matching.mli @@ -0,0 +1,11 @@ +(* Compilation of pattern-matching *) + +open Typedtree +open Lambda + +val for_function: + Location.t -> Ident.t -> (pattern * lambda) list -> lambda +val for_trywith: + Ident.t -> (pattern * lambda) list -> lambda +val for_let: + Location.t -> Ident.t -> pattern -> lambda -> lambda diff --git a/bytecomp/printinstr.ml b/bytecomp/printinstr.ml new file mode 100644 index 0000000000..beb840dfcc --- /dev/null +++ b/bytecomp/printinstr.ml @@ -0,0 +1,103 @@ +(* Pretty-print lists of instructions *) + +open Format +open Lambda +open Instruct + + +let instruction = function + Klabel lbl -> print_string "L"; print_int lbl; print_string ":" + | Kacc n -> print_string "\tacc "; print_int n + | Kenvacc n -> print_string "\tenvacc "; print_int n + | Kpush -> print_string "\tpush" + | Kpop n -> print_string "\tpop "; print_int n + | Kassign n -> print_string "\tassign "; print_int n + | Kpush_retaddr lbl -> print_string "\tpush_retaddr L"; print_int lbl + | Kapply n -> print_string "\tapply "; print_int n + | Kappterm(n, m) -> + print_string "\tappterm "; print_int n; print_string ", "; print_int m + | Kreturn n -> print_string "\treturn "; print_int n + | Krestart -> print_string "\trestart" + | Kgrab n -> print_string "\tgrab "; print_int n + | Kclosure(lbl, n) -> + print_string "\tclosure L"; print_int lbl; print_string ", "; print_int n + | Kclosurerec(lbl, n) -> + print_string "\tclosurerec L"; print_int lbl; + print_string ", "; print_int n + | Kgetglobal id -> print_string "\tgetglobal "; Ident.print id + | Ksetglobal id -> print_string "\tsetglobal "; Ident.print id + | Kconst cst -> + open_hovbox 10; print_string "\tconst"; print_space(); + Printlambda.structured_constant cst; close_box() + | Kmakeblock(n, m) -> + print_string "\tmakeblock "; print_int n; print_string ", "; print_int m + | Kgetfield n -> print_string "\tgetfield "; print_int n + | Ksetfield n -> print_string "\tsetfield "; print_int n + | Ktagof -> print_string "\ttagof" + | Kdummy n -> print_string "\tdummy "; print_int n + | Kupdate -> print_string "\tupdate" + | Kvectlength -> print_string "\tvectlength" + | Kgetvectitem -> print_string "\tgetvectitem" + | Ksetvectitem -> print_string "\tsetvectitem" + | Kgetstringchar -> print_string "\tgetstringchar" + | Ksetstringchar -> print_string "\tsetstringchar" + | Kbranch lbl -> print_string "\tbranch L"; print_int lbl + | Kbranchif lbl -> print_string "\tbranchif L"; print_int lbl + | Kbranchifnot lbl -> print_string "\tbranchifnot L"; print_int lbl + | Kstrictbranchif lbl -> print_string "\tstrictbranchif L"; print_int lbl + | Kstrictbranchifnot lbl -> + print_string "\tstrictbranchifnot L"; print_int lbl + | Kswitch lblv -> + open_hovbox 10; + print_string "\tswitch"; + Array.iter (fun lbl -> print_space(); print_int lbl) lblv; + close_box() + | Ktranslate tbl -> + open_hovbox 10; + print_string "\ttranslate"; + Array.iter + (fun (lo, hi, ofs) -> + print_space(); print_int lo; print_string "/"; + print_int hi; print_string "/"; print_int ofs) + tbl; + close_box() + | Kboolnot -> print_string "\tboolnot" + | Kpushtrap lbl -> print_string "\tpushtrap L"; print_int lbl + | Kpoptrap -> print_string "\tpoptrap" + | Kraise -> print_string "\traise" + | Kcheck_signals -> print_string "\tcheck_signals" + | Kccall(s, n) -> + print_string "\tccall "; print_string s; print_string ", "; print_int n + | Knegint -> print_string "\tnegint" + | Kaddint -> print_string "\taddint" + | Ksubint -> print_string "\tsubint" + | Kmulint -> print_string "\tmulint" + | Kdivint -> print_string "\tdivint" + | Kmodint -> print_string "\tmodint" + | Kandint -> print_string "\tandint" + | Korint -> print_string "\torint" + | Kxorint -> print_string "\txorint" + | Klslint -> print_string "\tlslint" + | Klsrint -> print_string "\tlsrint" + | Kasrint -> print_string "\tasrint" + | Kintcomp Ceq -> print_string "\teqint" + | Kintcomp Cneq -> print_string "\tneqint" + | Kintcomp Clt -> print_string "\tltint" + | Kintcomp Cgt -> print_string "\tgtint" + | Kintcomp Cle -> print_string "\tleint" + | Kintcomp Cge -> print_string "\tgeint" + | Koffsetint n -> print_string "\toffsetint "; print_int n + | Koffsetref n -> print_string "\toffsetref "; print_int n + | Kstop -> print_string "\tstop" + +let rec instruction_list = function + [] -> () + | Klabel lbl :: il -> + print_string "L"; print_int lbl; print_string ":"; instruction_list il + | instr :: il -> + instruction instr; print_space(); instruction_list il + +let instrlist il = + open_vbox 0; + instruction_list il; + close_box() diff --git a/bytecomp/printinstr.mli b/bytecomp/printinstr.mli new file mode 100644 index 0000000000..6ccadfedde --- /dev/null +++ b/bytecomp/printinstr.mli @@ -0,0 +1,6 @@ +(* Pretty-print lists of instructions *) + +open Instruct + +val instruction: instruction -> unit +val instrlist: instruction list -> unit diff --git a/bytecomp/printlambda.ml b/bytecomp/printlambda.ml new file mode 100644 index 0000000000..4bcca27e2b --- /dev/null +++ b/bytecomp/printlambda.ml @@ -0,0 +1,196 @@ +open Format +open Misc +open Asttypes +open Lambda + + +let rec structured_constant = function + Const_base(Const_int n) -> print_int n + | Const_base(Const_char c) -> + print_string "'"; print_string(Char.escaped c); print_string "'" + | Const_base(Const_string s) -> + print_string "\""; print_string(String.escaped s); print_string "\"" + | Const_base(Const_float s) -> + print_string s + | Const_block(tag, []) -> + print_string "["; print_int tag; print_string "]" + | Const_block(tag, sc1::scl) -> + open_hovbox 1; + print_string "["; print_int tag; print_string ":"; + print_space(); + open_hovbox 0; + structured_constant sc1; + List.iter (fun sc -> print_space(); structured_constant sc) scl; + close_box(); + print_string "]"; + close_box() + +let primitive = function + Pidentity -> print_string "id" + | Pgetglobal id -> print_string "global "; Ident.print id + | Psetglobal id -> print_string "setglobal "; Ident.print id + | Pmakeblock sz -> print_string "makeblock "; print_int sz + | Ptagof -> print_string "tag" + | Pfield n -> print_string "field "; print_int n + | Psetfield n -> print_string "setfield "; print_int n + | Pccall(name, arity) -> print_string name + | Pupdate -> print_string "update" + | Praise -> print_string "raise" + | Psequand -> print_string "&&" + | Psequor -> print_string "||" + | Pnot -> print_string "not" + | Pnegint -> print_string "~" + | Paddint -> print_string "+" + | Psubint -> print_string "-" + | Pmulint -> print_string "*" + | Pdivint -> print_string "/" + | Pmodint -> print_string "mod" + | Pandint -> print_string "and" + | Porint -> print_string "or" + | Pxorint -> print_string "xor" + | Plslint -> print_string "lsl" + | Plsrint -> print_string "lsr" + | Pasrint -> print_string "asr" + | Pcomp(Ceq) -> print_string "==" + | Pcomp(Cneq) -> print_string "!=" + | Pcomp(Clt) -> print_string "<" + | Pcomp(Cle) -> print_string "<=" + | Pcomp(Cgt) -> print_string ">" + | Pcomp(Cge) -> print_string ">=" + | Poffsetint n -> print_int n; print_string "+" + | Poffsetref n -> print_int n; print_string "+:=" + | Pgetstringchar -> print_string "string.get" + | Psetstringchar -> print_string "string.set" + | Pvectlength -> print_string "array.length" + | Pgetvectitem -> print_string "array.get" + | Psetvectitem -> print_string "array.set" + +let rec lambda = function + Lvar id -> + Ident.print id + | Lconst cst -> + structured_constant cst + | Lapply(lfun, largs) -> + open_hovbox 2; + print_string "(apply"; print_space(); + lambda lfun; + List.iter (fun l -> print_space(); lambda l) largs; + print_string ")"; + close_box() + | Lfunction(param, body) -> + open_hovbox 2; + print_string "(function"; print_space(); Ident.print param; + print_space(); lambda body; print_string ")"; close_box() + | Llet(id, arg, body) -> + open_hovbox 2; + print_string "(let"; print_space(); + open_hvbox 1; + print_string "("; + open_hovbox 2; Ident.print id; print_space(); lambda arg; close_box(); + letbody body; + print_string ")"; + close_box() + | Lletrec(id_arg_list, body) -> + open_hovbox 2; + print_string "(letrec"; print_space(); + open_hvbox 1; + print_string "("; + let spc = ref false in + List.iter + (fun (id, l, sz) -> + if !spc then print_space() else spc := true; + Ident.print id; print_string " "; lambda l) + id_arg_list; + close_box(); + print_string ")"; + print_space(); lambda body; + print_string ")"; close_box() + | Lprim(prim, largs) -> + open_hovbox 2; + print_string "("; primitive prim; + List.iter (fun l -> print_space(); lambda l) largs; + print_string ")"; + close_box() + | Lswitch(larg, lo, hi, cases) -> + open_hovbox 1; + print_string "(switch "; print_int lo; print_string "/"; + print_int hi; print_space(); + lambda larg; print_space(); + open_vbox 0; + let spc = ref false in + List.iter + (fun (n, l) -> + open_hvbox 1; + print_string "case "; print_int n; print_string ":"; print_space(); + lambda l; + close_box(); + if !spc then print_space() else spc := true) + cases; + print_string ")"; close_box(); close_box() + | Lstaticfail -> + print_string "exit" + | Lcatch(lbody, lhandler) -> + open_hovbox 2; + print_string "(catch"; print_space(); + lambda lbody; print_break(1, -1); + print_string "with"; print_space(); lambda lhandler; + print_string ")"; + close_box() + | Ltrywith(lbody, param, lhandler) -> + open_hovbox 2; + print_string "(try"; print_space(); + lambda lbody; print_break(1, -1); + print_string "with "; Ident.print param; print_space(); + lambda lhandler; + print_string ")"; + close_box() + | Lifthenelse(lcond, lif, lelse) -> + open_hovbox 2; + print_string "(if"; print_space(); + lambda lcond; print_space(); + lambda lif; print_space(); + lambda lelse; print_string ")"; + close_box() + | Lsequence(l1, l2) -> + open_hovbox 2; + print_string "(seq"; print_space(); + lambda l1; print_space(); sequence l2; print_string ")"; + close_box() + | Lwhile(lcond, lbody) -> + open_hovbox 2; + print_string "(while"; print_space(); + lambda lcond; print_space(); + lambda lbody; print_string ")"; + close_box() + | Lfor(param, lo, hi, dir, body) -> + open_hovbox 2; + print_string "(for "; Ident.print param; print_space(); + lambda lo; print_space(); + print_string(match dir with Upto -> "to" | Downto -> "downto"); + print_space(); + lambda hi; print_space(); + lambda body; print_string ")"; + close_box() + | Lshared(l, lbl) -> + lambda l + +and sequence = function + Lsequence(l1, l2) -> + sequence l1; print_space(); sequence l2 + | l -> + lambda l + +and letbody = function + Llet(id, arg, body) -> + print_space(); + open_hovbox 2; Ident.print id; print_space(); lambda arg; + close_box(); + letbody body + | Lshared(l, lbl) -> + letbody l + | l -> + print_string ")"; + close_box(); + print_space(); + lambda l + diff --git a/bytecomp/printlambda.mli b/bytecomp/printlambda.mli new file mode 100644 index 0000000000..3dbebb7011 --- /dev/null +++ b/bytecomp/printlambda.mli @@ -0,0 +1,4 @@ +open Lambda + +val structured_constant: structured_constant -> unit +val lambda: lambda -> unit diff --git a/bytecomp/runtimedef.mli b/bytecomp/runtimedef.mli new file mode 100644 index 0000000000..48ba14599d --- /dev/null +++ b/bytecomp/runtimedef.mli @@ -0,0 +1,4 @@ +(* Values and functions known and/or provided by the runtime system *) + +val builtin_exceptions: string array +val builtin_primitives: string array diff --git a/bytecomp/symtable.ml b/bytecomp/symtable.ml new file mode 100644 index 0000000000..c3a0eaf0ae --- /dev/null +++ b/bytecomp/symtable.ml @@ -0,0 +1,223 @@ +(* To assign numbers to globals and primitives *) + +open Misc +open Asttypes +open Lambda +open Emitcode + + +(* Functions for batch linking *) + +type error = + Undefined_global of string + | Unavailable_primitive of string + +exception Error of error + +(* Tables for numbering objects *) + +type 'a numtable = + { num_cnt: int; (* The next number *) + num_tbl: ('a, int) Tbl.t } (* The table of already numbered objects *) + +let empty_numtable = { num_cnt = 0; num_tbl = Tbl.empty } + +let find_numtable nt key = + Tbl.find key nt.num_tbl + +let enter_numtable nt key = + let n = !nt.num_cnt in + nt := { num_cnt = n + 1; num_tbl = Tbl.add key n !nt.num_tbl }; + n + +let incr_numtable nt = + let n = !nt.num_cnt in + nt := { num_cnt = n + 1; num_tbl = !nt.num_tbl }; + n + +(* Global variables *) + +let global_table = ref(empty_numtable : Ident.t numtable) +and literal_table = ref([] : (int * structured_constant) list) + +let slot_for_getglobal id = + try + find_numtable !global_table id + with Not_found -> + raise(Error(Undefined_global(Ident.name id))) + +let slot_for_setglobal id = + enter_numtable global_table id + +let slot_for_literal cst = + let n = incr_numtable global_table in + literal_table := (n, cst) :: !literal_table; + n + +(* The C primitives *) + +let c_prim_table = ref(empty_numtable : string numtable) + +let num_of_prim name = + try + find_numtable !c_prim_table name + with Not_found -> + if !Clflags.custom_runtime + then enter_numtable c_prim_table name + else raise(Error(Unavailable_primitive name)) + +open Printf + +let output_primitives prim_file_name = + let oc = open_out prim_file_name in + let prim = Array.new !c_prim_table.num_cnt "" in + Tbl.iter (fun name number -> prim.(number) <- name) !c_prim_table.num_tbl; + for i = 0 to Array.length prim - 1 do + fprintf oc "extern long %s();\n" prim.(i) + done; + fprintf oc "typedef long (*primitive)();\n"; + fprintf oc "primitive cprim[] = {\n"; + for i = 0 to Array.length prim - 1 do + fprintf oc " %s,\n" prim.(i) + done; + fprintf oc " (primitive) 0 };\n"; + fprintf oc "char * names_of_cprim[] = {\n"; + for i = 0 to Array.length prim - 1 do + fprintf oc " \"%s\",\n" prim.(i) + done; + fprintf oc " (char *) 0 };\n"; + close_out oc + +(* Initialization for batch linking *) + +let init () = + (* Enter the predefined exceptions *) + Array.iter + (fun name -> + let id = + try List.assoc name Predef.builtin_values + with Not_found -> fatal_error "Symtable.init" in + let c = slot_for_setglobal id in + let cst = Const_block(0, [Const_base(Const_string name)]) in + literal_table := (c, cst) :: !literal_table) + Runtimedef.builtin_exceptions; + (* Enter the known C primitives *) + Array.iter (enter_numtable c_prim_table) Runtimedef.builtin_primitives + +(* Relocate a block of object bytecode *) + +(* Must use the unsafe String.set here because the block may be + a "fake" string as returned by Meta.static_alloc. *) +let patch_short buff pos n = + String.unsafe_set buff pos (Char.unsafe_chr n); + String.unsafe_set buff (succ pos) (Char.unsafe_chr (n asr 8)) + +let patch_object buff patchlist = + List.iter + (function + (Reloc_literal sc, pos) -> + patch_short buff pos (slot_for_literal sc) + | (Reloc_getglobal id, pos) -> + patch_short buff pos (slot_for_getglobal id) + | (Reloc_setglobal id, pos) -> + patch_short buff pos (slot_for_setglobal id) + | (Reloc_primitive name, pos) -> + patch_short buff pos (num_of_prim name)) + patchlist + +(* Translate structured constants *) + +let rec transl_const = function + Const_base(Const_int i) -> Obj.repr i + | Const_base(Const_char c) -> Obj.repr c + | Const_base(Const_string s) -> Obj.repr s + | Const_base(Const_float f) -> Obj.repr(float_of_string f) + | Const_block(tag, fields) -> + let block = Obj.new_block tag (List.length fields) in + let pos = ref 0 in + List.iter + (fun c -> Obj.set_field block !pos (transl_const c); incr pos) + fields; + block + +(* Build the initial table of globals *) + +let initial_global_table () = + let glob = Array.new !global_table.num_cnt (Obj.repr 0) in + List.iter + (fun (slot, cst) -> glob.(slot) <- transl_const cst) + !literal_table; + literal_table := []; + glob + +(* Save the table of globals *) + +let output_global_map oc = + output_compact_value oc !global_table + +(* Functions for toplevel use *) + +(* Update the in-core table of globals *) + +let update_global_table () = + let ng = !global_table.num_cnt in + if ng >= Array.length(Meta.global_data()) then Meta.realloc_global_data ng; + let glob = Meta.global_data() in + List.iter + (fun (slot, cst) -> glob.(slot) <- transl_const cst) + !literal_table; + literal_table := [] + +(* Initialize the linker for toplevel use *) + +let init_toplevel () = + (* Read back the known global symbols from the executable file *) + let ic = open_in_bin Sys.argv.(0) in + let pos_trailer = + in_channel_length ic - 16 - String.length Config.exec_magic_number in + seek_in ic pos_trailer; + let code_size = input_binary_int ic in + let data_size = input_binary_int ic in + let symbol_size = input_binary_int ic in + let debug_size = input_binary_int ic in + seek_in ic (pos_trailer - debug_size - symbol_size); + global_table := (input_value ic : Ident.t numtable); + close_in ic; + (* Enter the known C primitives *) + Array.iter (enter_numtable c_prim_table) (Meta.available_primitives()) + +(* Find the val of a global identifier *) + +let get_global_value id = + (Meta.global_data()).(slot_for_getglobal id) +and assign_global_value id v = + (Meta.global_data()).(slot_for_getglobal id) <- v + +(* Save and restore the current state *) + +type global_map = Ident.t numtable + +let current_state () = !global_table +and restore_state st = global_table := st + +(* "Filter" the global List.map according to some predicate. + Used to expunge the global List.map for the toplevel. *) + +let filter_global_map p gmap = + let newtbl = ref Tbl.empty in + Tbl.iter + (fun id num -> if p id then newtbl := Tbl.add id num !newtbl) + gmap.num_tbl; + {num_cnt = gmap.num_cnt; num_tbl = !newtbl} + +(* Error report *) + +open Format + +let report_error = function + Undefined_global s -> + print_string "Reference to undefined global `"; print_string s; + print_string "'" + | Unavailable_primitive s -> + print_string "The external function `"; print_string s; + print_string "' is not available" diff --git a/bytecomp/symtable.mli b/bytecomp/symtable.mli new file mode 100644 index 0000000000..0fec140198 --- /dev/null +++ b/bytecomp/symtable.mli @@ -0,0 +1,34 @@ +(* Assign locations and numbers to globals and primitives *) + +open Emitcode + +(* Functions for batch linking *) + +val init: unit -> unit +val patch_object: string -> (reloc_info * int) list -> unit +val initial_global_table: unit -> Obj.t array +val output_global_map: out_channel -> unit +val output_primitives: string -> unit + +(* Functions for the toplevel *) + +val init_toplevel: unit -> unit +val update_global_table: unit -> unit +val get_global_value: Ident.t -> Obj.t +val assign_global_value: Ident.t -> Obj.t -> unit + +type global_map + +val current_state: unit -> global_map +val restore_state: global_map -> unit +val filter_global_map: (Ident.t -> bool) -> global_map -> global_map + +(* Error report *) + +type error = + Undefined_global of string + | Unavailable_primitive of string + +exception Error of error + +val report_error: error -> unit diff --git a/bytecomp/translcore.ml b/bytecomp/translcore.ml new file mode 100644 index 0000000000..35499b5242 --- /dev/null +++ b/bytecomp/translcore.ml @@ -0,0 +1,343 @@ +(* Translation from typed abstract syntax to lambda terms, + for the core language *) + +open Misc +open Asttypes +open Path +open Typedtree +open Lambda + + +type error = + Illegal_letrec_pat + | Illegal_letrec_expr + +exception Error of Location.t * error + +(* The translation environment maps identifiers bound by patterns + to lambda-terms, e.g. access paths. + Identifiers unbound in the environment List.map to themselves. *) + +(* Compute the access paths to identifiers bound in patterns. *) + +let rec bind_pattern env pat arg mut = + match pat.pat_desc with + Tpat_var id -> + begin match mut with + Mutable -> (env, fun e -> Llet(id, arg, e)) + | Immutable -> (add_env id arg env, fun e -> e) + end + | Tpat_alias(pat, id) -> + let (ext_env, bind) = bind_pattern env pat arg mut in + begin match mut with + Mutable -> (ext_env, fun e -> Llet(id, arg, bind e)) + | Immutable -> (add_env id arg ext_env, bind) + end + | Tpat_tuple patl -> + bind_pattern_list env patl arg mut 0 + | Tpat_construct(cstr, patl) -> + bind_pattern_list env patl arg mut + (match cstr.cstr_tag with Cstr_tag _ -> 0 | Cstr_exception _ -> 1) + | Tpat_record lbl_pat_list -> + bind_label_pattern env lbl_pat_list arg mut + | _ -> + (env, fun e -> e) + +and bind_pattern_list env patl arg mut pos = + match patl with + [] -> (env, fun e -> e) + | pat :: rem -> + let (env1, bind1) = + bind_pattern env pat (Lprim(Pfield pos, [arg])) mut in + let (env2, bind2) = + bind_pattern_list env1 rem arg mut (pos+1) in + (env2, fun e -> bind1(bind2 e)) + +and bind_label_pattern env patl arg mut = + match patl with + [] -> (env, fun e -> e) + | (lbl, pat) :: rem -> + let mut1 = + match lbl.lbl_mut with Mutable -> Mutable | Immutable -> mut in + let (env1, bind1) = + bind_pattern env pat (Lprim(Pfield lbl.lbl_pos, [arg])) mut1 in + let (env2, bind2) = + bind_label_pattern env1 rem arg mut in + (env2, fun e -> bind1(bind2 e)) + +(* Translation of primitives *) + +let comparisons_table = create_hashtable 11 [ + "%equal", + (Pccall("equal", 2), Pcomp Ceq, Pccall("eq_float", 2)); + "%notequal", + (Pccall("notequal", 2), Pcomp Cneq, Pccall("neq_float", 2)); + "%lessthan", + (Pccall("lessthan", 2), Pcomp Clt, Pccall("lt_float", 2)); + "%greaterthan", + (Pccall("greaterthan", 2), Pcomp Cgt, Pccall("gt_float", 2)); + "%lessequal", + (Pccall("lessequal", 2), Pcomp Cle, Pccall("le_float", 2)); + "%greaterequal", + (Pccall("greaterequal", 2), Pcomp Cge, Pccall("ge_float", 2)) +] + +let primitives_table = create_hashtable 31 [ + "%identity", Pidentity; + "%tagof", Ptagof; + "%field0", Pfield 0; + "%field1", Pfield 1; + "%setfield0", Psetfield 0; + "%makeblock", Pmakeblock 0; + "%update", Pupdate; + "%raise", Praise; + "%sequand", Psequand; + "%sequor", Psequor; + "%boolnot", Pnot; + "%negint", Pnegint; + "%succint", Poffsetint 1; + "%predint", Poffsetint(-1); + "%addint", Paddint; + "%subint", Psubint; + "%mulint", Pmulint; + "%divint", Pdivint; + "%modint", Pmodint; + "%andint", Pandint; + "%orint", Porint; + "%xorint", Pxorint; + "%lslint", Plslint; + "%lsrint", Plsrint; + "%asrint", Pasrint; + "%eq", Pcomp Ceq; + "%noteq", Pcomp Cneq; + "%ltint", Pcomp Clt; + "%leint", Pcomp Cle; + "%gtint", Pcomp Cgt; + "%geint", Pcomp Cge; + "%incr", Poffsetref(1); + "%decr", Poffsetref(-1); + "%string_get", Pgetstringchar; + "%string_set", Psetstringchar; + "%array_length", Pvectlength; + "%array_get", Pgetvectitem; + "%array_set", Psetvectitem +] + +let same_base_type ty1 ty2 = + match (Ctype.repr ty1, Ctype.repr ty2) with + (Tconstr(p1, []), Tconstr(p2, [])) -> Path.same p1 p2 + | (_, _) -> false + +let transl_prim prim arity args = + try + let (gencomp, intcomp, floatcomp) = + Hashtbl.find comparisons_table prim in + match args with + [arg1; arg2] when same_base_type arg1.exp_type Predef.type_int + or same_base_type arg1.exp_type Predef.type_char -> + intcomp + | [arg1; arg2] when same_base_type arg1.exp_type Predef.type_float -> + floatcomp + | _ -> + gencomp + with Not_found -> + try + Hashtbl.find primitives_table prim + with Not_found -> + Pccall(prim, arity) + +(* To compile "let rec" *) + +exception Unknown + +let rec size_of_lambda = function + Lfunction(param, body) -> 2 + | Lprim(Pmakeblock tag, args) -> + List.iter check_rec_lambda args; List.length args + | Llet(id, arg, body) -> + check_rec_lambda arg; size_of_lambda body + | _ -> raise Unknown + +and check_rec_lambda = function + Lvar id -> () + | Lconst cst -> () + | Lfunction(param, body) -> () + | Llet(id, arg, body) -> check_rec_lambda arg; check_rec_lambda body + | Lprim(Pmakeblock tag, args) -> List.iter check_rec_lambda args + | _ -> raise Unknown + +(* To propagate structured constants *) + +exception Not_constant + +let extract_constant = function Lconst sc -> sc | _ -> raise Not_constant + +(* Translation of expressions *) + +let rec transl_exp env e = + match e.exp_desc with + Texp_ident(path, desc) -> + begin match path with + Pident id -> transl_access env id + | _ -> transl_path path + end + | Texp_constant cst -> + Lconst(Const_base cst) + | Texp_let(rec_flag, pat_expr_list, body) -> + let (ext_env, add_let) = transl_let env rec_flag pat_expr_list in + add_let(transl_exp ext_env body) + | Texp_function pat_expr_list -> + let param = Ident.new "fun" in + Lfunction(param, Matching.for_function e.exp_loc param + (transl_cases env param pat_expr_list)) + | Texp_apply({exp_desc = Texp_ident(path, {val_prim = Primitive(s, arity)})}, + args) when List.length args = arity -> + Lprim(transl_prim s arity args, transl_list env args) + | Texp_apply(funct, args) -> + Lapply(transl_exp env funct, transl_list env args) + | Texp_match(arg, pat_expr_list) -> + name_lambda (transl_exp env arg) + (fun id -> + Matching.for_function e.exp_loc id + (transl_cases env id pat_expr_list)) + | Texp_try(body, pat_expr_list) -> + let id = Ident.new "exn" in + Ltrywith(transl_exp env body, id, + Matching.for_trywith id (transl_cases env id pat_expr_list)) + | Texp_tuple el -> + let ll = transl_list env el in + begin try + Lconst(Const_block(0, List.map extract_constant ll)) + with Not_constant -> + Lprim(Pmakeblock 0, ll) + end + | Texp_construct(cstr, args) -> + let ll = transl_list env args in + begin match cstr.cstr_tag with + Cstr_tag n -> + begin try + Lconst(Const_block(n, List.map extract_constant ll)) + with Not_constant -> + Lprim(Pmakeblock n, ll) + end + | Cstr_exception path -> + Lprim(Pmakeblock 0, transl_path path :: ll) + end + | Texp_record lbl_expr_list -> + let lv = Array.new (List.length lbl_expr_list) Lstaticfail in + List.iter + (fun (lbl, expr) -> lv.(lbl.lbl_pos) <- transl_exp env expr) + lbl_expr_list; + let ll = Array.to_list lv in + if List.for_all (fun (lbl, expr) -> lbl.lbl_mut = Immutable) lbl_expr_list + then begin + try + Lconst(Const_block(0, List.map extract_constant ll)) + with Not_constant -> + Lprim(Pmakeblock 0, ll) + end else + Lprim(Pmakeblock 0, ll) + | Texp_field(arg, lbl) -> + Lprim(Pfield lbl.lbl_pos, [transl_exp env arg]) + | Texp_setfield(arg, lbl, newval) -> + Lprim(Psetfield lbl.lbl_pos, + [transl_exp env arg; transl_exp env newval]) + | Texp_array expr_list -> + Lprim(Pmakeblock 0, transl_list env expr_list) + | Texp_ifthenelse(cond, ifso, Some ifnot) -> + Lifthenelse(transl_exp env cond, transl_exp env ifso, + transl_exp env ifnot) + | Texp_ifthenelse(cond, ifso, None) -> + Lifthenelse(transl_exp env cond, transl_exp env ifso, lambda_unit) + | Texp_sequence(expr1, expr2) -> + Lsequence(transl_exp env expr1, transl_exp env expr2) + | Texp_while(cond, body) -> + Lwhile(transl_exp env cond, transl_exp env body) + | Texp_for(param, low, high, dir, body) -> + Lfor(param, transl_exp env low, transl_exp env high, dir, + transl_exp env body) + | Texp_when(cond, body) -> + Lifthenelse(transl_exp env cond, transl_exp env body, Lstaticfail) + +and transl_list env = function + [] -> [] + | expr :: rem -> transl_exp env expr :: transl_list env rem + +and transl_cases env param pat_expr_list = + let transl_case (pat, expr) = + let (ext_env, bind_fun) = bind_pattern env pat (Lvar param) Immutable in + (pat, bind_fun(transl_exp ext_env expr)) in + List.map transl_case pat_expr_list + +and transl_let env rec_flag pat_expr_list = + match rec_flag with + Nonrecursive -> + let rec transl body_env = function + [] -> + (body_env, fun e -> e) + | (pat, expr) :: rem -> + let id = Ident.new "let" in + let (ext_env, bind_fun) = + bind_pattern body_env pat (Lvar id) Immutable in + let (final_env, add_let_fun) = + transl ext_env rem in + (final_env, + fun e -> Llet(id, transl_exp env expr, + Matching.for_let pat.pat_loc id pat + (bind_fun(add_let_fun e)))) in + transl env pat_expr_list + | Recursive -> + let transl_case (pat, expr) = + let id = + match pat.pat_desc with + Tpat_var id -> id + | _ -> raise(Error(pat.pat_loc, Illegal_letrec_pat)) in + let lam = transl_exp env expr in + let size = + try size_of_lambda lam + with Unknown -> raise(Error(expr.exp_loc, Illegal_letrec_expr)) in + (id, lam, size) in + let decls = + List.map transl_case pat_expr_list in + (env, fun e -> Lletrec(decls, e)) + +(* Compile a primitive definition *) + +let transl_primitive = function + Not_prim -> fatal_error "Translcore.transl_primitive" + | Primitive(name, arity) -> + let prim = + try + let (gencomp, intcomp, floatcomp) = + Hashtbl.find comparisons_table name in + gencomp + with Not_found -> + try + Hashtbl.find primitives_table name + with Not_found -> + Pccall(name, arity) in + let rec add_params n params = + if n >= arity + then Lprim(prim, List.rev params) + else begin + let id = Ident.new "prim" in + Lfunction(id, add_params (n+1) (Lvar id :: params)) + end in + add_params 0 [] + +(* Compile an exception definition *) + +let transl_exception id decl = + Lprim(Pmakeblock 0, [Lconst(Const_base(Const_string(Ident.name id)))]) + +(* Error report *) + +open Format + +let report_error = function + Illegal_letrec_pat -> + print_string + "Only variables are allowed as left-hand side of `let rec'" + | Illegal_letrec_expr -> + print_string + "This kind of expression is not allowed as right-hand side of `let rec'" diff --git a/bytecomp/translcore.mli b/bytecomp/translcore.mli new file mode 100644 index 0000000000..9fa9835a7b --- /dev/null +++ b/bytecomp/translcore.mli @@ -0,0 +1,23 @@ +(* Translation from typed abstract syntax to lambda terms, + for the core language *) + +open Asttypes +open Typedtree +open Lambda + +val transl_exp: compilenv -> expression -> lambda +val transl_let: + compilenv -> rec_flag -> (pattern * expression) list -> + compilenv * (lambda -> lambda) +val transl_primitive: primitive_description -> lambda +val transl_exception: Ident.t -> exception_declaration -> lambda + +type error = + Illegal_letrec_pat + | Illegal_letrec_expr + +exception Error of Location.t * error + +val report_error: error -> unit + + diff --git a/bytecomp/translmod.ml b/bytecomp/translmod.ml new file mode 100644 index 0000000000..5509868950 --- /dev/null +++ b/bytecomp/translmod.ml @@ -0,0 +1,157 @@ +(* Translation from typed abstract syntax to lambda terms, + for the module language *) + +open Misc +open Typedtree +open Lambda +open Translcore + + +(* Compile a coercion *) + +let rec apply_coercion restr arg = + match restr with + Tcoerce_none -> + arg + | Tcoerce_structure pos_cc_list -> + name_lambda arg (fun id -> + Lprim(Pmakeblock 0, List.map (apply_coercion_field id) pos_cc_list)) + | Tcoerce_functor(cc_arg, cc_res) -> + let param = Ident.new "funarg" in + name_lambda arg (fun id -> + Lfunction(param, + apply_coercion cc_res + (Lapply(Lvar id, [apply_coercion cc_arg (Lvar param)])))) + +and apply_coercion_field id (pos, cc) = + apply_coercion cc (Lprim(Pfield pos, [Lvar id])) + +(* Compose two coercions + apply_coercion c1 (apply_coercion c2 e) behaves like + apply_coercion (compose_coercions c1 c2) e. *) + +let rec compose_coercions c1 c2 = + match (c1, c2) with + (Tcoerce_none, c2) -> c2 + | (c1, Tcoerce_none) -> c1 + | (Tcoerce_structure pc1, Tcoerce_structure pc2) -> + let v2 = Array.of_list pc2 in + Tcoerce_structure + (List.map (fun (p1, c1) -> + let (p2, c2) = v2.(p1) in (p2, compose_coercions c1 c2)) + pc1) + | (Tcoerce_functor(arg1, res1), Tcoerce_functor(arg2, res2)) -> + Tcoerce_functor(compose_coercions arg2 arg1, + compose_coercions res1 res2) + | (_, _) -> + fatal_error "Translmod.compose_coercions" + +(* Compile a module expression *) + +let rec transl_module env cc mexp = + match mexp.mod_desc with + Tmod_ident path -> + apply_coercion cc (transl_path path) + | Tmod_structure str -> + transl_structure env [] cc str + | Tmod_functor(param, mty, body) -> + begin match cc with + Tcoerce_none -> + Lfunction(param, transl_module env Tcoerce_none body) + | Tcoerce_functor(ccarg, ccres) -> + let param' = Ident.new "funarg" in + Lfunction(param', + Llet(param, apply_coercion ccarg (Lvar param'), + transl_module env ccres body)) + | Tcoerce_structure _ -> + fatal_error "Translmod.transl_module" + end + | Tmod_apply(funct, arg, ccarg) -> + apply_coercion cc + (Lapply(transl_module env Tcoerce_none funct, + [transl_module env ccarg arg])) + | Tmod_constraint(arg, mty, ccarg) -> + transl_module env (compose_coercions cc ccarg) arg + +and transl_structure env fields cc = function + [] -> + begin match cc with + Tcoerce_none -> + Lprim(Pmakeblock 0, + List.map (fun id -> transl_access env id) (List.rev fields)) + | Tcoerce_structure pos_cc_list -> + let v = Array.of_list (List.rev fields) in + Lprim(Pmakeblock 0, + List.map (fun (pos, cc) -> + apply_coercion cc (transl_access env v.(pos))) + pos_cc_list) + | Tcoerce_functor(_, _) -> + fatal_error "Translmod.transl_structure" + end + | Tstr_eval expr :: rem -> + Lsequence(transl_exp env expr, transl_structure env fields cc rem) + | Tstr_value(rec_flag, pat_expr_list) :: rem -> + let ext_fields = let_bound_idents pat_expr_list @ fields in + let (ext_env, add_let) = transl_let env rec_flag pat_expr_list in + add_let(transl_structure ext_env ext_fields cc rem) + | Tstr_primitive(id, descr) :: rem -> + Llet(id, transl_primitive descr.val_prim, + transl_structure env (id :: fields) cc rem) + | Tstr_type(decls) :: rem -> + transl_structure env fields cc rem + | Tstr_exception(id, decl) :: rem -> + Llet(id, transl_exception id decl, + transl_structure env (id :: fields) cc rem) + | Tstr_module(id, modl) :: rem -> + Llet(id, transl_module env Tcoerce_none modl, + transl_structure env (id :: fields) cc rem) + | Tstr_modtype(id, decl) :: rem -> + transl_structure env fields cc rem + | Tstr_open path :: rem -> + transl_structure env fields cc rem + +(* Compile an implementation *) + +let transl_implementation module_name str cc = + let module_id = Ident.new_persistent module_name in + Lprim(Psetglobal module_id, [transl_structure empty_env [] cc str]) + +(* Compile a sequence of expressions *) + +let rec make_sequence fn = function + [] -> lambda_unit + | [x] -> fn x + | x::rem -> Lsequence(fn x, make_sequence fn rem) + +(* Compile a toplevel phrase *) + +let transl_toplevel_item = function + Tstr_eval expr -> + transl_exp empty_env expr + | Tstr_value(rec_flag, pat_expr_list) -> + let idents = let_bound_idents pat_expr_list in + let (env, add_lets) = transl_let empty_env rec_flag pat_expr_list in + let lam = + add_lets(make_sequence + (fun id -> Lprim(Psetglobal id, [transl_access env id])) + idents) in + List.iter Ident.make_global idents; + lam + | Tstr_primitive(id, descr) -> + Ident.make_global id; + Lprim(Psetglobal id, [transl_primitive descr.val_prim]) + | Tstr_type(decls) -> + lambda_unit + | Tstr_exception(id, decl) -> + Ident.make_global id; + Lprim(Psetglobal id, [transl_exception id decl]) + | Tstr_module(id, modl) -> + Ident.make_global id; + Lprim(Psetglobal id, [transl_module empty_env Tcoerce_none modl]) + | Tstr_modtype(id, decl) -> + lambda_unit + | Tstr_open path -> + lambda_unit + +let transl_toplevel_definition str = + make_sequence transl_toplevel_item str diff --git a/bytecomp/translmod.mli b/bytecomp/translmod.mli new file mode 100644 index 0000000000..067b2b6db6 --- /dev/null +++ b/bytecomp/translmod.mli @@ -0,0 +1,8 @@ +(* Translation from typed abstract syntax to lambda terms, + for the module language *) + +open Typedtree +open Lambda + +val transl_implementation: string -> structure -> module_coercion -> lambda +val transl_toplevel_definition: structure -> lambda diff --git a/byterun/.depend b/byterun/.depend new file mode 100644 index 0000000000..d43a9d1d88 --- /dev/null +++ b/byterun/.depend @@ -0,0 +1,121 @@ +alloc.o : alloc.c alloc.h misc.h config.h ../config/m.h ../config/s.h mlvalues.h \ + major_gc.h freelist.h memory.h gc.h minor_gc.h stacks.h +array.o : array.c alloc.h misc.h config.h ../config/m.h ../config/s.h mlvalues.h \ + fail.h memory.h gc.h major_gc.h freelist.h minor_gc.h +compare.o : compare.c fail.h misc.h config.h ../config/m.h ../config/s.h mlvalues.h \ + memory.h gc.h major_gc.h freelist.h minor_gc.h str.h +crc.o : crc.c io.h misc.h config.h ../config/m.h ../config/s.h mlvalues.h +extern.o : extern.c fail.h misc.h config.h ../config/m.h ../config/s.h mlvalues.h \ + gc.h intext.h io.h memory.h major_gc.h freelist.h minor_gc.h reverse.h str.h +fail.o : fail.c alloc.h misc.h config.h ../config/m.h ../config/s.h mlvalues.h \ + fail.h gc.h memory.h major_gc.h freelist.h minor_gc.h signals.h stacks.h +fix_code.o : fix_code.c config.h ../config/m.h ../config/s.h fix_code.h misc.h \ + mlvalues.h instruct.h reverse.h +floats.o : floats.c alloc.h misc.h config.h ../config/m.h ../config/s.h mlvalues.h \ + fail.h memory.h gc.h major_gc.h freelist.h minor_gc.h stacks.h +freelist.o : freelist.c config.h ../config/m.h ../config/s.h freelist.h misc.h \ + mlvalues.h gc.h gc_ctrl.h major_gc.h +gc_ctrl.o : gc_ctrl.c alloc.h misc.h config.h ../config/m.h ../config/s.h mlvalues.h \ + gc.h gc_ctrl.h major_gc.h freelist.h minor_gc.h +hash.o : hash.c mlvalues.h config.h ../config/m.h ../config/s.h misc.h memory.h \ + gc.h major_gc.h freelist.h minor_gc.h str.h +instrtrace.o : instrtrace.c +intern.o : intern.c fail.h misc.h config.h ../config/m.h ../config/s.h mlvalues.h \ + gc.h intext.h io.h memory.h major_gc.h freelist.h minor_gc.h reverse.h +interp.o : interp.c alloc.h misc.h config.h ../config/m.h ../config/s.h mlvalues.h \ + fail.h fix_code.h instruct.h interp.h memory.h gc.h major_gc.h freelist.h minor_gc.h \ + prims.h signals.h stacks.h str.h instrtrace.h jumptbl.h +ints.o : ints.c alloc.h misc.h config.h ../config/m.h ../config/s.h mlvalues.h \ + fail.h memory.h gc.h major_gc.h freelist.h minor_gc.h +io.o : io.c alloc.h misc.h config.h ../config/m.h ../config/s.h mlvalues.h fail.h \ + io.h memory.h gc.h major_gc.h freelist.h minor_gc.h signals.h sys.h +lexing.o : lexing.c interp.h misc.h config.h ../config/m.h ../config/s.h mlvalues.h \ + stacks.h memory.h gc.h major_gc.h freelist.h minor_gc.h str.h +main.o : main.c alloc.h misc.h config.h ../config/m.h ../config/s.h mlvalues.h \ + exec.h fail.h gc_ctrl.h interp.h intext.h io.h stacks.h memory.h gc.h major_gc.h \ + freelist.h minor_gc.h sys.h +major_gc.o : major_gc.c config.h ../config/m.h ../config/s.h fail.h misc.h mlvalues.h \ + freelist.h gc.h gc_ctrl.h major_gc.h roots.h +memory.o : memory.c fail.h misc.h config.h ../config/m.h ../config/s.h mlvalues.h \ + freelist.h gc.h gc_ctrl.h major_gc.h memory.h minor_gc.h +meta.o : meta.c alloc.h misc.h config.h ../config/m.h ../config/s.h mlvalues.h \ + interp.h major_gc.h freelist.h memory.h gc.h minor_gc.h prims.h +minor_gc.o : minor_gc.c config.h ../config/m.h ../config/s.h fail.h misc.h mlvalues.h \ + gc.h gc_ctrl.h major_gc.h freelist.h memory.h minor_gc.h roots.h +misc.o : misc.c config.h ../config/m.h ../config/s.h misc.h +parsing.o : parsing.c config.h ../config/m.h ../config/s.h mlvalues.h misc.h \ + memory.h gc.h major_gc.h freelist.h minor_gc.h alloc.h +prims.o : prims.c mlvalues.h config.h ../config/m.h ../config/s.h misc.h prims.h +roots.o : roots.c memory.h config.h ../config/m.h ../config/s.h gc.h mlvalues.h \ + misc.h major_gc.h freelist.h minor_gc.h roots.h stacks.h +signals.o : signals.c alloc.h misc.h config.h ../config/m.h ../config/s.h mlvalues.h \ + fail.h interp.h memory.h gc.h major_gc.h freelist.h minor_gc.h roots.h signals.h +stacks.o : stacks.c config.h ../config/m.h ../config/s.h fail.h misc.h mlvalues.h \ + stacks.h memory.h gc.h major_gc.h freelist.h minor_gc.h +str.o : str.c alloc.h misc.h config.h ../config/m.h ../config/s.h mlvalues.h \ + fail.h +sys.o : sys.c config.h ../config/m.h ../config/s.h alloc.h misc.h mlvalues.h \ + fail.h instruct.h signals.h stacks.h memory.h gc.h major_gc.h freelist.h minor_gc.h +terminfo.o : terminfo.c config.h ../config/m.h ../config/s.h alloc.h misc.h \ + mlvalues.h fail.h io.h +alloc.d.o : alloc.c alloc.h misc.h config.h ../config/m.h ../config/s.h mlvalues.h \ + major_gc.h freelist.h memory.h gc.h minor_gc.h stacks.h +array.d.o : array.c alloc.h misc.h config.h ../config/m.h ../config/s.h mlvalues.h \ + fail.h memory.h gc.h major_gc.h freelist.h minor_gc.h +compare.d.o : compare.c fail.h misc.h config.h ../config/m.h ../config/s.h mlvalues.h \ + memory.h gc.h major_gc.h freelist.h minor_gc.h str.h +crc.d.o : crc.c io.h misc.h config.h ../config/m.h ../config/s.h mlvalues.h +extern.d.o : extern.c fail.h misc.h config.h ../config/m.h ../config/s.h mlvalues.h \ + gc.h intext.h io.h memory.h major_gc.h freelist.h minor_gc.h reverse.h str.h +fail.d.o : fail.c alloc.h misc.h config.h ../config/m.h ../config/s.h mlvalues.h \ + fail.h gc.h memory.h major_gc.h freelist.h minor_gc.h signals.h stacks.h +fix_code.d.o : fix_code.c config.h ../config/m.h ../config/s.h fix_code.h misc.h \ + mlvalues.h instruct.h reverse.h +floats.d.o : floats.c alloc.h misc.h config.h ../config/m.h ../config/s.h mlvalues.h \ + fail.h memory.h gc.h major_gc.h freelist.h minor_gc.h stacks.h +freelist.d.o : freelist.c config.h ../config/m.h ../config/s.h freelist.h misc.h \ + mlvalues.h gc.h gc_ctrl.h major_gc.h +gc_ctrl.d.o : gc_ctrl.c alloc.h misc.h config.h ../config/m.h ../config/s.h mlvalues.h \ + gc.h gc_ctrl.h major_gc.h freelist.h minor_gc.h +hash.d.o : hash.c mlvalues.h config.h ../config/m.h ../config/s.h misc.h memory.h \ + gc.h major_gc.h freelist.h minor_gc.h str.h +instrtrace.d.o : instrtrace.c instruct.h misc.h config.h ../config/m.h ../config/s.h \ + mlvalues.h opnames.h +intern.d.o : intern.c fail.h misc.h config.h ../config/m.h ../config/s.h mlvalues.h \ + gc.h intext.h io.h memory.h major_gc.h freelist.h minor_gc.h reverse.h +interp.d.o : interp.c alloc.h misc.h config.h ../config/m.h ../config/s.h mlvalues.h \ + fail.h fix_code.h instruct.h interp.h memory.h gc.h major_gc.h freelist.h minor_gc.h \ + prims.h signals.h stacks.h str.h instrtrace.h +ints.d.o : ints.c alloc.h misc.h config.h ../config/m.h ../config/s.h mlvalues.h \ + fail.h memory.h gc.h major_gc.h freelist.h minor_gc.h +io.d.o : io.c alloc.h misc.h config.h ../config/m.h ../config/s.h mlvalues.h fail.h \ + io.h memory.h gc.h major_gc.h freelist.h minor_gc.h signals.h sys.h +lexing.d.o : lexing.c interp.h misc.h config.h ../config/m.h ../config/s.h mlvalues.h \ + stacks.h memory.h gc.h major_gc.h freelist.h minor_gc.h str.h +main.d.o : main.c alloc.h misc.h config.h ../config/m.h ../config/s.h mlvalues.h \ + exec.h fail.h gc_ctrl.h interp.h intext.h io.h stacks.h memory.h gc.h major_gc.h \ + freelist.h minor_gc.h sys.h +major_gc.d.o : major_gc.c config.h ../config/m.h ../config/s.h fail.h misc.h mlvalues.h \ + freelist.h gc.h gc_ctrl.h major_gc.h roots.h +memory.d.o : memory.c fail.h misc.h config.h ../config/m.h ../config/s.h mlvalues.h \ + freelist.h gc.h gc_ctrl.h major_gc.h memory.h minor_gc.h +meta.d.o : meta.c alloc.h misc.h config.h ../config/m.h ../config/s.h mlvalues.h \ + interp.h major_gc.h freelist.h memory.h gc.h minor_gc.h prims.h +minor_gc.d.o : minor_gc.c config.h ../config/m.h ../config/s.h fail.h misc.h mlvalues.h \ + gc.h gc_ctrl.h major_gc.h freelist.h memory.h minor_gc.h roots.h +misc.d.o : misc.c config.h ../config/m.h ../config/s.h misc.h +parsing.d.o : parsing.c config.h ../config/m.h ../config/s.h mlvalues.h misc.h \ + memory.h gc.h major_gc.h freelist.h minor_gc.h alloc.h +prims.d.o : prims.c mlvalues.h config.h ../config/m.h ../config/s.h misc.h prims.h +roots.d.o : roots.c memory.h config.h ../config/m.h ../config/s.h gc.h mlvalues.h \ + misc.h major_gc.h freelist.h minor_gc.h roots.h stacks.h +signals.d.o : signals.c alloc.h misc.h config.h ../config/m.h ../config/s.h mlvalues.h \ + fail.h interp.h memory.h gc.h major_gc.h freelist.h minor_gc.h roots.h signals.h +stacks.d.o : stacks.c config.h ../config/m.h ../config/s.h fail.h misc.h mlvalues.h \ + stacks.h memory.h gc.h major_gc.h freelist.h minor_gc.h +str.d.o : str.c alloc.h misc.h config.h ../config/m.h ../config/s.h mlvalues.h \ + fail.h +sys.d.o : sys.c config.h ../config/m.h ../config/s.h alloc.h misc.h mlvalues.h \ + fail.h instruct.h signals.h stacks.h memory.h gc.h major_gc.h freelist.h minor_gc.h +terminfo.d.o : terminfo.c config.h ../config/m.h ../config/s.h alloc.h misc.h \ + mlvalues.h fail.h io.h diff --git a/byterun/Makefile b/byterun/Makefile new file mode 100644 index 0000000000..2ac4f0ef3a --- /dev/null +++ b/byterun/Makefile @@ -0,0 +1,84 @@ +include ../config/Makefile.h +include ../Makefile.config + +CFLAGS=-O $(CCCOMPOPTS) +DFLAGS=-g -DDEBUG $(CCCOMPOPTS) + +OBJS=interp.o misc.o stacks.o fix_code.o main.o fail.o signals.o \ + freelist.o major_gc.o minor_gc.o memory.o alloc.o roots.o \ + compare.o ints.o floats.o str.o array.o io.o extern.o intern.o \ + hash.o sys.o meta.o parsing.o lexing.o gc_ctrl.o terminfo.o crc.o + +DOBJS=$(OBJS:.o=.d.o) instrtrace.d.o + +PRIMS=array.c compare.c crc.c extern.c floats.c gc_ctrl.c hash.c \ + intern.c interp.c ints.c io.c lexing.c meta.c parsing.c \ + signals.c str.c sys.c terminfo.c + +all: camlrun camlrund libcaml.a + +camlrun: $(OBJS) prims.o + $(CC) $(CCCOMPOPTS) $(CCLINKOPTS) $(LOWADDRESSES) -o camlrun prims.o $(OBJS) $(CCLIBS) + +camlrund: $(DOBJS) prims.o + $(CC) -g $(CCCOMPOPTS) $(CCLINKOPTS) $(LOWADDRESSES) -o camlrund prims.o $(DOBJS) $(CCLIBS) + +libcaml.a: $(OBJS) + rm -f libcaml.a + ar rc libcaml.a $(OBJS) + $(RANLIB) libcaml.a + +install: + cp camlrun $(BINDIR)/cslrun + cp libcaml.a $(LIBDIR) + $(RANLIB) $(LIBDIR)/libcaml.a + test -d $(LIBDIR)/caml || mkdir $(LIBDIR)/caml + cp mlvalues.h alloc.h misc.h $(LIBDIR)/caml + sed -e '/#include ".*\/m.h/r ../config/m.h' \ + -e '/#include ".*\/s.h/r ../config/s.h' \ + -e '/#include "/d' config.h > $(LIBDIR)/caml/config.h + sed -e '/#include ".*gc\.h"/d' \ + -e '/#define Alloc_small/,/^}/d' \ + -e '/Modify/,/^}/d' memory.h > $(LIBDIR)/caml/memory.h + +clean: + rm -f camlrun camlrund *.o *.a + rm -f primitives prims.c opnames.h jumptbl.h + +primitives : $(PRIMS) + sed -n -e '/\/\* ML \*\//s/.* \([a-z0-9_]*\)(.*/\1/p' \ + $(PRIMS) > primitives + +prims.c : primitives + (echo '#include "mlvalues.h"'; \ + echo '#include "prims.h"'; \ + sed -e 's/.*/extern value &();/' primitives; \ + echo 'c_primitive cprim[] = {'; \ + sed -e 's/.*/ &,/' primitives; \ + echo ' 0 };'; \ + echo 'char * names_of_cprim[] = {'; \ + sed -e 's/.*/ "&",/' primitives; \ + echo ' 0 };') > prims.c + +opnames.h : instruct.h + sed -e '/\/\*/d' \ + -e 's/enum /char * names_of_/' \ + -e 's/{$$/[] = {/' \ + -e 's/\([A-Z][A-Z_0-9]*\)/"\1"/g' instruct.h > opnames.h + +# jumptbl.h is required only if you have GCC 2.0 or later +jumptbl.h : instruct.h + sed -n -e '/^ /s/ \([A-Z]\)/ \&\&lbl_\1/gp' \ + -e '/^}/q' instruct.h > jumptbl.h + +.SUFFIXES: .d.o + +.c.d.o: + cd .debugobj; $(CC) -c $(DFLAGS) -I.. ../$< + mv .debugobj/$*.o $*.d.o + +depend : prims.c opnames.h jumptbl.h + gcc -MM $(CFLAGS) *.c > .depend + gcc -MM $(DFLAGS) *.c | sed -e 's/\.o/.d.o/' >> .depend + +include .depend diff --git a/byterun/alloc.c b/byterun/alloc.c new file mode 100644 index 0000000000..afe0892cee --- /dev/null +++ b/byterun/alloc.c @@ -0,0 +1,131 @@ +/* 1. Allocation functions doing the same work as the macros in the + case where [Setup_for_gc] and [Restore_after_gc] are no-ops. + 2. Convenience functions related to allocation. +*/ + +#include <string.h> +#include "alloc.h" +#include "major_gc.h" +#include "memory.h" +#include "mlvalues.h" +#include "stacks.h" + +#define Setup_for_gc +#define Restore_after_gc + +value alloc (wosize, tag) + mlsize_t wosize; + tag_t tag; +{ + value result; + + Assert (wosize > 0 && wosize <= Max_young_wosize); + Alloc_small (result, wosize, tag); + return result; +} + +value alloc_tuple(n) + mlsize_t n; +{ + return alloc(n, 0); +} + +value alloc_string (len) + mlsize_t len; +{ + value result; + mlsize_t offset_index; + mlsize_t wosize = (len + sizeof (value)) / sizeof (value); + + if (wosize <= Max_young_wosize) { + Alloc_small (result, wosize, String_tag); + }else{ + result = alloc_shr (wosize, String_tag); + } + Field (result, wosize - 1) = 0; + offset_index = Bsize_wsize (wosize) - 1; + Byte (result, offset_index) = offset_index - len; + return result; +} + +value alloc_final (len, fun, mem, max) + mlsize_t len; + final_fun fun; + mlsize_t mem, max; +{ + value result = alloc_shr (len, Final_tag); + + Field (result, 0) = (value) fun; + adjust_gc_speed (mem, max); + return result; +} + +value copy_double(d) + double d; +{ + value res; + + Alloc_small(res, Double_wosize, Double_tag); + Store_double_val(res, d); + return res; +} + +value copy_string(s) + char * s; +{ + int len; + value res; + + len = strlen(s); + res = alloc_string(len); + bcopy(s, String_val(res), len); + return res; +} + +value alloc_array(funct, arr) + value (*funct) P((char *)); + char ** arr; +{ + mlsize_t nbr, n; + value v; + + nbr = 0; + while (arr[nbr] != 0) nbr++; + if (nbr == 0) { + v = Atom(0); + } else { + while (extern_sp - nbr <= stack_low) + realloc_stack(); + for (n = 0; n < nbr; n++) + *--extern_sp = funct(arr[n]); + if (nbr < Max_young_wosize) { + v = alloc(nbr, 0); + n = nbr; + while (n-- > 0) Field (v, n) = *extern_sp++; + } else { + v = alloc_shr(nbr, 0); + n = nbr; + while (n-- > 0) initialize (&Field(v, n), *extern_sp++); + } + } + return v; +} + +value copy_string_array(arr) + char ** arr; +{ + return alloc_array(copy_string, arr); +} + +int convert_flag_list(list, flags) + value list; + int * flags; +{ + int res; + res = 0; + while (Tag_val(list) == 1) { + res |= flags[Tag_val(Field(list, 0))]; + list = Field(list, 1); + } + return res; +} diff --git a/byterun/alloc.h b/byterun/alloc.h new file mode 100644 index 0000000000..5b0a1029a0 --- /dev/null +++ b/byterun/alloc.h @@ -0,0 +1,19 @@ +#ifndef _alloc_ +#define _alloc_ + + +#include "misc.h" +#include "mlvalues.h" + +value alloc P((mlsize_t, tag_t)); +value alloc_tuple P((mlsize_t)); +value alloc_string P((mlsize_t)); +value alloc_final P((mlsize_t, final_fun, mlsize_t, mlsize_t)); +value copy_string P((char *)); +value copy_string_array P((char **)); +value copy_double P((double)); +value alloc_array P((value (*funct) P((char *)), char ** array)); +int convert_flag_list P((value, int *)); + + +#endif /* _alloc_ */ diff --git a/byterun/array.c b/byterun/array.c new file mode 100644 index 0000000000..471bfa1541 --- /dev/null +++ b/byterun/array.c @@ -0,0 +1,45 @@ +/* Operations on arrays */ + +#include "alloc.h" +#include "fail.h" +#include "memory.h" +#include "misc.h" +#include "mlvalues.h" + +value make_vect(len, init) /* ML */ + value len, init; +{ + value res; + mlsize_t size, i; + Push_roots(root, 1); + + size = Long_val(len); + if (size > Max_wosize) { + Pop_roots(); + invalid_argument("Array.new"); + } + if (size == 0) { + res = Atom(0); + } + else if (size < Max_young_wosize) { + root[0] = init; + res = alloc(size, 0); + init = root[0]; + for (i = 0; i < size; i++) Field(res, i) = init; + } + else if (Is_block(init) && Is_young(init)) { + root[0] = init; + minor_collection(); + res = alloc_shr(size, 0); + init = root[0]; + for (i = 0; i < size; i++) Field(res, i) = init; + } + else { + root[0] = init; + res = alloc_shr(size, 0); + init = root[0]; + for (i = 0; i < size; i++) initialize(&Field(res, i), init); + } + Pop_roots(); + return res; +} diff --git a/byterun/compare.c b/byterun/compare.c new file mode 100644 index 0000000000..a42fe7664d --- /dev/null +++ b/byterun/compare.c @@ -0,0 +1,110 @@ +#include "fail.h" +#include "memory.h" +#include "misc.h" +#include "mlvalues.h" +#include "str.h" + +/* Structural comparison on trees. + May loop on cyclic structures. */ + +static long compare_val(v1, v2) + value v1,v2; +{ + tag_t t1, t2; + + tailcall: + if (v1 == v2) return 0; + if (Is_long(v1) || Is_long(v2)) return Long_val(v1) - Long_val(v2); + /* If one of the objects is outside the heap (but is not an atom), + use address comparison. */ + if ((!Is_atom(v1) && !Is_young(v1) && !Is_in_heap(v1)) || + (!Is_atom(v2) && !Is_young(v2) && !Is_in_heap(v2))) + return v1 - v2; + t1 = Tag_val(v1); + t2 = Tag_val(v2); + if (t1 != t2) return (long)t1 - (long)t2; + switch(t1) { + case String_tag: { + mlsize_t len1, len2, len; + unsigned char * p1, * p2; + len1 = string_length(v1); + len2 = string_length(v2); + for (len = (len1 <= len2 ? len1 : len2), + p1 = (unsigned char *) String_val(v1), + p2 = (unsigned char *) String_val(v2); + len > 0; + len--, p1++, p2++) + if (*p1 != *p2) return (long)*p1 - (long)*p2; + return len1 - len2; + } + case Double_tag: { + double d1 = Double_val(v1); + double d2 = Double_val(v2); + if (d1 == d2) return 0; else if (d1 < d2) return -1; else return 1; + } + case Abstract_tag: + case Final_tag: + invalid_argument("equal: abstract value"); + case Closure_tag: + invalid_argument("equal: functional value"); + default: { + mlsize_t sz1 = Wosize_val(v1); + mlsize_t sz2 = Wosize_val(v2); + value * p1, * p2; + long res; + if (sz1 != sz2) return sz1 - sz2; + for(p1 = Op_val(v1), p2 = Op_val(v2); + sz1 > 1; + sz1--, p1++, p2++) { + res = compare_val(*p1, *p2); + if (res != 0) return res; + } + v1 = *p1; + v2 = *p2; + goto tailcall; + } + } +} + +value compare(v1, v2) /* ML */ + value v1, v2; +{ + return Val_long(compare_val(v1, v2)); +} + +value equal(v1, v2) /* ML */ + value v1, v2; +{ + return Atom(compare_val(v1, v2) == 0); +} + +value notequal(v1, v2) /* ML */ + value v1, v2; +{ + return Atom(compare_val(v1, v2) != 0); +} + +value lessthan(v1, v2) /* ML */ + value v1, v2; +{ + return Atom(compare_val(v1, v2) < 0); +} + +value lessequal(v1, v2) /* ML */ + value v1, v2; +{ + return Atom(compare_val(v1, v2) <= 0); +} + +value greaterthan(v1, v2) /* ML */ + value v1, v2; +{ + return Atom(compare_val(v1, v2) > 0); +} + +value greaterequal(v1, v2) /* ML */ + value v1, v2; +{ + return Atom(compare_val(v1, v2) >= 0); +} + diff --git a/byterun/config.h b/byterun/config.h new file mode 100644 index 0000000000..c3a8a78b91 --- /dev/null +++ b/byterun/config.h @@ -0,0 +1,121 @@ +#ifndef _config_ +#define _config_ + + +#include "../config/m.h" +#include "../config/s.h" + +/* Library dependencies */ + +#ifdef HAS_MEMMOVE +#define bcopy(src,dst,len) memmove((dst), (src), (len)) +#else +#ifdef HAS_BCOPY +/* Nothing to do */ +#else +#ifdef HAS_MEMCPY +#define bcopy(src,dst,len) memcpy((dst), (src), (len)) +#else +#define bcopy(src,dst,len) memmov((dst), (src), (len)) +#define USING_MEMMOV +#endif +#endif +#endif + +#ifndef HAS__SETJMP +#define _setjmp setjmp +#define _longjmp longjmp +#endif + +/* We use threaded code interpretation if the compiler provides labels + as first-class values (GCC 2.x). */ + +#if defined(__GNUC__) && __GNUC__ >= 2 && !defined(DEBUG) +#define THREADED_CODE +#endif + +/* Do not change this definition. */ +#define Page_size (1 << Page_log) + +/* Memory model parameters */ + +#ifndef SMALL + +/* The size of a page for memory management (in bytes) is [1 << Page_log]. + It must be a multiple of [sizeof (long)]. */ +#define Page_log 12 /* A page is 4 kilobytes. */ + +/* Initial sizes of stack (bytes). */ +#define Stack_size 16384 + +/* Minimum free size of stack (bytes); below that, it is reallocated. */ +#define Stack_threshold 1024 + +/* Maximum sizes for the stack (bytes). */ + +#ifdef MINIMIZE_MEMORY +#define Max_stack_size 131072 +#else +#define Max_stack_size 524288 +#endif + +/* Maximum size of a block allocated in the young generation (words). */ +/* Must be > 4 */ +#define Max_young_wosize 256 + + +/* Minimum size of the minor zone (words). + This must be at least [Max_young_wosize + 1]. */ +#define Minor_heap_min 4096 + +/* Maximum size of the minor zone (words). + Must be greater than or equal to [Minor_heap_min]. +*/ +#define Minor_heap_max (1 << 28) + +/* Default size of the minor zone. (words) */ +#define Minor_heap_def 32768 + + +/* Minimum size increment when growing the heap (words). + Must be a multiple of [Page_size / sizeof (value)]. */ +#define Heap_chunk_min (2 * Page_size / sizeof (value)) + +/* Maximum size of a contiguous piece of the heap (words). + Must be greater than or equal to [Heap_chunk_min]. + Must be greater than or equal to [Bhsize_wosize (Max_wosize)]. */ +#define Heap_chunk_max (Bhsize_wosize (Max_wosize)) + +/* Default size increment when growing the heap. (bytes) + Must be a multiple of [Page_size / sizeof (value)]. */ +#define Heap_chunk_def (62 * Page_size / sizeof (value)) + + +/* Default speed setting for the major GC. The heap will grow until + the dead objects and the free list represent this percentage of the + heap size. The rest of the heap is live objects. */ +#define Percent_free_def 30 + +#else +/* Scaled-down parameters for small memory */ + +#define Page_log 10 +#define Arg_stack_size 16384 +#define Ret_stack_size 16384 +#define Arg_stack_threshold 1024 +#define Ret_stack_threshold 1024 +#define Max_arg_stack_size 524288 +#define Max_ret_stack_size 524288 +#define Max_young_wosize 256 +#define Minor_heap_min 1024 +#define Minor_heap_max (1 << 28) +#define Minor_heap_def 16384 +#define Heap_chunk_min (2 * Page_size / sizeof (value)) +#define Heap_chunk_max (1 << 28) +#define Heap_chunk_def (126 * Page_size / sizeof (value)) +#define Percent_free_def 20 + +#endif + + +#endif /* _config_ */ diff --git a/byterun/crc.c b/byterun/crc.c new file mode 100644 index 0000000000..0e961b3493 --- /dev/null +++ b/byterun/crc.c @@ -0,0 +1,91 @@ +/* CRC computation */ + +#include "io.h" +#include "mlvalues.h" + +static uint32 crc32tab[] = { /* CRC polynomial 0xedb88320 */ + 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, + 0x706af48f, 0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4, + 0xe0d5e91e, 0x97d2d988, 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, + 0x90bf1d91, 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de, + 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, 0x136c9856, + 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9, + 0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4, + 0xa2677172, 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, + 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3, + 0x45df5c75, 0xdcd60dcf, 0xabd13d59, 0x26d930ac, 0x51de003a, + 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423, 0xcfba9599, + 0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924, + 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190, + 0x01db7106, 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, + 0x9fbfe4a5, 0xe8b8d433, 0x7807c9a2, 0x0f00f934, 0x9609a88e, + 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01, + 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, 0x6c0695ed, + 0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950, + 0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, + 0xfbd44c65, 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, + 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a, + 0x346ed9fc, 0xad678846, 0xda60b8d0, 0x44042d73, 0x33031de5, + 0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa, 0xbe0b1010, + 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f, + 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, + 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6, + 0x03b6e20c, 0x74b1d29a, 0xead54739, 0x9dd277af, 0x04db2615, + 0x73dc1683, 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8, + 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, 0xf00f9344, + 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb, + 0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a, + 0x67dd4acc, 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, + 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, + 0xa6bc5767, 0x3fb506dd, 0x48b2364b, 0xd80d2bda, 0xaf0a1b4c, + 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55, 0x316e8eef, + 0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236, + 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe, + 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, + 0x2cd99e8b, 0x5bdeae1d, 0x9b64c2b0, 0xec63f226, 0x756aa39c, + 0x026d930a, 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713, + 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, 0x92d28e9b, + 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242, + 0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1, + 0x18b74777, 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, + 0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, 0xa00ae278, + 0xd70dd2ee, 0x4e048354, 0x3903b3c2, 0xa7672661, 0xd06016f7, + 0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc, 0x40df0b66, + 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9, + 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, + 0xcdd70693, 0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8, + 0x5d681b02, 0x2a6f2b94, 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, + 0x2d02ef8d }; + +#define START 0xFFFFFFFF +#define MASK_RESULT 0x7FFFFFFF + +/* This macro assumes that crc is an unsigned integer */ +#define ADDCRC(ch, crc) (crc32tab[((ch) ^ crc) & 0xff] ^ (crc >> 8)) + +value crc_string(str, ofs, len) /* ML */ + value str, ofs, len; +{ + unsigned char * p; + mlsize_t n; + uint32 crc; + + for (crc = START, p = &Byte_u(str, Long_val(ofs)), n = Long_val(len); + n > 0; + n--, p++) + crc = ADDCRC(*p, crc); + return Val_int(crc & MASK_RESULT); +} + +value crc_chan(chan, len) /* ML */ + struct channel * chan; + value len; +{ + mlsize_t n; + uint32 crc; + + for (crc = START, n = Long_val(len); n > 0; n--) + crc = ADDCRC(getch(chan), crc); + return Val_int(crc & MASK_RESULT); +} + diff --git a/byterun/exec.h b/byterun/exec.h new file mode 100644 index 0000000000..1590dc0a4a --- /dev/null +++ b/byterun/exec.h @@ -0,0 +1,27 @@ +/* exec.h : format of executable bytecode files */ + +/* offset 0 ---> initial junk + code block + data block + symbol table + debug infos + trailer + end of file ---> +*/ + +/* Structure of the trailer: four 32-bit, unsigned integers, big endian */ + +#define TRAILER_SIZE (4*4+12) + +struct exec_trailer { + unsigned long code_size; /* Size of the code block (in bytes) */ + unsigned long data_size; /* Size of the global data table (bytes) */ + unsigned long symbol_size; /* Size of the symbol table (bytes) */ + unsigned long debug_size; /* Size of the debug infos (bytes) */ + char magic[12]; /* A magic string */ +}; + +/* Magic number for this release */ + +#define EXEC_MAGIC "Caml1999X001" + diff --git a/byterun/extern.c b/byterun/extern.c new file mode 100644 index 0000000000..ea05d35e76 --- /dev/null +++ b/byterun/extern.c @@ -0,0 +1,245 @@ +/* Structured output */ + +#include "fail.h" +#include "gc.h" +#include "intext.h" +#include "io.h" +#include "memory.h" +#include "misc.h" +#include "mlvalues.h" +#include "reverse.h" +#include "str.h" + +/* To keep track of sharing in externed objects */ + +typedef unsigned long byteoffset_t; + +struct extern_obj { + value obj; + byteoffset_t ofs; +}; + +static struct extern_obj * extern_table; +static asize_t extern_table_size; + +#ifdef SIXTYFOUR +#define Hash(v) (((asize_t) ((v) >> 3)) % extern_table_size) +#else +#define Hash(v) (((asize_t) ((v) >> 2)) % extern_table_size) +#endif + +static void alloc_extern_table() +{ + asize_t i; + + extern_table = (struct extern_obj *) + stat_alloc(extern_table_size * sizeof(struct extern_obj)); + for (i = 0; i < extern_table_size; i++) + extern_table[i].obj = 0; +} + +static void resize_extern_table() +{ + asize_t oldsize; + struct extern_obj * oldtable; + asize_t i, h; + + oldsize = extern_table_size; + oldtable = extern_table; + extern_table_size = 2 * extern_table_size; + alloc_extern_table(); + for (i = 0; i < oldsize; i++) { + h = Hash(oldtable[i].obj); + while (extern_table[h].obj != 0) { + h++; + if (h >= extern_table_size) h = 0; + } + extern_table[h].obj = oldtable[i].obj; + extern_table[h].ofs = oldtable[i].ofs; + } + stat_free((char *) oldtable); +} + +/* Write integers on a channel */ + +static void output8(chan, code, val) + struct channel * chan; + int code; + long val; +{ + putch(chan, code); putch(chan, val); +} + +static void output16(chan, code, val) + struct channel * chan; + int code; + long val; +{ + putch(chan, code); putch(chan, val >> 8); putch(chan, val); +} + +static void output32(chan, code, val) + struct channel * chan; + int code; + long val; +{ + putch(chan, code); + putch(chan, val >> 24); putch(chan, val >> 16); + putch(chan, val >> 8); putch(chan, val); +} + +#ifdef SIXTYFOUR +static void output64(chan, code, val) + struct channel * chan; + int code; + long val; +{ + int i; + putch(chan, code); + for (i = 64 - 8; i >= 0; i -= 8) putch(chan, val >> i); +} +#endif + +static byteoffset_t obj_counter; /* Number of objects emitted so far */ +static unsigned long size_32; /* Size in words of 32-bit block for struct. */ +static unsigned long size_64; /* Size in words of 64-bit block for struct. */ + +static void emit_compact(chan, v) + struct channel * chan; + value v; +{ + tailcall: + if (Is_long(v)) { + long n = Long_val(v); + if (n >= 0 && n < 0x40) { + putch(chan, PREFIX_SMALL_INT + n); + } else if (n >= -(1 << 7) && n < (1 << 7)) { + output8(chan, CODE_INT8, n); + } else if (n >= -(1 << 15) && n < (1 << 15)) { + output16(chan, CODE_INT16, n); +#ifdef SIXTYFOUR + } else if (n < -(1L << 31) || n >= (1L << 31)) { + output64(chan, CODE_INT64, n); +#endif + } else + output32(chan, CODE_INT32, n); + } else { + header_t hd = Hd_val(v); + tag_t tag = Tag_hd(hd); + mlsize_t sz = Wosize_hd(hd); + asize_t h; + /* Atoms are treated specially for two reasons: they are not allocated + in the externed block, and they are automatically shared. */ + if (sz == 0) { + if (tag < 16) { + putch(chan, PREFIX_SMALL_BLOCK + tag); + } else { + output32(chan, CODE_BLOCK32, hd); + } + } else { + /* Check if already seen */ + if (2 * obj_counter >= extern_table_size) resize_extern_table(); + h = Hash(v); + while (extern_table[h].obj != 0) { + if (extern_table[h].obj == v) { + byteoffset_t d = obj_counter - extern_table[h].ofs; + if (d < 0x100) { + output8(chan, CODE_SHARED8, d); + } else if (d < 0x10000) { + output16(chan, CODE_SHARED16, d); + } else { + output32(chan, CODE_SHARED32, d); + } + return; + } + h++; + if (h >= extern_table_size) h = 0; + } + /* Not seen yet. Record the object and output its contents. */ + extern_table[h].obj = v; + extern_table[h].ofs = obj_counter; + obj_counter++; + switch(tag) { + case String_tag: { + mlsize_t len = string_length(v); + if (len < 0x20) { + putch(chan, PREFIX_SMALL_STRING + len); + } else if (len < 0x100) { + output8(chan, CODE_STRING8, len); + } else { + output32(chan, CODE_STRING32, len); + } + putblock(chan, String_val(v), len); + size_32 += 1 + (len + 4) / 4; + size_64 += 1 + (len + 8) / 8; + break; + } + case Double_tag: { + double buffer; + if (sizeof(double) != 8) + invalid_argument("output_value: non-standard floats"); + putch(chan, CODE_DOUBLE_NATIVE); + buffer = Double_val(v); + putblock(chan, (char *) &buffer, 8); + size_32 += 1 + sizeof(double) / 4; + size_64 += 1 + sizeof(double) / 8; + break; + } + case Abstract_tag: + case Final_tag: + invalid_argument("output_value: abstract value"); + break; + case Closure_tag: + invalid_argument("output_value: functional value"); + break; + default: { + mlsize_t i; + if (tag < 16 && sz < 8) { + putch(chan, PREFIX_SMALL_BLOCK + tag + (sz << 4)); + } else { + output32(chan, CODE_BLOCK32, hd); + } + size_32 += 1 + sz; + size_64 += 1 + sz; + for (i = 0; i < sz - 1; i++) emit_compact(chan, Field(v, i)); + v = Field(v, i); + goto tailcall; + } + } + } + } +} + +value output_value(chan, v) /* ML */ + struct channel * chan; + value v; +{ + value start_loc, final_loc; + putword(chan, Compact_magic_number); + start_loc = pos_out(chan); + putword(chan, 0); + putword(chan, 0); + putword(chan, 0); + extern_table_size = INITIAL_EXTERN_TABLE_SIZE; + alloc_extern_table(); + obj_counter = 0; + size_32 = 0; + size_64 = 0; + emit_compact(chan, v); +#ifdef SIXTYFOUR + if (size_32 >= (1L << 32) || size_64 >= (1L << 32)) { + /* The object is so big its size cannot be written in the header. + Besides, some of the block sizes or string lengths or shared offsets + it contains may have overflowed the 32 bits used to write them. */ + failwith("output_value: object too big"); + } +#endif + final_loc = pos_out(chan); + seek_out(chan, start_loc); + putword(chan, obj_counter); + putword(chan, size_32); + putword(chan, size_64); + seek_out(chan, final_loc); + stat_free((char *) extern_table); + return Val_unit; +} diff --git a/byterun/fail.c b/byterun/fail.c new file mode 100644 index 0000000000..8b56034beb --- /dev/null +++ b/byterun/fail.c @@ -0,0 +1,108 @@ +/* Raising exceptions from C. */ + +#include "alloc.h" +#include "fail.h" +#include "gc.h" +#include "memory.h" +#include "mlvalues.h" +#include "signals.h" +#include "stacks.h" + +struct longjmp_buffer * external_raise; +value exn_bucket; + +static void mlraise P((value)) Noreturn; + +static void mlraise(v) + value v; +{ + leave_blocking_section(); + exn_bucket = v; + longjmp(external_raise->buf, 1); +} + +void raise_constant(tag) + value tag; +{ + value bucket; + Push_roots (a, 1); + a[0] = tag; + bucket = alloc (1, 0); + Field(bucket, 0) = a[0]; + Pop_roots (); + mlraise(bucket); +} + +void raise_with_arg(tag, arg) + value tag; + value arg; +{ + value bucket; + Push_roots (a, 2); + a[0] = tag; + a[1] = arg; + bucket = alloc (2, 0); + Field(bucket, 0) = a[0]; + Field(bucket, 1) = a[1]; + Pop_roots (); + mlraise(bucket); +} + +void raise_with_string(tag, msg) + value tag; + char * msg; +{ + raise_with_arg(tag, copy_string(msg)); +} + +void failwith (msg) + char * msg; +{ + raise_with_string(Field(global_data, FAILURE_EXN), msg); +} + +void invalid_argument (msg) + char * msg; +{ + raise_with_string(Field(global_data, INVALID_EXN), msg); +} + +/* Problem: we can't use raise_constant, because it allocates and + we're out of memory... The following is a terrible hack that works + because global_data[OUT_OF_MEMORY_EXN] is in the old generation + (because global_data was read with intern_val), hence stays at + a fixed address */ + +static struct { + header_t hdr; + value exn; +} out_of_memory_bucket; + +void raise_out_of_memory() +{ + out_of_memory_bucket.hdr = Make_header(1, 0, White); + out_of_memory_bucket.exn = Field(global_data, OUT_OF_MEMORY_EXN); + mlraise((value) &(out_of_memory_bucket.exn)); +} + +void raise_sys_error(msg) + value msg; +{ + raise_with_arg(Field(global_data, SYS_ERROR_EXN), msg); +} + +void raise_end_of_file() +{ + raise_constant(Field(global_data, END_OF_FILE_EXN)); +} + +void raise_zero_divide() +{ + raise_constant(Field(global_data, ZERO_DIVIDE_EXN)); +} + +void raise_not_found() +{ + raise_constant(Field(global_data, NOT_FOUND_EXN)); +} + diff --git a/byterun/fail.h b/byterun/fail.h new file mode 100644 index 0000000000..0a0be89706 --- /dev/null +++ b/byterun/fail.h @@ -0,0 +1,36 @@ +#ifndef _fail_ +#define _fail_ + + +#include <setjmp.h> +#include "misc.h" +#include "mlvalues.h" + +#define OUT_OF_MEMORY_EXN 0 /* "Out_of_memory" */ +#define SYS_ERROR_EXN 1 /* "Sys_error" */ +#define FAILURE_EXN 2 /* "Failure" */ +#define INVALID_EXN 3 /* "Invalid_argument" */ +#define END_OF_FILE_EXN 4 /* "End_of_file" */ +#define ZERO_DIVIDE_EXN 5 /* "Division_by_zero" */ +#define NOT_FOUND_EXN 6 /* "Not_found" */ +#define MATCH_FAILURE_EXN 7 /* "Match_failure" */ + +struct longjmp_buffer { + jmp_buf buf; +}; + +extern struct longjmp_buffer * external_raise; +extern value exn_bucket; + +void raise_constant P((value tag)) Noreturn; +void raise_with_arg P((value tag, value arg)) Noreturn; +void raise_with_string P((value tag, char * msg)) Noreturn; +void failwith P((char *)) Noreturn; +void invalid_argument P((char *)) Noreturn; +void raise_out_of_memory P((void)) Noreturn; +void raise_sys_error P((value)) Noreturn; +void raise_end_of_file P((void)) Noreturn; +void raise_zero_divide P((void)) Noreturn; +void raise_not_found P((void)) Noreturn; + +#endif /* _fail_ */ diff --git a/byterun/fix_code.c b/byterun/fix_code.c new file mode 100644 index 0000000000..9c24de180e --- /dev/null +++ b/byterun/fix_code.c @@ -0,0 +1,66 @@ +/* Translate a block of bytecode (endianness switch, threading). */ + +#include "config.h" +#include "fix_code.h" +#include "misc.h" +#include "mlvalues.h" +#include "instruct.h" +#include "reverse.h" + +/* This code is needed only if the processor is big endian */ + +#ifdef BIG_ENDIAN + +void fixup_endianness(code, len) + code_t code; + asize_t len; +{ + code_t p; + len /= sizeof(opcode_t); + for (p = code; p < code + len; p++) { + Reverse_int32(p); + } +} + +#endif + +/* This code is needed only if we're using threaded code */ + +#ifdef THREADED_CODE + +void thread_code(code, len, instr_table) + code_t code; + asize_t len; + void * instr_table[]; +{ + code_t p; + len /= sizeof(opcode_t); + for (p = code; p < code + len; /*nothing*/) { + opcode_t instr = *p; + Assert(instr >= 0 && instr <= STOP); + *p++ = (opcode_t)((unsigned long)(instr_table[instr])); + switch(instr) { + /* Instructions with one operand */ + case PUSHACC: case ACC: case POP: case ASSIGN: + case PUSHENVACC: case ENVACC: case PUSH_RETADDR: case APPLY: + case APPTERM1: case APPTERM2: case APPTERM3: case RETURN: + case GRAB: case PUSHGETGLOBAL: case GETGLOBAL: case SETGLOBAL: + case PUSHATOM: case ATOM: case MAKEBLOCK1: case MAKEBLOCK2: + case MAKEBLOCK3: case GETFIELD: case SETFIELD: case DUMMY: + case BRANCH: case BRANCHIF: case BRANCHIFNOT: case PUSHTRAP: + case C_CALL1: case C_CALL2: case C_CALL3: case C_CALL4: + case CONSTINT: case PUSHCONSTINT: case OFFSETINT: case OFFSETREF: + p += 1; break; + /* Instructions with two operands */ + case APPTERM: case CLOSURE: case CLOSUREREC: case PUSHGETGLOBALFIELD: + case GETGLOBALFIELD: case MAKEBLOCK: case C_CALLN: + p += 2; break; + /* Instructions with N+1 operands */ + case SWITCH: case TRANSLATE: + p += *p + 1; break; + } + } + Assert(p = code + len); +} + +#endif diff --git a/byterun/fix_code.h b/byterun/fix_code.h new file mode 100644 index 0000000000..c754fad278 --- /dev/null +++ b/byterun/fix_code.h @@ -0,0 +1,15 @@ +/* Translate a block of bytecode (endianness switch, threading). */ + +#ifndef _fix_code_ +#define _fix_code_ + + +#include "misc.h" +#include "mlvalues.h" + +void fixup_endianness P((code_t code, asize_t len)); +void thread_code P((code_t code, asize_t len, void * instr_table[])); + + +#endif + diff --git a/byterun/floats.c b/byterun/floats.c new file mode 100644 index 0000000000..f5b4864737 --- /dev/null +++ b/byterun/floats.c @@ -0,0 +1,226 @@ +#include <math.h> +#include <stdio.h> +#include <stdlib.h> +#include "alloc.h" +#include "fail.h" +#include "memory.h" +#include "mlvalues.h" +#include "misc.h" +#include "stacks.h" + +#ifdef ALIGN_DOUBLE + +double Double_val(val) + value val; +{ + union { value v[2]; double d; } buffer; + + Assert(sizeof(double) == 2 * sizeof(value)); + buffer.v[0] = Field(val, 0); + buffer.v[1] = Field(val, 1); + return buffer.d; +} + +void Store_double_val(val, dbl) + value val; + double dbl; +{ + union { value v[2]; double d; } buffer; + + Assert(sizeof(double) == 2 * sizeof(value)); + buffer.d = dbl; + Field(val, 0) = buffer.v[0]; + Field(val, 1) = buffer.v[1]; +} + +#endif + +value format_float(fmt, arg) /* ML */ + value fmt, arg; +{ + char format_buffer[64]; + int prec, i; + char * p; + char * dest; + value res; + + prec = 64; + for (p = String_val(fmt); *p != 0; p++) { + if (*p >= '0' && *p <= '9') { + i = atoi(p) + 15; + if (i > prec) prec = i; + break; + } + } + for( ; *p != 0; p++) { + if (*p == '.') { + i = atoi(p+1) + 15; + if (i > prec) prec = i; + break; + } + } + if (prec <= sizeof(format_buffer)) { + dest = format_buffer; + } else { + dest = stat_alloc(prec); + } + sprintf(dest, String_val(fmt), Double_val(arg)); + res = copy_string(dest); + if (dest != format_buffer) { + stat_free(dest); + } + return res; +} + +value float_of_string(s) /* ML */ + value s; +{ + return copy_double(atof(String_val(s))); +} + +value int_of_float(f) /* ML */ + value f; +{ + return Val_long((long) Double_val(f)); +} + +value float_of_int(n) /* ML */ + value n; +{ + return copy_double((double) Long_val(n)); +} + +value neg_float(f) /* ML */ + value f; +{ + return copy_double(- Double_val(f)); +} + +value add_float(f, g) /* ML */ + value f, g; +{ + return copy_double(Double_val(f) + Double_val(g)); +} + +value sub_float(f, g) /* ML */ + value f, g; +{ + return copy_double(Double_val(f) - Double_val(g)); +} + +value mul_float(f, g) /* ML */ + value f, g; +{ + return copy_double(Double_val(f) * Double_val(g)); +} + +value div_float(f, g) /* ML */ + value f, g; +{ + double dg = Double_val(g); + if (dg == 0.0) raise_zero_divide(); + return copy_double(Double_val(f) / dg); +} + +value exp_float(f) /* ML */ + value f; +{ + return copy_double(exp(Double_val(f))); +} + +value log_float(f) /* ML */ + value f; +{ + return copy_double(log(Double_val(f))); +} + +value sqrt_float(f) /* ML */ + value f; +{ + return copy_double(sqrt(Double_val(f))); +} + +value power_float(f, g) /* ML */ + value f, g; +{ + return copy_double(pow(Double_val(f), Double_val(g))); +} + +value sin_float(f) /* ML */ + value f; +{ + return copy_double(sin(Double_val(f))); +} + +value cos_float(f) /* ML */ + value f; +{ + return copy_double(cos(Double_val(f))); +} + +value tan_float(f) /* ML */ + value f; +{ + return copy_double(tan(Double_val(f))); +} + +value asin_float(f) /* ML */ + value f; +{ + return copy_double(asin(Double_val(f))); +} + +value acos_float(f) /* ML */ + value f; +{ + return copy_double(acos(Double_val(f))); +} + +value atan_float(f) /* ML */ + value f; +{ + return copy_double(atan(Double_val(f))); +} + +value atan2_float(f, g) /* ML */ + value f, g; +{ + return copy_double(atan2(Double_val(f), Double_val(g))); +} + +value eq_float(f, g) /* ML */ + value f, g; +{ + return Val_bool(Double_val(f) == Double_val(g)); +} + +value neq_float(f, g) /* ML */ + value f, g; +{ + return Val_bool(Double_val(f) != Double_val(g)); +} + +value le_float(f, g) /* ML */ + value f, g; +{ + return Val_bool(Double_val(f) <= Double_val(g)); +} + +value lt_float(f, g) /* ML */ + value f, g; +{ + return Val_bool(Double_val(f) < Double_val(g)); +} + +value ge_float(f, g) /* ML */ + value f, g; +{ + return Val_bool(Double_val(f) >= Double_val(g)); +} + +value gt_float(f, g) /* ML */ + value f, g; +{ + return Val_bool(Double_val(f) > Double_val(g)); +} + diff --git a/byterun/freelist.c b/byterun/freelist.c new file mode 100644 index 0000000000..0b348fd205 --- /dev/null +++ b/byterun/freelist.c @@ -0,0 +1,234 @@ +#include "config.h" +#include "freelist.h" +#include "gc.h" +#include "gc_ctrl.h" +#include "major_gc.h" +#include "misc.h" +#include "mlvalues.h" + +/* The free-list is kept sorted by increasing addresses. + This makes the merging of adjacent free blocks possible. + (See [fl_merge_block].) +*/ + +typedef struct { + char *next_bp; /* Pointer to the first byte of the next block. */ +} block; + +/* The sentinel can be located anywhere in memory, but it must not be + adjacent to any heap object. */ +static struct { + value filler1; /* Make sure the sentinel is never adjacent to any block. */ + header_t h; + value first_bp; + value filler2; /* Make sure the sentinel is never adjacent to any block. */ +} sentinel = {0, Make_header (0, 0, Blue), 0, 0}; + +#define Fl_head ((char *) (&(sentinel.first_bp))) +static char *fl_prev = Fl_head; /* Current allocation pointer. */ +static char *fl_last = NULL; /* Last block in the list. Only valid + just after fl_allocate returned NULL. */ +char *fl_merge = Fl_head; /* Current insertion pointer. Managed + jointly with [sweep_slice]. */ + +#define Next(b) (((block *) (b))->next_bp) + +#ifdef DEBUG +void fl_verify () +{ + char *cur, *prev; + int prev_found = 0, merge_found = 0; + + prev = Fl_head; + cur = Next (prev); + while (cur != NULL){ + Assert (Is_in_heap (cur)); + if (cur == fl_prev) prev_found = 1; + if (cur == fl_merge) merge_found = 1; + prev = cur; + cur = Next (prev); + } + Assert (prev_found || fl_prev == Fl_head); + Assert (merge_found || fl_merge == Fl_head); +} +#endif + +/* [allocate_block] is called by [fl_allocate]. Given a suitable free + block and the desired size, it allocates a new block from the free + block. There are three cases: + 0. The free block has the desired size. Detach the block from the + free-list and return it. + 1. The free block is 1 word longer than the desired size. Detach + the block from the free list. The remaining word cannot be linked: + turn it into an empty block (header only), and return the rest. + 2. The free block is big enough. Split it in two and return the right + block. + In all cases, the allocated block is right-justified in the free block: + it is located in the high-address words of the free block. This way, + the linking of the free-list does not change in case 2. +*/ +static char *allocate_block (wh_sz, prev, cur) + mlsize_t wh_sz; + char *prev, *cur; +{ + header_t h = Hd_bp (cur); + Assert (Whsize_hd (h) >= wh_sz); + if (Wosize_hd (h) < wh_sz + 1){ /* Cases 0 and 1. */ + Next (prev) = Next (cur); + Assert (Is_in_heap (Next (prev)) || Next (prev) == NULL); + if (fl_merge == cur) fl_merge = prev; +#ifdef DEBUG + fl_last = NULL; +#endif + /* In case 1, the following creates the empty block correctly. + In case 0, it gives an invalid header to the block. The function + calling [fl_allocate] will overwrite it. */ + Hd_op (cur) = Make_header (0, 0, White); + }else{ /* Case 2. */ + Hd_op (cur) = Make_header (Wosize_hd (h) - wh_sz, 0, Blue); + } + fl_prev = prev; + return cur + Bosize_hd (h) - Bsize_wsize (wh_sz); +} + +/* [fl_allocate] does not set the header of the newly allocated block. + The calling function must do it before any GC function gets called. + [fl_allocate] returns a head pointer. +*/ +char *fl_allocate (wo_sz) + mlsize_t wo_sz; +{ + char *cur, *prev; + Assert (sizeof (char *) == sizeof (value)); + Assert (fl_prev != NULL); + Assert (wo_sz >= 1); + /* Search from [fl_prev] to the end of the list. */ + prev = fl_prev; + cur = Next (prev); + while (cur != NULL){ Assert (Is_in_heap (cur)); + if (Wosize_bp (cur) >= wo_sz){ + return allocate_block (Whsize_wosize (wo_sz), prev, cur); + } + prev = cur; + cur = Next (prev); + } + fl_last = prev; + /* Search from the start of the list to [fl_prev]. */ + prev = Fl_head; + cur = Next (prev); + while (prev != fl_prev){ + if (Wosize_bp (cur) >= wo_sz){ + return allocate_block (Whsize_wosize (wo_sz), prev, cur); + } + prev = cur; + cur = Next (prev); + } + /* No suitable block was found. */ + return NULL; +} + +void fl_init_merge () +{ + fl_merge = Fl_head; +} + +/* [fl_merge_block] returns the head pointer of the next block after [bp], + because merging blocks may change the size of [bp]. */ +char *fl_merge_block (bp) + char *bp; +{ + char *prev, *cur, *adj; + header_t hd = Hd_bp (bp); + +#ifdef DEBUG + { + mlsize_t i; + for (i = 0; i < Wosize_hd (hd); i++){ + Field (Val_bp (bp), i) = not_random (); + } + } +#endif + prev = fl_merge; + cur = Next (prev); + /* The sweep code makes sure that this is the right place to insert + this block: */ + Assert (prev < bp || prev == Fl_head); + Assert (cur > bp || cur == NULL); + + /* If [bp] and [cur] are adjacent, remove [cur] from the free-list + and merge them. */ + adj = bp + Bosize_hd (hd); + if (adj == Hp_bp (cur)){ + char *next_cur = Next (cur); + long cur_whsz = Whsize_bp (cur); + + Next (prev) = next_cur; + if (fl_prev == cur) fl_prev = prev; + hd = Make_header (Wosize_hd (hd) + cur_whsz, 0, Blue); + Hd_bp (bp) = hd; + adj = bp + Bosize_hd (hd); +#ifdef DEBUG + fl_last = NULL; + Next (cur) = (char *) not_random (); + Hd_bp (cur) = not_random (); +#endif + cur = next_cur; + } + /* If [prev] and [bp] are adjacent merge them, else insert [bp] into + the free-list if it is big enough. */ + if (prev + Bosize_bp (prev) == Hp_bp (bp)){ + Hd_bp (prev) = Make_header (Wosize_bp (prev) + Whsize_hd (hd), 0, Blue); +#ifdef DEBUG + Hd_bp (bp) = not_random (); +#endif + Assert (fl_merge == prev); + }else if (Wosize_hd (hd) > 0){ + Hd_bp (bp) = Bluehd_hd (hd); + Next (bp) = cur; + Next (prev) = bp; + fl_merge = bp; + } /* Else leave it in white. */ + return adj; +} + +/* This is a heap extension. We have to insert it in the right place + in the free-list. + [fl_add_block] can only be called just after a call to [fl_allocate] + that returned NULL. + Most of the heap extensions are expected to be at the end of the + free list. (This depends on the implementation of [malloc].) +*/ +void fl_add_block (bp) + char *bp; +{ + Assert (fl_last != NULL); + Assert (Next (fl_last) == NULL); +#ifdef DEBUG + { + mlsize_t i; + for (i = 0; i < Wosize_bp (bp); i++){ + Field (Val_bp (bp), i) = not_random (); + } + } +#endif + if (bp > fl_last){ + Next (fl_last) = bp; + Next (bp) = NULL; + }else{ + char *cur, *prev; + + prev = Fl_head; + cur = Next (prev); + while (cur != NULL && cur < bp){ Assert (prev < bp || prev == Fl_head); + prev = cur; + cur = Next (prev); + } Assert (prev < bp || prev == Fl_head); + Assert (cur > bp || cur == NULL); + Next (bp) = cur; + Next (prev) = bp; + /* When inserting a block between fl_merge and gc_sweep_hp, we must + advance fl_merge to the new block, so that fl_merge is always the + last free-list block before gc_sweep_hp. */ + if (prev == fl_merge && bp <= gc_sweep_hp) fl_merge = bp; + } +} diff --git a/byterun/freelist.h b/byterun/freelist.h new file mode 100644 index 0000000000..1f1aef9a7f --- /dev/null +++ b/byterun/freelist.h @@ -0,0 +1,16 @@ +/* Free lists of heap blocks. */ + +#ifndef _freelist_ +#define _freelist_ + + +#include "misc.h" +#include "mlvalues.h" + +char *fl_allocate P((mlsize_t)); +void fl_init_merge P((void)); +char *fl_merge_block P((char *)); +void fl_add_block P((char *)); + + +#endif /* _freelist_ */ diff --git a/byterun/gc.h b/byterun/gc.h new file mode 100644 index 0000000000..b772f2f28d --- /dev/null +++ b/byterun/gc.h @@ -0,0 +1,42 @@ +#ifndef _gc_ +#define _gc_ + + +#include "mlvalues.h" + +/* Defined in [major_gc.c]. */ +extern unsigned free_mem_percent_min, free_mem_percent_max; + +#define White (0 << 8) +#define Gray (1 << 8) +#define Blue (2 << 8) +#define Black (3 << 8) + +#define Color_hd(hd) ((color_t) ((hd) & Black)) +#define Color_hp(hp) Color_hd (Hd_hp (hp)) + +#define Is_white_hd(hd) (Color_hd (hd) == White) +#define Is_gray_hd(hd) (Color_hd (hd) == Gray) +#define Is_blue_hd(hd) (Color_hd (hd) == Blue) +#define Is_black_hd(hd) (Color_hd (hd) == Black) + +#define Whitehd_hd(hd) ((hd) & ~Black) +#define Grayhd_hd(hd) (((hd) & ~Black) | Gray) +#define Blackhd_hd(hd) ((hd) | Black) +#define Bluehd_hd(hd) (((hd) & ~Black) | Blue) + +/* This depends on the layout of the header. See [mlvalues.h]. */ +#define Make_header(wosize, tag, color) \ + ((header_t) (((header_t) (wosize) << 10) \ + + (color) \ + + (tag_t) (tag))) + +#define Color_val(val) (Color_hd (Hd_val (val))) + +#define Is_white_val(val) (Color_val(val) == White) +#define Is_gray_val(val) (Color_val(val) == Gray) +#define Is_blue_val(val) (Color_val(val) == Blue) +#define Is_black_val(val) (Color_val(val) == Black) + + +#endif /* _gc_ */ diff --git a/byterun/gc_ctrl.c b/byterun/gc_ctrl.c new file mode 100644 index 0000000000..facd9f30f7 --- /dev/null +++ b/byterun/gc_ctrl.c @@ -0,0 +1,212 @@ +#include "alloc.h" +#include "gc.h" +#include "gc_ctrl.h" +#include "major_gc.h" +#include "minor_gc.h" +#include "misc.h" +#include "mlvalues.h" + +long stat_minor_words = 0, + stat_promoted_words = 0, + stat_major_words = 0, + stat_minor_collections = 0, + stat_major_collections = 0, + stat_heap_size = 0; /* bytes */ + +extern asize_t major_heap_increment; /* bytes; cf. major_gc.c */ +extern int percent_free; /* cf. major_gc.c */ +extern int verb_gc; /* cf. misc.c */ + +#define Chunk_size(c) (((heap_chunk_head *) (c)) [-1]).size +#define Chunk_next(c) (((heap_chunk_head *) (c)) [-1]).next +#define Next(hp) ((hp) + Bhsize_hp (hp)) + +/* This will also thoroughly verify the heap if compiled in DEBUG mode. */ + +value gc_stat(v) /* ML */ + value v; +{ + value res; + long live_words = 0, live_blocks = 0, + free_words = 0, free_blocks = 0, largest_free = 0, + fragments = 0, heap_chunks = 0; + char *chunk = heap_start, *chunk_end; + char *cur_hp, *prev_hp; + header_t cur_hd; + + Assert (v == Atom (0)); + + while (chunk != NULL){ + ++ heap_chunks; + chunk_end = chunk + Chunk_size (chunk); + prev_hp = NULL; + cur_hp = chunk; + while (cur_hp < chunk_end){ + cur_hd = Hd_hp (cur_hp); + switch (Color_hd (cur_hd)){ + case White: + if (Wosize_hd (cur_hd) == 0){ + ++fragments; + Assert (prev_hp == NULL + || (Color_hp (prev_hp) != Blue + && Wosize_hp (prev_hp) > 0)); + Assert (Next (cur_hp) == chunk_end + || (Color_hp (Next (cur_hp)) != Blue + && Wosize_hp (Next (cur_hp)) > 0)); + break; + } + /* FALLTHROUGH */ + case Gray: case Black: + Assert (Wosize_hd (cur_hd) > 0); + ++ live_blocks; + live_words += Whsize_hd (cur_hd); + break; + case Blue: + Assert (Wosize_hd (cur_hd) > 0); + ++ free_blocks; + free_words += Whsize_hd (cur_hd); + if (Whsize_hd (cur_hd) > largest_free){ + largest_free = Whsize_hd (cur_hd); + } + Assert (prev_hp == NULL + || (Color_hp (prev_hp) != Blue + && Wosize_hp (prev_hp) > 0)); + Assert (Next (cur_hp) == chunk_end + || (Color_hp (Next (cur_hp)) != Blue + && Wosize_hp (Next (cur_hp)) > 0)); + break; + } + prev_hp = cur_hp; + cur_hp = Next (cur_hp); + } Assert (cur_hp == chunk_end); + chunk = Chunk_next (chunk); + } + + Assert (live_words + free_words + fragments == Wsize_bsize (stat_heap_size)); + + res = alloc (13, 0); + Field (res, 0) = Val_long (stat_minor_words + + Wsize_bsize (young_ptr - young_start)); + Field (res, 1) = Val_long (stat_promoted_words); + Field (res, 2) = Val_long (stat_major_words + allocated_words); + Field (res, 3) = Val_long (stat_minor_collections); + Field (res, 4) = Val_long (stat_major_collections); + Field (res, 5) = Val_long (Wsize_bsize (stat_heap_size)); + Field (res, 6) = Val_long (heap_chunks); + Field (res, 7) = Val_long (live_words); + Field (res, 8) = Val_long (live_blocks); + Field (res, 9) = Val_long (free_words); + Field (res, 10) = Val_long (free_blocks); + Field (res, 11) = Val_long (largest_free); + Field (res, 12) = Val_long (fragments); + return res; +} + +value gc_get(v) /* ML */ + value v; +{ + value res; + + Assert (v == Atom (0)); + res = alloc (4, 0); + Field (res, 0) = Wsize_bsize (Val_long (minor_heap_size)); + Field (res, 1) = Wsize_bsize (Val_long (major_heap_increment)); + Field (res, 2) = Val_long (percent_free); + Field (res, 3) = Val_bool (verb_gc); + return res; +} + +static int norm_pfree (p) + int p; +{ + if (p < 1) return p = 1; + return p; +} + +static long norm_heapincr (i) + long i; +{ + i = ((i + (1 << Page_log) - 1) >> Page_log) << Page_log; + if (i < Heap_chunk_min) i = Heap_chunk_min; + if (i > Heap_chunk_max) i = Heap_chunk_max; + return i; +} + +static long norm_minsize (s) + long s; +{ + if (s < Minor_heap_min) s = Minor_heap_min; + if (s > Minor_heap_max) s = Minor_heap_max; + return s; +} + +value gc_set(v) /* ML */ + value v; +{ + int newpf; + + verb_gc = Bool_val (Field (v, 3)); + + newpf = norm_pfree (Long_val (Field (v, 2))); + if (newpf != percent_free){ + percent_free = newpf; + gc_message ("New space overhead: %d%%\n", percent_free); + } + + if (Bsize_wsize (Long_val (Field (v, 1))) != major_heap_increment){ + major_heap_increment = norm_heapincr (Bsize_wsize (Long_val (Field(v,1)))); + gc_message ("New heap increment size: %ldk\n", major_heap_increment/1024); + } + + /* Minor heap size comes last because it will trigger a minor collection + (thus invalidating [v]) and it can raise [Out_of_memory]. */ + if (Bsize_wsize (Long_val (Field (v, 0))) != minor_heap_size){ + long new_size = norm_minsize (Bsize_wsize (Long_val (Field (v, 0)))); + gc_message ("New minor heap size: %ldk\n", new_size/1024); + set_minor_heap_size (new_size); + } + return Atom (0); +} + +value gc_minor(v) /* ML */ + value v; +{ Assert (v == Atom (0)); + minor_collection (); + return Atom (0); +} + +value gc_major(v) /* ML */ + value v; +{ Assert (v == Atom (0)); + minor_collection (); + finish_major_cycle (); + return Atom (0); +} + +value gc_full_major(v) /* ML */ + value v; +{ Assert (v == Atom (0)); + minor_collection (); + finish_major_cycle (); + finish_major_cycle (); + return Atom (0); +} + +void init_gc (minor_size, major_incr, percent_fr, verb) + long minor_size; + long major_incr; + int percent_fr; + int verb; +{ +#ifdef DEBUG + gc_message ("*** camlrun: debug mode ***\n", 0); +#endif + verb_gc = verb; + set_minor_heap_size (Bsize_wsize (norm_minsize (minor_size))); + major_heap_increment = Bsize_wsize (norm_heapincr (major_incr)); + percent_free = norm_pfree (percent_fr); + init_major_heap (major_heap_increment); + gc_message ("Initial space overhead: %d%%\n", percent_free); + gc_message ("Initial heap increment: %ldk\n", major_heap_increment / 1024); + gc_message ("Initial minor heap size: %ldk\n", minor_heap_size / 1024); +} diff --git a/byterun/gc_ctrl.h b/byterun/gc_ctrl.h new file mode 100644 index 0000000000..5a88a9ab61 --- /dev/null +++ b/byterun/gc_ctrl.h @@ -0,0 +1,17 @@ +#ifndef _gc_ctrl_ +#define _gc_ctrl_ + +#include "misc.h" + +extern long + stat_minor_words, + stat_promoted_words, + stat_major_words, + stat_minor_collections, + stat_major_collections, + stat_heap_size; + +void init_gc P((long, long, int, int)); + + +#endif /* _gc_ctrl_ */ diff --git a/byterun/hash.c b/byterun/hash.c new file mode 100644 index 0000000000..944bfac985 --- /dev/null +++ b/byterun/hash.c @@ -0,0 +1,103 @@ +/* The generic hashing primitive */ + +#include "mlvalues.h" +#include "memory.h" +#include "str.h" + +static unsigned long hash_accu; +static long hash_univ_limit, hash_univ_count; + +static void hash_aux(); + +value hash_univ_param(count, limit, obj) /* ML */ + value obj, count, limit; +{ + hash_univ_limit = Long_val(limit); + hash_univ_count = Long_val(count); + hash_accu = 0; + hash_aux(obj); + return Val_long(hash_accu & 0x3FFFFFFF); + /* The & has two purposes: ensure that the return value is positive + and give the same result on 32 bit and 64 bit architectures. */ +} + +#define Alpha 65599 +#define Beta 19 +#define Combine(new) (hash_accu = hash_accu * Alpha + (new)) +#define Combine_small(new) (hash_accu = hash_accu * Beta + (new)) + +static void hash_aux(obj) + value obj; +{ + unsigned char * p; + mlsize_t i; + tag_t tag; + + hash_univ_limit--; + if (hash_univ_count < 0 || hash_univ_limit < 0) return; + + if (Is_long(obj)) { + hash_univ_count--; + Combine(Long_val(obj)); + return; + } + + /* Atoms are not in the heap, but it's better to hash their tag + than to do nothing. */ + + if (Is_atom(obj)) { + tag = Tag_val(obj); + hash_univ_count--; + Combine_small(tag); + return; + } + + /* Pointers into the heap are well-structured blocks. + We can inspect the block contents. */ + + if (Is_in_heap(obj) || Is_young(obj)) { + tag = Tag_val(obj); + switch (tag) { + case String_tag: + hash_univ_count--; + i = string_length(obj); + for (p = &Byte_u(obj, 0); i > 0; i--, p++) + Combine_small(*p); + break; + case Double_tag: + /* For doubles, we inspect their binary representation, LSB first. + The results are consistent among all platforms with IEEE floats. */ + hash_univ_count--; +#ifdef BIG_ENDIAN + for (p = &Byte_u(obj, sizeof(double) - 1), i = sizeof(double); + i > 0; + p--, i--) +#else + for (p = &Byte_u(obj, 0), i = sizeof(double); + i > 0; + p++, i--) +#endif + Combine_small(*p); + break; + case Abstract_tag: + case Final_tag: + /* We don't know anything about the contents of the block. + Better do nothing. */ + break; + default: + hash_univ_count--; + Combine_small(tag); + i = Wosize_val(obj); + while (i != 0) { + i--; + hash_aux(Field(obj, i)); + } + break; + } + return; + } + + /* Otherwise, obj is a pointer outside the heap, to an object with + a priori unknown structure. Use its physical address as hash key. */ + Combine((long) obj); +} diff --git a/byterun/instrtrace.c b/byterun/instrtrace.c new file mode 100644 index 0000000000..5e3133565f --- /dev/null +++ b/byterun/instrtrace.c @@ -0,0 +1,52 @@ +/* Trace the instructions executed */ + +#ifdef DEBUG + +#include <stdio.h> +#include "instruct.h" +#include "misc.h" +#include "mlvalues.h" +#include "opnames.h" + +extern code_t start_code; +extern char * names_of_cprim[]; + +long icount = 0; + +void stop_here () {} + +int trace_flag = 0; + +void disasm_instr(pc) + code_t pc; +{ + int instr = *pc; + printf("%6d %s", pc - start_code, + instr < 0 || instr > STOP ? "???" : names_of_instructions[instr]); + pc++; + switch(instr) { + /* Instructions with one integer operand */ + case PUSHACC: case ACC: case POP: case ASSIGN: + case PUSHENVACC: case ENVACC: case PUSH_RETADDR: case APPLY: + case APPTERM1: case APPTERM2: case APPTERM3: case RETURN: + case GRAB: case PUSHGETGLOBAL: case GETGLOBAL: case SETGLOBAL: + case PUSHATOM: case ATOM: case MAKEBLOCK1: case MAKEBLOCK2: + case MAKEBLOCK3: case GETFIELD: case SETFIELD: case DUMMY: + case BRANCH: case BRANCHIF: case BRANCHIFNOT: case PUSHTRAP: + case CONSTINT: case PUSHCONSTINT: case OFFSETINT: case OFFSETREF: + printf(" %d\n", pc[0]); break; + /* Instructions with two operands */ + case APPTERM: case CLOSURE: case CLOSUREREC: case PUSHGETGLOBALFIELD: + case GETGLOBALFIELD: case MAKEBLOCK: + printf(" %d, %d\n", pc[0], pc[1]); break; + /* Instructions with a C primitive as operand */ + case C_CALL1: case C_CALL2: case C_CALL3: case C_CALL4: + printf(" %s\n", names_of_cprim[pc[0]]); break; + case C_CALLN: + printf(" %d, %s\n", pc[0], names_of_cprim[pc[1]]); break; + default: + printf("\n"); + } +} + +#endif diff --git a/byterun/instrtrace.h b/byterun/instrtrace.h new file mode 100644 index 0000000000..c47b39781c --- /dev/null +++ b/byterun/instrtrace.h @@ -0,0 +1,16 @@ +/* Trace the instructions executed */ + +#ifndef _instrtrace_ +#define _instrtrace_ + + +#include "mlvalues.h" +#include "misc.h" + +extern int trace_flag; +extern long icount; +void stop_here P((void)); +void disasm_instr P((code_t pc)); + + +#endif diff --git a/byterun/instruct.h b/byterun/instruct.h new file mode 100644 index 0000000000..43cb8dcdf0 --- /dev/null +++ b/byterun/instruct.h @@ -0,0 +1,33 @@ +/* The instruction set. */ + +enum instructions { + ACC0, ACC1, ACC2, ACC3, ACC4, ACC5, ACC6, ACC7, + ACC, PUSH, + PUSHACC0, PUSHACC1, PUSHACC2, PUSHACC3, + PUSHACC4, PUSHACC5, PUSHACC6, PUSHACC7, + PUSHACC, POP, ASSIGN, + ENVACC0, ENVACC1, ENVACC2, ENVACC3, ENVACC, + PUSHENVACC0, PUSHENVACC1, PUSHENVACC2, PUSHENVACC3, PUSHENVACC, + PUSH_RETADDR, APPLY, APPLY1, APPLY2, APPLY3, + APPTERM, APPTERM1, APPTERM2, APPTERM3, + RETURN, RESTART, GRAB, + CLOSURE, CLOSUREREC, + GETGLOBAL, PUSHGETGLOBAL, GETGLOBALFIELD, PUSHGETGLOBALFIELD, SETGLOBAL, + ATOM0, ATOM1, ATOM2, ATOM3, ATOM, + PUSHATOM0, PUSHATOM1, PUSHATOM2, PUSHATOM3, PUSHATOM, + MAKEBLOCK, MAKEBLOCK1, MAKEBLOCK2, MAKEBLOCK3, + GETFIELD0, GETFIELD1, GETFIELD2, GETFIELD3, GETFIELD, + SETFIELD0, SETFIELD1, SETFIELD2, SETFIELD3, SETFIELD, + TAGOF, DUMMY, UPDATE, + VECTLENGTH, GETVECTITEM, SETVECTITEM, + GETSTRINGCHAR, SETSTRINGCHAR, + BRANCH, BRANCHIF, BRANCHIFNOT, SWITCH, TRANSLATE, BOOLNOT, + PUSHTRAP, POPTRAP, RAISE, CHECK_SIGNALS, + C_CALL1, C_CALL2, C_CALL3, C_CALL4, C_CALLN, + CONSTINT, PUSHCONSTINT, + NEGINT, ADDINT, SUBINT, MULINT, DIVINT, MODINT, + ANDINT, ORINT, XORINT, LSLINT, LSRINT, ASRINT, + EQ, NEQ, LTINT, LEINT, GTINT, GEINT, + OFFSETINT, OFFSETREF, + STOP +}; diff --git a/byterun/intern.c b/byterun/intern.c new file mode 100644 index 0000000000..4fb438b44a --- /dev/null +++ b/byterun/intern.c @@ -0,0 +1,230 @@ +/* Structured input, compact format */ + +#include "fail.h" +#include "gc.h" +#include "intext.h" +#include "io.h" +#include "memory.h" +#include "mlvalues.h" +#include "misc.h" +#include "reverse.h" + +static header_t * intern_ptr; +static asize_t obj_counter; +static value * intern_obj_table; +static unsigned int intern_color; +static header_t intern_header; +static value intern_block; + +#define Sign_extend_shift ((sizeof(long) - 1) * 8) +#define Sign_extend(x) (((long)(x) << Sign_extend_shift) >> Sign_extend_shift) + +static long input8u(chan) + struct channel * chan; +{ + return getch(chan); +} + +static long input8s(chan) + struct channel * chan; +{ + long b1 = getch(chan); + return Sign_extend(b1); +} + +static long input16u(chan) + struct channel * chan; +{ + long b1 = getch(chan); + long b2 = getch(chan); + return (b1 << 8) + b2; +} + +static long input16s(chan) + struct channel * chan; +{ + long b1 = getch(chan); + long b2 = getch(chan); + return (Sign_extend(b1) << 8) + b2; +} + +static long input32u(chan) + struct channel * chan; +{ + long b1 = getch(chan); + long b2 = getch(chan); + long b3 = getch(chan); + long b4 = getch(chan); + return (b1 << 24) + (b2 << 16) + (b3 << 8) + b4; +} + +static long input32s(chan) + struct channel * chan; +{ + long b1 = getch(chan); + long b2 = getch(chan); + long b3 = getch(chan); + long b4 = getch(chan); + return (Sign_extend(b1) << 24) + (b2 << 16) + (b3 << 8) + b4; +} + +#ifdef SIXTYFOUR +static long input64s(chan) + struct channel * chan; +{ + long res; + int i; + res = 0; + for (i = 0; i < 8; i++) res = (res << 8) + getch(chan); + return res; +} +#endif + +static void read_compact(chan, dest) + struct channel * chan; + value * dest; +{ + unsigned int code; + tag_t tag; + mlsize_t size, len, ofs_ind; + value v; + asize_t ofs; + header_t header; + + tailcall: + code = getch(chan); + if (code >= PREFIX_SMALL_INT) { + if (code >= PREFIX_SMALL_BLOCK) { + /* Small block */ + tag = code & 0xF; + size = (code >> 4) & 0x7; + read_block: + if (size == 0) { + v = Atom(tag); + } else { + v = Val_hp(intern_ptr); + *dest = v; + intern_obj_table[obj_counter++] = v; + dest = (value *) (intern_ptr + 1); + *intern_ptr = Make_header(size, tag, intern_color); + intern_ptr += 1 + size; + for(/*nothing*/; size > 1; size--, dest++) + read_compact(chan, dest); + goto tailcall; + } + } else { + /* Small integer */ + v = Val_int(code & 0x3F); + } + } else { + if (code >= PREFIX_SMALL_STRING) { + /* Small string */ + len = (code & 0x1F); + read_string: + size = (len + sizeof(value)) / sizeof(value); + v = Val_hp(intern_ptr); + intern_obj_table[obj_counter++] = v; + *intern_ptr = Make_header(size, String_tag, intern_color); + intern_ptr += 1 + size; + Field(v, size - 1) = 0; + ofs_ind = Bsize_wsize(size) - 1; + Byte(v, ofs_ind) = ofs_ind - len; + really_getblock(chan, String_val(v), len); + } else { + switch(code) { + case CODE_INT8: + v = Val_long(input8s(chan)); + break; + case CODE_INT16: + v = Val_long(input16s(chan)); + break; + case CODE_INT32: + v = Val_long(input32s(chan)); + break; + case CODE_INT64: +#ifdef SIXTYFOUR + v = Val_long(input64s(chan)); + break; +#else + stat_free((char *) intern_obj_table); + Hd_val(intern_block) = intern_header; /* Don't confuse the GC */ + failwith("input_value: integer too large"); + break; +#endif + case CODE_SHARED8: + ofs = input8u(chan); + read_shared: + Assert(ofs > 0 && ofs <= obj_counter); + v = intern_obj_table[obj_counter - ofs]; + break; + case CODE_SHARED16: + ofs = input16u(chan); + goto read_shared; + case CODE_SHARED32: + ofs = input32u(chan); + goto read_shared; + case CODE_BLOCK32: + header = (header_t) input32u(chan); + tag = Tag_hd(header); + size = Wosize_hd(header); + goto read_block; + case CODE_STRING8: + len = input8u(chan); + goto read_string; + case CODE_STRING32: + len = input32u(chan); + goto read_string; + case CODE_DOUBLE_LITTLE: + case CODE_DOUBLE_BIG: + if (sizeof(double) != 8) { + stat_free((char *) intern_obj_table); + Hd_val(intern_block) = intern_header; /* Don't confuse the GC */ + invalid_argument("input_value: non-standard floats"); + } + v = Val_hp(intern_ptr); + intern_obj_table[obj_counter++] = v; + *intern_ptr = Make_header(Double_wosize, Double_tag, intern_color); + intern_ptr += 1 + Double_wosize; + really_getblock(chan, (char *) v, 8); + if (code != CODE_DOUBLE_NATIVE) Reverse_double(v); + break; + } + } + } + *dest = v; +} + +value input_value(chan) /* ML */ + struct channel * chan; +{ + uint32 magic; + mlsize_t num_objects, size_32, size_64, whsize; + value res; + + magic = getword(chan); + if (magic != Compact_magic_number) failwith("input_value: bad object"); + num_objects = getword(chan); + size_32 = getword(chan); + size_64 = getword(chan); +#ifdef SIXTYFOUR + whsize = size_64; +#else + whsize = size_32; +#endif + if (whsize == 0) { + read_compact(chan, &res); + } else { + if (Wosize_whsize(whsize) > Max_wosize) + failwith("intern: structure too big"); + intern_block = alloc_shr(Wosize_whsize(whsize), String_tag); + intern_header = Hd_val(intern_block); + intern_color = Color_hd(intern_header); + Assert (intern_color == White || intern_color == Black); + intern_ptr = (header_t *) Hp_val(intern_block); + obj_counter = 0; + intern_obj_table = (value *) stat_alloc(num_objects * sizeof(value)); + read_compact(chan, &res); + stat_free((char *) intern_obj_table); + } + return res; +} diff --git a/byterun/interp.c b/byterun/interp.c new file mode 100644 index 0000000000..2a5babc5a7 --- /dev/null +++ b/byterun/interp.c @@ -0,0 +1,865 @@ +/* The bytecode interpreter */ + +#include "alloc.h" +#include "fail.h" +#include "fix_code.h" +#include "instruct.h" +#include "interp.h" +#include "memory.h" +#include "misc.h" +#include "mlvalues.h" +#include "prims.h" +#include "signals.h" +#include "stacks.h" +#include "str.h" +#include "instrtrace.h" + +/* Registers for the abstract machine: + pc the code pointer + sp the stack pointer (grows downward) + accu the accumulator + env heap-allocated environment + trapsp pointer to the current trap frame + extra_args number of extra arguments provided by the caller + +sp is a local copy of the global variable extern_sp. */ + +extern value global_data; +extern code_t start_code; + +/* Instruction decoding */ + +#ifdef THREADED_CODE +# define Instruct(name) lbl_##name +# ifdef DEBUG +# define Next goto next_instr +# else +# define Next goto *((void *)((unsigned long)(*pc++))) +# endif +#else +# define Instruct(name) case name +# define Next break +#endif + +/* GC interface */ + +#define Setup_for_gc { sp -= 2; sp[0] = accu; sp[1] = env; extern_sp = sp; } +#define Restore_after_gc { accu = sp[0]; env = sp[1]; sp += 2; } +#define Setup_for_c_call { *--sp = env; extern_sp = sp; } +#define Restore_after_c_call { sp = extern_sp; env = *sp++; } + +/* Register optimization. + Many compilers underestimate the use of the local variables representing + the abstract machine registers, and don't put them in hardware registers, + which slows down the interpreter considerably. + For GCC, I have hand-assigned hardware registers for several architectures. +*/ + +#if defined(__GNUC__) && !defined(DEBUG) +#ifdef __mips__ +#define PC_REG asm("$16") +#define SP_REG asm("$17") +#define ACCU_REG asm("$18") +#endif +#ifdef __sparc__ +#define PC_REG asm("%l0") +#define SP_REG asm("%l1") +#define ACCU_REG asm("%l2") +#endif +#ifdef __alpha__ +#define PC_REG asm("$9") +#define SP_REG asm("$10") +#define ACCU_REG asm("$11") +#endif +#ifdef __i386__ +#define PC_REG asm("%esi") +#define SP_REG asm("%edi") +#define ACCU_REG +#endif +#endif + +/* The interpreter itself */ + +value interprete(prog, prog_size) + code_t prog; + asize_t prog_size; +{ +#ifdef PC_REG + register code_t pc PC_REG; + register value * sp SP_REG; + register value accu ACCU_REG; +#else + register code_t pc; + register value * sp; + register value accu; +#endif + value env; + long extra_args; + struct longjmp_buffer * initial_external_raise; + int initial_sp_offset; + value * initial_local_roots; + struct longjmp_buffer raise_buf; + value * modify_dest, modify_newval; + +#ifdef THREADED_CODE + static void * jumptable[] = { +# include "jumptbl.h" + }; +#endif + +#ifdef THREADED_CODE + if (prog[0] <= STOP) thread_code(prog, prog_size, jumptable); +#endif + + sp = extern_sp; + pc = prog; + extra_args = 0; + env = Atom(0); + accu = Val_long(0); + initial_local_roots = local_roots; + initial_sp_offset = stack_high - sp; + initial_external_raise = external_raise; + if (setjmp(raise_buf.buf)) { + local_roots = initial_local_roots; + accu = exn_bucket; + goto raise_exception; + } + external_raise = &raise_buf; + +#ifdef THREADED_CODE +#ifdef DEBUG + next_instr: + if (icount-- == 0) stop_here (); + Assert(sp >= stack_low); + Assert(sp <= stack_high); + goto *((void *)((unsigned long)(*pc++))); +#else + Next; /* Jump to the first instruction */ +#endif +#else + while(1) { +#ifdef DEBUG + if (icount-- == 0) stop_here (); + if (trace_flag) disasm_instr(pc); + Assert(sp >= stack_low); + Assert(sp <= stack_high); +#endif + switch(*pc++) { +#endif + +/* Basic stack operations */ + + Instruct(ACC0): + accu = sp[0]; Next; + Instruct(ACC1): + accu = sp[1]; Next; + Instruct(ACC2): + accu = sp[2]; Next; + Instruct(ACC3): + accu = sp[3]; Next; + Instruct(ACC4): + accu = sp[4]; Next; + Instruct(ACC5): + accu = sp[5]; Next; + Instruct(ACC6): + accu = sp[6]; Next; + Instruct(ACC7): + accu = sp[7]; Next; + + Instruct(PUSH): Instruct(PUSHACC0): + *--sp = accu; Next; + Instruct(PUSHACC1): + *--sp = accu; accu = sp[1]; Next; + Instruct(PUSHACC2): + *--sp = accu; accu = sp[2]; Next; + Instruct(PUSHACC3): + *--sp = accu; accu = sp[3]; Next; + Instruct(PUSHACC4): + *--sp = accu; accu = sp[4]; Next; + Instruct(PUSHACC5): + *--sp = accu; accu = sp[5]; Next; + Instruct(PUSHACC6): + *--sp = accu; accu = sp[6]; Next; + Instruct(PUSHACC7): + *--sp = accu; accu = sp[7]; Next; + + Instruct(PUSHACC): + *--sp = accu; + /* Fallthrough */ + Instruct(ACC): + accu = sp[*pc++]; + Next; + + Instruct(POP): + sp += *pc++; + Next; + Instruct(ASSIGN): + sp[*pc++] = accu; + Next; + +/* Access in heap-allocated environment */ + + Instruct(ENVACC0): + accu = Field(env, 0); Next; + Instruct(ENVACC1): + accu = Field(env, 1); Next; + Instruct(ENVACC2): + accu = Field(env, 2); Next; + Instruct(ENVACC3): + accu = Field(env, 3); Next; + + Instruct(PUSHENVACC0): + *--sp = accu; accu = Field(env, 0); Next; + Instruct(PUSHENVACC1): + *--sp = accu; accu = Field(env, 1); Next; + Instruct(PUSHENVACC2): + *--sp = accu; accu = Field(env, 2); Next; + Instruct(PUSHENVACC3): + *--sp = accu; accu = Field(env, 3); Next; + + Instruct(PUSHENVACC): + *--sp = accu; + /* Fallthrough */ + Instruct(ENVACC): + accu = Field(env, *pc++); + Next; + +/* Function application */ + + Instruct(PUSH_RETADDR): { + sp -= 3; + sp[0] = (value) (pc + *pc); + sp[1] = env; + sp[2] = Val_long(extra_args); + pc++; + Next; + } + Instruct(APPLY): { + extra_args = *pc++ - 1; + pc = Code_val(accu); + env = Env_val(accu); + goto check_stacks; + } + Instruct(APPLY1): { + value arg1 = sp[0]; + sp -= 3; + sp[0] = arg1; + sp[1] = (value)pc; + sp[2] = env; + sp[3] = Val_long(extra_args); + pc = Code_val(accu); + env = Env_val(accu); + extra_args = 0; + goto check_stacks; + } + Instruct(APPLY2): { + value arg1 = sp[0]; + value arg2 = sp[1]; + sp -= 3; + sp[0] = arg1; + sp[1] = arg2; + sp[2] = (value)pc; + sp[3] = env; + sp[4] = Val_long(extra_args); + pc = Code_val(accu); + env = Env_val(accu); + extra_args = 1; + goto check_stacks; + } + Instruct(APPLY3): { + value arg1 = sp[0]; + value arg2 = sp[1]; + value arg3 = sp[2]; + sp -= 3; + sp[0] = arg1; + sp[1] = arg2; + sp[2] = arg3; + sp[3] = (value)pc; + sp[4] = env; + sp[5] = Val_long(extra_args); + pc = Code_val(accu); + env = Env_val(accu); + extra_args = 2; + goto check_stacks; + } + + Instruct(APPTERM): { + int nargs = *pc++; + int slotsize = *pc++; + value * newsp; + int i; + /* Slide the nargs bottom words of the current frame to the top + of the frame, and discard the remainder of the frame */ + newsp = sp + slotsize - nargs; + for (i = nargs - 1; i >= 0; i--) newsp[i] = sp[i]; + sp = newsp; + pc = Code_val(accu); + env = Env_val(accu); + extra_args += nargs - 1; + goto check_stacks; + } + Instruct(APPTERM1): { + value arg1 = sp[0]; + sp = sp + *pc++ - 1; + sp[0] = arg1; + pc = Code_val(accu); + env = Env_val(accu); + goto check_stacks; + } + Instruct(APPTERM2): { + value arg1 = sp[0]; + value arg2 = sp[1]; + sp = sp + *pc++ - 2; + sp[0] = arg1; + sp[1] = arg2; + pc = Code_val(accu); + env = Env_val(accu); + extra_args += 1; + goto check_stacks; + } + Instruct(APPTERM3): { + value arg1 = sp[0]; + value arg2 = sp[1]; + value arg3 = sp[2]; + sp = sp + *pc++ - 3; + sp[0] = arg1; + sp[1] = arg2; + sp[2] = arg3; + pc = Code_val(accu); + env = Env_val(accu); + extra_args += 2; + goto check_stacks; + } + + Instruct(RETURN): { + sp += *pc++; + if (extra_args > 0) { + extra_args--; + pc = Code_val(accu); + env = Env_val(accu); + } else { + pc = (code_t)(sp[0]); + env = sp[1]; + extra_args = Long_val(sp[2]); + sp += 3; + } + Next; + } + + Instruct(RESTART): { + int num_args = Wosize_val(env) - 1; + int i; + sp -= num_args; + for (i = 0; i < num_args; i++) sp[i] = Field(env, i); + env = Field(env, num_args); + extra_args += num_args; + Next; + } + + Instruct(GRAB): { + int required = *pc++; + if (extra_args >= required) { + extra_args -= required; + } else { + value clos; + mlsize_t num_args, i; + num_args = 1 + extra_args; /* arg1 + extra args */ + Alloc_small(accu, num_args + 1, 0); + for (i = 0; i < num_args; i++) Field(accu, i) = sp[i]; + Field(accu, num_args) = env; + sp += num_args; + Alloc_small(clos, Closure_wosize, Closure_tag); + Code_val(clos) = pc - 3; /* Point to the preceding RESTART instr. */ + Env_val(clos) = accu; + pc = (code_t)(sp[0]); + env = sp[1]; + extra_args = Long_val(sp[2]); + sp += 3; + accu = clos; + } + Next; + } + + Instruct(CLOSURE): { + int nvars = *pc++; + value clos; + int i; + if (nvars == 0) { + accu = Val_int(0); + } else { + *--sp = accu; + Alloc_small(accu, nvars, 0); + for (i = 0; i < nvars; i++) Field(accu, i) = sp[i]; + sp += nvars; + } + Alloc_small(clos, Closure_wosize, Closure_tag); + Code_val(clos) = pc + *pc; + Env_val(clos) = accu; + accu = clos; + pc++; + Next; + } + + Instruct(CLOSUREREC): { + int nvars = *pc++; + value fun_clos, fun_env; + int i; + Alloc_small(fun_env, nvars + 1, 0); + Field(fun_env, 0) = Val_int(0); + if (nvars > 0) { + *--sp = accu; + for (i = 0; i < nvars; i++) Field(fun_env, i+1) = sp[i]; + sp += nvars; + } + accu = fun_env; + Alloc_small(fun_clos, Closure_wosize, Closure_tag); + Code_val(fun_clos) = pc + *pc; + Env_val(fun_clos) = accu; + modify(&Field(accu, 0), fun_clos); + accu = fun_clos; + pc++; + Next; + } + + Instruct(PUSHGETGLOBAL): + *--sp = accu; + /* Fallthrough */ + Instruct(GETGLOBAL): + accu = Field(global_data, *pc); + pc++; + Next; + + Instruct(PUSHGETGLOBALFIELD): + *--sp = accu; + /* Fallthrough */ + Instruct(GETGLOBALFIELD): { + accu = Field(global_data, *pc); + pc++; + accu = Field(accu, *pc); + pc++; + Next; + } + + Instruct(SETGLOBAL): + modify(&Field(global_data, *pc), accu); + accu = Val_unit; + pc++; + Next; + +/* Allocation of blocks */ + + Instruct(ATOM0): + accu = Atom(0); Next; + Instruct(ATOM1): + accu = Atom(1); Next; + Instruct(ATOM2): + accu = Atom(2); Next; + Instruct(ATOM3): + accu = Atom(3); Next; + + Instruct(PUSHATOM0): + *--sp = accu; accu = Atom(0); Next; + Instruct(PUSHATOM1): + *--sp = accu; accu = Atom(1); Next; + Instruct(PUSHATOM2): + *--sp = accu; accu = Atom(2); Next; + Instruct(PUSHATOM3): + *--sp = accu; accu = Atom(3); Next; + + Instruct(PUSHATOM): + *--sp = accu; + /* Fallthrough */ + Instruct(ATOM): + accu = Atom(*pc); + pc++; + Next; + + Instruct(MAKEBLOCK): { + mlsize_t wosize = *pc++; + tag_t tag = *pc++; + mlsize_t i; + value block; + Alloc_small(block, wosize, tag); + Field(block, 0) = accu; + for (i = 1; i < wosize; i++) Field(block, i) = *sp++; + accu = block; + Next; + } + Instruct(MAKEBLOCK1): { + tag_t tag = *pc++; + value block; + Alloc_small(block, 1, tag); + Field(block, 0) = accu; + accu = block; + Next; + } + Instruct(MAKEBLOCK2): { + tag_t tag = *pc++; + value block; + Alloc_small(block, 2, tag); + Field(block, 0) = accu; + Field(block, 1) = sp[0]; + sp += 1; + accu = block; + Next; + } + Instruct(MAKEBLOCK3): { + tag_t tag = *pc++; + value block; + Alloc_small(block, 3, tag); + Field(block, 0) = accu; + Field(block, 1) = sp[0]; + Field(block, 2) = sp[1]; + sp += 2; + accu = block; + Next; + } + +/* Access to components of blocks */ + + Instruct(GETFIELD0): + accu = Field(accu, 0); Next; + Instruct(GETFIELD1): + accu = Field(accu, 1); Next; + Instruct(GETFIELD2): + accu = Field(accu, 2); Next; + Instruct(GETFIELD3): + accu = Field(accu, 3); Next; + Instruct(GETFIELD): + accu = Field(accu, *pc); pc++; Next; + + Instruct(SETFIELD0): + modify_dest = &Field(accu, 0); + modify_newval = *sp++; + modify: + Modify(modify_dest, modify_newval); + accu = Val_unit; + Next; + Instruct(SETFIELD1): + modify_dest = &Field(accu, 1); + modify_newval = *sp++; + goto modify; + Instruct(SETFIELD2): + modify_dest = &Field(accu, 2); + modify_newval = *sp++; + goto modify; + Instruct(SETFIELD3): + modify_dest = &Field(accu, 3); + modify_newval = *sp++; + goto modify; + Instruct(SETFIELD): + modify_dest = &Field(accu, *pc); + pc++; + modify_newval = *sp++; + goto modify; + + Instruct(TAGOF): + accu = Val_int(Tag_val(accu)); + Next; + +/* For recursive definitions */ + + Instruct(DUMMY): { + int size = *pc++; + Alloc_small(accu, size, 0); + while (size--) Field(accu, size) = Val_long(0); + Next; + } + Instruct(UPDATE): { + value newval = *sp++; + mlsize_t size, n; + Tag_val(accu) = Tag_val(newval); + size = Wosize_val(newval); + for (n = 0; n < size; n++) { + modify(&Field(accu, n), Field(newval, n)); + } + accu = Val_unit; + Next; + } + +/* Array operations */ + + Instruct(VECTLENGTH): + accu = Val_long(Wosize_val(accu)); + Next; + Instruct(GETVECTITEM): + accu = Field(accu, Long_val(sp[0])); + sp += 1; + Next; + Instruct(SETVECTITEM): + modify_dest = &Field(accu, Long_val(sp[0])); + modify_newval = sp[1]; + sp += 2; + goto modify; + +/* String operations */ + + Instruct(GETSTRINGCHAR): + accu = Val_int(Byte_u(accu, Long_val(sp[0]))); + sp += 1; + Next; + Instruct(SETSTRINGCHAR): + Byte_u(accu, Long_val(sp[0])) = Int_val(sp[1]); + sp += 2; + Next; + +/* Branches and conditional branches */ + + Instruct(BRANCH): + pc += *pc; + Next; + Instruct(BRANCHIF): + if (Tag_val(accu) != 0) pc += *pc; else pc++; + Next; + Instruct(BRANCHIFNOT): + if (Tag_val(accu) == 0) pc += *pc; else pc++; + Next; + Instruct(SWITCH): { + long index = Long_val(accu); + Assert(index >= 0 && index < *pc); + pc++; + pc += pc[index]; + Next; + } + Instruct(TRANSLATE): { + long arg = Long_val(accu); + int num_cases = *pc++; + int low, high, i; + uint32 interv; + for (low = 0, high = num_cases - 1, accu = Val_int(0); + low <= high; + /*nothing*/) { + i = (low + high) / 2; + interv = pc[i]; + if (arg < (interv & 0xFF)) + high = i - 1; + else if (arg > ((interv >> 8) & 0xFF)) + low = i + 1; + else { + accu = Val_long(arg + (interv >> 16) - (interv & 0xFF)); + break; + } + } + pc += num_cases; + Next; + } + Instruct(BOOLNOT): + accu = Atom(Tag_val(accu) == 0); + Next; + +/* Exceptions */ + + Instruct(PUSHTRAP): + sp -= 4; + Trap_pc(sp) = pc + *pc; + Trap_link(sp) = trapsp; + sp[2] = env; + sp[3] = Val_long(extra_args); + trapsp = sp; + pc++; + Next; + + Instruct(POPTRAP): + /* We should check here if a signal is pending, to preserve the + semantics of the program w.r.t. exceptions. Unfortunately, + process_signal destroys the accumulator, and there is no + convenient way to preserve it... */ + trapsp = Trap_link(sp); + sp += 4; + Next; + + Instruct(RAISE): /* arg */ + raise_exception: + sp = trapsp; + if (sp >= stack_high - initial_sp_offset) { + exn_bucket = accu; + external_raise = initial_external_raise; + longjmp(external_raise->buf, 1); + } + pc = Trap_pc(sp); + trapsp = Trap_link(sp); + env = sp[2]; + extra_args = Long_val(sp[3]); + sp += 4; + Next; + +/* Stack checks */ + + check_stacks: + if (sp < stack_threshold) { + extern_sp = sp; + realloc_stack(); + sp = extern_sp; + } + /* Fall through CHECK_SIGNALS */ + +/* Signal handling */ + + Instruct(CHECK_SIGNALS): /* accu not preserved */ + if (something_to_do) goto process_signal; + Next; + + process_signal: + something_to_do = 0; + if (force_minor_flag){ + force_minor_flag = 0; + Setup_for_gc; + minor_collection (); + Restore_after_gc; + } + /* If a signal arrives between the following two instructions, + it will be lost. */ + { int signal_number = pending_signal; + pending_signal = 0; + if (signal_number) { + /* Push a return frame to the current code location */ + sp -= 4; + sp[0] = (value) pc; + sp[1] = env; + sp[2] = Val_long(extra_args); + sp[3] = Val_int(signal_number); + pc = Code_val(Field(signal_handlers, signal_number)); + env = Env_val(Field(signal_handlers, signal_number)); + extra_args = 0; + } + } + Next; + +/* Calling C functions */ + + Instruct(C_CALL1): + Setup_for_c_call; + accu = cprim[*pc](accu); + Restore_after_c_call; + pc++; + Next; + Instruct(C_CALL2): + Setup_for_c_call; + accu = cprim[*pc](accu, sp[1]); + Restore_after_c_call; + sp += 1; + pc++; + Next; + Instruct(C_CALL3): + Setup_for_c_call; + accu = cprim[*pc](accu, sp[1], sp[2]); + Restore_after_c_call; + sp += 2; + pc++; + Next; + Instruct(C_CALL4): + Setup_for_c_call; + accu = cprim[*pc](accu, sp[1], sp[2], sp[3]); + Restore_after_c_call; + sp += 3; + pc++; + Next; + Instruct(C_CALLN): { + int nargs = *pc++; + *--sp = accu; + Setup_for_c_call; + accu = cprim[*pc](sp + 1, nargs); + Restore_after_c_call; + sp += nargs; + pc++; + Next; + } + +/* Integer arithmetic */ + + Instruct(CONSTINT): + accu = Val_int(*pc); + pc++; + Next; + Instruct(PUSHCONSTINT): + *--sp = accu; + accu = Val_int(*pc); + pc++; + Next; + Instruct(NEGINT): + accu = (value)(2 - (long)accu); Next; + Instruct(ADDINT): + accu = (value)((long) accu + (long) *sp++ - 1); Next; + Instruct(SUBINT): + accu = (value)((long) accu - (long) *sp++ + 1); Next; + Instruct(MULINT): + accu = Val_long(Long_val(accu) * Long_val(*sp++)); Next; + Instruct(DIVINT): { + value div = *sp++; + if (div == Val_long(0)) { + accu = Field(global_data, ZERO_DIVIDE_EXN); + goto raise_exception; + } + accu = Val_long(Long_val(accu) / Long_val(div)); + Next; + } + Instruct(MODINT): { + value div = *sp++; + if (div == Val_long(0)) { + accu = Field(global_data, ZERO_DIVIDE_EXN); + goto raise_exception; + } + accu = Val_long(Long_val(accu) % Long_val(div)); + Next; + } + Instruct(ANDINT): + accu = (value)((long) accu & (long) *sp++); Next; + Instruct(ORINT): + accu = (value)((long) accu | (long) *sp++); Next; + Instruct(XORINT): + accu = (value)(((long) accu ^ (long) *sp++) | 1); Next; + Instruct(LSLINT): + accu = (value)((((long) accu - 1) << Long_val(*sp++)) + 1); Next; + Instruct(LSRINT): + accu = (value)((((long) accu - 1) >> Long_val(*sp++)) | 1); Next; + Instruct(ASRINT): + accu = (value)((((unsigned long) accu - 1) >> Long_val(*sp++)) | 1); + Next; + +#define Integer_comparison(opname,tst) \ + Instruct(opname): \ + accu = Atom((long) accu tst (long) *sp++); Next; + + Integer_comparison(EQ, ==) + Integer_comparison(NEQ, !=) + Integer_comparison(LTINT, <) + Integer_comparison(LEINT, <=) + Integer_comparison(GTINT, >) + Integer_comparison(GEINT, >=) + + Instruct(OFFSETINT): + accu += *pc << 1; + pc++; + Next; + Instruct(OFFSETREF): + Field(accu, 0) += *pc << 1; + pc++; + Next; + +/* Machine control */ + + Instruct(STOP): + external_raise = initial_external_raise; + extern_sp = sp; + return accu; + +#ifndef THREADED_CODE + default: + fatal_error("bad opcode"); + } + } +#endif +} + +static opcode_t callback_code[] = { + ACC1, APPLY1, POP, 1, STOP +}; + +value callback(closure, argument) + value closure, argument; +{ + extern_sp -= 2; + extern_sp[0] = argument; + extern_sp[1] = closure; + return interprete(callback_code, sizeof(callback_code)); +} diff --git a/byterun/interp.h b/byterun/interp.h new file mode 100644 index 0000000000..e652331d8a --- /dev/null +++ b/byterun/interp.h @@ -0,0 +1,14 @@ +/* The bytecode interpreter */ + +#ifndef _interp_ +#define _interp_ + + +#include "misc.h" +#include "mlvalues.h" + +value interprete P((code_t prog, asize_t prog_size)); +value callback P((value closure, value argument)); + + +#endif _interp_ diff --git a/byterun/intext.h b/byterun/intext.h new file mode 100644 index 0000000000..7b1a26c414 --- /dev/null +++ b/byterun/intext.h @@ -0,0 +1,54 @@ +/* Structured input/output */ + +#ifndef __intext__ +#define __intext__ + +#include "misc.h" +#include "mlvalues.h" +#include "io.h" + +/* Magic number */ + +#define Base_magic_number 0x8495A6B9 +#define Compact_magic_number (Base_magic_number + 4) + +/* Codes for the compact format */ + +#define PREFIX_SMALL_BLOCK 0x80 +#define PREFIX_SMALL_INT 0x40 +#define PREFIX_SMALL_STRING 0x20 +#define CODE_INT8 0x0 +#define CODE_INT16 0x1 +#define CODE_INT32 0x2 +#define CODE_INT64 0x3 +#define CODE_SHARED8 0x4 +#define CODE_SHARED16 0x5 +#define CODE_SHARED32 0x6 +#define CODE_BLOCK32 0x8 +#define CODE_STRING8 0x9 +#define CODE_STRING32 0xA +#define CODE_DOUBLE_BIG 0xB +#define CODE_DOUBLE_LITTLE 0xC +#ifdef BIG_ENDIAN +#define CODE_DOUBLE_NATIVE CODE_DOUBLE_BIG +#else +#define CODE_DOUBLE_NATIVE CODE_DOUBLE_LITTLE +#endif + +/* Initial sizes of data structures for extern */ + +#ifndef INITIAL_EXTERN_SIZE +#define INITIAL_EXTERN_SIZE 4096 +#endif +#ifndef INITIAL_EXTERN_TABLE_SIZE +#define INITIAL_EXTERN_TABLE_SIZE 2039 +#endif + +/* The entry points */ + +value output_value P((struct channel *, value)); +value input_value P((struct channel *)); + + +#endif + diff --git a/byterun/ints.c b/byterun/ints.c new file mode 100644 index 0000000000..4a4f767225 --- /dev/null +++ b/byterun/ints.c @@ -0,0 +1,81 @@ +#include <stdio.h> +#include "alloc.h" +#include "fail.h" +#include "memory.h" +#include "mlvalues.h" + +value int_of_string(s) /* ML */ + value s; +{ + long res; + int sign; + int base; + char * p; + int c, d; + + p = String_val(s); + if (*p == 0) failwith("int_of_string"); + sign = 1; + if (*p == '-') { + sign = -1; + p++; + } + base = 10; + if (*p == '0') { + switch (p[1]) { + case 'x': case 'X': + base = 16; p += 2; break; + case 'o': case 'O': + base = 8; p += 2; break; + case 'b': case 'B': + base = 2; p += 2; break; + } + } + res = 0; + while (1) { + c = *p; + if (c >= '0' && c <= '9') + d = c - '0'; + else if (c >= 'A' && c <= 'F') + d = c - 'A' + 10; + else if (c >= 'a' && c <= 'f') + d = c - 'a' + 10; + else + break; + if (d >= base) break; + res = base * res + d; + p++; + } + if (*p != 0) + failwith("int_of_string"); + return Val_long(sign < 0 ? -res : res); +} + +value format_int(fmt, arg) /* ML */ + value fmt, arg; +{ + char format_buffer[32]; + int prec; + char * p; + char * dest; + value res; + + prec = 32; + for (p = String_val(fmt); *p != 0; p++) { + if (*p >= '0' && *p <= '9') { + prec = atoi(p) + 5; + break; + } + } + if (prec <= sizeof(format_buffer)) { + dest = format_buffer; + } else { + dest = stat_alloc(prec); + } + sprintf(dest, String_val(fmt), Long_val(arg)); + res = copy_string(dest); + if (dest != format_buffer) { + stat_free(dest); + } + return res; +} diff --git a/byterun/io.c b/byterun/io.c new file mode 100644 index 0000000000..67bc961f78 --- /dev/null +++ b/byterun/io.c @@ -0,0 +1,393 @@ +/* Buffered input/output. */ + +#include <errno.h> +#include <fcntl.h> +#include <string.h> +#include <unistd.h> +#include "alloc.h" +#include "fail.h" +#include "io.h" +#include "memory.h" +#include "misc.h" +#include "mlvalues.h" +#include "signals.h" +#include "sys.h" +#ifdef HAS_UI +#include "ui.h" +#endif + +/* Common functions. */ + +struct channel * open_descr(fd) + int fd; +{ + struct channel * channel; + + channel = (struct channel *) stat_alloc(sizeof(struct channel)); + channel->fd = fd; + channel->offset = 0; + channel->curr = channel->max = channel->buff; + channel->end = channel->buff + IO_BUFFER_SIZE; + return channel; +} + +value open_descriptor(fd) /* ML */ + value fd; +{ + return (value) open_descr(Int_val(fd)); +} + +value channel_descriptor(channel) /* ML */ + struct channel * channel; +{ + return Val_long(channel->fd); +} + +value channel_size(channel) /* ML */ + struct channel * channel; +{ + long end; + + end = lseek(channel->fd, 0, 2); + if (end == -1) sys_error(NULL); + if (lseek(channel->fd, channel->offset, 0) != channel->offset) + sys_error(NULL); + return Val_long(end); +} + +/* Output */ + +static void really_write(fd, p, n) + int fd; + char * p; + int n; +{ + int retcode; + while (n > 0) { +#ifdef HAS_UI + retcode = ui_write(fd, p, n); +#else +#ifdef EINTR + do { retcode = write(fd, p, n); } while (retcode == -1 && errno == EINTR); +#else + retcode = write(fd, p, n); +#endif +#endif + if (retcode == -1) sys_error(NULL); + p += retcode; + n -= retcode; + } +} + +value flush(channel) /* ML */ + struct channel * channel; +{ + int n; + n = channel->max - channel->buff; + if (n > 0) { + really_write(channel->fd, channel->buff, n); + channel->offset += n; + channel->curr = channel->buff; + channel->max = channel->buff; + } + return Atom(0); +} + +value output_char(channel, ch) /* ML */ + struct channel * channel; + value ch; +{ + putch(channel, Long_val(ch)); + return Atom(0); +} + +void putword(channel, w) + struct channel * channel; + uint32 w; +{ + putch(channel, w >> 24); + putch(channel, w >> 16); + putch(channel, w >> 8); + putch(channel, w); +} + +value output_int(channel, w) /* ML */ + struct channel * channel; + value w; +{ + putword(channel, Long_val(w)); + return Atom(0); +} + +void putblock(channel, p, n) + struct channel * channel; + char * p; + unsigned n; +{ + unsigned m; + + m = channel->end - channel->curr; + if (channel->curr == channel->buff && n >= m) { + really_write(channel->fd, p, n); + channel->offset += n; + } else if (n <= m) { + bcopy(p, channel->curr, n); + channel->curr += n; + if (channel->curr > channel->max) channel->max = channel->curr; + } else { + bcopy(p, channel->curr, m); + p += m; + n -= m; + m = channel->end - channel->buff; + really_write(channel->fd, channel->buff, m); + channel->offset += m; + if (n <= m) { + bcopy(p, channel->buff, n); + channel->curr = channel->max = channel->buff + n; + } else { + really_write(channel->fd, p, n); + channel->offset += n; + channel->curr = channel->max = channel->buff; + } + } +} + +value output(channel, buff, start, length) /* ML */ + value channel, buff, start, length; +{ + putblock((struct channel *) channel, + &Byte(buff, Long_val(start)), + (unsigned) Long_val(length)); + return Atom(0); +} + +value seek_out(channel, pos) /* ML */ + struct channel * channel; + value pos; +{ + long dest; + + dest = Long_val(pos); + if (dest >= channel->offset && + dest <= channel->offset + channel->max - channel->buff) { + channel->curr = channel->buff + dest - channel->offset; + } else { + flush(channel); + if (lseek(channel->fd, dest, 0) != dest) sys_error(NULL); + channel->offset = dest; + } + return Atom(0); +} + +value pos_out(channel) /* ML */ + struct channel * channel; +{ + return Val_long(channel->offset + channel->curr - channel->buff); +} + +value close_out(channel) /* ML */ + struct channel * channel; +{ + flush(channel); + close(channel->fd); + stat_free((char *) channel); + return Atom(0); +} + +/* Input */ + +static int really_read(fd, p, n) + int fd; + char * p; + unsigned n; +{ + int retcode; + + enter_blocking_section(); +#ifdef HAS_UI + retcode = ui_read(fd, p, n); +#else +#ifdef EINTR + do { retcode = read(fd, p, n); } while (retcode == -1 && errno == EINTR); +#else + retcode = read(fd, p, n); +#endif +#endif + leave_blocking_section(); + if (retcode == -1) sys_error(NULL); + return retcode; +} + +unsigned char refill(channel) + struct channel * channel; +{ + int n; + + n = really_read(channel->fd, channel->buff, IO_BUFFER_SIZE); + if (n == 0) raise_end_of_file(); + channel->offset += n; + channel->max = channel->buff + n; + channel->curr = channel->buff + 1; + return (unsigned char)(channel->buff[0]); +} + +value input_char(channel) /* ML */ + struct channel * channel; +{ + unsigned char c; + c = getch(channel); + return Val_long(c); +} + +uint32 getword(channel) + struct channel * channel; +{ + int i; + uint32 res; + + res = 0; + for(i = 0; i < 4; i++) { + res = (res << 8) + getch(channel); + } + return res; +} + +value input_int(channel) /* ML */ + struct channel * channel; +{ + long i; + i = getword(channel); +#ifdef SIXTYFOUR + i = (i << 32) >> 32; /* Force sign extension */ +#endif + return Val_long(i); +} + +unsigned getblock(channel, p, n) + struct channel * channel; + char * p; + unsigned n; +{ + unsigned m, l; + + m = channel->max - channel->curr; + if (n <= m) { + bcopy(channel->curr, p, n); + channel->curr += n; + return n; + } else if (m > 0) { + bcopy(channel->curr, p, m); + channel->curr += m; + return m; + } else if (n < IO_BUFFER_SIZE) { + l = really_read(channel->fd, channel->buff, IO_BUFFER_SIZE); + channel->offset += l; + channel->max = channel->buff + l; + if (n > l) n = l; + bcopy(channel->buff, p, n); + channel->curr = channel->buff + n; + return n; + } else { + channel->curr = channel->buff; + channel->max = channel->buff; + l = really_read(channel->fd, p, n); + channel->offset += l; + return l; + } +} + +int really_getblock(chan, p, n) + struct channel * chan; + char * p; + unsigned long n; +{ + unsigned r; + while (n > 0) { + r = getblock(chan, p, (unsigned) n); + if (r == 0) return 0; + p += r; + n -= r; + } + return 1; +} + +value input(channel, buff, start, length) /* ML */ + value channel, buff, start, length; +{ + return Val_long(getblock((struct channel *) channel, + &Byte(buff, Long_val(start)), + (unsigned) Long_val(length))); +} + +value seek_in(channel, pos) /* ML */ + struct channel * channel; + value pos; +{ + long dest; + + dest = Long_val(pos); + if (dest >= channel->offset - (channel->max - channel->buff) && + dest <= channel->offset) { + channel->curr = channel->max - (channel->offset - dest); + } else { + if (lseek(channel->fd, dest, 0) != dest) sys_error(NULL); + channel->offset = dest; + channel->curr = channel->max = channel->buff; + } + return Atom(0); +} + +value pos_in(channel) /* ML */ + struct channel * channel; +{ + return Val_long(channel->offset - (channel->max - channel->curr)); +} + +value close_in(channel) /* ML */ + struct channel * channel; +{ + close(channel->fd); + stat_free((char *) channel); + return Atom(0); +} + +value input_scan_line(channel) /* ML */ + struct channel * channel; +{ + char * p; + int n; + + p = channel->curr; + do { + if (p >= channel->max) { + /* No more characters available in the buffer */ + if (channel->curr > channel->buff) { + /* Try to make some room in the buffer by shifting the unread + portion at the beginning */ + bcopy(channel->curr, channel->buff, channel->max - channel->curr); + n = channel->curr - channel->buff; + channel->curr -= n; + channel->max -= n; + p -= n; + } + if (channel->max >= channel->end) { + /* Buffer is full, no room to read more characters from the input. + Return the number of characters in the buffer, with negative + sign to indicate that no newline was encountered. */ + return Val_long(-(channel->max - channel->curr)); + } + /* Fill the buffer as much as possible */ + n = really_read(channel->fd, channel->max, channel->end - channel->max); + if (n == 0) { + /* End-of-file encountered. Return the number of characters in the + buffer, with negative sign since we haven't encountered + a newline. */ + return Val_long(-(channel->max - channel->curr)); + } + channel->offset += n; + channel->max += n; + } + } while (*p++ != '\n'); + /* Found a newline. Return the length of the line, newline included. */ + return Val_long(p - channel->curr); +} diff --git a/byterun/io.h b/byterun/io.h new file mode 100644 index 0000000000..d679886cbc --- /dev/null +++ b/byterun/io.h @@ -0,0 +1,52 @@ +/* Buffered input/output */ + +#ifndef _io_ +#define _io_ + + +#include "misc.h" +#include "mlvalues.h" + +#ifndef IO_BUFFER_SIZE +#define IO_BUFFER_SIZE 4096 +#endif + +struct channel { + int fd; /* Unix file descriptor */ + long offset; /* Absolute position of fd in the file */ + char * curr; /* Current position in the buffer */ + char * max; /* Logical end of the buffer */ + char * end; /* Physical end of the buffer */ + char buff[IO_BUFFER_SIZE]; /* The buffer itself */ +}; + +/* For an output channel: + [offset] is the absolute position of the beginning of the buffer [buff]. + For an input channel: + [offset] is the absolute position of the logical end of the buffer [max]. +*/ + +#define putch(channel, ch) \ + { if ((channel)->curr >= (channel)->end) flush(channel); \ + *((channel)->curr)++ = (ch); \ + if ((channel)->curr > (channel)->max) (channel)->max = (channel)->curr; } + +#define getch(channel) \ + ((channel)->curr >= (channel)->max \ + ? refill(channel) \ + : (unsigned char) *((channel))->curr++) + +struct channel * open_descr P((int)); +value flush P((struct channel *)); +void putword P((struct channel *, uint32)); +void putblock P((struct channel *, char *, unsigned)); +unsigned char refill P((struct channel *)); +value pos_out P((struct channel *)); +value seek_out P((struct channel *, value)); +uint32 getword P((struct channel *)); +unsigned getblock P((struct channel *, char *, unsigned)); +int really_getblock P((struct channel *, char *, unsigned long)); +value close_in P((struct channel *)); + + +#endif /* _io_ */ diff --git a/byterun/main.c b/byterun/main.c new file mode 100644 index 0000000000..f91ada34c2 --- /dev/null +++ b/byterun/main.c @@ -0,0 +1,229 @@ +/* Start-up code */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <fcntl.h> +#include <unistd.h> +#include "alloc.h" +#include "exec.h" +#include "fail.h" +#include "gc_ctrl.h" +#include "interp.h" +#include "intext.h" +#include "io.h" +#include "misc.h" +#include "mlvalues.h" +#include "stacks.h" +#include "sys.h" + +#ifndef O_BINARY +#define O_BINARY 0 +#endif + +header_t first_atoms[256]; +code_t start_code; +asize_t code_size; + +static void init_atoms() +{ + int i; + for(i = 0; i < 256; i++) first_atoms[i] = Make_header(0, i, White); +} + +static unsigned long read_size(p) + unsigned char * p; +{ + return ((unsigned long) p[0] << 24) + ((unsigned long) p[1] << 16) + + ((unsigned long) p[2] << 8) + p[3]; +} + +#define FILE_NOT_FOUND (-1) +#define TRUNCATED_FILE (-2) +#define BAD_MAGIC_NUM (-3) + +static int read_trailer(fd, trail) + int fd; + struct exec_trailer * trail; +{ + char buffer[TRAILER_SIZE]; + + lseek(fd, (long) -TRAILER_SIZE, 2); + if (read(fd, buffer, TRAILER_SIZE) < TRAILER_SIZE) return TRUNCATED_FILE; + trail->code_size = read_size(buffer); + trail->data_size = read_size(buffer+4); + trail->symbol_size = read_size(buffer+8); + trail->debug_size = read_size(buffer+12); + if (strncmp(buffer + 16, EXEC_MAGIC, 12) == 0) + return 0; + else + return BAD_MAGIC_NUM; +} + +extern char * searchpath(); + +int attempt_open(name, trail, do_open_script) + char ** name; + struct exec_trailer * trail; + int do_open_script; +{ + char * truename; + int fd; + int err; + char buf [2]; + + truename = searchpath(*name); + if (truename == 0) truename = *name; else *name = truename; + fd = open(truename, O_RDONLY | O_BINARY); + if (fd == -1) return FILE_NOT_FOUND; + if (!do_open_script){ + err = read (fd, buf, 2); + if (err < 2) return TRUNCATED_FILE; + if (buf [0] == '#' && buf [1] == '!') return BAD_MAGIC_NUM; + } + err = read_trailer(fd, trail); + if (err != 0) { close(fd); return err; } + return fd; +} + +/* Invocation of camlrun: 4 cases. + + 1. runtime + bytecode + user types: camlrun [options] bytecode args... + arguments: camlrun [options] bytecode args... + + 2. bytecode script + user types: bytecode args... + 2a (kernel 1) arguments: camlrun ./bytecode args... + 2b (kernel 2) arguments: bytecode bytecode args... + + 3. concatenated runtime and bytecode + user types: composite args... + arguments: composite args... + +Algorithm: + 1- If argument 0 is a valid byte-code file that does not start with #!, + then we are in case 3 and we pass the same command line to the + Caml Light program. + 2- In all other cases, we parse the command line as: + (whatever) [options] bytecode args... + and we strip "(whatever) [options]" from the command line. + +*/ + +#ifdef HAS_UI +int caml_main(argc, argv) +#else +int main(argc, argv) +#endif + int argc; + char * argv[]; +{ + int fd; + struct exec_trailer trail; + int i; + struct longjmp_buffer raise_buf; + struct channel * chan; + int verbose_init = 0, percent_free_init = Percent_free_def; + long minor_heap_init = Minor_heap_def, heap_chunk_init = Heap_chunk_def; + +#ifdef DEBUG + verbose_init = 1; +#endif + + i = 0; + fd = attempt_open(&argv[0], &trail, 0); + + if (fd < 0) { + + for(i = 1; i < argc && argv[i][0] == '-'; i++) { + switch(argv[i][1]) { +#ifdef DEBUG + case 't': { + extern int trace_flag; + trace_flag = 1; + break; + } +#endif + case 'v': + verbose_init = 1; + break; + case 'V': + fprintf(stderr, "The Caml 1999 runtime system, version %s\n", + "1"); + exit(0); + default: + fatal_error_arg("Unknown option %s.\n", argv[i]); + } + } + + if (argv[i] == 0) + fatal_error("No bytecode file specified.\n"); + + fd = attempt_open(&argv[i], &trail, 1); + + switch(fd) { + case FILE_NOT_FOUND: + fatal_error_arg("Fatal error: cannot find file %s\n", argv[i]); + break; + case TRUNCATED_FILE: + case BAD_MAGIC_NUM: + fatal_error_arg( + "Fatal error: the file %s is not a bytecode executable file\n", + argv[i]); + break; + } + } + + /* Runtime options. The option letter is the first letter of the + last word of the ML name of the option (see [lib/gc.mli]). */ + + { char *opt = getenv ("CAMLRUNPARAM"); + if (opt != NULL){ + while (*opt != '\0'){ + switch (*opt++){ + case 's': sscanf (opt, "=%ld", &minor_heap_init); break; + case 'i': sscanf (opt, "=%ld", &heap_chunk_init); break; + case 'o': sscanf (opt, "=%d", &percent_free_init); break; + case 'v': sscanf (opt, "=%d", &verbose_init); break; + } + } + } + } + + if (setjmp(raise_buf.buf) == 0) { + + external_raise = &raise_buf; + + init_gc (minor_heap_init, heap_chunk_init, percent_free_init, + verbose_init); + init_stack(); + init_atoms(); + + lseek(fd, - (long) (TRAILER_SIZE + trail.code_size + trail.data_size + + trail.symbol_size + trail.debug_size), 2); + + code_size = trail.code_size; + start_code = (code_t) stat_alloc(code_size); + if (read(fd, (char *) start_code, code_size) != code_size) + fatal_error("Fatal error: truncated bytecode file.\n"); + +#ifdef BIG_ENDIAN + fixup_endianness(start_code, code_size); +#endif + + chan = open_descr(fd); + global_data = input_value(chan); + close_in(chan); + + sys_init(argv + i); + interprete(start_code, code_size); + sys_exit(Val_int(0)); + + } else { + + fatal_error_arg("Fatal error: uncaught exception %s.\n", + String_val(Field(Field(exn_bucket, 0), 0))); + } +} + diff --git a/byterun/major_gc.c b/byterun/major_gc.c new file mode 100644 index 0000000000..11dd32b790 --- /dev/null +++ b/byterun/major_gc.c @@ -0,0 +1,305 @@ +#include "config.h" +#include "fail.h" +#include "freelist.h" +#include "gc.h" +#include "gc_ctrl.h" +#include "major_gc.h" +#include "misc.h" +#include "mlvalues.h" +#include "roots.h" + +#ifdef __STDC__ +#include <limits.h> +#else +#ifdef SIXTYFOUR +#define LONG_MAX 0x7FFFFFFFFFFFFFFF +#else +#define LONG_MAX 0x7FFFFFFF +#endif +#endif + +int percent_free; +long major_heap_increment; +char *heap_start, *heap_end; +char *page_table; +asize_t page_table_size; +char *gc_sweep_hp; +int gc_phase; +static value *gray_vals; +value *gray_vals_cur, *gray_vals_end; +static asize_t gray_vals_size; +static int heap_is_pure; /* The heap is pure if the only gray objects + below [markhp] are also in [gray_vals]. */ +unsigned long allocated_words; +unsigned long extra_heap_memory; +extern char *fl_merge; /* Defined in freelist.c. */ + +static char *markhp, *chunk, *limit; + +static void realloc_gray_vals () +{ + value *new; + + Assert (gray_vals_cur == gray_vals_end); + if (gray_vals_size < stat_heap_size / 128){ + gc_message ("Growing gray_vals to %ldk\n", + (long) gray_vals_size * sizeof (value) / 512); + new = (value *) realloc ((char *) gray_vals, + 2 * gray_vals_size * sizeof (value)); + if (new == NULL){ + gc_message ("No room for growing gray_vals\n", 0); + gray_vals_cur = gray_vals; + heap_is_pure = 0; + }else{ + gray_vals = new; + gray_vals_cur = gray_vals + gray_vals_size; + gray_vals_size *= 2; + gray_vals_end = gray_vals + gray_vals_size; + } + }else{ + gray_vals_cur = gray_vals + gray_vals_size / 2; + heap_is_pure = 0; + } +} + +void darken (v) + value v; +{ + if (Is_block (v) && Is_in_heap (v) && Is_white_val (v)){ + Hd_val (v) = Grayhd_hd (Hd_val (v)); + *gray_vals_cur++ = v; + if (gray_vals_cur >= gray_vals_end) realloc_gray_vals (); + } +} + +static void darken_root (p, v) + value *p; + value v; +{ + darken (v); +} + +static void start_cycle () +{ + Assert (gray_vals_cur == gray_vals); + Assert (Is_white_val (global_data)); + darken (global_data); + scan_local_roots (darken_root); + gc_phase = Phase_mark; + markhp = NULL; +} + +static void mark_slice (work) + long work; +{ + value v, child; + mlsize_t i; + + while (work > 0){ + if (gray_vals_cur > gray_vals){ + v = *--gray_vals_cur; + Assert (Is_gray_val (v)); + Hd_val (v) = Blackhd_hd (Hd_val (v)); + if (Tag_val (v) < No_scan_tag){ + for (i = Wosize_val (v); i > 0;){ + --i; + child = Field (v, i); + darken (child); + } + } + work -= Whsize_val (v); + }else if (markhp != NULL){ + if (markhp == limit){ + chunk = (((heap_chunk_head *) chunk) [-1]).next; + if (chunk == NULL){ + markhp = NULL; + }else{ + markhp = chunk; + limit = chunk + (((heap_chunk_head *) chunk) [-1]).size; + } + }else{ + if (Is_gray_val (Val_hp (markhp))){ + Assert (gray_vals_cur == gray_vals); + *gray_vals_cur++ = Val_hp (markhp); + } + markhp += Bhsize_hp (markhp); + } + }else if (!heap_is_pure){ + heap_is_pure = 1; + chunk = heap_start; + markhp = chunk; + limit = chunk + (((heap_chunk_head *) chunk) [-1]).size; + }else{ + /* Marking is done. */ + gc_sweep_hp = heap_start; + fl_init_merge (); + gc_phase = Phase_sweep; + chunk = heap_start; + gc_sweep_hp = chunk; + limit = chunk + (((heap_chunk_head *) chunk) [-1]).size; + work = 0; + } + } +} + +static void sweep_slice (work) + long work; +{ + char *hp; + header_t hd; + + while (work > 0){ + if (gc_sweep_hp < limit){ + hp = gc_sweep_hp; + hd = Hd_hp (hp); + work -= Whsize_hd (hd); + gc_sweep_hp += Bhsize_hd (hd); + switch (Color_hd (hd)){ + case White: + if (Tag_hd (hd) == Final_tag){ + Final_fun (Val_hp (hp)) (Val_hp (hp)); + } + gc_sweep_hp = fl_merge_block (Bp_hp (hp)); + break; + case Gray: + Assert (0); /* Fall through to Black when not in debug mode. */ + case Black: + Hd_hp (hp) = Whitehd_hd (hd); + break; + case Blue: + /* Only the blocks of the free-list are blue. See [freelist.c]. */ + fl_merge = Bp_hp (hp); + break; + } + Assert (gc_sweep_hp <= limit); + }else{ + chunk = (((heap_chunk_head *) chunk) [-1]).next; + if (chunk == NULL){ + /* Sweeping is done. Start the next cycle. */ + ++ stat_major_collections; + work = 0; + start_cycle (); + }else{ + gc_sweep_hp = chunk; + limit = chunk + (((heap_chunk_head *) chunk) [-1]).size; + } + } + } +} + +void major_collection_slice () +{ + /* Free memory at the start of the GC cycle: + FM = stat_heap_size * percent_free / 100 * 2/3 + Proportion of free memory consumed since the previous slice: + PH = allocated_words / FM + Proportion of extra-heap memory consumed since the previous slice: + PE = extra_heap_memory / stat_heap_size + Proportion of total work to do in this slice: + P = PH + PE + Amount of marking work for the GC cycle: + MW = stat_heap_size * (100 - percent_free) / 100 + Amount of sweeping work for the GC cycle: + SW = stat_heap_size + Amount of marking work for this slice: + MS = MW * 2 * P + MS = 2 * (100 - percent_free) + * (allocated_words * 3 / percent_free / 2 + + 100 * extra_heap_memory) + Amount of sweeping work for this slice: + SS = SW * 2 * P + SS = 2 * 100 + * (allocated_words * 3 / percent_free / 2 + + 100 * extra_heap_memory) + This slice will either mark MS words or sweep SS words. + */ + +#define Margin 100 /* Make it a little faster to be on the safe side. */ + + if (gc_phase == Phase_mark){ + mark_slice (2 * (100 - percent_free) + * (allocated_words * 3 / percent_free / 2 + + 100 * extra_heap_memory) + + Margin); + gc_message ("!", 0); + }else{ + Assert (gc_phase == Phase_sweep); + sweep_slice (200 * (allocated_words * 3 / percent_free / 2 + + 100 * extra_heap_memory) + + Margin); + gc_message ("$", 0); + } + stat_major_words += allocated_words; + allocated_words = 0; + extra_heap_memory = 0; +} + +/* The minor heap must be empty when this function is called. */ +void finish_major_cycle () +{ + if (gc_phase == Phase_mark) mark_slice (LONG_MAX); + Assert (gc_phase == Phase_sweep); + sweep_slice (LONG_MAX); + stat_major_words += allocated_words; + allocated_words = 0; +} + +asize_t round_heap_chunk_size (request) + asize_t request; +{ Assert (major_heap_increment >= Heap_chunk_min); + if (request < major_heap_increment){ + Assert (major_heap_increment % Page_size == 0); + return major_heap_increment; + }else if (request <= Heap_chunk_max){ + return ((request + Page_size - 1) >> Page_log) << Page_log; + }else{ + raise_out_of_memory (); + } +} + +void init_major_heap (heap_size) + asize_t heap_size; +{ + asize_t i; + + stat_heap_size = round_heap_chunk_size (heap_size); + Assert (stat_heap_size % Page_size == 0); + heap_start = aligned_malloc (stat_heap_size + sizeof (heap_chunk_head), + sizeof (heap_chunk_head)); + if (heap_start == NULL) + fatal_error ("Fatal error: not enough memory for the initial heap.\n"); + heap_start += sizeof (heap_chunk_head); + Assert ((unsigned long) heap_start % Page_size == 0); + (((heap_chunk_head *) heap_start) [-1]).size = stat_heap_size; + (((heap_chunk_head *) heap_start) [-1]).next = NULL; + heap_end = heap_start + stat_heap_size; + Assert ((unsigned long) heap_end % Page_size == 0); +#ifdef SIXTEEN + page_table_size = 640L * 1024L / Page_size + 1; +#else + page_table_size = 4 * stat_heap_size / Page_size; +#endif + page_table = (char *) malloc (page_table_size); + if (page_table == NULL){ + fatal_error ("Fatal error: not enough memory for the initial heap.\n"); + } + for (i = 0; i < page_table_size; i++){ + page_table [i] = Not_in_heap; + } + for (i = Page (heap_start); i < Page (heap_end); i++){ + page_table [i] = In_heap; + } + Hd_hp (heap_start) = Make_header (Wosize_bhsize (stat_heap_size), 0, Blue); + fl_init_merge (); + fl_merge_block (Bp_hp (heap_start)); + /* We start the major GC in the marking phase, just after the roots have been + darkened. (Since there are no roots, we don't have to darken anything.) */ + gc_phase = Phase_mark; + gray_vals_size = 2048; + gray_vals = (value *) malloc (gray_vals_size * sizeof (value)); + gray_vals_cur = gray_vals; + gray_vals_end = gray_vals + gray_vals_size; + heap_is_pure = 1; + allocated_words = 0; + extra_heap_memory = 0; +} diff --git a/byterun/major_gc.h b/byterun/major_gc.h new file mode 100644 index 0000000000..0c39a87de1 --- /dev/null +++ b/byterun/major_gc.h @@ -0,0 +1,42 @@ +#ifndef _major_gc_ +#define _major_gc_ + + +#include "freelist.h" +#include "misc.h" + +typedef struct { + asize_t size; + char *next; +} heap_chunk_head; + +extern int gc_phase; +extern unsigned long allocated_words; +extern unsigned long extra_heap_memory; + +#define Phase_mark 0 +#define Phase_sweep 1 + +extern char *heap_start; +extern char *heap_end; +extern unsigned long total_heap_size; +extern char *page_table; +extern asize_t page_table_size; +extern char *gc_sweep_hp; + +#define In_heap 1 +#define Not_in_heap 0 +#define Page(p) (((addr) (p) - (addr) heap_start) >> Page_log) +#define Is_in_heap(p) \ + ((addr)(p) >= (addr)heap_start && (addr)(p) < (addr)heap_end \ + && page_table [Page (p)] == In_heap) + +void init_major_heap P((asize_t)); +asize_t round_heap_chunk_size P((asize_t)); +void darken P((value)); +void major_collection_slice P((void)); +void major_collection P((void)); +void finish_major_cycle P((void)); + + +#endif /* _major_gc_ */ diff --git a/byterun/memory.c b/byterun/memory.c new file mode 100644 index 0000000000..aacf05eba7 --- /dev/null +++ b/byterun/memory.c @@ -0,0 +1,205 @@ +#include <string.h> +#include "fail.h" +#include "freelist.h" +#include "gc.h" +#include "gc_ctrl.h" +#include "major_gc.h" +#include "memory.h" +#include "minor_gc.h" +#include "misc.h" +#include "mlvalues.h" + +/* Allocate more memory from malloc for the heap. + Return a block of at least the requested size (in words). + Return NULL when out of memory. +*/ +static char *expand_heap (request) + mlsize_t request; +{ + char *mem; + char *new_page_table; + asize_t new_page_table_size; + asize_t malloc_request; + asize_t i, more_pages; + + malloc_request = round_heap_chunk_size (Bhsize_wosize (request)); + gc_message ("Growing heap to %ldk\n", + (stat_heap_size + malloc_request) / 1024); + mem = aligned_malloc (malloc_request + sizeof (heap_chunk_head), + sizeof (heap_chunk_head)); + if (mem == NULL){ + gc_message ("No room for growing heap\n", 0); + return NULL; + } + mem += sizeof (heap_chunk_head); + (((heap_chunk_head *) mem) [-1]).size = malloc_request; + Assert (Wosize_bhsize (malloc_request) >= request); + Hd_hp (mem) = Make_header (Wosize_bhsize (malloc_request), 0, Blue); + + if (mem < heap_start){ + more_pages = -Page (mem); + }else if (Page (mem + malloc_request) > page_table_size){ + Assert (mem >= heap_end); + more_pages = Page (mem + malloc_request) - page_table_size; + }else{ + more_pages = 0; + } + + if (more_pages != 0){ + new_page_table_size = page_table_size + more_pages; + new_page_table = (char *) malloc (new_page_table_size); + if (new_page_table == NULL){ + gc_message ("No room for growing page table\n", 0); + free (mem); + return NULL; + } + } else { + new_page_table = NULL; + new_page_table_size = 0; + } + + if (mem < heap_start){ + Assert (more_pages != 0); + for (i = 0; i < more_pages; i++){ + new_page_table [i] = Not_in_heap; + } + bcopy (page_table, new_page_table + more_pages, page_table_size); + (((heap_chunk_head *) mem) [-1]).next = heap_start; + heap_start = mem; + }else{ + char **last; + char *cur; + + if (mem >= heap_end) heap_end = mem + malloc_request; + if (more_pages != 0){ + for (i = page_table_size; i < new_page_table_size; i++){ + new_page_table [i] = Not_in_heap; + } + bcopy (page_table, new_page_table, page_table_size); + } + last = &heap_start; + cur = *last; + while (cur != NULL && cur < mem){ + last = &((((heap_chunk_head *) cur) [-1]).next); + cur = *last; + } + (((heap_chunk_head *) mem) [-1]).next = cur; + *last = mem; + } + + if (more_pages != 0){ + free (page_table); + page_table = new_page_table; + page_table_size = new_page_table_size; + } + + for (i = Page (mem); i < Page (mem + malloc_request); i++){ + page_table [i] = In_heap; + } + stat_heap_size += malloc_request; + return Bp_hp (mem); +} + +value alloc_shr (wosize, tag) + mlsize_t wosize; + tag_t tag; +{ + char *hp, *new_block; + + hp = fl_allocate (wosize); + if (hp == NULL){ + new_block = expand_heap (wosize); + if (new_block == NULL) raise_out_of_memory (); + fl_add_block (new_block); + hp = fl_allocate (wosize); + } + + Assert (Is_in_heap (Val_hp (hp))); + + if (gc_phase == Phase_mark || (addr)hp >= (addr)gc_sweep_hp){ + Hd_hp (hp) = Make_header (wosize, tag, Black); + }else{ + Hd_hp (hp) = Make_header (wosize, tag, White); + } + allocated_words += Whsize_wosize (wosize); + if (allocated_words > Wsize_bsize (minor_heap_size)) force_minor_gc (); + return Val_hp (hp); +} + +/* Use this function to tell the major GC to speed up when you use + finalized objects to automatically deallocate extra-heap objects. + The GC will do at least one cycle every [max] allocated words; + [mem] is the number of words allocated this time. + Note that only [mem/max] is relevant. You can use numbers of bytes + (or kilobytes, ...) instead of words. You can change units between + calls to [adjust_collector_speed]. +*/ +void adjust_gc_speed (mem, max) + mlsize_t mem, max; +{ + if (max == 0) max = 1; + if (mem > max) mem = max; + extra_heap_memory += ((float) mem / max) * stat_heap_size; + if (extra_heap_memory > stat_heap_size){ + extra_heap_memory = stat_heap_size; + } + if (extra_heap_memory > Wsize_bsize (minor_heap_size) / 2) force_minor_gc (); +} + +/* You must use [initialize] to store the initial value in a field of + a shared block, unless you are sure the value is not a young block. + A block value [v] is a shared block if and only if [Is_in_heap (v)] + is true. +*/ +/* [initialize] never calls the GC, so you may call it while an object is + unfinished (i.e. just after a call to [alloc_shr].) */ +void initialize (fp, val) + value *fp; + value val; +{ + *fp = val; + Assert (Is_in_heap (fp)); + if (Is_block (val) && Is_young (val)){ + *ref_table_ptr++ = fp; + if (ref_table_ptr >= ref_table_limit){ + realloc_ref_table (); + } + } +} + +/* You must use [modify] to change a field of an existing shared block, + unless you are sure the value being overwritten is not a shared block and + the value being written is not a young block. */ +/* [modify] never calls the GC. */ +void modify (fp, val) + value *fp; + value val; +{ + Modify (fp, val); +} + +char *stat_alloc (sz) + asize_t sz; +{ + char *result = (char *) malloc (sz); + + if (result == NULL) raise_out_of_memory (); + return result; +} + +void stat_free (blk) + char * blk; +{ + free (blk); +} + +char *stat_resize (blk, sz) + char *blk; + asize_t sz; +{ + char *result = (char *) realloc (blk, sz); + + if (result == NULL) raise_out_of_memory (); + return result; +} + diff --git a/byterun/memory.h b/byterun/memory.h new file mode 100644 index 0000000000..5df199e02e --- /dev/null +++ b/byterun/memory.h @@ -0,0 +1,88 @@ +/* Allocation macros and functions */ + +#ifndef _memory_ +#define _memory_ + + +#include "config.h" +#include "gc.h" +#include "major_gc.h" +#include "minor_gc.h" +#include "misc.h" +#include "mlvalues.h" + +value alloc_shr P((mlsize_t, tag_t)); +void adjust_gc_speed P((mlsize_t, mlsize_t)); +void modify P((value *, value)); +void initialize P((value *, value)); +char * stat_alloc P((asize_t)); /* Size in bytes. */ +void stat_free P((char *)); +char * stat_resize P((char *, asize_t)); /* Size in bytes. */ + + +#define Alloc_small(result, wosize, tag) { \ + char *_res_ = young_ptr; \ + young_ptr += Bhsize_wosize (wosize); \ + if (young_ptr > young_end){ \ + Setup_for_gc; \ + minor_collection (); \ + Restore_after_gc; \ + _res_ = young_ptr; \ + young_ptr += Bhsize_wosize (wosize); \ + } \ + Hd_hp (_res_) = Make_header ((wosize), (tag), Black); \ + (result) = Val_hp (_res_); \ +} + +/* You must use [Modify] to change a field of an existing shared block, + unless you are sure the value being overwritten is not a shared block and + the value being written is not a young block. */ +/* [Modify] never calls the GC. */ + +#define Modify(fp, val) { \ + value _old_ = *(fp); \ + *(fp) = (val); \ + if (Is_in_heap (fp)){ \ + if (gc_phase == Phase_mark) darken (_old_); \ + if (Is_block (val) && Is_young (val) \ + && ! (Is_block (_old_) && Is_young (_old_))){ \ + *ref_table_ptr++ = (fp); \ + if (ref_table_ptr >= ref_table_limit){ \ + Assert (ref_table_ptr == ref_table_limit); \ + realloc_ref_table (); \ + } \ + } \ + } \ +} \ + +/* [Push_roots] and [Pop_roots] are used for C variables that are GC roots. + * It must contain all values in C local variables at the time the minor GC is + * called. + * Usage: + * At the end of the declarations of your C local variables, add + * [ Push_roots (variable_name, size); ] + * The size is the number of declared roots. They are accessed as + * [ variable_name [0] ... variable_name [size - 1] ]. + * The [variable_name] and the [size] must not be [ _ ]. + * Just before the function return, add a call to [Pop_roots]. + */ + +extern value *local_roots; + +#define Push_roots(name, size) \ + value name [(size) + 2]; \ + { long _; for (_ = 0; _ < (size); name [_++] = Val_long (0)); } \ + name [(size)] = (value) (size); \ + name [(size) + 1] = (value) local_roots; \ + local_roots = &(name [(size)]); + +#define Pop_roots() {local_roots = (value *) local_roots [1]; } + +/* [register_global_root] registers a global C variable as a memory root + for the duration of the program. */ + +void register_global_root P((value *)); + + +#endif /* _memory_ */ + diff --git a/byterun/meta.c b/byterun/meta.c new file mode 100644 index 0000000000..2da8c15c91 --- /dev/null +++ b/byterun/meta.c @@ -0,0 +1,93 @@ +/* Primitives for the toplevel */ + +#include "alloc.h" +#include "interp.h" +#include "major_gc.h" +#include "memory.h" +#include "minor_gc.h" +#include "misc.h" +#include "mlvalues.h" +#include "prims.h" + +value get_global_data(unit) /* ML */ + value unit; +{ + return global_data; +} + +value execute_bytecode(prog, len) /* ML */ + value prog, len; +{ +#if defined(BIG_ENDIAN) + fixup_endianness((code_t) prog, (asize_t) Long_val(len)); +#endif + return interprete((code_t) prog, (asize_t) Long_val(len)); +} + +value realloc_global(size) /* ML */ + value size; +{ + mlsize_t requested_size, actual_size, i; + value new_global_data; + + requested_size = Long_val(size); + actual_size = Wosize_val(global_data); + if (requested_size >= actual_size) { + requested_size = (requested_size + 0x100) & 0xFFFFFF00; + new_global_data = alloc_shr(requested_size, 0); + for (i = 0; i < actual_size; i++) + initialize(&Field(new_global_data, i), Field(global_data, i)); + for (i = actual_size; i < requested_size; i++){ + Field (new_global_data, i) = Val_long (0); + } + global_data = new_global_data; + } + return Atom(0); +} + +value static_alloc(size) /* ML */ + value size; +{ + return (value) stat_alloc((asize_t) Long_val(size)); +} + +value static_free(blk) /* ML */ + value blk; +{ + stat_free((char *) blk); + return Atom(0); +} + +value static_resize(blk, new_size) /* ML */ + value blk, new_size; +{ + return (value) stat_resize((char *) blk, (asize_t) Long_val(new_size)); +} + +value obj_is_block(arg) /* ML */ + value arg; +{ + return Atom(Is_block(arg)); +} + +value obj_block(tag, size) /* ML */ + value tag, size; +{ + value res; + mlsize_t sz, i; + tag_t tg; + + sz = Long_val(size); + tg = Long_val(tag); + if (sz == 0) return Atom(tg); + res = alloc(sz, tg); + for (i = 0; i < sz; i++) + Field(res, i) = Val_long(0); + + return res; +} + +value available_primitives() /* ML */ +{ + return copy_string_array(names_of_cprim); +} diff --git a/byterun/minor_gc.c b/byterun/minor_gc.c new file mode 100644 index 0000000000..f2fd8fbd31 --- /dev/null +++ b/byterun/minor_gc.c @@ -0,0 +1,156 @@ +#include <string.h> +#include "config.h" +#include "fail.h" +#include "gc.h" +#include "gc_ctrl.h" +#include "major_gc.h" +#include "memory.h" +#include "minor_gc.h" +#include "misc.h" +#include "mlvalues.h" +#include "roots.h" + +asize_t minor_heap_size; +char *young_start = NULL, *young_end, *young_ptr = NULL; +static value **ref_table = NULL, **ref_table_end, **ref_table_threshold; +value **ref_table_ptr = NULL, **ref_table_limit; +static asize_t ref_table_size, ref_table_reserve; + +void set_minor_heap_size (size) + asize_t size; +{ + char *new_heap; + value **new_table; + + Assert (size >= Minor_heap_min); + Assert (size <= Minor_heap_max); + Assert (size % sizeof (value) == 0); + if (young_ptr != young_start) minor_collection (); + Assert (young_ptr == young_start); + new_heap = (char *) stat_alloc (size); + if (young_start != NULL){ + stat_free ((char *) young_start); + } + young_start = new_heap; + young_end = new_heap + size; + young_ptr = young_start; + minor_heap_size = size; + + ref_table_size = minor_heap_size / sizeof (value) / 8; + ref_table_reserve = 256; + new_table = (value **) stat_alloc ((ref_table_size + ref_table_reserve) + * sizeof (value *)); + if (ref_table != NULL) stat_free ((char *) ref_table); + ref_table = new_table; + ref_table_ptr = ref_table; + ref_table_threshold = ref_table + ref_table_size; + ref_table_limit = ref_table_threshold; + ref_table_end = ref_table + ref_table_size + ref_table_reserve; +} + +static void oldify (p, v) + value *p; + value v; +{ + value result; + mlsize_t i; + + tail_call: + if (Is_block (v) && Is_young (v)){ + Assert (Hp_val (v) < young_ptr); + if (Is_blue_val (v)){ /* Already forwarded ? */ + *p = Field (v, 0); /* Then the forward pointer is the first field. */ + }else if (Tag_val (v) >= No_scan_tag){ + result = alloc_shr (Wosize_val (v), Tag_val (v)); + bcopy (Bp_val (v), Bp_val (result), Bosize_val (v)); + Hd_val (v) = Bluehd_hd (Hd_val (v)); /* Put the forward flag. */ + Field (v, 0) = result; /* And the forward pointer. */ + *p = result; + }else{ + /* We can do recursive calls before all the fields are filled, because + we will not be calling the major GC. */ + value field0 = Field (v, 0); + mlsize_t sz = Wosize_val (v); + + result = alloc_shr (sz, Tag_val (v)); + *p = result; + Hd_val (v) = Bluehd_hd (Hd_val (v)); /* Put the forward flag. */ + Field (v, 0) = result; /* And the forward pointer. */ + if (sz == 1){ + p = &Field (result, 0); + v = field0; + goto tail_call; + }else{ + oldify (&Field (result, 0), field0); + for (i = 1; i < sz - 1; i++){ + oldify (&Field (result, i), Field (v, i)); + } + p = &Field (result, i); + v = Field (v, i); + goto tail_call; + } + } + }else{ + *p = v; + } +} + +void minor_collection () +{ + value **r; + struct longjmp_buffer raise_buf; + struct longjmp_buffer *old_external_raise; + long prev_alloc_words = allocated_words; + + if (setjmp(raise_buf.buf)) { + fatal_error ("Fatal error: out of memory.\n"); + } + old_external_raise = external_raise; + external_raise = &raise_buf; + + gc_message ("<", 0); + scan_local_roots (oldify); + for (r = ref_table; r < ref_table_ptr; r++) oldify (*r, **r); + stat_minor_words += Wsize_bsize (young_ptr - young_start); + young_ptr = young_start; + ref_table_ptr = ref_table; + ref_table_limit = ref_table_threshold; + gc_message (">", 0); + + external_raise = old_external_raise; + + stat_promoted_words += allocated_words - prev_alloc_words; + ++ stat_minor_collections; + major_collection_slice (); + force_minor_flag = 0; +} + +void realloc_ref_table () +{ Assert (ref_table_ptr == ref_table_limit); + Assert (ref_table_limit <= ref_table_end); + Assert (ref_table_limit >= ref_table_threshold); + + if (ref_table_limit == ref_table_threshold){ + gc_message ("ref_table threshold crossed\n", 0); + ref_table_limit = ref_table_end; + force_minor_gc (); + }else{ /* This will never happen. */ + asize_t sz; + asize_t cur_ptr = ref_table_ptr - ref_table; + Assert (force_minor_flag); + Assert (something_to_do); + ref_table_reserve += 1024; + sz = (ref_table_size + ref_table_reserve) * sizeof (value *); + gc_message ("Growing ref_table to %ldk\n", (long) sz / 1024); +#ifdef MAX_MALLOC_SIZE + if (sz > MAX_MALLOC_SIZE) ref_table = NULL; + else +#endif + ref_table = (value **) realloc ((char *) ref_table, sz); + if (ref_table == NULL) fatal_error ("Fatal error: ref_table overflow\n"); + ref_table_end = ref_table + ref_table_size + ref_table_reserve; + ref_table_threshold = ref_table + ref_table_size; + ref_table_ptr = ref_table + cur_ptr; + ref_table_limit = ref_table_end; + } +} diff --git a/byterun/minor_gc.h b/byterun/minor_gc.h new file mode 100644 index 0000000000..112ba58a97 --- /dev/null +++ b/byterun/minor_gc.h @@ -0,0 +1,19 @@ +#ifndef _minor_gc_ +#define _minor_gc_ + + +#include "misc.h" + +extern char *young_start, *young_ptr, *young_end; +extern value **ref_table_ptr, **ref_table_limit; +extern asize_t minor_heap_size; + +#define Is_young(val) \ + ((addr)(val) > (addr)young_start && (addr)(val) < (addr)young_end) + +extern void set_minor_heap_size P((asize_t)); +extern void minor_collection P((void)); +extern void realloc_ref_table P((void)); + + +#endif /* _minor_gc_ */ diff --git a/byterun/misc.c b/byterun/misc.c new file mode 100644 index 0000000000..c53452acd0 --- /dev/null +++ b/byterun/misc.c @@ -0,0 +1,166 @@ +#include <stdio.h> +#include "config.h" +#include "misc.h" +#ifdef HAS_UI +#include "ui.h" +#endif + +#ifdef DEBUG + +void failed_assert (expr, file, line) + char *expr, *file; + int line; +{ + fprintf (stderr, "Assertion failed: %s; file %s; line %d\n", + expr, file, line); + exit (100); +} + +static unsigned long seed = 0x12345; + +unsigned long not_random () +{ + seed = seed * 65537 + 12345; + return seed; +} + +#endif + +int verb_gc; +int Volatile something_to_do = 0; +int Volatile force_minor_flag = 0; + +void force_minor_gc () +{ + force_minor_flag = 1; + something_to_do = 1; +} + +void gc_message (msg, arg) + char *msg; + unsigned long arg; +{ + if (verb_gc){ +#ifdef HAS_UI + ui_gc_message(msg, arg); +#else + fprintf (stderr, msg, arg); + fflush (stderr); +#endif + } +} + +void fatal_error (msg) + char * msg; +{ +#ifdef HAS_UI + ui_fatal_error("%s", msg); +#else + fprintf (stderr, "%s", msg); + exit(2); +#endif +} + +void fatal_error_arg (fmt, arg) + char * fmt, * arg; +{ +#ifdef HAS_UI + ui_fatal_error(fmt, arg); +#else + fprintf (stderr, fmt, arg); + exit(2); +#endif +} + +#ifdef USING_MEMMOV + +/* This should work on 64-bit machines as well as 32-bit machines. + It assumes a long is the natural size for memory reads and writes. +*/ +void memmov (dst, src, length) + char *dst, *src; + unsigned long length; +{ + unsigned long i; + + if ((unsigned long) dst <= (unsigned long) src){ + + /* Copy in ascending order. */ + if (((unsigned long) src - (unsigned long) dst) % sizeof (long) != 0){ + + /* The pointers are not equal modulo sizeof (long). + Copy byte by byte. */ + for (; length != 0; length--){ + *dst++ = *src++; + } + }else{ + + /* Copy the first few bytes. */ + i = (unsigned long) dst % sizeof (long); + if (i != 0){ + i = sizeof (long) - i; /* Number of bytes to copy. */ + if (i > length) i = length; /* Never copy more than length.*/ + for (; i != 0; i--){ + *dst++ = *src++; --length; + } + } Assert ((unsigned long) dst % sizeof (long) == 0); + Assert ((unsigned long) src % sizeof (long) == 0); + + /* Then copy as many entire words as possible. */ + for (i = length / sizeof (long); i > 0; i--){ + *(long *) dst = *(long *) src; + dst += sizeof (long); src += sizeof (long); + } + + /* Then copy the last few bytes. */ + for (i = length % sizeof (long); i > 0; i--){ + *dst++ = *src++; + } + } + }else{ /* Copy in descending order. */ + src += length; dst += length; + if (((unsigned long) dst - (unsigned long) src) % sizeof (long) != 0){ + + /* The pointers are not equal modulo sizeof (long). + Copy byte by byte. */ + for (; length > 0; length--){ + *--dst = *--src; + } + }else{ + + /* Copy the first few bytes. */ + i = (unsigned long) dst % sizeof (long); + if (i > length) i = length; /* Never copy more than length. */ + for (; i > 0; i--){ + *--dst = *--src; --length; + } + + /* Then copy as many entire words as possible. */ + for (i = length / sizeof (long); i > 0; i--){ + dst -= sizeof (long); src -= sizeof (long); + *(long *) dst = *(long *) src; + } + + /* Then copy the last few bytes. */ + for (i = length % sizeof (long); i > 0; i--){ + *--dst = *--src; + } + } + } +} + +#endif /* USING_MEMMOV */ + +char *aligned_malloc (size, modulo) + asize_t size; + int modulo; +{ + char *raw_mem; + unsigned long aligned_mem; + Assert (modulo < Page_size); + raw_mem = (char *) malloc (size + Page_size); + if (raw_mem == NULL) return NULL; + raw_mem += modulo; /* Address to be aligned */ + aligned_mem = (((unsigned long) raw_mem / Page_size + 1) * Page_size); + return (char *) (aligned_mem - modulo); +} diff --git a/byterun/misc.h b/byterun/misc.h new file mode 100644 index 0000000000..14c7bb3385 --- /dev/null +++ b/byterun/misc.h @@ -0,0 +1,90 @@ +/* Miscellaneous macros and variables. */ + +#ifndef _misc_ +#define _misc_ + + +#include "config.h" + +/* Standard definitions */ + +#ifdef __STDC__ +#include <stddef.h> +#include <stdlib.h> +#endif + +/* Function prototypes */ + +#ifdef __STDC__ +#define P(x) x +#else +#define P(x) () +#endif + +/* Basic types and constants */ + +#ifdef __STDC__ +typedef size_t asize_t; +#else +typedef int asize_t; +#endif + +#ifndef NULL +#define NULL 0 +#endif + +typedef char * addr; + +/* Volatile stuff */ + +#ifdef __STDC__ +#define Volatile volatile +#else +#define Volatile +#endif + +#ifdef __GNUC__ +/* Works only in GCC 2.5 and later */ +#define Noreturn __attribute ((noreturn)) +#else +#define Noreturn +#endif + +/* Assertions */ + +#ifdef DEBUG +#ifdef __STDC__ +#define Assert(x) if (!(x)) failed_assert ( #x , __FILE__, __LINE__) +#else +#ifndef __LINE__ +#define __LINE__ 0 +#endif +#ifndef __FILE__ +#define __FILE__ "(?)" +#endif +#define Assert(x) if (!(x)) failed_assert ("(?)" , __FILE__, __LINE__) +#endif +#else +#define Assert(x) +#endif + +void failed_assert P((char *, char *, int)) Noreturn; +void fatal_error P((char *)) Noreturn; +void fatal_error_arg P((char *, char *)) Noreturn; + +/* GC flags and messages */ + +extern int verb_gc; +extern int Volatile something_to_do; +extern int Volatile force_minor_flag; + +void force_minor_gc P((void)); +void gc_message P((char *, unsigned long)); + +/* Memory routines */ + +void memmov P((char *, char *, unsigned long)); +char * aligned_malloc P((asize_t, int)); +unsigned long not_random P((void)); + +#endif /* _misc_ */ diff --git a/byterun/mlvalues.h b/byterun/mlvalues.h new file mode 100644 index 0000000000..f820f289a6 --- /dev/null +++ b/byterun/mlvalues.h @@ -0,0 +1,213 @@ +#ifndef _mlvalues_ +#define _mlvalues_ + + +#include "config.h" +#include "misc.h" + +/* Definitions + + word: Four bytes on 32 and 16 bit architectures, + eight bytes on 64 bit architectures. + long: A C long integer. + val: The ML representation of something. A long or a block or a pointer + outside the heap. If it is a block, it is the (encoded) address + of an object. If it is a long, it is encoded as well. + object: Something allocated. It always has a header and some + fields or some number of bytes (a multiple of the word size). + field: A word-sized val which is part of an object. + bp: Pointer to the first byte of an object. (a char *) + op: Pointer to the first field of an object. (a value *) + hp: Pointer to the header of an object. (a char *) + int32: Four bytes on all architectures. + + Remark: An object size is always a multiple of the word size, and at least + one word plus the header. + + bosize: Size (in bytes) of the "bytes" part. + wosize: Size (in words) of the "fields" part. + bhsize: Size (in bytes) of the object with its header. + whsize: Size (in words) of the object with its header. + + hd: A header. + tag: The value of the tag field of the header. + color: The value of the color field of the header. + This is for use only by the GC. +*/ + +typedef long value; +typedef unsigned long header_t; +typedef unsigned long mlsize_t; +typedef unsigned int tag_t; /* Actually, an unsigned char */ +typedef unsigned long color_t; +typedef unsigned long mark_t; + +typedef int int32; /* Not portable, but checked by autoconf. */ +typedef unsigned int uint32; /* Seems like a reasonable assumption anyway. */ + +/* Longs vs blocks. */ +#define Is_long(x) (((x) & 1) == 1) +#define Is_block(x) (((x) & 1) == 0) + +/* Conversion macro names are always of the form "to_from". */ +/* Example: Val_long as in "Val from long" or "Val of long". */ +#define Val_long(x) (((long)(x) << 1) + 1) +#define Long_val(x) ((x) >> 1) +#define Max_long ((1L << (8 * sizeof(value) - 2)) - 1) +#define Min_long (-(1L << (8 * sizeof(value) - 2))) +#define Val_int Val_long +#define Int_val(x) ((int) Long_val(x)) + +/* Structure of the header: + +For 16-bit and 32-bit architectures: + +--------+-------+-----+ + | wosize | color | tag | + +--------+-------+-----+ +bits 31 10 9 8 7 0 + +For 64-bit architectures: + + +--------+-------+-----+ + | wosize | color | tag | + +--------+-------+-----+ +bits 63 10 9 8 7 0 + +*/ + +#define Tag_hd(hd) ((tag_t) ((hd) & 0xFF)) +#define Wosize_hd(hd) ((mlsize_t) ((hd) >> 10)) + +#define Hd_val(val) (((header_t *) (val)) [-1]) /* Also an l-value. */ +#define Hd_op(op) (Hd_val (op)) /* Also an l-value. */ +#define Hd_bp(bp) (Hd_val (bp)) /* Also an l-value. */ +#define Hd_hp(hp) (* ((header_t *) (hp))) /* Also an l-value. */ +#define Hp_val(val) ((char *) (((header_t *) (val)) - 1)) +#define Hp_op(op) (Hp_val (op)) +#define Hp_bp(bp) (Hp_val (bp)) +#define Val_op(op) ((value) (op)) +#define Val_hp(hp) ((value) (((header_t *) (hp)) + 1)) +#define Op_hp(hp) ((value *) Val_hp (hp)) +#define Bp_hp(hp) ((char *) Val_hp (hp)) + +#define Num_tags (1 << 8) +#ifdef SIXTYFOUR +#define Max_wosize ((1L << 54) - 1) +#else +#define Max_wosize ((1 << 22) - 1) +#endif + +#define Wosize_val(val) (Wosize_hd (Hd_val (val))) +#define Wosize_op(op) (Wosize_val (op)) +#define Wosize_bp(bp) (Wosize_val (bp)) +#define Wosize_hp(hp) (Wosize_hd (Hd_hp (hp))) +#define Whsize_wosize(sz) ((sz) + 1) +#define Wosize_whsize(sz) ((sz) - 1) +#define Wosize_bhsize(sz) ((sz) / sizeof (value) - 1) +#define Bsize_wsize(sz) ((sz) * sizeof (value)) +#define Wsize_bsize(sz) ((sz) / sizeof (value)) +#define Bhsize_wosize(sz) (Bsize_wsize (Whsize_wosize (sz))) +#define Bhsize_bosize(sz) ((sz) + sizeof (header_t)) +#define Bosize_val(val) (Bsize_wsize (Wosize_val (val))) +#define Bosize_op(op) (Bosize_val (Val_op (op))) +#define Bosize_bp(bp) (Bosize_val (Val_bp (bp))) +#define Bosize_hd(hd) (Bsize_wsize (Wosize_hd (hd))) +#define Whsize_hp(hp) (Whsize_wosize (Wosize_hp (hp))) +#define Whsize_val(val) (Whsize_hp (Hp_val (val))) +#define Whsize_bp(bp) (Whsize_val (Val_bp (bp))) +#define Whsize_hd(hd) (Whsize_wosize (Wosize_hd (hd))) +#define Bhsize_hp(hp) (Bsize_wsize (Whsize_hp (hp))) +#define Bhsize_hd(hd) (Bsize_wsize (Whsize_hd (hd))) + +#ifdef BIG_ENDIAN +#define Tag_val(val) (((unsigned char *) (val)) [-1]) + /* Also an l-value. */ +#define Tag_hp(hp) (((unsigned char *) (hp)) [sizeof(value)-1]) + /* Also an l-value. */ +#else +#define Tag_val(val) (((unsigned char *) (val)) [-sizeof(value)]) + /* Also an l-value. */ +#define Tag_hp(hp) (((unsigned char *) (hp)) [0]) + /* Also an l-value. */ +#endif + +/* The Lowest tag for blocks containing no value. */ +#define No_scan_tag (Num_tags - 4) + + +/* 1- If tag < No_scan_tag : a tuple of fields. */ + +/* Pointer to the first field. */ +#define Op_val(x) ((value *) (x)) +/* Fields are numbered from 0. */ +#define Field(x, i) (((value *)(x)) [i]) /* Also an l-value. */ + +typedef int32 opcode_t; +typedef opcode_t * code_t; + +#define Closure_wosize 2 +#define Closure_tag (No_scan_tag - 1) +#define Code_val(val) (((code_t *) (val)) [0]) /* Also an l-value. */ +#define Env_val(val) (Field(val, 1)) /* Also an l-value. */ + + +/* 2- If tag >= No_scan_tag : a sequence of bytes. */ + +/* Pointer to the first byte */ +#define Bp_val(v) ((char *) (v)) +#define Val_bp(p) ((value) (p)) +/* Bytes are numbered from 0. */ +#define Byte(x, i) (((char *) (x)) [i]) /* Also an l-value. */ +#define Byte_u(x, i) (((unsigned char *) (x)) [i]) /* Also an l-value. */ + +/* Abstract things. Their contents is not traced by the GC; therefore they + must not contain any [value]. +*/ +#define Abstract_tag No_scan_tag + +/* Strings. */ +#define String_tag (No_scan_tag + 1) +#define String_val(x) ((char *) Bp_val(x)) + +/* Floating-point numbers. */ +#define Double_tag (No_scan_tag + 2) +#define Double_wosize ((sizeof(double) / sizeof(value))) +#ifndef ALIGN_DOUBLE +#define Double_val(v) (* (double *) (v)) +#define Store_double_val(v,d) (* (double *) (v) = (d)) +#else +double Double_val P((value)); +void Store_double_val P((value,double)); +#endif + +/* Finalized things. Just like abstract things, but the GC will call the + [Final_fun] before deallocation. +*/ +#define Final_tag (No_scan_tag + 3) +typedef void (*final_fun) P((value)); +#define Final_fun(val) (((final_fun *) (val)) [0]) /* Also an l-value. */ + + +/* 3- Atoms are 0-tuples. They are statically allocated once and for all. */ + +extern header_t first_atoms[]; +#define Atom(tag) (Val_hp (&(first_atoms [tag]))) +#define Is_atom(v) (v >= Atom(0) && v <= Atom(255)) + +/* Booleans are atoms tagged 0 or 1 */ + +#define Val_bool(x) Atom((x) != 0) +#define Bool_val(x) Tag_val(x) +#define Val_false Atom(0) +#define Val_true Atom(1) + +/* The unit value is the atom tagged 0 */ + +#define Val_unit Atom(0) + +/* The table of global identifiers */ + +extern value global_data; + + +#endif /* _mlvalues_ */ diff --git a/byterun/oldlexing.c b/byterun/oldlexing.c new file mode 100644 index 0000000000..3d5d4a0903 --- /dev/null +++ b/byterun/oldlexing.c @@ -0,0 +1,36 @@ +/* The "get_next_char" routine for lexers generated by camllex. */ + +#include "interp.h" +#include "mlvalues.h" +#include "stacks.h" +#include "str.h" + +struct lexer_buffer { + value refill_buff; + value lex_buffer; + value lex_abs_pos; + value lex_start_pos; + value lex_curr_pos; + value lex_last_pos; + value lex_last_action; +}; + +value get_next_char(lexbuf) /* ML */ + struct lexer_buffer * lexbuf; +{ + mlsize_t buffer_len, curr_pos; + + buffer_len = string_length(lexbuf->lex_buffer); + curr_pos = Long_val(lexbuf->lex_curr_pos); + if (curr_pos >= buffer_len) { + Push_roots (r, 1); + r[0] = (value) lexbuf; + callback(lexbuf->refill_buff, (value) lexbuf); + lexbuf = (struct lexer_buffer *) r[0]; + curr_pos = Long_val(lexbuf->lex_curr_pos); + Pop_roots (); + } + lexbuf->lex_curr_pos += 2; + return Val_int(Byte_u(lexbuf->lex_buffer, curr_pos)); +} + diff --git a/byterun/parsing.c b/byterun/parsing.c new file mode 100644 index 0000000000..f051ffed7b --- /dev/null +++ b/byterun/parsing.c @@ -0,0 +1,205 @@ +/* The PDA automaton for parsers generated by camlyacc */ + +#include <stdio.h> +#include "config.h" +#include "mlvalues.h" +#include "memory.h" +#include "alloc.h" + +struct parser_tables { /* Mirrors parse_tables in ../stdlib/parsing.mli */ + value actions; + value transl; + char * lhs; + char * len; + char * defred; + char * dgoto; + char * sindex; + char * rindex; + char * gindex; + value tablesize; + char * table; + char * check; +}; + +struct parser_env { /* Mirrors parser_env in ../stdlib/parsing.ml */ + value s_stack; + value v_stack; + value symb_start_stack; + value symb_end_stack; + value stacksize; + value curr_char; + value lval; + value symb_start; + value symb_end; + value asp; + value rule_len; + value rule_number; + value sp; + value state; +}; + +#ifdef BIG_ENDIAN +#define Short(tbl,n) \ + (*((unsigned char *)((tbl) + (n) * sizeof(short))) + \ + (*((schar *)((tbl) + (n) * sizeof(short) + 1)) << 8)) +#else +#define Short(tbl,n) (((short *)(tbl))[n]) +#endif + +#ifdef DEBUG +int parser_trace = 0; +#define Trace(act) if(parser_trace) act +#else +#define Trace(act) +#endif + +/* Input codes */ + +#define START 0 /* Mirrors parser_input in ../stdlib/parsing.ml */ +#define TOKEN_READ 1 +#define STACKS_GROWN_1 2 +#define STACKS_GROWN_2 3 +#define SEMANTIC_ACTION_COMPUTED 4 + +/* Output codes */ + +#define READ_TOKEN Atom(0) /* Mirrors parser_output in ../stdlib/parsing.ml */ +#define RAISE_PARSE_ERROR Atom(1) +#define GROW_STACKS_1 Atom(2) +#define GROW_STACKS_2 Atom(3) +#define COMPUTE_SEMANTIC_ACTION Atom(4) + +/* The pushdown automata */ + +value parse_engine(tables, env, cmd, arg) /* ML */ + struct parser_tables * tables; + struct parser_env * env; + value cmd; + value arg; +{ + int state; + mlsize_t sp; + int n, n1, n2, m, state1; + + switch(Tag_val(cmd)) { + + case START: + state = 0; + sp = Int_val(env->sp); + + loop: + Trace(printf("Loop %d\n", state)); + n = Short(tables->defred, state); + if (n != 0) goto reduce; + if (Int_val(env->curr_char) >= 0) goto testshift; + env->sp = Val_int(sp); + env->state = Val_int(state); + return READ_TOKEN; + /* The ML code calls the lexer and updates */ + /* symb_start and symb_end */ + case TOKEN_READ: + sp = Int_val(env->sp); + state = Int_val(env->state); + env->curr_char = Field(tables->transl, Tag_val(arg)); + switch (Wosize_val(arg)) { + case 0: + env->lval = Val_long(0); break; + case 1: + modify(&env->lval, Field(arg, 0)); break; + default: { + value tuple; + mlsize_t size, i; + Push_roots(r, 4); + r[0] = (value) tables; + r[1] = (value) env; + r[2] = cmd; + r[3] = arg; + size = Wosize_val(arg); + tuple = alloc_tuple(size); + tables = (struct parser_tables *) r[0]; + env = (struct parser_env *) r[1]; + cmd = r[2]; + arg = r[3]; + for (i = 0; i < size; i++) Field(tuple, i) = Field(arg, i); + modify(&env->lval, tuple); + Pop_roots(); + break; } + } + Trace(printf("Token %d (0x%lx)\n", Int_val(env->curr_char), env->lval)); + + testshift: + n1 = Short(tables->sindex, state); + n2 = n1 + Int_val(env->curr_char); + if (n1 != 0 && n2 >= 0 && n2 <= Int_val(tables->tablesize) && + Short(tables->check, n2) == Int_val(env->curr_char)) goto shift; + n1 = Short(tables->rindex, state); + n2 = n1 + Int_val(env->curr_char); + if (n1 != 0 && n2 >= 0 && n2 <= Int_val(tables->tablesize) && + Short(tables->check, n2) == Int_val(env->curr_char)) { + n = Short(tables->table, n2); + goto reduce; + } + env->sp = Val_int(sp); + env->state = Val_int(state); + return RAISE_PARSE_ERROR; + /* The ML code raises the Parse_error exn */ + shift: + state = Short(tables->table, n2); + Trace(printf("Shift %d\n", state)); + sp++; + if (sp < Long_val(env->stacksize)) goto push; + env->sp = Val_int(sp); + env->state = Val_int(state); + return GROW_STACKS_1; + /* The ML code resizes the stacks */ + case STACKS_GROWN_1: + sp = Int_val(env->sp); + state = Int_val(env->state); + push: + Field(env->s_stack, sp) = Val_int(state); + modify(&Field(env->v_stack, sp), env->lval); + Field(env->symb_start_stack, sp) = env->symb_start; + Field(env->symb_end_stack, sp) = env->symb_end; + env->curr_char = Val_int(-1); + goto loop; + + reduce: + Trace(printf("Reduce %d\n", n)); + m = Short(tables->len, n); + env->asp = Val_int(sp); + env->rule_number = Val_int(n); + env->rule_len = Val_int(m); + sp = sp - m + 1; + m = Short(tables->lhs, n); + state1 = Int_val(Field(env->s_stack, sp - 1)); + n1 = Short(tables->gindex, m); + n2 = n1 + state1; + if (n1 != 0 && n2 >= 0 && n2 <= Int_val(tables->tablesize) && + Short(tables->check, n2) == state1) { + state = Short(tables->table, n2); + } else { + state = Short(tables->dgoto, m); + } + if (sp < Long_val(env->stacksize)) goto semantic_action; + env->sp = Val_int(sp); + env->state = Val_int(state); + return GROW_STACKS_2; + /* The ML code resizes the stacks */ + case STACKS_GROWN_2: + sp = Int_val(env->sp); + state = Int_val(env->state); + semantic_action: + env->sp = Val_int(sp); + env->state = Val_int(state); + return COMPUTE_SEMANTIC_ACTION; + /* The ML code calls the semantic action */ + case SEMANTIC_ACTION_COMPUTED: + sp = Int_val(env->sp); + state = Int_val(env->state); + Field(env->s_stack, sp) = Val_int(state); + modify(&Field(env->v_stack, sp), arg); + Field(env->symb_end_stack, sp) = + Field(env->symb_end_stack, Int_val(env->asp)); + goto loop; + } +} diff --git a/byterun/prims.h b/byterun/prims.h new file mode 100644 index 0000000000..78478ce7a5 --- /dev/null +++ b/byterun/prims.h @@ -0,0 +1,11 @@ +/* Interface with C primitives. */ + +#ifndef _prims_ +#define _prims_ + +typedef value (*c_primitive)(); + +extern c_primitive cprim[]; +extern char * names_of_cprim[]; + +#endif /* _prims_ */ diff --git a/byterun/reverse.h b/byterun/reverse.h new file mode 100644 index 0000000000..74e5ccbc28 --- /dev/null +++ b/byterun/reverse.h @@ -0,0 +1,54 @@ +/* Swap byte-order in 16-bit, 32-bit and 64-bit words */ + +#ifndef _reverse_ +#define _reverse_ + + +#define Reverse_short(s) { \ + char * _p; \ + int _a; \ + _p = (char *) (s); \ + _a = _p[0]; \ + _p[0] = _p[1]; \ + _p[1] = _a; \ +} + +#define Reverse_int32(w) { \ + char * _p; \ + int _a; \ + _p = (char *) (w); \ + _a = _p[0]; \ + _p[0] = _p[3]; \ + _p[3] = _a; \ + _a = _p[1]; \ + _p[1] = _p[2]; \ + _p[2] = _a; \ +} + +#define Reverse_int64(d) { \ + char * _p; \ + int _a; \ + _p = (char *) (d); \ + _a = _p[0]; \ + _p[0] = _p[7]; \ + _p[7] = _a; \ + _a = _p[1]; \ + _p[1] = _p[6]; \ + _p[6] = _a; \ + _a = _p[2]; \ + _p[2] = _p[5]; \ + _p[5] = _a; \ + _a = _p[3]; \ + _p[3] = _p[4]; \ + _p[4] = _a; \ +} + +#ifdef SIXTYFOUR +#define Reverse_word Reverse_int64 +#else +#define Reverse_word Reverse_int32 +#endif + +#define Reverse_double Reverse_int64 + +#endif /* _reverse_ */ diff --git a/byterun/roots.c b/byterun/roots.c new file mode 100644 index 0000000000..eb0f5dfd0f --- /dev/null +++ b/byterun/roots.c @@ -0,0 +1,49 @@ +/* To walk the memory roots for garbage collection */ + +#include "memory.h" +#include "misc.h" +#include "mlvalues.h" +#include "roots.h" +#include "stacks.h" + +value * local_roots = NULL; + +struct global_root { + value * root; + struct global_root * next; +}; + +static struct global_root * global_roots = NULL; + +void scan_local_roots (copy_fn) + void (*copy_fn) (); +{ + register value * sp; + value * block; + struct global_root * gr; + + /* The stack */ + for (sp = extern_sp; sp < stack_high; sp++) { + copy_fn (sp, *sp); + } + /* Local C roots */ + for (block = local_roots; block != NULL; block = (value *) block [1]){ + for (sp = block - (long) block [0]; sp < block; sp++){ + copy_fn (sp, *sp); + } + } + /* Global C roots */ + for (gr = global_roots; gr != NULL; gr = gr->next) { + copy_fn(gr->root, *(gr->root)); + } +} + +void register_global_root(r) + value * r; +{ + struct global_root * gr; + gr = (struct global_root *) stat_alloc(sizeof(struct global_root)); + gr->root = r; + gr->next = global_roots; + global_roots = gr; +} diff --git a/byterun/roots.h b/byterun/roots.h new file mode 100644 index 0000000000..67732f3cb0 --- /dev/null +++ b/byterun/roots.h @@ -0,0 +1,9 @@ +#ifndef _roots_ +#define _roots_ + +#include "misc.h" + +void scan_local_roots P((void (*copy_fn) (value *, value))); + + +#endif /* _roots_ */ diff --git a/byterun/signals.c b/byterun/signals.c new file mode 100644 index 0000000000..62aabd84cc --- /dev/null +++ b/byterun/signals.c @@ -0,0 +1,158 @@ +#include <signal.h> +#include "alloc.h" +#include "config.h" +#include "fail.h" +#include "interp.h" +#include "memory.h" +#include "misc.h" +#include "mlvalues.h" +#include "roots.h" +#include "signals.h" + +static Volatile int async_signal_mode = 0; +Volatile int pending_signal = 0; +value signal_handlers = 0; + +static void execute_signal(signal_number) + int signal_number; +{ + Assert (!async_signal_mode); + callback(Field(signal_handlers, signal_number), Val_int(signal_number)); +} + +void handle_signal(signal_number) + int signal_number; +{ +#ifndef BSD_SIGNALS + signal(signal_number, handle_signal); +#endif + if (async_signal_mode){ + leave_blocking_section (); + execute_signal(signal_number); + enter_blocking_section (); + }else{ + pending_signal = signal_number; + something_to_do = 1; + } +} + +void enter_blocking_section() +{ + int temp; + + while (1){ + Assert (!async_signal_mode); + /* If a signal arrives between the next two instructions, + it will be lost. */ + temp = pending_signal; pending_signal = 0; + if (temp) execute_signal(temp); + async_signal_mode = 1; + if (!pending_signal) break; + async_signal_mode = 0; + } +} + +/* This function may be called from outside a blocking section. */ +void leave_blocking_section() +{ + async_signal_mode = 0; +} + +#ifndef SIGABRT +#define SIGABRT 0 +#endif +#ifndef SIGALRM +#define SIGALRM 0 +#endif +#ifndef SIGFPE +#define SIGFPE 0 +#endif +#ifndef SIGHUP +#define SIGHUP 0 +#endif +#ifndef SIGILL +#define SIGILL 0 +#endif +#ifndef SIGINT +#define SIGINT 0 +#endif +#ifndef SIGKILL +#define SIGKILL 0 +#endif +#ifndef SIGPIPE +#define SIGPIPE 0 +#endif +#ifndef SIGQUIT +#define SIGQUIT 0 +#endif +#ifndef SIGSEGV +#define SIGSEGV 0 +#endif +#ifndef SIGTERM +#define SIGTERM 0 +#endif +#ifndef SIGUSR1 +#define SIGUSR1 0 +#endif +#ifndef SIGUSR2 +#define SIGUSR2 0 +#endif +#ifndef SIGCHLD +#define SIGCHLD 0 +#endif +#ifndef SIGCONT +#define SIGCONT 0 +#endif +#ifndef SIGSTOP +#define SIGSTOP 0 +#endif +#ifndef SIGTSTP +#define SIGTSTP 0 +#endif +#ifndef SIGTTIN +#define SIGTTIN 0 +#endif +#ifndef SIGTTOU +#define SIGTTOU 0 +#endif + +static int posix_signals[] = { + SIGABRT, SIGALRM, SIGFPE, SIGHUP, SIGILL, SIGINT, SIGKILL, SIGPIPE, + SIGQUIT, SIGSEGV, SIGTERM, SIGUSR1, SIGUSR2, SIGCHLD, SIGCONT, + SIGSTOP, SIGTSTP, SIGTTIN, SIGTTOU +}; + +value install_signal_handler(signal_number, action) /* ML */ + value signal_number, action; +{ + int sig = Int_val(signal_number); + if (sig < 0) { + sig = posix_signals[-sig-1]; + if (sig == 0) invalid_argument("Sys.signal: unavailable signal"); + } + switch(Tag_val(action)) { + case 0: /* Signal_default */ + signal(sig, SIG_DFL); + break; + case 1: /* Signal_ignore */ + signal(sig, SIG_IGN); + break; + case 2: /* Signal_handle */ + if (signal_handlers == 0) { + int i; + Push_roots(r, 1); + r[0] = action; + signal_handlers = alloc_tuple(32); + action = r[0]; + Pop_roots(); + for (i = 0; i < 32; i++) Field(signal_handlers, i) = Val_int(0); + register_global_root(&signal_handlers); + } + modify(&Field(signal_handlers, sig), Field(action, 0)); + signal(sig, handle_signal); + break; + default: + Assert(0); + } + return Val_unit; +} diff --git a/byterun/signals.h b/byterun/signals.h new file mode 100644 index 0000000000..83fbb043cc --- /dev/null +++ b/byterun/signals.h @@ -0,0 +1,13 @@ +#ifndef _signals_ +#define _signals_ + +#include "misc.h" + +extern value signal_handlers; +extern Volatile int pending_signal; + +void enter_blocking_section P((void)); +void leave_blocking_section P((void)); + +#endif /* _signals_ */ + diff --git a/byterun/stacks.c b/byterun/stacks.c new file mode 100644 index 0000000000..c831538590 --- /dev/null +++ b/byterun/stacks.c @@ -0,0 +1,60 @@ +/* To initialize and resize the stacks */ + +#include <string.h> +#include "config.h" +#include "fail.h" +#include "misc.h" +#include "mlvalues.h" +#include "stacks.h" + +value * stack_low; +value * stack_high; +value * stack_threshold; +value * extern_sp; +value * trapsp; +value global_data; + +void init_stack() +{ + stack_low = (value *) stat_alloc(Stack_size); + stack_high = stack_low + Stack_size / sizeof (value); + stack_threshold = stack_low + Stack_threshold / sizeof (value); + extern_sp = stack_high; + trapsp = stack_high; +} + +void realloc_stack() +{ + asize_t size; + value * new_low, * new_high, * new_sp; + value * p; + + Assert(extern_sp >= stack_low); + size = stack_high - stack_low; + if (size >= Max_stack_size) + raise_out_of_memory(); + size *= 2; + gc_message ("Growing stack to %ld kB.\n", + (long) size * sizeof(value) / 1024); + new_low = (value *) stat_alloc(size * sizeof(value)); + new_high = new_low + size; + +#define shift(ptr) \ + ((char *) new_high - ((char *) stack_high - (char *) (ptr))) + + new_sp = (value *) shift(extern_sp); + bcopy((char *) extern_sp, + (char *) new_sp, + (stack_high - extern_sp) * sizeof(value)); + stat_free((char *) stack_low); + trapsp = (value *) shift(trapsp); + for (p = trapsp; p < new_high; p = Trap_link(p)) + Trap_link(p) = (value *) shift(Trap_link(p)); + stack_low = new_low; + stack_high = new_high; + stack_threshold = stack_low + Stack_threshold / sizeof (value); + extern_sp = new_sp; + +#undef shift +} + diff --git a/byterun/stacks.h b/byterun/stacks.h new file mode 100644 index 0000000000..aa68532f8c --- /dev/null +++ b/byterun/stacks.h @@ -0,0 +1,26 @@ +/* structure of the stacks */ + +#ifndef _stacks_ +#define _stacks_ + + +#include "misc.h" +#include "mlvalues.h" +#include "memory.h" + +extern value * stack_low; +extern value * stack_high; +extern value * stack_threshold; +extern value * extern_sp; +extern value * trapsp; + +#define Trap_pc(tp) (((code_t *)(tp))[0]) +#define Trap_link(tp) (((value **)(tp))[1]) + +void reset_roots P((void)); +void init_stack P((void)); +void realloc_stack P((void)); + + +#endif /* _stacks_ */ + diff --git a/byterun/str.c b/byterun/str.c new file mode 100644 index 0000000000..c5f5eedb2d --- /dev/null +++ b/byterun/str.c @@ -0,0 +1,81 @@ +/* Operations on strings */ + +#include <string.h> +#include "alloc.h" +#include "fail.h" +#include "mlvalues.h" +#include "misc.h" + +mlsize_t string_length(s) + value s; +{ + mlsize_t temp; + temp = Bosize_val(s) - 1; + Assert (Byte (s, temp - Byte (s, temp)) == 0); + return temp - Byte (s, temp); +} + +value ml_string_length(s) /* ML */ + value s; +{ + mlsize_t temp; + temp = Bosize_val(s) - 1; + Assert (Byte (s, temp - Byte (s, temp)) == 0); + return Val_long(temp - Byte (s, temp)); +} + +value create_string(len) /* ML */ + value len; +{ + mlsize_t size = Long_val(len); + if (size > Max_wosize * sizeof(value) - 2) invalid_argument("String.create"); + return alloc_string(size); +} + +value blit_string(argv, argc) /* ML */ + value * argv; + int argc; +{ + bcopy(&Byte(argv[0], Long_val(argv[1])), + &Byte(argv[2], Long_val(argv[3])), + Int_val(argv[4])); + return Atom(0); +} + +value fill_string(s, offset, len, init) /* ML */ + value s, offset, len, init; +{ + register char * p; + register mlsize_t n; + register char c; + + c = Long_val(init); + for(p = &Byte(s, Long_val(offset)), n = Long_val(len); + n > 0; n--, p++) + *p = c; + return Atom(0); +} + +static unsigned char printable_chars_ascii[] = /* 0x20-0x7E */ + "\000\000\000\000\377\377\377\377\377\377\377\377\377\377\377\177\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000"; +static unsigned char printable_chars_iso[] = /* 0x20-0x7E 0xA1-0xFF */ + "\000\000\000\000\377\377\377\377\377\377\377\377\377\377\377\177\000\000\000\000\376\377\377\377\377\377\377\377\377\377\377\377"; + +value is_printable(chr) /* ML */ + value chr; +{ + int c; + static int iso_charset = -1; + unsigned char * printable_chars; + + if (iso_charset == -1) { + char * lc_ctype = (char *) getenv("LC_CTYPE"); + if (lc_ctype != 0 && strcmp(lc_ctype, "iso_8859_1") == 0) + iso_charset = 1; + else + iso_charset = 0; + } + printable_chars = iso_charset ? printable_chars_iso : printable_chars_ascii; + c = Int_val(chr); + return Val_bool(printable_chars[c >> 3] & (1 << (c & 7))); +} diff --git a/byterun/str.h b/byterun/str.h new file mode 100644 index 0000000000..6f6373e57f --- /dev/null +++ b/byterun/str.h @@ -0,0 +1,11 @@ +#ifndef _str_ +#define _str_ + + +#include "misc.h" +#include "mlvalues.h" + +mlsize_t string_length P((value)); + + +#endif /* _str_ */ diff --git a/byterun/sys.c b/byterun/sys.c new file mode 100644 index 0000000000..10f05e2568 --- /dev/null +++ b/byterun/sys.c @@ -0,0 +1,186 @@ +/* Basic system calls */ + +#include <errno.h> +#include <fcntl.h> +#include <signal.h> +#include <string.h> +#include <unistd.h> +#include "config.h" +#include "alloc.h" +#include "fail.h" +#include "instruct.h" +#include "mlvalues.h" +#include "signals.h" +#include "stacks.h" + +extern int errno; + +#ifdef HAS_STRERROR + +extern char * strerror(); + +char * error_message() +{ + return strerror(errno); +} + +#else + +extern int sys_nerr; +extern char * sys_errlist []; + +char * error_message() +{ + if (errno < 0 || errno >= sys_nerr) + return "unknown error"; + else + return sys_errlist[errno]; +} + +#endif /* HAS_STRERROR */ + +void sys_error(arg) + char * arg; +{ + char * err = error_message(); + int err_len = strlen(err); + int arg_len; + value str; + + if (arg == NULL) { + str = alloc_string(err_len); + bcopy(err, &Byte(str, 0), err_len); + } else { + arg_len = strlen(arg); + str = alloc_string(arg_len + 2 + err_len); + bcopy(arg, &Byte(str, 0), arg_len); + bcopy(": ", &Byte(str, arg_len), 2); + bcopy(err, &Byte(str, arg_len + 2), err_len); + } + raise_sys_error(str); +} + +void sys_exit(retcode) /* ML */ + value retcode; +{ + exit(Int_val(retcode)); +} + +#ifndef O_BINARY +#define O_BINARY 0 +#endif +#ifndef O_TEXT +#define O_TEXT 0 +#endif + +static int sys_open_flags[] = { + O_RDONLY, O_WRONLY, O_RDWR, O_APPEND, O_CREAT, O_TRUNC, O_EXCL, + O_BINARY, O_TEXT +}; + +value sys_open(path, flags, perm) /* ML */ + value path, flags, perm; +{ + int ret; + ret = open(String_val(path), convert_flag_list(flags, sys_open_flags), + Int_val(perm)); + if (ret == -1) sys_error(String_val(path)); + return Val_long(ret); +} + +value sys_close(fd) /* ML */ + value fd; +{ + if (close(Int_val(fd)) != 0) sys_error(NULL); + return Atom(0); +} + +value sys_remove(name) /* ML */ + value name; +{ + int ret; + ret = unlink(String_val(name)); + if (ret != 0) sys_error(String_val(name)); + return Atom(0); +} + +value sys_rename(oldname, newname) /* ML */ + value oldname, newname; +{ + if (rename(String_val(oldname), String_val(newname)) != 0) + sys_error(String_val(oldname)); + return Atom(0); +} + +value sys_chdir(dirname) /* ML */ + value dirname; +{ + if (chdir(String_val(dirname)) != 0) sys_error(String_val(dirname)); + return Atom(0); +} + +extern char * getenv(); + +value sys_getenv(var) /* ML */ + value var; +{ + char * res; + + res = getenv(String_val(var)); + if (res == 0) raise_not_found(); + return copy_string(res); +} + +static char ** main_argv; + +value sys_get_argv(unit) /* ML */ + value unit; +{ + return copy_string_array(main_argv); +} + +void sys_init(argv) + char ** argv; +{ + main_argv = argv; +} + +value sys_system_command(command) /* ML */ + value command; +{ + int retcode = system(String_val(command)); + if (retcode == -1) sys_error(String_val(command)); + return Val_int(retcode); +} + +/* Search path function */ + +char * searchpath(name) + char * name; +{ + static char fullname[512]; + char * path; + char * p; + char * q; + + for (p = name; *p != 0; p++) { + if (*p == '/') return name; + } + path = getenv("PATH"); + if (path == 0) return 0; + while(1) { + p = fullname; + while (*path != 0 && *path != ':') { + *p++ = *path++; + } + if (p != fullname) *p++ = '/'; + q = name; + while (*q != 0) { + *p++ = *q++; + } + *p = 0; + if (access(fullname, 1) == 0) return fullname; + if (*path == 0) return 0; + path++; + } +} diff --git a/byterun/sys.h b/byterun/sys.h new file mode 100644 index 0000000000..1d4e3b7a2a --- /dev/null +++ b/byterun/sys.h @@ -0,0 +1,10 @@ +#ifndef _sys_ +#define _sys_ + +#include "misc.h" + +void sys_error P((char *)); +void sys_init P((char **)); +void sys_exit P((value)) Noreturn; + +#endif /* _sys_ */ diff --git a/byterun/terminfo.c b/byterun/terminfo.c new file mode 100644 index 0000000000..214984892f --- /dev/null +++ b/byterun/terminfo.c @@ -0,0 +1,126 @@ +/* Read and output terminal commands */ + +#include "config.h" +#include "alloc.h" +#include "fail.h" +#include "io.h" +#include "mlvalues.h" + +#ifdef HAS_TERMINFO + +#undef getch +#include <curses.h> +#include <term.h> + +value terminfo_setup(unit) /* ML */ + value unit; +{ + if (setupterm(NULL, 1, 1) != 1) failwith("Terminfo.setupterm"); + return Val_unit; +} + +value terminfo_getstr(capa) /* ML */ + value capa; +{ + char * res = (char *) tigetstr(String_val(capa)); + if (res == (char *)(-1)) raise_not_found(); + return copy_string(res); +} + +value terminfo_getnum(capa) /* ML */ + value capa; +{ + int res = tigetnum(String_val(capa)); + if (res == -2) raise_not_found(); + return Val_int(res); +} + +#else + +#ifdef HAS_TERMCAP + +#define _BSD /* For DEC OSF1 */ +#undef getch +#include <curses.h> + +value terminfo_setup(unit) + value unit; +{ + static buffer[1024]; + if (tgetent(buffer, getenv("TERM")) != 1) failwith("Terminfo.setupterm"); + return Val_unit; +} + +value terminfo_getstr(capa) + value capa; +{ + char buff[1024]; + char * p = buff; + if (tgetstr(String_val(capa), &p) == 0) raise_not_found(); + return copy_string(buff); +} + +value terminfo_getnum(capa) + value capa; +{ + int res = tgetnum(String_val(capa)); + if (res == -1) raise_not_found(); + return Val_int(res); +} + +#else + +value terminfo_setup(unit) + value unit; +{ + failwith("Terminfo.setupterm"); + return Val_unit; +} + +value terminfo_getstr(capa) + value capa; +{ + raise_not_found(); + return Val_unit; +} + +value terminfo_getnum(capa) + value capa; +{ + raise_not_found(); + return Val_unit; +} + +#endif +#endif + +#if defined HAS_TERMINFO || defined HAS_TERMCAP + +static struct channel * terminfo_putc_channel; + +static int terminfo_putc(c) + int c; +{ + putch(terminfo_putc_channel, c); + return c; +} + +value terminfo_puts(chan, str, count) /* ML */ + struct channel * chan; + value str, count; +{ + terminfo_putc_channel = chan; + tputs(String_val(str), Int_val(count), terminfo_putc); + return Val_unit; +} + +#else + +value terminfo_puts(chan, str, count) + struct channel * chan; + value str, count; +{ + invalid_argument("Terminfo.puts"); +} + +#endif diff --git a/driver/compile.ml b/driver/compile.ml new file mode 100644 index 0000000000..91ff7e9e21 --- /dev/null +++ b/driver/compile.ml @@ -0,0 +1,99 @@ +(* The batch compiler *) + +open Misc +open Config +open Format +open Typedtree + +(* Initialize the search path. + The current directory is always searched first, + then the directories specified with the -I option (in command-line order), + then the standard library directory. *) + +let init_path () = + load_path := "" :: List.rev (Config.standard_library :: !Clflags.include_dirs); + Env.reset_cache() + +(* Return the initial environment in which compilation proceeds. *) + +let initial_env () = + init_path(); + try + if !Clflags.nopervasives + then Env.initial + else Env.open_pers_signature "Pervasives" Env.initial + with Not_found -> + fatal_error "cannot open Pervasives.cmi" + +(* Compute the CRC of a file *) + +let file_crc ic = + seek_in ic 0; + Crc.for_channel ic (in_channel_length ic) + +(* Compile a .mli file *) + +let interface sourcefile = + let prefixname = Filename.chop_suffix sourcefile ".mli" in + let modulename = capitalize(Filename.basename prefixname) in + let ic = open_in_bin sourcefile in + let lb = Lexing.from_channel ic in + Location.input_name := sourcefile; + try + let sg = Typemod.transl_signature (initial_env()) (Parse.interface lb) in + let crc = file_crc ic in + close_in ic; + if !Clflags.print_types then (Printtyp.signature sg; print_flush()); + Env.save_signature sg modulename crc (prefixname ^ ".cmi") + with x -> + close_in ic; + raise x + +let print_if flag printer arg = + if !flag then begin printer arg; print_newline() end; + arg + +let implementation sourcefile = + let prefixname = Filename.chop_suffix sourcefile ".ml" in + let modulename = capitalize(Filename.basename prefixname) in + let objfile = prefixname ^ ".cmo" in + let ic = open_in_bin sourcefile in + let lb = Lexing.from_channel ic in + let oc = open_out_bin objfile in + Location.input_name := sourcefile; + try + let (str, sg, finalenv) = + Typemod.type_structure (initial_env()) (Parse.implementation lb) in + if !Clflags.print_types then (Printtyp.signature sg; print_flush()); + let (coercion, crc) = + if file_exists (prefixname ^ ".mli") then begin + let (dclsig, crc) = + Env.read_signature modulename (prefixname ^ ".cmi") in + (Includemod.signatures Env.initial sg dclsig, crc) + end else begin + let crc = file_crc ic in + Env.save_signature sg modulename crc (prefixname ^ ".cmi"); + (Tcoerce_none, crc) + end in + Emitcode.to_file oc modulename crc + (print_if Clflags.dump_instr Printinstr.instrlist + (Codegen.compile_implementation + (print_if Clflags.dump_lambda Printlambda.lambda + (Translmod.transl_implementation modulename str coercion)))); + close_in ic; + close_out oc + with x -> + close_in ic; + close_out oc; + remove_file objfile; + raise x + +let c_file name = + if Sys.command (concat_strings " " ( + Config.c_compiler :: + "-c" :: + List.map (fun dir -> "-I" ^ dir) (List.rev !Clflags.include_dirs) @ + ("-I" ^ Config.standard_library) :: + name :: + [])) <> 0 + then exit 2 diff --git a/driver/compile.mli b/driver/compile.mli new file mode 100644 index 0000000000..0df7451f36 --- /dev/null +++ b/driver/compile.mli @@ -0,0 +1,8 @@ +(* Compile a .ml or .mli file *) + +val interface: string -> unit +val implementation: string -> unit +val c_file: string -> unit + +val initial_env: unit -> Env.t +val init_path: unit -> unit diff --git a/driver/errors.ml b/driver/errors.ml new file mode 100644 index 0000000000..438418125b --- /dev/null +++ b/driver/errors.ml @@ -0,0 +1,42 @@ +(* Error report *) + +open Format +open Location + +(* Report an error *) + +let report_error exn = + open_hovbox 0; + begin match exn with + Lexer.Error(err, start, stop) -> + Location.print {loc_start = start; loc_end = stop}; + Lexer.report_error err + | Parse.Error(start, stop) -> + Location.print {loc_start = start; loc_end = stop}; + print_string "Syntax error" + | Env.Error err -> + Env.report_error err + | Typecore.Error(loc, err) -> + Location.print loc; Typecore.report_error err + | Typetexp.Error(loc, err) -> + Location.print loc; Typetexp.report_error err + | Typedecl.Error(loc, err) -> + Location.print loc; Typedecl.report_error err + | Includemod.Error err -> + Includemod.report_error err + | Typemod.Error(loc, err) -> + Location.print loc; Typemod.report_error err + | Translcore.Error(loc, err) -> + Location.print loc; Translcore.report_error err + | Symtable.Error code -> + Symtable.report_error code + | Linker.Error code -> + Linker.report_error code + | Librarian.Error code -> + Librarian.report_error code + | Sys_error msg -> + print_string "I/O error: "; print_string msg + | x -> + close_box(); raise x + end; + close_box(); print_newline() diff --git a/driver/errors.mli b/driver/errors.mli new file mode 100644 index 0000000000..abe8636153 --- /dev/null +++ b/driver/errors.mli @@ -0,0 +1,3 @@ +(* Error report *) + +val report_error: exn -> unit diff --git a/driver/main.ml b/driver/main.ml new file mode 100644 index 0000000000..d80a105bee --- /dev/null +++ b/driver/main.ml @@ -0,0 +1,62 @@ +open Clflags + +let process_file name = + if Filename.check_suffix name ".ml" then begin + Compile.implementation name; + objfiles := (Filename.chop_suffix name ".ml" ^ ".cmo") :: !objfiles + end + else if Filename.check_suffix name ".mli" then + Compile.interface name + else if Filename.check_suffix name ".cmo" + or Filename.check_suffix name ".cma" then + objfiles := name :: !objfiles + else if Filename.check_suffix name ".o" + or Filename.check_suffix name ".a" then + ccobjs := name :: !ccobjs + else if Filename.check_suffix name ".c" then begin + Compile.c_file name; + ccobjs := (Filename.chop_suffix (Filename.basename name) ".c" ^ ".o") + :: !ccobjs + end + else + raise(Arg.Bad("don't know what to do with " ^ name)) + +let print_version_number () = + print_string "The Caml Special Light compiler, version "; + print_string Config.version; + print_newline() + +let main () = + try + Arg.parse + ["-I", Arg.String(fun dir -> include_dirs := dir :: !include_dirs); + "-c", Arg.Unit(fun () -> compile_only := true); + "-o", Arg.String(fun s -> exec_name := s; archive_name := s); + "-i", Arg.Unit(fun () -> print_types := true); + "-a", Arg.Unit(fun () -> make_archive := true); + "-fast", Arg.Unit(fun () -> fast := true); + "-nopervasives", Arg.Unit(fun () -> nopervasives := true); + "-custom", Arg.Unit(fun () -> custom_runtime := true); + "-ccopt", Arg.String(fun s -> ccopts := s :: !ccopts); + "-l", Arg.String(fun s -> ccobjs := s :: !ccobjs); + "-linkall", Arg.Unit(fun s -> link_everything := true); + "-dlambda", Arg.Unit(fun () -> dump_lambda := true); + "-dinstr", Arg.Unit(fun () -> dump_instr := true); + "-v", Arg.Unit print_version_number; + "-", Arg.String process_file] + process_file; + if !make_archive then begin + Compile.init_path(); + Librarian.create_archive (List.rev !objfiles) !archive_name + end + else if not !compile_only & !objfiles <> [] then begin + Compile.init_path(); + Linker.link (List.rev !objfiles) + end; + exit 0 + with x -> + Format.set_formatter_output stderr; + Errors.report_error x; + exit 2 + +let _ = Printexc.catch main () diff --git a/lex/.depend b/lex/.depend new file mode 100644 index 0000000000..0379bb77b3 --- /dev/null +++ b/lex/.depend @@ -0,0 +1,7 @@ +lexer.cmi: parser.cmi +parser.cmi: syntax.cmo +lexer.cmo: lexer.cmi parser.cmi syntax.cmo +lexgen.cmo: syntax.cmo +main.cmo: lexgen.cmo parser.cmi output.cmo syntax.cmo lexer.cmi +output.cmo: syntax.cmo +parser.cmo: parser.cmi syntax.cmo diff --git a/lex/Makefile b/lex/Makefile new file mode 100644 index 0000000000..4104fe77c2 --- /dev/null +++ b/lex/Makefile @@ -0,0 +1,51 @@ +# The lexer generator + +CAMLC=../boot/camlrun ../boot/camlc -I ../boot +COMPFLAGS= +LINKFLAGS= +CAMLYACC=../boot/camlyacc +YACCFLAGS= +CAMLLEX=../boot/camlrun ../boot/camllex +CAMLDEP=../tools/camldep +DEPFLAGS= + +OBJS=syntax.cmo parser.cmo lexer.cmo lexgen.cmo output.cmo main.cmo + +all: camllex + +camllex: $(OBJS) + $(CAMLC) $(LINKFLAGS) -o camllex $(OBJS) + +clean:: + rm -f camllex + rm -f *.cmo *.cmi camllex + +parser.ml parser.mli: parser.mly + $(CAMLYACC) $(YACCFLAGS) parser.mly + +clean:: + rm -f parser.ml parser.mli + +beforedepend:: parser.ml parser.mli + +lexer.ml: lexer.mll + $(CAMLLEX) lexer.mll + +clean:: + rm -f lexer.ml + +beforedepend:: lexer.ml + +.SUFFIXES: +.SUFFIXES: .ml .cmo .mli .cmi + +.ml.cmo: + $(CAMLC) -c $(COMPFLAGS) $< + +.mli.cmi: + $(CAMLC) -c $(COMPFLAGS) $< + +depend: beforedepend + $(CAMLDEP) *.mli *.ml > .depend + +include .depend diff --git a/lex/lexer.mli b/lex/lexer.mli new file mode 100644 index 0000000000..6e0b4a5073 --- /dev/null +++ b/lex/lexer.mli @@ -0,0 +1,3 @@ +val main: Lexing.lexbuf -> Parser.token + +exception Lexical_error of string diff --git a/lex/lexer.mll b/lex/lexer.mll new file mode 100644 index 0000000000..41d05c421a --- /dev/null +++ b/lex/lexer.mll @@ -0,0 +1,159 @@ +(* The lexical analyzer for lexer definitions. Bootstrapped! *) + +{ +open Syntax +open Parser + +(* Auxiliaries for the lexical analyzer *) + +let brace_depth = ref 0 +and comment_depth = ref 0 + +exception Lexical_error of string + +let initial_string_buffer = String.create 256 +let string_buff = ref initial_string_buffer +let string_index = ref 0 + +let reset_string_buffer () = + string_buff := initial_string_buffer; + string_index := 0 + +let store_string_char c = + (if !string_index >= String.length (!string_buff) then + let new_buff = String.create (String.length (!string_buff) * 2) in + String.blit new_buff 0 (!string_buff) 0 (String.length (!string_buff)); + string_buff := new_buff; + ()); + !string_buff.[!string_index] <- c; + incr string_index + +let get_stored_string () = + let s = String.sub (!string_buff) 0 (!string_index) in + string_buff := initial_string_buffer; + s + +let char_for_backslash = function + 'n' -> '\010' (* '\n' when bootstrapped *) + | 't' -> '\009' (* '\t' *) + | 'b' -> '\008' (* '\b' *) + | 'r' -> '\013' (* '\r' *) + | c -> c + +let char_for_decimal_code lexbuf i = + Char.chr(100 * (Char.code(Lexing.lexeme_char lexbuf i) - 48) + + 10 * (Char.code(Lexing.lexeme_char lexbuf (i+1)) - 48) + + (Char.code(Lexing.lexeme_char lexbuf (i+2)) - 48)) + +} + +rule main = parse + [' ' '\010' '\013' '\009' ] + + { main lexbuf } + | "(*" + { comment_depth := 1; + comment lexbuf; + main lexbuf } + | ['A'-'Z' 'a'-'z'] ['A'-'Z' 'a'-'z' '\'' '_' '0'-'9'] * + { match Lexing.lexeme lexbuf with + "rule" -> Trule + | "parse" -> Tparse + | "and" -> Tand + | "eof" -> Teof + | s -> Tident s } + | '"' + { reset_string_buffer(); + string lexbuf; + Tstring(get_stored_string()) } + | "`" [^ '\\'] "`" + { Tchar(Lexing.lexeme_char lexbuf 1) } + | "`" '\\' ['\\' '`' 'n' 't' 'b' 'r'] "`" + { Tchar(char_for_backslash (Lexing.lexeme_char lexbuf 2)) } + | "`" '\\' ['0'-'9'] ['0'-'9'] ['0'-'9'] "`" + { Tchar(char_for_decimal_code lexbuf 2) } + | "'" [^ '\\'] "'" + { Tchar(Lexing.lexeme_char lexbuf 1) } + | "'" '\\' ['\\' '\'' 'n' 't' 'b' 'r'] "'" + { Tchar(char_for_backslash (Lexing.lexeme_char lexbuf 2)) } + | "'" '\\' ['0'-'9'] ['0'-'9'] ['0'-'9'] "'" + { Tchar(char_for_decimal_code lexbuf 2) } + | '{' + { let n1 = Lexing.lexeme_end lexbuf in + brace_depth := 1; + let n2 = action lexbuf in + Taction(Location(n1, n2)) } + | '=' { Tequal } + | '|' { Tor } + | '_' { Tunderscore } + | "eof" { Teof } + | '[' { Tlbracket } + | ']' { Trbracket } + | '*' { Tstar } + | '?' { Tmaybe } + | '+' { Tplus } + | '(' { Tlparen } + | ')' { Trparen } + | '^' { Tcaret } + | '-' { Tdash } + | eof { Tend } + | _ + { raise(Lexical_error + ("illegal character " ^ String.escaped(Lexing.lexeme lexbuf))) } + +and action = parse + '{' + { incr brace_depth; + action lexbuf } + | '}' + { decr brace_depth; + if !brace_depth == 0 then Lexing.lexeme_start lexbuf else action lexbuf } + | '"' + { reset_string_buffer(); + string lexbuf; + reset_string_buffer(); + action lexbuf } + | "'{'" + { action lexbuf } + | "'{'" + { action lexbuf } + | "(*" + { comment_depth := 1; + comment lexbuf; + action lexbuf } + | eof + { raise (Lexical_error "unterminated action") } + | _ + { action lexbuf } + +and string = parse + '"' + { () } + | '\\' [' ' '\010' '\013' '\009' '\026' '\012'] + + { string lexbuf } + | '\\' ['\\' '"' 'n' 't' 'b' 'r'] + { store_string_char(char_for_backslash(Lexing.lexeme_char lexbuf 1)); + string lexbuf } + | '\\' ['0'-'9'] ['0'-'9'] ['0'-'9'] + { store_string_char(char_for_decimal_code lexbuf 1); + string lexbuf } + | eof + { raise(Lexical_error "unterminated string") } + | _ + { store_string_char(Lexing.lexeme_char lexbuf 0); + string lexbuf } + +and comment = parse + "(*" + { incr comment_depth; comment lexbuf } + | "*)" + { decr comment_depth; + if !comment_depth == 0 then () else comment lexbuf } + | '"' + { reset_string_buffer(); + string lexbuf; + reset_string_buffer(); + comment lexbuf } + | eof + { raise(Lexical_error "unterminated comment") } + | _ + { comment lexbuf } diff --git a/lex/lexgen.ml b/lex/lexgen.ml new file mode 100644 index 0000000000..6870419470 --- /dev/null +++ b/lex/lexgen.ml @@ -0,0 +1,203 @@ +(* Compiling a lexer definition *) + +open Syntax + +(* Deep abstract syntax for regular expressions *) + +type regexp = + Empty + | Chars of int + | Action of int + | Seq of regexp * regexp + | Alt of regexp * regexp + | Star of regexp + +(* From shallow to deep syntax *) + +let chars = ref ([] : char list list) +let chars_count = ref 0 +let actions = ref ([] : (int * location) list) +let actions_count = ref 0 + +let rec encode_regexp = function + Epsilon -> Empty + | Characters cl -> + let n = !chars_count in + chars := cl :: !chars; + incr chars_count; + Chars(n) + | Sequence(r1,r2) -> + Seq(encode_regexp r1, encode_regexp r2) + | Alternative(r1,r2) -> + Alt(encode_regexp r1, encode_regexp r2) + | Repetition r -> + Star (encode_regexp r) + +let encode_casedef = + List.fold_left + (fun reg (expr,act) -> + let act_num = !actions_count in + incr actions_count; + actions := (act_num, act) :: !actions; + Alt(reg, Seq(encode_regexp expr, Action act_num))) + Empty + +let encode_lexdef (Lexdef(_, ld)) = + chars := []; + chars_count := 0; + actions := []; + actions_count := 0; + let name_regexp_list = + List.map (fun (name, casedef) -> (name, encode_casedef casedef)) ld in + let chr = Array.of_list (List.rev !chars) + and act = !actions in + chars := []; + actions := []; + (chr, name_regexp_list, act) + +(* To generate directly a NFA from a regular expression. + Confer Aho-Sethi-Ullman, dragon book, chap. 3 *) + +type transition = + OnChars of int + | ToAction of int + +let rec merge_trans s1 s2 = + match (s1, s2) with + ([], _) -> s2 + | (_, []) -> s1 + | ((OnChars n1 as t1) :: r1, (OnChars n2 as t2) :: r2) -> + if n1 == n2 then t1 :: merge_trans r1 r2 else + if n1 < n2 then t1 :: merge_trans r1 s2 else + t2 :: merge_trans s1 r2 + | ((ToAction n1 as t1) :: r1, (ToAction n2 as t2) :: r2) -> + if n1 == n2 then t1 :: merge_trans r1 r2 else + if n1 < n2 then t1 :: merge_trans r1 s2 else + t2 :: merge_trans s1 r2 + | ((OnChars n1 as t1) :: r1, (ToAction n2 as t2) :: r2) -> + t1 :: merge_trans r1 s2 + | ((ToAction n1 as t1) :: r1, (OnChars n2 as t2) :: r2) -> + t2 :: merge_trans s1 r2 + +let rec nullable = function + Empty -> true + | Chars _ -> false + | Action _ -> false + | Seq(r1,r2) -> nullable r1 & nullable r2 + | Alt(r1,r2) -> nullable r1 or nullable r2 + | Star r -> true + +let rec firstpos = function + Empty -> [] + | Chars pos -> [OnChars pos] + | Action act -> [ToAction act] + | Seq(r1,r2) -> if nullable r1 + then merge_trans (firstpos r1) (firstpos r2) + else firstpos r1 + | Alt(r1,r2) -> merge_trans (firstpos r1) (firstpos r2) + | Star r -> firstpos r + +let rec lastpos = function + Empty -> [] + | Chars pos -> [OnChars pos] + | Action act -> [ToAction act] + | Seq(r1,r2) -> if nullable r2 + then merge_trans (lastpos r1) (lastpos r2) + else lastpos r2 + | Alt(r1,r2) -> merge_trans (lastpos r1) (lastpos r2) + | Star r -> lastpos r + +let followpos size name_regexp_list = + let v = Array.new size [] in + let fill_pos first = function + OnChars pos -> v.(pos) <- merge_trans first v.(pos); () + | ToAction _ -> () in + let rec fill = function + Seq(r1,r2) -> + fill r1; fill r2; + List.iter (fill_pos (firstpos r2)) (lastpos r1) + | Alt(r1,r2) -> + fill r1; fill r2 + | Star r -> + fill r; + List.iter (fill_pos (firstpos r)) (lastpos r) + | _ -> () in + List.iter (fun (name, regexp) -> fill regexp) name_regexp_list; + v + +let no_action = 32767 + +let split_trans_set = List.fold_left + (fun (act, pos_set as act_pos_set) -> + function OnChars pos -> (act, pos :: pos_set) + | ToAction act1 -> if act1 < act then (act1, pos_set) + else act_pos_set) + (no_action, []) + +let memory = (Hashtbl.new 131 : (transition list, int) Hashtbl.t) +and todo = ref ([] : (transition list * int) list) +and next = ref 0 + +let reset_state_mem () = + Hashtbl.clear memory; todo := []; next := 0; () + +let get_state st = + try + Hashtbl.find memory st + with Not_found -> + let nbr = !next in + incr next; + Hashtbl.add memory st nbr; + todo := (st, nbr) :: !todo; + nbr + +let rec map_on_states f = + match !todo with + [] -> [] + | (st,i)::r -> todo := r; let res = f st in (res,i) :: map_on_states f + +let number_of_states () = + !next + +let goto_state = function + [] -> Backtrack + | ps -> Goto (get_state ps) + +let transition_from chars follow pos_set = + let tr = Array.new 256 [] + and shift = Array.new 256 Backtrack in + List.iter + (fun pos -> + List.iter + (fun c -> + tr.(Char.code c) <- + merge_trans tr.(Char.code c) follow.(pos)) + chars.(pos)) + pos_set; + for i = 0 to 255 do + shift.(i) <- goto_state tr.(i) + done; + shift + +let translate_state chars follow state = + match split_trans_set state with + n, [] -> Perform n + | n, ps -> Shift( (if n == no_action then No_remember else Remember n), + transition_from chars follow ps) + +let make_dfa lexdef = + let (chars, name_regexp_list, actions) = + encode_lexdef lexdef in + let follow = + followpos (Array.length chars) name_regexp_list in + reset_state_mem(); + let initial_states = + List.map (fun (name, regexp) -> (name, get_state(firstpos regexp))) + name_regexp_list in + let states = + map_on_states (translate_state chars follow) in + let v = + Array.new (number_of_states()) (Perform 0) in + List.iter (fun (auto, i) -> v.(i) <- auto) states; + reset_state_mem(); + (initial_states, v, actions) diff --git a/lex/main.ml b/lex/main.ml new file mode 100644 index 0000000000..aaefe487dd --- /dev/null +++ b/lex/main.ml @@ -0,0 +1,48 @@ +(* The lexer generator. Command-line parsing. *) + +open Syntax +open Lexgen +open Output + +let main () = + if Array.length Sys.argv != 2 then begin + prerr_endline "Usage: camllex <input file>"; + exit 2 + end; + let source_name = Sys.argv.(1) in + let dest_name = + if Filename.check_suffix source_name ".mll" then + Filename.chop_suffix source_name ".mll" ^ ".ml" + else + source_name ^ ".ml" in + ic := open_in_bin source_name; + oc := open_out dest_name; + let lexbuf = + Lexing.from_channel !ic in + let (Lexdef(header,_) as def) = + try + Parser.lexer_definition Lexer.main lexbuf + with exn -> + close_out !oc; + Sys.remove dest_name; + begin match exn with + Parsing.Parse_error -> + prerr_string "Syntax error around char "; + prerr_int (Lexing.lexeme_start lexbuf); + prerr_endline "." + | Lexer.Lexical_error s -> + prerr_string "Lexical error around char "; + prerr_int (Lexing.lexeme_start lexbuf); + prerr_string ": "; + prerr_string s; + prerr_endline "." + | _ -> raise exn + end; + exit 2 in + let ((init, states, acts) as dfa) = make_dfa def in + output_lexdef header dfa; + close_in !ic; + close_out !oc + +let _ = Printexc.catch main (); exit 0 + diff --git a/lex/output.ml b/lex/output.ml new file mode 100644 index 0000000000..b3ca459c1b --- /dev/null +++ b/lex/output.ml @@ -0,0 +1,146 @@ +(* Generating a DFA as a set of mutually recursive functions *) + +open Syntax + +let ic = ref stdin +and oc = ref stdout + +(* 1- Generating the actions *) + +let copy_buffer = String.create 1024 + +let copy_chunk (Location(start,stop)) = + let rec copy s = + if s <= 0 then () else + let n = if s < 1024 then s else 1024 in + let m = input !ic copy_buffer 0 n in + output !oc copy_buffer 0 m; + copy (s - m) + in + seek_in !ic start; + copy (stop - start) + +let output_action (i,act) = + output_string !oc ("action_" ^ string_of_int i ^ " lexbuf = (\n"); + copy_chunk act; + output_string !oc ")\nand "; + () + +(* 2- Generating the states *) + +let states = ref ([||] : automata array) + +let enumerate_vect v = + let rec enum env pos = + if pos >= Array.length v then env else + try + let pl = List.assoc v.(pos) env in + pl := pos :: !pl; enum env (succ pos) + with Not_found -> + enum ((v.(pos), ref [pos]) :: env) (succ pos) in + Sort.list + (fun (e1, pl1) (e2, pl2) -> List.length !pl1 >= List.length !pl2) + (enum [] 0) + +let output_move = function + Backtrack -> + output_string !oc "backtrack lexbuf" + | Goto dest -> + match !states.(dest) with + Perform act_num -> + output_string !oc ("action_" ^ string_of_int act_num ^ " lexbuf") + | _ -> + output_string !oc ("state_" ^ string_of_int dest ^ " lexbuf") + +let output_char_for_read oc = function + '\'' -> output_string oc "\\'" + | '\\' -> output_string oc "\\\\" + | '\n' -> output_string oc "\\n" + | '\t' -> output_string oc "\\t" + | c -> + let n = Char.code c in + if n >= 32 & n < 127 then + output_char oc c + else begin + output_char oc '\\'; + output_char oc (Char.chr (48 + n / 100)); + output_char oc (Char.chr (48 + (n / 10) mod 10)); + output_char oc (Char.chr (48 + n mod 10)) + end + +let rec output_chars = function + [] -> + failwith "output_chars" + | [c] -> + output_string !oc "'"; + output_char_for_read !oc (Char.chr c); + output_string !oc "'" + | c::cl -> + output_string !oc "'"; + output_char_for_read !oc (Char.chr c); + output_string !oc "'|"; + output_chars cl + +let output_one_trans (dest, chars) = + output_chars !chars; + output_string !oc " -> "; + output_move dest; + output_string !oc "\n | "; + () + +let output_all_trans trans = + output_string !oc " match get_next_char lexbuf with\n "; + match enumerate_vect trans with + [] -> + failwith "output_all_trans" + | (default, _) :: rest -> + List.iter output_one_trans rest; + output_string !oc "_ -> "; + output_move default; + output_string !oc "\nand "; + () + +let output_state state_num = function + Perform i -> + () + | Shift(what_to_do, moves) -> + output_string !oc + ("state_" ^ string_of_int state_num ^ " lexbuf =\n"); + begin match what_to_do with + No_remember -> () + | Remember i -> + output_string !oc " lexbuf.lex_last_pos <- lexbuf.lex_curr_pos;\n"; + output_string !oc (" lexbuf.lex_last_action <- Obj.magic action_" ^ + string_of_int i ^ ";\n") + end; + output_all_trans moves + +(* 3- Generating the entry points *) + +let rec output_entries = function + [] -> failwith "output_entries" + | (name,state_num) :: rest -> + output_string !oc (name ^ " lexbuf =\n"); + output_string !oc " start_lexing lexbuf;\n"; + output_string !oc (" state_" ^ string_of_int state_num ^ " lexbuf\n"); + match rest with + [] -> output_string !oc "\n"; () + | _ -> output_string !oc "\nand "; output_entries rest + +(* All together *) + +let output_lexdef header (initial_st, st, actions) = + print_int (Array.length st); print_string " states, "; + print_int (List.length actions); print_string " actions."; + print_newline(); + output_string !oc "open Obj\nopen Lexing\n\n"; + copy_chunk header; + output_string !oc "\nlet rec "; + states := st; + List.iter output_action actions; + for i = 0 to Array.length st - 1 do + output_state i st.(i) + done; + output_entries initial_st + + diff --git a/lex/parser.mly b/lex/parser.mly new file mode 100644 index 0000000000..e8851df101 --- /dev/null +++ b/lex/parser.mly @@ -0,0 +1,120 @@ +/* The grammar for lexer definitions */ + +%{ +open Syntax + +(* Auxiliaries for the parser. *) + +let regexp_for_string s = + let rec re_string n = + if n >= String.length s then Epsilon + else if succ n = String.length s then Characters([s.[n]]) + else Sequence(Characters([s.[n]]), re_string (succ n)) + in re_string 0 + +let char_class c1 c2 = + let rec class n = + if n > (Char.code c2) then [] else (Char.chr n) :: class(succ n) + in class (Char.code c1) + +let all_chars = char_class (Char.chr 1) (Char.chr 255) + +let rec subtract l1 l2 = + match l1 with + [] -> [] + | a::r -> if List.mem a l2 then subtract r l2 else a :: subtract r l2 +%} + +%token <string> Tident +%token <char> Tchar +%token <string> Tstring +%token <Syntax.location> Taction +%token Trule Tparse Tand Tequal Tend Tor Tunderscore Teof Tlbracket Trbracket +%token Tstar Tmaybe Tplus Tlparen Trparen Tcaret Tdash + +%left Tor +%left CONCAT +%nonassoc Tmaybe +%left Tstar +%left Tplus + +%start lexer_definition +%type <Syntax.lexer_definition> lexer_definition + +%% + +lexer_definition: + header Trule definition other_definitions Tend + { Lexdef($1, $3::(List.rev $4)) } +; +header: + Taction + { $1 } + | + { Location(0,0) } +; +other_definitions: + other_definitions Tand definition + { $3::$1 } + | + { [] } +; +definition: + Tident Tequal entry + { ($1,$3) } +; +entry: + Tparse case rest_of_entry + { $2::List.rev $3 } +; +rest_of_entry: + rest_of_entry Tor case + { $3::$1 } + | + { [] } +; +case: + regexp Taction + { ($1,$2) } +; +regexp: + Tunderscore + { Characters all_chars } + | Teof + { Characters ['\000'] } + | Tchar + { Characters [$1] } + | Tstring + { regexp_for_string $1 } + | Tlbracket char_class Trbracket + { Characters $2 } + | regexp Tstar + { Repetition $1 } + | regexp Tmaybe + { Alternative($1, Epsilon) } + | regexp Tplus + { Sequence($1, Repetition $1) } + | regexp Tor regexp + { Alternative($1,$3) } + | regexp regexp %prec CONCAT + { Sequence($1,$2) } + | Tlparen regexp Trparen + { $2 } +; +char_class: + Tcaret char_class1 + { subtract all_chars $2 } + | char_class1 + { $1 } +; +char_class1: + Tchar Tdash Tchar + { char_class $1 $3 } + | Tchar + { [$1] } + | char_class1 char_class1 %prec CONCAT + { $1 @ $2 } +; + +%% + diff --git a/lex/syntax.ml b/lex/syntax.ml new file mode 100644 index 0000000000..f692e6f625 --- /dev/null +++ b/lex/syntax.ml @@ -0,0 +1,26 @@ +(* The shallow abstract syntax *) + +type location = + Location of int * int + +type regular_expression = + Epsilon + | Characters of char list + | Sequence of regular_expression * regular_expression + | Alternative of regular_expression * regular_expression + | Repetition of regular_expression + +type lexer_definition = + Lexdef of location * (string * (regular_expression * location) list) list + +(* Representation of automata *) + +type automata = + Perform of int + | Shift of automata_trans * automata_move array +and automata_trans = + No_remember + | Remember of int +and automata_move = + Backtrack + | Goto of int diff --git a/parsing/asttypes.mli b/parsing/asttypes.mli new file mode 100644 index 0000000000..f2e01f60c0 --- /dev/null +++ b/parsing/asttypes.mli @@ -0,0 +1,15 @@ +(* Auxiliary a.s.t. types used by parsetree and typedtree. *) + +type constant = + Const_int of int + | Const_char of char + | Const_string of string + | Const_float of string + +type rec_flag = Nonrecursive | Recursive + +type direction_flag = Upto | Downto + +type mutable_flag = Immutable | Mutable + + diff --git a/parsing/lexer.mli b/parsing/lexer.mli new file mode 100644 index 0000000000..d5f0da4229 --- /dev/null +++ b/parsing/lexer.mli @@ -0,0 +1,12 @@ +(* The lexical analyzer *) + +val token: Lexing.lexbuf -> Parser.token + +type error = + Illegal_character + | Unterminated_comment + | Unterminated_string + +exception Error of error * int * int + +val report_error: error -> unit diff --git a/parsing/lexer.mll b/parsing/lexer.mll new file mode 100644 index 0000000000..92f1deade5 --- /dev/null +++ b/parsing/lexer.mll @@ -0,0 +1,243 @@ +(* The lexer definition *) + +{ +open Misc +open Parser + +type error = + Illegal_character + | Unterminated_comment + | Unterminated_string + +exception Error of error * int * int + +(* For nested comments *) + +let comment_depth = ref 0 + +(* The table of keywords *) + +let keyword_table = + create_hashtable 149 [ + "and", AND; + "as", AS; + "begin", BEGIN; + "do", DO; + "done", DONE; + "downto", DOWNTO; + "else", ELSE; + "end", END; + "exception", EXCEPTION; + "external", EXTERNAL; + "false", FALSE; + "for", FOR; + "fun", FUN; + "function", FUNCTION; + "functor", FUNCTOR; + "if", IF; + "in", IN; + "include", INCLUDE; + "let", LET; + "match", MATCH; + "module", MODULE; + "mutable", MUTABLE; + "of", OF; + "open", OPEN; + "or", OR; + "rec", REC; + "sig", SIG; + "struct", STRUCT; + "then", THEN; + "to", TO; + "true", TRUE; + "try", TRY; + "type", TYPE; + "val", VAL; + "when", WHEN; + "while", WHILE; + "with", WITH; + + "mod", INFIXOP3("mod"); + "land", INFIXOP3("land"); + "lor", INFIXOP3("lor"); + "lxor", INFIXOP3("lxor"); + "lsl", INFIXOP4("lsl"); + "lsr", INFIXOP4("lsr"); + "asr", INFIXOP4("asr") +] + +(* To buffer string literals *) + +let initial_string_buffer = String.create 256 +let string_buff = ref initial_string_buffer +let string_index = ref 0 + +let reset_string_buffer () = + string_buff := initial_string_buffer; + string_index := 0 + +let store_string_char c = + if !string_index >= String.length (!string_buff) then begin + let new_buff = String.create (String.length (!string_buff) * 2) in + String.blit (!string_buff) 0 new_buff 0 (String.length (!string_buff)); + string_buff := new_buff + end; + String.unsafe_set (!string_buff) (!string_index) c; + incr string_index + +let get_stored_string () = + let s = String.sub (!string_buff) 0 (!string_index) in + string_buff := initial_string_buffer; + s + +(* To translate escape sequences *) + +let char_for_backslash = function + 'n' -> '\010' + | 'r' -> '\013' + | 'b' -> '\008' + | 't' -> '\009' + | c -> c + +let char_for_decimal_code lexbuf i = + Char.chr(100 * (Char.code(Lexing.lexeme_char lexbuf i) - 48) + + 10 * (Char.code(Lexing.lexeme_char lexbuf (i+1)) - 48) + + (Char.code(Lexing.lexeme_char lexbuf (i+2)) - 48)) + +(* To store the position of the beginning of a string or comment *) + +let start_pos = ref 0 + +(* Error report *) + +open Format + +let report_error = function + Illegal_character -> + print_string "Illegal character" + | Unterminated_comment -> + print_string "Comment not terminated" + | Unterminated_string -> + print_string "String literal not terminated" + +} + +rule token = parse + [' ' '\010' '\013' '\009' '\012'] + + { token lexbuf } + | ['a'-'z' '\223'-'\246' '\248'-'\255' ] + (['A'-'Z' 'a'-'z' '_' '\192'-'\214' '\216'-'\246' '\248'-'\255' + '\'' '0'-'9' ]) * + { let s = Lexing.lexeme lexbuf in + try + Hashtbl.find keyword_table s + with Not_found -> + LIDENT s } + | ['A'-'Z' '\192'-'\214' '\216'-'\222' ] + (['A'-'Z' 'a'-'z' '_' '\192'-'\214' '\216'-'\246' '\248'-'\255' + '\'' '0'-'9' ]) * + { UIDENT(Lexing.lexeme lexbuf) } (* No capitalized keywords *) + | ['0'-'9']+ + | '0' ['x' 'X'] ['0'-'9' 'A'-'F' 'a'-'f']+ + | '0' ['o' 'O'] ['0'-'7']+ + | '0' ['b' 'B'] ['0'-'1']+ + { INT (int_of_string(Lexing.lexeme lexbuf)) } + | ['0'-'9']+ ('.' ['0'-'9']*)? (['e' 'E'] ['+' '-']? ['0'-'9']+)? + { FLOAT (Lexing.lexeme lexbuf) } + | "\"" + { reset_string_buffer(); + let string_start = Lexing.lexeme_start lexbuf in + start_pos := string_start; + string lexbuf; + lexbuf.lex_start_pos <- string_start - lexbuf.lex_abs_pos; + STRING (get_stored_string()) } + | "'" [^ '\\' '\''] "'" + { CHAR(Lexing.lexeme_char lexbuf 1) } + | "'" '\\' ['\\' '\'' 'n' 't' 'b' 'r'] "'" + { CHAR(char_for_backslash (Lexing.lexeme_char lexbuf 2)) } + | "'" '\\' ['0'-'9'] ['0'-'9'] ['0'-'9'] "'" + { CHAR(char_for_decimal_code lexbuf 2) } + | "(*" + { comment_depth := 1; + start_pos := Lexing.lexeme_start lexbuf; + comment lexbuf; + token lexbuf } + | "#" { SHARP } + | "&" { AMPERSAND } + | "'" { QUOTE } + | "(" { LPAREN } + | ")" { RPAREN } + | "*" { STAR } + | "," { COMMA } + | "->" { MINUSGREATER } + | "." { DOT } + | ".." { DOTDOT } + | ".(" { DOTLPAREN } + | ".[" { DOTLBRACKET } + | ":" { COLON } + | "::" { COLONCOLON } + | ":=" { COLONEQUAL } + | ";" { SEMI } + | ";;" { SEMISEMI } + | "<-" { LESSMINUS } + | "=" { EQUAL } + | "[" { LBRACKET } + | "[|" { LBRACKETBAR } + | "]" { RBRACKET } + | "_" { UNDERSCORE } + | "{" { LBRACE } + | "|" { BAR } + | "|]" { BARRBRACKET } + | "}" { RBRACE } + + | "!=" { INFIXOP1 "!=" } + | "-" { SUBTRACTIVE "-" } + | "-." { SUBTRACTIVE "-." } + + | [ '!' '?' ] + [ '!' '%' '&' '*' '+' '-' '.' '/' ':' '<' '=' '>' '?' '@' '^' '|' '~' ] * + { PREFIXOP(Lexing.lexeme lexbuf) } + | [ '=' '<' '>' '@' '^' '|' '&' '~' ] + [ '!' '%' '&' '*' '+' '-' '.' '/' ':' '<' '=' '>' '?' '@' '^' '|' '~' ] * + { INFIXOP1(Lexing.lexeme lexbuf) } + | [ '+' '-' ] + [ '!' '%' '&' '*' '+' '-' '.' '/' ':' '<' '=' '>' '?' '@' '^' '|' '~' ] * + { INFIXOP2(Lexing.lexeme lexbuf) } + | "**" + [ '!' '%' '&' '*' '+' '-' '.' '/' ':' '<' '=' '>' '?' '@' '^' '|' '~' ] * + { INFIXOP4(Lexing.lexeme lexbuf) } + | [ '*' '/' '%' ] + [ '!' '%' '&' '*' '+' '-' '.' '/' ':' '<' '=' '>' '?' '@' '^' '|' '~' ] * + { INFIXOP3(Lexing.lexeme lexbuf) } + | eof { EOF } + | _ + { raise (Error(Illegal_character, + Lexing.lexeme_start lexbuf, Lexing.lexeme_end lexbuf)) } + +and comment = parse + "(*" + { comment_depth := succ !comment_depth; comment lexbuf } + | "*)" + { comment_depth := pred !comment_depth; + if !comment_depth > 0 then comment lexbuf } + | eof + { raise (Error(Unterminated_comment, !start_pos, !start_pos+2)) } + | _ + { comment lexbuf } + +and string = parse + '"' + { () } + | '\\' [' ' '\010' '\013' '\009' '\026' '\012'] + + { string lexbuf } + | '\\' ['\\' '"' 'n' 't' 'b' 'r'] + { store_string_char(char_for_backslash(Lexing.lexeme_char lexbuf 1)); + string lexbuf } + | '\\' ['0'-'9'] ['0'-'9'] ['0'-'9'] + { store_string_char(char_for_decimal_code lexbuf 1); + string lexbuf } + | eof + { raise (Error(Unterminated_string, !start_pos, !start_pos+1)) } + | _ + { store_string_char(Lexing.lexeme_char lexbuf 0); + string lexbuf } diff --git a/parsing/location.ml b/parsing/location.ml new file mode 100644 index 0000000000..2a291905d9 --- /dev/null +++ b/parsing/location.ml @@ -0,0 +1,133 @@ +open Lexing +open Misc + + +type t = + { loc_start: int; loc_end: int } + +let none = { loc_start = -1; loc_end = -1 } + +let symbol_loc () = + { loc_start = Parsing.symbol_start(); loc_end = Parsing.symbol_end() } + +let rhs_loc n = + { loc_start = Parsing.rhs_start n; loc_end = Parsing.rhs_end n } + +let input_name = ref "" + +let input_lexbuf = ref (None : lexbuf option) + +(* Determine line numbers and position of beginning of lines in a file *) + +let line_pos_file filename loc = + let ic = open_in_bin filename in + let pos = ref 0 + and linenum = ref 1 + and linebeg = ref 0 in + begin try + while !pos < loc do + incr pos; + if input_char ic = '\n' then begin + incr linenum; + linebeg := !pos + end + done + with End_of_file -> () + end; + close_in ic; + (!linenum, !linebeg) + +(* Terminal info *) + +type terminal_info_status = Unknown | Bad_term | Good_term + +let status = ref Unknown +and num_lines = ref 0 +and cursor_up = ref "" +and start_standout = ref "" +and end_standout = ref "" + +let setup_terminal_info() = + try + Terminfo.setupterm(); + num_lines := Terminfo.getnum "li"; + cursor_up := Terminfo.getstr "up"; + begin try + start_standout := Terminfo.getstr "us"; + end_standout := Terminfo.getstr "ue" + with Not_found -> + start_standout := Terminfo.getstr "so"; + end_standout := Terminfo.getstr "se" + end; + status := Good_term + with _ -> + status := Bad_term + +(* Print the location using standout mode. *) + +let rec highlight_location loc = + match !status with + Unknown -> + setup_terminal_info(); highlight_location loc + | Bad_term -> + false + | Good_term -> + match !input_lexbuf with + None -> false + | Some lb -> + (* Char 0 is at offset -lb.lex_abs_pos in lb.lex_buffer. *) + let pos0 = -lb.lex_abs_pos in + (* Do nothing if the buffer does not contain the whole phrase. *) + if pos0 < 0 then false else begin + (* Count number of lines in phrase *) + let lines = ref 0 in + for i = pos0 to String.length lb.lex_buffer - 1 do + if lb.lex_buffer.[i] = '\n' then incr lines + done; + (* If too many lines, give up *) + if !lines >= !num_lines - 2 then false else begin + (* Move cursor up that number of lines *) + for i = 1 to !lines do + Terminfo.puts stdout !cursor_up 1 + done; + (* Print the input, switching to standout for the location *) + let bol = ref true in + for pos = 0 to String.length lb.lex_buffer - pos0 - 1 do + if !bol then (print_char '#'; bol := false); + if pos = loc.loc_start then + Terminfo.puts stdout !start_standout 1; + if pos = loc.loc_end then + Terminfo.puts stdout !end_standout 1; + let c = lb.lex_buffer.[pos + pos0] in + print_char c; + bol := (c = '\n') + done; + true + end + end + +(* Print the location in some way or another *) + +open Format + +let print loc = + if String.length !input_name = 0 then + if highlight_location loc then () else begin + print_string "Characters "; + print_int loc.loc_start; print_string "-"; + print_int loc.loc_end; print_string ":"; + force_newline() + end + else begin + let (linenum, linebeg) = line_pos_file !input_name loc.loc_start in + print_string "File "; print_string !input_name; + print_string ", line "; print_int linenum; + print_string ", characters "; print_int (loc.loc_start - linebeg); + print_string "-"; print_int (loc.loc_end - linebeg); + print_string ":"; + force_newline() + end + +let print_warning loc msg = + print loc; + print_string "Warning: "; print_string msg; print_newline() diff --git a/parsing/location.mli b/parsing/location.mli new file mode 100644 index 0000000000..a39ddd8557 --- /dev/null +++ b/parsing/location.mli @@ -0,0 +1,17 @@ +(* Source code locations, used in parsetree *) + +open Misc + +type t = + { loc_start: int; loc_end: int } + +val none: t +val symbol_loc: unit -> t +val rhs_loc: int -> t + +val input_name: string ref +val input_lexbuf: Lexing.lexbuf option ref + +val print: t -> unit +val print_warning: t -> string -> unit + diff --git a/parsing/longident.mli b/parsing/longident.mli new file mode 100644 index 0000000000..ffbe8004f0 --- /dev/null +++ b/parsing/longident.mli @@ -0,0 +1,5 @@ +(* Long identifiers, used in parsetree. *) + +type t = + Lident of string + | Ldot of t * string diff --git a/parsing/parse.ml b/parsing/parse.ml new file mode 100644 index 0000000000..04764f5eb9 --- /dev/null +++ b/parsing/parse.ml @@ -0,0 +1,36 @@ +(* Entry points in the parser *) + +exception Error of int * int (* Syntax error *) +(* Skip tokens to the end of the phrase *) + +let rec skip_phrase lexbuf = + try + match Lexer.token lexbuf with + Parser.SEMISEMI | Parser.EOF -> () + | _ -> skip_phrase lexbuf + with Lexer.Error(_,_,_) -> + skip_phrase lexbuf + +let maybe_skip_phrase lexbuf = + if Parsing.is_current_lookahead Parser.SEMISEMI + or Parsing.is_current_lookahead Parser.EOF + then () + else skip_phrase lexbuf + +let wrap parsing_fun lexbuf = + try + parsing_fun Lexer.token lexbuf + with + Lexer.Error(_, _, _) as err -> + if !Location.input_name = "" then skip_phrase lexbuf; + raise err + | Parsing.Parse_error -> + let start = Lexing.lexeme_start lexbuf + and stop = Lexing.lexeme_end lexbuf in + if !Location.input_name = "" + then maybe_skip_phrase lexbuf; + raise(Error(start, stop)) + +let toplevel_phrase = wrap Parser.toplevel_phrase +and implementation = wrap Parser.implementation +and interface = wrap Parser.interface diff --git a/parsing/parse.mli b/parsing/parse.mli new file mode 100644 index 0000000000..02d6694594 --- /dev/null +++ b/parsing/parse.mli @@ -0,0 +1,7 @@ +(* Entry points in the parser *) + +val toplevel_phrase : Lexing.lexbuf -> Parsetree.toplevel_phrase +val implementation : Lexing.lexbuf -> Parsetree.structure +val interface : Lexing.lexbuf -> Parsetree.signature + +exception Error of int * int (* Syntax error *) diff --git a/parsing/parser.mly b/parsing/parser.mly new file mode 100644 index 0000000000..c45f900ead --- /dev/null +++ b/parsing/parser.mly @@ -0,0 +1,658 @@ +/* The parser definition */ + +%{ +open Misc +open Location +open Asttypes +open Longident +open Parsetree + +let mktyp d = + { ptyp_desc = d; ptyp_loc = symbol_loc() } +let mkpat d = + { ppat_desc = d; ppat_loc = symbol_loc() } +let mkexp d = + { pexp_desc = d; pexp_loc = symbol_loc() } +let mkmty d = + { pmty_desc = d; pmty_loc = symbol_loc() } +let mkmod d = + { pmod_desc = d; pmod_loc = symbol_loc() } + +let mkoperator name pos = + { pexp_desc = Pexp_ident(Lident name); pexp_loc = rhs_loc pos } + +let mkinfix arg1 name arg2 = + mkexp(Pexp_apply(mkoperator name 2, [arg1; arg2])) + +let mkuminus name arg = + match arg.pexp_desc with + Pexp_constant(Const_int n) -> + mkexp(Pexp_constant(Const_int(-n))) + | Pexp_constant(Const_float f) -> + mkexp(Pexp_constant(Const_float("-" ^ f))) + | _ -> + mkexp(Pexp_apply(mkoperator ("~" ^ name) 1, [arg])) + +let rec mklistexp = function + [] -> + mkexp(Pexp_construct(Lident "[]", None)) + | e1 :: el -> + mkexp(Pexp_construct(Lident "::", + Some(mkexp(Pexp_tuple[e1; mklistexp el])))) +let rec mklistpat = function + [] -> + mkpat(Ppat_construct(Lident "[]", None)) + | p1 :: pl -> + mkpat(Ppat_construct(Lident "::", + Some(mkpat(Ppat_tuple[p1; mklistpat pl])))) + +let array_function str name = + Ldot(Lident str, (if !Clflags.fast then "unsafe_" ^ name else name)) + +let rec mkrangepat c1 c2 = + if c1 > c2 then mkrangepat c2 c1 else + if c1 = c2 then mkpat(Ppat_constant(Const_char c1)) else + mkpat(Ppat_or(mkpat(Ppat_constant(Const_char c1)), + mkrangepat (Char.chr(Char.code c1 + 1)) c2)) +%} + +/* Tokens */ + +%token AMPERSAND +%token AND +%token AS +%token BAR +%token BARRBRACKET +%token BEGIN +%token <char> CHAR +%token COLON +%token COLONCOLON +%token COLONEQUAL +%token COMMA +%token DO +%token DONE +%token DOT +%token DOTDOT +%token DOTLBRACKET +%token DOTLPAREN +%token DOWNTO +%token ELSE +%token END +%token EOF +%token EQUAL +%token EXCEPTION +%token EXTERNAL +%token FALSE +%token <string> FLOAT +%token FOR +%token FUN +%token FUNCTION +%token FUNCTOR +%token IF +%token IN +%token INCLUDE +%token <string> INFIXOP1 +%token <string> INFIXOP2 +%token <string> INFIXOP3 +%token <string> INFIXOP4 +%token <int> INT +%token LBRACE +%token LBRACKET +%token LBRACKETBAR +%token LESSMINUS +%token LET +%token <string> LIDENT +%token LPAREN +%token MATCH +%token MINUSGREATER +%token MODULE +%token MUTABLE +%token OF +%token OPEN +%token OR +%token <string> PREFIXOP +%token QUOTE +%token RBRACE +%token RBRACKET +%token REC +%token RPAREN +%token SEMI +%token SEMISEMI +%token SHARP +%token SIG +%token STAR +%token <string> STRING +%token STRUCT +%token <string> SUBTRACTIVE +%token THEN +%token TO +%token TRUE +%token TRY +%token TYPE +%token <string> UIDENT +%token UNDERSCORE +%token VAL +%token WHEN +%token WHILE +%token WITH + +/* Precedences and associativities. Lower precedences come first. */ + +%right prec_let /* let ... in ... */ +%right SEMI /* e1; e2 (sequence) */ +%right prec_fun prec_match prec_try /* match ... with ... */ +%right prec_list /* e1; e2 (list, array, record) */ +%right prec_if /* if ... then ... else ... */ +%right COLONEQUAL LESSMINUS /* assignments */ +%left AS /* as in patterns */ +%left BAR /* | in patterns */ +%left COMMA /* , in expressions, patterns, types */ +%right prec_type_arrow /* -> in type expressions */ +%right OR /* or */ +%right AMPERSAND /* & */ +%left INFIXOP1 EQUAL /* = < > etc */ +%right COLONCOLON /* :: */ +%left INFIXOP2 SUBTRACTIVE /* + - */ +%left INFIXOP3 STAR /* * / */ +%right INFIXOP4 /* ** */ +%right prec_unary_minus /* - unary */ +%left prec_appl /* function application */ +%right prec_constr_appl /* constructor application */ +%left DOT DOTLPAREN DOTLBRACKET /* record access, array access */ +%right PREFIXOP /* ! */ + +/* Entry points */ + +%start implementation /* for implementation files */ +%type <Parsetree.structure> implementation +%start interface /* for interface files */ +%type <Parsetree.signature> interface +%start toplevel_phrase /* for interactive use */ +%type <Parsetree.toplevel_phrase> toplevel_phrase + +%% + +/* Entry points */ + +implementation: + structure EOF { List.rev $1 } +; +interface: + signature EOF { List.rev $1 } +; +toplevel_phrase: + structure_item SEMISEMI { Ptop_def[$1] } + | expr SEMISEMI { Ptop_def[Pstr_eval($1)] } + | SHARP ident SEMISEMI { Ptop_dir($2, Pdir_none) } + | SHARP ident STRING SEMISEMI { Ptop_dir($2, Pdir_string $3) } + | SHARP ident INT SEMISEMI { Ptop_dir($2, Pdir_int $3) } + | SHARP ident val_longident SEMISEMI { Ptop_dir($2, Pdir_ident $3) } + | EOF { raise End_of_file } +; + +/* Module expressions */ + +module_expr: + mod_longident + { mkmod(Pmod_ident $1) } + | STRUCT structure END + { mkmod(Pmod_structure(List.rev $2)) } + | FUNCTOR LPAREN UIDENT COLON module_type RPAREN MINUSGREATER module_expr + %prec prec_fun + { mkmod(Pmod_functor($3, $5, $8)) } + | module_expr module_expr %prec prec_appl + { mkmod(Pmod_apply($1, $2)) } + | LPAREN module_expr COLON module_type RPAREN + { mkmod(Pmod_constraint($2, $4)) } + | LPAREN module_expr RPAREN + { $2 } +; +structure: + /* empty */ { [] } + | structure structure_item { $2 :: $1 } +; +structure_item: + LET UNDERSCORE EQUAL expr + { Pstr_eval($4) } + | LET rec_flag let_bindings + { Pstr_value($2, List.rev $3) } + | EXTERNAL val_ident COLON core_type EQUAL STRING + { Pstr_primitive($2, {pval_type = $4; pval_prim = Some $6}) } + | TYPE type_declarations + { Pstr_type(List.rev $2) } + | EXCEPTION UIDENT constructor_arguments + { Pstr_exception($2, $3) } + | MODULE UIDENT module_binding + { Pstr_module($2, $3) } + | MODULE TYPE ident EQUAL module_type + { Pstr_modtype($3, $5) } + | OPEN mod_longident + { Pstr_open($2, rhs_loc 2) } +; +module_binding: + EQUAL module_expr + { $2 } + | COLON module_type EQUAL module_expr + { mkmod(Pmod_constraint($4, $2)) } + | LPAREN UIDENT COLON module_type RPAREN module_binding + { mkmod(Pmod_functor($2, $4, $6)) } +; + +/* Module types */ + +module_type: + mty_longident + { mkmty(Pmty_ident $1) } + | SIG signature END + { mkmty(Pmty_signature(List.rev $2)) } + | FUNCTOR LPAREN UIDENT COLON module_type RPAREN MINUSGREATER module_type + %prec FUNCTOR + { mkmty(Pmty_functor($3, $5, $8)) } + | module_type WITH type_declarations + { mkmty(Pmty_with($1, List.rev $3)) } + | LPAREN module_type RPAREN + { $2 } +; +signature: + /* empty */ { [] } + | signature signature_item { $2 :: $1 } +; +signature_item: + VAL val_ident COLON core_type + { Psig_value($2, {pval_type = $4; pval_prim = None}) } + | VAL val_ident COLON core_type EQUAL STRING + { Psig_value($2, {pval_type = $4; pval_prim = Some $6}) } + | TYPE type_declarations + { Psig_type(List.rev $2) } + | EXCEPTION UIDENT constructor_arguments + { Psig_exception($2, $3) } + | MODULE UIDENT module_declaration + { Psig_module($2, $3) } + | MODULE TYPE ident + { Psig_modtype($3, Pmodtype_abstract) } + | MODULE TYPE ident EQUAL module_type + { Psig_modtype($3, Pmodtype_manifest $5) } + | OPEN mod_longident + { Psig_open($2, rhs_loc 2) } + | INCLUDE module_type + { Psig_include $2 } +; + +module_declaration: + COLON module_type + { $2 } + | LPAREN UIDENT COLON module_type RPAREN module_declaration + { mkmty(Pmty_functor($2, $4, $6)) } +; + +/* Core expressions */ + +expr: + simple_expr + { $1 } + | simple_expr simple_expr_list %prec prec_appl + { mkexp(Pexp_apply($1, List.rev $2)) } + | LET rec_flag let_bindings IN expr %prec prec_let + { mkexp(Pexp_let($2, List.rev $3, $5)) } + | FUNCTION match_cases %prec prec_fun + { mkexp(Pexp_function(List.rev $2)) } + | FUN pattern fun_def %prec prec_fun + { mkexp(Pexp_function([$2, $3])) } + | MATCH expr WITH match_cases %prec prec_match + { mkexp(Pexp_match($2, List.rev $4)) } + | TRY expr WITH match_cases %prec prec_try + { mkexp(Pexp_try($2, List.rev $4)) } + | expr_comma_list + { mkexp(Pexp_tuple(List.rev $1)) } + | constr_longident simple_expr %prec prec_constr_appl + { mkexp(Pexp_construct($1, Some $2)) } + | IF expr THEN expr ELSE expr %prec prec_if + { mkexp(Pexp_ifthenelse($2, $4, Some $6)) } + | IF expr THEN expr %prec prec_if + { mkexp(Pexp_ifthenelse($2, $4, None)) } + | expr SEMI expr + { mkexp(Pexp_sequence($1, $3)) } + | WHILE expr DO expr DONE + { mkexp(Pexp_while($2, $4)) } + | FOR val_ident EQUAL expr direction_flag expr DO expr DONE + { mkexp(Pexp_for($2, $4, $6, $5, $8)) } + | expr COLONCOLON expr + { mkexp(Pexp_construct(Lident "::", Some(mkexp(Pexp_tuple[$1;$3])))) } + | expr INFIXOP1 expr + { mkinfix $1 $2 $3 } + | expr INFIXOP2 expr + { mkinfix $1 $2 $3 } + | expr INFIXOP3 expr + { mkinfix $1 $2 $3 } + | expr INFIXOP4 expr + { mkinfix $1 $2 $3 } + | expr SUBTRACTIVE expr + { mkinfix $1 $2 $3 } + | expr STAR expr + { mkinfix $1 "*" $3 } + | expr EQUAL expr + { mkinfix $1 "=" $3 } + | expr OR expr + { mkinfix $1 "or" $3 } + | expr AMPERSAND expr + { mkinfix $1 "&" $3 } + | expr COLONEQUAL expr + { mkinfix $1 ":=" $3 } + | SUBTRACTIVE expr %prec prec_unary_minus + { mkuminus $1 $2 } + | simple_expr DOT label_longident LESSMINUS expr + { mkexp(Pexp_setfield($1, $3, $5)) } + | simple_expr DOTLPAREN expr RPAREN LESSMINUS expr + { mkexp(Pexp_apply(mkexp(Pexp_ident(array_function "Array" "set")), + [$1; $3; $6])) } + | simple_expr DOTLBRACKET expr RBRACKET LESSMINUS expr + { mkexp(Pexp_apply(mkexp(Pexp_ident(array_function "String" "set")), + [$1; $3; $6])) } +; +simple_expr: + val_longident + { mkexp(Pexp_ident $1) } + | constant + { mkexp(Pexp_constant $1) } + | constr_longident + { mkexp(Pexp_construct($1, None)) } + | LPAREN expr RPAREN + { $2 } + | BEGIN expr END + { $2 } + | LPAREN expr COLON core_type RPAREN + { mkexp(Pexp_constraint($2, $4)) } + | simple_expr DOT label_longident + { mkexp(Pexp_field($1, $3)) } + | simple_expr DOTLPAREN expr RPAREN + { mkexp(Pexp_apply(mkexp(Pexp_ident(array_function "Array" "get")), + [$1; $3])) } + | simple_expr DOTLBRACKET expr RBRACKET + { mkexp(Pexp_apply(mkexp(Pexp_ident(array_function "String" "get")), + [$1; $3])) } + | LBRACE lbl_expr_list RBRACE + { mkexp(Pexp_record(List.rev $2)) } + | LBRACKETBAR expr_semi_list BARRBRACKET + { mkexp(Pexp_array(List.rev $2)) } + | LBRACKETBAR BARRBRACKET + { mkexp(Pexp_array []) } + | LBRACKET expr_semi_list RBRACKET + { mklistexp(List.rev $2) } + | PREFIXOP simple_expr + { mkexp(Pexp_apply(mkoperator $1 1, [$2])) } +; +simple_expr_list: + simple_expr + { [$1] } + | simple_expr_list simple_expr + { $2 :: $1 } +; +let_bindings: + let_binding { [$1] } + | let_bindings AND let_binding { $3 :: $1 } +; +let_binding: + val_ident fun_binding + { ({ppat_desc = Ppat_var $1; ppat_loc = rhs_loc 1}, $2) } + | LPAREN pattern RPAREN EQUAL expr + { ($2, $5) } +; +fun_binding: + EQUAL expr %prec prec_let + { $2 } + | COLON core_type EQUAL expr %prec prec_let + { mkexp(Pexp_constraint($4,$2)) } + | pattern fun_binding + { mkexp(Pexp_function[$1,$2]) } +; +match_cases: + pattern match_action { [$1, $2] } + | match_cases BAR pattern match_action { ($3, $4) :: $1 } +; +fun_def: + match_action { $1 } + | pattern fun_def { mkexp(Pexp_function[$1,$2]) } +; +match_action: + MINUSGREATER expr { $2 } + | WHEN expr MINUSGREATER expr { mkexp(Pexp_when($2, $4)) } +; +expr_comma_list: + expr_comma_list COMMA expr { $3 :: $1 } + | expr COMMA expr { [$3; $1] } +; +lbl_expr_list: + label_longident EQUAL expr %prec prec_list + { [$1,$3] } + | lbl_expr_list SEMI label_longident EQUAL expr %prec prec_list + { ($3, $5) :: $1 } +; +expr_semi_list: + expr %prec prec_list { [$1] } + | expr_semi_list SEMI expr %prec prec_list { $3 :: $1 } +; + +/* Patterns */ + +pattern: + val_ident + { mkpat(Ppat_var $1) } + | UNDERSCORE + { mkpat(Ppat_any) } + | pattern AS val_ident + { mkpat(Ppat_alias($1, $3)) } + | signed_constant + { mkpat(Ppat_constant $1) } + | CHAR DOTDOT CHAR + { mkrangepat $1 $3 } + | pattern_comma_list + { mkpat(Ppat_tuple(List.rev $1)) } + | constr_longident + { mkpat(Ppat_construct($1, None)) } + | constr_longident pattern %prec prec_constr_appl + { mkpat(Ppat_construct($1, Some $2)) } + | pattern COLONCOLON pattern + { mkpat(Ppat_construct(Lident "::", Some(mkpat(Ppat_tuple[$1;$3])))) } + | LBRACE lbl_pattern_list RBRACE + { mkpat(Ppat_record(List.rev $2)) } + | LBRACKET pattern_semi_list RBRACKET + { mklistpat(List.rev $2) } + | pattern BAR pattern + { mkpat(Ppat_or($1, $3)) } + | LPAREN pattern RPAREN + { $2 } + | LPAREN pattern COLON core_type RPAREN + { mkpat(Ppat_constraint($2, $4)) } +; +pattern_comma_list: + pattern_comma_list COMMA pattern { $3 :: $1 } + | pattern COMMA pattern { [$3; $1] } +; +pattern_semi_list: + pattern { [$1] } + | pattern_semi_list SEMI pattern { $3 :: $1 } +; +lbl_pattern_list: + label_longident EQUAL pattern { [($1, $3)] } + | lbl_pattern_list SEMI label_longident EQUAL pattern { ($3, $5) :: $1 } +; + +/* Type declarations */ + +type_declarations: + type_declaration { [$1] } + | type_declarations AND type_declaration { $3 :: $1 } +; +type_declaration: + type_parameters LIDENT type_kind + { ($2, {ptype_params = $1; ptype_kind = $3; ptype_loc = symbol_loc()}) } +; +type_kind: + /*empty*/ + { Ptype_abstract } + | EQUAL core_type + { Ptype_manifest $2 } + | EQUAL constructor_declarations + { Ptype_variant(List.rev $2) } + | EQUAL LBRACE label_declarations RBRACE + { Ptype_record(List.rev $3) } +; +type_parameters: + /*empty*/ { [] } + | type_parameter { [$1] } + | LPAREN type_parameter_list RPAREN { List.rev $2 } +; +type_parameter: + QUOTE ident { $2 } +; +type_parameter_list: + type_parameter { [$1] } + | type_parameter_list COMMA type_parameter { $3 :: $1 } +; +constructor_declarations: + constructor_declaration { [$1] } + | constructor_declarations BAR constructor_declaration { $3 :: $1 } +; +constructor_declaration: + constr_ident constructor_arguments { ($1, $2) } +; +constructor_arguments: + /*empty*/ { [] } + | OF core_type_list { List.rev $2 } +; +label_declarations: + label_declaration { [$1] } + | label_declarations SEMI label_declaration { $3 :: $1 } +; +label_declaration: + mutable_flag LIDENT COLON core_type { ($2, $1, $4) } +; + +/* Core types */ + +core_type: + simple_core_type + { $1 } + | core_type MINUSGREATER core_type %prec prec_type_arrow + { mktyp(Ptyp_arrow($1, $3)) } + | core_type_tuple + { mktyp(Ptyp_tuple(List.rev $1)) } +; +simple_core_type: + QUOTE ident + { mktyp(Ptyp_var $2) } + | type_longident + { mktyp(Ptyp_constr($1, [])) } + | simple_core_type type_longident %prec prec_constr_appl + { mktyp(Ptyp_constr($2, [$1])) } + | LPAREN core_type_comma_list RPAREN type_longident %prec prec_constr_appl + { mktyp(Ptyp_constr($4, List.rev $2)) } + | LPAREN core_type RPAREN + { $2 } +; + +core_type_tuple: + simple_core_type STAR simple_core_type + { [$3; $1] } + | core_type_tuple STAR simple_core_type + { $3 :: $1 } +; +core_type_comma_list: + core_type COMMA core_type { [$3; $1] } + | core_type_comma_list COMMA core_type { $3 :: $1 } +; +core_type_list: + simple_core_type { [$1] } + | core_type_list STAR simple_core_type { $3 :: $1 } +; + +/* Constants */ + +constant: + INT { Const_int $1 } + | CHAR { Const_char $1 } + | STRING { Const_string $1 } + | FLOAT { Const_float $1 } +; +signed_constant: + constant { $1 } + | SUBTRACTIVE INT { Const_int(- $2) } + | SUBTRACTIVE FLOAT { Const_float("-" ^ $2) } +; + +/* Identifiers and long identifiers */ + +ident: + UIDENT { $1 } + | LIDENT { $1 } +; +val_ident: + LIDENT { $1 } + | LPAREN operator RPAREN { $2 } +; +operator: + PREFIXOP { $1 } + | INFIXOP1 { $1 } + | INFIXOP2 { $1 } + | INFIXOP3 { $1 } + | INFIXOP4 { $1 } + | SUBTRACTIVE { $1 } + | STAR { "*" } + | EQUAL { "=" } + | OR { "or" } + | AMPERSAND { "&" } + | COLONEQUAL { ":=" } +; +constr_ident: + UIDENT { $1 } + | LBRACKET RBRACKET { "[]" } + | LPAREN RPAREN { "()" } + | COLONCOLON { "::" } + | FALSE { "false" } + | TRUE { "true" } +; + +val_longident: + val_ident { Lident $1 } + | mod_longident DOT val_ident { Ldot($1, $3) } +; +constr_longident: + mod_longident { $1 } + | LBRACKET RBRACKET { Lident "[]" } + | LPAREN RPAREN { Lident "()" } + | FALSE { Lident "false" } + | TRUE { Lident "true" } +; +label_longident: + LIDENT { Lident $1 } + | mod_longident DOT LIDENT { Ldot($1, $3) } +; +type_longident: + LIDENT { Lident $1 } + | mod_longident DOT LIDENT { Ldot($1, $3) } +; +mod_longident: + UIDENT { Lident $1 } + | mod_longident DOT UIDENT { Ldot($1, $3) } +; +mty_longident: + ident { Lident $1 } + | mod_longident DOT ident { Ldot($1, $3) } +; + +/* Miscellaneous */ + +rec_flag: + /* empty */ { Nonrecursive } + | REC { Recursive } +; +direction_flag: + TO { Upto } + | DOWNTO { Downto } +; +mutable_flag: + /* empty */ { Immutable } + | MUTABLE { Mutable } +; + +%% diff --git a/parsing/parsetree.mli b/parsing/parsetree.mli new file mode 100644 index 0000000000..32bcfc10d7 --- /dev/null +++ b/parsing/parsetree.mli @@ -0,0 +1,143 @@ +(* Abstract syntax tree produced by parsing *) + +open Misc +open Asttypes + +(* Type expressions for the core language *) + +type core_type = + { ptyp_desc: core_type_desc; + ptyp_loc: Location.t } + +and core_type_desc = + Ptyp_var of string + | Ptyp_arrow of core_type * core_type + | Ptyp_tuple of core_type list + | Ptyp_constr of Longident.t * core_type list + +(* Value expressions for the core language *) + +type pattern = + { ppat_desc: pattern_desc; + ppat_loc: Location.t } + +and pattern_desc = + Ppat_any + | Ppat_var of string + | Ppat_alias of pattern * string + | Ppat_constant of constant + | Ppat_tuple of pattern list + | Ppat_construct of Longident.t * pattern option + | Ppat_record of (Longident.t * pattern) list + | Ppat_or of pattern * pattern + | Ppat_constraint of pattern * core_type + +type expression = + { pexp_desc: expression_desc; + pexp_loc: Location.t } + +and expression_desc = + Pexp_ident of Longident.t + | Pexp_constant of constant + | Pexp_let of rec_flag * (pattern * expression) list * expression + | Pexp_function of (pattern * expression) list + | Pexp_apply of expression * expression list + | Pexp_match of expression * (pattern * expression) list + | Pexp_try of expression * (pattern * expression) list + | Pexp_tuple of expression list + | Pexp_construct of Longident.t * expression option + | Pexp_record of (Longident.t * expression) list + | Pexp_field of expression * Longident.t + | Pexp_setfield of expression * Longident.t * expression + | Pexp_array of expression list + | Pexp_ifthenelse of expression * expression * expression option + | Pexp_sequence of expression * expression + | Pexp_while of expression * expression + | Pexp_for of string * expression * expression * direction_flag * expression + | Pexp_constraint of expression * core_type + | Pexp_when of expression * expression + +(* Value descriptions *) + +type value_description = + { pval_type: core_type; + pval_prim: string option } + +(* Type declarations *) + +type type_declaration = + { ptype_params: string list; + ptype_kind: type_kind; + ptype_loc: Location.t } + +and type_kind = + Ptype_abstract + | Ptype_manifest of core_type + | Ptype_variant of (string * core_type list) list + | Ptype_record of (string * mutable_flag * core_type) list + +type exception_declaration = core_type list + +(* Type expressions for the module language *) + +type module_type = + { pmty_desc: module_type_desc; + pmty_loc: Location.t } + +and module_type_desc = + Pmty_ident of Longident.t + | Pmty_signature of signature + | Pmty_functor of string * module_type * module_type + | Pmty_with of module_type * (string * type_declaration) list + +and signature = signature_item list + +and signature_item = + Psig_value of string * value_description + | Psig_type of (string * type_declaration) list + | Psig_exception of string * exception_declaration + | Psig_module of string * module_type + | Psig_modtype of string * modtype_declaration + | Psig_open of Longident.t * Location.t + | Psig_include of module_type + +and modtype_declaration = + Pmodtype_abstract + | Pmodtype_manifest of module_type + +(* Value expressions for the module language *) + +type module_expr = + { pmod_desc: module_expr_desc; + pmod_loc: Location.t } + +and module_expr_desc = + Pmod_ident of Longident.t + | Pmod_structure of structure + | Pmod_functor of string * module_type * module_expr + | Pmod_apply of module_expr * module_expr + | Pmod_constraint of module_expr * module_type + +and structure = structure_item list + +and structure_item = + Pstr_eval of expression + | Pstr_value of rec_flag * (pattern * expression) list + | Pstr_primitive of string * value_description + | Pstr_type of (string * type_declaration) list + | Pstr_exception of string * exception_declaration + | Pstr_module of string * module_expr + | Pstr_modtype of string * module_type + | Pstr_open of Longident.t * Location.t + +(* Toplevel phrases *) + +type toplevel_phrase = + Ptop_def of structure + | Ptop_dir of string * directive_argument + +and directive_argument = + Pdir_none + | Pdir_string of string + | Pdir_int of int + | Pdir_ident of Longident.t diff --git a/stdlib/.depend b/stdlib/.depend new file mode 100644 index 0000000000..f988c3da91 --- /dev/null +++ b/stdlib/.depend @@ -0,0 +1,28 @@ +baltree.cmi: list.cmi +format.cmi: list.cmi +gc.cmi: +lexing.cmi: obj.cmi +parsing.cmi: lexing.cmi obj.cmi +pervasives.cmi: sys.cmi +printexc.cmi: +arg.cmo: arg.cmi sys.cmi string.cmi list.cmi array.cmi printf.cmi +array.cmo: array.cmi list.cmi array.cmi +baltree.cmo: baltree.cmi baltree.cmi list.cmi +char.cmo: char.cmi char.cmi string.cmi +filename.cmo: filename.cmi string.cmi +format.cmo: format.cmi queue.cmi string.cmi list.cmi +gc.cmo: gc.cmi printf.cmi +hashtbl.cmo: hashtbl.cmi array.cmi +lexing.cmo: lexing.cmi string.cmi obj.cmi +list.cmo: list.cmi list.cmi +obj.cmo: obj.cmi +parsing.cmo: parsing.cmi array.cmi lexing.cmi obj.cmi +pervasives.cmo: pervasives.cmi sys.cmi +printexc.cmo: printexc.cmi obj.cmi +printf.cmo: printf.cmi string.cmi obj.cmi +queue.cmo: queue.cmi +set.cmo: set.cmi baltree.cmi +sort.cmo: sort.cmi +stack.cmo: stack.cmi list.cmi +string.cmo: string.cmi char.cmi string.cmi +sys.cmo: sys.cmi diff --git a/stdlib/Makefile b/stdlib/Makefile new file mode 100644 index 0000000000..a00cd82b9a --- /dev/null +++ b/stdlib/Makefile @@ -0,0 +1,55 @@ +include ../Makefile.config + +COMPILER=../camlc +CAMLC=../boot/camlrun $(COMPILER) +CAMLDEP=../tools/camldep + +OBJS=pervasives.cmo string.cmo char.cmo list.cmo array.cmo sys.cmo \ + hashtbl.cmo sort.cmo filename.cmo obj.cmo lexing.cmo parsing.cmo \ + baltree.cmo set.cmo stack.cmo queue.cmo \ + printf.cmo format.cmo arg.cmo printexc.cmo gc.cmo + +all: stdlib.cma header.exe + +install: + cp stdlib.cma *.cmi *.mli header.exe $(LIBDIR) + +stdlib.cma: $(OBJS) + $(CAMLC) -a -o stdlib.cma $(OBJS) + +header.exe: header.c ../Makefile.config + if $(SHARPBANGSCRIPTS); \ + then echo "#!$(BINDIR)/cslrun" > header.exe; \ + else $(CC) $(CCCOMPOPTS) $(CCLINKOPTS) header.c -o header.exe; \ + strip header.exe; fi + +pervasives.cmi: pervasives.mli + $(CAMLC) -nopervasives -c pervasives.mli + +pervasives.cmo: pervasives.ml + $(CAMLC) -nopervasives -c pervasives.ml + +sys.cmi: sys.mli + $(CAMLC) -nopervasives -c sys.mli + +.SUFFIXES: .mli .ml .cmi .cmo + +.mli.cmi: + $(CAMLC) $(COMPFLAGS) -c $< + +.ml.cmo: + $(CAMLC) $(COMPFLAGS) -c $< + +$(OBJS): pervasives.cmi + +$(OBJS): $(COMPILER) +$(OBJS:.cmo=.cmi): $(COMPILER) + +clean: + rm -f *.cm[ioa] + rm -f *~ + +include .depend + +depend: + $(CAMLDEP) *.mli *.ml > .depend diff --git a/stdlib/arg.ml b/stdlib/arg.ml new file mode 100644 index 0000000000..3726760f49 --- /dev/null +++ b/stdlib/arg.ml @@ -0,0 +1,61 @@ +type spec = + String of (string -> unit) + | Int of (int -> unit) + | Unit of (unit -> unit) + | Float of (float -> unit) + +exception Bad of string + +type error = + Unknown of string + | Wrong of string * string * string (* option, actual, expected *) + | Missing of string + | Message of string + +open Printf + +let stop error = + let progname = + if Array.length Sys.argv > 0 then Sys.argv.(0) else "(?)" in + begin match error with + Unknown s -> + eprintf "%s: unknown option `%s'.\n" progname s + | Missing s -> + eprintf "%s: option `%s' needs an argument.\n" progname s + | Wrong (opt, arg, expected) -> + eprintf "%s: wrong argument `%s'; option `%s' expects %s.\n" + progname arg opt expected + | Message s -> + eprintf "%s: %s.\n" progname s + end; + exit 2 + +let parse speclist anonfun = + let rec p = function + [] -> () + | s :: t -> + if String.length s >= 1 & String.get s 0 = '-' + then do_key s t + else begin try (anonfun s); p t with Bad m -> stop (Message m) end + and do_key s l = + let action = + try + List.assoc s speclist + with Not_found -> + stop (Unknown s) in + try + match (action, l) with + (Unit f, l) -> f (); p l + | (String f, arg::t) -> f arg; p t + | (Int f, arg::t) -> + begin try f (int_of_string arg) + with Failure "int_of_string" -> stop (Wrong (s, arg, "an integer")) + end; + p t + | (Float f, arg::t) -> f (float_of_string arg); p t + | (_, []) -> stop (Missing s) + with Bad m -> stop (Message m) + in + match Array.to_list Sys.argv with + [] -> () + | a::l -> p l diff --git a/stdlib/arg.mli b/stdlib/arg.mli new file mode 100644 index 0000000000..593d5b36b6 --- /dev/null +++ b/stdlib/arg.mli @@ -0,0 +1,46 @@ +(* Parsing of command line arguments. *) + +(* This module provides a general mechanism for extracting options and + arguments from the command line to the program. *) + +(* Syntax of command lines: + A keyword is a character string starting with a [-]. + An option is a keyword alone or followed by an argument. + There are four types of keywords: Unit, String, Int, and Float. + Unit keywords do not take an argument. + String, Int, and Float keywords take the following word on the command line + as an argument. + Arguments not preceded by a keyword are called anonymous arguments. *) + +(* Examples ([cmd] is assumed to be the command name): + +- [cmd -flag ](a unit option) +- [cmd -int 1 ](an int option with argument [1]) +- [cmd -string foobar ](a string option with argument ["foobar"]) +- [cmd -float 12.34 ](a float option with argument [12.34]) +- [cmd 1 2 3 ](three anonymous arguments: ["1"], ["2"], and ["3"]) +- [cmd 1 2 -flag 3 -string bar 4] +- [ ](four anonymous arguments, a unit option, and +- [ ] a string option with argument ["bar"]) +*) + +type spec = + String of (string -> unit) + | Int of (int -> unit) + | Unit of (unit -> unit) + | Float of (float -> unit) + (* The concrete type describing the behavior associated + with a keyword. *) + +val parse : (string * spec) list -> (string -> unit) -> unit + (* [parse speclist anonfun] parses the command line, + calling the functions in [speclist] whenever appropriate, + and [anonfun] on anonymous arguments. + The functions are called in the same order as they appear + on the command line. + The strings in the [(string * spec) list] are keywords and must + start with a [-], else they are ignored. *) + +exception Bad of string + (* Functions in [speclist] or [anonfun] can raise [Bad] with + an error message to reject invalid arguments. *) diff --git a/stdlib/array.ml b/stdlib/array.ml new file mode 100644 index 0000000000..aa156bef95 --- /dev/null +++ b/stdlib/array.ml @@ -0,0 +1,99 @@ +(* Array operations *) + +external length : 'a array -> int = "%array_length" +external unsafe_get: 'a array -> int -> 'a = "%array_get" +external unsafe_set: 'a array -> int -> 'a -> unit = "%array_set" +external new: int -> 'a -> 'a array = "make_vect" + +let get a n = + if n < 0 or n >= length a + then invalid_arg "Array.get" + else unsafe_get a n + +let set a n v = + if n < 0 or n >= length a + then invalid_arg "Array.set" + else unsafe_set a n v + +let new_matrix sx sy init = + let res = new sx [||] in + for x = 0 to pred sx do + unsafe_set res x (new sy init) + done; + res + +let copy a = + let l = length a in + if l = 0 then [||] else begin + let r = new l (unsafe_get a 0) in + for i = 1 to l-1 do + unsafe_set r i (unsafe_get a i) + done; + r + end + +let concat_aux a1 a2 l1 l2 init = + let r = new (l1 + l2) init in + for i = 0 to l1 - 1 do unsafe_set r i (unsafe_get a1 i) done; + for i = 0 to l2 - 1 do unsafe_set r (i + l1) (unsafe_get a1 i) done; + r + +let concat a1 a2 = + let l1 = length a1 and l2 = length a2 in + if l1 = 0 & l2 = 0 then [||] else begin + let r = new (l1 + l2) (unsafe_get (if l1 > 0 then a1 else a2) 0) in + for i = 0 to l1 - 1 do unsafe_set r i (unsafe_get a1 i) done; + for i = 0 to l2 - 1 do unsafe_set r (i + l1) (unsafe_get a1 i) done; + r + end + +let sub a ofs len = + if ofs < 0 or len < 0 or ofs + len > length a then invalid_arg "Array.sub" + else if len = 0 then [||] + else begin + let r = new len (unsafe_get a ofs) in + for i = 1 to len - 1 do unsafe_set r i (unsafe_get a (ofs + i)) done; + r + end + +let fill a ofs len v = + if ofs < 0 or len < 0 or ofs + len > length a + then invalid_arg "Array.fill" + else for i = ofs to ofs + len - 1 do unsafe_set a i v done + +let blit a1 ofs1 a2 ofs2 len = + if len < 0 or ofs1 < 0 or ofs1 + len > length a1 + or ofs2 < 0 or ofs2 + len > length a2 + then invalid_arg "Array.blit" + else + for i = 0 to len - 1 do + unsafe_set a2 (ofs2 + i) (unsafe_get a1 (ofs1 + i)) + done + +let iter f a = + for i = 0 to length a - 1 do f(unsafe_get a i) done + +let map f a = + let l = length a in + if l = 0 then [||] else begin + let r = new l (f(unsafe_get a 0)) in + for i = 1 to l - 1 do + unsafe_set r i (f(unsafe_get a i)) + done; + r + end + +let to_list a = + let len = length a in + let rec tolist i = + if i >= len then [] else unsafe_get a i :: tolist(i+1) in + tolist 0 + +let of_list = function + [] -> [||] + | hd::tl -> + let a = new (List.length tl + 1) hd in + let rec fill i = function + [] -> a + | hd::tl -> unsafe_set a i hd; fill (i+1) tl in + fill 1 tl diff --git a/stdlib/array.mli b/stdlib/array.mli new file mode 100644 index 0000000000..add7a92cab --- /dev/null +++ b/stdlib/array.mli @@ -0,0 +1,21 @@ +(* Array operations *) + +val length : 'a array -> int = "%array_length" + +val get: 'a array -> int -> 'a +val set: 'a array -> int -> 'a -> unit +val new: int -> 'a -> 'a array = "make_vect" +val new_matrix: int -> int -> 'a -> 'a array array +val concat: 'a array -> 'a array -> 'a array +val sub: 'a array -> int -> int -> 'a array +val copy: 'a array -> 'a array +val fill: 'a array -> int -> int -> 'a -> unit +val blit: 'a array -> int -> 'a array -> int -> int -> unit +val iter: ('a -> 'b) -> 'a array -> unit +val map: ('a -> 'b) -> 'a array -> 'b array +val to_list: 'a array -> 'a list +val of_list: 'a list -> 'a array + +val unsafe_get: 'a array -> int -> 'a = "%array_get" +val unsafe_set: 'a array -> int -> 'a -> unit = "%array_set" + diff --git a/stdlib/baltree.ml b/stdlib/baltree.ml new file mode 100644 index 0000000000..7c61a8f54a --- /dev/null +++ b/stdlib/baltree.ml @@ -0,0 +1,193 @@ +(* Weight-balanced binary trees. + These are binary trees such that one child of a node has at most N times + as many elements as the other child. We take N=3. *) + +type 'a t = Empty | Node of 'a t * 'a * 'a t * int + (* The type of trees containing elements of type ['a]. + [Empty] is the empty tree (containing no elements). *) + +type 'a contents = Nothing | Something of 'a + (* Used with the functions [modify] and [List.split], to represent + the presence or the absence of an element in a tree. *) + +(* Compute the size (number of nodes and leaves) of a tree. *) + +let size = function + Empty -> 1 + | Node(_, _, _, s) -> s + +(* Creates a new node with left son l, val x and right son r. + l and r must be balanced and size l / size r must be between 1/N and N. + Inline expansion of size for better speed. *) + +let new l x r = + let sl = match l with Empty -> 0 | Node(_,_,_,s) -> s in + let sr = match r with Empty -> 0 | Node(_,_,_,s) -> s in + Node(l, x, r, sl + sr + 1) + +(* Same as new, but performs rebalancing if necessary. + Assumes l and r balanced, and size l / size r "reasonable" + (between 1/N^2 and N^2 ???). + Inline expansion of new for better speed in the most frequent case + where no rebalancing is required. *) + +let bal l x r = + let sl = match l with Empty -> 0 | Node(_,_,_,s) -> s in + let sr = match r with Empty -> 0 | Node(_,_,_,s) -> s in + if sl > 3 * sr then begin + match l with + Empty -> invalid_arg "Baltree.bal" + | Node(ll, lv, lr, _) -> + if size ll >= size lr then + new ll lv (new lr x r) + else begin + match lr with + Empty -> invalid_arg "Baltree.bal" + | Node(lrl, lrv, lrr, _)-> + new (new ll lv lrl) lrv (new lrr x r) + end + end else if sr > 3 * sl then begin + match r with + Empty -> invalid_arg "Baltree.bal" + | Node(rl, rv, rr, _) -> + if size rr >= size rl then + new (new l x rl) rv rr + else begin + match rl with + Empty -> invalid_arg "Baltree.bal" + | Node(rll, rlv, rlr, _) -> + new (new l x rll) rlv (new rlr rv rr) + end + end else + Node(l, x, r, sl + sr + 1) + +(* Same as bal, but rebalance regardless of the original ratio + size l / size r *) + +let rec join l x r = + match bal l x r with + Empty -> invalid_arg "Baltree.join" + | Node(l', x', r', _) as t' -> + let sl = size l' and sr = size r' in + if sl > 3 * sr or sr > 3 * sl then join l' x' r' else t' + +(* Merge two trees l and r into one. + All elements of l must precede the elements of r. + Assumes size l / size r between 1/N and N. *) + +let rec merge t1 t2 = + match (t1, t2) with + (Empty, t) -> t + | (t, Empty) -> t + | (Node(l1, v1, r1, h1), Node(l2, v2, r2, h2)) -> + bal l1 v1 (bal (merge r1 l2) v2 r2) + +(* Same as merge, but does not assume anything about l and r. *) + +let rec concat t1 t2 = + match (t1, t2) with + (Empty, t) -> t + | (t, Empty) -> t + | (Node(l1, v1, r1, h1), Node(l2, v2, r2, h2)) -> + join l1 v1 (join (concat r1 l2) v2 r2) + +(* Insertion *) + +let add searchpred x t = + let rec add = function + Empty -> + Node(Empty, x, Empty, 1) + | Node(l, v, r, _) as t -> + let c = searchpred v in + if c == 0 then t else + if c < 0 then bal (add l) v r else bal l v (add r) + in add t + +(* Membership *) + +let contains searchpred t = + let rec contains = function + Empty -> false + | Node(l, v, r, _) -> + let c = searchpred v in + if c == 0 then true else + if c < 0 then contains l else contains r + in contains t + +(* Search *) + +let find searchpred t = + let rec find = function + Empty -> + raise Not_found + | Node(l, v, r, _) -> + let c = searchpred v in + if c == 0 then v else + if c < 0 then find l else find r + in find t + +(* Deletion *) + +let remove searchpred t = + let rec remove = function + Empty -> + Empty + | Node(l, v, r, _) -> + let c = searchpred v in + if c == 0 then merge l r else + if c < 0 then bal (remove l) v r else bal l v (remove r) + in remove t + +(* Modification *) + +let modify searchpred modifier t = + let rec modify = function + Empty -> + begin match modifier Nothing with + Nothing -> Empty + | Something v -> Node(Empty, v, Empty, 1) + end + | Node(l, v, r, s) -> + let c = searchpred v in + if c == 0 then + begin match modifier(Something v) with + Nothing -> merge l r + | Something v' -> Node(l, v', r, s) + end + else if c < 0 then bal (modify l) v r else bal l v (modify r) + in modify t + +(* Splitting *) + +let split searchpred = + let rec split = function + Empty -> + (Empty, Nothing, Empty) + | Node(l, v, r, _) -> + let c = searchpred v in + if c == 0 then (l, Something v, r) + else if c < 0 then + let (ll, vl, rl) = split l in (ll, vl, join rl v r) + else + let (lr, vr, rr) = split r in (join l v lr, vr, rr) + in split + +(* Comparison (by lexicographic ordering of the fringes of the two trees). *) + +let compare cmp s1 s2 = + let rec compare_aux l1 l2 = + match (l1, l2) with + ([], []) -> 0 + | ([], _) -> -1 + | (_, []) -> 1 + | (Empty::t1, Empty::t2) -> + compare_aux t1 t2 + | (Node(Empty, v1, r1, _) :: t1, Node(Empty, v2, r2, _) :: t2) -> + let c = cmp v1 v2 in + if c != 0 then c else compare_aux (r1::t1) (r2::t2) + | (Node(l1, v1, r1, _) :: t1, t2) -> + compare_aux (l1 :: Node(Empty, v1, r1, 0) :: t1) t2 + | (t1, Node(l2, v2, r2, _) :: t2) -> + compare_aux t1 (l2 :: Node(Empty, v2, r2, 0) :: t2) + in + compare_aux [s1] [s2] diff --git a/stdlib/baltree.mli b/stdlib/baltree.mli new file mode 100644 index 0000000000..4e6f35efbb --- /dev/null +++ b/stdlib/baltree.mli @@ -0,0 +1,77 @@ +(* Basic balanced binary trees *) + +(* This module implements balanced ordered binary trees. + All operations over binary trees are applicative (no side-effects). + The [set] and [List.map] modules are based on this module. + This modules gives a more direct access to the internals of the + binary tree implementation than the [set] and [List.map] abstractions, + but is more delicate to use and not as safe. For advanced users only. *) + +type 'a t = Empty | Node of 'a t * 'a * 'a t * int + (* The type of trees containing elements of type ['a]. + [Empty] is the empty tree (containing no elements). *) + +type 'a contents = Nothing | Something of 'a + (* Used with the functions [modify] and [List.split], to represent + the presence or the absence of an element in a tree. *) + +val add: ('a -> int) -> 'a -> 'a t -> 'a t + (* [add f x t] inserts the element [x] into the tree [t]. + [f] is an ordering function: [f y] must return [0] if + [x] and [y] are equal (or equivalent), a negative integer if + [x] is smaller than [y], and a positive integer if [x] is + greater than [y]. The tree [t] is returned unchanged if + it already contains an element equivalent to [x] (that is, + an element [y] such that [f y] is [0]). + The ordering [f] must be consistent with the orderings used + to build [t] with [add], [remove], [modify] or [List.split] + operations. *) +val contains: ('a -> int) -> 'a t -> bool + (* [contains f t] checks whether [t] contains an element + satisfying [f], that is, an element [x] such + that [f x] is [0]. [f] is an ordering function with the same + constraints as for [add]. It can be coarser (identify more + elements) than the orderings used to build [t], but must be + consistent with them. *) +val find: ('a -> int) -> 'a t -> 'a + (* Same as [contains], except that [find f t] returns the element [x] + such that [f x] is [0], or raises [Not_found] if none has been + found. *) +val remove: ('a -> int) -> 'a t -> 'a t + (* [remove f t] removes one element [x] of [t] such that [f x] is [0]. + [f] is an ordering function with the same constraints as for [add]. + [t] is returned unchanged if it does not contain any element + satisfying [f]. If several elements of [t] satisfy [f], + only one is removed. *) +val modify: ('a -> int) -> ('a contents -> 'a contents) -> 'a t -> 'a t + (* General insertion/modification/deletion function. + [modify f g t] searchs [t] for an element [x] satisfying the + ordering function [f]. If one is found, [g] is applied to + [Something x]; if [g] returns [Nothing], the element [x] + is removed; if [g] returns [Something y], the element [y] + replaces [x] in the tree. (It is assumed that [x] and [y] + are equivalent, in particular, that [f y] is [0].) + If the tree does not contain any [x] satisfying [f], + [g] is applied to [Nothing]; if it returns [Nothing], + the tree is returned unchanged; if it returns [Something x], + the element [x] is inserted in the tree. (It is assumed that + [f x] is [0].) The functions [add] and [remove] are special cases + of [modify], slightly more efficient. *) +val split: ('a -> int) -> 'a t -> 'a t * 'a contents * 'a t + (* [split f t] returns a triple [(less, elt, greater)] where + [less] is a tree containing all elements [x] of [t] such that + [f x] is negative, [greater] is a tree containing all + elements [x] of [t] such that [f x] is positive, and [elt] + is [Something x] if [t] contains an element [x] such that + [f x] is [0], and [Nothing] otherwise. *) +val compare: ('a -> 'a -> int) -> 'a t -> 'a t -> int + (* Compare two trees. The first argument [f] is a comparison function + over the tree elements: [f e1 e2] is zero if the elements [e1] and + [e2] are equal, negative if [e1] is smaller than [e2], + and positive if [e1] is greater than [e2]. [compare f t1 t2] + compares the fringes of [t1] and [t2] by lexicographic extension + of [f]. *) +(*--*) +val join: 'a t -> 'a -> 'a t -> 'a t +val concat: 'a t -> 'a t -> 'a t + diff --git a/stdlib/char.ml b/stdlib/char.ml new file mode 100644 index 0000000000..348c5683c4 --- /dev/null +++ b/stdlib/char.ml @@ -0,0 +1,26 @@ +(* Character operations *) + +external code: char -> int = "%identity" +external unsafe_chr: int -> char = "%identity" + +let chr n = + if n < 0 or n > 255 then invalid_arg "Char.chr" else unsafe_chr n + +external is_printable: char -> bool = "is_printable" + +let escaped = function + '\'' -> "\\'" + | '\\' -> "\\\\" + | '\n' -> "\\n" + | '\t' -> "\\t" + | c -> if is_printable c then + String.make 1 c + else begin + let n = code c in + let s = String.create 4 in + String.unsafe_set s 0 '\\'; + String.unsafe_set s 1 (unsafe_chr (48 + n / 100)); + String.unsafe_set s 2 (unsafe_chr (48 + (n / 10) mod 10)); + String.unsafe_set s 3 (unsafe_chr (48 + n mod 10)); + s + end diff --git a/stdlib/char.mli b/stdlib/char.mli new file mode 100644 index 0000000000..40791c94b5 --- /dev/null +++ b/stdlib/char.mli @@ -0,0 +1,6 @@ +(* Character operations *) + +val code: char -> int = "%identity" +val chr: int -> char +val escaped : char -> string +val unsafe_chr: int -> char = "%identity" diff --git a/stdlib/filename.ml b/stdlib/filename.ml new file mode 100644 index 0000000000..af63af08fc --- /dev/null +++ b/stdlib/filename.ml @@ -0,0 +1,49 @@ +let check_suffix name suff = + String.length name >= String.length suff & + String.sub name (String.length name - String.length suff) (String.length suff) + = suff + +let chop_suffix name suff = + let n = String.length name - String.length suff in + if n < 0 then invalid_arg "chop_suffix" else String.sub name 0 n + +let current_dir_name = "." + +let concat dirname filename = + let l = String.length dirname - 1 in + if l < 0 or String.get dirname l = '/' + then dirname ^ filename + else dirname ^ "/" ^ filename + +let is_absolute n = + (String.length n >= 1 & String.sub n 0 1 = "/") + or (String.length n >= 2 & String.sub n 0 2 = "./") + or (String.length n >= 3 & String.sub n 0 3 = "../") + +let slash_pos s = + let rec pos i = + if i < 0 then raise Not_found + else if String.get s i = '/' then i + else pos (i - 1) + in pos (String.length s - 1) + +let basename name = + try + let p = slash_pos name + 1 in + String.sub name p (String.length name - p) + with Not_found -> + name + +let dirname name = + try + match slash_pos name with + 0 -> "/" + | n -> String.sub name 0 (slash_pos name) + with Not_found -> + "." + + + + + + diff --git a/stdlib/filename.mli b/stdlib/filename.mli new file mode 100644 index 0000000000..bf75f61c5f --- /dev/null +++ b/stdlib/filename.mli @@ -0,0 +1,27 @@ +(* Operations on file names *) + +val current_dir_name : string + (* The conventional name for the current directory + (e.g. [.] in Unix). *) +val concat : string -> string -> string + (* [concat dir file] returns a file name that designates file + [file] in directory [dir]. *) +val is_absolute : string -> bool + (* Return [true] if the file name is absolute or starts with an + explicit reference to the current directory ([./] or [../] in + Unix), and [false] if it is relative to the current directory. *) +val check_suffix : string -> string -> bool + (* [check_suffix name suff] returns [true] if the filename [name] + ends with the suffix [suff]. *) +val chop_suffix : string -> string -> string + (* [chop_suffix name suff] removes the suffix [suff] from + the filename [name]. The behavior is undefined if [name] does not + end with the suffix [suff]. *) +val basename : string -> string +val dirname : string -> string + (* Split a file name into directory name / base file name. + [concat (dirname name) (basename name)] returns a file name + which is equivalent to [name]. Moreover, after setting the + current directory to [dirname name] (with [sys__chdir]), + references to [basename name] (which is a relative file name) + designate the same file as [name] before the call to [chdir]. *) diff --git a/stdlib/format.ml b/stdlib/format.ml new file mode 100644 index 0000000000..ca631fdd2a --- /dev/null +++ b/stdlib/format.ml @@ -0,0 +1,471 @@ +(* Tokens are one of the following : *) + +type pp_token = + Pp_text of string (* normal text *) + | Pp_break of int * int (* complete break *) + | Pp_tbreak of int * int (* go to next tab *) + | Pp_stab (* set a tabulation *) + | Pp_begin of int * block_type (* beginning of a block *) + | Pp_end (* end of a block *) + | Pp_tbegin of tblock (* Beginning of a tabulation block *) + | Pp_tend (* end of a tabulation block *) + | Pp_newline (* to force a newline inside a block *) + | Pp_if_newline (* to do something only if this very + line has been broken *) + +and block_type = + Pp_hbox (* Horizontal block no line breaking *) + | Pp_vbox (* Vertical block each break leads to a new line *) + | Pp_hvbox (* Horizontal-vertical block: same as vbox, except if this block + is small enough to fit on a single line *) + | Pp_hovbox (* Horizontal or Vertical block: breaks lead to new line + only when necessary to print the content of the block *) + | Pp_fits (* Internal usage: when a block fits on a single line *) + +and tblock = Pp_tbox of int list ref (* Tabulation box *) + +(* The Queue: contains all formatting elements. + elements are tuples (size,token,length), where + size is set when the size of the block is known + len is the declared length of the token *) +type pp_queue_elem = + {mutable elem_size : int; token : pp_token; length : int} + +(* Scan stack + each element is (left_total, queue element) where left_total + is the val of pp_left_total when the element has been enqueued *) +type pp_scan_elem = Scan_elem of int * pp_queue_elem +let pp_scan_stack = ref ([] : pp_scan_elem list) + +(* Formatting Stack: + used to break the lines while printing tokens. + The formatting stack contains the description of + the currently active blocks. *) +type pp_format_elem = Format_elem of block_type * int +let pp_format_stack = ref ([]:pp_format_elem list) + +let pp_tbox_stack = ref ([]:tblock list) + +(* Large val for default tokens size *) +let pp_infinity = 9999 + +(* Global variables: default initialization is + set_margin 78 + set_min_space_left 0 *) +(* val of right margin *) +let pp_margin = ref 78 + +(* Minimal space left before margin, when opening a block *) +let pp_min_space_left = ref 10 +(* maximum val of indentation: + no blocks can be opened further *) +let pp_max_indent = ref (!pp_margin - !pp_min_space_left) + +let pp_space_left = ref !pp_margin(* space remaining on the current line *) +and pp_current_indent = ref 0 (* current val of indentation *) +and pp_left_total = ref 1 (* total width of tokens already printed *) +and pp_right_total = ref 1 (* total width of tokens ever put in queue *) +and pp_curr_depth = ref 0 (* current number of opened blocks *) +and pp_max_boxes = ref 35 (* maximum number of blocks which can be + opened at the same time *) +and pp_ellipsis = ref "." (* ellipsis string *) +and pp_out_channel = ref stdout (* out_channel of the pretty_printer *) + +(* Output functions for the formatter *) +let pp_output s = output !pp_out_channel s +and pp_output_string s = output_string !pp_out_channel s +and pp_output_newline () = output_char !pp_out_channel '\n' + +(* The pretty-printer queue *) +let pp_queue = (Queue.new () : pp_queue_elem Queue.t) + +let pp_clear_queue () = + pp_left_total := 1; pp_right_total := 1; + Queue.clear pp_queue + +(* Enter a token in the pretty-printer queue *) +let pp_enqueue ({length=len} as token) = + pp_right_total := !pp_right_total + len; + Queue.add token pp_queue + +(* To output spaces *) +let blank_line = String.make 80 ' ' +let display_blanks n = + if n > 0 then + if n <= 80 then pp_output blank_line 0 n + else pp_output_string (String.make n ' ') + +(* To format a break, indenting a new line *) +let break_new_line offset width = + pp_output_newline (); + let indent = !pp_margin - width + offset in + (* Don't indent more than pp_max_indent *) + let real_indent = min !pp_max_indent indent in + pp_current_indent := real_indent; + pp_space_left := !pp_margin - !pp_current_indent; + display_blanks !pp_current_indent + +(* To force a line break inside a block: no offset is added *) +let break_line width = break_new_line 0 width + +(* To format a break that fits on the current line *) +let break_same_line width = + pp_space_left := !pp_space_left - width; + display_blanks width + +(* To indent no more than pp_max_indent, if one tries to open a block + beyond pp_max_indent, then the block is rejected on the left + by simulating a break. *) +let pp_force_newline () = + match !pp_format_stack with + Format_elem (bl_ty, width) :: _ -> + if width > !pp_space_left then + (match bl_ty with + Pp_fits -> () | Pp_hbox -> () | _ -> break_line width) + | _ -> pp_output_newline() + +(* To skip a token, if the previous line has been broken *) +let pp_skip_token () = + (* When calling pp_skip_token the queue cannot be empty *) + match Queue.take pp_queue with + {elem_size = size; length = len} -> + pp_left_total := !pp_left_total - len; + pp_space_left := !pp_space_left + size + +(* To format a token *) +let format_pp_token size = function + + Pp_text s -> pp_space_left := !pp_space_left - size; pp_output_string s + + | Pp_begin (off,ty) -> + let insertion_point = !pp_margin - !pp_space_left in + if insertion_point > !pp_max_indent then + (* can't open a block right there ! *) + pp_force_newline () else + (* If block is rejected on the left current indentation will change *) + if size > !pp_space_left & !pp_current_indent < insertion_point then + pp_force_newline (); + let offset = !pp_space_left - off in + let bl_type = + begin match ty with + Pp_vbox -> Pp_vbox + | _ -> if size > !pp_space_left then ty else Pp_fits + end in + pp_format_stack := Format_elem (bl_type, offset) :: !pp_format_stack + + | Pp_end -> + begin match !pp_format_stack with + x::(y::l as ls) -> pp_format_stack := ls + | _ -> () (* No more block to close *) + end + + | Pp_tbegin (Pp_tbox _ as tbox) -> pp_tbox_stack := tbox :: !pp_tbox_stack + + | Pp_tend -> + begin match !pp_tbox_stack with + x::ls -> pp_tbox_stack := ls + | _ -> () (* No more tabulation block to close *) + end + + | Pp_stab -> + begin match !pp_tbox_stack with + Pp_tbox tabs :: _ -> + let rec add_tab n = function + [] -> [n] + | x::l as ls -> if n < x then n :: ls else x::add_tab n l in + tabs := add_tab (!pp_margin - !pp_space_left) !tabs + | _ -> () (* No opened tabulation block *) + end + + | Pp_tbreak (n,off) -> + let insertion_point = !pp_margin - !pp_space_left in + begin match !pp_tbox_stack with + Pp_tbox tabs :: _ -> + let rec find n = function + x :: l -> if x >= n then x else find n l + | [] -> raise Not_found in + let tab = + match !tabs with + x :: l -> + begin try find insertion_point !tabs with Not_found -> x end + | _ -> insertion_point in + let offset = tab - insertion_point in + if offset >= 0 then break_same_line (offset + n) else + break_new_line (tab + off) !pp_margin + | _ -> () (* No opened tabulation block *) + end + + | Pp_newline -> + begin match !pp_format_stack with + Format_elem (_,width) :: _ -> break_line width + | _ -> pp_output_newline() + end + + | Pp_if_newline -> + if !pp_current_indent != !pp_margin - !pp_space_left + then pp_skip_token () + + | Pp_break (n,off) -> + begin match !pp_format_stack with + Format_elem (ty,width) :: _ -> + begin match ty with + Pp_hovbox -> + if size > !pp_space_left then break_new_line off width else + (* break the line here leads to new indentation ? *) + if (!pp_current_indent > !pp_margin - width + off) + then break_new_line off width else break_same_line n + | Pp_hvbox -> break_new_line off width + | Pp_fits -> break_same_line n + | Pp_vbox -> break_new_line off width + | Pp_hbox -> break_same_line n + end + | _ -> () (* No opened block *) + end + +(* Print if token size is known or printing is delayed + Size is known when not negative + Printing is delayed when the text waiting in the queue requires + more room to format than List.exists on the current line *) +let rec advance_left () = + try + match Queue.peek pp_queue with + {elem_size = size; token = tok; length = len} -> + if not (size < 0 & + (!pp_right_total - !pp_left_total <= !pp_space_left)) then + begin + Queue.take pp_queue; + format_pp_token (if size < 0 then pp_infinity else size) tok; + pp_left_total := len + !pp_left_total; + advance_left () + end + with Queue.Empty -> () + +let enqueue_advance tok = pp_enqueue tok; advance_left () + +(* To enqueue a string : try to advance *) +let enqueue_string_as n s = + enqueue_advance {elem_size = n; token = Pp_text s; length = n} + +let enqueue_string s = enqueue_string_as (String.length s) s + +(* Routines for scan stack + determine sizes of blocks *) +(* scan_stack is never empty *) +let empty_scan_stack = + [Scan_elem (-1, {elem_size = (-1); token = Pp_text ""; length = 0})] +let clear_scan_stack () = pp_scan_stack := empty_scan_stack + +(* Set size of blocks on scan stack: + if ty = true then size of break is set else size of block is set + in each case pp_scan_stack is popped *) +(* Pattern matching on scan stack is exhaustive, + since scan_stack is never empty. + Pattern matching on token in scan stack is also exhaustive, + since scan_push is used on breaks and opening of boxes *) +let set_size ty = + match !pp_scan_stack with + Scan_elem (left_tot, + ({elem_size = size; token = tok} as queue_elem)) :: t -> + (* test if scan stack contains any data that is not obsolete *) + if left_tot < !pp_left_total then clear_scan_stack () else + begin match tok with + Pp_break (_, _) | Pp_tbreak (_, _) -> + if ty then + begin + queue_elem.elem_size <- !pp_right_total + size; + pp_scan_stack := t + end + | Pp_begin (_, _) -> + if not ty then + begin + queue_elem.elem_size <- !pp_right_total + size; + pp_scan_stack := t + end + | _ -> () (* scan_push is only used for breaks and boxes *) + end + | _ -> () (* scan_stack is never empty *) + +(* Push a token on scan stack. If b is true set_size is called *) +let scan_push b tok = + pp_enqueue tok; + if b then set_size true; + pp_scan_stack := Scan_elem (!pp_right_total,tok) :: !pp_scan_stack + +(* + To open a new block : + the user may set the depth bound pp_max_boxes + any text nested deeper is printed as the character the ellipsis +*) +let pp_open_box (indent,br_ty) = + incr pp_curr_depth; + if !pp_curr_depth < !pp_max_boxes then + (scan_push false + {elem_size = (- !pp_right_total); + token = Pp_begin (indent, br_ty); length = 0}) else + if !pp_curr_depth = !pp_max_boxes then enqueue_string !pp_ellipsis + +(* The box which is always opened *) +let pp_open_sys_box () = + incr pp_curr_depth; + scan_push false + {elem_size = (- !pp_right_total); + token = Pp_begin (0, Pp_hovbox); length = 0} + +(* close a block, setting sizes of its subblocks *) +let close_box () = + if !pp_curr_depth > 1 then + begin + if !pp_curr_depth < !pp_max_boxes then + begin + pp_enqueue {elem_size = 0; token = Pp_end; length = 0}; + set_size true; set_size false + end; + decr pp_curr_depth + end + +(* Initialize pretty-printer. *) +let pp_rinit () = + pp_clear_queue (); + clear_scan_stack(); + pp_current_indent := 0; + pp_curr_depth := 0; pp_space_left := !pp_margin; + pp_format_stack := []; + pp_tbox_stack := []; + pp_open_sys_box () + +(* Flushing pretty-printer queue. *) +let pp_flush b = + while !pp_curr_depth > 1 do + close_box () + done; + pp_right_total := pp_infinity; advance_left (); + if b then pp_output_newline (); + flush !pp_out_channel; + pp_rinit() + +(************************************************************** + + Procedures to format objects, and use boxes + + **************************************************************) + +(* To format a string *) +let print_as n s = + if !pp_curr_depth < !pp_max_boxes then (enqueue_string_as n s) + +let print_string s = print_as (String.length s) s + +(* To format an integer *) +let print_int i = print_string (string_of_int i) + +(* To format a float *) +let print_float f = print_string (string_of_float f) + +(* To format a boolean *) +let print_bool b = print_string (string_of_bool b) + +(* To format a char *) +let print_char c = print_string (String.make 1 c) + +let open_hbox () = pp_open_box (0, Pp_hbox) +and open_vbox indent = pp_open_box (indent, Pp_vbox) + +and open_hvbox indent = pp_open_box (indent, Pp_hvbox) +and open_hovbox indent = pp_open_box (indent, Pp_hovbox) + +(* Print a new line after printing all queued text + (same for print_flush but without a newline) *) +let print_newline () = pp_flush true +and print_flush () = pp_flush false + +(* To get a newline when one does not want to close the current block *) +let force_newline () = + if !pp_curr_depth < !pp_max_boxes + then enqueue_advance {elem_size = 0; token = Pp_newline; length = 0} + +(* To format something if the line has just been broken *) +let print_if_newline () = + if !pp_curr_depth < !pp_max_boxes + then enqueue_advance {elem_size = 0; token = Pp_if_newline ;length = 0} + +(* Breaks: indicate where a block may be broken. + If line is broken then offset is added to the indentation of the current + block else (the val of) width blanks are printed. + To do (?) : add a maximum width and offset val *) +let print_break (width, offset) = + if !pp_curr_depth < !pp_max_boxes then + scan_push true + {elem_size = (- !pp_right_total); token = Pp_break (width,offset); + length = width} + +let print_space () = print_break (1,0) +and print_cut () = print_break (0,0) + +let open_tbox () = + incr pp_curr_depth; + if !pp_curr_depth < !pp_max_boxes then + enqueue_advance + {elem_size = 0; + token = Pp_tbegin (Pp_tbox (ref [])); length = 0} + +(* Close a tabulation block *) +let close_tbox () = + if !pp_curr_depth > 1 then begin + if !pp_curr_depth < !pp_max_boxes then + enqueue_advance {elem_size = 0; token = Pp_tend; length = 0}; + decr pp_curr_depth end + +(* Print a tabulation break *) +let print_tbreak (width, offset) = + if !pp_curr_depth < !pp_max_boxes then + scan_push true + {elem_size = (- !pp_right_total); token = Pp_tbreak (width,offset); + length = width} + +let print_tab () = print_tbreak (0,0) + +let set_tab () = + if !pp_curr_depth < !pp_max_boxes + then enqueue_advance {elem_size = 0; token = Pp_stab; length=0} + +(************************************************************** + + Procedures to control the pretty-printer + + **************************************************************) + +(* Fit max_boxes *) +let set_max_boxes n = if n > 1 then pp_max_boxes := n + +(* To know the current maximum number of boxes allowed *) +let get_max_boxes () = !pp_max_boxes + +(* Ellipsis *) +let set_ellipsis_text s = pp_ellipsis := s +and get_ellipsis_text () = !pp_ellipsis + +(* To set the margin of pretty-formater *) +let set_margin n = + if n >= 1 then + begin + pp_margin := n; + pp_max_indent := !pp_margin - !pp_min_space_left; + pp_rinit () end + +let get_margin () = !pp_margin + +let set_min_space_left n = + if n >= 1 then + begin + pp_min_space_left := n; + pp_max_indent := !pp_margin - !pp_min_space_left; + pp_rinit () end + +let set_max_indent n = set_min_space_left (!pp_margin - n) +let get_max_indent () = !pp_max_indent + +let set_formatter_output os = pp_out_channel := os +let get_formatter_output () = !pp_out_channel + +(* Initializing formatter *) +let _ = pp_rinit() diff --git a/stdlib/format.mli b/stdlib/format.mli new file mode 100644 index 0000000000..5d9a9ac3e2 --- /dev/null +++ b/stdlib/format.mli @@ -0,0 +1,151 @@ +(* Pretty printing *) + +(* This module implements a pretty-printing facility to format text + within ``pretty-printing boxes''. The pretty-printer breaks lines + at specified break hints, and indents lines according to the box structure. +*) + +(* The behaviour of pretty-printing commands is unspecified + if there is no opened pretty-printing box. *) + +(*** Boxes *) +val open_vbox : int -> unit + (* [open_vbox d] opens a new pretty-printing box + with offset [d]. + This box is ``vertical'': every break hint inside this + box leads to a new line. + When a new line is printed in the box, [d] is added to the + current indentation. *) +val open_hbox : unit -> unit + (* [open_hbox ()] opens a new pretty-printing box. + This box is ``horizontal'': the line is not List.split in this box + (new lines may still occur inside boxes nested deeper). *) +val open_hvbox : int -> unit + (* [open_hovbox d] opens a new pretty-printing box + with offset [d]. + This box is ``horizontal-vertical'': it behaves as an + ``horizontal'' box if it fits on a single line, + otherwise it behaves as a ``vertical'' box. + When a new line is printed in the box, [d] is added to the + current indentation. *) +val open_hovbox : int -> unit + (* [open_hovbox d] opens a new pretty-printing box + with offset [d]. + This box is ``horizontal or vertical'': break hints + inside this box may lead to a new line, if there is no more room + on the line to print the remainder of the box. + When a new line is printed in the box, [d] is added to the + current indentation. *) +val close_box : unit -> unit + (* Close the most recently opened pretty-printing box. *) + +(*** Formatting functions *) +val print_string : string -> unit + (* [print_string str] prints [str] in the current box. *) +val print_as : int -> string -> unit + (* [print_as len str] prints [str] in the + current box. The pretty-printer formats [str] as if + it were of length [len]. *) +val print_int : int -> unit + (* Print an integer in the current box. *) +val print_float : float -> unit + (* Print a floating point number in the current box. *) +val print_char : char -> unit + (* Print a character in the current box. *) +val print_bool : bool -> unit + (* Print an boolean in the current box. *) + +(*** Break hints *) +val print_break : int * int -> unit + (* Insert a break hint in a pretty-printing box. + [print_break (nspaces, offset)] indicates that the line may + be List.split (a newline character is printed) at this point, + if the contents of the current box does not fit on one line. + If the line is List.split at that point, [offset] is added to + the current indentation. If the line is not List.split, + [nspaces] spaces are printed. *) +val print_cut : unit -> unit + (* [print_cut ()] is equivalent to [print_break (0,0)]. + This allows line splitting at the current point, without printing + spaces or adding indentation. *) +val print_space : unit -> unit + (* [print_space ()] is equivalent to [print_break (1,0)]. + This either prints one space or splits the line at that point. *) +val force_newline : unit -> unit + (* Force a newline in the current box. *) + +val print_flush : unit -> unit + (* Flush the pretty printer: all opened boxes are closed, + and all pending text is displayed. *) +val print_newline : unit -> unit + (* Equivalent to [print_flush] followed by a new line. *) + +val print_if_newline : unit -> unit + (* If the preceding line has not been List.split, the next + formatting command is ignored. *) + +(*** Tabulations *) +val open_tbox : unit -> unit + (* Open a tabulation box. *) +val close_tbox : unit -> unit + (* Close the most recently opened tabulation box. *) +val print_tbreak : int * int -> unit + (* Break hint in a tabulation box. + [print_tbreak (spaces, offset)] moves the insertion point to + the next tabulation ([spaces] being added to this position). + Nothing occurs if insertion point is already on a + tabulation mark. + If there is no next tabulation on the line, then a newline + is printed and the insertion point moves to the first + tabulation of the box. + If a new line is printed, [offset] is added to the current + indentation. *) +val set_tab : unit -> unit + (* Set a tabulation mark at the current insertion point. *) +val print_tab : unit -> unit + (* [print_tab ()] is equivalent to [print_tbreak (0,0)]. *) + +(*** Margin *) +val set_margin : int -> unit + (* [set_margin d] sets the val of the right margin + to [d] (in characters): this val is used to detect line + overflows that leads to List.split lines. + Nothing happens if [d] is not greater than 1. *) +val get_margin : unit -> int + (* Return the position of the right margin. *) + +(*** Maximum indentation limit *) +val set_max_indent : int -> unit + (* [set_max_indent d] sets the val of the maximum + indentation limit to [d] (in characters): + once this limit is reached, boxes are rejected to the left, + if they do not fit on the current line. + Nothing happens if [d] is not greater than 1. *) +val get_max_indent : unit -> int + (* Return the val of the maximum indentation limit (in + characters). *) + +(*** Formatting depth: maximum number of boxes allowed before ellipsis *) +val set_max_boxes : int -> unit + (* [set_max_boxes max] sets the maximum number + of boxes simultaneously opened. + Material inside boxes nested deeper is printed as an + ellipsis (more precisely as the text returned by + [get_ellipsis_text]). + Nothing happens if [max] is not greater than 1. *) +val get_max_boxes : unit -> int + (* Return the maximum number of boxes allowed before ellipsis. *) + +(*** Ellipsis *) +val set_ellipsis_text : string -> unit + (* Set the text of the ellipsis printed when too many boxes + are opened (a single dot, [.], by default). *) +val get_ellipsis_text : unit -> string + (* Return the text of the ellipsis. *) + +(*** Redirecting formatter output *) +val set_formatter_output : out_channel -> unit + (* Redirect the pretty-printer output to the given channel. *) +val get_formatter_output : unit -> out_channel + (* Return the channel connected to the pretty-printer. *) + diff --git a/stdlib/gc.ml b/stdlib/gc.ml new file mode 100644 index 0000000000..78065fdd87 --- /dev/null +++ b/stdlib/gc.ml @@ -0,0 +1,47 @@ +type stat = { + minor_words : int; + promoted_words : int; + major_words : int; + minor_collections : int; + major_collections : int; + heap_size : int; + heap_chunks : int; + live_words : int; + live_blocks : int; + free_words : int; + free_blocks : int; + largest_free : int; + fragments : int +} + +type control = { + mutable minor_heap_size : int; + mutable major_heap_increment : int; + mutable space_overhead : int; + mutable verbose : bool +} + +external stat : unit -> stat = "gc_stat" +external get : unit -> control = "gc_get" +external set : control -> unit = "gc_set" +external minor : unit -> unit = "gc_minor" +external major : unit -> unit = "gc_major" +external full_major : unit -> unit = "gc_full_major" + +open Printf + +let print_stat c = + let st = stat () in + fprintf c "minor_words: %d\n" st.minor_words; + fprintf c "promoted_words: %d\n" st.promoted_words; + fprintf c "major_words: %d\n" st.major_words; + fprintf c "minor_collections: %d\n" st.minor_collections; + fprintf c "major_collections: %d\n" st.major_collections; + fprintf c "heap_size: %d\n" st.heap_size; + fprintf c "heap_chunks: %d\n" st.heap_chunks; + fprintf c "live_words: %d\n" st.live_words; + fprintf c "live_blocks: %d\n" st.live_blocks; + fprintf c "free_words: %d\n" st.free_words; + fprintf c "free_blocks: %d\n" st.free_blocks; + fprintf c "largest_free: %d\n" st.largest_free; + fprintf c "fragments: %d\n" st.fragments diff --git a/stdlib/gc.mli b/stdlib/gc.mli new file mode 100644 index 0000000000..b77b0e2866 --- /dev/null +++ b/stdlib/gc.mli @@ -0,0 +1,93 @@ +(* Memory management control and statistics. *) + +type stat = { + minor_words : int; + promoted_words : int; + major_words : int; + minor_collections : int; + major_collections : int; + heap_size : int; + heap_chunks : int; + live_words : int; + live_blocks : int; + free_words : int; + free_blocks : int; + largest_free : int; + fragments : int +} + (* The memory management counters are returned in a [stat] record. + All the numbers are computed since the start of the program. + The fields of this record are: +- [minor_words] Number of words allocated in the minor heap. +- [promoted_words] Number of words allocated in the minor heap that + survived a minor collection and were moved to the major heap. +- [major_words] Number of words allocated in the major heap, including + the promoted words. +- [minor_collections] Number of minor collections. +- [major_collections] Number of major collection cycles, not counting + the current cycle. +- [heap_size] Total number of words in the major heap. +- [heap_chunks] Number of times the major heap size was increased. +- [live_words] Number of words of live data in the major heap, including + the header words. +- [live_blocks] Number of live objects in the major heap. +- [free_words] Number of words in the free list. +- [free_blocks] Number of objects in the free list. +- [largest_free] Size (in words) of the largest object in the free list. +- [fragments] Number of wasted words due to fragmentation. These are + 1-words free blocks placed between two live objects. They + cannot be inserted in the free list, thus they are not available + for allocation. + +- The total amount of memory allocated by the program is (in words) + [minor_words + major_words - promoted_words]. Multiply by + the word size (4 on a 32-bit machine, 8 on a 64-bit machine) to get + the number of bytes. + *) + +type control = { + mutable minor_heap_size : int; + mutable major_heap_increment : int; + mutable space_overhead : int; + mutable verbose : bool +} + + (* The GC parameters are given as a [control] record. The fields are: +- [minor_heap_size] The size (in words) of the minor heap. Changing + this parameter will trigger a minor collection. +- [major_heap_increment] The minimum number of words to add to the + major heap when increasing it. +- [space_overhead] The major GC speed is computed from this parameter. + This is the percentage of heap space that will be "wasted" + because the GC does not immediatly collect unreachable + objects. The GC will work more (use more CPU time and collect + objects more eagerly) if [space_overhead] is smaller. + The computation of the GC speed assumes that the amount + of live data is constant. +- [verbose] This flag controls the GC messages on standard error output. + *) + +val stat : unit -> stat = "gc_stat" + (* Return the current values of the memory management counters in a + [stat] record. *) +val print_stat : out_channel -> unit + (* Print the current values of the memory management counters (in + human-readable form) into the channel argument. *) +val get : unit -> control = "gc_get" + (* Return the current values of the GC parameters in a [control] record. *) +val set : control -> unit = "gc_set" + (* [set r] changes the GC parameters according to the [control] record [r]. + The normal usage is: + [ + let r = Gc.get () in (* Get the current parameters. *) + r.verbose <- true; (* Change some of them. *) + Gc.set r (* Set the new values. *) + ] + *) +val minor : unit -> unit = "gc_minor" + (* Trigger a minor collection. *) +val major : unit -> unit = "gc_major" + (* Finish the current major collection cycle. *) +val full_major : unit -> unit = "gc_full_major" + (* Finish the current major collection cycle and perform a complete + new cycle. This will collect all currently unreachable objects. *) diff --git a/stdlib/hashtbl.ml b/stdlib/hashtbl.ml new file mode 100644 index 0000000000..f7cbda3ff2 --- /dev/null +++ b/stdlib/hashtbl.ml @@ -0,0 +1,95 @@ +(* Hash tables *) + +(* We do dynamic hashing, and we double the size of the table when + buckets become too long, but without re-hashing the elements. *) + +type ('a, 'b) t = + { mutable max_len: int; (* max length of a bucket *) + mutable data: ('a, 'b) bucketlist array } (* the buckets *) + +and ('a, 'b) bucketlist = + Empty + | Cons of 'a * 'b * ('a, 'b) bucketlist + +let new initial_size = + { max_len = 2; data = Array.new initial_size Empty } + +let clear h = + for i = 0 to Array.length h.data - 1 do + h.data.(i) <- Empty + done + +let resize h = + let n = Array.length h.data in + let newdata = Array.new (n+n) Empty in + Array.blit h.data 0 newdata 0 n; + Array.blit h.data 0 newdata n n; + h.data <- newdata; + h.max_len <- 2 * h.max_len + +let rec bucket_too_long n bucket = + if n < 0 then true else + match bucket with + Empty -> false + | Cons(_,_,rest) -> bucket_too_long (pred n) rest + +external hash_param : int -> int -> 'a -> int = "hash_univ_param" + +let add h key info = + let i = (hash_param 10 100 key) mod (Array.length h.data) in + let bucket = Cons(key, info, h.data.(i)) in + h.data.(i) <- bucket; + if bucket_too_long h.max_len bucket then resize h + +let remove h key = + let rec remove_bucket = function + Empty -> + Empty + | Cons(k, i, next) -> + if k = key then next else Cons(k, i, remove_bucket next) in + let i = (hash_param 10 100 key) mod (Array.length h.data) in + h.data.(i) <- remove_bucket h.data.(i) + +let find h key = + match h.data.((hash_param 10 100 key) mod (Array.length h.data)) with + Empty -> raise Not_found + | Cons(k1, d1, rest1) -> + if key = k1 then d1 else + match rest1 with + Empty -> raise Not_found + | Cons(k2, d2, rest2) -> + if key = k2 then d2 else + match rest2 with + Empty -> raise Not_found + | Cons(k3, d3, rest3) -> + if key = k3 then d3 else begin + let rec find = function + Empty -> + raise Not_found + | Cons(k, d, rest) -> + if key = k then d else find rest + in find rest3 + end + +let find_all h key = + let rec find_in_bucket = function + Empty -> + [] + | Cons(k, d, rest) -> + if k = key then d :: find_in_bucket rest else find_in_bucket rest in + find_in_bucket h.data.((hash_param 10 100 key) mod (Array.length h.data)) + +let iter f h = + let len = Array.length h.data in + for i = 0 to Array.length h.data - 1 do + let rec do_bucket = function + Empty -> + () + | Cons(k, d, rest) -> + if (hash_param 10 100 k) mod len = i + then begin f k d; do_bucket rest end + else do_bucket rest in + do_bucket h.data.(i) + done + +let hash x = hash_param 50 500 x diff --git a/stdlib/hashtbl.mli b/stdlib/hashtbl.mli new file mode 100644 index 0000000000..fd32f6a36f --- /dev/null +++ b/stdlib/hashtbl.mli @@ -0,0 +1,67 @@ +(* Hash tables and hash functions *) + +(* Hash tables are hashed association tables, with in-place modification. *) + +type ('a, 'b) t + (* The type of hash tables from type ['a] to type ['b]. *) + +val new : int -> ('a,'b) t + (* [new n] creates a new, empty hash table, with initial size [n]. + The table grows as needed, so [n] is just an initial guess. + Better results are said to be achieved when [n] is a prime + number. *) + +val clear : ('a, 'b) t -> unit + (* Empty a hash table. *) + +val add : ('a, 'b) t -> 'a -> 'b -> unit + (* [add tbl x y] adds a binding of [x] to [y] in table [tbl]. + Previous bindings for [x] are not removed, but simply + hidden. That is, after performing [remove tbl x], the previous + binding for [x], if any, is restored. + (This is the semantics of association lists.) *) + +val find : ('a, 'b) t -> 'a -> 'b + (* [find tbl x] returns the current binding of [x] in [tbl], + or raises [Not_found] if no such binding exists. *) + +val find_all : ('a, 'b) t -> 'a -> 'b list + (* [find_all tbl x] returns the list of all data associated with [x] + in [tbl]. The current binding is returned first, then the previous + bindings, in reverse order of introduction in the table. *) + +val remove : ('a, 'b) t -> 'a -> unit + (* [remove tbl x] removes the current binding of [x] in [tbl], + restoring the previous binding if it exists. + It does nothing if [x] is not bound in [tbl]. *) + +val iter : ('a -> 'b -> 'c) -> ('a, 'b) t -> unit + (* [iter f tbl] applies [f] to all bindings in table [tbl], + discarding all the results. + [f] receives the key as first argument, and the associated val + as second argument. The order in which the bindings are passed to + [f] is unpredictable. Each binding is presented exactly once + to [f]. *) + +(*** The polymorphic hash primitive *) + +val hash : 'a -> int + (* [hash x] associates a positive integer to any val of + any type. It is guaranteed that + if [x = y], then [hash x = hash y]. + Moreover, [hash] always terminates, even on cyclic + structures. *) + +val hash_param : int -> int -> 'a -> int = "hash_univ_param" + (* [hash_param n m x] computes a hash val for [x], with the + same properties as for [hash]. The two extra parameters [n] and + [m] give more precise control over hashing. Hashing performs a + depth-first, right-to-left traversal of the structure [x], stopping + after [n] meaningful nodes were encountered, or [m] nodes, + meaningful or not, were encountered. Meaningful nodes are: integers; + floating-point numbers; strings; characters; booleans; and constant + constructors. Larger vals of [m] and [n] means that more + nodes are taken into account to compute the final hash + val, and therefore collisions are less likely to happen. + However, hashing takes longer. The parameters [m] and [n] + govern the tradeoff between accuracy and speed. *) diff --git a/stdlib/header.c b/stdlib/header.c new file mode 100644 index 0000000000..aba20e62a1 --- /dev/null +++ b/stdlib/header.c @@ -0,0 +1,11 @@ +char * runtime_name = "cslrun"; +char * errmsg = "Cannot exec cslrun.\n"; + +int main(argc, argv) + int argc; + char ** argv; +{ + execvp(runtime_name, argv); + write(2, errmsg, sizeof(errmsg)-1); + return 2; +} diff --git a/stdlib/lexing.ml b/stdlib/lexing.ml new file mode 100644 index 0000000000..07bb7b5df5 --- /dev/null +++ b/stdlib/lexing.ml @@ -0,0 +1,75 @@ +(* The run-time library for lexers generated by camllex *) + +type lexbuf = + { refill_buff : lexbuf -> unit; + lex_buffer : string; + mutable lex_abs_pos : int; + mutable lex_start_pos : int; + mutable lex_curr_pos : int; + mutable lex_last_pos : int; + mutable lex_last_action : lexbuf -> Obj.t } + +let lex_aux_buffer = String.create 1024 + +let lex_refill read_fun lexbuf = + let read = + read_fun lex_aux_buffer 1024 in + let n = + if read > 0 + then read + else (String.unsafe_set lex_aux_buffer 0 '\000'; 1) in + String.unsafe_blit lexbuf.lex_buffer n lexbuf.lex_buffer 0 (2048 - n); + String.unsafe_blit lex_aux_buffer 0 lexbuf.lex_buffer (2048 - n) n; + lexbuf.lex_abs_pos <- lexbuf.lex_abs_pos + n; + lexbuf.lex_curr_pos <- lexbuf.lex_curr_pos - n; + lexbuf.lex_start_pos <- lexbuf.lex_start_pos - n; + lexbuf.lex_last_pos <- lexbuf.lex_last_pos - n; + if lexbuf.lex_start_pos < 0 then failwith "lexing: token too long" + +let dummy_action x = failwith "lexing: empty token" + +let from_function f = + { refill_buff = lex_refill f; + lex_buffer = String.create 2048; + lex_abs_pos = - 2048; + lex_start_pos = 2048; + lex_curr_pos = 2048; + lex_last_pos = 2048; + lex_last_action = dummy_action } + +let from_channel ic = + from_function (fun buf n -> input ic buf 0 n) + +let from_string s = + { refill_buff = + (fun lexbuf -> lexbuf.lex_curr_pos <- lexbuf.lex_curr_pos - 1); + lex_buffer = s ^ "\000"; + lex_abs_pos = 0; + lex_start_pos = 0; + lex_curr_pos = 0; + lex_last_pos = 0; + lex_last_action = dummy_action } + +external get_next_char : lexbuf -> char = "get_next_char" + +let lexeme lexbuf = + let len = lexbuf.lex_curr_pos - lexbuf.lex_start_pos in + let s = String.create len in + String.unsafe_blit lexbuf.lex_buffer lexbuf.lex_start_pos s 0 len; s + +let lexeme_char lexbuf i = + String.get lexbuf.lex_buffer (lexbuf.lex_start_pos + i) + +let start_lexing lexbuf = + lexbuf.lex_start_pos <- lexbuf.lex_curr_pos; + lexbuf.lex_last_action <- dummy_action + +let backtrack lexbuf = + lexbuf.lex_curr_pos <- lexbuf.lex_last_pos; + Obj.magic(lexbuf.lex_last_action lexbuf) + +let lexeme_start lexbuf = + lexbuf.lex_abs_pos + lexbuf.lex_start_pos +and lexeme_end lexbuf = + lexbuf.lex_abs_pos + lexbuf.lex_curr_pos + diff --git a/stdlib/lexing.mli b/stdlib/lexing.mli new file mode 100644 index 0000000000..6e224c4fc4 --- /dev/null +++ b/stdlib/lexing.mli @@ -0,0 +1,68 @@ +(* The run-time library for lexers generated by camllex *) + +(*** Lexer buffers *) + +type lexbuf = + { refill_buff : lexbuf -> unit; + lex_buffer : string; + mutable lex_abs_pos : int; + mutable lex_start_pos : int; + mutable lex_curr_pos : int; + mutable lex_last_pos : int; + mutable lex_last_action : lexbuf -> Obj.t } + (* The type of lexer buffers. A lexer buffer is the argument passed + to the scanning functions defined by the generated scanners. + The lexer buffer holds the current state of the scanner, plus + a function to refill the buffer from the input. *) + +val from_channel : in_channel -> lexbuf + (* Create a lexer buffer on the given input channel. + [create_lexer_channel inchan] returns a lexer buffer which reads + from the input channel [inchan], at the current reading position. *) +val from_string : string -> lexbuf + (* Create a lexer buffer which reads from + the given string. Reading starts from the first character in + the string. An end-of-input condition is generated when the + end of the string is reached. *) +val from_function : (string -> int -> int) -> lexbuf + (* Create a lexer buffer with the given function as its reading method. + When the scanner needs more characters, it will call the given + function, giving it a character string [s] and a character + count [n]. The function should put [n] characters or less in [s], + starting at character number 0, and return the number of characters + provided. A return value of 0 means end of input. *) + +(*** Functions for lexer semantic actions *) + + (* The following functions can be called from the semantic actions + of lexer definitions (the ML code enclosed in braces that + computes the value returned by lexing functions). They give + access to the character string matched by the regular expression + associated with the semantic action. These functions must be + applied to the argument [lexbuf], which, in the code generated by + camllex, is bound to the lexer buffer passed to the parsing + function. *) + +val lexeme : lexbuf -> string + (* [get_lexeme lexbuf] returns the string matched by + the regular expression. *) +val lexeme_char : lexbuf -> int -> char + (* [get_lexeme_char lexbuf i] returns character number [i] in + the matched string. *) +val lexeme_start : lexbuf -> int + (* [get_lexeme_start lexbuf] returns the position in the input stream + of the first character of the matched string. The first character + of the stream has position 0. *) +val lexeme_end : lexbuf -> int + (* [get_lexeme_end lexbuf] returns the position in the input stream + of the character following the last character of the matched + string. The first character of the stream has position 0. *) + +(*--*) + +(* The following definitions are used by the generated scanners only. + They are not intended to be used by user programs. *) + +val start_lexing : lexbuf -> unit +val get_next_char : lexbuf -> char = "get_next_char" +val backtrack : lexbuf -> 'a diff --git a/stdlib/list.ml b/stdlib/list.ml new file mode 100644 index 0000000000..3b6cdb4402 --- /dev/null +++ b/stdlib/list.ml @@ -0,0 +1,104 @@ +(* List operations *) + +let rec length = function + [] -> 0 + | a::l -> 1 + length l + +let hd = function + [] -> failwith "hd" + | a::l -> a + +let tl = function + [] -> failwith "tl" + | a::l -> l + +let rec rev_append accu = function + [] -> accu + | a::l -> rev_append (a :: accu) l + +let rev l = rev_append [] l + +let rec flatten = function + [] -> [] + | l::r -> l @ flatten r + +let rec map f = function + [] -> [] + | a::l -> let r = f a in r :: map f l + +(* let rec map f = function + [] -> [] + | a::l -> f a :: map f l *) + +let rec iter f = function + [] -> () + | a::l -> f a; iter f l + + +let rec fold_left f accu l = + match l with + [] -> accu + | a::l -> fold_left f (f accu a) l + +let rec fold_right f l accu = + match l with + [] -> accu + | a::l -> f a (fold_right f l accu) + +let rec map2 f l1 l2 = + match (l1, l2) with + ([], []) -> [] + | (a1::l1, a2::l2) -> f a1 a2 :: map2 f l1 l2 + | (_, _) -> invalid_arg "List.map2" + +let rec iter2 f l1 l2 = + match (l1, l2) with + ([], []) -> () + | (a1::l1, a2::l2) -> f a1 a2; iter2 f l1 l2 + | (_, _) -> invalid_arg "List.iter2" + +let rec fold_left2 f accu l1 l2 = + match (l1, l2) with + ([], []) -> accu + | (a1::l1, a2::l2) -> fold_left2 f (f accu a1 a2) l1 l2 + | (_, _) -> invalid_arg "List.fold_left2" + +let rec fold_right2 f l1 l2 accu = + match (l1, l2) with + ([], []) -> accu + | (a1::l1, a2::l2) -> f a1 a2 (fold_right2 f l1 l2 accu) + | (_, _) -> invalid_arg "List.fold_right2" + +let rec for_all p = function + [] -> true + | a::l -> p a & for_all p l + +let rec exists p = function + [] -> false + | a::l -> p a or exists p l + +let rec mem x = function + [] -> false + | a::l -> a = x or mem x l + +let rec assoc x = function + [] -> raise Not_found + | (a,b)::l -> if a = x then b else assoc x l + +let rec mem_assoc x = function + [] -> false + | (a,b)::l -> a = x or mem_assoc x l + +let rec assq x = function + [] -> raise Not_found + | (a,b)::l -> if a == x then b else assq x l + +let rec split = function + [] -> ([], []) + | (x,y)::l -> + let (rx, ry) = split l in (x::rx, y::ry) + +let rec combine = function + ([], []) -> [] + | (a1::l1, a2::l2) -> (a1, a2) :: combine(l1, l2) + | (_, _) -> invalid_arg "List.combine" diff --git a/stdlib/list.mli b/stdlib/list.mli new file mode 100644 index 0000000000..00d0cc469f --- /dev/null +++ b/stdlib/list.mli @@ -0,0 +1,24 @@ +(* List operations *) + +val length : 'a list -> int +val hd : 'a list -> 'a +val tl : 'a list -> 'a list +val rev : 'a list -> 'a list +val flatten : 'a list list -> 'a list +val iter : ('a -> 'b) -> 'a list -> unit +val map : ('a -> 'b) -> 'a list -> 'b list +val fold_left : ('a -> 'b -> 'a) -> 'a -> 'b list -> 'a +val fold_right : ('a -> 'b -> 'b) -> 'a list -> 'b -> 'b +val map2 : ('a -> 'b -> 'c) -> 'a list -> 'b list -> 'c list +val iter2 : ('a -> 'b -> 'c) -> 'a list -> 'b list -> unit +val fold_left2 : ('a -> 'b -> 'c -> 'a) -> 'a -> 'b list -> 'c list -> 'a +val fold_right2 : ('a -> 'b -> 'c -> 'c) -> 'a list -> 'b list -> 'c -> 'c +val for_all : ('a -> bool) -> 'a list -> bool +val exists : ('a -> bool) -> 'a list -> bool +val mem : 'a -> 'a list -> bool +val assoc : 'a -> ('a * 'b) list -> 'b +val mem_assoc : 'a -> ('a * 'b) list -> bool +val assq : 'a -> ('a * 'b) list -> 'b +val split : ('a * 'b) list -> 'a list * 'b list +val combine : 'a list * 'b list -> ('a * 'b) list + diff --git a/stdlib/obj.ml b/stdlib/obj.ml new file mode 100644 index 0000000000..b4c131ad4e --- /dev/null +++ b/stdlib/obj.ml @@ -0,0 +1,13 @@ +(* Operations on internal representations of values *) + +type t + +external repr : 'a -> t = "%identity" +external magic : 'a -> 'b = "%identity" +external is_block : t -> bool = "obj_is_block" +external tag : t -> int = "%tagof" +external size : t -> int = "%array_length" +external field : t -> int -> t = "%array_get" +external set_field : t -> int -> t -> unit = "%array_set" +external new_block : int -> int -> t = "obj_block" +external update : t -> t -> unit = "%update" diff --git a/stdlib/obj.mli b/stdlib/obj.mli new file mode 100644 index 0000000000..9509c2b8a7 --- /dev/null +++ b/stdlib/obj.mli @@ -0,0 +1,13 @@ +(* Operations on internal representations of values *) + +type t + +val repr : 'a -> t = "%identity" +val magic : 'a -> 'b = "%identity" +val is_block : t -> bool = "obj_is_block" +val tag : t -> int = "%tagof" +val size : t -> int = "%array_length" +val field : t -> int -> t = "%array_get" +val set_field : t -> int -> t -> unit = "%array_set" +val new_block : int -> int -> t = "obj_block" +val update : t -> t -> unit = "%update" diff --git a/stdlib/parsing.ml b/stdlib/parsing.ml new file mode 100644 index 0000000000..0ddf431e79 --- /dev/null +++ b/stdlib/parsing.ml @@ -0,0 +1,148 @@ +(* The parsing engine *) + +type parse_tables = + { actions : (unit -> Obj.t) array; + transl : int array; + lhs : string; + len : string; + defred : string; + dgoto : string; + sindex : string; + rindex : string; + gindex : string; + tablesize : int; + table : string; + check : string } + +exception YYexit of Obj.t +exception Parse_error + +open Lexing + +(* Internal interface to the parsing engine *) + +type parser_env = + { mutable s_stack : int array; (* States *) + mutable v_stack : Obj.t array; (* Semantic attributes *) + mutable symb_start_stack : int array; (* Start positions *) + mutable symb_end_stack : int array; (* End positions *) + mutable stacksize : int; (* Size of the stacks *) + mutable curr_char : int; (* Last token read *) + mutable lval : Obj.t; (* Its semantic attribute *) + mutable symb_start : int; (* Start pos. of the current symbol*) + mutable symb_end : int; (* End pos. of the current symbol *) + mutable asp : int; (* The stack pointer for attributes *) + mutable rule_len : int; (* Number of rhs items in the rule *) + mutable rule_number : int; (* Rule number to reduce by *) + mutable sp : int; (* Saved sp for parse_engine *) + mutable state : int } (* Saved state for parse_engine *) + +type parser_input = + Start + | Token_read + | Stacks_grown_1 + | Stacks_grown_2 + | Semantic_action_computed + +type parser_output = + Read_token + | Raise_parse_error + | Grow_stacks_1 + | Grow_stacks_2 + | Compute_semantic_action + +external parse_engine : + parse_tables -> parser_env -> parser_input -> Obj.t -> parser_output + = "parse_engine" + +let env = + { s_stack = Array.new 100 0; + v_stack = Array.new 100 (Obj.repr ()); + symb_start_stack = Array.new 100 0; + symb_end_stack = Array.new 100 0; + stacksize = 100; + curr_char = 0; + lval = Obj.repr (); + symb_start = 0; + symb_end = 0; + asp = 0; + rule_len = 0; + rule_number = 0; + sp = 0; + state = 0 } + +let grow_stacks() = + let oldsize = env.stacksize in + let newsize = oldsize * 2 in + let new_s = Array.new newsize 0 + and new_v = Array.new newsize (Obj.repr ()) + and new_start = Array.new newsize 0 + and new_end = Array.new newsize 0 in + Array.blit env.s_stack 0 new_s 0 oldsize; + env.s_stack <- new_s; + Array.blit env.v_stack 0 new_v 0 oldsize; + env.v_stack <- new_v; + Array.blit env.symb_start_stack 0 new_start 0 oldsize; + env.symb_start_stack <- new_start; + Array.blit env.symb_end_stack 0 new_end 0 oldsize; + env.symb_end_stack <- new_end; + env.stacksize <- newsize + +let clear_parser() = + Array.fill env.v_stack 0 env.stacksize (Obj.repr ()); + env.lval <- Obj.repr () + +let current_lookahead_fun = ref (fun (x: Obj.t) -> false) + +let yyparse tables start lexer lexbuf = + let rec loop cmd arg = + match parse_engine tables env cmd arg with + Read_token -> + let t = Obj.repr(lexer lexbuf) in + env.symb_start <- lexbuf.lex_abs_pos + lexbuf.lex_start_pos; + env.symb_end <- lexbuf.lex_abs_pos + lexbuf.lex_curr_pos; + loop Token_read t + | Raise_parse_error -> + raise Parse_error + | Compute_semantic_action -> + loop Semantic_action_computed (tables.actions.(env.rule_number) ()) + | Grow_stacks_1 -> + grow_stacks(); loop Stacks_grown_1 (Obj.repr ()) + | Grow_stacks_2 -> + grow_stacks(); loop Stacks_grown_2 (Obj.repr ()) in + let init_asp = env.asp + and init_sp = env.sp + and init_state = env.state + and init_curr_char = env.curr_char in + env.curr_char <- start; + try + loop Start (Obj.repr ()) + with exn -> + let curr_char = env.curr_char in + env.asp <- init_asp; + env.sp <- init_sp; + env.state <- init_state; + env.curr_char <- init_curr_char; + match exn with + YYexit v -> + Obj.magic v + | _ -> + current_lookahead_fun := + (fun tok -> tables.transl.(Obj.tag tok) = curr_char); + raise exn + +let peek_val n = + Obj.magic env.v_stack.(env.asp - n) + +let symbol_start () = + env.symb_start_stack.(env.asp - env.rule_len + 1) +let symbol_end () = + env.symb_end_stack.(env.asp) + +let rhs_start n = + env.symb_start_stack.(env.asp - (env.rule_len - n)) +let rhs_end n = + env.symb_end_stack.(env.asp - (env.rule_len - n)) + +let is_current_lookahead tok = + (!current_lookahead_fun)(Obj.repr tok) diff --git a/stdlib/parsing.mli b/stdlib/parsing.mli new file mode 100644 index 0000000000..9f5fbaffb3 --- /dev/null +++ b/stdlib/parsing.mli @@ -0,0 +1,51 @@ +(* The run-time library for parsers generated by camlyacc *) + +val symbol_start : unit -> int +val symbol_end : unit -> int + (* [symbol_start] and [symbol_end] are to be called in the action part + of a grammar rule only. They return the position of the string that + matches the left-hand side of the rule: [symbol_start()] returns + the position of the first character; [symbol_end()] returns the + position of the last character, plus one. The first character + in a file is at position 0. *) +val rhs_start: int -> int +val rhs_end: int -> int + (* Same as [symbol_start] and [symbol_end] above, but return then + position of the string matching the [n]th item on the + right-hand side of the rule, where [n] is the integer parameter + to [lhs_start] and [lhs_end]. [n] is 1 for the leftmost item. *) +val clear_parser : unit -> unit + (* Empty the parser stack. Call it just after a parsing function + has returned, to remove all pointers from the parser stack + to structures that were built by semantic actions during parsing. + This is optional, but lowers the memory requirements of the + programs. *) + +exception Parse_error + (* Raised when a parser encounters a syntax error. *) + +(*--*) + +(* The following definitions are used by the generated parsers only. + They are not intended to be used by user programs. *) + +type parse_tables = + { actions : (unit -> Obj.t) array; + transl : int array; + lhs : string; + len : string; + defred : string; + dgoto : string; + sindex : string; + rindex : string; + gindex : string; + tablesize : int; + table : string; + check : string } + +exception YYexit of Obj.t + +val yyparse : + parse_tables -> int -> (Lexing.lexbuf -> 'a) -> Lexing.lexbuf -> 'b +val peek_val : int -> 'a +val is_current_lookahead: 'a -> bool diff --git a/stdlib/pervasives.ml b/stdlib/pervasives.ml new file mode 100644 index 0000000000..bc10141586 --- /dev/null +++ b/stdlib/pervasives.ml @@ -0,0 +1,273 @@ +(* Exceptions *) + +external raise : exn -> 'a = "%raise" + +let failwith s = raise(Failure s) +let invalid_arg s = raise(Invalid_argument s) + +exception Exit + +(* Comparisons *) + +external (=) : 'a -> 'a -> bool = "%equal" +external (<>) : 'a -> 'a -> bool = "%notequal" +external (<) : 'a -> 'a -> bool = "%lessthan" +external (>) : 'a -> 'a -> bool = "%greaterthan" +external (<=) : 'a -> 'a -> bool = "%lessequal" +external (>=) : 'a -> 'a -> bool = "%greaterequal" +external compare: 'a -> 'a -> int = "compare" + +let min x y = if x <= y then x else y +let max x y = if x >= y then x else y + +external (==) : 'a -> 'a -> bool = "%eq" +external (!=) : 'a -> 'a -> bool = "%noteq" + +(* Boolean operations *) + +external not : bool -> bool = "%boolnot" +external (&) : bool -> bool -> bool = "%sequand" +external (or) : bool -> bool -> bool = "%sequor" + +(* Integer operations *) + +external (~-) : int -> int = "%negint" +external succ : int -> int = "%succint" +external pred : int -> int = "%predint" +external (+) : int -> int -> int = "%addint" +external (-) : int -> int -> int = "%subint" +external ( * ) : int -> int -> int = "%mulint" +external (/) : int -> int -> int = "%divint" +external (mod) : int -> int -> int = "%modint" + +let abs x = if x >= 0 then x else -x + +external (land) : int -> int -> int = "%andint" +external (lor) : int -> int -> int = "%orint" +external (lxor) : int -> int -> int = "%xorint" + +let lnot x = x lxor (-1) + +external (lsl) : int -> int -> int = "%lslint" +external (lsr) : int -> int -> int = "%lsrint" +external (asr) : int -> int -> int = "%asrint" + +(* Floating-point operations *) + +external (~-.) : float -> float = "neg_float" +external (+.) : float -> float -> float = "add_float" +external (-.) : float -> float -> float = "sub_float" +external ( *. ) : float -> float -> float = "mul_float" +external (/.) : float -> float -> float = "div_float" +external ( ** ) : float -> float -> float = "power_float" +external exp : float -> float = "exp_float" +external log : float -> float = "log_float" +external sqrt : float -> float = "sqrt_float" +external sin : float -> float = "sin_float" +external cos : float -> float = "cos_float" +external tan : float -> float = "tan_float" +external asin : float -> float = "asin_float" +external acos : float -> float = "acos_float" +external atan : float -> float = "atan_float" +external atan2 : float -> float -> float = "atan2_float" + +let abs_float f = if f >= 0.0 then f else -. f + +external float : int -> float = "float_of_int" +external truncate : float -> int = "int_of_float" + +(* String operations -- more in module String *) + +external string_length : string -> int = "ml_string_length" +external string_create: int -> string = "create_string" +external string_blit : string -> int -> string -> int -> int -> unit + = "blit_string" + +let (^) s1 s2 = + let l1 = string_length s1 and l2 = string_length s2 in + let s = string_create (l1 + l2) in + string_blit s1 0 s 0 l1; + string_blit s2 0 s l1 l2; + s + +(* Pair operations *) + +external fst : 'a * 'b -> 'a = "%field0" +external snd : 'a * 'b -> 'b = "%field1" + +(* String conversion functions *) + +external format_int: string -> int -> string = "format_int" +external format_float: string -> float -> string = "format_float" + +let string_of_bool b = + if b then "true" else "false" + +let string_of_int n = + format_int "%d" n + +external int_of_string : string -> int = "int_of_string" + +let string_of_float f = + format_float "%.12g" f + +external float_of_string : string -> float = "float_of_string" + +(* List operations -- more in module List *) + +let rec (@) l1 l2 = + match l1 with + [] -> l2 + | hd :: tl -> hd :: (tl @ l2) + +(* I/O operations *) + +type in_channel +type out_channel + +external open_descriptor_out: int -> out_channel = "open_descriptor" +external open_descriptor_in: int -> in_channel = "open_descriptor" + +let stdin = open_descriptor_in 0 +let stdout = open_descriptor_out 1 +let stderr = open_descriptor_out 2 + +(* General output functions *) + +open Sys + +let open_out_gen mode perm name = + open_descriptor_out(open_desc name mode perm) + +let open_out name = + open_out_gen [Open_wronly; Open_creat; Open_trunc; Open_text] 0o666 name + +let open_out_bin name = + open_out_gen [Open_wronly; Open_creat; Open_trunc; Open_binary] 0o666 name + +external flush : out_channel -> unit = "flush" + +external unsafe_output : out_channel -> string -> int -> int -> unit = "output" + +external output_char : out_channel -> char -> unit = "output_char" + +let output_string oc s = + unsafe_output oc s 0 (string_length s) + +let output oc s ofs len = + if ofs < 0 or ofs + len > string_length s + then invalid_arg "output" + else unsafe_output oc s ofs len + +external output_byte : out_channel -> int -> unit = "output_char" +external output_binary_int : out_channel -> int -> unit = "output_int" +external output_value : out_channel -> 'a -> unit = "output_value" +external output_compact_value : out_channel -> 'a -> unit = "output_value" +external seek_out : out_channel -> int -> unit = "seek_out" +external pos_out : out_channel -> int = "pos_out" +external size_out : out_channel -> int = "channel_size" +external close_out : out_channel -> unit = "close_out" + +(* General input functions *) + +let open_in_gen mode perm name = + open_descriptor_in(open_desc name mode perm) + +let open_in name = + open_in_gen [Open_rdonly; Open_text] 0 name + +let open_in_bin name = + open_in_gen [Open_rdonly; Open_binary] 0 name + +external input_char : in_channel -> char = "input_char" + +external unsafe_input : in_channel -> string -> int -> int -> int = "input" + +let input ic s ofs len = + if ofs < 0 or ofs + len > string_length s + then invalid_arg "input" + else unsafe_input ic s ofs len + +let rec unsafe_really_input ic s ofs len = + if len <= 0 then () else begin + let r = unsafe_input ic s ofs len in + if r = 0 + then raise End_of_file + else unsafe_really_input ic s (ofs+r) (len-r) + end + +let really_input ic s ofs len = + if ofs < 0 or ofs + len > string_length s + then invalid_arg "really_input" + else unsafe_really_input ic s ofs len + +external input_scan_line : in_channel -> int = "input_scan_line" + +let rec input_line chan = + let n = input_scan_line chan in + if n = 0 then (* n = 0: we are at EOF *) + raise End_of_file + else if n > 0 then begin (* n > 0: newline found in buffer *) + let res = string_create (n-1) in + unsafe_input chan res 0 (n-1); + input_char chan; (* skip the newline *) + res + end else begin (* n < 0: newline not found *) + let beg = string_create (-n) in + unsafe_input chan beg 0 (-n); + try + beg ^ input_line chan + with End_of_file -> + beg + end + +external input_byte : in_channel -> int = "input_char" +external input_binary_int : in_channel -> int = "input_int" +external input_value : in_channel -> 'a = "input_value" +external seek_in : in_channel -> int -> unit = "seek_in" +external pos_in : in_channel -> int = "pos_in" +external in_channel_length : in_channel -> int = "channel_size" +external close_in : in_channel -> unit = "close_in" + +(* Output functions on standard output *) + +let print_char c = output_char stdout c +let print_string s = output_string stdout s +let print_int i = output_string stdout (string_of_int i) +let print_float f = output_string stdout (string_of_float f) +let print_endline s = output_string stdout s; output_char stdout '\n' +let print_newline () = output_char stdout '\n'; flush stdout + +(* Output functions on standard error *) + +let prerr_char c = output_char stderr c +let prerr_string s = output_string stderr s +let prerr_int i = output_string stderr (string_of_int i) +let prerr_float f = output_string stderr (string_of_float f) +let prerr_endline s = + output_string stderr s; output_char stderr '\n'; flush stderr +let prerr_newline () = output_char stderr '\n'; flush stderr + +(* Input functions on standard input *) + +let read_line () = flush stdout; input_line stdin +let read_int () = int_of_string(read_line()) +let read_float () = float_of_string(read_line()) + +(* References *) + +type 'a ref = { mutable contents: 'a } +external ref: 'a -> 'a ref = "%makeblock" +external (!): 'a ref -> 'a = "%field0" +external (:=): 'a ref -> 'a -> unit = "%setfield0" +external incr: int ref -> unit = "%incr" +external decr: int ref -> unit = "%decr" + +(* Miscellaneous *) + +external sys_exit : int -> 'a = "sys_exit" + +let exit retcode = + flush stdout; flush stderr; sys_exit retcode + +type 'a option = None | Some of 'a diff --git a/stdlib/pervasives.mli b/stdlib/pervasives.mli new file mode 100644 index 0000000000..ff40c49d10 --- /dev/null +++ b/stdlib/pervasives.mli @@ -0,0 +1,198 @@ +(* The initially opened module *) + +(* Predefined in the compiler *) + +(*** +type int +type char +type string +type float +type bool +type unit = () +type exn +type 'a array +type 'a list = [] | :: of 'a * 'a list +type ('a, 'b, 'c) format +exception Out_of_memory +exception Invalid_argument of string +exception Failure of string +exception Not_found +exception Sys_error of string +exception End_of_file +exception Division_by_zero +***) + +(* Exceptions *) + +val raise : exn -> 'a = "%raise" +val failwith: string -> 'a +val invalid_arg: string -> 'a + +exception Exit + +(* Comparisons *) + +val (=) : 'a -> 'a -> bool = "%equal" +val (<>) : 'a -> 'a -> bool = "%notequal" +val (<) : 'a -> 'a -> bool = "%lessthan" +val (>) : 'a -> 'a -> bool = "%greaterthan" +val (<=) : 'a -> 'a -> bool = "%lessequal" +val (>=) : 'a -> 'a -> bool = "%greaterequal" +val compare: 'a -> 'a -> int = "compare" +val min: 'a -> 'a -> 'a +val max: 'a -> 'a -> 'a +val (==) : 'a -> 'a -> bool = "%eq" +val (!=) : 'a -> 'a -> bool = "%noteq" + +(* Boolean operations *) + +val not : bool -> bool = "%boolnot" +val (&) : bool -> bool -> bool = "%sequand" +val (or) : bool -> bool -> bool = "%sequor" + +(* Integer operations *) + +val (~-) : int -> int = "%negint" +val succ : int -> int = "%succint" +val pred : int -> int = "%predint" +val (+) : int -> int -> int = "%addint" +val (-) : int -> int -> int = "%subint" +val ( * ) : int -> int -> int = "%mulint" +val (/) : int -> int -> int = "%divint" +val (mod) : int -> int -> int = "%modint" +val abs : int -> int +val (land) : int -> int -> int = "%andint" +val (lor) : int -> int -> int = "%orint" +val (lxor) : int -> int -> int = "%xorint" +val lnot: int -> int +val (lsl) : int -> int -> int = "%lslint" +val (lsr) : int -> int -> int = "%lsrint" +val (asr) : int -> int -> int = "%asrint" + +(* Floating-point operations *) + +val (~-.) : float -> float = "neg_float" +val (+.) : float -> float -> float = "add_float" +val (-.) : float -> float -> float = "sub_float" +val ( *. ) : float -> float -> float = "mul_float" +val (/.) : float -> float -> float = "div_float" +val ( ** ) : float -> float -> float = "power_float" +val exp : float -> float = "exp_float" +val log : float -> float = "log_float" +val sqrt : float -> float = "sqrt_float" +val sin : float -> float = "sin_float" +val cos : float -> float = "cos_float" +val tan : float -> float = "tan_float" +val asin : float -> float = "asin_float" +val acos : float -> float = "acos_float" +val atan : float -> float = "atan_float" +val atan2 : float -> float -> float = "atan2_float" +val abs_float : float -> float +val float : int -> float = "float_of_int" +val truncate : float -> int = "int_of_float" + +(* String operations -- more in module String *) + +val (^) : string -> string -> string + +(* Pair operations *) + +val fst : 'a * 'b -> 'a = "%field0" +val snd : 'a * 'b -> 'b = "%field1" + +(* String conversion functions *) + +val string_of_bool : bool -> string +val string_of_int : int -> string +val int_of_string : string -> int = "int_of_string" +val string_of_float : float -> string +val float_of_string : string -> float = "float_of_string" + +(* List operations -- more in module List *) + +val (@) : 'a list -> 'a list -> 'a list + +(* I/O operations *) + +type in_channel +type out_channel + +val stdin : in_channel +val stdout : out_channel +val stderr : out_channel + +(* Output functions on standard output *) + +val print_char : char -> unit +val print_string : string -> unit +val print_int : int -> unit +val print_float : float -> unit +val print_endline : string -> unit +val print_newline : unit -> unit + +(* Output functions on standard error *) + +val prerr_char : char -> unit +val prerr_string : string -> unit +val prerr_int : int -> unit +val prerr_float : float -> unit +val prerr_endline : string -> unit +val prerr_newline : unit -> unit + +(* Input functions on standard input *) + +val read_line : unit -> string +val read_int : unit -> int +val read_float : unit -> float + +(* General output functions *) +val open_out : string -> out_channel +val open_out_bin : string -> out_channel +val open_out_gen : Sys.open_flag list -> int -> string -> out_channel +val flush : out_channel -> unit = "flush" +val output_char : out_channel -> char -> unit = "output_char" +val output_string : out_channel -> string -> unit +val output : out_channel -> string -> int -> int -> unit +val output_byte : out_channel -> int -> unit = "output_char" +val output_binary_int : out_channel -> int -> unit = "output_int" +val output_value : out_channel -> 'a -> unit = "output_value" +val output_compact_value : out_channel -> 'a -> unit = "output_value" +val seek_out : out_channel -> int -> unit = "seek_out" +val pos_out : out_channel -> int = "pos_out" +val size_out : out_channel -> int = "channel_size" +val close_out : out_channel -> unit = "close_out" + +(* General input functions *) +val open_in : string -> in_channel +val open_in_bin : string -> in_channel +val open_in_gen : Sys.open_flag list -> int -> string -> in_channel +val input_char : in_channel -> char = "input_char" +val input_line : in_channel -> string +val input : in_channel -> string -> int -> int -> int +val really_input : in_channel -> string -> int -> int -> unit +val input_byte : in_channel -> int = "input_char" +val input_binary_int : in_channel -> int = "input_int" +val input_value : in_channel -> 'a = "input_value" +val seek_in : in_channel -> int -> unit = "seek_in" +val pos_in : in_channel -> int = "pos_in" +val in_channel_length : in_channel -> int = "channel_size" +val close_in : in_channel -> unit = "close_in" + +(* References *) + +type 'a ref = { mutable contents: 'a } +val ref: 'a -> 'a ref = "%makeblock" +val (!): 'a ref -> 'a = "%field0" +val (:=): 'a ref -> 'a -> unit = "%setfield0" +val incr: int ref -> unit = "%incr" +val decr: int ref -> unit = "%decr" + +(* Miscellaneous *) + +val exit : int -> 'a + +type 'a option = None | Some of 'a + +(**** For system use, not for the casual user ****) + +val unsafe_really_input: in_channel -> string -> int -> int -> unit diff --git a/stdlib/printexc.ml b/stdlib/printexc.ml new file mode 100644 index 0000000000..7404513286 --- /dev/null +++ b/stdlib/printexc.ml @@ -0,0 +1,43 @@ +let print_exn = function + Out_of_memory -> + prerr_string "Out of memory\n" + | Match_failure(file, first_char, last_char) -> + prerr_string "Pattern matching failed, file "; + prerr_string file; + prerr_string ", chars "; prerr_int first_char; + prerr_char '-'; prerr_int last_char; prerr_char '\n' + | x -> + prerr_string "Uncaught exception: "; + prerr_string (Obj.magic(Obj.field (Obj.field (Obj.repr x) 0) 0)); + if Obj.size (Obj.repr x) > 1 then begin + prerr_char '('; + for i = 1 to Obj.size (Obj.repr x) - 1 do + if i > 1 then prerr_string ", "; + let arg = Obj.field (Obj.repr x) i in + if not (Obj.is_block arg) then + prerr_int (Obj.magic arg : int) + else if Obj.tag arg = 253 then begin + prerr_char '"'; + prerr_string (Obj.magic arg : string); + prerr_char '"' + end else + prerr_char '_' + done; + prerr_char ')' + end; + prerr_char '\n' + +let print fct arg = + try + fct arg + with x -> + print_exn x; + raise x + +let catch fct arg = + try + fct arg + with x -> + flush stdout; + print_exn x; + exit 2 diff --git a/stdlib/printexc.mli b/stdlib/printexc.mli new file mode 100644 index 0000000000..0b56bd2a83 --- /dev/null +++ b/stdlib/printexc.mli @@ -0,0 +1,14 @@ +(* A catch-all exception handler *) + +val catch: ('a -> 'b) -> 'a -> 'b + (* [Printexc.catch fn x] applies [fn] to [x] and returns the result. + If the evaluation of [fn x] raises any exception, the + name of the exception is printed on standard error output, + and the programs aborts with exit code 2. + Typical use is [Printexc.catch main ()], where [main], with type + [unit->unit], is the entry point of a standalone program, to catch + and print stray exceptions. *) + +val print: ('a -> 'b) -> 'a -> 'b + (* Same as [catch], but re-raise the stray exception after + printing it, instead of aborting the program. *) diff --git a/stdlib/printf.ml b/stdlib/printf.ml new file mode 100644 index 0000000000..e13c2a6004 --- /dev/null +++ b/stdlib/printf.ml @@ -0,0 +1,86 @@ +external format_int: string -> int -> string = "format_int" +external format_float: string -> float -> string = "format_float" + +let fprintf outchan format = + let format = (Obj.magic format : string) in + let rec doprn i = + if i >= String.length format then + Obj.magic () + else + match String.get format i with + '%' -> + let j = skip_args (succ i) in + begin match String.get format j with + '%' -> + output_char outchan '%'; + doprn (succ j) + | 's' -> + Obj.magic(fun s -> + if j <= i+1 then + output_string outchan s + else begin + let p = + try + int_of_string (String.sub format (i+1) (j-i-1)) + with _ -> + invalid_arg "fprintf: bad %s format" in + if p > 0 & String.length s < p then begin + output_string outchan + (String.make (p - String.length s) ' '); + output_string outchan s + end else if p < 0 & String.length s < -p then begin + output_string outchan s; + output_string outchan + (String.make (-p - String.length s) ' ') + end else + output_string outchan s + end; + doprn (succ j)) + | 'c' -> + Obj.magic(fun c -> + output_char outchan c; + doprn (succ j)) + | 'd' | 'o' | 'x' | 'X' | 'u' -> + Obj.magic(doint i j) + | 'f' | 'e' | 'E' | 'g' | 'G' -> + Obj.magic(dofloat i j) + | 'b' -> + Obj.magic(fun b -> + output_string outchan (string_of_bool b); + doprn (succ j)) + | 'a' -> + Obj.magic(fun printer arg -> + printer outchan arg; + doprn(succ j)) + | 't' -> + Obj.magic(fun printer -> + printer outchan; + doprn(succ j)) + | c -> + invalid_arg ("fprintf: unknown format") + end + | c -> output_char outchan c; doprn (succ i) + + and skip_args j = + match String.get format j with + '0' .. '9' | ' ' | '.' | '-' -> skip_args (succ j) + | c -> j + + and doint i j n = + let len = j-i in + let fmt = String.create (len+2) in + String.blit format i fmt 0 len; + String.set fmt len 'l'; + String.set fmt (len+1) (String.get format j); + output_string outchan (format_int fmt n); + doprn (succ j) + + and dofloat i j f = + output_string outchan (format_float (String.sub format i (j-i+1)) f); + doprn (succ j) + + in doprn 0 + +let printf fmt = fprintf stdout fmt +and eprintf fmt = fprintf stderr fmt + diff --git a/stdlib/printf.mli b/stdlib/printf.mli new file mode 100644 index 0000000000..943f972091 --- /dev/null +++ b/stdlib/printf.mli @@ -0,0 +1,45 @@ +(* Formatting printing functions *) + +val fprintf: out_channel -> ('a, out_channel, unit) format -> 'a + (* [fprintf outchan format arg1 ... argN] formats the arguments + [arg1] to [argN] according to the format string [format], + and outputs the resulting string on the channel [outchan]. + The format is a character string which contains two types of + objects: plain characters, which are simply copied to the + output channel, and conversion specifications, each of which + causes conversion and printing of one argument. + Conversion specifications consist in the [%] character, followed + by optional flags and field widths, followed by one conversion + character. The conversion characters and their meanings are: +- [d] or [i]: convert an integer argument to signed decimal +- [u]: convert an integer argument to unsigned decimal +- [x]: convert an integer argument to unsigned hexadecimal, + using lowercase letters. +- [X]: convert an integer argument to unsigned hexadecimal, + using uppercase letters. +- [s]: insert a string argument +- [c]: insert a character argument +- [f]: convert a floating-point argument to decimal notation, + in the style [dddd.ddd] +- [e] or [E]: convert a floating-point argument to decimal notation, + in the style [d.ddd e+-dd] (mantissa and exponent) +- [g] or [G]: convert a floating-point argument to decimal notation, + in style [f] or [e], [E] (whichever is more compact) +- [b]: convert a boolean argument to the string [true] or [false] +- [a]: user-defined printer. Takes two arguments and apply the first + one to [outchan] (the current output channel) and to the second + argument. The first argument must therefore have type + [out_channel -> 'b -> unit] and the second ['b]. + The output produced by the function is therefore inserted + in the output of [fprintf] at the current point. +- [t]: same as [%a], but takes only one argument (with type + [out_channel -> unit]) and apply it to [outchan]. +- Refer to the C library [printf] function for the meaning of + flags and field width specifiers. *) + +val printf: ('a, out_channel, unit) format -> 'a + (* Same as [fprintf], but output on [std_out]. *) + +val eprintf: ('a, out_channel, unit) format -> 'a + (* Same as [fprintf], but output on [std_err]. *) + diff --git a/stdlib/queue.ml b/stdlib/queue.ml new file mode 100644 index 0000000000..977a26338c --- /dev/null +++ b/stdlib/queue.ml @@ -0,0 +1,58 @@ +exception Empty + +type 'a queue_cell = + Nil + | Cons of 'a * 'a queue_cell ref + +type 'a t = + { mutable head: 'a queue_cell; + mutable tail: 'a queue_cell } + +let new () = + { head = Nil; tail = Nil } + +let clear q = + q.head <- Nil; q.tail <- Nil + +let add x q = + match q.tail with + Nil -> (* if tail = Nil then head = Nil *) + let c = Cons(x, ref Nil) in + q.head <- c; q.tail <- c + | Cons(_, newtailref) -> + let c = Cons(x, ref Nil) in + newtailref := c; + q.tail <- c + +let peek q = + match q.head with + Nil -> + raise Empty + | Cons(x, _) -> + x + +let take q = + match q.head with + Nil -> + raise Empty + | Cons(x, rest) -> + q.head <- !rest; + begin match !rest with + Nil -> q.tail <- Nil + | _ -> () + end; + x + +let rec length_aux = function + Nil -> 0 + | Cons(_, rest) -> succ (length_aux !rest) + +let length q = length_aux q.head + +let rec iter_aux f = function + Nil -> + () + | Cons(x, rest) -> + f x; iter_aux f !rest + +let iter f q = iter_aux f q.head diff --git a/stdlib/queue.mli b/stdlib/queue.mli new file mode 100644 index 0000000000..297e81afa0 --- /dev/null +++ b/stdlib/queue.mli @@ -0,0 +1,28 @@ +(* Queues *) + +(* This module implements queues (FIFOs), with in-place modification. *) + +type 'a t + (* The type of queues containing elements of type ['a]. *) + +exception Empty + (* Raised when [take] is applied to an empty queue. *) + +val new: unit -> 'a t + (* Return a new queue, initially empty. *) +val add: 'a -> 'a t -> unit + (* [add x q] adds the element [x] at the end of the queue [q]. *) +val take: 'a t -> 'a + (* [take q] removes and returns the first element in queue [q], + or raises [Empty] if the queue is empty. *) +val peek: 'a t -> 'a + (* [peek q] returns the first element in queue [q], without removing + it from the queue, or raises [Empty] if the queue is empty. *) +val clear : 'a t -> unit + (* Discard all elements from a queue. *) +val length: 'a t -> int + (* Return the number of elements in a queue. *) +val iter: ('a -> 'b) -> 'a t -> unit + (* [iter f q] applies [f] in turn to all elements of [q], from the + least recently entered to the most recently entered. + The queue itself is unchanged. *) diff --git a/stdlib/set.ml b/stdlib/set.ml new file mode 100644 index 0000000000..404056308d --- /dev/null +++ b/stdlib/set.ml @@ -0,0 +1,99 @@ +(* Sets over ordered types *) + +module type OrderedType = + sig + type t + val compare: t -> t -> int + end + +module type S = + sig + type elt + type t + val empty: t + val is_empty: t -> bool + val mem: elt -> t -> bool + val add: elt -> t -> t + val remove: elt -> t -> t + val union: t -> t -> t + val inter: t -> t -> t + val diff: t -> t -> t + val compare: t -> t -> int + val equal: t -> t -> bool + val iter: (elt -> 'a) -> t -> unit + val fold: (elt -> 'a -> 'a) -> t -> 'a -> 'a + val elements: t -> elt list + end + +module Make(Ord: OrderedType): (S with elt = Ord.t) = + struct + open Baltree + type elt = Ord.t + type t = elt Baltree.t + + let empty = Empty + + let is_empty = function Empty -> true | _ -> false + + let mem x s = + Baltree.contains (Ord.compare x) s + + let add x s = + Baltree.add (Ord.compare x) x s + + let remove x s = + Baltree.remove (Ord.compare x) s + + let rec union s1 s2 = + match (s1, s2) with + (Empty, t2) -> t2 + | (t1, Empty) -> t1 + | (Node(l1, v1, r1, _), t2) -> + let (l2, _, r2) = Baltree.split (Ord.compare v1) t2 in + Baltree.join (union l1 l2) v1 (union r1 r2) + + let rec inter s1 s2 = + match (s1, s2) with + (Empty, t2) -> Empty + | (t1, Empty) -> Empty + | (Node(l1, v1, r1, _), t2) -> + match Baltree.split (Ord.compare v1) t2 with + (l2, Nothing, r2) -> + Baltree.concat (inter l1 l2) (inter r1 r2) + | (l2, Something _, r2) -> + Baltree.join (inter l1 l2) v1 (inter r1 r2) + + let rec diff s1 s2 = + match (s1, s2) with + (Empty, t2) -> Empty + | (t1, Empty) -> t1 + | (Node(l1, v1, r1, _), t2) -> + match Baltree.split (Ord.compare v1) t2 with + (l2, Nothing, r2) -> + Baltree.join (diff l1 l2) v1 (diff r1 r2) + | (l2, Something _, r2) -> + Baltree.concat (diff l1 l2) (diff r1 r2) + + let compare s1 s2 = + Baltree.compare Ord.compare s1 s2 + + let equal s1 s2 = + compare s1 s2 = 0 + + let rec iter f = function + Empty -> () + | Node(l, v, r, _) -> iter f l; f v; iter f r + + let rec fold f s accu = + match s with + Empty -> accu + | Node(l, v, r, _) -> fold f l (f v (fold f r accu)) + + let rec elements_aux accu = function + Empty -> accu + | Node(l, v, r, _) -> elements_aux (v :: elements_aux accu r) l + + let elements s = + elements_aux [] s + + end diff --git a/stdlib/set.mli b/stdlib/set.mli new file mode 100644 index 0000000000..4cf37425ab --- /dev/null +++ b/stdlib/set.mli @@ -0,0 +1,28 @@ +(* Sets over ordered types *) + +module type OrderedType = + sig + type t + val compare: t -> t -> int + end + +module type S = + sig + type elt + type t + val empty: t + val is_empty: t -> bool + val mem: elt -> t -> bool + val add: elt -> t -> t + val remove: elt -> t -> t + val union: t -> t -> t + val inter: t -> t -> t + val diff: t -> t -> t + val compare: t -> t -> int + val equal: t -> t -> bool + val iter: (elt -> 'a) -> t -> unit + val fold: (elt -> 'a -> 'a) -> t -> 'a -> 'a + val elements: t -> elt list + end + +module Make(Ord: OrderedType): (S with elt = Ord.t) diff --git a/stdlib/sort.ml b/stdlib/sort.ml new file mode 100644 index 0000000000..1b694bfffa --- /dev/null +++ b/stdlib/sort.ml @@ -0,0 +1,28 @@ +(* Merging and sorting *) + +let rec merge order l1 l2 = + match l1 with + [] -> l2 + | h1 :: t1 -> + match l2 with + [] -> l1 + | h2 :: t2 -> + if order h1 h2 + then h1 :: merge order t1 l2 + else h2 :: merge order l1 t2 + +let list order l = + let rec initlist = function + [] -> [] + | [e] -> [[e]] + | e1::e2::rest -> + (if order e1 e2 then [e1;e2] else [e2;e1]) :: initlist rest in + let rec merge2 = function + l1::l2::rest -> merge order l1 l2 :: merge2 rest + | x -> x in + let rec mergeall = function + [] -> [] + | [l] -> l + | llist -> mergeall (merge2 llist) in + mergeall(initlist l) + diff --git a/stdlib/sort.mli b/stdlib/sort.mli new file mode 100644 index 0000000000..545a0fad73 --- /dev/null +++ b/stdlib/sort.mli @@ -0,0 +1,13 @@ +(* Sorting and merging lists *) + +val list : ('a -> 'a -> bool) -> 'a list -> 'a list + (* Sort a list in increasing order according to an ordering predicate. + The predicate should return [true] if its first argument is + less than or equal to its second argument. *) + +val merge : ('a -> 'a -> bool) -> 'a list -> 'a list -> 'a list + (* Merge two lists according to the given predicate. + Assuming the two argument lists are sorted according to the + predicate, [merge] returns a sorted list containing the elements + from the two lists. The behavior is undefined if the two + argument lists were not sorted. *) diff --git a/stdlib/stack.ml b/stdlib/stack.ml new file mode 100644 index 0000000000..8b1710cdd3 --- /dev/null +++ b/stdlib/stack.ml @@ -0,0 +1,18 @@ +type 'a t = { mutable c : 'a list } + +exception Empty + +let new () = { c = [] } + +let clear s = s.c <- [] + +let push x s = s.c <- x :: s.c + +let pop s = + match s.c with + hd::tl -> s.c <- tl; hd + | [] -> raise Empty + +let length s = List.length s.c + +let iter f s = List.iter f s.c diff --git a/stdlib/stack.mli b/stdlib/stack.mli new file mode 100644 index 0000000000..a1133edcce --- /dev/null +++ b/stdlib/stack.mli @@ -0,0 +1,25 @@ +(* Stacks *) + +(* This modl implements stacks (LIFOs), with in-place modification. *) + +type 'a t + (* The type of stacks containing elements of type ['a]. *) + +exception Empty + (* Raised when [pop] is applied to an empty stack. *) + +val new: unit -> 'a t + (* Return a new stack, initially empty. *) +val push: 'a -> 'a t -> unit + (* [push x s] adds the element [x] at the top of stack [s]. *) +val pop: 'a t -> 'a + (* [pop s] removes and returns the topmost element in stack [s], + or raises [Empty] if the stack is empty. *) +val clear : 'a t -> unit + (* Discard all elements from a stack. *) +val length: 'a t -> int + (* Return the number of elements in a stack. *) +val iter: ('a -> 'b) -> 'a t -> unit + (* [iter f s] applies [f] in turn to all elements of [s], from the + element at the top of the stack to the element at the + bottom of the stack. The stack itself is unchanged. *) diff --git a/stdlib/string.ml b/stdlib/string.ml new file mode 100644 index 0000000000..eeb5676a7e --- /dev/null +++ b/stdlib/string.ml @@ -0,0 +1,93 @@ +(* String operations *) + +external length : string -> int = "ml_string_length" +external create: int -> string = "create_string" +external unsafe_get : string -> int -> char = "%string_get" +external unsafe_set : string -> int -> char -> unit = "%string_set" +external unsafe_blit : string -> int -> string -> int -> int -> unit + = "blit_string" +external unsafe_fill : string -> int -> int -> char -> unit = "fill_string" + +let get s n = + if n < 0 or n >= length s + then invalid_arg "String.get" + else unsafe_get s n + +let set s n c = + if n < 0 or n >= length s + then invalid_arg "String.set" + else unsafe_set s n c + +let make n c = + let s = create n in + unsafe_fill s 0 n c; + s + +let copy s = + let len = length s in + let r = create len in + unsafe_blit s 0 r 0 len; + r + +let sub s ofs len = + if ofs < 0 or len < 0 or ofs + len > length s + then invalid_arg "String.sub" + else begin + let r = create len in + unsafe_blit s ofs r 0 len; + r + end + + +let fill s ofs len c = + if ofs < 0 or len < 0 or ofs + len > length s + then invalid_arg "String.fill" + else unsafe_fill s ofs len c + +let blit s1 ofs1 s2 ofs2 len = + if len < 0 or ofs1 < 0 or ofs1 + len > length s1 + or ofs2 < 0 or ofs2 + len > length s2 + then invalid_arg "String.blit" + else unsafe_blit s1 ofs1 s2 ofs2 len + + +external is_printable: char -> bool = "is_printable" + +let escaped s = + let n = ref 0 in + for i = 0 to length s - 1 do + n := !n + + (match unsafe_get s i with + '"' | '\\' | '\n' | '\t' -> 2 + | c -> if is_printable c then 1 else 4) + done; + if !n = length s then s else begin + let s' = create !n in + n := 0; + for i = 0 to length s - 1 do + begin + match unsafe_get s i with + ('"' | '\\') as c -> + unsafe_set s' !n '\\'; incr n; unsafe_set s' !n c + | '\n' -> + unsafe_set s' !n '\\'; incr n; unsafe_set s' !n 'n' + | '\t' -> + unsafe_set s' !n '\\'; incr n; unsafe_set s' !n 't' + | c -> + if is_printable c then + unsafe_set s' !n c + else begin + let a = Char.code c in + unsafe_set s' !n '\\'; + incr n; + unsafe_set s' !n (Char.unsafe_chr (48 + a / 100)); + incr n; + unsafe_set s' !n (Char.unsafe_chr (48 + (a / 10) mod 10)); + incr n; + unsafe_set s' !n (Char.unsafe_chr (48 + a mod 10)) + end + end; + incr n + done; + s' + end diff --git a/stdlib/string.mli b/stdlib/string.mli new file mode 100644 index 0000000000..6dd586f868 --- /dev/null +++ b/stdlib/string.mli @@ -0,0 +1,24 @@ +(* String operations *) + +val length : string -> int = "ml_string_length" + +val get : string -> int -> char +val set : string -> int -> char -> unit + +val create : int -> string = "create_string" +val make : int -> char -> string +val copy : string -> string +val sub : string -> int -> int -> string + +val fill : string -> int -> int -> char -> unit +val blit : string -> int -> string -> int -> int -> unit + +val escaped: string -> string + +val unsafe_get : string -> int -> char = "%string_get" +val unsafe_set : string -> int -> char -> unit = "%string_set" +val unsafe_blit : string -> int -> string -> int -> int -> unit + = "blit_string" +val unsafe_fill : string -> int -> int -> char -> unit = "fill_string" + + diff --git a/stdlib/sys.ml b/stdlib/sys.ml new file mode 100644 index 0000000000..b6172e4183 --- /dev/null +++ b/stdlib/sys.ml @@ -0,0 +1,52 @@ +(* System interface *) + +type open_flag = + Open_rdonly | Open_wronly | Open_rdwr + | Open_append | Open_creat | Open_trunc | Open_excl + | Open_binary | Open_text + +external get_argv: unit -> string array = "sys_get_argv" + +let argv = get_argv() + +external remove: string -> unit = "sys_remove" +external getenv: string -> string = "sys_getenv" +external open_desc: string -> open_flag list -> int -> int = "sys_open" +external close_desc: int -> unit = "sys_close" +external command: string -> int = "sys_system_command" +external chdir: string -> unit = "sys_chdir" + +type signal_behavior = + Signal_default + | Signal_ignore + | Signal_handle of (int -> unit) + +external signal: int -> signal_behavior -> unit = "install_signal_handler" + +let sigabrt = -1 +let sigalrm = -2 +let sigfpe = -3 +let sighup = -4 +let sigill = -5 +let sigint = -6 +let sigkill = -7 +let sigpipe = -8 +let sigquit = -9 +let sigsegv = -10 +let sigterm = -11 +let sigusr1 = -12 +let sigusr2 = -13 +let sigchld = -14 +let sigcont = -15 +let sigstop = -16 +let sigtstp = -17 +let sigttin = -18 +let sigttou = -19 + +exception Break + +let catch_break on = + if on then + signal sigint (Signal_handle(fun _ -> raise Break)) + else + signal sigint Signal_default diff --git a/stdlib/sys.mli b/stdlib/sys.mli new file mode 100644 index 0000000000..b6332a14a4 --- /dev/null +++ b/stdlib/sys.mli @@ -0,0 +1,45 @@ +(* System interface *) + +type open_flag = + Open_rdonly | Open_wronly | Open_rdwr + | Open_append | Open_creat | Open_trunc | Open_excl + | Open_binary | Open_text + +val argv: string array +val remove: string -> unit = "sys_remove" +val getenv: string -> string = "sys_getenv" +val open_desc: string -> open_flag list -> int -> int = "sys_open" +val close_desc: int -> unit = "sys_close" +val command: string -> int = "sys_system_command" +val chdir: string -> unit = "sys_chdir" + +type signal_behavior = + Signal_default + | Signal_ignore + | Signal_handle of (int -> unit) + +val signal: int -> signal_behavior -> unit = "install_signal_handler" + +val sigabrt: int +val sigalrm: int +val sigfpe: int +val sighup: int +val sigill: int +val sigint: int +val sigkill: int +val sigpipe: int +val sigquit: int +val sigsegv: int +val sigterm: int +val sigusr1: int +val sigusr2: int +val sigchld: int +val sigcont: int +val sigstop: int +val sigtstp: int +val sigttin: int +val sigttou: int + +exception Break + +val catch_break: bool -> unit diff --git a/test/KB/equations.ml b/test/KB/equations.ml new file mode 100644 index 0000000000..054c15b2e6 --- /dev/null +++ b/test/KB/equations.ml @@ -0,0 +1,98 @@ +(****************** Equation manipulations *************) + +open Terms + +type rule = + { number: int; + numvars: int; + lhs: term; + rhs: term } + +(* standardizes an equation so its variables are 1,2,... *) + +let mk_rule num m n = + let all_vars = union (vars m) (vars n) in + let counter = ref 0 in + let subst = + List.map (fun v -> incr counter; (v, Var !counter)) (List.rev all_vars) in + { number = num; + numvars = !counter; + lhs = substitute subst m; + rhs = substitute subst n } + + +(* checks that rules are numbered in sequence and returns their number *) + +let check_rules rules = + let counter = ref 0 in + List.iter (fun r -> incr counter; + if r.number <> !counter + then failwith "Rule numbers not in sequence") + rules; + !counter + + +let pretty_rule rule = + print_int rule.number; print_string " : "; + pretty_term rule.lhs; print_string " = "; pretty_term rule.rhs; + print_newline() + + +let pretty_rules rules = List.iter pretty_rule rules + +(****************** Rewriting **************************) + +(* Top-level rewriting. Let eq:L=R be an equation, M be a term such that L<=M. + With sigma = matching L M, we define the image of M by eq as sigma(R) *) +let reduce l m r = + substitute (matching l m) r + +(* Test whether m can be reduced by l, i.e. m contains an instance of l. *) + +let rec reducible l m = + try + matching l m; true + with Failure _ -> + match m with Term(_,sons) -> List.exists (reducible l) sons + | _ -> false + +(* Top-level rewriting with multiple rules. *) + +let rec mreduce rules m = + match rules with + [] -> failwith "mreduce" + | rule::rest -> + try + reduce rule.lhs m rule.rhs + with Failure _ -> + mreduce rest m + + +(* One step of rewriting in leftmost-outermost strategy, + with multiple rules. Fails if no redex is found *) + +let rec mrewrite1 rules m = + try + mreduce rules m + with Failure _ -> + match m with + Var n -> failwith "mrewrite1" + | Term(f, sons) -> Term(f, mrewrite1_sons rules sons) + +and mrewrite1_sons rules = function + [] -> failwith "mrewrite1" + | son::rest -> + try + mrewrite1 rules son :: rest + with Failure _ -> + son :: mrewrite1_sons rules rest + + +(* Iterating rewrite1. Returns a normal form. May loop forever *) + +let rec mrewrite_all rules m = + try + mrewrite_all rules (mrewrite1 rules m) + with Failure _ -> + m + diff --git a/test/KB/equations.mli b/test/KB/equations.mli new file mode 100644 index 0000000000..bd71235e8d --- /dev/null +++ b/test/KB/equations.mli @@ -0,0 +1,18 @@ +open Terms + +type rule = + { number: int; + numvars: int; + lhs: term; + rhs: term } + +val mk_rule: int -> term -> term -> rule +val check_rules: rule list -> int +val pretty_rule: rule -> unit +val pretty_rules: rule list -> unit +val reduce: term -> term -> term -> term +val reducible: term -> term -> bool +val mreduce: rule list -> term -> term +val mrewrite1: rule list -> term -> term +val mrewrite1_sons: rule list -> term list -> term list +val mrewrite_all: rule list -> term -> term diff --git a/test/KB/kb.ml b/test/KB/kb.ml new file mode 100644 index 0000000000..ff63518ae9 --- /dev/null +++ b/test/KB/kb.ml @@ -0,0 +1,174 @@ +open Terms +open Equations + +(****************** Critical pairs *********************) + +(* All (u,subst) such that N/u (&var) unifies with M, + with principal unifier subst *) + +let rec super m = function + Term(_,sons) as n -> + let rec collate n = function + [] -> [] + | son::rest -> + List.map (fun (u, subst) -> (n::u, subst)) (super m son) + @ collate (n+1) rest in + let insides = collate 1 sons in + begin try + ([], unify m n) :: insides + with Failure _ -> + insides + end + | _ -> [] + + +(* Ex : +let (m,_) = <<F(A,B)>> +and (n,_) = <<H(F(A,x),F(x,y))>> in super m n +==> [[1],[2,Term ("B",[])]; x <- B + [2],[2,Term ("A",[]); 1,Term ("B",[])]] x <- A y <- B +*) + +(* All (u,subst), u&[], such that n/u unifies with m *) + +let super_strict m = function + Term(_,sons) -> + let rec collate n = function + [] -> [] + | son::rest -> + List.map (fun (u, subst) -> (n::u, subst)) (super m son) + @ collate (n+1) rest in + collate 1 sons + | _ -> [] + + +(* Critical pairs of l1=r1 with l2=r2 *) +(* critical_pairs : term_pair -> term_pair -> term_pair list *) +let critical_pairs (l1,r1) (l2,r2) = + let mk_pair (u,subst) = + substitute subst (replace l2 u r1), substitute subst r2 in + List.map mk_pair (super l1 l2) + +(* Strict critical pairs of l1=r1 with l2=r2 *) +(* strict_critical_pairs : term_pair -> term_pair -> term_pair list *) +let strict_critical_pairs (l1,r1) (l2,r2) = + let mk_pair (u,subst) = + substitute subst (replace l2 u r1), substitute subst r2 in + List.map mk_pair (super_strict l1 l2) + + +(* All critical pairs of eq1 with eq2 *) +let mutual_critical_pairs eq1 eq2 = + (strict_critical_pairs eq1 eq2) @ (critical_pairs eq2 eq1) + +(* Renaming of variables *) + +let rename n (t1,t2) = + let rec ren_rec = function + Var k -> Var(k+n) + | Term(op,sons) -> Term(op, List.map ren_rec sons) in + (ren_rec t1, ren_rec t2) + + +(************************ Completion ******************************) + +let deletion_message rule = + print_string "Rule ";print_int rule.number; print_string " deleted"; + print_newline() + + +(* Generate failure message *) +let non_orientable (m,n) = + pretty_term m; print_string " = "; pretty_term n; print_newline() + + +let rec partition p = function + [] -> ([], []) + | x::l -> let (l1, l2) = partition p l in + if p x then (x::l1, l2) else (l1, x::l2) + + +let rec get_rule n = function + [] -> raise Not_found + | r::l -> if n = r.number then r else get_rule n l + + +(* Improved Knuth-Bendix completion procedure *) + +let kb_completion greater = + let rec kbrec j rules = + let rec process failures (k,l) eqs = +(**** + print_string "***kb_completion "; print_int j; print_newline(); + pretty_rules rules; + List.iter non_orientable failures; + print_int k; print_string " "; print_int l; print_newline(); + List.iter non_orientable eqs; +***) + match eqs with + [] -> + if k<l then next_criticals failures (k+1,l) else + if l<j then next_criticals failures (1,l+1) else + begin match failures with + [] -> rules (* successful completion *) + | _ -> print_string "Non-orientable equations :"; print_newline(); + List.iter non_orientable failures; + failwith "kb_completion" + end + | (m,n)::eqs -> + let m' = mrewrite_all rules m + and n' = mrewrite_all rules n + and enter_rule(left,right) = + let new_rule = mk_rule (j+1) left right in + pretty_rule new_rule; + let left_reducible rule = reducible left rule.lhs in + let (redl,irredl) = partition left_reducible rules in + List.iter deletion_message redl; + let right_reduce rule = + mk_rule rule.number rule.lhs + (mrewrite_all (new_rule::rules) rule.rhs) in + let irreds = List.map right_reduce irredl in + let eqs' = List.map (fun rule -> (rule.lhs, rule.rhs)) redl in + kbrec (j+1) (new_rule::irreds) [] (k,l) (eqs @ eqs' @ failures) in +(*** + print_string "--- Considering "; non_orientable (m', n'); +***) + if m' = n' then process failures (k,l) eqs else + if greater(m',n') then enter_rule(m',n') else + if greater(n',m') then enter_rule(n',m') else + process ((m',n')::failures) (k,l) eqs + + and next_criticals failures (k,l) = +(**** + print_string "***next_criticals "; + print_int k; print_string " "; print_int l ; print_newline(); +****) + try + let rl = get_rule l rules in + let el = (rl.lhs, rl.rhs) in + if k=l then + process failures (k,l) + (strict_critical_pairs el (rename rl.numvars el)) + else + try + let rk = get_rule k rules in + let ek = (rk.lhs, rk.rhs) in + process failures (k,l) + (mutual_critical_pairs el (rename rl.numvars ek)) + with Not_found -> next_criticals failures (k+1,l) + with Not_found -> next_criticals failures (1,l+1) + in process + in kbrec + + +(* complete_rules is assumed locally confluent, and checked Noetherian with + ordering greater, rules is any list of rules *) + +let kb_complete greater complete_rules rules = + let n = check_rules complete_rules + and eqs = List.map (fun rule -> (rule.lhs, rule.rhs)) rules in + let completed_rules = + kb_completion greater n complete_rules [] (n,n) eqs in + print_string "Canonical set found :"; print_newline(); + pretty_rules (List.rev completed_rules) + diff --git a/test/KB/kb.mli b/test/KB/kb.mli new file mode 100644 index 0000000000..accac402c3 --- /dev/null +++ b/test/KB/kb.mli @@ -0,0 +1,15 @@ +open Terms +open Equations + +val super: term -> term -> (int list * (int * term) list) list +val super_strict: term -> term -> (int list * (int * term) list) list +val critical_pairs: term * term -> term * term -> (term * term) list +val strict_critical_pairs: term * term -> term * term -> (term * term) list +val mutual_critical_pairs: term * term -> term * term -> (term * term) list +val rename: int -> term * term -> term * term +val deletion_message: rule -> unit +val non_orientable: term * term -> unit +val partition: ('a -> bool) -> 'a list -> 'a list * 'a list +val get_rule: int -> rule list -> rule +val kb_completion: (term * term -> bool) -> int -> rule list -> (term * term) list -> int * int -> (term * term) list -> rule list +val kb_complete: (term * term -> bool) -> rule list -> rule list -> unit diff --git a/test/KB/kbmain.ml b/test/KB/kbmain.ml new file mode 100644 index 0000000000..2a10773c80 --- /dev/null +++ b/test/KB/kbmain.ml @@ -0,0 +1,66 @@ +open Terms +open Equations +open Orderings +open Kb + +(**** +let group_rules = [ + { number = 1; numvars = 1; + lhs = Term("*", [Term("U",[]); Var 1]); rhs = Var 1 }; + { number = 2; numvars = 1; + lhs = Term("*", [Term("I",[Var 1]); Var 1]); rhs = Term("U",[]) }; + { number = 3; numvars = 3; + lhs = Term("*", [Term("*", [Var 1; Var 2]); Var 3]); + rhs = Term("*", [Var 1; Term("*", [Var 2; Var 3])]) } +] +****) + +let geom_rules = [ + { number = 1; numvars = 1; + lhs = Term ("*",[(Term ("U",[])); (Var 1)]); + rhs = Var 1 }; + { number = 2; numvars = 1; + lhs = Term ("*",[(Term ("I",[(Var 1)])); (Var 1)]); + rhs = Term ("U",[]) }; + { number = 3; numvars = 3; + lhs = Term ("*",[(Term ("*",[(Var 1); (Var 2)])); (Var 3)]); + rhs = Term ("*",[(Var 1); (Term ("*",[(Var 2); (Var 3)]))]) }; + { number = 4; numvars = 0; + lhs = Term ("*",[(Term ("A",[])); (Term ("B",[]))]); + rhs = Term ("*",[(Term ("B",[])); (Term ("A",[]))]) }; + { number = 5; numvars = 0; + lhs = Term ("*",[(Term ("C",[])); (Term ("C",[]))]); + rhs = Term ("U",[]) }; + { number = 6; numvars = 0; + lhs = Term("*", + [(Term ("C",[])); + (Term ("*",[(Term ("A",[])); (Term ("I",[(Term ("C",[]))]))]))]); + rhs = Term ("I",[(Term ("A",[]))]) }; + { number = 7; numvars = 0; + lhs = Term("*", + [(Term ("C",[])); + (Term ("*",[(Term ("B",[])); (Term ("I",[(Term ("C",[]))]))]))]); + rhs = Term ("B",[]) } +] + +let group_rank = function + "U" -> 0 + | "*" -> 1 + | "I" -> 2 + | "B" -> 3 + | "C" -> 4 + | "A" -> 5 + +let group_precedence op1 op2 = + let r1 = group_rank op1 + and r2 = group_rank op2 in + if r1 = r2 then Equal else + if r1 > r2 then Greater else NotGE + +let group_order = rpo group_precedence lex_ext + +let greater pair = + match group_order pair with Greater -> true | _ -> false + +let _ = kb_complete greater [] geom_rules + diff --git a/test/KB/orderings.ml b/test/KB/orderings.ml new file mode 100644 index 0000000000..8b58d80a2a --- /dev/null +++ b/test/KB/orderings.ml @@ -0,0 +1,85 @@ +(*********************** Recursive Path Ordering ****************************) + +open Terms + +type ordering = + Greater + | Equal + | NotGE + +let ge_ord order pair = match order pair with NotGE -> false | _ -> true +and gt_ord order pair = match order pair with Greater -> true | _ -> false +and eq_ord order pair = match order pair with Equal -> true | _ -> false + + +let rec rem_eq equiv x = function + [] -> failwith "rem_eq" + | y::l -> if equiv (x,y) then l else y :: rem_eq equiv x l + + +let diff_eq equiv (x,y) = + let rec diffrec = function + ([],_) as p -> p + | (h::t, y) -> try + diffrec (t, rem_eq equiv h y) + with Failure _ -> + let (x',y') = diffrec (t,y) in (h::x',y') in + if List.length x > List.length y then diffrec(y,x) else diffrec(x,y) + + +(* Multiset extension of order *) + +let mult_ext order = function + Term(_,sons1), Term(_,sons2) -> + begin match diff_eq (eq_ord order) (sons1,sons2) with + ([],[]) -> Equal + | (l1,l2) -> + if List.for_all + (fun n -> List.exists (fun m -> gt_ord order (m,n)) l1) l2 + then Greater else NotGE + end + | _ -> failwith "mult_ext" + + +(* Lexicographic extension of order *) + +let lex_ext order = function + (Term(_,sons1) as m), (Term(_,sons2) as n) -> + let rec lexrec = function + ([] , []) -> Equal + | ([] , _ ) -> NotGE + | ( _ , []) -> Greater + | (x1::l1, x2::l2) -> + match order (x1,x2) with + Greater -> if List.for_all (fun n' -> gt_ord order (m,n')) l2 + then Greater else NotGE + | Equal -> lexrec (l1,l2) + | NotGE -> if List.exists (fun m' -> ge_ord order (m',n)) l1 + then Greater else NotGE in + lexrec (sons1, sons2) + | _ -> failwith "lex_ext" + + +(* Recursive path ordering *) + +let rpo op_order ext = + let rec rporec (m,n) = + if m = n then Equal else + match m with + Var vm -> NotGE + | Term(op1,sons1) -> + match n with + Var vn -> + if occurs vn m then Greater else NotGE + | Term(op2,sons2) -> + match (op_order op1 op2) with + Greater -> + if List.for_all (fun n' -> gt_ord rporec (m,n')) sons2 + then Greater else NotGE + | Equal -> + ext rporec (m,n) + | NotGE -> + if List.exists (fun m' -> ge_ord rporec (m',n)) sons1 + then Greater else NotGE + in rporec + diff --git a/test/KB/orderings.mli b/test/KB/orderings.mli new file mode 100644 index 0000000000..f540e527dd --- /dev/null +++ b/test/KB/orderings.mli @@ -0,0 +1,17 @@ +open Terms + +type ordering = + Greater + | Equal + | NotGE + +val ge_ord: ('a -> ordering) -> 'a -> bool +val gt_ord: ('a -> ordering) -> 'a -> bool +val eq_ord: ('a -> ordering) -> 'a -> bool +val rem_eq: ('a * 'b -> bool) -> 'a -> 'b list -> 'b list +val diff_eq: ('a * 'a -> bool) -> 'a list * 'a list -> 'a list * 'a list +val mult_ext: (term * term -> ordering) -> term * term -> ordering +val lex_ext: (term * term -> ordering) -> term * term -> ordering +val rpo: (string -> string -> ordering) -> + ((term * term -> ordering) -> term * term -> ordering) -> + term * term -> ordering diff --git a/test/KB/terms.ml b/test/KB/terms.ml new file mode 100644 index 0000000000..35c65552b2 --- /dev/null +++ b/test/KB/terms.ml @@ -0,0 +1,123 @@ +(****************** Term manipulations *****************) + +type term = + Var of int + | Term of string * term list + +let rec union l1 l2 = + match l1 with + [] -> l2 + | a::r -> if List.mem a l2 then union r l2 else a :: union r l2 + + +let rec vars = function + Var n -> [n] + | Term(_,l) -> vars_of_list l +and vars_of_list = function + [] -> [] + | t::r -> union (vars t) (vars_of_list r) + + +let rec substitute subst = function + Term(oper,sons) -> Term(oper, List.map (substitute subst) sons) + | Var(n) as t -> try List.assoc n subst with Not_found -> t + + +(* Term replacement: replace M u N is M[u<-N]. *) + +let rec replace m u n = + match (u, m) with + [], _ -> n + | i::u, Term(oper, sons) -> Term(oper, replace_nth i sons u n) + | _ -> failwith "replace" + +and replace_nth i sons u n = + match sons with + s::r -> if i = 1 + then replace s u n :: r + else s :: replace_nth (i-1) r u n + | [] -> failwith "replace_nth" + + +(* Term matching. *) + +let matching term1 term2 = + let rec match_rec subst t1 t2 = + match (t1, t2) with + Var v, _ -> + if List.mem_assoc v subst then + if t2 = List.assoc v subst then subst else failwith "matching" + else + (v, t2) :: subst + | Term(op1,sons1), Term(op2,sons2) -> + if op1 = op2 + then List.fold_left2 match_rec subst sons1 sons2 + else failwith "matching" + | _ -> failwith "matching" in + match_rec [] term1 term2 + + +(* A naive unification algorithm. *) + +let compsubst subst1 subst2 = + (List.map (fun (v,t) -> (v, substitute subst1 t)) subst2) @ subst1 + + +let rec occurs n = function + Var m -> m = n + | Term(_,sons) -> List.exists (occurs n) sons + + +let rec unify term1 term2 = + match (term1, term2) with + Var n1, _ -> + if term1 = term2 then [] + else if occurs n1 term2 then failwith "unify" + else [n1, term2] + | term1, Var n2 -> + if occurs n2 term1 then failwith "unify" + else [n2, term1] + | Term(op1,sons1), Term(op2,sons2) -> + if op1 = op2 then + List.fold_left2 (fun s t1 t2 -> compsubst (unify (substitute s t1) + (substitute s t2)) s) + [] sons1 sons2 + else failwith "unify" + + +(* We need to print terms with variables independently from input terms + obtained by parsing. We give arbitrary names v1,v2,... to their variables. +*) + +let infixes = ["+";"*"] + +let rec pretty_term = function + Var n -> + print_string "v"; print_int n + | Term (oper,sons) -> + if List.mem oper infixes then begin + match sons with + [s1;s2] -> + pretty_close s1; print_string oper; pretty_close s2 + | _ -> + failwith "pretty_term : infix arity <> 2" + end else begin + print_string oper; + match sons with + [] -> () + | t::lt -> print_string "("; + pretty_term t; + List.iter (fun t -> print_string ","; pretty_term t) lt; + print_string ")" + end + +and pretty_close = function + Term(oper, _) as m -> + if List.mem oper infixes then begin + print_string "("; pretty_term m; print_string ")" + end else + pretty_term m + | m -> + pretty_term m + + diff --git a/test/KB/terms.mli b/test/KB/terms.mli new file mode 100644 index 0000000000..3e3f831b36 --- /dev/null +++ b/test/KB/terms.mli @@ -0,0 +1,17 @@ +type term = + Var of int + | Term of string * term list + +val union: 'a list -> 'a list -> 'a list +val vars: term -> int list +val vars_of_list: term list -> int list +val substitute: (int * term) list -> term -> term +val replace: term -> int list -> term -> term +val replace_nth: int -> term list -> int list -> term -> term list +val matching: term -> term -> (int * term) list +val compsubst: (int * term) list -> (int * term) list -> (int * term) list +val occurs: int -> term -> bool +val unify: term -> term -> (int * term) list +val infixes: string list +val pretty_term: term -> unit +val pretty_close: term -> unit diff --git a/test/Lex/gram_aux.ml b/test/Lex/gram_aux.ml new file mode 100644 index 0000000000..525ee69b5e --- /dev/null +++ b/test/Lex/gram_aux.ml @@ -0,0 +1,33 @@ +(* Auxiliaries for the parser. *) + +open Syntax + +let regexp_for_string s = + let l = String.length s in + if l = 0 then + Epsilon + else begin + let re = ref(Characters [String.get s (l - 1)]) in + for i = l - 2 downto 0 do + re := Sequence(Characters [String.get s i], !re) + done; + !re + end + + +let char_class c1 c2 = + let class = ref [] in + for i = Char.code c2 downto Char.code c1 do + class := Char.chr i :: !class + done; + !class + + +let all_chars = char_class '\001' '\255' + + +let rec subtract l1 l2 = + match l1 with + [] -> [] + | a::l -> if List.mem a l2 then subtract l l2 else a :: subtract l l2 + diff --git a/test/Lex/grammar.mly b/test/Lex/grammar.mly new file mode 100644 index 0000000000..eb1c8cc248 --- /dev/null +++ b/test/Lex/grammar.mly @@ -0,0 +1,100 @@ +/* The grammar for lexer definitions */ + +%{ +open Syntax +open Gram_aux +%} + +%token <string> Tident +%token <char> Tchar +%token <string> Tstring +%token <Syntax.location> Taction +%token Trule Tparse Tand Tequal Tend Tor Tunderscore Teof Tlbracket Trbracket +%token Tstar Tmaybe Tplus Tlparen Trparen Tcaret Tdash + +%left Tor +%left CONCAT +%nonassoc Tmaybe +%left Tstar +%left Tplus + +%start lexer_definition +%type <Syntax.lexer_definition> lexer_definition + +%% + +lexer_definition: + header Trule definition other_definitions Tend + { Lexdef($1, $3::(List.rev $4)) } +; +header: + Taction + { $1 } + | + { Location(0,0) } +; +other_definitions: + other_definitions Tand definition + { $3::$1 } + | + { [] } +; +definition: + Tident Tequal entry + { ($1,$3) } +; +entry: + Tparse case rest_of_entry + { $2 :: List.rev $3 } +; +rest_of_entry: + rest_of_entry Tor case + { $3::$1 } + | + { [] } +; +case: + regexp Taction + { ($1,$2) } +; +regexp: + Tunderscore + { Characters all_chars } + | Teof + { Characters ['\000'] } + | Tchar + { Characters [$1] } + | Tstring + { regexp_for_string $1 } + | Tlbracket char_class Trbracket + { Characters $2 } + | regexp Tstar + { Repetition $1 } + | regexp Tmaybe + { Alternative($1, Epsilon) } + | regexp Tplus + { Sequence($1, Repetition $1) } + | regexp Tor regexp + { Alternative($1,$3) } + | regexp regexp %prec CONCAT + { Sequence($1,$2) } + | Tlparen regexp Trparen + { $2 } +; +char_class: + Tcaret char_class1 + { subtract all_chars $2 } + | char_class1 + { $1 } +; +char_class1: + Tchar Tdash Tchar + { char_class $1 $3 } + | Tchar + { [$1] } + | char_class char_class %prec CONCAT + { $1 @ $2 } +; + +%% + diff --git a/test/Lex/lexgen.ml b/test/Lex/lexgen.ml new file mode 100644 index 0000000000..73d011577f --- /dev/null +++ b/test/Lex/lexgen.ml @@ -0,0 +1,252 @@ +(* Compiling a lexer definition *) + +open Syntax + +(* Deep abstract syntax for regular expressions *) + +type regexp = + Empty + | Chars of int + | Action of int + | Seq of regexp * regexp + | Alt of regexp * regexp + | Star of regexp + +(* From shallow to deep syntax *) + +(*** + +let print_char_class c = + let print_interval low high = + prerr_int low; + if high - 1 > low then begin + prerr_char '-'; + prerr_int (high-1) + end; + prerr_char ' ' in + let rec print_class first next = function + [] -> print_interval first next + | c::l -> + if char.code c = next + then print_class first (next+1) l + else begin + print_interval first next; + print_class (char.code c) (char.code c + 1) l + end in + match c with + [] -> prerr_newline() + | c::l -> print_class (char.code c) (char.code c + 1) l; prerr_newline() + + +let rec print_regexp = function + Empty -> prerr_string "Empty" + | Chars n -> prerr_string "Chars "; prerr_int n + | Action n -> prerr_string "Action "; prerr_int n + | Seq(r1,r2) -> print_regexp r1; prerr_string "; "; print_regexp r2 + | Alt(r1,r2) -> prerr_string "("; print_regexp r1; prerr_string " | "; print_regexp r2; prerr_string ")" + | Star r -> prerr_string "("; print_regexp r; prerr_string ")*" + +***) + +let chars = ref ([] : char list list) +let chars_count = ref 0 +let actions = ref ([] : (int * location) list) +let actions_count = ref 0 + +let rec encode_regexp = function + Epsilon -> Empty + | Characters cl -> + let n = !chars_count in +(*** prerr_int n; prerr_char ' '; print_char_class cl; ***) + chars := cl :: !chars; + chars_count := !chars_count + 1; + Chars(n) + | Sequence(r1,r2) -> + Seq(encode_regexp r1, encode_regexp r2) + | Alternative(r1,r2) -> + Alt(encode_regexp r1, encode_regexp r2) + | Repetition r -> + Star (encode_regexp r) + + +let encode_casedef = + List.fold_left + (fun reg (expr,act) -> + let act_num = !actions_count in + actions_count := !actions_count + 1; + actions := (act_num, act) :: !actions; + Alt(reg, Seq(encode_regexp expr, Action act_num))) + Empty + + +let encode_lexdef (Lexdef(_, ld)) = + chars := []; + chars_count := 0; + actions := []; + actions_count := 0; + let name_regexp_list = + List.map (fun (name, casedef) -> (name, encode_casedef casedef)) ld in +(* List.iter print_char_class chars; *) + let chr = Array.of_list (List.rev !chars) + and act = !actions in + chars := []; + actions := []; + (chr, name_regexp_list, act) + + +(* To generate directly a NFA from a regular expression. + Confer Aho-Sethi-Ullman, dragon book, chap. 3 *) + +type transition = + OnChars of int + | ToAction of int + + +let rec merge_trans l1 l2 = + match (l1, l2) with + ([], s2) -> s2 + | (s1, []) -> s1 + | ((OnChars n1 as t1) :: r1 as s1), ((OnChars n2 as t2) :: r2 as s2) -> + if n1 = n2 then t1 :: merge_trans r1 r2 else + if n1 < n2 then t1 :: merge_trans r1 s2 else + t2 :: merge_trans s1 r2 + | ((ToAction n1 as t1) :: r1 as s1), ((ToAction n2 as t2) :: r2 as s2) -> + if n1 = n2 then t1 :: merge_trans r1 r2 else + if n1 < n2 then t1 :: merge_trans r1 s2 else + t2 :: merge_trans s1 r2 + | ((OnChars n1 as t1) :: r1 as s1), ((ToAction n2 as t2) :: r2 as s2) -> + t1 :: merge_trans r1 s2 + | ((ToAction n1 as t1) :: r1 as s1), ((OnChars n2 as t2) :: r2 as s2) -> + t2 :: merge_trans s1 r2 + + +let rec nullable = function + Empty -> true + | Chars _ -> false + | Action _ -> false + | Seq(r1,r2) -> nullable r1 & nullable r2 + | Alt(r1,r2) -> nullable r1 or nullable r2 + | Star r -> true + + +let rec firstpos = function + Empty -> [] + | Chars pos -> [OnChars pos] + | Action act -> [ToAction act] + | Seq(r1,r2) -> if nullable r1 + then merge_trans (firstpos r1) (firstpos r2) + else firstpos r1 + | Alt(r1,r2) -> merge_trans (firstpos r1) (firstpos r2) + | Star r -> firstpos r + + +let rec lastpos = function + Empty -> [] + | Chars pos -> [OnChars pos] + | Action act -> [ToAction act] + | Seq(r1,r2) -> if nullable r2 + then merge_trans (lastpos r1) (lastpos r2) + else lastpos r2 + | Alt(r1,r2) -> merge_trans (lastpos r1) (lastpos r2) + | Star r -> lastpos r + + +let followpos size name_regexp_list = + let v = Array.new size [] in + let fill_pos first = function + OnChars pos -> v.(pos) <- merge_trans first v.(pos); () + | ToAction _ -> () in + let rec fill = function + Seq(r1,r2) -> + fill r1; fill r2; + List.iter (fill_pos (firstpos r2)) (lastpos r1) + | Alt(r1,r2) -> + fill r1; fill r2 + | Star r -> + fill r; + List.iter (fill_pos (firstpos r)) (lastpos r) + | _ -> () in + List.iter (fun (name, regexp) -> fill regexp) name_regexp_list; + v + + +let no_action = 0x3FFFFFFF + +let split_trans_set = + List.fold_left + (fun (act, pos_set as act_pos_set) trans -> + match trans with + OnChars pos -> (act, pos :: pos_set) + | ToAction act1 -> if act1 < act then (act1, pos_set) + else act_pos_set) + (no_action, []) + + +let memory = (Hashtbl.new 131 : (transition list, int) Hashtbl.t) +let todo = ref ([] : (transition list * int) list) +let next = ref 0 + +let get_state st = + try + Hashtbl.find memory st + with Not_found -> + let nbr = !next in + next := !next + 1; + Hashtbl.add memory st nbr; + todo := (st, nbr) :: !todo; + nbr + +let rec map_on_states f = + match !todo with + [] -> [] + | (st,i)::r -> todo := r; let res = f st in (res,i) :: map_on_states f + +let number_of_states () = !next + +let goto_state = function + [] -> Backtrack + | ps -> Goto (get_state ps) + + +let transition_from chars follow pos_set = + let tr = Array.new 256 [] + and shift = Array.new 256 Backtrack in + List.iter + (fun pos -> + List.iter + (fun c -> + tr.(Char.code c) <- + merge_trans tr.(Char.code c) follow.(pos)) + chars.(pos)) + pos_set; + for i = 0 to 255 do + shift.(i) <- goto_state tr.(i) + done; + shift + + +let translate_state chars follow state = + match split_trans_set state with + n, [] -> Perform n + | n, ps -> Shift( (if n = no_action then No_remember else Remember n), + transition_from chars follow ps) + + +let make_dfa lexdef = + let (chars, name_regexp_list, actions) = + encode_lexdef lexdef in +(** + List.iter (fun (name, regexp) -> prerr_string name; prerr_string " = "; print_regexp regexp; prerr_newline()) name_regexp_list; +**) + let follow = + followpos (Array.length chars) name_regexp_list in + let initial_states = + List.map (fun (name, regexp) -> (name, get_state(firstpos regexp))) + name_regexp_list in + let states = + map_on_states (translate_state chars follow) in + let v = + Array.new (number_of_states()) (Perform 0) in + List.iter (fun (auto, i) -> v.(i) <- auto) states; + (initial_states, v, actions) + diff --git a/test/Lex/main.ml b/test/Lex/main.ml new file mode 100644 index 0000000000..94902ed21b --- /dev/null +++ b/test/Lex/main.ml @@ -0,0 +1,104 @@ +(* The lexer generator. Command-line parsing. *) + +open Syntax +open Scanner +open Grammar +open Lexgen +open Output + +let main () = + if Array.length Sys.argv <> 2 then begin + prerr_string "Usage: camllex <input file>\n"; + exit 2 + end; + let source_name = Sys.argv.(1) in + let dest_name = + if Filename.check_suffix source_name ".mll" then + Filename.chop_suffix source_name ".mll" ^ ".ml" + else + source_name ^ ".ml" in + ic := open_in source_name; + oc := open_out dest_name; + let lexbuf = Lexing.from_channel !ic in + let (Lexdef(header,_) as def) = + try + Grammar.lexer_definition Scanner.main lexbuf + with + Parsing.Parse_error -> + prerr_string "Syntax error around char "; + prerr_int (Lexing.lexeme_start lexbuf); + prerr_endline "."; + exit 2 + | Scan_aux.Lexical_error s -> + prerr_string "Lexical error around char "; + prerr_int (Lexing.lexeme_start lexbuf); + prerr_string ": "; + prerr_string s; + prerr_endline "."; + exit 2 in + let ((init, states, acts) as dfa) = make_dfa def in + output_lexdef header dfa; + close_in !ic; + close_out !oc + +let _ = main(); exit 0 + + +(***** +let main () = + ic := stdin; + oc := stdout; + let lexbuf = lexing.from_channel ic in + let (Lexdef(header,_) as def) = + try + grammar.lexer_definition scanner.main lexbuf + with + parsing.Parse_error x -> + prerr_string "Syntax error around char "; + prerr_int (lexing.lexeme_start lexbuf); + prerr_endline "."; + sys.exit 2 + | scan_aux.Lexical_error s -> + prerr_string "Lexical error around char "; + prerr_int (lexing.lexeme_start lexbuf); + prerr_string ": "; + prerr_string s; + prerr_endline "."; + sys.exit 2 in + let ((init, states, acts) as dfa) = make_dfa def in + output_lexdef header dfa + +****) + +(**** +let debug_scanner lexbuf = + let tok = scanner.main lexbuf in + begin match tok with + Tident s -> prerr_string "Tident "; prerr_string s + | Tchar c -> prerr_string "Tchar "; prerr_char c + | Tstring s -> prerr_string "Tstring "; prerr_string s + | Taction(Location(i1,i2)) -> + prerr_string "Taction "; prerr_int i1; prerr_string "-"; + prerr_int i2 + | Trule -> prerr_string "Trule" + | Tparse -> prerr_string "Tparse" + | Tand -> prerr_string "Tand" + | Tequal -> prerr_string "Tequal" + | Tend -> prerr_string "Tend" + | Tor -> prerr_string "Tor" + | Tunderscore -> prerr_string "Tunderscore" + | Teof -> prerr_string "Teof" + | Tlbracket -> prerr_string "Tlbracket" + | Trbracket -> prerr_string "Trbracket" + | Tstar -> prerr_string "Tstar" + | Tmaybe -> prerr_string "Tmaybe" + | Tplus -> prerr_string "Tplus" + | Tlparen -> prerr_string "Tlparen" + | Trparen -> prerr_string "Trparen" + | Tcaret -> prerr_string "Tcaret" + | Tdash -> prerr_string "Tdash" + end; + prerr_newline(); + tok + +****) diff --git a/test/Lex/output.ml b/test/Lex/output.ml new file mode 100644 index 0000000000..301edcba3c --- /dev/null +++ b/test/Lex/output.ml @@ -0,0 +1,155 @@ +(* Generating a DFA as a set of mutually recursive functions *) + +open Syntax + +let ic = ref stdin +let oc = ref stdout + +(* 1- Generating the actions *) + +let copy_buffer = String.create 1024 + +let copy_chunk (Location(start,stop)) = + seek_in !ic start; + let tocopy = ref(stop - start) in + while !tocopy > 0 do + let m = + input !ic copy_buffer 0 (min !tocopy (String.length copy_buffer)) in + output !oc copy_buffer 0 m; + tocopy := !tocopy - m + done + + +let output_action (i,act) = + output_string !oc ("action_" ^ string_of_int i ^ " lexbuf = (\n"); + copy_chunk act; + output_string !oc ")\nand " + + +(* 2- Generating the states *) + +let states = ref ([||] : automata array) + +type occurrence = + { mutable pos: int list; + mutable freq: int } + +let enumerate_vect v = + let env = ref [] in + for pos = 0 to Array.length v - 1 do + try + let occ = List.assoc v.(pos) !env in + occ.pos <- pos :: occ.pos; + occ.freq <- occ.freq + 1 + with Not_found -> + env := (v.(pos), {pos = [pos]; freq = 1 }) :: !env + done; + Sort.list (fun (e1, occ1) (e2, occ2) -> occ1.freq >= occ2.freq) !env + + +let output_move = function + Backtrack -> + output_string !oc "lexing.backtrack lexbuf" + | Goto dest -> + match !states.(dest) with + Perform act_num -> + output_string !oc ("action_" ^ string_of_int act_num ^ " lexbuf") + | _ -> + output_string !oc ("state_" ^ string_of_int dest ^ " lexbuf") + + +(* Cannot use standard char_for_read because the characters to escape + are not the same in CL6 and CL1999. *) + +let output_char_lit oc = function + '\'' -> output_string oc "\\'" + | '\\' -> output_string oc "\\\\" + | '\n' -> output_string oc "\\n" + | '\t' -> output_string oc "\\t" + | c -> if Char.code c >= 32 & Char.code c < 128 then + output_char oc c + else begin + let n = Char.code c in + output_char oc '\\'; + output_char oc (Char.chr (48 + n / 100)); + output_char oc (Char.chr (48 + (n / 10) mod 10)); + output_char oc (Char.chr (48 + n mod 10)) + end + +let rec output_chars = function + [] -> + failwith "output_chars" + | [c] -> + output_string !oc "'"; + output_char_lit !oc (Char.chr c); + output_string !oc "'" + | c::cl -> + output_string !oc "'"; + output_char_lit !oc (Char.chr c); + output_string !oc "'|"; + output_chars cl + +let output_one_trans (dest, occ) = + output_chars occ.pos; + output_string !oc " -> "; + output_move dest; + output_string !oc "\n | " + +let output_all_trans trans = + output_string !oc " match lexing.next_char lexbuf with\n "; + match enumerate_vect trans with + [] -> + failwith "output_all_trans" + | (default, _) :: rest -> + List.iter output_one_trans rest; + output_string !oc "_ -> "; + output_move default; + output_string !oc "\nand " + +let output_state state_num = function + Perform i -> + () + | Shift(what_to_do, moves) -> + output_string !oc + ("state_" ^ string_of_int state_num ^ " lexbuf =\n"); + begin match what_to_do with + No_remember -> () + | Remember i -> + output_string !oc + (" Lexing.set_backtrack lexbuf action_" ^ + string_of_int i ^ ";\n") + end; + output_all_trans moves + + +(* 3- Generating the entry points *) + +let rec output_entries = function + [] -> failwith "output_entries" + | (name,state_num) :: rest -> + output_string !oc (name ^ " lexbuf =\n"); + output_string !oc " Lexing.init lexbuf;\n"; + output_string !oc (" state_" ^ string_of_int state_num ^ + " lexbuf\n"); + match rest with + [] -> output_string !oc "\n"; () + | _ -> output_string !oc "\nand "; output_entries rest + + +(* All together *) + +let output_lexdef header (initial_st, st, actions) = + prerr_int (Array.length st); prerr_string " states, "; + prerr_int (List.length actions); prerr_string " actions."; + prerr_newline(); + copy_chunk header; + output_string !oc "\nlet rec "; + states := st; + List.iter output_action actions; + for i = 0 to Array.length st - 1 do + output_state i st.(i) + done; + output_entries initial_st + + + diff --git a/test/Lex/scan_aux.ml b/test/Lex/scan_aux.ml new file mode 100644 index 0000000000..8b01d63479 --- /dev/null +++ b/test/Lex/scan_aux.ml @@ -0,0 +1,46 @@ +(* Auxiliaries for the lexical analyzer *) + +let brace_depth = ref 0 +let comment_depth = ref 0 + +exception Lexical_error of string + +let initial_string_buffer = String.create 256 +let string_buff = ref initial_string_buffer +let string_index = ref 0 + +let reset_string_buffer () = + string_buff := initial_string_buffer; + string_index := 0 + + +let store_string_char c = + begin + if !string_index >= String.length !string_buff then begin + let new_buff = String.create (String.length !string_buff * 2) in + String.blit new_buff 0 !string_buff 0 (String.length !string_buff); + string_buff := new_buff + end + end; + String.unsafe_set !string_buff !string_index c; + incr string_index + +let get_stored_string () = + let s = String.sub !string_buff 0 !string_index in + string_buff := initial_string_buffer; + s + + +let char_for_backslash = function + 'n' -> '\010' (* '\n' when bootstrapped *) + | 't' -> '\009' (* '\t' *) + | 'b' -> '\008' (* '\b' *) + | 'r' -> '\013' (* '\r' *) + | c -> c + + +let char_for_decimal_code lexbuf i = + Char.chr(100 * (Char.code(Lexing.lexeme_char lexbuf i) - 48) + + 10 * (Char.code(Lexing.lexeme_char lexbuf (i+1)) - 48) + + (Char.code(Lexing.lexeme_char lexbuf (i+2)) - 48)) + diff --git a/test/Lex/scanner.mll b/test/Lex/scanner.mll new file mode 100644 index 0000000000..7cb13ba70e --- /dev/null +++ b/test/Lex/scanner.mll @@ -0,0 +1,118 @@ +(* The lexical analyzer for lexer definitions. *) + +{ +open Syntax +open Grammar +open Scan_aux +} + +rule main = parse + [' ' '\010' '\013' '\009' ] + + { main lexbuf } + | "(*" + { comment_depth := 1; + comment lexbuf; + main lexbuf } + | (['A'-'Z' 'a'-'z'] | '_' ['A'-'Z' 'a'-'z' '\'' '0'-'9']) + ( '_' ? ['A'-'Z' 'a'-'z' ''' '0'-'9'] ) * + { match Lexing.lexeme lexbuf with + "rule" -> Trule + | "parse" -> Tparse + | "and" -> Tand + | "eof" -> Teof + | s -> Tident s } + | '"' + { reset_string_buffer(); + string lexbuf; + Tstring(get_stored_string()) } + | "'" + { Tchar(char lexbuf) } + | '{' + { let n1 = Lexing.lexeme_end lexbuf in + brace_depth := 1; + let n2 = action lexbuf in + Taction(Location(n1, n2)) } + | '=' { Tequal } + | ";;" { Tend } + | '|' { Tor } + | '_' { Tunderscore } + | "eof" { Teof } + | '[' { Tlbracket } + | ']' { Trbracket } + | '*' { Tstar } + | '?' { Tmaybe } + | '+' { Tplus } + | '(' { Tlparen } + | ')' { Trparen } + | '^' { Tcaret } + | '-' { Tdash } + | eof + { raise(Lexical_error "unterminated lexer definition") } + | _ + { raise(Lexical_error("illegal character " ^ Lexing.lexeme lexbuf)) } + +and action = parse + '{' + { incr brace_depth; + action lexbuf } + | '}' + { decr brace_depth; + if !brace_depth = 0 then Lexing.lexeme_start lexbuf else action lexbuf } + | '"' + { reset_string_buffer(); + string lexbuf; + reset_string_buffer(); + action lexbuf } + | '\'' + { char lexbuf; action lexbuf } + | "(*" + { comment_depth := 1; + comment lexbuf; + action lexbuf } + | eof + { raise (Lexical_error "unterminated action") } + | _ + { action lexbuf } + +and string = parse + '"' + { () } + | '\\' [' ' '\010' '\013' '\009' '\026' '\012'] + + { string lexbuf } + | '\\' ['\\' '"' 'n' 't' 'b' 'r'] + { store_string_char(char_for_backslash(Lexing.lexeme_char lexbuf 1)); + string lexbuf } + | '\\' ['0'-'9'] ['0'-'9'] ['0'-'9'] + { store_string_char(char_for_decimal_code lexbuf 1); + string lexbuf } + | eof + { raise(Lexical_error "unterminated string") } + | _ + { store_string_char(Lexing.lexeme_char lexbuf 0); + string lexbuf } + +and char = parse + [^ '\\'] "'" + { Lexing.lexeme_char lexbuf 0 } + | '\\' ['\\' '\'' 'n' 't' 'b' 'r'] "'" + { char_for_backslash (Lexing.lexeme_char lexbuf 1) } + | '\\' ['0'-'9'] ['0'-'9'] ['0'-'9'] "'" + { char_for_decimal_code lexbuf 1 } + | _ + { raise(Lexical_error "bad character constant") } + +and comment = parse + "(*" + { incr comment_depth; comment lexbuf } + | "*)" + { decr comment_depth; + if !comment_depth = 0 then () else comment lexbuf } + | '"' + { reset_string_buffer(); + string lexbuf; + reset_string_buffer(); + comment lexbuf } + | eof + { raise(Lexical_error "unterminated comment") } + | _ + { comment lexbuf } diff --git a/test/Lex/syntax.ml b/test/Lex/syntax.ml new file mode 100644 index 0000000000..f692e6f625 --- /dev/null +++ b/test/Lex/syntax.ml @@ -0,0 +1,26 @@ +(* The shallow abstract syntax *) + +type location = + Location of int * int + +type regular_expression = + Epsilon + | Characters of char list + | Sequence of regular_expression * regular_expression + | Alternative of regular_expression * regular_expression + | Repetition of regular_expression + +type lexer_definition = + Lexdef of location * (string * (regular_expression * location) list) list + +(* Representation of automata *) + +type automata = + Perform of int + | Shift of automata_trans * automata_move array +and automata_trans = + No_remember + | Remember of int +and automata_move = + Backtrack + | Goto of int diff --git a/test/Lex/testmain.ml b/test/Lex/testmain.ml new file mode 100644 index 0000000000..e0a914ee09 --- /dev/null +++ b/test/Lex/testmain.ml @@ -0,0 +1,34 @@ +(* The lexer generator. Command-line parsing. *) + +#open "syntax";; +#open "testscanner";; +#open "grammar";; +#open "lexgen";; +#open "output";; + +let main () = + ic := stdin; + oc := stdout; + let lexbuf = lexing.from_channel ic in + let (Lexdef(header,_) as def) = + try + grammar.lexer_definition testscanner.main lexbuf + with + parsing.Parse_error x -> + prerr_string "Syntax error around char "; + prerr_int (lexing.lexeme_start lexbuf); + prerr_endline "."; + sys.exit 2 + | scan_aux.Lexical_error s -> + prerr_string "Lexical error around char "; + prerr_int (lexing.lexeme_start lexbuf); + prerr_string ": "; + prerr_string s; + prerr_endline "."; + sys.exit 2 in + let ((init, states, acts) as dfa) = make_dfa def in + output_lexdef header dfa +;; + +main(); sys.exit 0 +;; diff --git a/test/Lex/testscanner.mll b/test/Lex/testscanner.mll new file mode 100644 index 0000000000..91ada299f2 --- /dev/null +++ b/test/Lex/testscanner.mll @@ -0,0 +1,121 @@ +(* The lexical analyzer for lexer definitions. *) + +{ +#open "syntax";; +#open "grammar";; +#open "scan_aux";; +} + +rule main = parse + _ * "qwertyuiopasdfghjklzxcvbnm0123456789!@#$%^&*()" + { main lexbuf } + | [' ' '\010' '\013' '\009' ] + + { main lexbuf } + | "(*" + { comment_depth := 1; + comment lexbuf; + main lexbuf } + | (['A'-'Z' 'a'-'z'] | '_' ['A'-'Z' 'a'-'z' '\'' '0'-'9']) + ( '_' ? ['A'-'Z' 'a'-'z' ''' '0'-'9'] ) * + { match lexing.lexeme lexbuf with + "rule" -> Trule + | "parse" -> Tparse + | "and" -> Tand + | "eof" -> Teof + | s -> Tident s } + | '"' + { reset_string_buffer(); + string lexbuf; + Tstring(get_stored_string()) } + | "'" + { Tchar(char lexbuf) } + | '{' + { let n1 = lexing.lexeme_end lexbuf in + brace_depth := 1; + let n2 = action lexbuf in + Taction(Location(n1, n2)) } + | '=' { Tequal } + | ";;" { Tend } + | '|' { Tor } + | '_' { Tunderscore } + | "eof" { Teof } + | '[' { Tlbracket } + | ']' { Trbracket } + | '*' { Tstar } + | '?' { Tmaybe } + | '+' { Tplus } + | '(' { Tlparen } + | ')' { Trparen } + | '^' { Tcaret } + | '-' { Tdash } + | eof + { raise(Lexical_error "unterminated lexer definition") } + | _ + { raise(Lexical_error("illegal character " ^ lexing.lexeme lexbuf)) } + +and action = parse + '{' + { brace_depth := brace_depth + 1; + action lexbuf } + | '}' + { brace_depth := brace_depth - 1; + if brace_depth = 0 then lexing.lexeme_start lexbuf else action lexbuf } + | '"' + { reset_string_buffer(); + string lexbuf; + reset_string_buffer(); + action lexbuf } + | '\'' + { char lexbuf; action lexbuf } + | "(*" + { comment_depth := 1; + comment lexbuf; + action lexbuf } + | eof + { raise (Lexical_error "unterminated action") } + | _ + { action lexbuf } + +and string = parse + '"' + { () } + | '\\' [' ' '\010' '\013' '\009' '\026' '\012'] + + { string lexbuf } + | '\\' ['\\' '"' 'n' 't' 'b' 'r'] + { store_string_char(char_for_backslash(lexing.lexeme_char lexbuf 1)); + string lexbuf } + | '\\' ['0'-'9'] ['0'-'9'] ['0'-'9'] + { store_string_char(char_for_decimal_code lexbuf 1); + string lexbuf } + | eof + { raise(Lexical_error "unterminated string") } + | _ + { store_string_char(lexing.lexeme_char lexbuf 0); + string lexbuf } + +and char = parse + [^ '\\'] "'" + { lexing.lexeme_char lexbuf 0 } + | '\\' ['\\' '\'' 'n' 't' 'b' 'r'] "'" + { char_for_backslash (lexing.lexeme_char lexbuf 1) } + | '\\' ['0'-'9'] ['0'-'9'] ['0'-'9'] "'" + { char_for_decimal_code lexbuf 1 } + | _ + { raise(Lexical_error "bad character constant") } + +and comment = parse + "(*" + { comment_depth := comment_depth + 1; comment lexbuf } + | "*)" + { comment_depth := comment_depth - 1; + if comment_depth = 0 then () else comment lexbuf } + | '"' + { reset_string_buffer(); + string lexbuf; + reset_string_buffer(); + comment lexbuf } + | eof + { raise(Lexical_error "unterminated comment") } + | _ + { comment lexbuf } +;; diff --git a/test/Makefile b/test/Makefile new file mode 100644 index 0000000000..63d3193c50 --- /dev/null +++ b/test/Makefile @@ -0,0 +1,97 @@ +CAMLC=../boot/camlrun ../camlc -I ../stdlib +CAMLYACC=../yacc/camlyacc +CAMLLEX=../boot/camlrun ../lex/camllex +CAMLDEP=../tools/camldep + +EXE=fib takc taku sieve quicksort quicksort.fast fft fft.fast \ + soli soli.fast boyer kb nucleic genlex + +all: $(EXE) + +# KB + +KBFILES=KB/terms.mli KB/terms.ml KB/equations.mli KB/equations.ml \ + KB/kb.mli KB/kb.ml KB/orderings.mli KB/orderings.ml KB/kbmain.ml + +kb: $(KBFILES) + $(CAMLC) -I KB $(KBFILES) -o kb + +clean:: + rm -f KB/*.cm[io] + rm -f KB/*~ + +# Genlex + +GENLEXFILES=Lex/syntax.ml Lex/scan_aux.ml Lex/grammar.mli Lex/scanner.ml \ + Lex/gram_aux.ml Lex/grammar.ml Lex/lexgen.ml Lex/output.ml Lex/main.ml + +genlex: $(GENLEXFILES) + $(CAMLC) -I Lex $(GENLEXFILES) -o genlex + +clean:: + rm -f Lex/*.cm[io] + rm -f Lex/*~ + +Lex/grammar.ml Lex/grammar.mli: Lex/grammar.mly $(CAMLYACC) + $(CAMLYACC) $(YACCFLAGS) Lex/grammar.mly + +clean:: + rm -f Lex/grammar.ml Lex/grammar.mli + +Lex/scanner.ml: Lex/scanner.mll $(CAMLLEX) + $(CAMLLEX) Lex/scanner.mll + +clean:: + rm -f Lex/scanner.ml + +# Common rules + +.SUFFIXES: .mli .ml .cmi .cmo .fast + +.ml: + $(CAMLC) -o $* $< + +.ml.fast: + $(CAMLC) -fast -o $*.fast $< + +.mli.cmi: + $(CAMLC) -c $< + +.ml.cmo: + $(CAMLC) -c $< + +$(EXE): ../camlc + +clean:: + rm -f $(EXE) + rm -f *.cm[io] + rm -f *~ + +# Regression test + +test: $(EXE) + set -e; \ + for prog in $(EXE); do \ + echo $$prog; \ + if test -f Results/$$prog.runtest; then \ + sh Results/$$prog.runtest test; \ + elif test -f Results/$$prog.out; then \ + sh Results/runtest $$prog; \ + fi; \ + done + +clean:: + rm -f Lex/testscanner.ml + +# Benchmark + +bench: $(EXE) + set -e; \ + for prog in $(EXE); do \ + echo $$prog; \ + if test -f Results/$$prog.runtest; then \ + sh Results/$$prog.runtest bench; \ + else \ + xtime -o /dev/null ../byterun/camlrun $$prog; \ + fi; \ + done diff --git a/test/Results/boyer.out b/test/Results/boyer.out new file mode 100644 index 0000000000..f38e3263b1 --- /dev/null +++ b/test/Results/boyer.out @@ -0,0 +1 @@ +Proved! diff --git a/test/Results/fft.runtest b/test/Results/fft.runtest new file mode 100644 index 0000000000..59420f3036 --- /dev/null +++ b/test/Results/fft.runtest @@ -0,0 +1,4 @@ +case $1 in + test) ../byterun/camlrun fft | awk '$2 >= 1e-10 { exit 2; }';; + bench) xtime -o /dev/null ../byterun/camlrun fft;; +esac
\ No newline at end of file diff --git a/test/Results/fib.out b/test/Results/fib.out new file mode 100644 index 0000000000..08c2ab3e02 --- /dev/null +++ b/test/Results/fib.out @@ -0,0 +1 @@ +1346269 diff --git a/test/Results/genlex.runtest b/test/Results/genlex.runtest new file mode 100644 index 0000000000..88668eebfc --- /dev/null +++ b/test/Results/genlex.runtest @@ -0,0 +1,5 @@ +case $1 in + test) ../byterun/camlrun genlex Lex/testscanner.mll;; + bench) xtime -o /dev/null ../byterun/camlrun genlex Lex/testscanner.mll;; +esac + diff --git a/test/Results/kb.out b/test/Results/kb.out new file mode 100644 index 0000000000..758a0b4d60 --- /dev/null +++ b/test/Results/kb.out @@ -0,0 +1,273 @@ +1 : U*v1 = v1 +2 : I(v1)*v1 = U +3 : (v3*v2)*v1 = v3*(v2*v1) +4 : A*B = B*A +5 : C*C = U +6 : I(A) = C*(A*I(C)) +7 : C*(B*I(C)) = B +8 : I(v2)*(v2*v1) = v1 +9 : A*(B*v1) = B*(A*v1) +10 : C*(C*v1) = v1 +11 : C*(A*(I(C)*A)) = U +12 : C*(B*(I(C)*v1)) = B*v1 +13 : I(U)*v1 = v1 +14 : I(I(v1))*U = v1 +15 : I(v3*v2)*(v3*(v2*v1)) = v1 +16 : C*(A*(I(C)*(B*A))) = B +17 : I(C)*U = C +18 : C*(A*(I(C)*(A*v1))) = v1 +19 : I(C)*B = B*I(C) +20 : I(I(v2))*v1 = v2*v1 +Rule 14 deleted +21 : v1*U = v1 +Rule 17 deleted +22 : I(C) = C +Rule 19 deleted +Rule 18 deleted +Rule 16 deleted +Rule 12 deleted +Rule 11 deleted +Rule 7 deleted +23 : C*B = B*C +24 : C*(A*(C*(A*v1))) = v1 +25 : C*(A*(C*(B*A))) = B +26 : C*(B*(C*v1)) = B*v1 +27 : C*(A*(C*A)) = U +28 : C*(B*C) = B +29 : C*(A*(C*(B*(A*v1)))) = B*v1 +30 : I(I(v2*v1)*v2) = v1 +31 : I(v2*I(v1))*v2 = v1 +32 : I(v4*(v3*v2))*(v4*(v3*(v2*v1))) = v1 +33 : I(v1*A)*(v1*(B*A)) = B +34 : I(v1*C)*v1 = C +35 : I(v3*I(v2))*(v3*v1) = v2*v1 +36 : I(v2*A)*(v2*(B*(A*v1))) = B*v1 +37 : I(v2*C)*(v2*v1) = C*v1 +38 : v1*I(v1) = U +39 : I(C*(A*C))*v1 = A*v1 +40 : v2*(I(v2)*v1) = v1 +41 : I(U) = U +Rule 13 deleted +42 : I(I(v1)) = v1 +Rule 20 deleted +43 : C*(B*v1) = B*(C*v1) +Rule 29 deleted +Rule 28 deleted +Rule 26 deleted +Rule 25 deleted +44 : A*(C*(A*v1)) = C*v1 +Rule 24 deleted +45 : A*(C*A) = C +Rule 27 deleted +46 : v2*(I(v1*v2)*v1) = U +47 : I(I(v3*(v2*v1))*(v3*v2)) = v1 +48 : I(I(B*A)*A) = B +49 : v3*(I(v2*v3)*(v2*v1)) = v1 +50 : I(I(v1)*I(v2)) = v2*v1 +51 : I(I(B*(A*v1))*A) = B*v1 +52 : I(I(v1)*C) = C*v1 +53 : I(v2*I(v1*v2)) = v1 +54 : I(v3*(v2*I(v1)))*(v3*v2) = v1 +55 : I(v1*(C*(A*C)))*v1 = A +56 : v2*I(I(v1)*v2) = v1 +57 : I(v2*(I(v3*v1)*v3))*v2 = v1 +58 : I(v5*(v4*(v3*v2)))*(v5*(v4*(v3*(v2*v1)))) = v1 +59 : I(v2*(v1*A))*(v2*(v1*(B*A))) = B +60 : I(v2*(v1*C))*(v2*v1) = C +61 : I(v4*(v3*I(v2)))*(v4*(v3*v1)) = v2*v1 +62 : I(v3*(v2*A))*(v3*(v2*(B*(A*v1)))) = B*v1 +63 : I(v3*(v2*C))*(v3*(v2*v1)) = C*v1 +64 : I(v3*(I(v4*v2)*v4))*(v3*v1) = v2*v1 +65 : v4*(I(v3*(v2*v4))*(v3*(v2*v1))) = v1 +66 : I(I(B)*A)*A = B +67 : I(A*A)*(B*(A*A)) = B +68 : v1*(I(A*v1)*(B*A)) = B +69 : I(I(v1*A)*(v1*B))*B = A +70 : v1*I(C*v1) = C +71 : I(A*I(v1))*(B*A) = v1*B +72 : I(C*I(v1)) = v1*C +73 : I(v2*(C*(A*C)))*(v2*v1) = A*v1 +74 : I(A*I(v2))*(B*(A*v1)) = v2*(B*v1) +75 : v3*(I(I(v2)*v3)*v1) = v2*v1 +76 : I(I(B*I(v1))*A)*(v1*A) = B +77 : I(v1*A)*(v1*(B*(B*A))) = B*B +78 : I(I(B)*A)*(A*v1) = B*v1 +79 : I(A*A)*(B*(A*(A*v1))) = B*v1 +80 : I(v2*A)*(v2*(B*(B*(A*v1)))) = B*(B*v1) +81 : v2*(I(A*v2)*(B*(A*v1))) = B*v1 +82 : I(I(v2*A)*(v2*B))*(B*v1) = A*v1 +83 : I(I(B*I(v2))*A)*(v2*(A*v1)) = B*v1 +84 : I(A*C)*(B*A) = B*C +85 : I(A*C)*(B*(A*v1)) = B*(C*v1) +86 : v2*(I(C*v2)*v1) = C*v1 +87 : I(I(B*C)*A)*(C*A) = B +88 : I(I(B*C)*A)*(C*(A*v1)) = B*v1 +89 : v2*(v1*I(v2*v1)) = U +90 : B*(A*I(B)) = A +91 : I(v2*v1)*v2 = I(v1) +Rule 64 deleted +Rule 57 deleted +Rule 55 deleted +Rule 46 deleted +Rule 34 deleted +Rule 31 deleted +Rule 30 deleted +92 : I(C*(A*C)) = A +Rule 39 deleted +93 : I(v3*(v2*v1))*(v3*v2) = I(v1) +Rule 60 deleted +Rule 54 deleted +Rule 47 deleted +94 : I(v1*I(v2)) = v2*I(v1) +Rule 83 deleted +Rule 76 deleted +Rule 74 deleted +Rule 72 deleted +Rule 71 deleted +Rule 53 deleted +Rule 50 deleted +Rule 35 deleted +95 : I(v2*(I(B)*A))*(v2*(A*v1)) = B*v1 +96 : I(v1*(I(B)*A))*(v1*A) = B +97 : I(v1*A)*(v1*B) = B*(C*(A*C)) +Rule 82 deleted +Rule 69 deleted +98 : I(v1*C) = C*I(v1) +Rule 88 deleted +Rule 87 deleted +Rule 85 deleted +Rule 84 deleted +Rule 52 deleted +Rule 37 deleted +99 : v3*(v2*(I(v3*v2)*v1)) = v1 +100 : B*(A*(I(B)*v1)) = A*v1 +101 : I(v3*v2)*(v3*v1) = I(v2)*v1 +Rule 97 deleted +Rule 96 deleted +Rule 95 deleted +Rule 93 deleted +Rule 80 deleted +Rule 77 deleted +Rule 73 deleted +Rule 65 deleted +Rule 63 deleted +Rule 62 deleted +Rule 61 deleted +Rule 59 deleted +Rule 58 deleted +Rule 49 deleted +Rule 36 deleted +Rule 33 deleted +Rule 32 deleted +Rule 15 deleted +102 : B*(C*I(B)) = C +103 : B*(C*(I(B)*v1)) = C*v1 +104 : B*(I(B*A)*A) = U +105 : B*(I(B*A)*(A*v1)) = v1 +106 : I(B*A)*A = I(B) +Rule 104 deleted +Rule 48 deleted +107 : B*(v1*(I(B*(A*v1))*A)) = U +108 : I(I(B*(B*A))*A) = B*B +109 : B*(v2*(I(B*(A*v2))*(A*v1))) = v1 +110 : I(I(B*(B*(A*v1)))*A) = B*(B*v1) +111 : I(I(B)*A) = B*(C*(A*C)) +Rule 78 deleted +Rule 66 deleted +112 : I(I(B*v1)*A) = B*(C*(A*(C*v1))) +Rule 110 deleted +Rule 108 deleted +Rule 51 deleted +113 : v3*(v2*I(I(v1)*(v3*v2))) = v1 +114 : v1*I(C*(A*(C*v1))) = A +115 : I(I(v1)*v2) = I(v2)*v1 +Rule 113 deleted +Rule 112 deleted +Rule 111 deleted +Rule 75 deleted +Rule 56 deleted +116 : v2*(v1*(I(A*(v2*v1))*(B*A))) = B +117 : I(A*v1)*(B*A) = I(v1)*B +Rule 116 deleted +Rule 68 deleted +118 : v2*(v1*I(C*(v2*v1))) = C +119 : I(C*v1) = I(v1)*C +Rule 118 deleted +Rule 114 deleted +Rule 92 deleted +Rule 86 deleted +Rule 70 deleted +120 : v1*(I(A*(C*v1))*C) = A +121 : I(A*A)*(B*(B*(A*A))) = B*B +122 : I(A*A)*(B*(B*(A*(A*v1)))) = B*(B*v1) +123 : I(A*A)*(B*(A*v1)) = B*(C*(A*(C*v1))) +Rule 79 deleted +Rule 67 deleted +124 : v3*(v2*(I(A*(v3*v2))*(B*(A*v1)))) = B*v1 +125 : v1*(I(A*v1)*(B*(B*A))) = B*B +126 : I(A*v2)*(B*(A*v1)) = I(v2)*(B*v1) +Rule 124 deleted +Rule 123 deleted +Rule 81 deleted +127 : v3*(v2*(v1*I(v3*(v2*v1)))) = U +128 : v2*I(v1*v2) = I(v1) +Rule 89 deleted +129 : A*I(B) = I(B)*A +Rule 90 deleted +130 : I(v1*v2) = I(v2)*I(v1) +Rule 128 deleted +Rule 127 deleted +Rule 126 deleted +Rule 125 deleted +Rule 122 deleted +Rule 121 deleted +Rule 120 deleted +Rule 119 deleted +Rule 117 deleted +Rule 115 deleted +Rule 109 deleted +Rule 107 deleted +Rule 106 deleted +Rule 105 deleted +Rule 101 deleted +Rule 99 deleted +Rule 98 deleted +Rule 94 deleted +Rule 91 deleted +131 : B*(C*(A*(C*(I(B)*(A*v1))))) = v1 +132 : B*(C*(A*(C*(I(B)*A)))) = U +133 : C*(A*(C*(I(B)*A))) = I(B) +Rule 132 deleted +134 : A*(I(B)*v1) = I(B)*(A*v1) +Rule 100 deleted +135 : C*I(B) = I(B)*C +Rule 102 deleted +136 : C*(I(B)*v1) = I(B)*(C*v1) +Rule 133 deleted +Rule 131 deleted +Rule 103 deleted +Canonical set found : +1 : U*v1 = v1 +2 : I(v1)*v1 = U +3 : (v3*v2)*v1 = v3*(v2*v1) +4 : A*B = B*A +5 : C*C = U +6 : I(A) = C*(A*C) +8 : I(v2)*(v2*v1) = v1 +9 : A*(B*v1) = B*(A*v1) +10 : C*(C*v1) = v1 +21 : v1*U = v1 +22 : I(C) = C +23 : C*B = B*C +38 : v1*I(v1) = U +40 : v2*(I(v2)*v1) = v1 +41 : I(U) = U +42 : I(I(v1)) = v1 +43 : C*(B*v1) = B*(C*v1) +44 : A*(C*(A*v1)) = C*v1 +45 : A*(C*A) = C +129 : A*I(B) = I(B)*A +130 : I(v1*v2) = I(v2)*I(v1) +134 : A*(I(B)*v1) = I(B)*(A*v1) +135 : C*I(B) = I(B)*C +136 : C*(I(B)*v1) = I(B)*(C*v1) diff --git a/test/Results/nucleic.out b/test/Results/nucleic.out new file mode 100644 index 0000000000..bb62e59506 --- /dev/null +++ b/test/Results/nucleic.out @@ -0,0 +1 @@ +33.79759 diff --git a/test/Results/quicksort.fast.out b/test/Results/quicksort.fast.out new file mode 100644 index 0000000000..2c94e48371 --- /dev/null +++ b/test/Results/quicksort.fast.out @@ -0,0 +1,2 @@ +OK +OK diff --git a/test/Results/quicksort.out b/test/Results/quicksort.out new file mode 100644 index 0000000000..2c94e48371 --- /dev/null +++ b/test/Results/quicksort.out @@ -0,0 +1,2 @@ +OK +OK diff --git a/test/Results/runtest b/test/Results/runtest new file mode 100644 index 0000000000..80ea739992 --- /dev/null +++ b/test/Results/runtest @@ -0,0 +1 @@ +../byterun/camlrun $1 | cmp - Results/$1.out diff --git a/test/Results/sieve.out b/test/Results/sieve.out new file mode 100644 index 0000000000..8ca674d46f --- /dev/null +++ b/test/Results/sieve.out @@ -0,0 +1 @@ +2 3 5 7 11 13 17 19 23 29 31 37 41 43 47 53 59 61 67 71 73 79 83 89 97 101 103 107 109 113 127 131 137 139 149 151 157 163 167 173 179 181 191 193 197 199 211 223 227 229 233 239 241 251 257 263 269 271 277 281 283 293 307 311 313 317 331 337 347 349 353 359 367 373 379 383 389 397 401 409 419 421 431 433 439 443 449 457 461 463 467 479 487 491 499 503 509 521 523 541 547 557 563 569 571 577 587 593 599 601 607 613 617 619 631 641 643 647 653 659 661 673 677 683 691 701 709 719 727 733 739 743 751 757 761 769 773 787 797 809 811 821 823 827 829 839 853 857 859 863 877 881 883 887 907 911 919 929 937 941 947 953 967 971 977 983 991 997 1009 1013 1019 1021 1031 1033 1039 1049 1051 1061 1063 1069 1087 1091 1093 1097 1103 1109 1117 1123 1129 1151 1153 1163 1171 1181 1187 1193 1201 1213 1217 1223 1229 1231 1237 1249 1259 1277 1279 1283 1289 1291 1297 1301 1303 1307 1319 1321 1327 1361 1367 1373 1381 1399 1409 1423 1427 1429 1433 1439 1447 1451 1453 1459 1471 1481 1483 1487 1489 1493 1499 1511 1523 1531 1543 1549 1553 1559 1567 1571 1579 1583 1597 1601 1607 1609 1613 1619 1621 1627 1637 1657 1663 1667 1669 1693 1697 1699 1709 1721 1723 1733 1741 1747 1753 1759 1777 1783 1787 1789 1801 1811 1823 1831 1847 1861 1867 1871 1873 1877 1879 1889 1901 1907 1913 1931 1933 1949 1951 1973 1979 1987 1993 1997 1999 2003 2011 2017 2027 2029 2039 2053 2063 2069 2081 2083 2087 2089 2099 2111 2113 2129 2131 2137 2141 2143 2153 2161 2179 2203 2207 2213 2221 2237 2239 2243 2251 2267 2269 2273 2281 2287 2293 2297 2309 2311 2333 2339 2341 2347 2351 2357 2371 2377 2381 2383 2389 2393 2399 2411 2417 2423 2437 2441 2447 2459 2467 2473 2477 2503 2521 2531 2539 2543 2549 2551 2557 2579 2591 2593 2609 2617 2621 2633 2647 2657 2659 2663 2671 2677 2683 2687 2689 2693 2699 2707 2711 2713 2719 2729 2731 2741 2749 2753 2767 2777 2789 2791 2797 2801 2803 2819 2833 2837 2843 2851 2857 2861 2879 2887 2897 2903 2909 2917 2927 2939 2953 2957 2963 2969 2971 2999 3001 3011 3019 3023 3037 3041 3049 3061 3067 3079 3083 3089 3109 3119 3121 3137 3163 3167 3169 3181 3187 3191 3203 3209 3217 3221 3229 3251 3253 3257 3259 3271 3299 3301 3307 3313 3319 3323 3329 3331 3343 3347 3359 3361 3371 3373 3389 3391 3407 3413 3433 3449 3457 3461 3463 3467 3469 3491 3499 3511 3517 3527 3529 3533 3539 3541 3547 3557 3559 3571 3581 3583 3593 3607 3613 3617 3623 3631 3637 3643 3659 3671 3673 3677 3691 3697 3701 3709 3719 3727 3733 3739 3761 3767 3769 3779 3793 3797 3803 3821 3823 3833 3847 3851 3853 3863 3877 3881 3889 3907 3911 3917 3919 3923 3929 3931 3943 3947 3967 3989 4001 4003 4007 4013 4019 4021 4027 4049 4051 4057 4073 4079 4091 4093 4099 4111 4127 4129 4133 4139 4153 4157 4159 4177 4201 4211 4217 4219 4229 4231 4241 4243 4253 4259 4261 4271 4273 4283 4289 4297 4327 4337 4339 4349 4357 4363 4373 4391 4397 4409 4421 4423 4441 4447 4451 4457 4463 4481 4483 4493 4507 4513 4517 4519 4523 4547 4549 4561 4567 4583 4591 4597 4603 4621 4637 4639 4643 4649 4651 4657 4663 4673 4679 4691 4703 4721 4723 4729 4733 4751 4759 4783 4787 4789 4793 4799 4801 4813 4817 4831 4861 4871 4877 4889 4903 4909 4919 4931 4933 4937 4943 4951 4957 4967 4969 4973 4987 4993 4999 5003 5009 5011 5021 5023 5039 5051 5059 5077 5081 5087 5099 5101 5107 5113 5119 5147 5153 5167 5171 5179 5189 5197 5209 5227 5231 5233 5237 5261 5273 5279 5281 5297 5303 5309 5323 5333 5347 5351 5381 5387 5393 5399 5407 5413 5417 5419 5431 5437 5441 5443 5449 5471 5477 5479 5483 5501 5503 5507 5519 5521 5527 5531 5557 5563 5569 5573 5581 5591 5623 5639 5641 5647 5651 5653 5657 5659 5669 5683 5689 5693 5701 5711 5717 5737 5741 5743 5749 5779 5783 5791 5801 5807 5813 5821 5827 5839 5843 5849 5851 5857 5861 5867 5869 5879 5881 5897 5903 5923 5927 5939 5953 5981 5987 6007 6011 6029 6037 6043 6047 6053 6067 6073 6079 6089 6091 6101 6113 6121 6131 6133 6143 6151 6163 6173 6197 6199 6203 6211 6217 6221 6229 6247 6257 6263 6269 6271 6277 6287 6299 6301 6311 6317 6323 6329 6337 6343 6353 6359 6361 6367 6373 6379 6389 6397 6421 6427 6449 6451 6469 6473 6481 6491 6521 6529 6547 6551 6553 6563 6569 6571 6577 6581 6599 6607 6619 6637 6653 6659 6661 6673 6679 6689 6691 6701 6703 6709 6719 6733 6737 6761 6763 6779 6781 6791 6793 6803 6823 6827 6829 6833 6841 6857 6863 6869 6871 6883 6899 6907 6911 6917 6947 6949 6959 6961 6967 6971 6977 6983 6991 6997 7001 7013 7019 7027 7039 7043 7057 7069 7079 7103 7109 7121 7127 7129 7151 7159 7177 7187 7193 7207 7211 7213 7219 7229 7237 7243 7247 7253 7283 7297 7307 7309 7321 7331 7333 7349 7351 7369 7393 7411 7417 7433 7451 7457 7459 7477 7481 7487 7489 7499 7507 7517 7523 7529 7537 7541 7547 7549 7559 7561 7573 7577 7583 7589 7591 7603 7607 7621 7639 7643 7649 7669 7673 7681 7687 7691 7699 7703 7717 7723 7727 7741 7753 7757 7759 7789 7793 7817 7823 7829 7841 7853 7867 7873 7877 7879 7883 7901 7907 7919 7927 7933 7937 7949 7951 7963 7993 8009 8011 8017 8039 8053 8059 8069 8081 8087 8089 8093 8101 8111 8117 8123 8147 8161 8167 8171 8179 8191 8209 8219 8221 8231 8233 8237 8243 8263 8269 8273 8287 8291 8293 8297 8311 8317 8329 8353 8363 8369 8377 8387 8389 8419 8423 8429 8431 8443 8447 8461 8467 8501 8513 8521 8527 8537 8539 8543 8563 8573 8581 8597 8599 8609 8623 8627 8629 8641 8647 8663 8669 8677 8681 8689 8693 8699 8707 8713 8719 8731 8737 8741 8747 8753 8761 8779 8783 8803 8807 8819 8821 8831 8837 8839 8849 8861 8863 8867 8887 8893 8923 8929 8933 8941 8951 8963 8969 8971 8999 9001 9007 9011 9013 9029 9041 9043 9049 9059 9067 9091 9103 9109 9127 9133 9137 9151 9157 9161 9173 9181 9187 9199 9203 9209 9221 9227 9239 9241 9257 9277 9281 9283 9293 9311 9319 9323 9337 9341 9343 9349 9371 9377 9391 9397 9403 9413 9419 9421 9431 9433 9437 9439 9461 9463 9467 9473 9479 9491 9497 9511 9521 9533 9539 9547 9551 9587 9601 9613 9619 9623 9629 9631 9643 9649 9661 9677 9679 9689 9697 9719 9721 9733 9739 9743 9749 9767 9769 9781 9787 9791 9803 9811 9817 9829 9833 9839 9851 9857 9859 9871 9883 9887 9901 9907 9923 9929 9931 9941 9949 9967 9973 10007 10009 10037 10039 10061 10067 10069 10079 10091 10093 10099 10103 10111 10133 10139 10141 10151 10159 10163 10169 10177 10181 10193 10211 10223 10243 10247 10253 10259 10267 10271 10273 10289 10301 10303 10313 10321 10331 10333 10337 10343 10357 10369 10391 10399 10427 10429 10433 10453 10457 10459 10463 10477 10487 10499 10501 10513 10529 10531 10559 10567 10589 10597 10601 10607 10613 10627 10631 10639 10651 10657 10663 10667 10687 10691 10709 10711 10723 10729 10733 10739 10753 10771 10781 10789 10799 10831 10837 10847 10853 10859 10861 10867 10883 10889 10891 10903 10909 10937 10939 10949 10957 10973 10979 10987 10993 11003 11027 11047 11057 11059 11069 11071 11083 11087 11093 11113 11117 11119 11131 11149 11159 11161 11171 11173 11177 11197 11213 11239 11243 11251 11257 11261 11273 11279 11287 11299 11311 11317 11321 11329 11351 11353 11369 11383 11393 11399 11411 11423 11437 11443 11447 11467 11471 11483 11489 11491 11497 11503 11519 11527 11549 11551 11579 11587 11593 11597 11617 11621 11633 11657 11677 11681 11689 11699 11701 11717 11719 11731 11743 11777 11779 11783 11789 11801 11807 11813 11821 11827 11831 11833 11839 11863 11867 11887 11897 11903 11909 11923 11927 11933 11939 11941 11953 11959 11969 11971 11981 11987 12007 12011 12037 12041 12043 12049 12071 12073 12097 12101 12107 12109 12113 12119 12143 12149 12157 12161 12163 12197 12203 12211 12227 12239 12241 12251 12253 12263 12269 12277 12281 12289 12301 12323 12329 12343 12347 12373 12377 12379 12391 12401 12409 12413 12421 12433 12437 12451 12457 12473 12479 12487 12491 12497 12503 12511 12517 12527 12539 12541 12547 12553 12569 12577 12583 12589 12601 12611 12613 12619 12637 12641 12647 12653 12659 12671 12689 12697 12703 12713 12721 12739 12743 12757 12763 12781 12791 12799 12809 12821 12823 12829 12841 12853 12889 12893 12899 12907 12911 12917 12919 12923 12941 12953 12959 12967 12973 12979 12983 13001 13003 13007 13009 13033 13037 13043 13049 13063 13093 13099 13103 13109 13121 13127 13147 13151 13159 13163 13171 13177 13183 13187 13217 13219 13229 13241 13249 13259 13267 13291 13297 13309 13313 13327 13331 13337 13339 13367 13381 13397 13399 13411 13417 13421 13441 13451 13457 13463 13469 13477 13487 13499 13513 13523 13537 13553 13567 13577 13591 13597 13613 13619 13627 13633 13649 13669 13679 13681 13687 13691 13693 13697 13709 13711 13721 13723 13729 13751 13757 13759 13763 13781 13789 13799 13807 13829 13831 13841 13859 13873 13877 13879 13883 13901 13903 13907 13913 13921 13931 13933 13963 13967 13997 13999 14009 14011 14029 14033 14051 14057 14071 14081 14083 14087 14107 14143 14149 14153 14159 14173 14177 14197 14207 14221 14243 14249 14251 14281 14293 14303 14321 14323 14327 14341 14347 14369 14387 14389 14401 14407 14411 14419 14423 14431 14437 14447 14449 14461 14479 14489 14503 14519 14533 14537 14543 14549 14551 14557 14561 14563 14591 14593 14621 14627 14629 14633 14639 14653 14657 14669 14683 14699 14713 14717 14723 14731 14737 14741 14747 14753 14759 14767 14771 14779 14783 14797 14813 14821 14827 14831 14843 14851 14867 14869 14879 14887 14891 14897 14923 14929 14939 14947 14951 14957 14969 14983 15013 15017 15031 15053 15061 15073 15077 15083 15091 15101 15107 15121 15131 15137 15139 15149 15161 15173 15187 15193 15199 15217 15227 15233 15241 15259 15263 15269 15271 15277 15287 15289 15299 15307 15313 15319 15329 15331 15349 15359 15361 15373 15377 15383 15391 15401 15413 15427 15439 15443 15451 15461 15467 15473 15493 15497 15511 15527 15541 15551 15559 15569 15581 15583 15601 15607 15619 15629 15641 15643 15647 15649 15661 15667 15671 15679 15683 15727 15731 15733 15737 15739 15749 15761 15767 15773 15787 15791 15797 15803 15809 15817 15823 15859 15877 15881 15887 15889 15901 15907 15913 15919 15923 15937 15959 15971 15973 15991 16001 16007 16033 16057 16061 16063 16067 16069 16073 16087 16091 16097 16103 16111 16127 16139 16141 16183 16187 16189 16193 16217 16223 16229 16231 16249 16253 16267 16273 16301 16319 16333 16339 16349 16361 16363 16369 16381 16411 16417 16421 16427 16433 16447 16451 16453 16477 16481 16487 16493 16519 16529 16547 16553 16561 16567 16573 16603 16607 16619 16631 16633 16649 16651 16657 16661 16673 16691 16693 16699 16703 16729 16741 16747 16759 16763 16787 16811 16823 16829 16831 16843 16871 16879 16883 16889 16901 16903 16921 16927 16931 16937 16943 16963 16979 16981 16987 16993 17011 17021 17027 17029 17033 17041 17047 17053 17077 17093 17099 17107 17117 17123 17137 17159 17167 17183 17189 17191 17203 17207 17209 17231 17239 17257 17291 17293 17299 17317 17321 17327 17333 17341 17351 17359 17377 17383 17387 17389 17393 17401 17417 17419 17431 17443 17449 17467 17471 17477 17483 17489 17491 17497 17509 17519 17539 17551 17569 17573 17579 17581 17597 17599 17609 17623 17627 17657 17659 17669 17681 17683 17707 17713 17729 17737 17747 17749 17761 17783 17789 17791 17807 17827 17837 17839 17851 17863 17881 17891 17903 17909 17911 17921 17923 17929 17939 17957 17959 17971 17977 17981 17987 17989 18013 18041 18043 18047 18049 18059 18061 18077 18089 18097 18119 18121 18127 18131 18133 18143 18149 18169 18181 18191 18199 18211 18217 18223 18229 18233 18251 18253 18257 18269 18287 18289 18301 18307 18311 18313 18329 18341 18353 18367 18371 18379 18397 18401 18413 18427 18433 18439 18443 18451 18457 18461 18481 18493 18503 18517 18521 18523 18539 18541 18553 18583 18587 18593 18617 18637 18661 18671 18679 18691 18701 18713 18719 18731 18743 18749 18757 18773 18787 18793 18797 18803 18839 18859 18869 18899 18911 18913 18917 18919 18947 18959 18973 18979 19001 19009 19013 19031 19037 19051 19069 19073 19079 19081 19087 19121 19139 19141 19157 19163 19181 19183 19207 19211 19213 19219 19231 19237 19249 19259 19267 19273 19289 19301 19309 19319 19333 19373 19379 19381 19387 19391 19403 19417 19421 19423 19427 19429 19433 19441 19447 19457 19463 19469 19471 19477 19483 19489 19501 19507 19531 19541 19543 19553 19559 19571 19577 19583 19597 19603 19609 19661 19681 19687 19697 19699 19709 19717 19727 19739 19751 19753 19759 19763 19777 19793 19801 19813 19819 19841 19843 19853 19861 19867 19889 19891 19913 19919 19927 19937 19949 19961 19963 19973 19979 19991 19993 19997 20011 20021 20023 20029 20047 20051 20063 20071 20089 20101 20107 20113 20117 20123 20129 20143 20147 20149 20161 20173 20177 20183 20201 20219 20231 20233 20249 20261 20269 20287 20297 20323 20327 20333 20341 20347 20353 20357 20359 20369 20389 20393 20399 20407 20411 20431 20441 20443 20477 20479 20483 20507 20509 20521 20533 20543 20549 20551 20563 20593 20599 20611 20627 20639 20641 20663 20681 20693 20707 20717 20719 20731 20743 20747 20749 20753 20759 20771 20773 20789 20807 20809 20849 20857 20873 20879 20887 20897 20899 20903 20921 20929 20939 20947 20959 20963 20981 20983 21001 21011 21013 21017 21019 21023 21031 21059 21061 21067 21089 21101 21107 21121 21139 21143 21149 21157 21163 21169 21179 21187 21191 21193 21211 21221 21227 21247 21269 21277 21283 21313 21317 21319 21323 21341 21347 21377 21379 21383 21391 21397 21401 21407 21419 21433 21467 21481 21487 21491 21493 21499 21503 21517 21521 21523 21529 21557 21559 21563 21569 21577 21587 21589 21599 21601 21611 21613 21617 21647 21649 21661 21673 21683 21701 21713 21727 21737 21739 21751 21757 21767 21773 21787 21799 21803 21817 21821 21839 21841 21851 21859 21863 21871 21881 21893 21911 21929 21937 21943 21961 21977 21991 21997 22003 22013 22027 22031 22037 22039 22051 22063 22067 22073 22079 22091 22093 22109 22111 22123 22129 22133 22147 22153 22157 22159 22171 22189 22193 22229 22247 22259 22271 22273 22277 22279 22283 22291 22303 22307 22343 22349 22367 22369 22381 22391 22397 22409 22433 22441 22447 22453 22469 22481 22483 22501 22511 22531 22541 22543 22549 22567 22571 22573 22613 22619 22621 22637 22639 22643 22651 22669 22679 22691 22697 22699 22709 22717 22721 22727 22739 22741 22751 22769 22777 22783 22787 22807 22811 22817 22853 22859 22861 22871 22877 22901 22907 22921 22937 22943 22961 22963 22973 22993 23003 23011 23017 23021 23027 23029 23039 23041 23053 23057 23059 23063 23071 23081 23087 23099 23117 23131 23143 23159 23167 23173 23189 23197 23201 23203 23209 23227 23251 23269 23279 23291 23293 23297 23311 23321 23327 23333 23339 23357 23369 23371 23399 23417 23431 23447 23459 23473 23497 23509 23531 23537 23539 23549 23557 23561 23563 23567 23581 23593 23599 23603 23609 23623 23627 23629 23633 23663 23669 23671 23677 23687 23689 23719 23741 23743 23747 23753 23761 23767 23773 23789 23801 23813 23819 23827 23831 23833 23857 23869 23873 23879 23887 23893 23899 23909 23911 23917 23929 23957 23971 23977 23981 23993 24001 24007 24019 24023 24029 24043 24049 24061 24071 24077 24083 24091 24097 24103 24107 24109 24113 24121 24133 24137 24151 24169 24179 24181 24197 24203 24223 24229 24239 24247 24251 24281 24317 24329 24337 24359 24371 24373 24379 24391 24407 24413 24419 24421 24439 24443 24469 24473 24481 24499 24509 24517 24527 24533 24547 24551 24571 24593 24611 24623 24631 24659 24671 24677 24683 24691 24697 24709 24733 24749 24763 24767 24781 24793 24799 24809 24821 24841 24847 24851 24859 24877 24889 24907 24917 24919 24923 24943 24953 24967 24971 24977 24979 24989 25013 25031 25033 25037 25057 25073 25087 25097 25111 25117 25121 25127 25147 25153 25163 25169 25171 25183 25189 25219 25229 25237 25243 25247 25253 25261 25301 25303 25307 25309 25321 25339 25343 25349 25357 25367 25373 25391 25409 25411 25423 25439 25447 25453 25457 25463 25469 25471 25523 25537 25541 25561 25577 25579 25583 25589 25601 25603 25609 25621 25633 25639 25643 25657 25667 25673 25679 25693 25703 25717 25733 25741 25747 25759 25763 25771 25793 25799 25801 25819 25841 25847 25849 25867 25873 25889 25903 25913 25919 25931 25933 25939 25943 25951 25969 25981 25997 25999 26003 26017 26021 26029 26041 26053 26083 26099 26107 26111 26113 26119 26141 26153 26161 26171 26177 26183 26189 26203 26209 26227 26237 26249 26251 26261 26263 26267 26293 26297 26309 26317 26321 26339 26347 26357 26371 26387 26393 26399 26407 26417 26423 26431 26437 26449 26459 26479 26489 26497 26501 26513 26539 26557 26561 26573 26591 26597 26627 26633 26641 26647 26669 26681 26683 26687 26693 26699 26701 26711 26713 26717 26723 26729 26731 26737 26759 26777 26783 26801 26813 26821 26833 26839 26849 26861 26863 26879 26881 26891 26893 26903 26921 26927 26947 26951 26953 26959 26981 26987 26993 27011 27017 27031 27043 27059 27061 27067 27073 27077 27091 27103 27107 27109 27127 27143 27179 27191 27197 27211 27239 27241 27253 27259 27271 27277 27281 27283 27299 27329 27337 27361 27367 27397 27407 27409 27427 27431 27437 27449 27457 27479 27481 27487 27509 27527 27529 27539 27541 27551 27581 27583 27611 27617 27631 27647 27653 27673 27689 27691 27697 27701 27733 27737 27739 27743 27749 27751 27763 27767 27773 27779 27791 27793 27799 27803 27809 27817 27823 27827 27847 27851 27883 27893 27901 27917 27919 27941 27943 27947 27953 27961 27967 27983 27997 28001 28019 28027 28031 28051 28057 28069 28081 28087 28097 28099 28109 28111 28123 28151 28163 28181 28183 28201 28211 28219 28229 28277 28279 28283 28289 28297 28307 28309 28319 28349 28351 28387 28393 28403 28409 28411 28429 28433 28439 28447 28463 28477 28493 28499 28513 28517 28537 28541 28547 28549 28559 28571 28573 28579 28591 28597 28603 28607 28619 28621 28627 28631 28643 28649 28657 28661 28663 28669 28687 28697 28703 28711 28723 28729 28751 28753 28759 28771 28789 28793 28807 28813 28817 28837 28843 28859 28867 28871 28879 28901 28909 28921 28927 28933 28949 28961 28979 29009 29017 29021 29023 29027 29033 29059 29063 29077 29101 29123 29129 29131 29137 29147 29153 29167 29173 29179 29191 29201 29207 29209 29221 29231 29243 29251 29269 29287 29297 29303 29311 29327 29333 29339 29347 29363 29383 29387 29389 29399 29401 29411 29423 29429 29437 29443 29453 29473 29483 29501 29527 29531 29537 29567 29569 29573 29581 29587 29599 29611 29629 29633 29641 29663 29669 29671 29683 29717 29723 29741 29753 29759 29761 29789 29803 29819 29833 29837 29851 29863 29867 29873 29879 29881 29917 29921 29927 29947 29959 29983 29989 30011 30013 30029 30047 30059 30071 30089 30091 30097 30103 30109 30113 30119 30133 30137 30139 30161 30169 30181 30187 30197 30203 30211 30223 30241 30253 30259 30269 30271 30293 30307 30313 30319 30323 30341 30347 30367 30389 30391 30403 30427 30431 30449 30467 30469 30491 30493 30497 30509 30517 30529 30539 30553 30557 30559 30577 30593 30631 30637 30643 30649 30661 30671 30677 30689 30697 30703 30707 30713 30727 30757 30763 30773 30781 30803 30809 30817 30829 30839 30841 30851 30853 30859 30869 30871 30881 30893 30911 30931 30937 30941 30949 30971 30977 30983 31013 31019 31033 31039 31051 31063 31069 31079 31081 31091 31121 31123 31139 31147 31151 31153 31159 31177 31181 31183 31189 31193 31219 31223 31231 31237 31247 31249 31253 31259 31267 31271 31277 31307 31319 31321 31327 31333 31337 31357 31379 31387 31391 31393 31397 31469 31477 31481 31489 31511 31513 31517 31531 31541 31543 31547 31567 31573 31583 31601 31607 31627 31643 31649 31657 31663 31667 31687 31699 31721 31723 31727 31729 31741 31751 31769 31771 31793 31799 31817 31847 31849 31859 31873 31883 31891 31907 31957 31963 31973 31981 31991 32003 32009 32027 32029 32051 32057 32059 32063 32069 32077 32083 32089 32099 32117 32119 32141 32143 32159 32173 32183 32189 32191 32203 32213 32233 32237 32251 32257 32261 32297 32299 32303 32309 32321 32323 32327 32341 32353 32359 32363 32369 32371 32377 32381 32401 32411 32413 32423 32429 32441 32443 32467 32479 32491 32497 32503 32507 32531 32533 32537 32561 32563 32569 32573 32579 32587 32603 32609 32611 32621 32633 32647 32653 32687 32693 32707 32713 32717 32719 32749 32771 32779 32783 32789 32797 32801 32803 32831 32833 32839 32843 32869 32887 32909 32911 32917 32933 32939 32941 32957 32969 32971 32983 32987 32993 32999 33013 33023 33029 33037 33049 33053 33071 33073 33083 33091 33107 33113 33119 33149 33151 33161 33179 33181 33191 33199 33203 33211 33223 33247 33287 33289 33301 33311 33317 33329 33331 33343 33347 33349 33353 33359 33377 33391 33403 33409 33413 33427 33457 33461 33469 33479 33487 33493 33503 33521 33529 33533 33547 33563 33569 33577 33581 33587 33589 33599 33601 33613 33617 33619 33623 33629 33637 33641 33647 33679 33703 33713 33721 33739 33749 33751 33757 33767 33769 33773 33791 33797 33809 33811 33827 33829 33851 33857 33863 33871 33889 33893 33911 33923 33931 33937 33941 33961 33967 33997 34019 34031 34033 34039 34057 34061 34123 34127 34129 34141 34147 34157 34159 34171 34183 34211 34213 34217 34231 34253 34259 34261 34267 34273 34283 34297 34301 34303 34313 34319 34327 34337 34351 34361 34367 34369 34381 34403 34421 34429 34439 34457 34469 34471 34483 34487 34499 34501 34511 34513 34519 34537 34543 34549 34583 34589 34591 34603 34607 34613 34631 34649 34651 34667 34673 34679 34687 34693 34703 34721 34729 34739 34747 34757 34759 34763 34781 34807 34819 34841 34843 34847 34849 34871 34877 34883 34897 34913 34919 34939 34949 34961 34963 34981 35023 35027 35051 35053 35059 35069 35081 35083 35089 35099 35107 35111 35117 35129 35141 35149 35153 35159 35171 35201 35221 35227 35251 35257 35267 35279 35281 35291 35311 35317 35323 35327 35339 35353 35363 35381 35393 35401 35407 35419 35423 35437 35447 35449 35461 35491 35507 35509 35521 35527 35531 35533 35537 35543 35569 35573 35591 35593 35597 35603 35617 35671 35677 35729 35731 35747 35753 35759 35771 35797 35801 35803 35809 35831 35837 35839 35851 35863 35869 35879 35897 35899 35911 35923 35933 35951 35963 35969 35977 35983 35993 35999 36007 36011 36013 36017 36037 36061 36067 36073 36083 36097 36107 36109 36131 36137 36151 36161 36187 36191 36209 36217 36229 36241 36251 36263 36269 36277 36293 36299 36307 36313 36319 36341 36343 36353 36373 36383 36389 36433 36451 36457 36467 36469 36473 36479 36493 36497 36523 36527 36529 36541 36551 36559 36563 36571 36583 36587 36599 36607 36629 36637 36643 36653 36671 36677 36683 36691 36697 36709 36713 36721 36739 36749 36761 36767 36779 36781 36787 36791 36793 36809 36821 36833 36847 36857 36871 36877 36887 36899 36901 36913 36919 36923 36929 36931 36943 36947 36973 36979 36997 37003 37013 37019 37021 37039 37049 37057 37061 37087 37097 37117 37123 37139 37159 37171 37181 37189 37199 37201 37217 37223 37243 37253 37273 37277 37307 37309 37313 37321 37337 37339 37357 37361 37363 37369 37379 37397 37409 37423 37441 37447 37463 37483 37489 37493 37501 37507 37511 37517 37529 37537 37547 37549 37561 37567 37571 37573 37579 37589 37591 37607 37619 37633 37643 37649 37657 37663 37691 37693 37699 37717 37747 37781 37783 37799 37811 37813 37831 37847 37853 37861 37871 37879 37889 37897 37907 37951 37957 37963 37967 37987 37991 37993 37997 38011 38039 38047 38053 38069 38083 38113 38119 38149 38153 38167 38177 38183 38189 38197 38201 38219 38231 38237 38239 38261 38273 38281 38287 38299 38303 38317 38321 38327 38329 38333 38351 38371 38377 38393 38431 38447 38449 38453 38459 38461 38501 38543 38557 38561 38567 38569 38593 38603 38609 38611 38629 38639 38651 38653 38669 38671 38677 38693 38699 38707 38711 38713 38723 38729 38737 38747 38749 38767 38783 38791 38803 38821 38833 38839 38851 38861 38867 38873 38891 38903 38917 38921 38923 38933 38953 38959 38971 38977 38993 39019 39023 39041 39043 39047 39079 39089 39097 39103 39107 39113 39119 39133 39139 39157 39161 39163 39181 39191 39199 39209 39217 39227 39229 39233 39239 39241 39251 39293 39301 39313 39317 39323 39341 39343 39359 39367 39371 39373 39383 39397 39409 39419 39439 39443 39451 39461 39499 39503 39509 39511 39521 39541 39551 39563 39569 39581 39607 39619 39623 39631 39659 39667 39671 39679 39703 39709 39719 39727 39733 39749 39761 39769 39779 39791 39799 39821 39827 39829 39839 39841 39847 39857 39863 39869 39877 39883 39887 39901 39929 39937 39953 39971 39979 39983 39989 diff --git a/test/Results/soli.fast.out b/test/Results/soli.fast.out new file mode 100644 index 0000000000..b94045c39f --- /dev/null +++ b/test/Results/soli.fast.out @@ -0,0 +1,50 @@ +500 +1000 +1500 +2000 +2500 +3000 +3500 +4000 +4500 +5000 +5500 +6000 +6500 +7000 +7500 +8000 +8500 +9000 +9500 +10000 +10500 +11000 +11500 +12000 +12500 +13000 +13500 +14000 +14500 +15000 +15500 +16000 +16500 +17000 +17500 +18000 +18500 +19000 +19500 +20000 + +......... +... ... +... ... +. . +. $ . +. . +... ... +... ... +......... diff --git a/test/Results/soli.out b/test/Results/soli.out new file mode 100644 index 0000000000..b94045c39f --- /dev/null +++ b/test/Results/soli.out @@ -0,0 +1,50 @@ +500 +1000 +1500 +2000 +2500 +3000 +3500 +4000 +4500 +5000 +5500 +6000 +6500 +7000 +7500 +8000 +8500 +9000 +9500 +10000 +10500 +11000 +11500 +12000 +12500 +13000 +13500 +14000 +14500 +15000 +15500 +16000 +16500 +17000 +17500 +18000 +18500 +19000 +19500 +20000 + +......... +... ... +... ... +. . +. $ . +. . +... ... +... ... +......... diff --git a/test/Results/takc.out b/test/Results/takc.out new file mode 100644 index 0000000000..0fecf6533b --- /dev/null +++ b/test/Results/takc.out @@ -0,0 +1 @@ +350 diff --git a/test/Results/taku.out b/test/Results/taku.out new file mode 100644 index 0000000000..0fecf6533b --- /dev/null +++ b/test/Results/taku.out @@ -0,0 +1 @@ +350 diff --git a/test/boyer.ml b/test/boyer.ml new file mode 100644 index 0000000000..fdbcfd37b7 --- /dev/null +++ b/test/boyer.ml @@ -0,0 +1,889 @@ +(* Manipulations over terms *) + +type term = + Var of int + | Prop of head * term list +and head = + { name: string; + mutable props: (term * term) list } + +let rec print_term = function + Var v -> + print_string "v"; print_int v + | Prop (head,argl) -> + print_string "("; + print_string head.name; + List.iter (fun t -> print_string " "; print_term t) argl; + print_string ")" + +let lemmas = ref ([] : head list) + +(* Replacement for property lists *) + +let get name = + let rec get_rec = function + hd1::hdl -> + if hd1.name = name then hd1 else get_rec hdl + | [] -> + let entry = {name = name; props = []} in + lemmas := entry :: !lemmas; + entry + in get_rec !lemmas + +let add_lemma (Prop(_, [(Prop(headl,_) as left); right])) = + headl.props <- (left, right) :: headl.props + +(* Substitutions *) + +type subst = Bind of int * term + +let get_binding v list = + let rec get_rec = function + [] -> failwith "unbound" + | Bind(w,t)::rest -> if v = w then t else get_rec rest + in get_rec list + +let apply_subst alist term = + let rec as_rec = function + Var v -> begin try get_binding v alist with Failure _ -> term end + | Prop (head,argl) -> Prop (head, List.map as_rec argl) + in as_rec term + +exception Unify + +let rec unify (term1, term2) = + unify1 (term1, term2, []) + +and unify1 (term1, term2, unify_subst) = + match term2 with + Var v -> + begin try + if get_binding v unify_subst = term1 + then unify_subst + else raise Unify + with Failure _ -> + Bind(v,term1) :: unify_subst + end + | Prop (head2, argl2) -> + match term1 with + Var _ -> raise Unify + | Prop (head1,argl1) -> + if head1 == head2 + then unify1_lst (argl1, argl2, unify_subst) + else raise Unify + +and unify1_lst = function + ([], [], unify_subst) -> unify_subst + | (h1::r1, h2::r2, unify_subst) -> + unify1_lst(r1, r2, unify1(h1, h2, unify_subst)) + | _ -> raise Unify + + +let rec rewrite = function + Var _ as term -> term + | Prop (head, argl) -> + rewrite_with_lemmas (Prop (head, List.map rewrite argl), head.props) +and rewrite_with_lemmas = function + (term, []) -> + term + | (term, (t1,t2)::rest) -> + try + rewrite (apply_subst (unify (term, t1)) t2) + with Unify -> + rewrite_with_lemmas (term, rest) + +type cterm = CVar of int | CProp of string * cterm list + +let rec cterm_to_term = function + CVar v -> Var v + | CProp(p, l) -> Prop(get p, List.map cterm_to_term l) + +let add t = add_lemma (cterm_to_term t) + +let _ = +add (CProp +("equal", + [CProp ("compile",[CVar 5]); + CProp + ("reverse", + [CProp ("codegen",[CProp ("optimize",[CVar 5]); CProp ("nil",[])])])])); +add (CProp +("equal", + [CProp ("eqp",[CVar 23; CVar 24]); + CProp ("equal",[CProp ("fix",[CVar 23]); CProp ("fix",[CVar 24])])])); +add (CProp +("equal", + [CProp ("gt",[CVar 23; CVar 24]); CProp ("lt",[CVar 24; CVar 23])])); +add (CProp +("equal", + [CProp ("le",[CVar 23; CVar 24]); CProp ("ge",[CVar 24; CVar 23])])); +add (CProp +("equal", + [CProp ("ge",[CVar 23; CVar 24]); CProp ("le",[CVar 24; CVar 23])])); +add (CProp +("equal", + [CProp ("boolean",[CVar 23]); + CProp + ("or", + [CProp ("equal",[CVar 23; CProp ("true",[])]); + CProp ("equal",[CVar 23; CProp ("false",[])])])])); +add (CProp +("equal", + [CProp ("iff",[CVar 23; CVar 24]); + CProp + ("and", + [CProp ("implies",[CVar 23; CVar 24]); + CProp ("implies",[CVar 24; CVar 23])])])); +add (CProp +("equal", + [CProp ("even1",[CVar 23]); + CProp + ("if", + [CProp ("zerop",[CVar 23]); CProp ("true",[]); + CProp ("odd",[CProp ("sub1",[CVar 23])])])])); +add (CProp +("equal", + [CProp ("countps_",[CVar 11; CVar 15]); + CProp ("countps_loop",[CVar 11; CVar 15; CProp ("zero",[])])])); +add (CProp +("equal", + [CProp ("fact_",[CVar 8]); + CProp ("fact_loop",[CVar 8; CProp ("one",[])])])); +add (CProp +("equal", + [CProp ("reverse_",[CVar 23]); + CProp ("reverse_loop",[CVar 23; CProp ("nil",[])])])); +add (CProp +("equal", + [CProp ("divides",[CVar 23; CVar 24]); + CProp ("zerop",[CProp ("remainder",[CVar 24; CVar 23])])])); +add (CProp +("equal", + [CProp ("assume_true",[CVar 21; CVar 0]); + CProp ("cons",[CProp ("cons",[CVar 21; CProp ("true",[])]); CVar 0])])); +add (CProp +("equal", + [CProp ("assume_false",[CVar 21; CVar 0]); + CProp ("cons",[CProp ("cons",[CVar 21; CProp ("false",[])]); CVar 0])])); +add (CProp +("equal", + [CProp ("tautology_checker",[CVar 23]); + CProp ("tautologyp",[CProp ("normalize",[CVar 23]); CProp ("nil",[])])])); +add (CProp +("equal", + [CProp ("falsify",[CVar 23]); + CProp ("falsify1",[CProp ("normalize",[CVar 23]); CProp ("nil",[])])])); +add (CProp +("equal", + [CProp ("prime",[CVar 23]); + CProp + ("and", + [CProp ("not",[CProp ("zerop",[CVar 23])]); + CProp + ("not", + [CProp ("equal",[CVar 23; CProp ("add1",[CProp ("zero",[])])])]); + CProp ("prime1",[CVar 23; CProp ("sub1",[CVar 23])])])])); +add (CProp +("equal", + [CProp ("and",[CVar 15; CVar 16]); + CProp + ("if", + [CVar 15; + CProp ("if",[CVar 16; CProp ("true",[]); CProp ("false",[])]); + CProp ("false",[])])])); +add (CProp +("equal", + [CProp ("or",[CVar 15; CVar 16]); + CProp + ("if", + [CVar 15; CProp ("true",[]); + CProp ("if",[CVar 16; CProp ("true",[]); CProp ("false",[])]); + CProp ("false",[])])])); +add (CProp +("equal", + [CProp ("not",[CVar 15]); + CProp ("if",[CVar 15; CProp ("false",[]); CProp ("true",[])])])); +add (CProp +("equal", + [CProp ("implies",[CVar 15; CVar 16]); + CProp + ("if", + [CVar 15; + CProp ("if",[CVar 16; CProp ("true",[]); CProp ("false",[])]); + CProp ("true",[])])])); +add (CProp +("equal", + [CProp ("fix",[CVar 23]); + CProp ("if",[CProp ("numberp",[CVar 23]); CVar 23; CProp ("zero",[])])])); +add (CProp +("equal", + [CProp ("if",[CProp ("if",[CVar 0; CVar 1; CVar 2]); CVar 3; CVar 4]); + CProp + ("if", + [CVar 0; CProp ("if",[CVar 1; CVar 3; CVar 4]); + CProp ("if",[CVar 2; CVar 3; CVar 4])])])); +add (CProp +("equal", + [CProp ("zerop",[CVar 23]); + CProp + ("or", + [CProp ("equal",[CVar 23; CProp ("zero",[])]); + CProp ("not",[CProp ("numberp",[CVar 23])])])])); +add (CProp +("equal", + [CProp ("plus",[CProp ("plus",[CVar 23; CVar 24]); CVar 25]); + CProp ("plus",[CVar 23; CProp ("plus",[CVar 24; CVar 25])])])); +add (CProp +("equal", + [CProp ("equal",[CProp ("plus",[CVar 0; CVar 1]); CProp ("zero",[])]); + CProp ("and",[CProp ("zerop",[CVar 0]); CProp ("zerop",[CVar 1])])])); +add (CProp +("equal",[CProp ("difference",[CVar 23; CVar 23]); CProp ("zero",[])])); +add (CProp +("equal", + [CProp + ("equal", + [CProp ("plus",[CVar 0; CVar 1]); CProp ("plus",[CVar 0; CVar 2])]); + CProp ("equal",[CProp ("fix",[CVar 1]); CProp ("fix",[CVar 2])])])); +add (CProp +("equal", + [CProp + ("equal",[CProp ("zero",[]); CProp ("difference",[CVar 23; CVar 24])]); + CProp ("not",[CProp ("gt",[CVar 24; CVar 23])])])); +add (CProp +("equal", + [CProp ("equal",[CVar 23; CProp ("difference",[CVar 23; CVar 24])]); + CProp + ("and", + [CProp ("numberp",[CVar 23]); + CProp + ("or", + [CProp ("equal",[CVar 23; CProp ("zero",[])]); + CProp ("zerop",[CVar 24])])])])); +add (CProp +("equal", + [CProp + ("meaning", + [CProp ("plus_tree",[CProp ("append",[CVar 23; CVar 24])]); CVar 0]); + CProp + ("plus", + [CProp ("meaning",[CProp ("plus_tree",[CVar 23]); CVar 0]); + CProp ("meaning",[CProp ("plus_tree",[CVar 24]); CVar 0])])])); +add (CProp +("equal", + [CProp + ("meaning", + [CProp ("plus_tree",[CProp ("plus_fringe",[CVar 23])]); CVar 0]); + CProp ("fix",[CProp ("meaning",[CVar 23; CVar 0])])])); +add (CProp +("equal", + [CProp ("append",[CProp ("append",[CVar 23; CVar 24]); CVar 25]); + CProp ("append",[CVar 23; CProp ("append",[CVar 24; CVar 25])])])); +add (CProp +("equal", + [CProp ("reverse",[CProp ("append",[CVar 0; CVar 1])]); + CProp + ("append",[CProp ("reverse",[CVar 1]); CProp ("reverse",[CVar 0])])])); +add (CProp +("equal", + [CProp ("times",[CVar 23; CProp ("plus",[CVar 24; CVar 25])]); + CProp + ("plus", + [CProp ("times",[CVar 23; CVar 24]); + CProp ("times",[CVar 23; CVar 25])])])); +add (CProp +("equal", + [CProp ("times",[CProp ("times",[CVar 23; CVar 24]); CVar 25]); + CProp ("times",[CVar 23; CProp ("times",[CVar 24; CVar 25])])])); +add (CProp +("equal", + [CProp + ("equal",[CProp ("times",[CVar 23; CVar 24]); CProp ("zero",[])]); + CProp ("or",[CProp ("zerop",[CVar 23]); CProp ("zerop",[CVar 24])])])); +add (CProp +("equal", + [CProp ("exec",[CProp ("append",[CVar 23; CVar 24]); CVar 15; CVar 4]); + CProp + ("exec",[CVar 24; CProp ("exec",[CVar 23; CVar 15; CVar 4]); CVar 4])])); +add (CProp +("equal", + [CProp ("mc_flatten",[CVar 23; CVar 24]); + CProp ("append",[CProp ("flatten",[CVar 23]); CVar 24])])); +add (CProp +("equal", + [CProp ("member",[CVar 23; CProp ("append",[CVar 0; CVar 1])]); + CProp + ("or", + [CProp ("member",[CVar 23; CVar 0]); + CProp ("member",[CVar 23; CVar 1])])])); +add (CProp +("equal", + [CProp ("member",[CVar 23; CProp ("reverse",[CVar 24])]); + CProp ("member",[CVar 23; CVar 24])])); +add (CProp +("equal", + [CProp ("length",[CProp ("reverse",[CVar 23])]); + CProp ("length",[CVar 23])])); +add (CProp +("equal", + [CProp ("member",[CVar 0; CProp ("intersect",[CVar 1; CVar 2])]); + CProp + ("and", + [CProp ("member",[CVar 0; CVar 1]); CProp ("member",[CVar 0; CVar 2])])])); +add (CProp +("equal",[CProp ("nth",[CProp ("zero",[]); CVar 8]); CProp ("zero",[])])); +add (CProp +("equal", + [CProp ("exp",[CVar 8; CProp ("plus",[CVar 9; CVar 10])]); + CProp + ("times", + [CProp ("exp",[CVar 8; CVar 9]); CProp ("exp",[CVar 8; CVar 10])])])); +add (CProp +("equal", + [CProp ("exp",[CVar 8; CProp ("times",[CVar 9; CVar 10])]); + CProp ("exp",[CProp ("exp",[CVar 8; CVar 9]); CVar 10])])); +add (CProp +("equal", + [CProp ("reverse_loop",[CVar 23; CVar 24]); + CProp ("append",[CProp ("reverse",[CVar 23]); CVar 24])])); +add (CProp +("equal", + [CProp ("reverse_loop",[CVar 23; CProp ("nil",[])]); + CProp ("reverse",[CVar 23])])); +add (CProp +("equal", + [CProp ("count_list",[CVar 25; CProp ("sort_lp",[CVar 23; CVar 24])]); + CProp + ("plus", + [CProp ("count_list",[CVar 25; CVar 23]); + CProp ("count_list",[CVar 25; CVar 24])])])); +add (CProp +("equal", + [CProp + ("equal", + [CProp ("append",[CVar 0; CVar 1]); CProp ("append",[CVar 0; CVar 2])]); + CProp ("equal",[CVar 1; CVar 2])])); +add (CProp +("equal", + [CProp + ("plus", + [CProp ("remainder",[CVar 23; CVar 24]); + CProp ("times",[CVar 24; CProp ("quotient",[CVar 23; CVar 24])])]); + CProp ("fix",[CVar 23])])); +add (CProp +("equal", + [CProp + ("power_eval",[CProp ("big_plus",[CVar 11; CVar 8; CVar 1]); CVar 1]); + CProp ("plus",[CProp ("power_eval",[CVar 11; CVar 1]); CVar 8])])); +add (CProp +("equal", + [CProp + ("power_eval", + [CProp ("big_plus",[CVar 23; CVar 24; CVar 8; CVar 1]); CVar 1]); + CProp + ("plus", + [CVar 8; + CProp + ("plus", + [CProp ("power_eval",[CVar 23; CVar 1]); + CProp ("power_eval",[CVar 24; CVar 1])])])])); +add (CProp +("equal", + [CProp ("remainder",[CVar 24; CProp ("one",[])]); CProp ("zero",[])])); +add (CProp +("equal", + [CProp ("lt",[CProp ("remainder",[CVar 23; CVar 24]); CVar 24]); + CProp ("not",[CProp ("zerop",[CVar 24])])])); +add (CProp +("equal",[CProp ("remainder",[CVar 23; CVar 23]); CProp ("zero",[])])); +add (CProp +("equal", + [CProp ("lt",[CProp ("quotient",[CVar 8; CVar 9]); CVar 8]); + CProp + ("and", + [CProp ("not",[CProp ("zerop",[CVar 8])]); + CProp + ("or", + [CProp ("zerop",[CVar 9]); + CProp ("not",[CProp ("equal",[CVar 9; CProp ("one",[])])])])])])); +add (CProp +("equal", + [CProp ("lt",[CProp ("remainder",[CVar 23; CVar 24]); CVar 23]); + CProp + ("and", + [CProp ("not",[CProp ("zerop",[CVar 24])]); + CProp ("not",[CProp ("zerop",[CVar 23])]); + CProp ("not",[CProp ("lt",[CVar 23; CVar 24])])])])); +add (CProp +("equal", + [CProp ("power_eval",[CProp ("power_rep",[CVar 8; CVar 1]); CVar 1]); + CProp ("fix",[CVar 8])])); +add (CProp +("equal", + [CProp + ("power_eval", + [CProp + ("big_plus", + [CProp ("power_rep",[CVar 8; CVar 1]); + CProp ("power_rep",[CVar 9; CVar 1]); CProp ("zero",[]); + CVar 1]); + CVar 1]); + CProp ("plus",[CVar 8; CVar 9])])); +add (CProp +("equal", + [CProp ("gcd",[CVar 23; CVar 24]); CProp ("gcd",[CVar 24; CVar 23])])); +add (CProp +("equal", + [CProp ("nth",[CProp ("append",[CVar 0; CVar 1]); CVar 8]); + CProp + ("append", + [CProp ("nth",[CVar 0; CVar 8]); + CProp + ("nth", + [CVar 1; CProp ("difference",[CVar 8; CProp ("length",[CVar 0])])])])])); +add (CProp +("equal", + [CProp ("difference",[CProp ("plus",[CVar 23; CVar 24]); CVar 23]); + CProp ("fix",[CVar 24])])); +add (CProp +("equal", + [CProp ("difference",[CProp ("plus",[CVar 24; CVar 23]); CVar 23]); + CProp ("fix",[CVar 24])])); +add (CProp +("equal", + [CProp + ("difference", + [CProp ("plus",[CVar 23; CVar 24]); CProp ("plus",[CVar 23; CVar 25])]); + CProp ("difference",[CVar 24; CVar 25])])); +add (CProp +("equal", + [CProp ("times",[CVar 23; CProp ("difference",[CVar 2; CVar 22])]); + CProp + ("difference", + [CProp ("times",[CVar 2; CVar 23]); + CProp ("times",[CVar 22; CVar 23])])])); +add (CProp +("equal", + [CProp ("remainder",[CProp ("times",[CVar 23; CVar 25]); CVar 25]); + CProp ("zero",[])])); +add (CProp +("equal", + [CProp + ("difference", + [CProp ("plus",[CVar 1; CProp ("plus",[CVar 0; CVar 2])]); CVar 0]); + CProp ("plus",[CVar 1; CVar 2])])); +add (CProp +("equal", + [CProp + ("difference", + [CProp ("add1",[CProp ("plus",[CVar 24; CVar 25])]); CVar 25]); + CProp ("add1",[CVar 24])])); +add (CProp +("equal", + [CProp + ("lt", + [CProp ("plus",[CVar 23; CVar 24]); CProp ("plus",[CVar 23; CVar 25])]); + CProp ("lt",[CVar 24; CVar 25])])); +add (CProp +("equal", + [CProp + ("lt", + [CProp ("times",[CVar 23; CVar 25]); + CProp ("times",[CVar 24; CVar 25])]); + CProp + ("and", + [CProp ("not",[CProp ("zerop",[CVar 25])]); + CProp ("lt",[CVar 23; CVar 24])])])); +add (CProp +("equal", + [CProp ("lt",[CVar 24; CProp ("plus",[CVar 23; CVar 24])]); + CProp ("not",[CProp ("zerop",[CVar 23])])])); +add (CProp +("equal", + [CProp + ("gcd", + [CProp ("times",[CVar 23; CVar 25]); + CProp ("times",[CVar 24; CVar 25])]); + CProp ("times",[CVar 25; CProp ("gcd",[CVar 23; CVar 24])])])); +add (CProp +("equal", + [CProp ("value",[CProp ("normalize",[CVar 23]); CVar 0]); + CProp ("value",[CVar 23; CVar 0])])); +add (CProp +("equal", + [CProp + ("equal", + [CProp ("flatten",[CVar 23]); + CProp ("cons",[CVar 24; CProp ("nil",[])])]); + CProp + ("and", + [CProp ("nlistp",[CVar 23]); CProp ("equal",[CVar 23; CVar 24])])])); +add (CProp +("equal", + [CProp ("listp",[CProp ("gother",[CVar 23])]); + CProp ("listp",[CVar 23])])); +add (CProp +("equal", + [CProp ("samefringe",[CVar 23; CVar 24]); + CProp + ("equal",[CProp ("flatten",[CVar 23]); CProp ("flatten",[CVar 24])])])); +add (CProp +("equal", + [CProp + ("equal", + [CProp ("greatest_factor",[CVar 23; CVar 24]); CProp ("zero",[])]); + CProp + ("and", + [CProp + ("or", + [CProp ("zerop",[CVar 24]); + CProp ("equal",[CVar 24; CProp ("one",[])])]); + CProp ("equal",[CVar 23; CProp ("zero",[])])])])); +add (CProp +("equal", + [CProp + ("equal", + [CProp ("greatest_factor",[CVar 23; CVar 24]); CProp ("one",[])]); + CProp ("equal",[CVar 23; CProp ("one",[])])])); +add (CProp +("equal", + [CProp ("numberp",[CProp ("greatest_factor",[CVar 23; CVar 24])]); + CProp + ("not", + [CProp + ("and", + [CProp + ("or", + [CProp ("zerop",[CVar 24]); + CProp ("equal",[CVar 24; CProp ("one",[])])]); + CProp ("not",[CProp ("numberp",[CVar 23])])])])])); +add (CProp +("equal", + [CProp ("times_list",[CProp ("append",[CVar 23; CVar 24])]); + CProp + ("times", + [CProp ("times_list",[CVar 23]); CProp ("times_list",[CVar 24])])])); +add (CProp +("equal", + [CProp ("prime_list",[CProp ("append",[CVar 23; CVar 24])]); + CProp + ("and", + [CProp ("prime_list",[CVar 23]); CProp ("prime_list",[CVar 24])])])); +add (CProp +("equal", + [CProp ("equal",[CVar 25; CProp ("times",[CVar 22; CVar 25])]); + CProp + ("and", + [CProp ("numberp",[CVar 25]); + CProp + ("or", + [CProp ("equal",[CVar 25; CProp ("zero",[])]); + CProp ("equal",[CVar 22; CProp ("one",[])])])])])); +add (CProp +("equal", + [CProp ("ge",[CVar 23; CVar 24]); + CProp ("not",[CProp ("lt",[CVar 23; CVar 24])])])); +add (CProp +("equal", + [CProp ("equal",[CVar 23; CProp ("times",[CVar 23; CVar 24])]); + CProp + ("or", + [CProp ("equal",[CVar 23; CProp ("zero",[])]); + CProp + ("and", + [CProp ("numberp",[CVar 23]); + CProp ("equal",[CVar 24; CProp ("one",[])])])])])); +add (CProp +("equal", + [CProp ("remainder",[CProp ("times",[CVar 24; CVar 23]); CVar 24]); + CProp ("zero",[])])); +add (CProp +("equal", + [CProp ("equal",[CProp ("times",[CVar 0; CVar 1]); CProp ("one",[])]); + CProp + ("and", + [CProp ("not",[CProp ("equal",[CVar 0; CProp ("zero",[])])]); + CProp ("not",[CProp ("equal",[CVar 1; CProp ("zero",[])])]); + CProp ("numberp",[CVar 0]); CProp ("numberp",[CVar 1]); + CProp ("equal",[CProp ("sub1",[CVar 0]); CProp ("zero",[])]); + CProp ("equal",[CProp ("sub1",[CVar 1]); CProp ("zero",[])])])])); +add (CProp +("equal", + [CProp + ("lt", + [CProp ("length",[CProp ("delete",[CVar 23; CVar 11])]); + CProp ("length",[CVar 11])]); + CProp ("member",[CVar 23; CVar 11])])); +add (CProp +("equal", + [CProp ("sort2",[CProp ("delete",[CVar 23; CVar 11])]); + CProp ("delete",[CVar 23; CProp ("sort2",[CVar 11])])])); +add (CProp ("equal",[CProp ("dsort",[CVar 23]); CProp ("sort2",[CVar 23])])); +add (CProp +("equal", + [CProp + ("length", + [CProp + ("cons", + [CVar 0; + CProp + ("cons", + [CVar 1; + CProp + ("cons", + [CVar 2; + CProp + ("cons", + [CVar 3; + CProp ("cons",[CVar 4; CProp ("cons",[CVar 5; CVar 6])])])])])])]) + ; CProp ("plus",[CProp ("six",[]); CProp ("length",[CVar 6])])])); +add (CProp +("equal", + [CProp + ("difference", + [CProp ("add1",[CProp ("add1",[CVar 23])]); CProp ("two",[])]); + CProp ("fix",[CVar 23])])); +add (CProp +("equal", + [CProp + ("quotient", + [CProp ("plus",[CVar 23; CProp ("plus",[CVar 23; CVar 24])]); + CProp ("two",[])]); + CProp + ("plus",[CVar 23; CProp ("quotient",[CVar 24; CProp ("two",[])])])])); +add (CProp +("equal", + [CProp ("sigma",[CProp ("zero",[]); CVar 8]); + CProp + ("quotient", + [CProp ("times",[CVar 8; CProp ("add1",[CVar 8])]); CProp ("two",[])])])); +add (CProp +("equal", + [CProp ("plus",[CVar 23; CProp ("add1",[CVar 24])]); + CProp + ("if", + [CProp ("numberp",[CVar 24]); + CProp ("add1",[CProp ("plus",[CVar 23; CVar 24])]); + CProp ("add1",[CVar 23])])])); +add (CProp +("equal", + [CProp + ("equal", + [CProp ("difference",[CVar 23; CVar 24]); + CProp ("difference",[CVar 25; CVar 24])]); + CProp + ("if", + [CProp ("lt",[CVar 23; CVar 24]); + CProp ("not",[CProp ("lt",[CVar 24; CVar 25])]); + CProp + ("if", + [CProp ("lt",[CVar 25; CVar 24]); + CProp ("not",[CProp ("lt",[CVar 24; CVar 23])]); + CProp ("equal",[CProp ("fix",[CVar 23]); CProp ("fix",[CVar 25])])])])]) +); +add (CProp +("equal", + [CProp + ("meaning", + [CProp ("plus_tree",[CProp ("delete",[CVar 23; CVar 24])]); CVar 0]); + CProp + ("if", + [CProp ("member",[CVar 23; CVar 24]); + CProp + ("difference", + [CProp ("meaning",[CProp ("plus_tree",[CVar 24]); CVar 0]); + CProp ("meaning",[CVar 23; CVar 0])]); + CProp ("meaning",[CProp ("plus_tree",[CVar 24]); CVar 0])])])); +add (CProp +("equal", + [CProp ("times",[CVar 23; CProp ("add1",[CVar 24])]); + CProp + ("if", + [CProp ("numberp",[CVar 24]); + CProp + ("plus", + [CVar 23; CProp ("times",[CVar 23; CVar 24]); + CProp ("fix",[CVar 23])])])])); +add (CProp +("equal", + [CProp ("nth",[CProp ("nil",[]); CVar 8]); + CProp + ("if",[CProp ("zerop",[CVar 8]); CProp ("nil",[]); CProp ("zero",[])])])); +add (CProp +("equal", + [CProp ("last",[CProp ("append",[CVar 0; CVar 1])]); + CProp + ("if", + [CProp ("listp",[CVar 1]); CProp ("last",[CVar 1]); + CProp + ("if", + [CProp ("listp",[CVar 0]); + CProp ("cons",[CProp ("car",[CProp ("last",[CVar 0])]); CVar 1]); + CVar 1])])])); +add (CProp +("equal", + [CProp ("equal",[CProp ("lt",[CVar 23; CVar 24]); CVar 25]); + CProp + ("if", + [CProp ("lt",[CVar 23; CVar 24]); + CProp ("equal",[CProp ("true",[]); CVar 25]); + CProp ("equal",[CProp ("false",[]); CVar 25])])])); +add (CProp +("equal", + [CProp ("assignment",[CVar 23; CProp ("append",[CVar 0; CVar 1])]); + CProp + ("if", + [CProp ("assignedp",[CVar 23; CVar 0]); + CProp ("assignment",[CVar 23; CVar 0]); + CProp ("assignment",[CVar 23; CVar 1])])])); +add (CProp +("equal", + [CProp ("car",[CProp ("gother",[CVar 23])]); + CProp + ("if", + [CProp ("listp",[CVar 23]); + CProp ("car",[CProp ("flatten",[CVar 23])]); CProp ("zero",[])])])); +add (CProp +("equal", + [CProp ("flatten",[CProp ("cdr",[CProp ("gother",[CVar 23])])]); + CProp + ("if", + [CProp ("listp",[CVar 23]); + CProp ("cdr",[CProp ("flatten",[CVar 23])]); + CProp ("cons",[CProp ("zero",[]); CProp ("nil",[])])])])); +add (CProp +("equal", + [CProp ("quotient",[CProp ("times",[CVar 24; CVar 23]); CVar 24]); + CProp + ("if", + [CProp ("zerop",[CVar 24]); CProp ("zero",[]); + CProp ("fix",[CVar 23])])])); +add (CProp +("equal", + [CProp ("get",[CVar 9; CProp ("set",[CVar 8; CVar 21; CVar 12])]); + CProp + ("if", + [CProp ("eqp",[CVar 9; CVar 8]); CVar 21; + CProp ("get",[CVar 9; CVar 12])])])) + +(* Tautology checker *) + +let truep (x, lst) = + match x with + Prop(head, _) -> + head.name = "true" or List.mem x lst + | _ -> + List.mem x lst + +and falsep (x, lst) = + match x with + Prop(head, _) -> + head.name = "false" or List.mem x lst + | _ -> + List.mem x lst + + +let rec tautologyp (x, true_lst, false_lst) = + if truep (x, true_lst) then true else + if falsep (x, false_lst) then false else begin +(* + print_term x; print_newline(); +*) + match x with + Var _ -> false + | Prop (head,[test; yes; no]) as p -> + if head.name = "if" then + if truep (test, true_lst) then + tautologyp (yes, true_lst, false_lst) + else if falsep (test, false_lst) then + tautologyp (no, true_lst, false_lst) + else tautologyp (yes, test::true_lst, false_lst) & + tautologyp (no, true_lst, test::false_lst) + else + false + end + + +let tautp x = +(* print_term x; print_string"\n"; *) + let y = rewrite x in +(* print_term y; print_string "\n"; *) + tautologyp (y, [], []) + +(* the benchmark *) + +let subst = +[Bind(23, cterm_to_term( + CProp + ("f", + [CProp + ("plus", + [CProp ("plus",[CVar 0; CVar 1]); + CProp ("plus",[CVar 2; CProp ("zero",[])])])]))); + Bind(24, cterm_to_term( + CProp + ("f", + [CProp + ("times", + [CProp ("times",[CVar 0; CVar 1]); + CProp ("plus",[CVar 2; CVar 3])])]))); + Bind(25, cterm_to_term( + CProp + ("f", + [CProp + ("reverse", + [CProp + ("append", + [CProp ("append",[CVar 0; CVar 1]); + CProp ("nil",[])])])]))); + Bind(20, cterm_to_term( + CProp + ("equal", + [CProp ("plus",[CVar 0; CVar 1]); + CProp ("difference",[CVar 23; CVar 24])]))); + Bind(22, cterm_to_term( + CProp + ("lt", + [CProp ("remainder",[CVar 0; CVar 1]); + CProp ("member",[CVar 0; CProp ("length",[CVar 1])])])))] + +let term = cterm_to_term( + CProp + ("implies", + [CProp + ("and", + [CProp ("implies",[CVar 23; CVar 24]); + CProp + ("and", + [CProp ("implies",[CVar 24; CVar 25]); + CProp + ("and", + [CProp ("implies",[CVar 25; CVar 20]); + CProp ("implies",[CVar 20; CVar 22])])])]); + CProp ("implies",[CVar 23; CVar 22])])) + +let _ = + if tautp (apply_subst subst term) then + print_string "Proved!\n" + else + print_string "Cannot prove!\n"; + exit 0 + +(********* +with + failure s -> + print_string "Exception failure("; print_string s; print_string ")\n" + | Unify -> + print_string "Exception Unify\n" + | match_failure(file,start,stop) -> + print_string "Exception match_failure("; + print_string file; + print_string ","; + print_int start; + print_string ","; + print_int stop; + print_string ")\n" + | _ -> + print_string "Exception ?\n" + +**********) diff --git a/test/fft.ml b/test/fft.ml new file mode 100644 index 0000000000..dc42d21030 --- /dev/null +++ b/test/fft.ml @@ -0,0 +1,175 @@ +let pi = 3.14159265358979323846 + +let tpi = 2.0 *. pi + +let fft px py np = + let i = ref 2 in + let m = ref 1 in + + while (!i < np) do + i := !i + !i; + m := !m + 1 + done; + + let n = !i in + + if n <> np then begin + for i = np+1 to n do + px.(i) <- 0.0; + py.(i) <- 0.0 + done; + print_string "Use "; print_int n; + print_string " point fft"; print_newline() + end; + + let n2 = ref(n+n) in + for k = 1 to !m-1 do + n2 := !n2 / 2; + let n4 = !n2 / 4 in + let e = tpi /. float !n2 in + let a = ref 0.0 in + + for j = 1 to n4 do + let a3 = 3.0 *. !a in + let cc1 = cos(!a) in + let ss1 = sin(!a) in + let cc3 = cos(a3) in + let ss3 = sin(a3) in + a := e *. float j; + let is = ref j in + let id = ref(2 * !n2) in + + while !is < n do + let i0r = ref !is in + while !i0r < n do + let i0 = !i0r in + let i1 = i0 + n4 in + let i2 = i1 + n4 in + let i3 = i2 + n4 in + let r1 = px.(i0) -. px.(i2) in + px.(i0) <- px.(i0) +. px.(i2); + let r2 = px.(i1) -. px.(i3) in + px.(i1) <- px.(i1) +. px.(i3); + let s1 = py.(i0) -. py.(i2) in + py.(i0) <- py.(i0) +. py.(i2); + let s2 = py.(i1) -. py.(i3) in + py.(i1) <- py.(i1) +. py.(i3); + let s3 = r1 -. s2 in + let r1 = r1 +. s2 in + let s2 = r2 -. s1 in + let r2 = r2 +. s1 in + px.(i2) <- r1*.cc1 -. s2*.ss1; + py.(i2) <- -.s2*.cc1 -. r1*.ss1; + px.(i3) <- s3*.cc3 +. r2*.ss3; + py.(i3) <- r2*.cc3 -. s3*.ss3; + i0r := i0 + !id + done; + is := 2 * !id - !n2 + j; + id := 4 * !id + done + done + done; + +(************************************) +(* Last stage, length=2 butterfly *) +(************************************) + + let is = ref 1 in + let id = ref 4 in + + while !is < n do + let i0r = ref !is in + while !i0r <= n do + let i0 = !i0r in + let i1 = i0 + 1 in + let r1 = px.(i0) in + px.(i0) <- r1 +. px.(i1); + px.(i1) <- r1 -. px.(i1); + let r1 = py.(i0) in + py.(i0) <- r1 +. py.(i1); + py.(i1) <- r1 -. py.(i1); + i0r := i0 + !id + done; + is := 2 * !id - 1; + id := 4 * !id + done; + +(*************************) +(* Bit reverse counter *) +(*************************) + + let j = ref 1 in + + for i = 1 to n - 1 do + if i < !j then begin + let xt = px.(!j) in + px.(!j) <- px.(i); + px.(i) <- xt; + let xt = py.(!j) in + py.(!j) <- py.(i); + py.(i) <- xt + end; + let k = ref(n / 2) in + while !k < !j do + j := !j - !k; + k := !k / 2 + done; + j := !j + !k + done; + + n + + +let test np = + print_int np; print_string "... "; flush stdout; + let enp = float np in + let npm = np / 2 - 1 in + let pxr = Array.new (np+2) 0.0 + and pxi = Array.new (np+2) 0.0 in + let t = pi /. enp in + pxr.(1) <- (enp -. 1.0) *. 0.5; + pxi.(1) <- 0.0; + let n2 = np / 2 in + pxr.(n2+1) <- -0.5; + pxi.(n2+1) <- 0.0; + + for i = 1 to npm do + let j = np - i in + pxr.(i+1) <- -0.5; + pxr.(j+1) <- -0.5; + let z = t *. float i in + let y = -0.5*.(cos(z)/.sin(z)) in + pxi.(i+1) <- y; + pxi.(j+1) <- -.y + done; +(** + print_newline(); + for i=0 to 15 do printf "%d %f %f\n" i pxr.(i+1) pxi.(i+1) done; +**) + fft pxr pxi np; +(** + for i=0 to 15 do printf "%d %f %f\n" i pxr.(i+1) pxi.(i+1) done; +**) + let zr = ref 0.0 in + let zi = ref 0.0 in + let kr = ref 0 in + let ki = ref 0 in + for i = 0 to np-1 do + let a = abs_float(pxr.(i+1) -. float i) in + if !zr < a then begin + zr := a; + kr := i + end; + let a = abs_float(pxi.(i+1)) in + if !zi < a then begin + zi := a; + ki := i + end + done; + let zm = if abs_float !zr < abs_float !zi then !zi else !zr in + print_float zm; print_newline() + + +let _ = + let np = ref 16 in for i = 1 to 12 do test !np; np := !np*2 done + diff --git a/test/fib.ml b/test/fib.ml new file mode 100644 index 0000000000..536fcfd366 --- /dev/null +++ b/test/fib.ml @@ -0,0 +1,10 @@ +let rec fib n = + if n < 2 then 1 else fib(n-1) + fib(n-2) + +let _ = + let n = + if Array.length Sys.argv >= 2 + then int_of_string Sys.argv.(1) + else 30 in + print_int(fib n); print_newline(); exit 0 + diff --git a/test/nucleic.ml b/test/nucleic.ml new file mode 100644 index 0000000000..e4edcfe0f9 --- /dev/null +++ b/test/nucleic.ml @@ -0,0 +1,3325 @@ +(* Use floating-point arithmetic *) + +external (+) : float -> float -> float = "add_float" +external (-) : float -> float -> float = "sub_float" +external ( * ) : float -> float -> float = "mul_float" +external (/) : float -> float -> float = "div_float" + +type intg = int + +(* -- MATH UTILITIES --------------------------------------------------------*) + +let constant_pi = 3.14159265358979323846 +let constant_minus_pi = -3.14159265358979323846 +let constant_pi2 = 1.57079632679489661923 +let constant_minus_pi2 = -1.57079632679489661923 + +(* -- POINTS ----------------------------------------------------------------*) + +type pt = float * float * float + +let +pt_sub ((x1,y1,z1):pt) ((x2,y2,z2):pt) + = (x1 - x2, y1 - y2, z1 - z2) + + +let +pt_dist ((x1,y1,z1):pt) ((x2,y2,z2):pt) + = let dx = x1 - x2 + and dy = y1 - y2 + and dz = z1 - z2 + in + sqrt ((dx * dx) + (dy * dy) + (dz * dz)) + + +let +pt_phi ((x,y,z):pt) + = let b = atan2 x z + in + atan2 (((cos b) * z + (sin b) * x)) y + + +let +pt_theta ((x,y,z):pt) + = atan2 x z + + +(* -- COORDINATE TRANSFORMATIONS --------------------------------------------*) + +(* + The notation for the transformations follows "Paul, R.P. (1981) Robot + Manipulators. MIT Press." with the exception that our transformation + matrices don't have the perspective terms and are the transpose of + Paul's one. See also "M\"antyl\"a, M. (1985) An Introduction to + Solid Modeling, Computer Science Press" Appendix A. + + The components of a transformation matrix are named like this: + + a b c + d e f + g h i + tx ty tz + + The components tx, ty, and tz are the translation vector. +*) + +type tfo = + float*float*float*float*float*float*float*float*float*float*float*float + + +let tfo_id = (1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0) + +(* + The function "tfo-apply" multiplies a transformation matrix, tfo, by a + point vector, p. The result is a new point. +*) + +let +tfo_apply ((a,b,c,d,e,f,g,h,i,tx,ty,tz):tfo) ((x,y,z):pt) + = ( + ((x * a) + (y * d) + (z * g) + tx) + , + ((x * b) + (y * e) + (z * h) + ty) + , + ((x * c) + (y * f) + (z * i) + tz) + ) + + +(* + The function "tfo-combine" multiplies two transformation matrices A and B. + The result is a new matrix which cumulates the transformations described + by A and B. +*) + +let +tfo_combine + ((a_a,a_b,a_c,a_d,a_e,a_f,a_g,a_h,a_i,a_tx,a_ty,a_tz):tfo) + ((b_a,b_b,b_c,b_d,b_e,b_f,b_g,b_h,b_i,b_tx,b_ty,b_tz):tfo) + = ( + ((a_a * b_a) + (a_b * b_d) + (a_c * b_g)) + , + ((a_a * b_b) + (a_b * b_e) + (a_c * b_h)) + , + ((a_a * b_c) + (a_b * b_f) + (a_c * b_i)) + , + ((a_d * b_a) + (a_e * b_d) + (a_f * b_g)) + , + ((a_d * b_b) + (a_e * b_e) + (a_f * b_h)) + , + ((a_d * b_c) + (a_e * b_f) + (a_f * b_i)) + , + ((a_g * b_a) + (a_h * b_d) + (a_i * b_g)) + , + ((a_g * b_b) + (a_h * b_e) + (a_i * b_h)) + , + ((a_g * b_c) + (a_h * b_f) + (a_i * b_i)) + , + ((a_tx * b_a) + (a_ty * b_d) + (a_tz * b_g) + b_tx) + , + ((a_tx * b_b) + (a_ty * b_e) + (a_tz * b_h) + b_ty) + , + ((a_tx * b_c) + (a_ty * b_f) + (a_tz * b_i) + b_tz) + ) + +(* + The function "tfo-inv-ortho" computes the inverse of a homogeneous + transformation matrix. +*) + +let +tfo_inv_ortho ((a,b,c,d,e,f,g,h,i,tx,ty,tz):tfo) + = ( + a,d,g, + b,e,h, + c,f,i, + (-.((a * tx) + (b * ty) + (c * tz))) + , + (-.((d * tx) + (e * ty) + (f * tz))) + , + (-.((g * tx) + (h * ty) + (i * tz))) + ) + + +(* + Given three points p1, p2, and p3, the function "tfo-align" computes + a transformation matrix such that point p1 gets mapped to (0,0,0), p2 gets + mapped to the Y axis and p3 gets mapped to the YZ plane. +*) + +let +tfo_align ((x1,y1,z1):pt) ((x2,y2,z2):pt) ((x3,y3,z3):pt) + = let x31 = x3 - x1 in + let y31 = y3 - y1 in + let z31 = z3 - z1 in + let rotpy = pt_sub (x2,y2,z2) (x1,y1,z1) in + let phi = pt_phi rotpy in + let theta = pt_theta rotpy in + let sinp = sin phi in + let sint = sin theta in + let cosp = cos phi in + let cost = cos theta in + let sinpsint = sinp * sint in + let sinpcost = sinp * cost in + let cospsint = cosp * sint in + let cospcost = cosp * cost in + let rotpz = ( + ((cost * x31) - (sint * z31)) + , + ((sinpsint * x31) + (cosp * y31) + (sinpcost * z31)) + , + ((cospsint * x31) + (-.(sinp * y31)) + (cospcost * z31)) + ) in + let rho = pt_theta rotpz in + let cosr = cos rho in + let sinr = sin rho in + let x = (-.(x1 * cost)) + (z1 * sint) in + let y = ((-.(x1 * sinpsint)) - (y1 * cosp)) - (z1 * sinpcost) in + let z = ((-.(x1 * cospsint) + (y1 * sinp))) - (z1 * cospcost) in + ( + ((cost * cosr) - (cospsint * sinr)) + , + sinpsint + , + ((cost * sinr) + (cospsint * cosr)) + , + (sinp * sinr) + , + cosp + , + (-.(sinp * cosr)) + , + ((-.(sint * cosr)) - (cospcost * sinr)) + , + sinpcost + , + ((-.(sint * sinr) + (cospcost * cosr))) + , + ((x * cosr) - (z * sinr)) + , + y + , + ((x * sinr + (z * cosr))) + ) + + +(* -- NUCLEIC ACID CONFORMATIONS DATA BASE ----------------------------------*) + +(* + Numbering of atoms follows the paper: + + IUPAC-IUB Joint Commission on Biochemical Nomenclature (JCBN) + (1983) Abbreviations and Symbols for the Description of + Conformations of Polynucleotide Chains. Eur. J. Biochem 131, + 9-15. +*) + +(* Define remaining atoms for each nucleotide type. *) + +type nuc_specific = + A of pt*pt*pt*pt*pt*pt*pt*pt +| C of pt*pt*pt*pt*pt*pt +| G of pt*pt*pt*pt*pt*pt*pt*pt*pt +| U of pt*pt*pt*pt*pt + + +(* + A n6 n7 n9 c8 h2 h61 h62 h8 + C n4 o2 h41 h42 h5 h6 + G n2 n7 n9 c8 o6 h1 h21 h22 h8 + U o2 o4 h3 h5 h6 +*) + +(* Define part common to all 4 nucleotide types. *) + +type nuc = tfo*tfo*tfo*tfo* + pt*pt*pt*pt*pt*pt*pt*pt*pt*pt*pt*pt* + pt*pt*pt*pt*pt*pt*pt*pt*pt*pt*pt*pt* + pt*nuc_specific + + +(* + dgf_base_tfo ; defines the standard position for wc and wc_dumas + p_o3'_275_tfo ; defines the standard position for the connect function + p_o3'_180_tfo + p_o3'_60_tfo + p o1p o2p o5' c5' h5' h5'' c4' h4' o4' c1' h1' c2' h2'' o2' h2' c3' + h3' o3' n1 n3 c2 c4 c5 c6 +*) + +let is_A = function + (dgf_base_tfo,p_o3'_275_tfo,p_o3'_180_tfo,p_o3'_60_tfo, + p,o1p,o2p,o5',c5',h5',h5'',c4',h4',o4',c1',h1',c2',h2'',o2',h2', + c3',h3',o3',n1,n3,c2,c4,c5,c6,A(_,_,_,_,_,_,_,_)) -> true + | _ -> false + + +let is_C = function + (dgf_base_tfo,p_o3'_275_tfo,p_o3'_180_tfo,p_o3'_60_tfo, + p,o1p,o2p,o5',c5',h5',h5'',c4',h4',o4',c1',h1',c2',h2'',o2',h2', + c3',h3',o3',n1,n3,c2,c4,c5,c6,C(_,_,_,_,_,_)) -> true + | _ -> false + + +let is_G = function + (dgf_base_tfo,p_o3'_275_tfo,p_o3'_180_tfo,p_o3'_60_tfo, + p,o1p,o2p,o5',c5',h5',h5'',c4',h4',o4',c1',h1',c2',h2'',o2',h2', + c3',h3',o3',n1,n3,c2,c4,c5,c6,G(_,_,_,_,_,_,_,_,_)) -> true + | _ -> false + + +let +nuc_C1' + (dgf_base_tfo,p_o3'_275_tfo,p_o3'_180_tfo,p_o3'_60_tfo, + p,o1p,o2p,o5',c5',h5',h5'',c4',h4',o4',c1',h1',c2',h2'',o2',h2', + c3',h3',o3',n1,n3,c2,c4,c5,c6,_) + = c1' + + +let +nuc_C2 + (dgf_base_tfo,p_o3'_275_tfo,p_o3'_180_tfo,p_o3'_60_tfo, + p,o1p,o2p,o5',c5',h5',h5'',c4',h4',o4',c1',h1',c2',h2'',o2',h2', + c3',h3',o3',n1,n3,c2,c4,c5,c6,_) + = c2 + + +let +nuc_C3' + (dgf_base_tfo,p_o3'_275_tfo,p_o3'_180_tfo,p_o3'_60_tfo, + p,o1p,o2p,o5',c5',h5',h5'',c4',h4',o4',c1',h1',c2',h2'',o2',h2', + c3',h3',o3',n1,n3,c2,c4,c5,c6,_) + = c3' + + +let +nuc_C4 + (dgf_base_tfo,p_o3'_275_tfo,p_o3'_180_tfo,p_o3'_60_tfo, + p,o1p,o2p,o5',c5',h5',h5'',c4',h4',o4',c1',h1',c2',h2'',o2',h2', + c3',h3',o3',n1,n3,c2,c4,c5,c6,_) + = c4 + + +let +nuc_C4' + (dgf_base_tfo,p_o3'_275_tfo,p_o3'_180_tfo,p_o3'_60_tfo, + p,o1p,o2p,o5',c5',h5',h5'',c4',h4',o4',c1',h1',c2',h2'',o2',h2', + c3',h3',o3',n1,n3,c2,c4,c5,c6,_) + = c4' + + +let +nuc_N1 + (dgf_base_tfo,p_o3'_275_tfo,p_o3'_180_tfo,p_o3'_60_tfo, + p,o1p,o2p,o5',c5',h5',h5'',c4',h4',o4',c1',h1',c2',h2'',o2',h2', + c3',h3',o3',n1,n3,c2,c4,c5,c6,_) + = n1 + + +let +nuc_O3' + (dgf_base_tfo,p_o3'_275_tfo,p_o3'_180_tfo,p_o3'_60_tfo, + p,o1p,o2p,o5',c5',h5',h5'',c4',h4',o4',c1',h1',c2',h2'',o2',h2', + c3',h3',o3',n1,n3,c2,c4,c5,c6,_) + = o3' + + +let +nuc_P + (dgf_base_tfo,p_o3'_275_tfo,p_o3'_180_tfo,p_o3'_60_tfo, + p,o1p,o2p,o5',c5',h5',h5'',c4',h4',o4',c1',h1',c2',h2'',o2',h2', + c3',h3',o3',n1,n3,c2,c4,c5,c6,_) + = p + + +let +nuc_dgf_base_tfo + (dgf_base_tfo,p_o3'_275_tfo,p_o3'_180_tfo,p_o3'_60_tfo, + p,o1p,o2p,o5',c5',h5',h5'',c4',h4',o4',c1',h1',c2',h2'',o2',h2', + c3',h3',o3',n1,n3,c2,c4,c5,c6,_) + = dgf_base_tfo + + +let +nuc_p_o3'_180_tfo + (dgf_base_tfo,p_o3'_275_tfo,p_o3'_180_tfo,p_o3'_60_tfo, + p,o1p,o2p,o5',c5',h5',h5'',c4',h4',o4',c1',h1',c2',h2'',o2',h2', + c3',h3',o3',n1,n3,c2,c4,c5,c6,_) + = p_o3'_180_tfo + + +let +nuc_p_o3'_275_tfo + (dgf_base_tfo,p_o3'_275_tfo,p_o3'_180_tfo,p_o3'_60_tfo, + p,o1p,o2p,o5',c5',h5',h5'',c4',h4',o4',c1',h1',c2',h2'',o2',h2', + c3',h3',o3',n1,n3,c2,c4,c5,c6,_) + = p_o3'_275_tfo + + +let +nuc_p_o3'_60_tfo + (dgf_base_tfo,p_o3'_275_tfo,p_o3'_180_tfo,p_o3'_60_tfo, + p,o1p,o2p,o5',c5',h5',h5'',c4',h4',o4',c1',h1',c2',h2'',o2',h2', + c3',h3',o3',n1,n3,c2,c4,c5,c6,_) + = p_o3'_60_tfo + + +let +rA_N9 + (dgf_base_tfo,p_o3'_275_tfo,p_o3'_180_tfo,p_o3'_60_tfo, + p,o1p,o2p,o5',c5',h5',h5'',c4',h4',o4',c1',h1',c2',h2'',o2',h2', + c3',h3',o3',n1,n3,c2,c4,c5,c6,A (n6,n7,n9,c8,h2,h61,h62,h8)) + = n9 + + +let +rG_N9 + (dgf_base_tfo,p_o3'_275_tfo,p_o3'_180_tfo,p_o3'_60_tfo, + p,o1p,o2p,o5',c5',h5',h5'',c4',h4',o4',c1',h1',c2',h2'',o2',h2', + c3',h3',o3',n1,n3,c2,c4,c5,c6,G (n2,n7,n9,c8,o6,h1,h21,h22,h8)) + = n9 + + +(* Database of nucleotide conformations: *) + +let rA + = ( + ( (-0.0018), (-0.8207), (0.5714), (* dgf_base_tfo *) + (0.2679), (-0.5509), (-0.7904), + (0.9634), (0.1517), (0.2209), + (0.0073), (8.4030), (0.6232)), + ( (-0.8143), (-0.5091), (-0.2788), (* P_O3'_275_tfo *) + (-0.0433), (-0.4257), (0.9038), + (-0.5788), (0.7480), (0.3246), + (1.5227), (6.9114), (-7.0765)), + ( (0.3822), (-0.7477), (0.5430), (* P_O3'_180_tfo *) + (0.4552), (0.6637), (0.5935), + (-0.8042), (0.0203), (0.5941), + (-6.9472), (-4.1186), (-5.9108)), + ( (0.5640), (0.8007), (-0.2022), (* P_O3'_60_tfo *) + (-0.8247), (0.5587), (-0.0878), + (0.0426), (0.2162), (0.9754), + (6.2694), (-7.0540), (3.3316)), + ( (2.8930), (8.5380), (-3.3280)), (* P *) + ( (1.6980), (7.6960), (-3.5570)), (* O1P *) + ( (3.2260), (9.5010), (-4.4020)), (* O2P *) + ( (4.1590), (7.6040), (-3.0340)), (* O5' *) + ( (5.4550), (8.2120), (-2.8810)), (* C5' *) + ( (5.4546), (8.8508), (-1.9978)), (* H5' *) + ( (5.7588), (8.6625), (-3.8259)), (* H5'' *) + ( (6.4970), (7.1480), (-2.5980)), (* C4' *) + ( (7.4896), (7.5919), (-2.5214)), (* H4' *) + ( (6.1630), (6.4860), (-1.3440)), (* O4' *) + ( (6.5400), (5.1200), (-1.4190)), (* C1' *) + ( (7.2763), (4.9681), (-0.6297)), (* H1' *) + ( (7.1940), (4.8830), (-2.7770)), (* C2' *) + ( (6.8667), (3.9183), (-3.1647)), (* H2'' *) + ( (8.5860), (5.0910), (-2.6140)), (* O2' *) + ( (8.9510), (4.7626), (-1.7890)), (* H2' *) + ( (6.5720), (6.0040), (-3.6090)), (* C3' *) + ( (5.5636), (5.7066), (-3.8966)), (* H3' *) + ( (7.3801), (6.3562), (-4.7350)), (* O3' *) + ( (4.7150), (0.4910), (-0.1360)), (* N1 *) + ( (6.3490), (2.1730), (-0.6020)), (* N3 *) + ( (5.9530), (0.9650), (-0.2670)), (* C2 *) + ( (5.2900), (2.9790), (-0.8260)), (* C4 *) + ( (3.9720), (2.6390), (-0.7330)), (* C5 *) + ( (3.6770), (1.3160), (-0.3660)), (* C6 *) + (A ( + ( (2.4280), (0.8450), (-0.2360)), (* N6 *) + ( (3.1660), (3.7290), (-1.0360)), (* N7 *) + ( (5.3170), (4.2990), (-1.1930)), (* N9 *) + ( (4.0100), (4.6780), (-1.2990)), (* C8 *) + ( (6.6890), (0.1903), (-0.0518)), (* H2 *) + ( (1.6470), (1.4460), (-0.4040)), (* H61 *) + ( (2.2780), (-0.1080), (-0.0280)), (* H62 *) + ( (3.4421), (5.5744), (-1.5482))) (* H8 *) + ) + ) + + +let rA01 + = ( + ( (-0.0043), (-0.8175), (0.5759), (* dgf_base_tfo *) + (0.2617), (-0.5567), (-0.7884), + (0.9651), (0.1473), (0.2164), + (0.0359), (8.3929), (0.5532)), + ( (-0.8143), (-0.5091), (-0.2788), (* P_O3'_275_tfo *) + (-0.0433), (-0.4257), (0.9038), + (-0.5788), (0.7480), (0.3246), + (1.5227), (6.9114), (-7.0765)), + ( (0.3822), (-0.7477), (0.5430), (* P_O3'_180_tfo *) + (0.4552), (0.6637), (0.5935), + (-0.8042), (0.0203), (0.5941), + (-6.9472), (-4.1186), (-5.9108)), + ( (0.5640), (0.8007), (-0.2022), (* P_O3'_60_tfo *) + (-0.8247), (0.5587), (-0.0878), + (0.0426), (0.2162), (0.9754), + (6.2694), (-7.0540), (3.3316)), + ( (2.8930), (8.5380), (-3.3280)), (* P *) + ( (1.6980), (7.6960), (-3.5570)), (* O1P *) + ( (3.2260), (9.5010), (-4.4020)), (* O2P *) + ( (4.1590), (7.6040), (-3.0340)), (* O5' *) + ( (5.4352), (8.2183), (-2.7757)), (* C5' *) + ( (5.3830), (8.7883), (-1.8481)), (* H5' *) + ( (5.7729), (8.7436), (-3.6691)), (* H5'' *) + ( (6.4830), (7.1518), (-2.5252)), (* C4' *) + ( (7.4749), (7.5972), (-2.4482)), (* H4' *) + ( (6.1626), (6.4620), (-1.2827)), (* O4' *) + ( (6.5431), (5.0992), (-1.3905)), (* C1' *) + ( (7.2871), (4.9328), (-0.6114)), (* H1' *) + ( (7.1852), (4.8935), (-2.7592)), (* C2' *) + ( (6.8573), (3.9363), (-3.1645)), (* H2'' *) + ( (8.5780), (5.1025), (-2.6046)), (* O2' *) + ( (8.9516), (4.7577), (-1.7902)), (* H2' *) + ( (6.5522), (6.0300), (-3.5612)), (* C3' *) + ( (5.5420), (5.7356), (-3.8459)), (* H3' *) + ( (7.3487), (6.4089), (-4.6867)), (* O3' *) + ( (4.7442), (0.4514), (-0.1390)), (* N1 *) + ( (6.3687), (2.1459), (-0.5926)), (* N3 *) + ( (5.9795), (0.9335), (-0.2657)), (* C2 *) + ( (5.3052), (2.9471), (-0.8125)), (* C4 *) + ( (3.9891), (2.5987), (-0.7230)), (* C5 *) + ( (3.7016), (1.2717), (-0.3647)), (* C6 *) + (A ( + ( (2.4553), (0.7925), (-0.2390)), (* N6 *) + ( (3.1770), (3.6859), (-1.0198)), (* N7 *) + ( (5.3247), (4.2695), (-1.1710)), (* N9 *) + ( (4.0156), (4.6415), (-1.2759)), (* C8 *) + ( (6.7198), (0.1618), (-0.0547)), (* H2 *) + ( (1.6709), (1.3900), (-0.4039)), (* H61 *) + ( (2.3107), (-0.1627), (-0.0373)), (* H62 *) + ( (3.4426), (5.5361), (-1.5199))) (* H8 *) + ) + ) + + +let rA02 + = ( + ( (0.5566), (0.0449), (0.8296), (* dgf_base_tfo *) + (0.5125), (0.7673), (-0.3854), + (-0.6538), (0.6397), (0.4041), + (-9.1161), (-3.7679), (-2.9968)), + ( (-0.8143), (-0.5091), (-0.2788), (* P_O3'_275_tfo *) + (-0.0433), (-0.4257), (0.9038), + (-0.5788), (0.7480), (0.3246), + (1.5227), (6.9114), (-7.0765)), + ( (0.3822), (-0.7477), (0.5430), (* P_O3'_180_tfo *) + (0.4552), (0.6637), (0.5935), + (-0.8042), (0.0203), (0.5941), + (-6.9472), (-4.1186), (-5.9108)), + ( (0.5640), (0.8007), (-0.2022), (* P_O3'_60_tfo *) + (-0.8247), (0.5587), (-0.0878), + (0.0426), (0.2162), (0.9754), + (6.2694), (-7.0540), (3.3316)), + ( (2.8930), (8.5380), (-3.3280)), (* P *) + ( (1.6980), (7.6960), (-3.5570)), (* O1P *) + ( (3.2260), (9.5010), (-4.4020)), (* O2P *) + ( (4.1590), (7.6040), (-3.0340)), (* O5' *) + ( (4.5778), (6.6594), (-4.0364)), (* C5' *) + ( (4.9220), (7.1963), (-4.9204)), (* H5' *) + ( (3.7996), (5.9091), (-4.1764)), (* H5'' *) + ( (5.7873), (5.8869), (-3.5482)), (* C4' *) + ( (6.0405), (5.0875), (-4.2446)), (* H4' *) + ( (6.9135), (6.8036), (-3.4310)), (* O4' *) + ( (7.7293), (6.4084), (-2.3392)), (* C1' *) + ( (8.7078), (6.1815), (-2.7624)), (* H1' *) + ( (7.1305), (5.1418), (-1.7347)), (* C2' *) + ( (7.2040), (5.1982), (-0.6486)), (* H2'' *) + ( (7.7417), (4.0392), (-2.3813)), (* O2' *) + ( (8.6785), (4.1443), (-2.5630)), (* H2' *) + ( (5.6666), (5.2728), (-2.1536)), (* C3' *) + ( (5.1747), (5.9805), (-1.4863)), (* H3' *) + ( (4.9997), (4.0086), (-2.1973)), (* O3' *) + ( (10.3245), (8.5459), (1.5467)), (* N1 *) + ( (9.8051), (6.9432), (-0.1497)), (* N3 *) + ( (10.5175), (7.4328), (0.8408)), (* C2 *) + ( (8.7523), (7.7422), (-0.4228)), (* C4 *) + ( (8.4257), (8.9060), (0.2099)), (* C5 *) + ( (9.2665), (9.3242), (1.2540)), (* C6 *) + (A ( + ( (9.0664), (10.4462), (1.9610)), (* N6 *) + ( (7.2750), (9.4537), (-0.3428)), (* N7 *) + ( (7.7962), (7.5519), (-1.3859)), (* N9 *) + ( (6.9479), (8.6157), (-1.2771)), (* C8 *) + ( (11.4063), (6.9047), (1.1859)), (* H2 *) + ( (8.2845), (11.0341), (1.7552)), (* H61 *) + ( (9.6584), (10.6647), (2.7198)), (* H62 *) + ( (6.0430), (8.9853), (-1.7594))) (* H8 *) + ) + ) + +let rA03 + = ( + ( (-0.5021), (0.0731), (0.8617), (* dgf_base_tfo *) + (-0.8112), (0.3054), (-0.4986), + (-0.2996), (-0.9494), (-0.0940), + (6.4273), (-5.1944), (-3.7807)), + ( (-0.8143), (-0.5091), (-0.2788), (* P_O3'_275_tfo *) + (-0.0433), (-0.4257), (0.9038), + (-0.5788), (0.7480), (0.3246), + (1.5227), (6.9114), (-7.0765)), + ( (0.3822), (-0.7477), (0.5430), (* P_O3'_180_tfo *) + (0.4552), (0.6637), (0.5935), + (-0.8042), (0.0203), (0.5941), + (-6.9472), (-4.1186), (-5.9108)), + ( (0.5640), (0.8007), (-0.2022), (* P_O3'_60_tfo *) + (-0.8247), (0.5587), (-0.0878), + (0.0426), (0.2162), (0.9754), + (6.2694), (-7.0540), (3.3316)), + ( (2.8930), (8.5380), (-3.3280)), (* P *) + ( (1.6980), (7.6960), (-3.5570)), (* O1P *) + ( (3.2260), (9.5010), (-4.4020)), (* O2P *) + ( (4.1590), (7.6040), (-3.0340)), (* O5' *) + ( (4.1214), (6.7116), (-1.9049)), (* C5' *) + ( (3.3465), (5.9610), (-2.0607)), (* H5' *) + ( (4.0789), (7.2928), (-0.9837)), (* H5'' *) + ( (5.4170), (5.9293), (-1.8186)), (* C4' *) + ( (5.4506), (5.3400), (-0.9023)), (* H4' *) + ( (5.5067), (5.0417), (-2.9703)), (* O4' *) + ( (6.8650), (4.9152), (-3.3612)), (* C1' *) + ( (7.1090), (3.8577), (-3.2603)), (* H1' *) + ( (7.7152), (5.7282), (-2.3894)), (* C2' *) + ( (8.5029), (6.2356), (-2.9463)), (* H2'' *) + ( (8.1036), (4.8568), (-1.3419)), (* O2' *) + ( (8.3270), (3.9651), (-1.6184)), (* H2' *) + ( (6.7003), (6.7565), (-1.8911)), (* C3' *) + ( (6.5898), (7.5329), (-2.6482)), (* H3' *) + ( (7.0505), (7.2878), (-0.6105)), (* O3' *) + ( (9.6740), (4.7656), (-7.6614)), (* N1 *) + ( (9.0739), (4.3013), (-5.3941)), (* N3 *) + ( (9.8416), (4.2192), (-6.4581)), (* C2 *) + ( (7.9885), (5.0632), (-5.6446)), (* C4 *) + ( (7.6822), (5.6856), (-6.8194)), (* C5 *) + ( (8.5831), (5.5215), (-7.8840)), (* C6 *) + (A ( + ( (8.4084), (6.0747), (-9.0933)), (* N6 *) + ( (6.4857), (6.3816), (-6.7035)), (* N7 *) + ( (6.9740), (5.3703), (-4.7760)), (* N9 *) + ( (6.1133), (6.1613), (-5.4808)), (* C8 *) + ( (10.7627), (3.6375), (-6.4220)), (* H2 *) + ( (7.6031), (6.6390), (-9.2733)), (* H61 *) + ( (9.1004), (5.9708), (-9.7893)), (* H62 *) + ( (5.1705), (6.6830), (-5.3167))) (* H8 *) + ) + ) + + +let rA04 + = ( + ( (-0.5426), (-0.8175), (0.1929), (* dgf_base_tfo *) + (0.8304), (-0.5567), (-0.0237), + (0.1267), (0.1473), (0.9809), + (-0.5075), (8.3929), (0.2229)), + ( (-0.8143), (-0.5091), (-0.2788), (* P_O3'_275_tfo *) + (-0.0433), (-0.4257), (0.9038), + (-0.5788), (0.7480), (0.3246), + (1.5227), (6.9114), (-7.0765)), + ( (0.3822), (-0.7477), (0.5430), (* P_O3'_180_tfo *) + (0.4552), (0.6637), (0.5935), + (-0.8042), (0.0203), (0.5941), + (-6.9472), (-4.1186), (-5.9108)), + ( (0.5640), (0.8007), (-0.2022), (* P_O3'_60_tfo *) + (-0.8247), (0.5587), (-0.0878), + (0.0426), (0.2162), (0.9754), + (6.2694), (-7.0540), (3.3316)), + ( (2.8930), (8.5380), (-3.3280)), (* P *) + ( (1.6980), (7.6960), (-3.5570)), (* O1P *) + ( (3.2260), (9.5010), (-4.4020)), (* O2P *) + ( (4.1590), (7.6040), (-3.0340)), (* O5' *) + ( (5.4352), (8.2183), (-2.7757)), (* C5' *) + ( (5.3830), (8.7883), (-1.8481)), (* H5' *) + ( (5.7729), (8.7436), (-3.6691)), (* H5'' *) + ( (6.4830), (7.1518), (-2.5252)), (* C4' *) + ( (7.4749), (7.5972), (-2.4482)), (* H4' *) + ( (6.1626), (6.4620), (-1.2827)), (* O4' *) + ( (6.5431), (5.0992), (-1.3905)), (* C1' *) + ( (7.2871), (4.9328), (-0.6114)), (* H1' *) + ( (7.1852), (4.8935), (-2.7592)), (* C2' *) + ( (6.8573), (3.9363), (-3.1645)), (* H2'' *) + ( (8.5780), (5.1025), (-2.6046)), (* O2' *) + ( (8.9516), (4.7577), (-1.7902)), (* H2' *) + ( (6.5522), (6.0300), (-3.5612)), (* C3' *) + ( (5.5420), (5.7356), (-3.8459)), (* H3' *) + ( (7.3487), (6.4089), (-4.6867)), (* O3' *) + ( (3.6343), (2.6680), (2.0783)), (* N1 *) + ( (5.4505), (3.9805), (1.2446)), (* N3 *) + ( (4.7540), (3.3816), (2.1851)), (* C2 *) + ( (4.8805), (3.7951), (0.0354)), (* C4 *) + ( (3.7416), (3.0925), (-0.2305)), (* C5 *) + ( (3.0873), (2.4980), (0.8606)), (* C6 *) + (A ( + ( (1.9600), (1.7805), (0.7462)), (* N6 *) + ( (3.4605), (3.1184), (-1.5906)), (* N7 *) + ( (5.3247), (4.2695), (-1.1710)), (* N9 *) + ( (4.4244), (3.8244), (-2.0953)), (* C8 *) + ( (5.0814), (3.4352), (3.2234)), (* H2 *) + ( (1.5423), (1.6454), (-0.1520)), (* H61 *) + ( (1.5716), (1.3398), (1.5392)), (* H62 *) + ( (4.2675), (3.8876), (-3.1721))) (* H8 *) + ) + ) + + +let rA05 + = ( + ( (-0.5891), (0.0449), (0.8068), (* dgf_base_tfo *) + (0.5375), (0.7673), (0.3498), + (-0.6034), (0.6397), (-0.4762), + (-0.3019), (-3.7679), (-9.5913)), + ( (-0.8143), (-0.5091), (-0.2788), (* P_O3'_275_tfo *) + (-0.0433), (-0.4257), (0.9038), + (-0.5788), (0.7480), (0.3246), + (1.5227), (6.9114), (-7.0765)), + ( (0.3822), (-0.7477), (0.5430), (* P_O3'_180_tfo *) + (0.4552), (0.6637), (0.5935), + (-0.8042), (0.0203), (0.5941), + (-6.9472), (-4.1186), (-5.9108)), + ( (0.5640), (0.8007), (-0.2022), (* P_O3'_60_tfo *) + (-0.8247), (0.5587), (-0.0878), + (0.0426), (0.2162), (0.9754), + (6.2694), (-7.0540), (3.3316)), + ( (2.8930), (8.5380), (-3.3280)), (* P *) + ( (1.6980), (7.6960), (-3.5570)), (* O1P *) + ( (3.2260), (9.5010), (-4.4020)), (* O2P *) + ( (4.1590), (7.6040), (-3.0340)), (* O5' *) + ( (4.5778), (6.6594), (-4.0364)), (* C5' *) + ( (4.9220), (7.1963), (-4.9204)), (* H5' *) + ( (3.7996), (5.9091), (-4.1764)), (* H5'' *) + ( (5.7873), (5.8869), (-3.5482)), (* C4' *) + ( (6.0405), (5.0875), (-4.2446)), (* H4' *) + ( (6.9135), (6.8036), (-3.4310)), (* O4' *) + ( (7.7293), (6.4084), (-2.3392)), (* C1' *) + ( (8.7078), (6.1815), (-2.7624)), (* H1' *) + ( (7.1305), (5.1418), (-1.7347)), (* C2' *) + ( (7.2040), (5.1982), (-0.6486)), (* H2'' *) + ( (7.7417), (4.0392), (-2.3813)), (* O2' *) + ( (8.6785), (4.1443), (-2.5630)), (* H2' *) + ( (5.6666), (5.2728), (-2.1536)), (* C3' *) + ( (5.1747), (5.9805), (-1.4863)), (* H3' *) + ( (4.9997), (4.0086), (-2.1973)), (* O3' *) + ( (10.2594), (10.6774), (-1.0056)), (* N1 *) + ( (9.7528), (8.7080), (-2.2631)), (* N3 *) + ( (10.4471), (9.7876), (-1.9791)), (* C2 *) + ( (8.7271), (8.5575), (-1.3991)), (* C4 *) + ( (8.4100), (9.3803), (-0.3580)), (* C5 *) + ( (9.2294), (10.5030), (-0.1574)), (* C6 *) + (A ( + ( (9.0349), (11.3951), (0.8250)), (* N6 *) + ( (7.2891), (8.9068), (0.3121)), (* N7 *) + ( (7.7962), (7.5519), (-1.3859)), (* N9 *) + ( (6.9702), (7.8292), (-0.3353)), (* C8 *) + ( (11.3132), (10.0537), (-2.5851)), (* H2 *) + ( (8.2741), (11.2784), (1.4629)), (* H61 *) + ( (9.6733), (12.1368), (0.9529)), (* H62 *) + ( (6.0888), (7.3990), (0.1403))) (* H8 *) + ) + ) + + +let rA06 + = ( + ( (-0.9815), (0.0731), (-0.1772), (* dgf_base_tfo *) + (0.1912), (0.3054), (-0.9328), + (-0.0141), (-0.9494), (-0.3137), + (5.7506), (-5.1944), (4.7470)), + ( (-0.8143), (-0.5091), (-0.2788), (* P_O3'_275_tfo *) + (-0.0433), (-0.4257), (0.9038), + (-0.5788), (0.7480), (0.3246), + (1.5227), (6.9114), (-7.0765)), + ( (0.3822), (-0.7477), (0.5430), (* P_O3'_180_tfo *) + (0.4552), (0.6637), (0.5935), + (-0.8042), (0.0203), (0.5941), + (-6.9472), (-4.1186), (-5.9108)), + ( (0.5640), (0.8007), (-0.2022), (* P_O3'_60_tfo *) + (-0.8247), (0.5587), (-0.0878), + (0.0426), (0.2162), (0.9754), + (6.2694), (-7.0540), (3.3316)), + ( (2.8930), (8.5380), (-3.3280)), (* P *) + ( (1.6980), (7.6960), (-3.5570)), (* O1P *) + ( (3.2260), (9.5010), (-4.4020)), (* O2P *) + ( (4.1590), (7.6040), (-3.0340)), (* O5' *) + ( (4.1214), (6.7116), (-1.9049)), (* C5' *) + ( (3.3465), (5.9610), (-2.0607)), (* H5' *) + ( (4.0789), (7.2928), (-0.9837)), (* H5'' *) + ( (5.4170), (5.9293), (-1.8186)), (* C4' *) + ( (5.4506), (5.3400), (-0.9023)), (* H4' *) + ( (5.5067), (5.0417), (-2.9703)), (* O4' *) + ( (6.8650), (4.9152), (-3.3612)), (* C1' *) + ( (7.1090), (3.8577), (-3.2603)), (* H1' *) + ( (7.7152), (5.7282), (-2.3894)), (* C2' *) + ( (8.5029), (6.2356), (-2.9463)), (* H2'' *) + ( (8.1036), (4.8568), (-1.3419)), (* O2' *) + ( (8.3270), (3.9651), (-1.6184)), (* H2' *) + ( (6.7003), (6.7565), (-1.8911)), (* C3' *) + ( (6.5898), (7.5329), (-2.6482)), (* H3' *) + ( (7.0505), (7.2878), (-0.6105)), (* O3' *) + ( (6.6624), (3.5061), (-8.2986)), (* N1 *) + ( (6.5810), (3.2570), (-5.9221)), (* N3 *) + ( (6.5151), (2.8263), (-7.1625)), (* C2 *) + ( (6.8364), (4.5817), (-5.8882)), (* C4 *) + ( (7.0116), (5.4064), (-6.9609)), (* C5 *) + ( (6.9173), (4.8260), (-8.2361)), (* C6 *) + (A ( + ( (7.0668), (5.5163), (-9.3763)), (* N6 *) + ( (7.2573), (6.7070), (-6.5394)), (* N7 *) + ( (6.9740), (5.3703), (-4.7760)), (* N9 *) + ( (7.2238), (6.6275), (-5.2453)), (* C8 *) + ( (6.3146), (1.7741), (-7.3641)), (* H2 *) + ( (7.2568), (6.4972), (-9.3456)), (* H61 *) + ( (7.0437), (5.0478), (-10.2446)), (* H62 *) + ( (7.4108), (7.6227), (-4.8418))) (* H8 *) + ) + ) + + +let rA07 + = ( + ( (0.2379), (0.1310), (-0.9624), (* dgf_base_tfo *) + (-0.5876), (-0.7696), (-0.2499), + (-0.7734), (0.6249), (-0.1061), + (30.9870), (-26.9344), (42.6416)), + ( (0.7529), (0.1548), (0.6397), (* P_O3'_275_tfo *) + (0.2952), (-0.9481), (-0.1180), + (0.5882), (0.2777), (-0.7595), + (-58.8919), (-11.3095), (6.0866)), + ( (-0.0239), (0.9667), (-0.2546), (* P_O3'_180_tfo *) + (0.9731), (-0.0359), (-0.2275), + (-0.2290), (-0.2532), (-0.9399), + (3.5401), (-29.7913), (52.2796)), + ( (-0.8912), (-0.4531), (0.0242), (* P_O3'_60_tfo *) + (-0.1183), (0.1805), (-0.9764), + (0.4380), (-0.8730), (-0.2145), + (19.9023), (54.8054), (15.2799)), + ( (41.8210), (8.3880), (43.5890)), (* P *) + ( (42.5400), (8.0450), (44.8330)), (* O1P *) + ( (42.2470), (9.6920), (42.9910)), (* O2P *) + ( (40.2550), (8.2030), (43.7340)), (* O5' *) + ( (39.3505), (8.4697), (42.6565)), (* C5' *) + ( (39.1377), (7.5433), (42.1230)), (* H5' *) + ( (39.7203), (9.3119), (42.0717)), (* H5'' *) + ( (38.0405), (8.9195), (43.2869)), (* C4' *) + ( (37.3687), (9.3036), (42.5193)), (* H4' *) + ( (37.4319), (7.8146), (43.9387)), (* O4' *) + ( (37.1959), (8.1354), (45.3237)), (* C1' *) + ( (36.1788), (8.5202), (45.3970)), (* H1' *) + ( (38.1721), (9.2328), (45.6504)), (* C2' *) + ( (39.1555), (8.7939), (45.8188)), (* H2'' *) + ( (37.7862), (10.0617), (46.7013)), (* O2' *) + ( (37.3087), (9.6229), (47.4092)), (* H2' *) + ( (38.1844), (10.0268), (44.3367)), (* C3' *) + ( (39.1578), (10.5054), (44.2289)), (* H3' *) + ( (37.0547), (10.9127), (44.3441)), (* O3' *) + ( (34.8811), (4.2072), (47.5784)), (* N1 *) + ( (35.1084), (6.1336), (46.1818)), (* N3 *) + ( (34.4108), (5.1360), (46.7207)), (* C2 *) + ( (36.3908), (6.1224), (46.6053)), (* C4 *) + ( (36.9819), (5.2334), (47.4697)), (* C5 *) + ( (36.1786), (4.1985), (48.0035)), (* C6 *) + (A ( + ( (36.6103), (3.2749), (48.8452)), (* N6 *) + ( (38.3236), (5.5522), (47.6595)), (* N7 *) + ( (37.3887), (7.0024), (46.2437)), (* N9 *) + ( (38.5055), (6.6096), (46.9057)), (* C8 *) + ( (33.3553), (5.0152), (46.4771)), (* H2 *) + ( (37.5730), (3.2804), (49.1507)), (* H61 *) + ( (35.9775), (2.5638), (49.1828)), (* H62 *) + ( (39.5461), (6.9184), (47.0041))) (* H8 *) + ) + ) + + +let rA08 + = ( + ( (0.1084), (-0.0895), (-0.9901), (* dgf_base_tfo *) + (0.9789), (-0.1638), (0.1220), + (-0.1731), (-0.9824), (0.0698), + (-2.9039), (47.2655), (33.0094)), + ( (0.7529), (0.1548), (0.6397), (* P_O3'_275_tfo *) + (0.2952), (-0.9481), (-0.1180), + (0.5882), (0.2777), (-0.7595), + (-58.8919), (-11.3095), (6.0866)), + ( (-0.0239), (0.9667), (-0.2546), (* P_O3'_180_tfo *) + (0.9731), (-0.0359), (-0.2275), + (-0.2290), (-0.2532), (-0.9399), + (3.5401), (-29.7913), (52.2796)), + ( (-0.8912), (-0.4531), (0.0242), (* P_O3'_60_tfo *) + (-0.1183), (0.1805), (-0.9764), + (0.4380), (-0.8730), (-0.2145), + (19.9023), (54.8054), (15.2799)), + ( (41.8210), (8.3880), (43.5890)), (* P *) + ( (42.5400), (8.0450), (44.8330)), (* O1P *) + ( (42.2470), (9.6920), (42.9910)), (* O2P *) + ( (40.2550), (8.2030), (43.7340)), (* O5' *) + ( (39.4850), (8.9301), (44.6977)), (* C5' *) + ( (39.0638), (9.8199), (44.2296)), (* H5' *) + ( (40.0757), (9.0713), (45.6029)), (* H5'' *) + ( (38.3102), (8.0414), (45.0789)), (* C4' *) + ( (37.7842), (8.4637), (45.9351)), (* H4' *) + ( (37.4200), (7.9453), (43.9769)), (* O4' *) + ( (37.2249), (6.5609), (43.6273)), (* C1' *) + ( (36.3360), (6.2168), (44.1561)), (* H1' *) + ( (38.4347), (5.8414), (44.1590)), (* C2' *) + ( (39.2688), (5.9974), (43.4749)), (* H2'' *) + ( (38.2344), (4.4907), (44.4348)), (* O2' *) + ( (37.6374), (4.0386), (43.8341)), (* H2' *) + ( (38.6926), (6.6079), (45.4637)), (* C3' *) + ( (39.7585), (6.5640), (45.6877)), (* H3' *) + ( (37.8238), (6.0705), (46.4723)), (* O3' *) + ( (33.9162), (6.2598), (39.7758)), (* N1 *) + ( (34.6709), (6.5759), (42.0215)), (* N3 *) + ( (33.7257), (6.5186), (41.0858)), (* C2 *) + ( (35.8935), (6.3324), (41.5018)), (* C4 *) + ( (36.2105), (6.0601), (40.1932)), (* C5 *) + ( (35.1538), (6.0151), (39.2537)), (* C6 *) + (A ( + ( (35.3088), (5.7642), (37.9649)), (* N6 *) + ( (37.5818), (5.8677), (40.0507)), (* N7 *) + ( (37.0932), (6.3197), (42.1810)), (* N9 *) + ( (38.0509), (6.0354), (41.2635)), (* C8 *) + ( (32.6830), (6.6898), (41.3532)), (* H2 *) + ( (36.2305), (5.5855), (37.5925)), (* H61 *) + ( (34.5056), (5.7512), (37.3528)), (* H62 *) + ( (39.1318), (5.8993), (41.2285))) (* H8 *) + ) + ) + + +let rA09 + = ( + ( (0.8467), (0.4166), (-0.3311), (* dgf_base_tfo *) + (-0.3962), (0.9089), (0.1303), + (0.3552), (0.0209), (0.9346), + (-42.7319), (-26.6223), (-29.8163)), + ( (0.7529), (0.1548), (0.6397), (* P_O3'_275_tfo *) + (0.2952), (-0.9481), (-0.1180), + (0.5882), (0.2777), (-0.7595), + (-58.8919), (-11.3095), (6.0866)), + ( (-0.0239), (0.9667), (-0.2546), (* P_O3'_180_tfo *) + (0.9731), (-0.0359), (-0.2275), + (-0.2290), (-0.2532), (-0.9399), + (3.5401), (-29.7913), (52.2796)), + ( (-0.8912), (-0.4531), (0.0242), (* P_O3'_60_tfo *) + (-0.1183), (0.1805), (-0.9764), + (0.4380), (-0.8730), (-0.2145), + (19.9023), (54.8054), (15.2799)), + ( (41.8210), (8.3880), (43.5890)), (* P *) + ( (42.5400), (8.0450), (44.8330)), (* O1P *) + ( (42.2470), (9.6920), (42.9910)), (* O2P *) + ( (40.2550), (8.2030), (43.7340)), (* O5' *) + ( (39.3505), (8.4697), (42.6565)), (* C5' *) + ( (39.1377), (7.5433), (42.1230)), (* H5' *) + ( (39.7203), (9.3119), (42.0717)), (* H5'' *) + ( (38.0405), (8.9195), (43.2869)), (* C4' *) + ( (37.6479), (8.1347), (43.9335)), (* H4' *) + ( (38.2691), (10.0933), (44.0524)), (* O4' *) + ( (37.3999), (11.1488), (43.5973)), (* C1' *) + ( (36.5061), (11.1221), (44.2206)), (* H1' *) + ( (37.0364), (10.7838), (42.1836)), (* C2' *) + ( (37.8636), (11.0489), (41.5252)), (* H2'' *) + ( (35.8275), (11.3133), (41.7379)), (* O2' *) + ( (35.6214), (12.1896), (42.0714)), (* H2' *) + ( (36.9316), (9.2556), (42.2837)), (* C3' *) + ( (37.1778), (8.8260), (41.3127)), (* H3' *) + ( (35.6285), (8.9334), (42.7926)), (* O3' *) + ( (38.1482), (15.2833), (46.4641)), (* N1 *) + ( (37.3641), (13.0968), (45.9007)), (* N3 *) + ( (37.5032), (14.1288), (46.7300)), (* C2 *) + ( (37.9570), (13.3377), (44.7113)), (* C4 *) + ( (38.6397), (14.4660), (44.3267)), (* C5 *) + ( (38.7473), (15.5229), (45.2609)), (* C6 *) + (A ( + ( (39.3720), (16.6649), (45.0297)), (* N6 *) + ( (39.1079), (14.3351), (43.0223)), (* N7 *) + ( (38.0132), (12.4868), (43.6280)), (* N9 *) + ( (38.7058), (13.1402), (42.6620)), (* C8 *) + ( (37.0731), (14.0857), (47.7306)), (* H2 *) + ( (39.8113), (16.8281), (44.1350)), (* H61 *) + ( (39.4100), (17.3741), (45.7478)), (* H62 *) + ( (39.0412), (12.9660), (41.6397))) (* H8 *) + ) + ) + + +let rA10 + = ( + ( (0.7063), (0.6317), (-0.3196), (* dgf_base_tfo *) + (-0.0403), (-0.4149), (-0.9090), + (-0.7068), (0.6549), (-0.2676), + (6.4402), (-52.1496), (30.8246)), + ( (0.7529), (0.1548), (0.6397), (* P_O3'_275_tfo *) + (0.2952), (-0.9481), (-0.1180), + (0.5882), (0.2777), (-0.7595), + (-58.8919), (-11.3095), (6.0866)), + ( (-0.0239), (0.9667), (-0.2546), (* P_O3'_180_tfo *) + (0.9731), (-0.0359), (-0.2275), + (-0.2290), (-0.2532), (-0.9399), + (3.5401), (-29.7913), (52.2796)), + ( (-0.8912), (-0.4531), (0.0242), (* P_O3'_60_tfo *) + (-0.1183), (0.1805), (-0.9764), + (0.4380), (-0.8730), (-0.2145), + (19.9023), (54.8054), (15.2799)), + ( (41.8210), (8.3880), (43.5890)), (* P *) + ( (42.5400), (8.0450), (44.8330)), (* O1P *) + ( (42.2470), (9.6920), (42.9910)), (* O2P *) + ( (40.2550), (8.2030), (43.7340)), (* O5' *) + ( (39.4850), (8.9301), (44.6977)), (* C5' *) + ( (39.0638), (9.8199), (44.2296)), (* H5' *) + ( (40.0757), (9.0713), (45.6029)), (* H5'' *) + ( (38.3102), (8.0414), (45.0789)), (* C4' *) + ( (37.7099), (7.8166), (44.1973)), (* H4' *) + ( (38.8012), (6.8321), (45.6380)), (* O4' *) + ( (38.2431), (6.6413), (46.9529)), (* C1' *) + ( (37.3505), (6.0262), (46.8385)), (* H1' *) + ( (37.8484), (8.0156), (47.4214)), (* C2' *) + ( (38.7381), (8.5406), (47.7690)), (* H2'' *) + ( (36.8286), (8.0368), (48.3701)), (* O2' *) + ( (36.8392), (7.3063), (48.9929)), (* H2' *) + ( (37.3576), (8.6512), (46.1132)), (* C3' *) + ( (37.5207), (9.7275), (46.1671)), (* H3' *) + ( (35.9985), (8.2392), (45.9032)), (* O3' *) + ( (39.9117), (2.2278), (48.8527)), (* N1 *) + ( (38.6207), (3.6941), (47.4757)), (* N3 *) + ( (38.9872), (2.4888), (47.9057)), (* C2 *) + ( (39.2961), (4.6720), (48.1174)), (* C4 *) + ( (40.2546), (4.5307), (49.0912)), (* C5 *) + ( (40.5932), (3.2189), (49.4985)), (* C6 *) + (A ( + ( (41.4938), (2.9317), (50.4229)), (* N6 *) + ( (40.7195), (5.7755), (49.5060)), (* N7 *) + ( (39.1730), (6.0305), (47.9170)), (* N9 *) + ( (40.0413), (6.6250), (48.7728)), (* C8 *) + ( (38.5257), (1.5960), (47.4838)), (* H2 *) + ( (41.9907), (3.6753), (50.8921)), (* H61 *) + ( (41.6848), (1.9687), (50.6599)), (* H62 *) + ( (40.3571), (7.6321), (49.0452))) (* H8 *) + ) + ) + + +let rAs = [rA01;rA02;rA03;rA04;rA05;rA06;rA07;rA08;rA09;rA10] + +let rC + = ( + ( (-0.0359), (-0.8071), (0.5894), (* dgf_base_tfo *) + (-0.2669), (0.5761), (0.7726), + (-0.9631), (-0.1296), (-0.2361), + (0.1584), (8.3434), (0.5434)), + ( (-0.8313), (-0.4738), (-0.2906), (* P_O3'_275_tfo *) + (0.0649), (0.4366), (-0.8973), + (0.5521), (-0.7648), (-0.3322), + (1.6833), (6.8060), (-7.0011)), + ( (0.3445), (-0.7630), (0.5470), (* P_O3'_180_tfo *) + (-0.4628), (-0.6450), (-0.6082), + (0.8168), (-0.0436), (-0.5753), + (-6.8179), (-3.9778), (-5.9887)), + ( (0.5855), (0.7931), (-0.1682), (* P_O3'_60_tfo *) + (0.8103), (-0.5790), (0.0906), + (-0.0255), (-0.1894), (-0.9816), + (6.1203), (-7.1051), (3.1984)), + ( (2.6760), (-8.4960), (3.2880)), (* P *) + ( (1.4950), (-7.6230), (3.4770)), (* O1P *) + ( (2.9490), (-9.4640), (4.3740)), (* O2P *) + ( (3.9730), (-7.5950), (3.0340)), (* O5' *) + ( (5.2430), (-8.2420), (2.8260)), (* C5' *) + ( (5.1974), (-8.8497), (1.9223)), (* H5' *) + ( (5.5548), (-8.7348), (3.7469)), (* H5'' *) + ( (6.3140), (-7.2060), (2.5510)), (* C4' *) + ( (7.2954), (-7.6762), (2.4898)), (* H4' *) + ( (6.0140), (-6.5420), (1.2890)), (* O4' *) + ( (6.4190), (-5.1840), (1.3620)), (* C1' *) + ( (7.1608), (-5.0495), (0.5747)), (* H1' *) + ( (7.0760), (-4.9560), (2.7270)), (* C2' *) + ( (6.7770), (-3.9803), (3.1099)), (* H2'' *) + ( (8.4500), (-5.1930), (2.5810)), (* O2' *) + ( (8.8309), (-4.8755), (1.7590)), (* H2' *) + ( (6.4060), (-6.0590), (3.5580)), (* C3' *) + ( (5.4021), (-5.7313), (3.8281)), (* H3' *) + ( (7.1570), (-6.4240), (4.7070)), (* O3' *) + ( (5.2170), (-4.3260), (1.1690)), (* N1 *) + ( (4.2960), (-2.2560), (0.6290)), (* N3 *) + ( (5.4330), (-3.0200), (0.7990)), (* C2 *) + ( (2.9930), (-2.6780), (0.7940)), (* C4 *) + ( (2.8670), (-4.0630), (1.1830)), (* C5 *) + ( (3.9570), (-4.8300), (1.3550)), (* C6 *) + (C ( + ( (2.0187), (-1.8047), (0.5874)), (* N4 *) + ( (6.5470), (-2.5560), (0.6290)), (* O2 *) + ( (1.0684), (-2.1236), (0.7109)), (* H41 *) + ( (2.2344), (-0.8560), (0.3162)), (* H42 *) + ( (1.8797), (-4.4972), (1.3404)), (* H5 *) + ( (3.8479), (-5.8742), (1.6480))) (* H6 *) + ) + ) + + +let rC01 + = ( + ( (-0.0137), (-0.8012), (0.5983), (* dgf_base_tfo *) + (-0.2523), (0.5817), (0.7733), + (-0.9675), (-0.1404), (-0.2101), + (0.2031), (8.3874), (0.4228)), + ( (-0.8313), (-0.4738), (-0.2906), (* P_O3'_275_tfo *) + (0.0649), (0.4366), (-0.8973), + (0.5521), (-0.7648), (-0.3322), + (1.6833), (6.8060), (-7.0011)), + ( (0.3445), (-0.7630), (0.5470), (* P_O3'_180_tfo *) + (-0.4628), (-0.6450), (-0.6082), + (0.8168), (-0.0436), (-0.5753), + (-6.8179), (-3.9778), (-5.9887)), + ( (0.5855), (0.7931), (-0.1682), (* P_O3'_60_tfo *) + (0.8103), (-0.5790), (0.0906), + (-0.0255), (-0.1894), (-0.9816), + (6.1203), (-7.1051), (3.1984)), + ( (2.6760), (-8.4960), (3.2880)), (* P *) + ( (1.4950), (-7.6230), (3.4770)), (* O1P *) + ( (2.9490), (-9.4640), (4.3740)), (* O2P *) + ( (3.9730), (-7.5950), (3.0340)), (* O5' *) + ( (5.2416), (-8.2422), (2.8181)), (* C5' *) + ( (5.2050), (-8.8128), (1.8901)), (* H5' *) + ( (5.5368), (-8.7738), (3.7227)), (* H5'' *) + ( (6.3232), (-7.2037), (2.6002)), (* C4' *) + ( (7.3048), (-7.6757), (2.5577)), (* H4' *) + ( (6.0635), (-6.5092), (1.3456)), (* O4' *) + ( (6.4697), (-5.1547), (1.4629)), (* C1' *) + ( (7.2354), (-5.0043), (0.7018)), (* H1' *) + ( (7.0856), (-4.9610), (2.8521)), (* C2' *) + ( (6.7777), (-3.9935), (3.2487)), (* H2'' *) + ( (8.4627), (-5.1992), (2.7423)), (* O2' *) + ( (8.8693), (-4.8638), (1.9399)), (* H2' *) + ( (6.3877), (-6.0809), (3.6362)), (* C3' *) + ( (5.3770), (-5.7562), (3.8834)), (* H3' *) + ( (7.1024), (-6.4754), (4.7985)), (* O3' *) + ( (5.2764), (-4.2883), (1.2538)), (* N1 *) + ( (4.3777), (-2.2062), (0.7229)), (* N3 *) + ( (5.5069), (-2.9779), (0.9088)), (* C2 *) + ( (3.0693), (-2.6246), (0.8500)), (* C4 *) + ( (2.9279), (-4.0146), (1.2149)), (* C5 *) + ( (4.0101), (-4.7892), (1.4017)), (* C6 *) + (C ( + ( (2.1040), (-1.7437), (0.6331)), (* N4 *) + ( (6.6267), (-2.5166), (0.7728)), (* O2 *) + ( (1.1496), (-2.0600), (0.7287)), (* H41 *) + ( (2.3303), (-0.7921), (0.3815)), (* H42 *) + ( (1.9353), (-4.4465), (1.3419)), (* H5 *) + ( (3.8895), (-5.8371), (1.6762))) (* H6 *) + ) + ) + + +let rC02 + = ( + ( (0.5141), (0.0246), (0.8574), (* dgf_base_tfo *) + (-0.5547), (-0.7529), (0.3542), + (0.6542), (-0.6577), (-0.3734), + (-9.1111), (-3.4598), (-3.2939)), + ( (-0.8313), (-0.4738), (-0.2906), (* P_O3'_275_tfo *) + (0.0649), (0.4366), (-0.8973), + (0.5521), (-0.7648), (-0.3322), + (1.6833), (6.8060), (-7.0011)), + ( (0.3445), (-0.7630), (0.5470), (* P_O3'_180_tfo *) + (-0.4628), (-0.6450), (-0.6082), + (0.8168), (-0.0436), (-0.5753), + (-6.8179), (-3.9778), (-5.9887)), + ( (0.5855), (0.7931), (-0.1682), (* P_O3'_60_tfo *) + (0.8103), (-0.5790), (0.0906), + (-0.0255), (-0.1894), (-0.9816), + (6.1203), (-7.1051), (3.1984)), + ( (2.6760), (-8.4960), (3.2880)), (* P *) + ( (1.4950), (-7.6230), (3.4770)), (* O1P *) + ( (2.9490), (-9.4640), (4.3740)), (* O2P *) + ( (3.9730), (-7.5950), (3.0340)), (* O5' *) + ( (4.3825), (-6.6585), (4.0489)), (* C5' *) + ( (4.6841), (-7.2019), (4.9443)), (* H5' *) + ( (3.6189), (-5.8889), (4.1625)), (* H5'' *) + ( (5.6255), (-5.9175), (3.5998)), (* C4' *) + ( (5.8732), (-5.1228), (4.3034)), (* H4' *) + ( (6.7337), (-6.8605), (3.5222)), (* O4' *) + ( (7.5932), (-6.4923), (2.4548)), (* C1' *) + ( (8.5661), (-6.2983), (2.9064)), (* H1' *) + ( (7.0527), (-5.2012), (1.8322)), (* C2' *) + ( (7.1627), (-5.2525), (0.7490)), (* H2'' *) + ( (7.6666), (-4.1249), (2.4880)), (* O2' *) + ( (8.5944), (-4.2543), (2.6981)), (* H2' *) + ( (5.5661), (-5.3029), (2.2009)), (* C3' *) + ( (5.0841), (-6.0018), (1.5172)), (* H3' *) + ( (4.9062), (-4.0452), (2.2042)), (* O3' *) + ( (7.6298), (-7.6136), (1.4752)), (* N1 *) + ( (8.6945), (-8.7046), (-0.2857)), (* N3 *) + ( (8.6943), (-7.6514), (0.6066)), (* C2 *) + ( (7.7426), (-9.6987), (-0.3801)), (* C4 *) + ( (6.6642), (-9.5742), (0.5722)), (* C5 *) + ( (6.6391), (-8.5592), (1.4526)), (* C6 *) + (C ( + ( (7.9033), (-10.6371), (-1.3010)), (* N4 *) + ( (9.5840), (-6.8186), (0.6136)), (* O2 *) + ( (7.2009), (-11.3604), (-1.3619)), (* H41 *) + ( (8.7058), (-10.6168), (-1.9140)), (* H42 *) + ( (5.8585), (-10.3083), (0.5822)), (* H5 *) + ( (5.8197), (-8.4773), (2.1667))) (* H6 *) + ) + ) + + +let rC03 + = ( + ( (-0.4993), (0.0476), (0.8651), (* dgf_base_tfo *) + (0.8078), (-0.3353), (0.4847), + (0.3132), (0.9409), (0.1290), + (6.2989), (-5.2303), (-3.8577)), + ( (-0.8313), (-0.4738), (-0.2906), (* P_O3'_275_tfo *) + (0.0649), (0.4366), (-0.8973), + (0.5521), (-0.7648), (-0.3322), + (1.6833), (6.8060), (-7.0011)), + ( (0.3445), (-0.7630), (0.5470), (* P_O3'_180_tfo *) + (-0.4628), (-0.6450), (-0.6082), + (0.8168), (-0.0436), (-0.5753), + (-6.8179), (-3.9778), (-5.9887)), + ( (0.5855), (0.7931), (-0.1682), (* P_O3'_60_tfo *) + (0.8103), (-0.5790), (0.0906), + (-0.0255), (-0.1894), (-0.9816), + (6.1203), (-7.1051), (3.1984)), + ( (2.6760), (-8.4960), (3.2880)), (* P *) + ( (1.4950), (-7.6230), (3.4770)), (* O1P *) + ( (2.9490), (-9.4640), (4.3740)), (* O2P *) + ( (3.9730), (-7.5950), (3.0340)), (* O5' *) + ( (3.9938), (-6.7042), (1.9023)), (* C5' *) + ( (3.2332), (-5.9343), (2.0319)), (* H5' *) + ( (3.9666), (-7.2863), (0.9812)), (* H5'' *) + ( (5.3098), (-5.9546), (1.8564)), (* C4' *) + ( (5.3863), (-5.3702), (0.9395)), (* H4' *) + ( (5.3851), (-5.0642), (3.0076)), (* O4' *) + ( (6.7315), (-4.9724), (3.4462)), (* C1' *) + ( (7.0033), (-3.9202), (3.3619)), (* H1' *) + ( (7.5997), (-5.8018), (2.4948)), (* C2' *) + ( (8.3627), (-6.3254), (3.0707)), (* H2'' *) + ( (8.0410), (-4.9501), (1.4724)), (* O2' *) + ( (8.2781), (-4.0644), (1.7570)), (* H2' *) + ( (6.5701), (-6.8129), (1.9714)), (* C3' *) + ( (6.4186), (-7.5809), (2.7299)), (* H3' *) + ( (6.9357), (-7.3841), (0.7235)), (* O3' *) + ( (6.8024), (-5.4718), (4.8475)), (* N1 *) + ( (7.9218), (-5.5700), (6.8877)), (* N3 *) + ( (7.8908), (-5.0886), (5.5944)), (* C2 *) + ( (6.9789), (-6.3827), (7.4823)), (* C4 *) + ( (5.8742), (-6.7319), (6.6202)), (* C5 *) + ( (5.8182), (-6.2769), (5.3570)), (* C6 *) + (C ( + ( (7.1702), (-6.7511), (8.7402)), (* N4 *) + ( (8.7747), (-4.3728), (5.1568)), (* O2 *) + ( (6.4741), (-7.3461), (9.1662)), (* H41 *) + ( (7.9889), (-6.4396), (9.2429)), (* H42 *) + ( (5.0736), (-7.3713), (6.9922)), (* H5 *) + ( (4.9784), (-6.5473), (4.7170))) (* H6 *) + ) + ) + + +let rC04 + = ( + ( (-0.5669), (-0.8012), (0.1918), (* dgf_base_tfo *) + (-0.8129), (0.5817), (0.0273), + (-0.1334), (-0.1404), (-0.9811), + (-0.3279), (8.3874), (0.3355)), + ( (-0.8313), (-0.4738), (-0.2906), (* P_O3'_275_tfo *) + (0.0649), (0.4366), (-0.8973), + (0.5521), (-0.7648), (-0.3322), + (1.6833), (6.8060), (-7.0011)), + ( (0.3445), (-0.7630), (0.5470), (* P_O3'_180_tfo *) + (-0.4628), (-0.6450), (-0.6082), + (0.8168), (-0.0436), (-0.5753), + (-6.8179), (-3.9778), (-5.9887)), + ( (0.5855), (0.7931), (-0.1682), (* P_O3'_60_tfo *) + (0.8103), (-0.5790), (0.0906), + (-0.0255), (-0.1894), (-0.9816), + (6.1203), (-7.1051), (3.1984)), + ( (2.6760), (-8.4960), (3.2880)), (* P *) + ( (1.4950), (-7.6230), (3.4770)), (* O1P *) + ( (2.9490), (-9.4640), (4.3740)), (* O2P *) + ( (3.9730), (-7.5950), (3.0340)), (* O5' *) + ( (5.2416), (-8.2422), (2.8181)), (* C5' *) + ( (5.2050), (-8.8128), (1.8901)), (* H5' *) + ( (5.5368), (-8.7738), (3.7227)), (* H5'' *) + ( (6.3232), (-7.2037), (2.6002)), (* C4' *) + ( (7.3048), (-7.6757), (2.5577)), (* H4' *) + ( (6.0635), (-6.5092), (1.3456)), (* O4' *) + ( (6.4697), (-5.1547), (1.4629)), (* C1' *) + ( (7.2354), (-5.0043), (0.7018)), (* H1' *) + ( (7.0856), (-4.9610), (2.8521)), (* C2' *) + ( (6.7777), (-3.9935), (3.2487)), (* H2'' *) + ( (8.4627), (-5.1992), (2.7423)), (* O2' *) + ( (8.8693), (-4.8638), (1.9399)), (* H2' *) + ( (6.3877), (-6.0809), (3.6362)), (* C3' *) + ( (5.3770), (-5.7562), (3.8834)), (* H3' *) + ( (7.1024), (-6.4754), (4.7985)), (* O3' *) + ( (5.2764), (-4.2883), (1.2538)), (* N1 *) + ( (3.8961), (-3.0896), (-0.1893)), (* N3 *) + ( (5.0095), (-3.8907), (-0.0346)), (* C2 *) + ( (3.0480), (-2.6632), (0.8116)), (* C4 *) + ( (3.4093), (-3.1310), (2.1292)), (* C5 *) + ( (4.4878), (-3.9124), (2.3088)), (* C6 *) + (C ( + ( (2.0216), (-1.8941), (0.4804)), (* N4 *) + ( (5.7005), (-4.2164), (-0.9842)), (* O2 *) + ( (1.4067), (-1.5873), (1.2205)), (* H41 *) + ( (1.8721), (-1.6319), (-0.4835)), (* H42 *) + ( (2.8048), (-2.8507), (2.9918)), (* H5 *) + ( (4.7491), (-4.2593), (3.3085))) (* H6 *) + ) + ) + + +let rC05 + = ( + ( (-0.6298), (0.0246), (0.7763), (* dgf_base_tfo *) + (-0.5226), (-0.7529), (-0.4001), + (0.5746), (-0.6577), (0.4870), + (-0.0208), (-3.4598), (-9.6882)), + ( (-0.8313), (-0.4738), (-0.2906), (* P_O3'_275_tfo *) + (0.0649), (0.4366), (-0.8973), + (0.5521), (-0.7648), (-0.3322), + (1.6833), (6.8060), (-7.0011)), + ( (0.3445), (-0.7630), (0.5470), (* P_O3'_180_tfo *) + (-0.4628), (-0.6450), (-0.6082), + (0.8168), (-0.0436), (-0.5753), + (-6.8179), (-3.9778), (-5.9887)), + ( (0.5855), (0.7931), (-0.1682), (* P_O3'_60_tfo *) + (0.8103), (-0.5790), (0.0906), + (-0.0255), (-0.1894), (-0.9816), + (6.1203), (-7.1051), (3.1984)), + ( (2.6760), (-8.4960), (3.2880)), (* P *) + ( (1.4950), (-7.6230), (3.4770)), (* O1P *) + ( (2.9490), (-9.4640), (4.3740)), (* O2P *) + ( (3.9730), (-7.5950), (3.0340)), (* O5' *) + ( (4.3825), (-6.6585), (4.0489)), (* C5' *) + ( (4.6841), (-7.2019), (4.9443)), (* H5' *) + ( (3.6189), (-5.8889), (4.1625)), (* H5'' *) + ( (5.6255), (-5.9175), (3.5998)), (* C4' *) + ( (5.8732), (-5.1228), (4.3034)), (* H4' *) + ( (6.7337), (-6.8605), (3.5222)), (* O4' *) + ( (7.5932), (-6.4923), (2.4548)), (* C1' *) + ( (8.5661), (-6.2983), (2.9064)), (* H1' *) + ( (7.0527), (-5.2012), (1.8322)), (* C2' *) + ( (7.1627), (-5.2525), (0.7490)), (* H2'' *) + ( (7.6666), (-4.1249), (2.4880)), (* O2' *) + ( (8.5944), (-4.2543), (2.6981)), (* H2' *) + ( (5.5661), (-5.3029), (2.2009)), (* C3' *) + ( (5.0841), (-6.0018), (1.5172)), (* H3' *) + ( (4.9062), (-4.0452), (2.2042)), (* O3' *) + ( (7.6298), (-7.6136), (1.4752)), (* N1 *) + ( (8.5977), (-9.5977), (0.7329)), (* N3 *) + ( (8.5951), (-8.5745), (1.6594)), (* C2 *) + ( (7.7372), (-9.7371), (-0.3364)), (* C4 *) + ( (6.7596), (-8.6801), (-0.4476)), (* C5 *) + ( (6.7338), (-7.6721), (0.4408)), (* C6 *) + (C ( + ( (7.8849), (-10.7881), (-1.1289)), (* N4 *) + ( (9.3993), (-8.5377), (2.5743)), (* O2 *) + ( (7.2499), (-10.8809), (-1.9088)), (* H41 *) + ( (8.6122), (-11.4649), (-0.9468)), (* H42 *) + ( (6.0317), (-8.6941), (-1.2588)), (* H5 *) + ( (5.9901), (-6.8809), (0.3459))) (* H6 *) + ) + ) + + +let rC06 + = ( + ( (-0.9837), (0.0476), (-0.1733), (* dgf_base_tfo *) + (-0.1792), (-0.3353), (0.9249), + (-0.0141), (0.9409), (0.3384), + (5.7793), (-5.2303), (4.5997)), + ( (-0.8313), (-0.4738), (-0.2906), (* P_O3'_275_tfo *) + (0.0649), (0.4366), (-0.8973), + (0.5521), (-0.7648), (-0.3322), + (1.6833), (6.8060), (-7.0011)), + ( (0.3445), (-0.7630), (0.5470), (* P_O3'_180_tfo *) + (-0.4628), (-0.6450), (-0.6082), + (0.8168), (-0.0436), (-0.5753), + (-6.8179), (-3.9778), (-5.9887)), + ( (0.5855), (0.7931), (-0.1682), (* P_O3'_60_tfo *) + (0.8103), (-0.5790), (0.0906), + (-0.0255), (-0.1894), (-0.9816), + (6.1203), (-7.1051), (3.1984)), + ( (2.6760), (-8.4960), (3.2880)), (* P *) + ( (1.4950), (-7.6230), (3.4770)), (* O1P *) + ( (2.9490), (-9.4640), (4.3740)), (* O2P *) + ( (3.9730), (-7.5950), (3.0340)), (* O5' *) + ( (3.9938), (-6.7042), (1.9023)), (* C5' *) + ( (3.2332), (-5.9343), (2.0319)), (* H5' *) + ( (3.9666), (-7.2863), (0.9812)), (* H5'' *) + ( (5.3098), (-5.9546), (1.8564)), (* C4' *) + ( (5.3863), (-5.3702), (0.9395)), (* H4' *) + ( (5.3851), (-5.0642), (3.0076)), (* O4' *) + ( (6.7315), (-4.9724), (3.4462)), (* C1' *) + ( (7.0033), (-3.9202), (3.3619)), (* H1' *) + ( (7.5997), (-5.8018), (2.4948)), (* C2' *) + ( (8.3627), (-6.3254), (3.0707)), (* H2'' *) + ( (8.0410), (-4.9501), (1.4724)), (* O2' *) + ( (8.2781), (-4.0644), (1.7570)), (* H2' *) + ( (6.5701), (-6.8129), (1.9714)), (* C3' *) + ( (6.4186), (-7.5809), (2.7299)), (* H3' *) + ( (6.9357), (-7.3841), (0.7235)), (* O3' *) + ( (6.8024), (-5.4718), (4.8475)), (* N1 *) + ( (6.6920), (-5.0495), (7.1354)), (* N3 *) + ( (6.6201), (-4.5500), (5.8506)), (* C2 *) + ( (6.9254), (-6.3614), (7.4926)), (* C4 *) + ( (7.1046), (-7.2543), (6.3718)), (* C5 *) + ( (7.0391), (-6.7951), (5.1106)), (* C6 *) + (C ( + ( (6.9614), (-6.6648), (8.7815)), (* N4 *) + ( (6.4083), (-3.3696), (5.6340)), (* O2 *) + ( (7.1329), (-7.6280), (9.0324)), (* H41 *) + ( (6.8204), (-5.9469), (9.4777)), (* H42 *) + ( (7.2954), (-8.3135), (6.5440)), (* H5 *) + ( (7.1753), (-7.4798), (4.2735))) (* H6 *) + ) + ) + + +let rC07 + = ( + ( (0.0033), (0.2720), (-0.9623), (* dgf_base_tfo *) + (0.3013), (-0.9179), (-0.2584), + (-0.9535), (-0.2891), (-0.0850), + (43.0403), (13.7233), (34.5710)), + ( (0.9187), (0.2887), (0.2694), (* P_O3'_275_tfo *) + (0.0302), (-0.7316), (0.6811), + (0.3938), (-0.6176), (-0.6808), + (-48.4330), (26.3254), (13.6383)), + ( (-0.1504), (0.7744), (-0.6145), (* P_O3'_180_tfo *) + (0.7581), (0.4893), (0.4311), + (0.6345), (-0.4010), (-0.6607), + (-31.9784), (-13.4285), (44.9650)), + ( (-0.6236), (-0.7810), (-0.0337), (* P_O3'_60_tfo *) + (-0.6890), (0.5694), (-0.4484), + (0.3694), (-0.2564), (-0.8932), + (12.1105), (30.8774), (46.0946)), + ( (33.3400), (11.0980), (46.1750)), (* P *) + ( (34.5130), (10.2320), (46.4660)), (* O1P *) + ( (33.4130), (12.3960), (46.9340)), (* O2P *) + ( (31.9810), (10.3390), (46.4820)), (* O5' *) + ( (30.8152), (11.1619), (46.2003)), (* C5' *) + ( (30.4519), (10.9454), (45.1957)), (* H5' *) + ( (31.0379), (12.2016), (46.4400)), (* H5'' *) + ( (29.7081), (10.7448), (47.1428)), (* C4' *) + ( (28.8710), (11.4416), (47.0982)), (* H4' *) + ( (29.2550), (9.4394), (46.8162)), (* O4' *) + ( (29.3907), (8.5625), (47.9460)), (* C1' *) + ( (28.4416), (8.5669), (48.4819)), (* H1' *) + ( (30.4468), (9.2031), (48.7952)), (* C2' *) + ( (31.4222), (8.9651), (48.3709)), (* H2'' *) + ( (30.3701), (8.9157), (50.1624)), (* O2' *) + ( (30.0652), (8.0304), (50.3740)), (* H2' *) + ( (30.1622), (10.6879), (48.6120)), (* C3' *) + ( (31.0952), (11.2399), (48.7254)), (* H3' *) + ( (29.1076), (11.1535), (49.4702)), (* O3' *) + ( (29.7883), (7.2209), (47.5235)), (* N1 *) + ( (29.1825), (5.0438), (46.8275)), (* N3 *) + ( (28.8008), (6.2912), (47.2263)), (* C2 *) + ( (30.4888), (4.6890), (46.7186)), (* C4 *) + ( (31.5034), (5.6405), (47.0249)), (* C5 *) + ( (31.1091), (6.8691), (47.4156)), (* C6 *) + (C ( + ( (30.8109), (3.4584), (46.3336)), (* N4 *) + ( (27.6171), (6.5989), (47.3189)), (* O2 *) + ( (31.7923), (3.2301), (46.2638)), (* H41 *) + ( (30.0880), (2.7857), (46.1215)), (* H42 *) + ( (32.5542), (5.3634), (46.9395)), (* H5 *) + ( (31.8523), (7.6279), (47.6603))) (* H6 *) + ) + ) + + +let rC08 + = ( + ( (0.0797), (-0.6026), (-0.7941), (* dgf_base_tfo *) + (0.7939), (0.5201), (-0.3150), + (0.6028), (-0.6054), (0.5198), + (-36.8341), (41.5293), (1.6628)), + ( (0.9187), (0.2887), (0.2694), (* P_O3'_275_tfo *) + (0.0302), (-0.7316), (0.6811), + (0.3938), (-0.6176), (-0.6808), + (-48.4330), (26.3254), (13.6383)), + ( (-0.1504), (0.7744), (-0.6145), (* P_O3'_180_tfo *) + (0.7581), (0.4893), (0.4311), + (0.6345), (-0.4010), (-0.6607), + (-31.9784), (-13.4285), (44.9650)), + ( (-0.6236), (-0.7810), (-0.0337), (* P_O3'_60_tfo *) + (-0.6890), (0.5694), (-0.4484), + (0.3694), (-0.2564), (-0.8932), + (12.1105), (30.8774), (46.0946)), + ( (33.3400), (11.0980), (46.1750)), (* P *) + ( (34.5130), (10.2320), (46.4660)), (* O1P *) + ( (33.4130), (12.3960), (46.9340)), (* O2P *) + ( (31.9810), (10.3390), (46.4820)), (* O5' *) + ( (31.8779), (9.9369), (47.8760)), (* C5' *) + ( (31.3239), (10.6931), (48.4322)), (* H5' *) + ( (32.8647), (9.6624), (48.2489)), (* H5'' *) + ( (31.0429), (8.6773), (47.9401)), (* C4' *) + ( (31.0779), (8.2331), (48.9349)), (* H4' *) + ( (29.6956), (8.9669), (47.5983)), (* O4' *) + ( (29.2784), (8.1700), (46.4782)), (* C1' *) + ( (28.8006), (7.2731), (46.8722)), (* H1' *) + ( (30.5544), (7.7940), (45.7875)), (* C2' *) + ( (30.8837), (8.6410), (45.1856)), (* H2'' *) + ( (30.5100), (6.6007), (45.0582)), (* O2' *) + ( (29.6694), (6.4168), (44.6326)), (* H2' *) + ( (31.5146), (7.5954), (46.9527)), (* C3' *) + ( (32.5255), (7.8261), (46.6166)), (* H3' *) + ( (31.3876), (6.2951), (47.5516)), (* O3' *) + ( (28.3976), (8.9302), (45.5933)), (* N1 *) + ( (26.2155), (9.6135), (44.9910)), (* N3 *) + ( (27.0281), (8.8961), (45.8192)), (* C2 *) + ( (26.7044), (10.3489), (43.9595)), (* C4 *) + ( (28.1088), (10.3837), (43.7247)), (* C5 *) + ( (28.8978), (9.6708), (44.5535)), (* C6 *) + (C ( + ( (25.8715), (11.0249), (43.1749)), (* N4 *) + ( (26.5733), (8.2371), (46.7484)), (* O2 *) + ( (26.2707), (11.5609), (42.4177)), (* H41 *) + ( (24.8760), (10.9939), (43.3427)), (* H42 *) + ( (28.5089), (10.9722), (42.8990)), (* H5 *) + ( (29.9782), (9.6687), (44.4097))) (* H6 *) + ) + ) + + +let rC09 + = ( + ( (0.8727), (0.4760), (-0.1091), (* dgf_base_tfo *) + (-0.4188), (0.6148), (-0.6682), + (-0.2510), (0.6289), (0.7359), + (-8.1687), (-52.0761), (-25.0726)), + ( (0.9187), (0.2887), (0.2694), (* P_O3'_275_tfo *) + (0.0302), (-0.7316), (0.6811), + (0.3938), (-0.6176), (-0.6808), + (-48.4330), (26.3254), (13.6383)), + ( (-0.1504), (0.7744), (-0.6145), (* P_O3'_180_tfo *) + (0.7581), (0.4893), (0.4311), + (0.6345), (-0.4010), (-0.6607), + (-31.9784), (-13.4285), (44.9650)), + ( (-0.6236), (-0.7810), (-0.0337), (* P_O3'_60_tfo *) + (-0.6890), (0.5694), (-0.4484), + (0.3694), (-0.2564), (-0.8932), + (12.1105), (30.8774), (46.0946)), + ( (33.3400), (11.0980), (46.1750)), (* P *) + ( (34.5130), (10.2320), (46.4660)), (* O1P *) + ( (33.4130), (12.3960), (46.9340)), (* O2P *) + ( (31.9810), (10.3390), (46.4820)), (* O5' *) + ( (30.8152), (11.1619), (46.2003)), (* C5' *) + ( (30.4519), (10.9454), (45.1957)), (* H5' *) + ( (31.0379), (12.2016), (46.4400)), (* H5'' *) + ( (29.7081), (10.7448), (47.1428)), (* C4' *) + ( (29.4506), (9.6945), (47.0059)), (* H4' *) + ( (30.1045), (10.9634), (48.4885)), (* O4' *) + ( (29.1794), (11.8418), (49.1490)), (* C1' *) + ( (28.4388), (11.2210), (49.6533)), (* H1' *) + ( (28.5211), (12.6008), (48.0367)), (* C2' *) + ( (29.1947), (13.3949), (47.7147)), (* H2'' *) + ( (27.2316), (13.0683), (48.3134)), (* O2' *) + ( (27.0851), (13.3391), (49.2227)), (* H2' *) + ( (28.4131), (11.5507), (46.9391)), (* C3' *) + ( (28.4451), (12.0512), (45.9713)), (* H3' *) + ( (27.2707), (10.6955), (47.1097)), (* O3' *) + ( (29.8751), (12.7405), (50.0682)), (* N1 *) + ( (30.7172), (13.1841), (52.2328)), (* N3 *) + ( (30.0617), (12.3404), (51.3847)), (* C2 *) + ( (31.1834), (14.3941), (51.8297)), (* C4 *) + ( (30.9913), (14.8074), (50.4803)), (* C5 *) + ( (30.3434), (13.9610), (49.6548)), (* C6 *) + (C ( + ( (31.8090), (15.1847), (52.6957)), (* N4 *) + ( (29.6470), (11.2494), (51.7616)), (* O2 *) + ( (32.1422), (16.0774), (52.3606)), (* H41 *) + ( (31.9392), (14.8893), (53.6527)), (* H42 *) + ( (31.3632), (15.7771), (50.1491)), (* H5 *) + ( (30.1742), (14.2374), (48.6141))) (* H6 *) + ) + ) + + +let rC10 + = ( + ( (0.1549), (0.8710), (-0.4663), (* dgf_base_tfo *) + (0.6768), (-0.4374), (-0.5921), + (-0.7197), (-0.2239), (-0.6572), + (25.2447), (-14.1920), (50.3201)), + ( (0.9187), (0.2887), (0.2694), (* P_O3'_275_tfo *) + (0.0302), (-0.7316), (0.6811), + (0.3938), (-0.6176), (-0.6808), + (-48.4330), (26.3254), (13.6383)), + ( (-0.1504), (0.7744), (-0.6145), (* P_O3'_180_tfo *) + (0.7581), (0.4893), (0.4311), + (0.6345), (-0.4010), (-0.6607), + (-31.9784), (-13.4285), (44.9650)), + ( (-0.6236), (-0.7810), (-0.0337), (* P_O3'_60_tfo *) + (-0.6890), (0.5694), (-0.4484), + (0.3694), (-0.2564), (-0.8932), + (12.1105), (30.8774), (46.0946)), + ( (33.3400), (11.0980), (46.1750)), (* P *) + ( (34.5130), (10.2320), (46.4660)), (* O1P *) + ( (33.4130), (12.3960), (46.9340)), (* O2P *) + ( (31.9810), (10.3390), (46.4820)), (* O5' *) + ( (31.8779), (9.9369), (47.8760)), (* C5' *) + ( (31.3239), (10.6931), (48.4322)), (* H5' *) + ( (32.8647), (9.6624), (48.2489)), (* H5'' *) + ( (31.0429), (8.6773), (47.9401)), (* C4' *) + ( (30.0440), (8.8473), (47.5383)), (* H4' *) + ( (31.6749), (7.6351), (47.2119)), (* O4' *) + ( (31.9159), (6.5022), (48.0616)), (* C1' *) + ( (31.0691), (5.8243), (47.9544)), (* H1' *) + ( (31.9300), (7.0685), (49.4493)), (* C2' *) + ( (32.9024), (7.5288), (49.6245)), (* H2'' *) + ( (31.5672), (6.1750), (50.4632)), (* O2' *) + ( (31.8416), (5.2663), (50.3200)), (* H2' *) + ( (30.8618), (8.1514), (49.3749)), (* C3' *) + ( (31.1122), (8.9396), (50.0850)), (* H3' *) + ( (29.5351), (7.6245), (49.5409)), (* O3' *) + ( (33.1890), (5.8629), (47.7343)), (* N1 *) + ( (34.4004), (4.2636), (46.4828)), (* N3 *) + ( (33.2062), (4.8497), (46.7851)), (* C2 *) + ( (35.5600), (4.6374), (47.0822)), (* C4 *) + ( (35.5444), (5.6751), (48.0577)), (* C5 *) + ( (34.3565), (6.2450), (48.3432)), (* C6 *) + (C ( + ( (36.6977), (4.0305), (46.7598)), (* N4 *) + ( (32.1661), (4.5034), (46.2348)), (* O2 *) + ( (37.5405), (4.3347), (47.2259)), (* H41 *) + ( (36.7033), (3.2923), (46.0706)), (* H42 *) + ( (36.4713), (5.9811), (48.5428)), (* H5 *) + ( (34.2986), (7.0426), (49.0839))) (* H6 *) + ) + ) + + +let rCs = [rC01;rC02;rC03;rC04;rC05;rC06;rC07;rC08;rC09;rC10] + + +let rG + = ( + ( (-0.0018), (-0.8207), (0.5714), (* dgf_base_tfo *) + (0.2679), (-0.5509), (-0.7904), + (0.9634), (0.1517), (0.2209), + (0.0073), (8.4030), (0.6232)), + ( (-0.8143), (-0.5091), (-0.2788), (* P_O3'_275_tfo *) + (-0.0433), (-0.4257), (0.9038), + (-0.5788), (0.7480), (0.3246), + (1.5227), (6.9114), (-7.0765)), + ( (0.3822), (-0.7477), (0.5430), (* P_O3'_180_tfo *) + (0.4552), (0.6637), (0.5935), + (-0.8042), (0.0203), (0.5941), + (-6.9472), (-4.1186), (-5.9108)), + ( (0.5640), (0.8007), (-0.2022), (* P_O3'_60_tfo *) + (-0.8247), (0.5587), (-0.0878), + (0.0426), (0.2162), (0.9754), + (6.2694), (-7.0540), (3.3316)), + ( (2.8930), (8.5380), (-3.3280)), (* P *) + ( (1.6980), (7.6960), (-3.5570)), (* O1P *) + ( (3.2260), (9.5010), (-4.4020)), (* O2P *) + ( (4.1590), (7.6040), (-3.0340)), (* O5' *) + ( (5.4550), (8.2120), (-2.8810)), (* C5' *) + ( (5.4546), (8.8508), (-1.9978)), (* H5' *) + ( (5.7588), (8.6625), (-3.8259)), (* H5'' *) + ( (6.4970), (7.1480), (-2.5980)), (* C4' *) + ( (7.4896), (7.5919), (-2.5214)), (* H4' *) + ( (6.1630), (6.4860), (-1.3440)), (* O4' *) + ( (6.5400), (5.1200), (-1.4190)), (* C1' *) + ( (7.2763), (4.9681), (-0.6297)), (* H1' *) + ( (7.1940), (4.8830), (-2.7770)), (* C2' *) + ( (6.8667), (3.9183), (-3.1647)), (* H2'' *) + ( (8.5860), (5.0910), (-2.6140)), (* O2' *) + ( (8.9510), (4.7626), (-1.7890)), (* H2' *) + ( (6.5720), (6.0040), (-3.6090)), (* C3' *) + ( (5.5636), (5.7066), (-3.8966)), (* H3' *) + ( (7.3801), (6.3562), (-4.7350)), (* O3' *) + ( (4.7150), (0.4910), (-0.1360)), (* N1 *) + ( (6.3490), (2.1730), (-0.6020)), (* N3 *) + ( (5.9530), (0.9650), (-0.2670)), (* C2 *) + ( (5.2900), (2.9790), (-0.8260)), (* C4 *) + ( (3.9720), (2.6390), (-0.7330)), (* C5 *) + ( (3.6770), (1.3160), (-0.3660)), (* C6 *) + (G ( + ( (6.8426), (0.0056), (-0.0019)), (* N2 *) + ( (3.1660), (3.7290), (-1.0360)), (* N7 *) + ( (5.3170), (4.2990), (-1.1930)), (* N9 *) + ( (4.0100), (4.6780), (-1.2990)), (* C8 *) + ( (2.4280), (0.8450), (-0.2360)), (* O6 *) + ( (4.6151), (-0.4677), (0.1305)), (* H1 *) + ( (6.6463), (-0.9463), (0.2729)), (* H21 *) + ( (7.8170), (0.2642), (-0.0640)), (* H22 *) + ( (3.4421), (5.5744), (-1.5482))) (* H8 *) + ) + ) + + +let rG01 + = ( + ( (-0.0043), (-0.8175), (0.5759), (* dgf_base_tfo *) + (0.2617), (-0.5567), (-0.7884), + (0.9651), (0.1473), (0.2164), + (0.0359), (8.3929), (0.5532)), + ( (-0.8143), (-0.5091), (-0.2788), (* P_O3'_275_tfo *) + (-0.0433), (-0.4257), (0.9038), + (-0.5788), (0.7480), (0.3246), + (1.5227), (6.9114), (-7.0765)), + ( (0.3822), (-0.7477), (0.5430), (* P_O3'_180_tfo *) + (0.4552), (0.6637), (0.5935), + (-0.8042), (0.0203), (0.5941), + (-6.9472), (-4.1186), (-5.9108)), + ( (0.5640), (0.8007), (-0.2022), (* P_O3'_60_tfo *) + (-0.8247), (0.5587), (-0.0878), + (0.0426), (0.2162), (0.9754), + (6.2694), (-7.0540), (3.3316)), + ( (2.8930), (8.5380), (-3.3280)), (* P *) + ( (1.6980), (7.6960), (-3.5570)), (* O1P *) + ( (3.2260), (9.5010), (-4.4020)), (* O2P *) + ( (4.1590), (7.6040), (-3.0340)), (* O5' *) + ( (5.4352), (8.2183), (-2.7757)), (* C5' *) + ( (5.3830), (8.7883), (-1.8481)), (* H5' *) + ( (5.7729), (8.7436), (-3.6691)), (* H5'' *) + ( (6.4830), (7.1518), (-2.5252)), (* C4' *) + ( (7.4749), (7.5972), (-2.4482)), (* H4' *) + ( (6.1626), (6.4620), (-1.2827)), (* O4' *) + ( (6.5431), (5.0992), (-1.3905)), (* C1' *) + ( (7.2871), (4.9328), (-0.6114)), (* H1' *) + ( (7.1852), (4.8935), (-2.7592)), (* C2' *) + ( (6.8573), (3.9363), (-3.1645)), (* H2'' *) + ( (8.5780), (5.1025), (-2.6046)), (* O2' *) + ( (8.9516), (4.7577), (-1.7902)), (* H2' *) + ( (6.5522), (6.0300), (-3.5612)), (* C3' *) + ( (5.5420), (5.7356), (-3.8459)), (* H3' *) + ( (7.3487), (6.4089), (-4.6867)), (* O3' *) + ( (4.7442), (0.4514), (-0.1390)), (* N1 *) + ( (6.3687), (2.1459), (-0.5926)), (* N3 *) + ( (5.9795), (0.9335), (-0.2657)), (* C2 *) + ( (5.3052), (2.9471), (-0.8125)), (* C4 *) + ( (3.9891), (2.5987), (-0.7230)), (* C5 *) + ( (3.7016), (1.2717), (-0.3647)), (* C6 *) + (G ( + ( (6.8745), (-0.0224), (-0.0058)), (* N2 *) + ( (3.1770), (3.6859), (-1.0198)), (* N7 *) + ( (5.3247), (4.2695), (-1.1710)), (* N9 *) + ( (4.0156), (4.6415), (-1.2759)), (* C8 *) + ( (2.4553), (0.7925), (-0.2390)), (* O6 *) + ( (4.6497), (-0.5095), (0.1212)), (* H1 *) + ( (6.6836), (-0.9771), (0.2627)), (* H21 *) + ( (7.8474), (0.2424), (-0.0653)), (* H22 *) + ( (3.4426), (5.5361), (-1.5199))) (* H8 *) + ) + ) + + +let rG02 + = ( + ( (0.5566), (0.0449), (0.8296), (* dgf_base_tfo *) + (0.5125), (0.7673), (-0.3854), + (-0.6538), (0.6397), (0.4041), + (-9.1161), (-3.7679), (-2.9968)), + ( (-0.8143), (-0.5091), (-0.2788), (* P_O3'_275_tfo *) + (-0.0433), (-0.4257), (0.9038), + (-0.5788), (0.7480), (0.3246), + (1.5227), (6.9114), (-7.0765)), + ( (0.3822), (-0.7477), (0.5430), (* P_O3'_180_tfo *) + (0.4552), (0.6637), (0.5935), + (-0.8042), (0.0203), (0.5941), + (-6.9472), (-4.1186), (-5.9108)), + ( (0.5640), (0.8007), (-0.2022), (* P_O3'_60_tfo *) + (-0.8247), (0.5587), (-0.0878), + (0.0426), (0.2162), (0.9754), + (6.2694), (-7.0540), (3.3316)), + ( (2.8930), (8.5380), (-3.3280)), (* P *) + ( (1.6980), (7.6960), (-3.5570)), (* O1P *) + ( (3.2260), (9.5010), (-4.4020)), (* O2P *) + ( (4.1590), (7.6040), (-3.0340)), (* O5' *) + ( (4.5778), (6.6594), (-4.0364)), (* C5' *) + ( (4.9220), (7.1963), (-4.9204)), (* H5' *) + ( (3.7996), (5.9091), (-4.1764)), (* H5'' *) + ( (5.7873), (5.8869), (-3.5482)), (* C4' *) + ( (6.0405), (5.0875), (-4.2446)), (* H4' *) + ( (6.9135), (6.8036), (-3.4310)), (* O4' *) + ( (7.7293), (6.4084), (-2.3392)), (* C1' *) + ( (8.7078), (6.1815), (-2.7624)), (* H1' *) + ( (7.1305), (5.1418), (-1.7347)), (* C2' *) + ( (7.2040), (5.1982), (-0.6486)), (* H2'' *) + ( (7.7417), (4.0392), (-2.3813)), (* O2' *) + ( (8.6785), (4.1443), (-2.5630)), (* H2' *) + ( (5.6666), (5.2728), (-2.1536)), (* C3' *) + ( (5.1747), (5.9805), (-1.4863)), (* H3' *) + ( (4.9997), (4.0086), (-2.1973)), (* O3' *) + ( (10.3245), (8.5459), (1.5467)), (* N1 *) + ( (9.8051), (6.9432), (-0.1497)), (* N3 *) + ( (10.5175), (7.4328), (0.8408)), (* C2 *) + ( (8.7523), (7.7422), (-0.4228)), (* C4 *) + ( (8.4257), (8.9060), (0.2099)), (* C5 *) + ( (9.2665), (9.3242), (1.2540)), (* C6 *) + (G ( + ( (11.6077), (6.7966), (1.2752)), (* N2 *) + ( (7.2750), (9.4537), (-0.3428)), (* N7 *) + ( (7.7962), (7.5519), (-1.3859)), (* N9 *) + ( (6.9479), (8.6157), (-1.2771)), (* C8 *) + ( (9.0664), (10.4462), (1.9610)), (* O6 *) + ( (10.9838), (8.7524), (2.2697)), (* H1 *) + ( (12.2274), (7.0896), (2.0170)), (* H21 *) + ( (11.8502), (5.9398), (0.7984)), (* H22 *) + ( (6.0430), (8.9853), (-1.7594))) (* H8 *) + ) + ) + + +let rG03 + = ( + ( (-0.5021), (0.0731), (0.8617), (* dgf_base_tfo *) + (-0.8112), (0.3054), (-0.4986), + (-0.2996), (-0.9494), (-0.0940), + (6.4273), (-5.1944), (-3.7807)), + ( (-0.8143), (-0.5091), (-0.2788), (* P_O3'_275_tfo *) + (-0.0433), (-0.4257), (0.9038), + (-0.5788), (0.7480), (0.3246), + (1.5227), (6.9114), (-7.0765)), + ( (0.3822), (-0.7477), (0.5430), (* P_O3'_180_tfo *) + (0.4552), (0.6637), (0.5935), + (-0.8042), (0.0203), (0.5941), + (-6.9472), (-4.1186), (-5.9108)), + ( (0.5640), (0.8007), (-0.2022), (* P_O3'_60_tfo *) + (-0.8247), (0.5587), (-0.0878), + (0.0426), (0.2162), (0.9754), + (6.2694), (-7.0540), (3.3316)), + ( (2.8930), (8.5380), (-3.3280)), (* P *) + ( (1.6980), (7.6960), (-3.5570)), (* O1P *) + ( (3.2260), (9.5010), (-4.4020)), (* O2P *) + ( (4.1590), (7.6040), (-3.0340)), (* O5' *) + ( (4.1214), (6.7116), (-1.9049)), (* C5' *) + ( (3.3465), (5.9610), (-2.0607)), (* H5' *) + ( (4.0789), (7.2928), (-0.9837)), (* H5'' *) + ( (5.4170), (5.9293), (-1.8186)), (* C4' *) + ( (5.4506), (5.3400), (-0.9023)), (* H4' *) + ( (5.5067), (5.0417), (-2.9703)), (* O4' *) + ( (6.8650), (4.9152), (-3.3612)), (* C1' *) + ( (7.1090), (3.8577), (-3.2603)), (* H1' *) + ( (7.7152), (5.7282), (-2.3894)), (* C2' *) + ( (8.5029), (6.2356), (-2.9463)), (* H2'' *) + ( (8.1036), (4.8568), (-1.3419)), (* O2' *) + ( (8.3270), (3.9651), (-1.6184)), (* H2' *) + ( (6.7003), (6.7565), (-1.8911)), (* C3' *) + ( (6.5898), (7.5329), (-2.6482)), (* H3' *) + ( (7.0505), (7.2878), (-0.6105)), (* O3' *) + ( (9.6740), (4.7656), (-7.6614)), (* N1 *) + ( (9.0739), (4.3013), (-5.3941)), (* N3 *) + ( (9.8416), (4.2192), (-6.4581)), (* C2 *) + ( (7.9885), (5.0632), (-5.6446)), (* C4 *) + ( (7.6822), (5.6856), (-6.8194)), (* C5 *) + ( (8.5831), (5.5215), (-7.8840)), (* C6 *) + (G ( + ( (10.9733), (3.5117), (-6.4286)), (* N2 *) + ( (6.4857), (6.3816), (-6.7035)), (* N7 *) + ( (6.9740), (5.3703), (-4.7760)), (* N9 *) + ( (6.1133), (6.1613), (-5.4808)), (* C8 *) + ( (8.4084), (6.0747), (-9.0933)), (* O6 *) + ( (10.3759), (4.5855), (-8.3504)), (* H1 *) + ( (11.6254), (3.3761), (-7.1879)), (* H21 *) + ( (11.1917), (3.0460), (-5.5593)), (* H22 *) + ( (5.1705), (6.6830), (-5.3167))) (* H8 *) + ) + ) + + +let rG04 + = ( + ( (-0.5426), (-0.8175), (0.1929), (* dgf_base_tfo *) + (0.8304), (-0.5567), (-0.0237), + (0.1267), (0.1473), (0.9809), + (-0.5075), (8.3929), (0.2229)), + ( (-0.8143), (-0.5091), (-0.2788), (* P_O3'_275_tfo *) + (-0.0433), (-0.4257), (0.9038), + (-0.5788), (0.7480), (0.3246), + (1.5227), (6.9114), (-7.0765)), + ( (0.3822), (-0.7477), (0.5430), (* P_O3'_180_tfo *) + (0.4552), (0.6637), (0.5935), + (-0.8042), (0.0203), (0.5941), + (-6.9472), (-4.1186), (-5.9108)), + ( (0.5640), (0.8007), (-0.2022), (* P_O3'_60_tfo *) + (-0.8247), (0.5587), (-0.0878), + (0.0426), (0.2162), (0.9754), + (6.2694), (-7.0540), (3.3316)), + ( (2.8930), (8.5380), (-3.3280)), (* P *) + ( (1.6980), (7.6960), (-3.5570)), (* O1P *) + ( (3.2260), (9.5010), (-4.4020)), (* O2P *) + ( (4.1590), (7.6040), (-3.0340)), (* O5' *) + ( (5.4352), (8.2183), (-2.7757)), (* C5' *) + ( (5.3830), (8.7883), (-1.8481)), (* H5' *) + ( (5.7729), (8.7436), (-3.6691)), (* H5'' *) + ( (6.4830), (7.1518), (-2.5252)), (* C4' *) + ( (7.4749), (7.5972), (-2.4482)), (* H4' *) + ( (6.1626), (6.4620), (-1.2827)), (* O4' *) + ( (6.5431), (5.0992), (-1.3905)), (* C1' *) + ( (7.2871), (4.9328), (-0.6114)), (* H1' *) + ( (7.1852), (4.8935), (-2.7592)), (* C2' *) + ( (6.8573), (3.9363), (-3.1645)), (* H2'' *) + ( (8.5780), (5.1025), (-2.6046)), (* O2' *) + ( (8.9516), (4.7577), (-1.7902)), (* H2' *) + ( (6.5522), (6.0300), (-3.5612)), (* C3' *) + ( (5.5420), (5.7356), (-3.8459)), (* H3' *) + ( (7.3487), (6.4089), (-4.6867)), (* O3' *) + ( (3.6343), (2.6680), (2.0783)), (* N1 *) + ( (5.4505), (3.9805), (1.2446)), (* N3 *) + ( (4.7540), (3.3816), (2.1851)), (* C2 *) + ( (4.8805), (3.7951), (0.0354)), (* C4 *) + ( (3.7416), (3.0925), (-0.2305)), (* C5 *) + ( (3.0873), (2.4980), (0.8606)), (* C6 *) + (G ( + ( (5.1433), (3.4373), (3.4609)), (* N2 *) + ( (3.4605), (3.1184), (-1.5906)), (* N7 *) + ( (5.3247), (4.2695), (-1.1710)), (* N9 *) + ( (4.4244), (3.8244), (-2.0953)), (* C8 *) + ( (1.9600), (1.7805), (0.7462)), (* O6 *) + ( (3.2489), (2.2879), (2.9191)), (* H1 *) + ( (4.6785), (3.0243), (4.2568)), (* H21 *) + ( (5.9823), (3.9654), (3.6539)), (* H22 *) + ( (4.2675), (3.8876), (-3.1721))) (* H8 *) + ) + ) + + +let rG05 + = ( + ( (-0.5891), (0.0449), (0.8068), (* dgf_base_tfo *) + (0.5375), (0.7673), (0.3498), + (-0.6034), (0.6397), (-0.4762), + (-0.3019), (-3.7679), (-9.5913)), + ( (-0.8143), (-0.5091), (-0.2788), (* P_O3'_275_tfo *) + (-0.0433), (-0.4257), (0.9038), + (-0.5788), (0.7480), (0.3246), + (1.5227), (6.9114), (-7.0765)), + ( (0.3822), (-0.7477), (0.5430), (* P_O3'_180_tfo *) + (0.4552), (0.6637), (0.5935), + (-0.8042), (0.0203), (0.5941), + (-6.9472), (-4.1186), (-5.9108)), + ( (0.5640), (0.8007), (-0.2022), (* P_O3'_60_tfo *) + (-0.8247), (0.5587), (-0.0878), + (0.0426), (0.2162), (0.9754), + (6.2694), (-7.0540), (3.3316)), + ( (2.8930), (8.5380), (-3.3280)), (* P *) + ( (1.6980), (7.6960), (-3.5570)), (* O1P *) + ( (3.2260), (9.5010), (-4.4020)), (* O2P *) + ( (4.1590), (7.6040), (-3.0340)), (* O5' *) + ( (4.5778), (6.6594), (-4.0364)), (* C5' *) + ( (4.9220), (7.1963), (-4.9204)), (* H5' *) + ( (3.7996), (5.9091), (-4.1764)), (* H5'' *) + ( (5.7873), (5.8869), (-3.5482)), (* C4' *) + ( (6.0405), (5.0875), (-4.2446)), (* H4' *) + ( (6.9135), (6.8036), (-3.4310)), (* O4' *) + ( (7.7293), (6.4084), (-2.3392)), (* C1' *) + ( (8.7078), (6.1815), (-2.7624)), (* H1' *) + ( (7.1305), (5.1418), (-1.7347)), (* C2' *) + ( (7.2040), (5.1982), (-0.6486)), (* H2'' *) + ( (7.7417), (4.0392), (-2.3813)), (* O2' *) + ( (8.6785), (4.1443), (-2.5630)), (* H2' *) + ( (5.6666), (5.2728), (-2.1536)), (* C3' *) + ( (5.1747), (5.9805), (-1.4863)), (* H3' *) + ( (4.9997), (4.0086), (-2.1973)), (* O3' *) + ( (10.2594), (10.6774), (-1.0056)), (* N1 *) + ( (9.7528), (8.7080), (-2.2631)), (* N3 *) + ( (10.4471), (9.7876), (-1.9791)), (* C2 *) + ( (8.7271), (8.5575), (-1.3991)), (* C4 *) + ( (8.4100), (9.3803), (-0.3580)), (* C5 *) + ( (9.2294), (10.5030), (-0.1574)), (* C6 *) + (G ( + ( (11.5110), (10.1256), (-2.7114)), (* N2 *) + ( (7.2891), (8.9068), (0.3121)), (* N7 *) + ( (7.7962), (7.5519), (-1.3859)), (* N9 *) + ( (6.9702), (7.8292), (-0.3353)), (* C8 *) + ( (9.0349), (11.3951), (0.8250)), (* O6 *) + ( (10.9013), (11.4422), (-0.9512)), (* H1 *) + ( (12.1031), (10.9341), (-2.5861)), (* H21 *) + ( (11.7369), (9.5180), (-3.4859)), (* H22 *) + ( (6.0888), (7.3990), (0.1403))) (* H8 *) + ) + ) + + +let rG06 + = ( + ( (-0.9815), (0.0731), (-0.1772), (* dgf_base_tfo *) + (0.1912), (0.3054), (-0.9328), + (-0.0141), (-0.9494), (-0.3137), + (5.7506), (-5.1944), (4.7470)), + ( (-0.8143), (-0.5091), (-0.2788), (* P_O3'_275_tfo *) + (-0.0433), (-0.4257), (0.9038), + (-0.5788), (0.7480), (0.3246), + (1.5227), (6.9114), (-7.0765)), + ( (0.3822), (-0.7477), (0.5430), (* P_O3'_180_tfo *) + (0.4552), (0.6637), (0.5935), + (-0.8042), (0.0203), (0.5941), + (-6.9472), (-4.1186), (-5.9108)), + ( (0.5640), (0.8007), (-0.2022), (* P_O3'_60_tfo *) + (-0.8247), (0.5587), (-0.0878), + (0.0426), (0.2162), (0.9754), + (6.2694), (-7.0540), (3.3316)), + ( (2.8930), (8.5380), (-3.3280)), (* P *) + ( (1.6980), (7.6960), (-3.5570)), (* O1P *) + ( (3.2260), (9.5010), (-4.4020)), (* O2P *) + ( (4.1590), (7.6040), (-3.0340)), (* O5' *) + ( (4.1214), (6.7116), (-1.9049)), (* C5' *) + ( (3.3465), (5.9610), (-2.0607)), (* H5' *) + ( (4.0789), (7.2928), (-0.9837)), (* H5'' *) + ( (5.4170), (5.9293), (-1.8186)), (* C4' *) + ( (5.4506), (5.3400), (-0.9023)), (* H4' *) + ( (5.5067), (5.0417), (-2.9703)), (* O4' *) + ( (6.8650), (4.9152), (-3.3612)), (* C1' *) + ( (7.1090), (3.8577), (-3.2603)), (* H1' *) + ( (7.7152), (5.7282), (-2.3894)), (* C2' *) + ( (8.5029), (6.2356), (-2.9463)), (* H2'' *) + ( (8.1036), (4.8568), (-1.3419)), (* O2' *) + ( (8.3270), (3.9651), (-1.6184)), (* H2' *) + ( (6.7003), (6.7565), (-1.8911)), (* C3' *) + ( (6.5898), (7.5329), (-2.6482)), (* H3' *) + ( (7.0505), (7.2878), (-0.6105)), (* O3' *) + ( (6.6624), (3.5061), (-8.2986)), (* N1 *) + ( (6.5810), (3.2570), (-5.9221)), (* N3 *) + ( (6.5151), (2.8263), (-7.1625)), (* C2 *) + ( (6.8364), (4.5817), (-5.8882)), (* C4 *) + ( (7.0116), (5.4064), (-6.9609)), (* C5 *) + ( (6.9173), (4.8260), (-8.2361)), (* C6 *) + (G ( + ( (6.2717), (1.5402), (-7.4250)), (* N2 *) + ( (7.2573), (6.7070), (-6.5394)), (* N7 *) + ( (6.9740), (5.3703), (-4.7760)), (* N9 *) + ( (7.2238), (6.6275), (-5.2453)), (* C8 *) + ( (7.0668), (5.5163), (-9.3763)), (* O6 *) + ( (6.5754), (2.9964), (-9.1545)), (* H1 *) + ( (6.1908), (1.1105), (-8.3354)), (* H21 *) + ( (6.1346), (0.9352), (-6.6280)), (* H22 *) + ( (7.4108), (7.6227), (-4.8418))) (* H8 *) + ) + ) + + +let rG07 + = ( + ( (0.0894), (-0.6059), (0.7905), (* dgf_base_tfo *) + (-0.6810), (0.5420), (0.4924), + (-0.7268), (-0.5824), (-0.3642), + (34.1424), (45.9610), (-11.8600)), + ( (-0.8644), (-0.4956), (-0.0851), (* P_O3'_275_tfo *) + (-0.0427), (0.2409), (-0.9696), + (0.5010), (-0.8345), (-0.2294), + (4.0167), (54.5377), (12.4779)), + ( (0.3706), (-0.6167), (0.6945), (* P_O3'_180_tfo *) + (-0.2867), (-0.7872), (-0.5460), + (0.8834), (0.0032), (-0.4686), + (-52.9020), (18.6313), (-0.6709)), + ( (0.4155), (0.9025), (-0.1137), (* P_O3'_60_tfo *) + (0.9040), (-0.4236), (-0.0582), + (-0.1007), (-0.0786), (-0.9918), + (-7.6624), (-25.2080), (49.5181)), + ( (31.3810), (0.1400), (47.5810)), (* P *) + ( (29.9860), (0.6630), (47.6290)), (* O1P *) + ( (31.7210), (-0.6460), (48.8090)), (* O2P *) + ( (32.4940), (1.2540), (47.2740)), (* O5' *) + ( (33.8709), (0.7918), (47.2113)), (* C5' *) + ( (34.1386), (0.5870), (46.1747)), (* H5' *) + ( (34.0186), (-0.0095), (47.9353)), (* H5'' *) + ( (34.7297), (1.9687), (47.6685)), (* C4' *) + ( (35.7723), (1.6845), (47.8113)), (* H4' *) + ( (34.6455), (2.9768), (46.6660)), (* O4' *) + ( (34.1690), (4.1829), (47.2627)), (* C1' *) + ( (35.0437), (4.7633), (47.5560)), (* H1' *) + ( (33.4145), (3.7532), (48.4954)), (* C2' *) + ( (32.4340), (3.3797), (48.2001)), (* H2'' *) + ( (33.3209), (4.6953), (49.5217)), (* O2' *) + ( (33.2374), (5.6059), (49.2295)), (* H2' *) + ( (34.2724), (2.5970), (48.9773)), (* C3' *) + ( (33.6373), (1.8935), (49.5157)), (* H3' *) + ( (35.3453), (3.1884), (49.7285)), (* O3' *) + ( (34.0511), (7.8930), (43.7791)), (* N1 *) + ( (34.9937), (6.3369), (45.3199)), (* N3 *) + ( (35.0882), (7.3126), (44.4200)), (* C2 *) + ( (33.7190), (5.9650), (45.5374)), (* C4 *) + ( (32.5845), (6.4770), (44.9458)), (* C5 *) + ( (32.7430), (7.5179), (43.9914)), (* C6 *) + (G ( + ( (36.3030), (7.7827), (44.1036)), (* N2 *) + ( (31.4499), (5.8335), (45.4368)), (* N7 *) + ( (33.2760), (4.9817), (46.4043)), (* N9 *) + ( (31.9235), (4.9639), (46.2934)), (* C8 *) + ( (31.8602), (8.1000), (43.3695)), (* O6 *) + ( (34.2623), (8.6223), (43.1283)), (* H1 *) + ( (36.5188), (8.5081), (43.4347)), (* H21 *) + ( (37.0888), (7.3524), (44.5699)), (* H22 *) + ( (31.0815), (4.4201), (46.7218))) (* H8 *) + ) + ) + + +let rG08 + = ( + ( (0.2224), (0.6335), (0.7411), (* dgf_base_tfo *) + (-0.3644), (-0.6510), (0.6659), + (0.9043), (-0.4181), (0.0861), + (-47.6824), (-0.5823), (-31.7554)), + ( (-0.8644), (-0.4956), (-0.0851), (* P_O3'_275_tfo *) + (-0.0427), (0.2409), (-0.9696), + (0.5010), (-0.8345), (-0.2294), + (4.0167), (54.5377), (12.4779)), + ( (0.3706), (-0.6167), (0.6945), (* P_O3'_180_tfo *) + (-0.2867), (-0.7872), (-0.5460), + (0.8834), (0.0032), (-0.4686), + (-52.9020), (18.6313), (-0.6709)), + ( (0.4155), (0.9025), (-0.1137), (* P_O3'_60_tfo *) + (0.9040), (-0.4236), (-0.0582), + (-0.1007), (-0.0786), (-0.9918), + (-7.6624), (-25.2080), (49.5181)), + ( (31.3810), (0.1400), (47.5810)), (* P *) + ( (29.9860), (0.6630), (47.6290)), (* O1P *) + ( (31.7210), (-0.6460), (48.8090)), (* O2P *) + ( (32.4940), (1.2540), (47.2740)), (* O5' *) + ( (32.5924), (2.3488), (48.2255)), (* C5' *) + ( (33.3674), (2.1246), (48.9584)), (* H5' *) + ( (31.5994), (2.5917), (48.6037)), (* H5'' *) + ( (33.0722), (3.5577), (47.4258)), (* C4' *) + ( (33.0310), (4.4778), (48.0089)), (* H4' *) + ( (34.4173), (3.3055), (47.0316)), (* O4' *) + ( (34.5056), (3.3910), (45.6094)), (* C1' *) + ( (34.7881), (4.4152), (45.3663)), (* H1' *) + ( (33.1122), (3.1198), (45.1010)), (* C2' *) + ( (32.9230), (2.0469), (45.1369)), (* H2'' *) + ( (32.7946), (3.6590), (43.8529)), (* O2' *) + ( (33.5170), (3.6707), (43.2207)), (* H2' *) + ( (32.2730), (3.8173), (46.1566)), (* C3' *) + ( (31.3094), (3.3123), (46.2244)), (* H3' *) + ( (32.2391), (5.2039), (45.7807)), (* O3' *) + ( (39.3337), (2.7157), (44.1441)), (* N1 *) + ( (37.4430), (3.8242), (45.0824)), (* N3 *) + ( (38.7276), (3.7646), (44.7403)), (* C2 *) + ( (36.7791), (2.6963), (44.7704)), (* C4 *) + ( (37.2860), (1.5653), (44.1678)), (* C5 *) + ( (38.6647), (1.5552), (43.8235)), (* C6 *) + (G ( + ( (39.5123), (4.8216), (44.9936)), (* N2 *) + ( (36.2829), (0.6110), (44.0078)), (* N7 *) + ( (35.4394), (2.4314), (44.9931)), (* N9 *) + ( (35.2180), (1.1815), (44.5128)), (* C8 *) + ( (39.2907), (0.6514), (43.2796)), (* O6 *) + ( (40.3076), (2.8048), (43.9352)), (* H1 *) + ( (40.4994), (4.9066), (44.7977)), (* H21 *) + ( (39.0738), (5.6108), (45.4464)), (* H22 *) + ( (34.3856), (0.4842), (44.4185))) (* H8 *) + ) + ) + + +let rG09 + = ( + ( (-0.9699), (-0.1688), (-0.1753), (* dgf_base_tfo *) + (-0.1050), (-0.3598), (0.9271), + (-0.2196), (0.9176), (0.3312), + (45.6217), (-38.9484), (-12.3208)), + ( (-0.8644), (-0.4956), (-0.0851), (* P_O3'_275_tfo *) + (-0.0427), (0.2409), (-0.9696), + (0.5010), (-0.8345), (-0.2294), + (4.0167), (54.5377), (12.4779)), + ( (0.3706), (-0.6167), (0.6945), (* P_O3'_180_tfo *) + (-0.2867), (-0.7872), (-0.5460), + (0.8834), (0.0032), (-0.4686), + (-52.9020), (18.6313), (-0.6709)), + ( (0.4155), (0.9025), (-0.1137), (* P_O3'_60_tfo *) + (0.9040), (-0.4236), (-0.0582), + (-0.1007), (-0.0786), (-0.9918), + (-7.6624), (-25.2080), (49.5181)), + ( (31.3810), (0.1400), (47.5810)), (* P *) + ( (29.9860), (0.6630), (47.6290)), (* O1P *) + ( (31.7210), (-0.6460), (48.8090)), (* O2P *) + ( (32.4940), (1.2540), (47.2740)), (* O5' *) + ( (33.8709), (0.7918), (47.2113)), (* C5' *) + ( (34.1386), (0.5870), (46.1747)), (* H5' *) + ( (34.0186), (-0.0095), (47.9353)), (* H5'' *) + ( (34.7297), (1.9687), (47.6685)), (* C4' *) + ( (34.5880), (2.8482), (47.0404)), (* H4' *) + ( (34.3575), (2.2770), (49.0081)), (* O4' *) + ( (35.5157), (2.1993), (49.8389)), (* C1' *) + ( (35.9424), (3.2010), (49.8893)), (* H1' *) + ( (36.4701), (1.2820), (49.1169)), (* C2' *) + ( (36.1545), (0.2498), (49.2683)), (* H2'' *) + ( (37.8262), (1.4547), (49.4008)), (* O2' *) + ( (38.0227), (1.6945), (50.3094)), (* H2' *) + ( (36.2242), (1.6797), (47.6725)), (* C3' *) + ( (36.4297), (0.8197), (47.0351)), (* H3' *) + ( (37.0289), (2.8480), (47.4426)), (* O3' *) + ( (34.3005), (3.5042), (54.6070)), (* N1 *) + ( (34.7693), (3.7936), (52.2874)), (* N3 *) + ( (34.4484), (4.2541), (53.4939)), (* C2 *) + ( (34.9354), (2.4584), (52.2785)), (* C4 *) + ( (34.8092), (1.5915), (53.3422)), (* C5 *) + ( (34.4646), (2.1367), (54.6085)), (* C6 *) + (G ( + ( (34.2514), (5.5708), (53.6503)), (* N2 *) + ( (35.0641), (0.2835), (52.9337)), (* N7 *) + ( (35.2669), (1.6690), (51.1915)), (* N9 *) + ( (35.3288), (0.3954), (51.6563)), (* C8 *) + ( (34.3151), (1.5317), (55.6650)), (* O6 *) + ( (34.0623), (3.9797), (55.4539)), (* H1 *) + ( (33.9950), (6.0502), (54.5016)), (* H21 *) + ( (34.3512), (6.1432), (52.8242)), (* H22 *) + ( (35.5414), (-0.6006), (51.2679))) (* H8 *) + ) + ) + + +let rG10 + = ( + ( (-0.0980), (-0.9723), (0.2122), (* dgf_base_tfo *) + (-0.9731), (0.1383), (0.1841), + (-0.2083), (-0.1885), (-0.9597), + (17.8469), (38.8265), (37.0475)), + ( (-0.8644), (-0.4956), (-0.0851), (* P_O3'_275_tfo *) + (-0.0427), (0.2409), (-0.9696), + (0.5010), (-0.8345), (-0.2294), + (4.0167), (54.5377), (12.4779)), + ( (0.3706), (-0.6167), (0.6945), (* P_O3'_180_tfo *) + (-0.2867), (-0.7872), (-0.5460), + (0.8834), (0.0032), (-0.4686), + (-52.9020), (18.6313), (-0.6709)), + ( (0.4155), (0.9025), (-0.1137), (* P_O3'_60_tfo *) + (0.9040), (-0.4236), (-0.0582), + (-0.1007), (-0.0786), (-0.9918), + (-7.6624), (-25.2080), (49.5181)), + ( (31.3810), (0.1400), (47.5810)), (* P *) + ( (29.9860), (0.6630), (47.6290)), (* O1P *) + ( (31.7210), (-0.6460), (48.8090)), (* O2P *) + ( (32.4940), (1.2540), (47.2740)), (* O5' *) + ( (32.5924), (2.3488), (48.2255)), (* C5' *) + ( (33.3674), (2.1246), (48.9584)), (* H5' *) + ( (31.5994), (2.5917), (48.6037)), (* H5'' *) + ( (33.0722), (3.5577), (47.4258)), (* C4' *) + ( (34.0333), (3.3761), (46.9447)), (* H4' *) + ( (32.0890), (3.8338), (46.4332)), (* O4' *) + ( (31.6377), (5.1787), (46.5914)), (* C1' *) + ( (32.2499), (5.8016), (45.9392)), (* H1' *) + ( (31.9167), (5.5319), (48.0305)), (* C2' *) + ( (31.1507), (5.0820), (48.6621)), (* H2'' *) + ( (32.0865), (6.8890), (48.3114)), (* O2' *) + ( (31.5363), (7.4819), (47.7942)), (* H2' *) + ( (33.2398), (4.8224), (48.2563)), (* C3' *) + ( (33.3166), (4.5570), (49.3108)), (* H3' *) + ( (34.2528), (5.7056), (47.7476)), (* O3' *) + ( (28.2782), (6.3049), (42.9364)), (* N1 *) + ( (30.4001), (5.8547), (43.9258)), (* N3 *) + ( (29.6195), (6.1568), (42.8913)), (* C2 *) + ( (29.7005), (5.7006), (45.0649)), (* C4 *) + ( (28.3383), (5.8221), (45.2343)), (* C5 *) + ( (27.5519), (6.1461), (44.0958)), (* C6 *) + (G ( + ( (30.1838), (6.3385), (41.6890)), (* N2 *) + ( (27.9936), (5.5926), (46.5651)), (* N7 *) + ( (30.2046), (5.3825), (46.3136)), (* N9 *) + ( (29.1371), (5.3398), (47.1506)), (* C8 *) + ( (26.3361), (6.3024), (44.0495)), (* O6 *) + ( (27.8122), (6.5394), (42.0833)), (* H1 *) + ( (29.7125), (6.5595), (40.8235)), (* H21 *) + ( (31.1859), (6.2231), (41.6389)), (* H22 *) + ( (28.9406), (5.1504), (48.2059))) (* H8 *) + ) + ) + + +let rGs = [rG01;rG02;rG03;rG04;rG05;rG06;rG07;rG08;rG09;rG10] + + +let rU + = ( + ( (-0.0359), (-0.8071), (0.5894), (* dgf_base_tfo *) + (-0.2669), (0.5761), (0.7726), + (-0.9631), (-0.1296), (-0.2361), + (0.1584), (8.3434), (0.5434)), + ( (-0.8313), (-0.4738), (-0.2906), (* P_O3'_275_tfo *) + (0.0649), (0.4366), (-0.8973), + (0.5521), (-0.7648), (-0.3322), + (1.6833), (6.8060), (-7.0011)), + ( (0.3445), (-0.7630), (0.5470), (* P_O3'_180_tfo *) + (-0.4628), (-0.6450), (-0.6082), + (0.8168), (-0.0436), (-0.5753), + (-6.8179), (-3.9778), (-5.9887)), + ( (0.5855), (0.7931), (-0.1682), (* P_O3'_60_tfo *) + (0.8103), (-0.5790), (0.0906), + (-0.0255), (-0.1894), (-0.9816), + (6.1203), (-7.1051), (3.1984)), + ( (2.6760), (-8.4960), (3.2880)), (* P *) + ( (1.4950), (-7.6230), (3.4770)), (* O1P *) + ( (2.9490), (-9.4640), (4.3740)), (* O2P *) + ( (3.9730), (-7.5950), (3.0340)), (* O5' *) + ( (5.2430), (-8.2420), (2.8260)), (* C5' *) + ( (5.1974), (-8.8497), (1.9223)), (* H5' *) + ( (5.5548), (-8.7348), (3.7469)), (* H5'' *) + ( (6.3140), (-7.2060), (2.5510)), (* C4' *) + ( (7.2954), (-7.6762), (2.4898)), (* H4' *) + ( (6.0140), (-6.5420), (1.2890)), (* O4' *) + ( (6.4190), (-5.1840), (1.3620)), (* C1' *) + ( (7.1608), (-5.0495), (0.5747)), (* H1' *) + ( (7.0760), (-4.9560), (2.7270)), (* C2' *) + ( (6.7770), (-3.9803), (3.1099)), (* H2'' *) + ( (8.4500), (-5.1930), (2.5810)), (* O2' *) + ( (8.8309), (-4.8755), (1.7590)), (* H2' *) + ( (6.4060), (-6.0590), (3.5580)), (* C3' *) + ( (5.4021), (-5.7313), (3.8281)), (* H3' *) + ( (7.1570), (-6.4240), (4.7070)), (* O3' *) + ( (5.2170), (-4.3260), (1.1690)), (* N1 *) + ( (4.2960), (-2.2560), (0.6290)), (* N3 *) + ( (5.4330), (-3.0200), (0.7990)), (* C2 *) + ( (2.9930), (-2.6780), (0.7940)), (* C4 *) + ( (2.8670), (-4.0630), (1.1830)), (* C5 *) + ( (3.9570), (-4.8300), (1.3550)), (* C6 *) + (U ( + ( (6.5470), (-2.5560), (0.6290)), (* O2 *) + ( (2.0540), (-1.9000), (0.6130)), (* O4 *) + ( (4.4300), (-1.3020), (0.3600)), (* H3 *) + ( (1.9590), (-4.4570), (1.3250)), (* H5 *) + ( (3.8460), (-5.7860), (1.6240))) (* H6 *) + ) + ) + + +let rU01 + = ( + ( (-0.0137), (-0.8012), (0.5983), (* dgf_base_tfo *) + (-0.2523), (0.5817), (0.7733), + (-0.9675), (-0.1404), (-0.2101), + (0.2031), (8.3874), (0.4228)), + ( (-0.8313), (-0.4738), (-0.2906), (* P_O3'_275_tfo *) + (0.0649), (0.4366), (-0.8973), + (0.5521), (-0.7648), (-0.3322), + (1.6833), (6.8060), (-7.0011)), + ( (0.3445), (-0.7630), (0.5470), (* P_O3'_180_tfo *) + (-0.4628), (-0.6450), (-0.6082), + (0.8168), (-0.0436), (-0.5753), + (-6.8179), (-3.9778), (-5.9887)), + ( (0.5855), (0.7931), (-0.1682), (* P_O3'_60_tfo *) + (0.8103), (-0.5790), (0.0906), + (-0.0255), (-0.1894), (-0.9816), + (6.1203), (-7.1051), (3.1984)), + ( (2.6760), (-8.4960), (3.2880)), (* P *) + ( (1.4950), (-7.6230), (3.4770)), (* O1P *) + ( (2.9490), (-9.4640), (4.3740)), (* O2P *) + ( (3.9730), (-7.5950), (3.0340)), (* O5' *) + ( (5.2416), (-8.2422), (2.8181)), (* C5' *) + ( (5.2050), (-8.8128), (1.8901)), (* H5' *) + ( (5.5368), (-8.7738), (3.7227)), (* H5'' *) + ( (6.3232), (-7.2037), (2.6002)), (* C4' *) + ( (7.3048), (-7.6757), (2.5577)), (* H4' *) + ( (6.0635), (-6.5092), (1.3456)), (* O4' *) + ( (6.4697), (-5.1547), (1.4629)), (* C1' *) + ( (7.2354), (-5.0043), (0.7018)), (* H1' *) + ( (7.0856), (-4.9610), (2.8521)), (* C2' *) + ( (6.7777), (-3.9935), (3.2487)), (* H2'' *) + ( (8.4627), (-5.1992), (2.7423)), (* O2' *) + ( (8.8693), (-4.8638), (1.9399)), (* H2' *) + ( (6.3877), (-6.0809), (3.6362)), (* C3' *) + ( (5.3770), (-5.7562), (3.8834)), (* H3' *) + ( (7.1024), (-6.4754), (4.7985)), (* O3' *) + ( (5.2764), (-4.2883), (1.2538)), (* N1 *) + ( (4.3777), (-2.2062), (0.7229)), (* N3 *) + ( (5.5069), (-2.9779), (0.9088)), (* C2 *) + ( (3.0693), (-2.6246), (0.8500)), (* C4 *) + ( (2.9279), (-4.0146), (1.2149)), (* C5 *) + ( (4.0101), (-4.7892), (1.4017)), (* C6 *) + (U ( + ( (6.6267), (-2.5166), (0.7728)), (* O2 *) + ( (2.1383), (-1.8396), (0.6581)), (* O4 *) + ( (4.5223), (-1.2489), (0.4716)), (* H3 *) + ( (2.0151), (-4.4065), (1.3290)), (* H5 *) + ( (3.8886), (-5.7486), (1.6535))) (* H6 *) + ) + ) + + +let rU02 + = ( + ( (0.5141), (0.0246), (0.8574), (* dgf_base_tfo *) + (-0.5547), (-0.7529), (0.3542), + (0.6542), (-0.6577), (-0.3734), + (-9.1111), (-3.4598), (-3.2939)), + ( (-0.8313), (-0.4738), (-0.2906), (* P_O3'_275_tfo *) + (0.0649), (0.4366), (-0.8973), + (0.5521), (-0.7648), (-0.3322), + (1.6833), (6.8060), (-7.0011)), + ( (0.3445), (-0.7630), (0.5470), (* P_O3'_180_tfo *) + (-0.4628), (-0.6450), (-0.6082), + (0.8168), (-0.0436), (-0.5753), + (-6.8179), (-3.9778), (-5.9887)), + ( (0.5855), (0.7931), (-0.1682), (* P_O3'_60_tfo *) + (0.8103), (-0.5790), (0.0906), + (-0.0255), (-0.1894), (-0.9816), + (6.1203), (-7.1051), (3.1984)), + ( (2.6760), (-8.4960), (3.2880)), (* P *) + ( (1.4950), (-7.6230), (3.4770)), (* O1P *) + ( (2.9490), (-9.4640), (4.3740)), (* O2P *) + ( (3.9730), (-7.5950), (3.0340)), (* O5' *) + ( (4.3825), (-6.6585), (4.0489)), (* C5' *) + ( (4.6841), (-7.2019), (4.9443)), (* H5' *) + ( (3.6189), (-5.8889), (4.1625)), (* H5'' *) + ( (5.6255), (-5.9175), (3.5998)), (* C4' *) + ( (5.8732), (-5.1228), (4.3034)), (* H4' *) + ( (6.7337), (-6.8605), (3.5222)), (* O4' *) + ( (7.5932), (-6.4923), (2.4548)), (* C1' *) + ( (8.5661), (-6.2983), (2.9064)), (* H1' *) + ( (7.0527), (-5.2012), (1.8322)), (* C2' *) + ( (7.1627), (-5.2525), (0.7490)), (* H2'' *) + ( (7.6666), (-4.1249), (2.4880)), (* O2' *) + ( (8.5944), (-4.2543), (2.6981)), (* H2' *) + ( (5.5661), (-5.3029), (2.2009)), (* C3' *) + ( (5.0841), (-6.0018), (1.5172)), (* H3' *) + ( (4.9062), (-4.0452), (2.2042)), (* O3' *) + ( (7.6298), (-7.6136), (1.4752)), (* N1 *) + ( (8.6945), (-8.7046), (-0.2857)), (* N3 *) + ( (8.6943), (-7.6514), (0.6066)), (* C2 *) + ( (7.7426), (-9.6987), (-0.3801)), (* C4 *) + ( (6.6642), (-9.5742), (0.5722)), (* C5 *) + ( (6.6391), (-8.5592), (1.4526)), (* C6 *) + (U ( + ( (9.5840), (-6.8186), (0.6136)), (* O2 *) + ( (7.8505), (-10.5925), (-1.2223)), (* O4 *) + ( (9.4601), (-8.7514), (-0.9277)), (* H3 *) + ( (5.9281), (-10.2509), (0.5782)), (* H5 *) + ( (5.8831), (-8.4931), (2.1028))) (* H6 *) + ) + ) + + +let rU03 + = ( + ( (-0.4993), (0.0476), (0.8651), (* dgf_base_tfo *) + (0.8078), (-0.3353), (0.4847), + (0.3132), (0.9409), (0.1290), + (6.2989), (-5.2303), (-3.8577)), + ( (-0.8313), (-0.4738), (-0.2906), (* P_O3'_275_tfo *) + (0.0649), (0.4366), (-0.8973), + (0.5521), (-0.7648), (-0.3322), + (1.6833), (6.8060), (-7.0011)), + ( (0.3445), (-0.7630), (0.5470), (* P_O3'_180_tfo *) + (-0.4628), (-0.6450), (-0.6082), + (0.8168), (-0.0436), (-0.5753), + (-6.8179), (-3.9778), (-5.9887)), + ( (0.5855), (0.7931), (-0.1682), (* P_O3'_60_tfo *) + (0.8103), (-0.5790), (0.0906), + (-0.0255), (-0.1894), (-0.9816), + (6.1203), (-7.1051), (3.1984)), + ( (2.6760), (-8.4960), (3.2880)), (* P *) + ( (1.4950), (-7.6230), (3.4770)), (* O1P *) + ( (2.9490), (-9.4640), (4.3740)), (* O2P *) + ( (3.9730), (-7.5950), (3.0340)), (* O5' *) + ( (3.9938), (-6.7042), (1.9023)), (* C5' *) + ( (3.2332), (-5.9343), (2.0319)), (* H5' *) + ( (3.9666), (-7.2863), (0.9812)), (* H5'' *) + ( (5.3098), (-5.9546), (1.8564)), (* C4' *) + ( (5.3863), (-5.3702), (0.9395)), (* H4' *) + ( (5.3851), (-5.0642), (3.0076)), (* O4' *) + ( (6.7315), (-4.9724), (3.4462)), (* C1' *) + ( (7.0033), (-3.9202), (3.3619)), (* H1' *) + ( (7.5997), (-5.8018), (2.4948)), (* C2' *) + ( (8.3627), (-6.3254), (3.0707)), (* H2'' *) + ( (8.0410), (-4.9501), (1.4724)), (* O2' *) + ( (8.2781), (-4.0644), (1.7570)), (* H2' *) + ( (6.5701), (-6.8129), (1.9714)), (* C3' *) + ( (6.4186), (-7.5809), (2.7299)), (* H3' *) + ( (6.9357), (-7.3841), (0.7235)), (* O3' *) + ( (6.8024), (-5.4718), (4.8475)), (* N1 *) + ( (7.9218), (-5.5700), (6.8877)), (* N3 *) + ( (7.8908), (-5.0886), (5.5944)), (* C2 *) + ( (6.9789), (-6.3827), (7.4823)), (* C4 *) + ( (5.8742), (-6.7319), (6.6202)), (* C5 *) + ( (5.8182), (-6.2769), (5.3570)), (* C6 *) + (U ( + ( (8.7747), (-4.3728), (5.1568)), (* O2 *) + ( (7.1154), (-6.7509), (8.6509)), (* O4 *) + ( (8.7055), (-5.3037), (7.4491)), (* H3 *) + ( (5.1416), (-7.3178), (6.9665)), (* H5 *) + ( (5.0441), (-6.5310), (4.7784))) (* H6 *) + ) + ) + + +let rU04 + = ( + ( (-0.5669), (-0.8012), (0.1918), (* dgf_base_tfo *) + (-0.8129), (0.5817), (0.0273), + (-0.1334), (-0.1404), (-0.9811), + (-0.3279), (8.3874), (0.3355)), + ( (-0.8313), (-0.4738), (-0.2906), (* P_O3'_275_tfo *) + (0.0649), (0.4366), (-0.8973), + (0.5521), (-0.7648), (-0.3322), + (1.6833), (6.8060), (-7.0011)), + ( (0.3445), (-0.7630), (0.5470), (* P_O3'_180_tfo *) + (-0.4628), (-0.6450), (-0.6082), + (0.8168), (-0.0436), (-0.5753), + (-6.8179), (-3.9778), (-5.9887)), + ( (0.5855), (0.7931), (-0.1682), (* P_O3'_60_tfo *) + (0.8103), (-0.5790), (0.0906), + (-0.0255), (-0.1894), (-0.9816), + (6.1203), (-7.1051), (3.1984)), + ( (2.6760), (-8.4960), (3.2880)), (* P *) + ( (1.4950), (-7.6230), (3.4770)), (* O1P *) + ( (2.9490), (-9.4640), (4.3740)), (* O2P *) + ( (3.9730), (-7.5950), (3.0340)), (* O5' *) + ( (5.2416), (-8.2422), (2.8181)), (* C5' *) + ( (5.2050), (-8.8128), (1.8901)), (* H5' *) + ( (5.5368), (-8.7738), (3.7227)), (* H5'' *) + ( (6.3232), (-7.2037), (2.6002)), (* C4' *) + ( (7.3048), (-7.6757), (2.5577)), (* H4' *) + ( (6.0635), (-6.5092), (1.3456)), (* O4' *) + ( (6.4697), (-5.1547), (1.4629)), (* C1' *) + ( (7.2354), (-5.0043), (0.7018)), (* H1' *) + ( (7.0856), (-4.9610), (2.8521)), (* C2' *) + ( (6.7777), (-3.9935), (3.2487)), (* H2'' *) + ( (8.4627), (-5.1992), (2.7423)), (* O2' *) + ( (8.8693), (-4.8638), (1.9399)), (* H2' *) + ( (6.3877), (-6.0809), (3.6362)), (* C3' *) + ( (5.3770), (-5.7562), (3.8834)), (* H3' *) + ( (7.1024), (-6.4754), (4.7985)), (* O3' *) + ( (5.2764), (-4.2883), (1.2538)), (* N1 *) + ( (3.8961), (-3.0896), (-0.1893)), (* N3 *) + ( (5.0095), (-3.8907), (-0.0346)), (* C2 *) + ( (3.0480), (-2.6632), (0.8116)), (* C4 *) + ( (3.4093), (-3.1310), (2.1292)), (* C5 *) + ( (4.4878), (-3.9124), (2.3088)), (* C6 *) + (U ( + ( (5.7005), (-4.2164), (-0.9842)), (* O2 *) + ( (2.0800), (-1.9458), (0.5503)), (* O4 *) + ( (3.6834), (-2.7882), (-1.1190)), (* H3 *) + ( (2.8508), (-2.8721), (2.9172)), (* H5 *) + ( (4.7188), (-4.2247), (3.2295))) (* H6 *) + ) + ) + + +let rU05 + = ( + ( (-0.6298), (0.0246), (0.7763), (* dgf_base_tfo *) + (-0.5226), (-0.7529), (-0.4001), + (0.5746), (-0.6577), (0.4870), + (-0.0208), (-3.4598), (-9.6882)), + ( (-0.8313), (-0.4738), (-0.2906), (* P_O3'_275_tfo *) + (0.0649), (0.4366), (-0.8973), + (0.5521), (-0.7648), (-0.3322), + (1.6833), (6.8060), (-7.0011)), + ( (0.3445), (-0.7630), (0.5470), (* P_O3'_180_tfo *) + (-0.4628), (-0.6450), (-0.6082), + (0.8168), (-0.0436), (-0.5753), + (-6.8179), (-3.9778), (-5.9887)), + ( (0.5855), (0.7931), (-0.1682), (* P_O3'_60_tfo *) + (0.8103), (-0.5790), (0.0906), + (-0.0255), (-0.1894), (-0.9816), + (6.1203), (-7.1051), (3.1984)), + ( (2.6760), (-8.4960), (3.2880)), (* P *) + ( (1.4950), (-7.6230), (3.4770)), (* O1P *) + ( (2.9490), (-9.4640), (4.3740)), (* O2P *) + ( (3.9730), (-7.5950), (3.0340)), (* O5' *) + ( (4.3825), (-6.6585), (4.0489)), (* C5' *) + ( (4.6841), (-7.2019), (4.9443)), (* H5' *) + ( (3.6189), (-5.8889), (4.1625)), (* H5'' *) + ( (5.6255), (-5.9175), (3.5998)), (* C4' *) + ( (5.8732), (-5.1228), (4.3034)), (* H4' *) + ( (6.7337), (-6.8605), (3.5222)), (* O4' *) + ( (7.5932), (-6.4923), (2.4548)), (* C1' *) + ( (8.5661), (-6.2983), (2.9064)), (* H1' *) + ( (7.0527), (-5.2012), (1.8322)), (* C2' *) + ( (7.1627), (-5.2525), (0.7490)), (* H2'' *) + ( (7.6666), (-4.1249), (2.4880)), (* O2' *) + ( (8.5944), (-4.2543), (2.6981)), (* H2' *) + ( (5.5661), (-5.3029), (2.2009)), (* C3' *) + ( (5.0841), (-6.0018), (1.5172)), (* H3' *) + ( (4.9062), (-4.0452), (2.2042)), (* O3' *) + ( (7.6298), (-7.6136), (1.4752)), (* N1 *) + ( (8.5977), (-9.5977), (0.7329)), (* N3 *) + ( (8.5951), (-8.5745), (1.6594)), (* C2 *) + ( (7.7372), (-9.7371), (-0.3364)), (* C4 *) + ( (6.7596), (-8.6801), (-0.4476)), (* C5 *) + ( (6.7338), (-7.6721), (0.4408)), (* C6 *) + (U ( + ( (9.3993), (-8.5377), (2.5743)), (* O2 *) + ( (7.8374), (-10.6990), (-1.1008)), (* O4 *) + ( (9.2924), (-10.3081), (0.8477)), (* H3 *) + ( (6.0932), (-8.6982), (-1.1929)), (* H5 *) + ( (6.0481), (-6.9515), (0.3446))) (* H6 *) + ) + ) + + +let rU06 + = ( + ( (-0.9837), (0.0476), (-0.1733), (* dgf_base_tfo *) + (-0.1792), (-0.3353), (0.9249), + (-0.0141), (0.9409), (0.3384), + (5.7793), (-5.2303), (4.5997)), + ( (-0.8313), (-0.4738), (-0.2906), (* P_O3'_275_tfo *) + (0.0649), (0.4366), (-0.8973), + (0.5521), (-0.7648), (-0.3322), + (1.6833), (6.8060), (-7.0011)), + ( (0.3445), (-0.7630), (0.5470), (* P_O3'_180_tfo *) + (-0.4628), (-0.6450), (-0.6082), + (0.8168), (-0.0436), (-0.5753), + (-6.8179), (-3.9778), (-5.9887)), + ( (0.5855), (0.7931), (-0.1682), (* P_O3'_60_tfo *) + (0.8103), (-0.5790), (0.0906), + (-0.0255), (-0.1894), (-0.9816), + (6.1203), (-7.1051), (3.1984)), + ( (2.6760), (-8.4960), (3.2880)), (* P *) + ( (1.4950), (-7.6230), (3.4770)), (* O1P *) + ( (2.9490), (-9.4640), (4.3740)), (* O2P *) + ( (3.9730), (-7.5950), (3.0340)), (* O5' *) + ( (3.9938), (-6.7042), (1.9023)), (* C5' *) + ( (3.2332), (-5.9343), (2.0319)), (* H5' *) + ( (3.9666), (-7.2863), (0.9812)), (* H5'' *) + ( (5.3098), (-5.9546), (1.8564)), (* C4' *) + ( (5.3863), (-5.3702), (0.9395)), (* H4' *) + ( (5.3851), (-5.0642), (3.0076)), (* O4' *) + ( (6.7315), (-4.9724), (3.4462)), (* C1' *) + ( (7.0033), (-3.9202), (3.3619)), (* H1' *) + ( (7.5997), (-5.8018), (2.4948)), (* C2' *) + ( (8.3627), (-6.3254), (3.0707)), (* H2'' *) + ( (8.0410), (-4.9501), (1.4724)), (* O2' *) + ( (8.2781), (-4.0644), (1.7570)), (* H2' *) + ( (6.5701), (-6.8129), (1.9714)), (* C3' *) + ( (6.4186), (-7.5809), (2.7299)), (* H3' *) + ( (6.9357), (-7.3841), (0.7235)), (* O3' *) + ( (6.8024), (-5.4718), (4.8475)), (* N1 *) + ( (6.6920), (-5.0495), (7.1354)), (* N3 *) + ( (6.6201), (-4.5500), (5.8506)), (* C2 *) + ( (6.9254), (-6.3614), (7.4926)), (* C4 *) + ( (7.1046), (-7.2543), (6.3718)), (* C5 *) + ( (7.0391), (-6.7951), (5.1106)), (* C6 *) + (U ( + ( (6.4083), (-3.3696), (5.6340)), (* O2 *) + ( (6.9679), (-6.6901), (8.6800)), (* O4 *) + ( (6.5626), (-4.3957), (7.8812)), (* H3 *) + ( (7.2781), (-8.2254), (6.5350)), (* H5 *) + ( (7.1657), (-7.4312), (4.3503))) (* H6 *) + ) + ) + + +let rU07 + = ( + ( (-0.9434), (0.3172), (0.0971), (* dgf_base_tfo *) + (0.2294), (0.4125), (0.8816), + (0.2396), (0.8539), (-0.4619), + (8.3625), (-52.7147), (1.3745)), + ( (0.2765), (-0.1121), (-0.9545), (* P_O3'_275_tfo *) + (-0.8297), (0.4733), (-0.2959), + (0.4850), (0.8737), (0.0379), + (-14.7774), (-45.2464), (21.9088)), + ( (0.1063), (-0.6334), (-0.7665), (* P_O3'_180_tfo *) + (-0.5932), (-0.6591), (0.4624), + (-0.7980), (0.4055), (-0.4458), + (43.7634), (4.3296), (28.4890)), + ( (0.7136), (-0.5032), (-0.4873), (* P_O3'_60_tfo *) + (0.6803), (0.3317), (0.6536), + (-0.1673), (-0.7979), (0.5791), + (-17.1858), (41.4390), (-27.0751)), + ( (21.3880), (15.0780), (45.5770)), (* P *) + ( (21.9980), (14.5500), (46.8210)), (* O1P *) + ( (21.1450), (14.0270), (44.5420)), (* O2P *) + ( (22.1250), (16.3600), (44.9460)), (* O5' *) + ( (21.5037), (16.8594), (43.7323)), (* C5' *) + ( (20.8147), (17.6663), (43.9823)), (* H5' *) + ( (21.1086), (16.0230), (43.1557)), (* H5'' *) + ( (22.5654), (17.4874), (42.8616)), (* C4' *) + ( (22.1584), (17.7243), (41.8785)), (* H4' *) + ( (23.0557), (18.6826), (43.4751)), (* O4' *) + ( (24.4788), (18.6151), (43.6455)), (* C1' *) + ( (24.9355), (19.0840), (42.7739)), (* H1' *) + ( (24.7958), (17.1427), (43.6474)), (* C2' *) + ( (24.5652), (16.7400), (44.6336)), (* H2'' *) + ( (26.1041), (16.8773), (43.2455)), (* O2' *) + ( (26.7516), (17.5328), (43.5149)), (* H2' *) + ( (23.8109), (16.5979), (42.6377)), (* C3' *) + ( (23.5756), (15.5686), (42.9084)), (* H3' *) + ( (24.2890), (16.7447), (41.2729)), (* O3' *) + ( (24.9420), (19.2174), (44.8923)), (* N1 *) + ( (25.2655), (20.5636), (44.8883)), (* N3 *) + ( (25.1663), (21.2219), (43.8561)), (* C2 *) + ( (25.6911), (21.1219), (46.0494)), (* C4 *) + ( (25.8051), (20.4068), (47.2048)), (* C5 *) + ( (26.2093), (20.9962), (48.2534)), (* C6 *) + (U ( + ( (25.4692), (19.0221), (47.2053)), (* O2 *) + ( (25.0502), (18.4827), (46.0370)), (* O4 *) + ( (25.9599), (22.1772), (46.0966)), (* H3 *) + ( (25.5545), (18.4409), (48.1234)), (* H5 *) + ( (24.7854), (17.4265), (45.9883))) (* H6 *) + ) + ) + + +let rU08 + = ( + ( (-0.0080), (-0.7928), (0.6094), (* dgf_base_tfo *) + (-0.7512), (0.4071), (0.5197), + (-0.6601), (-0.4536), (-0.5988), + (44.1482), (30.7036), (2.1088)), + ( (0.2765), (-0.1121), (-0.9545), (* P_O3'_275_tfo *) + (-0.8297), (0.4733), (-0.2959), + (0.4850), (0.8737), (0.0379), + (-14.7774), (-45.2464), (21.9088)), + ( (0.1063), (-0.6334), (-0.7665), (* P_O3'_180_tfo *) + (-0.5932), (-0.6591), (0.4624), + (-0.7980), (0.4055), (-0.4458), + (43.7634), (4.3296), (28.4890)), + ( (0.7136), (-0.5032), (-0.4873), (* P_O3'_60_tfo *) + (0.6803), (0.3317), (0.6536), + (-0.1673), (-0.7979), (0.5791), + (-17.1858), (41.4390), (-27.0751)), + ( (21.3880), (15.0780), (45.5770)), (* P *) + ( (21.9980), (14.5500), (46.8210)), (* O1P *) + ( (21.1450), (14.0270), (44.5420)), (* O2P *) + ( (22.1250), (16.3600), (44.9460)), (* O5' *) + ( (23.5096), (16.1227), (44.5783)), (* C5' *) + ( (23.5649), (15.8588), (43.5222)), (* H5' *) + ( (23.9621), (15.4341), (45.2919)), (* H5'' *) + ( (24.2805), (17.4138), (44.7151)), (* C4' *) + ( (25.3492), (17.2309), (44.6030)), (* H4' *) + ( (23.8497), (18.3471), (43.7208)), (* O4' *) + ( (23.4090), (19.5681), (44.3321)), (* C1' *) + ( (24.2595), (20.2496), (44.3524)), (* H1' *) + ( (23.0418), (19.1813), (45.7407)), (* C2' *) + ( (22.0532), (18.7224), (45.7273)), (* H2'' *) + ( (23.1307), (20.2521), (46.6291)), (* O2' *) + ( (22.8888), (21.1051), (46.2611)), (* H2' *) + ( (24.0799), (18.1326), (46.0700)), (* C3' *) + ( (23.6490), (17.4370), (46.7900)), (* H3' *) + ( (25.3329), (18.7227), (46.5109)), (* O3' *) + ( (22.2515), (20.1624), (43.6698)), (* N1 *) + ( (22.4760), (21.0609), (42.6406)), (* N3 *) + ( (23.6229), (21.3462), (42.3061)), (* C2 *) + ( (21.3986), (21.6081), (42.0236)), (* C4 *) + ( (20.1189), (21.3012), (42.3804)), (* C5 *) + ( (19.1599), (21.8516), (41.7578)), (* C6 *) + (U ( + ( (19.8919), (20.3745), (43.4387)), (* O2 *) + ( (20.9790), (19.8423), (44.0440)), (* O4 *) + ( (21.5235), (22.3222), (41.2097)), (* H3 *) + ( (18.8732), (20.1200), (43.7312)), (* H5 *) + ( (20.8545), (19.1313), (44.8608))) (* H6 *) + ) + ) + + +let rU09 + = ( + ( (-0.0317), (0.1374), (0.9900), (* dgf_base_tfo *) + (-0.3422), (-0.9321), (0.1184), + (0.9391), (-0.3351), (0.0765), + (-32.1929), (25.8198), (-28.5088)), + ( (0.2765), (-0.1121), (-0.9545), (* P_O3'_275_tfo *) + (-0.8297), (0.4733), (-0.2959), + (0.4850), (0.8737), (0.0379), + (-14.7774), (-45.2464), (21.9088)), + ( (0.1063), (-0.6334), (-0.7665), (* P_O3'_180_tfo *) + (-0.5932), (-0.6591), (0.4624), + (-0.7980), (0.4055), (-0.4458), + (43.7634), (4.3296), (28.4890)), + ( (0.7136), (-0.5032), (-0.4873), (* P_O3'_60_tfo *) + (0.6803), (0.3317), (0.6536), + (-0.1673), (-0.7979), (0.5791), + (-17.1858), (41.4390), (-27.0751)), + ( (21.3880), (15.0780), (45.5770)), (* P *) + ( (21.9980), (14.5500), (46.8210)), (* O1P *) + ( (21.1450), (14.0270), (44.5420)), (* O2P *) + ( (22.1250), (16.3600), (44.9460)), (* O5' *) + ( (21.5037), (16.8594), (43.7323)), (* C5' *) + ( (20.8147), (17.6663), (43.9823)), (* H5' *) + ( (21.1086), (16.0230), (43.1557)), (* H5'' *) + ( (22.5654), (17.4874), (42.8616)), (* C4' *) + ( (23.0565), (18.3036), (43.3915)), (* H4' *) + ( (23.5375), (16.5054), (42.4925)), (* O4' *) + ( (23.6574), (16.4257), (41.0649)), (* C1' *) + ( (24.4701), (17.0882), (40.7671)), (* H1' *) + ( (22.3525), (16.9643), (40.5396)), (* C2' *) + ( (21.5993), (16.1799), (40.6133)), (* H2'' *) + ( (22.4693), (17.4849), (39.2515)), (* O2' *) + ( (23.0899), (17.0235), (38.6827)), (* H2' *) + ( (22.0341), (18.0633), (41.5279)), (* C3' *) + ( (20.9509), (18.1709), (41.5846)), (* H3' *) + ( (22.7249), (19.3020), (41.2100)), (* O3' *) + ( (23.8580), (15.0648), (40.5757)), (* N1 *) + ( (25.1556), (14.5982), (40.4523)), (* N3 *) + ( (26.1047), (15.3210), (40.7448)), (* C2 *) + ( (25.3391), (13.3315), (40.0020)), (* C4 *) + ( (24.2974), (12.5148), (39.6749)), (* C5 *) + ( (24.5450), (11.3410), (39.2610)), (* C6 *) + (U ( + ( (22.9633), (12.9979), (39.8053)), (* O2 *) + ( (22.8009), (14.2648), (40.2524)), (* O4 *) + ( (26.3414), (12.9194), (39.8855)), (* H3 *) + ( (22.1227), (12.3533), (39.5486)), (* H5 *) + ( (21.7989), (14.6788), (40.3650))) (* H6 *) + ) + ) + + +let rU10 + = ( + ( (-0.9674), (0.1021), (-0.2318), (* dgf_base_tfo *) + (-0.2514), (-0.2766), (0.9275), + (0.0306), (0.9555), (0.2933), + (27.8571), (-42.1305), (-24.4563)), + ( (0.2765), (-0.1121), (-0.9545), (* P_O3'_275_tfo *) + (-0.8297), (0.4733), (-0.2959), + (0.4850), (0.8737), (0.0379), + (-14.7774), (-45.2464), (21.9088)), + ( (0.1063), (-0.6334), (-0.7665), (* P_O3'_180_tfo *) + (-0.5932), (-0.6591), (0.4624), + (-0.7980), (0.4055), (-0.4458), + (43.7634), (4.3296), (28.4890)), + ( (0.7136), (-0.5032), (-0.4873), (* P_O3'_60_tfo *) + (0.6803), (0.3317), (0.6536), + (-0.1673), (-0.7979), (0.5791), + (-17.1858), (41.4390), (-27.0751)), + ( (21.3880), (15.0780), (45.5770)), (* P *) + ( (21.9980), (14.5500), (46.8210)), (* O1P *) + ( (21.1450), (14.0270), (44.5420)), (* O2P *) + ( (22.1250), (16.3600), (44.9460)), (* O5' *) + ( (23.5096), (16.1227), (44.5783)), (* C5' *) + ( (23.5649), (15.8588), (43.5222)), (* H5' *) + ( (23.9621), (15.4341), (45.2919)), (* H5'' *) + ( (24.2805), (17.4138), (44.7151)), (* C4' *) + ( (23.8509), (18.1819), (44.0720)), (* H4' *) + ( (24.2506), (17.8583), (46.0741)), (* O4' *) + ( (25.5830), (18.0320), (46.5775)), (* C1' *) + ( (25.8569), (19.0761), (46.4256)), (* H1' *) + ( (26.4410), (17.1555), (45.7033)), (* C2' *) + ( (26.3459), (16.1253), (46.0462)), (* H2'' *) + ( (27.7649), (17.5888), (45.6478)), (* O2' *) + ( (28.1004), (17.9719), (46.4616)), (* H2' *) + ( (25.7796), (17.2997), (44.3513)), (* C3' *) + ( (25.9478), (16.3824), (43.7871)), (* H3' *) + ( (26.2154), (18.4984), (43.6541)), (* O3' *) + ( (25.7321), (17.6281), (47.9726)), (* N1 *) + ( (25.5136), (18.5779), (48.9560)), (* N3 *) + ( (25.2079), (19.7276), (48.6503)), (* C2 *) + ( (25.6482), (18.1987), (50.2518)), (* C4 *) + ( (25.9847), (16.9266), (50.6092)), (* C5 *) + ( (26.0918), (16.6439), (51.8416)), (* C6 *) + (U ( + ( (26.2067), (15.9515), (49.5943)), (* O2 *) + ( (26.0713), (16.3497), (48.3080)), (* O4 *) + ( (25.4890), (18.9105), (51.0618)), (* H3 *) + ( (26.4742), (14.9310), (49.8682)), (* H5 *) + ( (26.2346), (15.6394), (47.4975))) (* H6 *) + ) + ) + + +let rUs = [rU01;rU02;rU03;rU04;rU05;rU06;rU07;rU08;rU09;rU10] + + +let rG' + = ( + ( (-0.2067), (-0.0264), (0.9780), (* dgf_base_tfo *) + (0.9770), (-0.0586), (0.2049), + (0.0519), (0.9979), (0.0379), + (1.0331), (-46.8078), (-36.4742)), + ( (-0.8644), (-0.4956), (-0.0851), (* P_O3'_275_tfo *) + (-0.0427), (0.2409), (-0.9696), + (0.5010), (-0.8345), (-0.2294), + (4.0167), (54.5377), (12.4779)), + ( (0.3706), (-0.6167), (0.6945), (* P_O3'_180_tfo *) + (-0.2867), (-0.7872), (-0.5460), + (0.8834), (0.0032), (-0.4686), + (-52.9020), (18.6313), (-0.6709)), + ( (0.4155), (0.9025), (-0.1137), (* P_O3'_60_tfo *) + (0.9040), (-0.4236), (-0.0582), + (-0.1007), (-0.0786), (-0.9918), + (-7.6624), (-25.2080), (49.5181)), + ( (31.3810), (0.1400), (47.5810)), (* P *) + ( (29.9860), (0.6630), (47.6290)), (* O1P *) + ( (31.7210), (-0.6460), (48.8090)), (* O2P *) + ( (32.4940), (1.2540), (47.2740)), (* O5' *) + ( (32.1610), (2.2370), (46.2560)), (* C5' *) + ( (31.2986), (2.8190), (46.5812)), (* H5' *) + ( (32.0980), (1.7468), (45.2845)), (* H5'' *) + ( (33.3476), (3.1959), (46.1947)), (* C4' *) + ( (33.2668), (3.8958), (45.3630)), (* H4' *) + ( (33.3799), (3.9183), (47.4216)), (* O4' *) + ( (34.6515), (3.7222), (48.0398)), (* C1' *) + ( (35.2947), (4.5412), (47.7180)), (* H1' *) + ( (35.1756), (2.4228), (47.4827)), (* C2' *) + ( (34.6778), (1.5937), (47.9856)), (* H2'' *) + ( (36.5631), (2.2672), (47.4798)), (* O2' *) + ( (37.0163), (2.6579), (48.2305)), (* H2' *) + ( (34.6953), (2.5043), (46.0448)), (* C3' *) + ( (34.5444), (1.4917), (45.6706)), (* H3' *) + ( (35.6679), (3.3009), (45.3487)), (* O3' *) + ( (37.4804), (4.0914), (52.2559)), (* N1 *) + ( (36.9670), (4.1312), (49.9281)), (* N3 *) + ( (37.8045), (4.2519), (50.9550)), (* C2 *) + ( (35.7171), (3.8264), (50.3222)), (* C4 *) + ( (35.2668), (3.6420), (51.6115)), (* C5 *) + ( (36.2037), (3.7829), (52.6706)), (* C6 *) + (G ( + ( (39.0869), (4.5552), (50.7092)), (* N2 *) + ( (33.9075), (3.3338), (51.6102)), (* N7 *) + ( (34.6126), (3.6358), (49.5108)), (* N9 *) + ( (33.5805), (3.3442), (50.3425)), (* C8 *) + ( (35.9958), (3.6512), (53.8724)), (* O6 *) + ( (38.2106), (4.2053), (52.9295)), (* H1 *) + ( (39.8218), (4.6863), (51.3896)), (* H21 *) + ( (39.3420), (4.6857), (49.7407)), (* H22 *) + ( (32.5194), (3.1070), (50.2664))) (* H8 *) + ) + ) + + +let rU' + = ( + ( (-0.0109), (0.5907), (0.8068), (* dgf_base_tfo *) + (0.2217), (-0.7853), (0.5780), + (0.9751), (0.1852), (-0.1224), + (-1.4225), (-11.0956), (-2.5217)), + ( (-0.8313), (-0.4738), (-0.2906), (* P_O3'_275_tfo *) + (0.0649), (0.4366), (-0.8973), + (0.5521), (-0.7648), (-0.3322), + (1.6833), (6.8060), (-7.0011)), + ( (0.3445), (-0.7630), (0.5470), (* P_O3'_180_tfo *) + (-0.4628), (-0.6450), (-0.6082), + (0.8168), (-0.0436), (-0.5753), + (-6.8179), (-3.9778), (-5.9887)), + ( (0.5855), (0.7931), (-0.1682), (* P_O3'_60_tfo *) + (0.8103), (-0.5790), (0.0906), + (-0.0255), (-0.1894), (-0.9816), + (6.1203), (-7.1051), (3.1984)), + ( (2.6760), (-8.4960), (3.2880)), (* P *) + ( (1.4950), (-7.6230), (3.4770)), (* O1P *) + ( (2.9490), (-9.4640), (4.3740)), (* O2P *) + ( (3.9730), (-7.5950), (3.0340)), (* O5' *) + ( (5.2430), (-8.2420), (2.8260)), (* C5' *) + ( (5.1974), (-8.8497), (1.9223)), (* H5' *) + ( (5.5548), (-8.7348), (3.7469)), (* H5'' *) + ( (6.3140), (-7.2060), (2.5510)), (* C4' *) + ( (5.8744), (-6.2116), (2.4731)), (* H4' *) + ( (7.2798), (-7.2260), (3.6420)), (* O4' *) + ( (8.5733), (-6.9410), (3.1329)), (* C1' *) + ( (8.9047), (-6.0374), (3.6446)), (* H1' *) + ( (8.4429), (-6.6596), (1.6327)), (* C2' *) + ( (9.2880), (-7.1071), (1.1096)), (* H2'' *) + ( (8.2502), (-5.2799), (1.4754)), (* O2' *) + ( (8.7676), (-4.7284), (2.0667)), (* H2' *) + ( (7.1642), (-7.4416), (1.3021)), (* C3' *) + ( (7.4125), (-8.5002), (1.2260)), (* H3' *) + ( (6.5160), (-6.9772), (0.1267)), (* O3' *) + ( (9.4531), (-8.1107), (3.4087)), (* N1 *) + ( (11.5931), (-9.0015), (3.6357)), (* N3 *) + ( (10.8101), (-7.8950), (3.3748)), (* C2 *) + ( (11.1439), (-10.2744), (3.9206)), (* C4 *) + ( (9.7056), (-10.4026), (3.9332)), (* C5 *) + ( (8.9192), (-9.3419), (3.6833)), (* C6 *) + (U ( + ( (11.3013), (-6.8063), (3.1326)), (* O2 *) + ( (11.9431), (-11.1876), (4.1375)), (* O4 *) + ( (12.5840), (-8.8673), (3.6158)), (* H3 *) + ( (9.2891), (-11.2898), (4.1313)), (* H5 *) + ( (7.9263), (-9.4537), (3.6977))) (* H6 *) + ) + ) + + +(* -- PARTIAL INSTANTIATIONS ------------------------------------------------*) + +type var = intg*tfo*nuc + +let mk_var i t n = (i,t,n : var) + +let absolute_pos (i,t,n: var) p = tfo_apply t p + +let atom_pos atom (i,t,n: var) = absolute_pos (i,t,n) (atom n) + +let rec get_var id ((i,t,n : var)::lst) = + if id = i then (i,t,n) else get_var id lst + +(* -- SEARCH ----------------------------------------------------------------*) + +(* Sequential backtracking algorithm *) + +let rec search partial_inst l constraint = + match l with + [] -> [partial_inst] + | (h::t) -> + let rec try_assignments = function + [] -> [] + | v::vs -> + if constraint v partial_inst then + (search (v::partial_inst) t constraint) @ (try_assignments vs) + else + try_assignments vs + in + try_assignments (h partial_inst) + + + +(* -- DOMAINS ---------------------------------------------------------------*) + +(* Primary structure: strand A CUGCCACGUCUG, strand B CAGACGUGGCAG + + Secondary structure: strand A CUGCCACGUCUG + |||||||||||| + GACGGUGCAGAC strand B + + Tertiary structure: + + 5' end of strand A C1----G12 3' end of strand B + U2-------A11 + G3-------C10 + C4-----G9 + C5---G8 + A6 + G6-C7 + C5----G8 + A4-------U9 + G3--------C10 + A2-------U11 + 5' end of strand B C1----G12 3' end of strand A + + "helix", "stacked" and "connected" describe the spatial relationship + between two consecutive nucleotides. E.g. the nucleotides C1 and U2 + from the strand A. + + "wc" (stands for Watson-Crick and is a type of base-pairing), + and "wc-dumas" describe the spatial relationship between + nucleotides from two chains that are growing in opposite directions. + E.g. the nucleotides C1 from strand A and G12 from strand B. +*) + +(* Dynamic Domains *) + +(* Given, + "refnuc" a nucleotide which is already positioned, + "nucl" the nucleotide to be placed, + and "tfo" a transformation matrix which expresses the desired + relationship between "refnuc" and "nucl", + the function "dgf-base" computes the transformation matrix that + places the nucleotide "nucl" in the given relationship to "refnuc". +*) + +let +dgf_base tfo v nucl + = let (i,t,n) = v in + let x = if is_A n then + tfo_align (atom_pos nuc_C1' v) + (atom_pos rA_N9 v) + (atom_pos nuc_C4 v) + else if is_C n then + tfo_align (atom_pos nuc_C1' v) + (atom_pos nuc_N1 v) + (atom_pos nuc_C2 v) + else if is_G n then + tfo_align (atom_pos nuc_C1' v) + (atom_pos rG_N9 v) + (atom_pos nuc_C4 v) + else + tfo_align (atom_pos nuc_C1' v) + (atom_pos nuc_N1 v) + (atom_pos nuc_C2 v) + in + tfo_combine (nuc_dgf_base_tfo nucl) + (tfo_combine tfo (tfo_inv_ortho x)) + + +(* Placement of first nucleotide. *) + +let +reference n i partial_inst = [ mk_var i tfo_id n ] + + +(* The transformation matrix for wc is from: + + Chandrasekaran R. et al (1989) A Re-Examination of the Crystal + Structure of A-DNA Using Fiber Diffraction Data. J. Biomol. + Struct. & Dynamics 6(6):1189-1202. +*) + +let wc_tfo + = ( + (-1.0000), (0.0028), (-0.0019), + (0.0028), (0.3468), (-0.9379), + (-0.0019), (-0.9379), (-0.3468), + (-0.0080), (6.0730), (8.7208) + ) + + +let +wc nucl i j partial_inst + = [ mk_var i (dgf_base wc_tfo (get_var j partial_inst) nucl) nucl ] + + +let wc_dumas_tfo + = ( + (-0.9737), (-0.1834), (0.1352), + (-0.1779), (0.2417), (-0.9539), + (0.1422), (-0.9529), (-0.2679), + (0.4837), (6.2649), (8.0285) + ) + + +let +wc_dumas nucl i j partial_inst + = [ mk_var i (dgf_base wc_dumas_tfo (get_var j partial_inst) nucl) nucl ] + + +let helix5'_tfo + = ( + (0.9886), (-0.0961), (0.1156), + (0.1424), (0.8452), (-0.5152), + (-0.0482), (0.5258), (0.8492), + (-3.8737), (0.5480), (3.8024) + ) + + +let +helix5' nucl i j partial_inst + = [ mk_var i (dgf_base helix5'_tfo (get_var j partial_inst) nucl) nucl ] + + +let helix3'_tfo + = ( + (0.9886), (0.1424), (-0.0482), + (-0.0961), (0.8452), (0.5258), + (0.1156), (-0.5152), (0.8492), + (3.4426), (2.0474), (-3.7042) + ) + + +let +helix3' nucl i j partial_inst + = [ mk_var i (dgf_base helix3'_tfo (get_var j partial_inst) nucl) nucl ] + + +let g37_a38_tfo + = ( + (0.9991), (0.0164), (-0.0387), + (-0.0375), (0.7616), (-0.6470), + (0.0189), (0.6478), (0.7615), + (-3.3018), (0.9975), (2.5585) + ) + + +let +g37_a38 nucl i j partial_inst + = mk_var i (dgf_base g37_a38_tfo (get_var j partial_inst) nucl) nucl + + +let +stacked5' nucl i j partial_inst + = (g37_a38 nucl i j partial_inst) :: (helix5' nucl i j partial_inst) + + +let a38_g37_tfo + = ( + (0.9991), (-0.0375), (0.0189), + (0.0164), (0.7616), (0.6478), + (-0.0387), (-0.6470), (0.7615), + (3.3819), (0.7718), (-2.5321) + ) + + +let +a38_g37 nucl i j partial_inst + = mk_var i (dgf_base a38_g37_tfo (get_var j partial_inst) nucl) nucl + + +let +stacked3' nucl i j partial_inst + = (a38_g37 nucl i j partial_inst) :: (helix3' nucl i j partial_inst) + + +let +p_o3' nucls i j partial_inst + = let refnuc = get_var j partial_inst in + let align = tfo_inv_ortho + (tfo_align (atom_pos nuc_O3' refnuc) + (atom_pos nuc_C3' refnuc) + (atom_pos nuc_C4' refnuc)) in + let rec generate domains = function + [] -> domains + | n::ns -> + generate + ((mk_var i (tfo_combine (nuc_p_o3'_60_tfo n) align) n):: + (mk_var i (tfo_combine (nuc_p_o3'_180_tfo n) align) n):: + (mk_var i (tfo_combine (nuc_p_o3'_275_tfo n) align) n)::domains) + ns + in + generate [] nucls + + +(* -- PROBLEM STATEMENT -----------------------------------------------------*) + +(* Define anticodon problem -- Science 253:1255 Figure 3a, 3b and 3c *) + +let +anticodon_domains + = [ + reference rC 27; + helix5' rC 28 27; + helix5' rA 29 28; + helix5' rG 30 29; + helix5' rA 31 30; + wc rU 39 31; + helix5' rC 40 39; + helix5' rU 41 40; + helix5' rG 42 41; + helix5' rG 43 42; + stacked3' rA 38 39; + stacked3' rG 37 38; + stacked3' rA 36 37; + stacked3' rA 35 36; + stacked3' rG 34 35; (* <-. Distance *) + p_o3' rCs 32 31; (* | Constraint *) + p_o3' rUs 33 32 (* <-' 3.0 Angstroms *) + ] + + +(* Anticodon constraint *) + +let +anticodon_constraint (i,t,n as v) partial_inst = + let rec dist j = let p = atom_pos nuc_P (get_var j partial_inst) in + let o3' = atom_pos nuc_O3' v in + pt_dist p o3' + in + if i = 33 then + (dist 34) <= 3.0 + else + true + + +let +anticodon () = search [] anticodon_domains anticodon_constraint + + +(* Define pseudoknot problem -- Science 253:1255 Figure 4a and 4b *) + +let +pseudoknot_domains + = [ + reference rA 23; + wc_dumas rU 8 23; + helix3' rG 22 23; + wc_dumas rC 9 22; + helix3' rG 21 22; + wc_dumas rC 10 21; + helix3' rC 20 21; + wc_dumas rG 11 20; + helix3' rU' 19 20; (* <-. *) + wc_dumas rA 12 19; (* | Distance *) +(* | Constraint *) +(* Helix 1 | 4.0 Angstroms *) + helix3' rC 3 19; (* | *) + wc_dumas rG 13 3; (* | *) + helix3' rC 2 3; (* | *) + wc_dumas rG 14 2; (* | *) + helix3' rC 1 2; (* | *) + wc_dumas rG' 15 1; (* | *) +(* | *) +(* L2 LOOP | *) + p_o3' rUs 16 15; (* | *) + p_o3' rCs 17 16; (* | *) + p_o3' rAs 18 17; (* <-' *) +(* *) +(* L1 LOOP *) + helix3' rU 7 8; (* <-. *) + p_o3' rCs 4 3; (* | Constraint *) + stacked5' rU 5 4; (* | 4.5 Angstroms *) + stacked5' rC 6 5 (* <-' *) + ] + + +(* Pseudoknot constraint *) + +let +pseudoknot_constraint (i,t,n as v) partial_inst = + let rec dist j = + let p = atom_pos nuc_P (get_var j partial_inst) in + let o3' = atom_pos nuc_O3' v in + pt_dist p o3' + in + if i = 18 then + (dist 19) <= 4.0 + else if i = 6 then + (dist 7) <= 4.5 + else + true + + +let +pseudoknot () = search [] pseudoknot_domains pseudoknot_constraint + + +(* -- TESTING ---------------------------------------------------------------*) + +let list_of_atoms = function + ((dgf_base_tfo,p_o3'_275_tfo,p_o3'_180_tfo,p_o3'_60_tfo, + p,o1p,o2p,o5',c5',h5',h5'',c4',h4',o4',c1',h1',c2',h2'',o2',h2', + c3',h3',o3',n1,n3,c2,c4,c5,c6, + A (n6,n7,n9,c8,h2,h61,h62,h8))) + -> [|p;o1p;o2p;o5';c5';h5';h5'';c4';h4';o4';c1';h1';c2';h2'';o2';h2';c3'; + h3';o3';n1;n3;c2;c4;c5;c6;n6;n7;n9;c8;h2;h61;h62;h8|] + +| ((dgf_base_tfo,p_o3'_275_tfo,p_o3'_180_tfo,p_o3'_60_tfo, + p,o1p,o2p,o5',c5',h5',h5'',c4',h4',o4',c1',h1',c2',h2'',o2',h2', + c3',h3',o3',n1,n3,c2,c4,c5,c6, + C (n4,o2,h41,h42,h5,h6))) + -> [|p;o1p;o2p;o5';c5';h5';h5'';c4';h4';o4';c1';h1';c2';h2'';o2';h2';c3'; + h3';o3';n1;n3;c2;c4;c5;c6;n4;o2;h41;h42;h5;h6|] + +| ((dgf_base_tfo,p_o3'_275_tfo,p_o3'_180_tfo,p_o3'_60_tfo, + p,o1p,o2p,o5',c5',h5',h5'',c4',h4',o4',c1',h1',c2',h2'',o2',h2', + c3',h3',o3',n1,n3,c2,c4,c5,c6, + G (n2,n7,n9,c8,o6,h1,h21,h22,h8))) + -> [|p;o1p;o2p;o5';c5';h5';h5'';c4';h4';o4';c1';h1';c2';h2'';o2';h2';c3'; + h3';o3';n1;n3;c2;c4;c5;c6;n2;n7;n9;c8;o6;h1;h21;h22;h8|] + +| ((dgf_base_tfo,p_o3'_275_tfo,p_o3'_180_tfo,p_o3'_60_tfo, + p,o1p,o2p,o5',c5',h5',h5'',c4',h4',o4',c1',h1',c2',h2'',o2',h2', + c3',h3',o3',n1,n3,c2,c4,c5,c6, + U (o2,o4,h3,h5,h6))) + -> [|p;o1p;o2p;o5';c5';h5';h5'';c4';h4';o4';c1';h1';c2';h2'';o2';h2';c3'; + h3';o3';n1;n3;c2;c4;c5;c6;o2;o4;h3;h5;h6|] + + +let maximum (x::xs) = + let rec loop (m:float) = function + [] -> m + | (a::b) -> loop (if a > m then a else m) b + in + loop x xs + + +let +var_most_distant_atom ((i,t,n) as v) = + let atoms = list_of_atoms n in + let max_dist = ref 0.0 in + for i = 0 to pred (Array.length atoms) do + let p = atoms.(i) in + let distance = let (x,y,z) = absolute_pos v p + in sqrt ((x * x) + (y * y) + (z * z)) in + if distance > !max_dist then max_dist := distance + done; + !max_dist + + +let +sol_most_distant_atom s = maximum (List.map var_most_distant_atom s) + + +let +most_distant_atom sols = maximum (List.map sol_most_distant_atom sols) + + +let +check () = List.length (pseudoknot ()) + + +let +run () = most_distant_atom (pseudoknot ()) + + +let main () = (Printf.printf "%.5f" (run ()); print_newline()) + +let _ = main (); exit 0 diff --git a/test/quicksort.ml b/test/quicksort.ml new file mode 100644 index 0000000000..07d6d08500 --- /dev/null +++ b/test/quicksort.ml @@ -0,0 +1,78 @@ +(* Good test for loops. Best compiled with unsafe libraries. *) + +let rec qsort lo hi (a : int array) = + if lo < hi then begin + let i = ref lo in + let j = ref hi in + let pivot = a.(hi) in + while !i < !j do + while !i < hi & a.(!i) <= pivot do incr i done; + while !j > lo & a.(!j) >= pivot do decr j done; + if !i < !j then begin + let temp = a.(!i) in a.(!i) <- a.(!j); a.(!j) <- temp + end + done; + let temp = a.(!i) in a.(!i) <- a.(hi); a.(hi) <- temp; + qsort lo (!i-1) a; + qsort (!i+1) hi a + end + + +(* Same but abstract over the comparison to force spilling *) + +let cmp i j = i - j + +let rec qsort2 lo hi (a : int array) = + if lo < hi then begin + let i = ref lo in + let j = ref hi in + let pivot = a.(hi) in + while !i < !j do + while !i < hi & cmp a.(!i) pivot <= 0 do incr i done; + while !j > lo & cmp a.(!j) pivot >= 0 do decr j done; + if !i < !j then begin + let temp = a.(!i) in a.(!i) <- a.(!j); a.(!j) <- temp + end + done; + let temp = a.(!i) in a.(!i) <- a.(hi); a.(hi) <- temp; + qsort2 lo (!i-1) a; + qsort2 (!i+1) hi a + end + + +(* Test *) + +let seed = ref 0 + +let random() = + seed := !seed * 25173 + 17431; !seed land 0xFFF + + +exception Failed + +let test_sort sort_fun size = + let a = Array.new size 0 in + let check = Array.new 4096 0 in + for i = 0 to size-1 do + let n = random() in a.(i) <- n; check.(n) <- check.(n)+1 + done; + sort_fun 0 (size-1) a; + try + check.(a.(0)) <- check.(a.(0)) - 1; + for i = 1 to size-1 do + if a.(i-1) > a.(i) then raise Failed; + check.(a.(i)) <- check.(a.(i)) - 1 + done; + for i = 0 to 4095 do + if check.(i) <> 0 then raise Failed + done; + print_string "OK"; print_newline() + with Failed -> + print_string "failed"; print_newline() + + +let main () = + test_sort qsort 50000; + test_sort qsort2 50000 + +let _ = main(); exit 0 diff --git a/test/sets.ml b/test/sets.ml new file mode 100644 index 0000000000..1364181b95 --- /dev/null +++ b/test/sets.ml @@ -0,0 +1,21 @@ +module IntSet = Set.Make(struct type t = int let compare x y = x-y end) + +let even = List.fold_right IntSet.add [0; -2; 2; 4; 6; -10] IntSet.empty + +let odd = List.fold_right IntSet.add [9; -7; 5; 1; -3] IntSet.empty + +let _ = + for i = -10 to 10 do + Printf.printf "%d %b %b\n" i (IntSet.mem i even) (IntSet.mem i odd) + done + +module IntSetSet = Set.Make(IntSet) + +let setofset = List.fold_right IntSetSet.add [even; odd] IntSetSet.empty + +let _ = + List.iter + (fun s -> Printf.printf "%b\n" (IntSetSet.mem s setofset)) + [IntSet.empty; even; odd; IntSet.union even odd] + +let _ = exit 0 diff --git a/test/sieve.ml b/test/sieve.ml new file mode 100644 index 0000000000..0cc8fbbedf --- /dev/null +++ b/test/sieve.ml @@ -0,0 +1,42 @@ +(* Eratosthene's sieve *) + +(* interval min max = [min; min+1; ...; max-1; max] *) + +let rec interval min max = + if min > max then [] else min :: interval (min + 1) max + + +(* filter p L returns the list of the elements in list L + that satisfy predicate p *) + +let rec filter p = function + [] -> [] + | a::r -> if p a then a :: filter p r else filter p r + + +(* Application: removing all numbers multiple of n from a list of integers *) + +let remove_multiples_of n = + filter (fun m -> m mod n <> 0) + + +(* The sieve itself *) + +let sieve max = + let rec filter_again = function + [] -> [] + | n::r as l -> + if n*n > max then l else n :: filter_again (remove_multiples_of n r) + in + filter_again (interval 2 max) + + +let rec do_list f = function + [] -> () + | a::l -> f a; do_list f l + + +let _ = + do_list (fun n -> print_int n; print_string " ") (sieve 40000); + print_newline(); + exit 0 diff --git a/test/soli.ml b/test/soli.ml new file mode 100644 index 0000000000..46d06b8287 --- /dev/null +++ b/test/soli.ml @@ -0,0 +1,97 @@ + +type peg = Out | Empty | Peg + +let board = [| + [| Out; Out; Out; Out; Out; Out; Out; Out; Out|]; + [| Out; Out; Out; Peg; Peg; Peg; Out; Out; Out|]; + [| Out; Out; Out; Peg; Peg; Peg; Out; Out; Out|]; + [| Out; Peg; Peg; Peg; Peg; Peg; Peg; Peg; Out|]; + [| Out; Peg; Peg; Peg; Empty; Peg; Peg; Peg; Out|]; + [| Out; Peg; Peg; Peg; Peg; Peg; Peg; Peg; Out|]; + [| Out; Out; Out; Peg; Peg; Peg; Out; Out; Out|]; + [| Out; Out; Out; Peg; Peg; Peg; Out; Out; Out|]; + [| Out; Out; Out; Out; Out; Out; Out; Out; Out|] +|] + + +let print_peg = function + Out -> print_string "." + | Empty -> print_string " " + | Peg -> print_string "$" + + +let print_board board = + for i=0 to 8 do + for j=0 to 8 do + print_peg board.(i).(j) + done; + print_newline() + done + + +type direction = { dx: int; dy: int } + +let dir = [| {dx = 0; dy = 1}; {dx = 1; dy = 0}; + {dx = 0; dy = -1}; {dx = -1; dy = 0} |] + +type move = { x1: int; y1: int; x2: int; y2: int } + +let moves = Array.new 31 {x1=0;y1=0;x2=0;y2=0} + +let counter = ref 0 + +exception Found + +let rec solve m = + counter := !counter + 1; + if m = 31 then + begin match board.(4).(4) with Peg -> true | _ -> false end + else + try + if !counter mod 500 = 0 then begin + print_int !counter; print_newline() + end; + for i=1 to 7 do + for j=1 to 7 do + match board.(i).(j) with + Peg -> + for k=0 to 3 do + let d1 = dir.(k).dx in + let d2 = dir.(k).dy in + let i1 = i+d1 in + let i2 = i1+d1 in + let j1 = j+d2 in + let j2 = j1+d2 in + match board.(i1).(j1) with + Peg -> + begin match board.(i2).(j2) with + Empty -> +(* + print_int i; print_string ", "; + print_int j; print_string ") dir "; + print_int k; print_string "\n"; +*) + board.(i).(j) <- Empty; + board.(i1).(j1) <- Empty; + board.(i2).(j2) <- Peg; + if solve(m+1) then begin + moves.(m) <- { x1=i; y1=j; x2=i2; y2=j2 }; + raise Found + end; + board.(i).(j) <- Peg; + board.(i1).(j1) <- Peg; + board.(i2).(j2) <- Empty + | _ -> () + end + | _ -> () + done + | _ -> + () + done + done; + false + with Found -> + true + + +let _ = if solve 0 then (print_string "\n"; print_board board) diff --git a/test/takc.ml b/test/takc.ml new file mode 100644 index 0000000000..f8ba8bdabb --- /dev/null +++ b/test/takc.ml @@ -0,0 +1,9 @@ +let rec tak x y z = + if x > y then tak (tak (x-1) y z) (tak (y-1) z x) (tak (z-1) x y) + else z + +let rec repeat n = + if n <= 0 then 0 else tak 18 12 6 + repeat(n-1) + +let _ = print_int (repeat 50); print_newline(); exit 0 + diff --git a/test/taku.ml b/test/taku.ml new file mode 100644 index 0000000000..42666f82d9 --- /dev/null +++ b/test/taku.ml @@ -0,0 +1,8 @@ +let rec tak (x, y, z) = + if x > y then tak(tak (x-1, y, z), tak (y-1, z, x), tak (z-1, x, y)) + else z + +let rec repeat n = + if n <= 0 then 0 else tak(18,12,6) + repeat(n-1) + +let _ = print_int (repeat 50); print_newline(); exit 0 diff --git a/tools/.depend b/tools/.depend new file mode 100644 index 0000000000..9a9abccc03 --- /dev/null +++ b/tools/.depend @@ -0,0 +1,3 @@ +oldumpobj.zo: ../bytecomp/runtimedef.zi ../bytecomp/lambda.zi \ + ../utils/tbl.zi ../utils/config.zi opnames.zo ../typing/ident.zi \ + ../bytecomp/emitcode.zi ../parsing/asttypes.zi diff --git a/tools/Makefile b/tools/Makefile new file mode 100644 index 0000000000..e4ca7290a6 --- /dev/null +++ b/tools/Makefile @@ -0,0 +1,44 @@ +CAMLC=../boot/camlrun ../boot/camlc -I ../boot +INCLUDES=-I ../utils -I ../parsing -I ../typing -I ../bytecomp +COMPFLAGS=$(INCLUDES) +LINKFLAGS=$(INCLUDES) + +all: dumpobj + +DUMPOBJ=opnames.zo dumpobj.zo + +dumpobj: $(DUMPOBJ) + $(CAMLC) $(LINKFLAGS) -o dumpobj misc.zo tbl.zo config.zo ident.zo opcodes.zo runtimedef.zo $(DUMPOBJ) + +clean:: + rm -f dumpobj + +opnames.ml: ../byterun/instruct.h + sed -e '/\/\*/d' \ + -e 's/enum \(.*\) {/let names_of_\1 = [|/' \ + -e 's/};$$/ |]/' \ + -e 's/\([A-Z][A-Z_0-9a-z]*\)/"\1"/g' \ + -e 's/,/;/g' \ + ../byterun/instruct.h > opnames.ml + +clean:: + rm -f opnames.ml + +beforedepend:: opnames.ml + +.SUFFIXES: +.SUFFIXES: .ml .zo .mli .zi + +.ml.zo: + $(CAMLC) -c $(COMPFLAGS) $< + +.mli.zi: + $(CAMLC) -c $(COMPFLAGS) $< + +clean:: + rm -f *.zo *.zi + +depend: beforedepend + camldep $(INCLUDES) *.mli *.ml > .depend + +include .depend diff --git a/tools/camldep b/tools/camldep new file mode 100755 index 0000000000..8f53257970 --- /dev/null +++ b/tools/camldep @@ -0,0 +1,91 @@ +#!/usr/local/bin/perl + +# To scan a Caml Light source file, find all references to external modules +# (open Foo or Foo.bar), and output the dependencies on standard output. +# +# Usage: camldep [-I path] <file> ... + +while ($#ARGV >= 0) { + $_ = shift(@ARGV); + if (/^-I(.*)$/) { + $dir = $1 ? $1 : shift(@ARGV); + $dir =~ s|/$||; + unshift(@path, $dir); + } + elsif (/(.*)\.mli$/ || /(.*)\.cmi$/) { + do scan_source ($_, "$1.cmi"); + } + elsif (/(.*)\.ml$/ || /(.*)\.cmo$/) { + do scan_source ($_, "$1.cmo"); + } + else { + die "Don't know what to do with $_"; + } +} + +sub scan_source { + local ($source_name, $target_name) = @_; + $modname = $target_name; + $modname =~ s|^.*/||; + $modname =~ s|\.z[io]$||; + undef(%imports); + open(SRC, $source_name) || return; + while(<SRC>) { + if (m/\bopen\s*([A-Z][a-zA-Z0-9_]*)\b/) { + $imports{$1} = 1; + } + while(m/\b([A-Z][a-zA-Z0-9_]*)\./) { + $imports{$1} = 1; + $_ = $'; + } + } + close(SRC); + undef(@deps); + if ($target_name =~ m/(.*)\.cmo$/ && -r ($source_name . "i")) { + push(@deps, "$1.cmi"); + } + foreach $modl (keys(%imports)) { + $modl = do lowercase($modl); + next if ($modl eq $modname); + if ($dep = do find_path ("$modl.mli")) { + $dep =~ s/\.mli$/.cmi/; + push(@deps, $dep); + } + elsif ($dep = do find_path ("$modl.ml")) { + $dep =~ s/\.ml$/.cmo/; + push(@deps, $dep); + } + } + if ($#deps >= 0) { + print "$target_name: "; + $col = length($target_name) + 2; + foreach $dep (@deps) { + next if $dep eq $target_name; + $col += length($dep) + 1; + if ($col >= 77) { + print "\\\n "; + $col = length($dep) + 5; + } + print $dep, " "; + } + print "\n"; + } +} + +sub find_path { + local ($filename) = @_; + return $filename if (-r $filename); + foreach $dir (@path) { + return "$dir/$filename" if (-r "$dir/$filename"); + } + return 0; +} + +sub lowercase { + local ($_) = @_; + m/^(.)(.*)$/; + $hd = $1; + $tl = $2; + $hd =~ tr/A-Z/a-z/; + return $hd . $tl; +} diff --git a/tools/camlsize b/tools/camlsize new file mode 100755 index 0000000000..8904d47a2c --- /dev/null +++ b/tools/camlsize @@ -0,0 +1,21 @@ +#!/usr/local/bin/perl + +foreach $f (@ARGV) { + open(FILE, $f) || die("Cannot open $f"); + seek(FILE, -28, 2); + $code_size = do read_int(); + $data_size = do read_int(); + $symbol_size = do read_int(); + $debug_size = do read_int(); + read(FILE, $magic, 12); + print $f, ":\n" if ($#ARGV > 0); + printf ("\tcode: %d data: %d symbols: %d debug: %d\n", + $code_size, $data_size, $symbol_size, $debug_size); + close(FILE); +} + +sub read_int { + read(FILE, $buff, 4) == 4 || die("Truncated bytecode file $f"); + @int = unpack("C4", $buff); + return ($int[0] << 24) + ($int[1] << 16) + ($int[2] << 8) + $int[3]; +} diff --git a/tools/convert b/tools/convert new file mode 100755 index 0000000000..b5c2bd6376 --- /dev/null +++ b/tools/convert @@ -0,0 +1,227 @@ +#!/usr/local/bin/perl + +# Conversion of a Caml Light 0.7 file to Caml 1999. + +# The conversion table + +$convtbl= +"value val +int_of_float truncate +float_of_int float +vect array +fast_really_input unsafe_really_input +io__exit exit +vect_length Array.length +make_vect Array.new +make_matrix Array.new_matrix +concat_vect Array.concat +sub_vect Array.sub +copy_vect Array.copy +fill_vect Array.fill +blit_vect Array.blit +do_vect Array.iter +map_vect Array.map +vect_of_list Array.of_list +list_of_vect Array.to_list +int_of_char Char.code +char_of_int Char.chr +char_for_read Char.escaped +fchar__char_of_int Char.unsafe_chr +hashtbl__do_table Hashtbl.iter +do_table Hashtbl.iter +lexing__create_lexer_channel Lexing.from_channel +lexing__create_lexer_string Lexing.from_string +lexing__create_lexer Lexing.from_function +lexing__get_lexeme Lexing.lexeme +lexing__get_lexeme_char Lexing.lexeme_char +lexing__get_lexeme_start Lexing.lexeme_start +lexing__get_lexeme_end Lexing.lexeme_end +create_lexer_channel Lexing.from_channel +create_lexer_string Lexing.from_string +create_lexer Lexing.from_function +get_lexeme Lexing.lexeme +get_lexeme_char Lexing.lexeme_char +get_lexeme_start Lexing.lexeme_start +get_lexeme_end Lexing.lexeme_end +list_length List.length +rev List.rev +flatten List.flatten +do_list List.iter +map List.map +it_list List.fold_left +list_it List.fold_right +do_list2 List.iter2 +map2 List.map2 +iter2 List.iter2 +it_list2 List.fold_left2 +list_it2 List.fold_right2 +for_all List.for_all +exists List.exists +mem List.mem +assoc List.assoc +mem_assoc List.mem_assoc +assq List.assq +split List.split +combine List.combine +obj__obj Obj.t +obj__repr Obj.repr +obj__magic_obj Obj.magic +obj__magic Obj.magic +obj__is_block Obj.is_block +obj__obj_tag Obj.tag +obj__obj_size Obj.size +obj__obj_field Obj.field +obj__set_obj_field Obj.set_field +obj__obj_block Obj.new_block +obj__update Obj.update +magic_obj Obj.magic +magic Obj.magic +is_block Obj.is_block +obj_tag Obj.tag +obj_size Obj.size +obj_field Obj.field +set_obj_field Obj.set_field +obj_block Obj.new_block +printexc__f Printexc.catch +sort__sort Sort.list +sort Sort.list +string_length String.length +nth_char String.get +set_nth_char String.set +sub_string String.sub +create_string String.create +make_string String.make +fill_string String.fill +blit_string String.blit +string_for_read String.escaped +fstring__nth_char String.unsafe_get +fstring__set_nth_char String.unsafe_set +fstring__blit_string String.unsafe_blit +sys__Sys_error Sys_error +sys__exit exit +sys__command_line Sys.argv +sys__O_RDONLY Sys.Open_rdonly +sys__O_WRONLY Sys.Open_wronly +sys__O_RDWR Sys.Open_rdwr +sys__O_APPEND Sys.Open_append +sys__O_CREAT Sys.Open_creat +sys__O_TRUNC Sys.Open_trunc +sys__O_EXCL Sys.Open_excl +sys__O_BINARY Sys.Open_binary +sys__O_TEXT Sys.Open_text +sys__open Sys.open_desc +sys__close Sys.close_desc +sys__system_command Sys.command +system_command Sys.command +command_line Sys.argv +O_RDONLY Sys.Open_rdonly +O_WRONLY Sys.Open_wronly +O_RDWR Sys.Open_rdwr +O_APPEND Sys.Open_append +O_CREAT Sys.Open_creat +O_TRUNC Sys.Open_trunc +O_EXCL Sys.Open_excl +O_BINARY Sys.Open_binary +O_TEXT Sys.Open_text"; + +# Initialize the table %conv +%conv = split(/\s+/, $convtbl); + +# Open input. +$infile = $ARGV[0]; +open(IN, $infile) || die("Cannot open $infile"); +$interface = ($infile =~ /\.mli$/); + +# If it's a .ml or .mll file: we must insert definitions from the .mli +# before the first definition + +if ($infile =~ /^(.*)\.(ml|mll)$/ && open(INTERFACE, "$1.mli")) { + +# Copy and translate the header of the file (first comment and #open decls) +# Stop at first definition + $_ = <IN>; + if (/^\(\*/) { + do convert(); + while (! /\*\)/) { $_ = <IN>; do convert(); } + $_ = <IN>; + } + while(/^$/ || /^#open / || /^{$/) { + do convert(); + $_ = <IN>; + } + $saved = $_; + $copy = 0; + +# Copy and translate manifest definitions from the .mli + while(<INTERFACE>) { + $copy = 1 if /^type .*=/ || /^#open/ || /^exception/; + $copy = 0 if /^type [^=]*$/ || /^value /; + do convert() if $copy; + } + close(INTERFACE); + $_ = $saved; + +# Finish translation of main file + do convert(); + while(<IN>) { + do convert(); + } + +} else { + +# For other kinds of files (.mli, .mly), just copy as is + while(<IN>) { + do convert(); + } +} + +close(IN); + +# Convert and print one line (in $_) +sub convert { + chop; +# Double-semicolon + return if /^;;\s*$/; + s/;;//; +# Identifiers that have been renamed + s/([A-Za-z][A-Za-z0-9'_]*(__[A-Za-z][A-Za-z0-9'_]*)?)/$conv{$1} || $1/eg; +# 'type glop == tau' + s/^((type|and)\s+(\(.*\)\s+)?[a-z][A-Za-z0-9'_]*\s+)==/\1=/; +# 'and' for values in .mli files -- what a terrible hack! + if ($interface) { s/^ and\b/val/; } +# Open + if (s/#\s*open\s*"([^"]*)"/"open " . do capitalize($1)/e) { + /open ([A-Za-z0-9_']+)/; + return if $opened{$1}; + $opened{$1} = 1; + } +# Module references + s/([A-Za-z][A-Za-z0-9_']*)__/do capitalize($1) . "."/eg; +# Character literals + s/`([^\\]|\\[\\`ntbr]|\\[0-9][0-9][0-9])`/do convert_char($1)/eg; +# Over! + print $_, "\n"; +} + +close(IN); +close(OUT); + +# Capitalize a string +sub capitalize { + local ($_) = @_; + m/^(.)(.*)/; + $hd = $1; + $tl = $2; + $hd =~ tr/a-z/A-Z/; + return $hd . $tl; +} + +# Convert a character literal +sub convert_char { + local ($_) = @_; + s/\\`/`/; + s/'/\\'/; + s/^/'/; + s/$/'/; + return $_; +} diff --git a/tools/dumpobj.ml b/tools/dumpobj.ml new file mode 100644 index 0000000000..30baf82dd6 --- /dev/null +++ b/tools/dumpobj.ml @@ -0,0 +1,305 @@ +(* Disassembler for executable and .zo object files *) + +open Obj +open Printf +open Config +open Asttypes +open Lambda +open Emitcode +open Opcodes +open Opnames + +(* Read signed and unsigned integers *) + +let inputu ic = + let b1 = input_byte ic in + let b2 = input_byte ic in + let b3 = input_byte ic in + let b4 = input_byte ic in + (b4 lsl 24) + (b3 lsl 16) + (b2 lsl 8) + b1 + +let inputs ic = + let b1 = input_byte ic in + let b2 = input_byte ic in + let b3 = input_byte ic in + let b4 = input_byte ic in + let b4' = if b4 >= 128 then b4-256 else b4 in + (b4' lsl 24) + (b3 lsl 16) + (b2 lsl 8) + b1 + +(* Global variables *) + +type global_table_entry = + Empty + | Global of Ident.t + | Constant of obj + +let start = ref 0 (* Position of beg. of code *) +let reloc = ref ([] : (reloc_info * int) list) (* Relocation table *) +let globals = ref ([||] : global_table_entry array) (* Global List.map *) +let objfile = ref false (* true if dumping a .zo *) + +(* Print a structured constant *) + +let rec print_struct_const = function + Const_base(Const_int i) -> + printf "%d" i + | Const_base(Const_float f) -> + printf "%s" f + | Const_base(Const_string s) -> + printf "\"%s\"" (String.escaped s) + | Const_base(Const_char c) -> + printf "'%s'" (Char.escaped c) + | Const_block(tag, args) -> + printf "<%d>" tag; + begin match args with + [] -> () + | [a1] -> + printf "("; print_struct_const a1; printf ")" + | a1::al -> + printf "("; print_struct_const a1; + List.iter (fun a -> printf ", "; print_struct_const a) al; + printf ")" + end + +(* Print an obj *) + +let rec print_obj x = + if Obj.is_block x then begin + match Obj.tag x with + 253 -> (* string *) + printf "\"%s\"" (String.escaped (Obj.magic x : string)) + | 254 -> (* float *) + printf "%.12g" (Obj.magic x : float) + | _ -> + printf "<%d>" (Obj.tag x); + begin match Obj.size x with + 0 -> () + | 1 -> + printf "("; print_obj (Obj.field x 0); printf ")" + | n -> + printf "("; print_obj (Obj.field x 0); + for i = 1 to n - 1 do + printf ", "; print_obj (Obj.field x i) + done; + printf ")" + end + end else + printf "%d" (Obj.magic x : int) + +(* Current position in input file *) + +let currpos ic = + pos_in ic - !start + +(* Access in the relocation table *) + +let rec rassoc key = function + [] -> raise Not_found + | (a,b) :: l -> if b = key then a else rassoc key l + +let find_reloc ic = + rassoc (pos_in ic - !start) !reloc + +(* Symbolic printing of global names, etc *) + +let print_getglobal_name ic = + if !objfile then begin + begin try + match find_reloc ic with + Reloc_getglobal id -> print_string (Ident.name id) + | Reloc_literal sc -> print_struct_const sc + | _ -> print_string "<wrong reloc>" + with Not_found -> + print_string "<no reloc>" + end; + inputu ic; () + end + else begin + let n = inputu ic in + if n >= Array.length !globals + then print_string "<global table overflow>" + else match !globals.(n) with + Global id -> print_string(Ident.name id) + | Constant obj -> print_obj obj + | _ -> print_string "???" + end + +let print_setglobal_name ic = + if !objfile then begin + begin try + match find_reloc ic with + Reloc_setglobal id -> print_string (Ident.name id) + | _ -> print_string "<wrong reloc>" + with Not_found -> + print_string "<no reloc>" + end; + inputu ic; () + end + else begin + let n = inputu ic in + if n >= Array.length !globals + then print_string "<global table overflow>" + else match !globals.(n) with + Global id -> print_string(Ident.name id) + | _ -> print_string "???" + end + +let print_primitive ic = + if !objfile then begin + begin try + match find_reloc ic with + Reloc_primitive s -> print_string s + | _ -> print_string "<wrong reloc>" + with Not_found -> + print_string "<no reloc>" + end; + inputu ic; () + end + else begin + let n = inputu ic in + if n >= Array.length Runtimedef.builtin_primitives + then print_string(string_of_int n) + else print_string(Runtimedef.builtin_primitives.(n)) + end + +(* Disassemble one instruction *) + +let print_instr ic = + print_int (currpos ic); print_string "\t"; + let op = inputu ic in + print_string + (if op >= Array.length names_of_instructions then "???" + else names_of_instructions.(op)); + print_string " "; + (* One unsigned int *) + if op == opATOM or op == opPUSHATOM + or op == opMAKEBLOCK1 or op == opMAKEBLOCK2 or op == opMAKEBLOCK3 + or op == opACC or op == opPUSHACC or op == opPOP or op == opASSIGN + or op == opENVACC or op == opPUSHENVACC + or op == opAPPLY or op == opAPPTERM1 or op == opAPPTERM2 or op == opAPPTERM3 + or op == opRETURN or op == opGRAB or op == opGETFIELD or op == opSETFIELD + or op == opDUMMY then + (print_int (inputu ic)) + (* One signed int *) + else if op == opCONSTINT or op == opPUSHCONSTINT + or op == opOFFSETINT or op == opOFFSETREF then + (print_int (inputs ic)) + (* Two unsigned constants *) + else if op == opAPPTERM or op == opMAKEBLOCK then + (print_int (inputu ic); print_string ", "; print_int(inputu ic)) + (* One displacement *) + else if op == opPUSH_RETADDR or op == opBRANCH or op == opBRANCHIF + or op == opBRANCHIFNOT or op == opPUSHTRAP then + (let p = currpos ic in print_int (p + inputs ic)) + (* One size, one displacement *) + else if op == opCLOSURE or op == opCLOSUREREC then + (print_int (inputu ic); print_string ", "; + let p = currpos ic in print_int (p + inputs ic)) + (* getglobal *) + else if op == opGETGLOBAL or op == opPUSHGETGLOBAL then + (print_getglobal_name ic) + (* getglobal + unsigned *) + else if op == opGETGLOBALFIELD or op == opPUSHGETGLOBALFIELD then + (print_getglobal_name ic; print_string ", "; print_int (inputu ic)) + (* setglobal *) + else if op == opSETGLOBAL then + (print_setglobal_name ic) + (* primitive *) + else if op == opC_CALL1 or op == opC_CALL2 + or op == opC_CALL3 or op == opC_CALL4 then + (print_primitive ic) + (* unsigned + primitive *) + else if op == opC_CALLN then + (print_int(inputu ic); print_string ", "; print_primitive ic) + (* switch *) + else if op == opSWITCH then + (let n = inputu ic in + let orig = currpos ic in + for i = 0 to n-1 do + print_string "\n\t"; print_int i; print_string " -> "; + print_int(orig + inputs ic) + done) + (* translate *) + else if op == opTRANSLATE then + (let n = inputu ic in + for i = 0 to n-1 do + print_string "\n\t"; print_int(inputu ic) + done) + (* default *) + else (); + print_string "\n" + +(* Disassemble a block of code *) + +let print_code ic len = + start := pos_in ic; + let stop = !start + len in + while pos_in ic < stop do print_instr ic done + +(* Dump relocation info *) + +let print_reloc (info, pos) = + printf "\t%d\t(%d)\t" pos (pos/4); + match info with + Reloc_literal sc -> print_struct_const sc; printf "\n" + | Reloc_getglobal id -> printf "require\t%s\n" (Ident.name id) + | Reloc_setglobal id -> printf "provide\t%s\n" (Ident.name id) + | Reloc_primitive s -> printf "prim\t%s\n" s + +(* Print a .zo file *) + +let dump_obj filename ic = + let buffer = String.create (String.length cmo_magic_number) in + really_input ic buffer 0 (String.length cmo_magic_number); + if buffer <> cmo_magic_number then begin + prerr_endline "Not an object file"; exit 2 + end; + let cu_pos = input_binary_int ic in + seek_in ic cu_pos; + let cu = (input_value ic : compilation_unit) in + reloc := cu.cu_reloc; + seek_in ic cu.cu_pos; + print_code ic cu.cu_codesize + +(* Print an executable file *) + +exception Not_exec + +let dump_exe ic = + seek_in ic (in_channel_length ic - 12); + if (let buff = String.create 12 in input ic buff 0 12; buff) + <> exec_magic_number + then raise Not_exec; + let trailer_pos = in_channel_length ic - 28 in + seek_in ic trailer_pos; + let code_size = input_binary_int ic in + let data_size = input_binary_int ic in + let symbol_size = input_binary_int ic in + let debug_size = input_binary_int ic in + seek_in ic (trailer_pos - debug_size - symbol_size - data_size); + let init_data = (input_value ic : obj array) in + globals := Array.new (Array.length init_data) Empty; + for i = 0 to Array.length init_data - 1 do + !globals.(i) <- Constant (init_data.(i)) + done; + if symbol_size > 0 then begin + let (_, sym_table) = (input_value ic : int * (Ident.t, int) Tbl.t) in + Tbl.iter (fun id pos -> !globals.(pos) <- Global id) sym_table + end; + seek_in ic + (trailer_pos - debug_size - symbol_size - data_size - code_size); + print_code ic code_size + +let main() = + for i = 1 to Array.length Sys.argv - 1 do + let ic = open_in_bin Sys.argv.(i) in + begin try + objfile := false; dump_exe ic + with Not_exec -> + objfile := true; seek_in ic 0; dump_obj (Sys.argv.(i)) ic + end; + close_in ic + done; + exit 0 + +let _ = Printexc.catch main (); exit 0 diff --git a/tools/make-opcodes b/tools/make-opcodes new file mode 100644 index 0000000000..c8f573c682 --- /dev/null +++ b/tools/make-opcodes @@ -0,0 +1,2 @@ +$1=="enum" {n=0; next; } + {for (i = 1; i <= NF; i++) {printf("let op%s = %d\n", $i, n++);}} diff --git a/toplevel/expunge.ml b/toplevel/expunge.ml new file mode 100644 index 0000000000..5139aa3395 --- /dev/null +++ b/toplevel/expunge.ml @@ -0,0 +1,59 @@ +(* "Expunge" a toplevel by removing compiler modules from the global List.map. + Usage: expunge <source file> <dest file> <names of modules to keep> *) + +open Sys +open Misc + +let to_keep = ref (Cset.empty: string Cset.t) + +let expunge_map tbl = + Symtable.filter_global_map + (fun id -> Cset.mem (Ident.name id) !to_keep) + tbl + +let main () = + let input_name = Sys.argv.(1) in + let output_name = Sys.argv.(2) in + Array.iter + (fun exn -> to_keep := Cset.add exn !to_keep) + Runtimedef.builtin_exceptions; + for i = 3 to Array.length Sys.argv - 1 do + to_keep := Cset.add (capitalize Sys.argv.(i)) !to_keep + done; + let ic = open_in_bin input_name in + let pos_trailer = + in_channel_length ic - 16 - String.length Config.exec_magic_number in + seek_in ic pos_trailer; + let code_size = input_binary_int ic in + let data_size = input_binary_int ic in + let symbol_size = input_binary_int ic in + let debug_size = input_binary_int ic in + let header = String.create(String.length Config.exec_magic_number) in + really_input ic header 0 (String.length Config.exec_magic_number); + if header <> Config.exec_magic_number then begin + prerr_endline "Wrong Obj.magic number"; exit 2 + end; + let oc = + open_out_gen [Sys.Open_wronly; Sys.Open_creat; Sys.Open_trunc; Sys.Open_binary] 0o777 output_name in + (* Copy the file up to the symbol section as is *) + seek_in ic 0; + copy_file_chunk ic oc (pos_trailer - symbol_size - debug_size); + (* Read, expunge and rewrite the symbol section *) + let global_map = (input_value ic : Symtable.global_map) in + let pos1 = pos_out oc in + output_compact_value oc (expunge_map global_map); + let pos2 = pos_out oc in + (* Rewrite the trailer *) + output_binary_int oc code_size; + output_binary_int oc data_size; + output_binary_int oc (pos2 - pos1); + output_binary_int oc 0; + output_string oc Config.exec_magic_number; + (* Done *) + close_in ic; + close_out oc + +let _ = Printexc.catch main (); exit 0 + + + diff --git a/toplevel/printval.ml b/toplevel/printval.ml new file mode 100644 index 0000000000..6e2ebd7960 --- /dev/null +++ b/toplevel/printval.ml @@ -0,0 +1,234 @@ +(* To print values *) + +open Obj +open Format +open Longident +open Path +open Typedtree + + +(* Given an exception val, we cannot recover its type, + hence we cannot print its arguments in general. + Here, we do a feeble attempt to print + integer, string and float arguments... *) + +let print_exception obj = + print_string (Obj.magic(Obj.field(Obj.field obj 0) 0) : string); + if Obj.size obj > 1 then begin + open_hovbox 1; + print_string "("; + for i = 1 to Obj.size obj - 1 do + if i > 1 then begin print_string ","; print_space() end; + let arg = Obj.field obj i in + if not (Obj.is_block arg) then + print_int(Obj.magic arg : int) (* Note: this could be a char! *) + else if Obj.tag arg = 253 then begin + print_string "\""; + print_string (String.escaped (Obj.magic arg : string)); + print_string "\"" + end else if Obj.tag arg = 254 then + print_float (Obj.magic arg : float) + else + print_string "_" + done; + print_string ")"; + close_box() + end + +(* Recover a constructor by its tag *) + +exception Constr_not_found + +let rec find_constr tag = function + [] -> + raise Constr_not_found + | constr :: rest -> + if tag = 0 then constr else find_constr (tag - 1) rest + +(* The user-defined printers. Also used for some builtin types. *) + +let printers = ref ([ + Pident(Ident.new "print_int"), Predef.type_int, + (fun x -> print_int (Obj.magic x : int)); + Pident(Ident.new "print_float"), Predef.type_float, + (fun x -> print_float(Obj.magic x : float)); + Pident(Ident.new "print_char"), Predef.type_char, + (fun x -> print_string "'"; + print_string (Char.escaped (Obj.magic x : char)); + print_string "'"); + Pident(Ident.new "print_string"), Predef.type_string, + (fun x -> print_string "\""; + print_string (String.escaped (Obj.magic x : string)); + print_string "\"") +] : (Path.t * type_expr * (Obj.t -> unit)) list) + +let find_printer env ty = + let rec find = function + [] -> raise Not_found + | (name, sch, printer) :: remainder -> + if Ctype.moregeneral env sch ty + then printer + else find remainder + in find !printers + +(* Print a constructor or label, giving it the same prefix as the type + it comes from. Attempt to omit the prefix if the type comes from + a module that has been opened. *) + +let print_qualified lookup_fun env ty_path name = + match ty_path with + Pident id -> + print_string name + | Pdot(p, s, pos) -> + if try + match lookup_fun (Lident name) env with + Tconstr(ty_path', _) -> Path.same ty_path ty_path' + | _ -> false + with Not_found -> false + then print_string name + else (Printtyp.path p; print_string "."; print_string name) + +let print_constr = + print_qualified (fun lid env -> (Env.lookup_constructor lid env).cstr_res) +and print_label = + print_qualified (fun lid env -> (Env.lookup_label lid env).lbl_res) + +(* The main printing function *) + +let max_printer_depth = ref 100 +let max_printer_steps = ref 300 +exception Ellipsis + +let cautious f arg = try f arg with Ellipsis -> print_string "..." + +let print_value env obj ty = + + let printer_steps = ref !max_printer_steps in + + let rec print_val prio depth obj ty = + decr printer_steps; + if !printer_steps < 0 or depth < 0 then raise Ellipsis; + try + find_printer env ty obj; () + with Not_found -> + match Ctype.repr ty with + Tvar _ -> + print_string "<poly>" + | Tarrow(ty1, ty2) -> + print_string "<fun>" + | Ttuple(ty_list) -> + if prio > 0 + then begin open_hovbox 1; print_string "(" end + else open_hovbox 0; + print_val_list 1 depth obj ty_list; + if prio > 0 then print_string ")"; + close_box() + | Tconstr(path, []) when Path.same path Predef.path_exn -> + if prio > 1 + then begin open_hovbox 2; print_string "(" end + else open_hovbox 1; + print_exception obj; + if prio > 1 then print_string ")"; + close_box() + | Tconstr(path, [ty_arg]) when Path.same path Predef.path_list -> + let rec print_conses depth cons = + if Obj.tag cons != 0 then begin + print_val 0 (depth - 1) (Obj.field cons 0) ty_arg; + let next_obj = Obj.field cons 1 in + if Obj.tag next_obj != 0 then begin + print_string ";"; print_space(); + print_conses (depth - 1) next_obj + end + end in + open_hovbox 1; + print_string "["; + cautious (print_conses depth) obj; + print_string "]"; + close_box() + | Tconstr(path, [ty_arg]) when Path.same path Predef.path_array -> + let rec print_items depth i = + if i < Obj.size obj then begin + if i > 0 then begin print_string ";"; print_space() end; + print_val 0 (depth - 1) (Obj.field obj i) ty_arg; + print_items (depth - 1) (i + 1) + end in + open_hovbox 2; + print_string "[|"; + cautious (print_items depth) 0; + print_string "|]"; + close_box() + | Tconstr(path, ty_list) -> + let decl = Env.find_type path env in + match decl.type_kind with + Type_abstract -> + print_string "<abstr>" + | Type_manifest body -> + print_val prio depth obj + (Ctype.substitute decl.type_params ty_list body) + | Type_variant constr_list -> + let tag = Obj.tag obj in + begin try + let (constr_name, constr_args) = + find_constr tag constr_list in + let ty_args = + List.map (Ctype.substitute decl.type_params ty_list) + constr_args in + match ty_args with + [] -> + print_constr env path constr_name + | [ty1] -> + if prio > 1 + then begin open_hovbox 2; print_string "(" end + else open_hovbox 1; + print_constr env path constr_name; + print_space(); + cautious (print_val 2 (depth - 1) (Obj.field obj 0)) ty1; + if prio > 1 then print_string ")"; + close_box() + | tyl -> + if prio > 1 + then begin open_hovbox 2; print_string "(" end + else open_hovbox 1; + print_constr env path constr_name; + print_space(); + open_hovbox 1; + print_string "("; + print_val_list 1 depth obj tyl; + print_string ")"; + close_box(); + if prio > 1 then print_string ")"; + close_box() + with + Constr_not_found -> + print_string "<unknown constructor>" + end + | Type_record lbl_list -> + let rec print_fields depth pos = function + [] -> () + | (lbl_name, _, lbl_arg) :: remainder -> + if pos > 0 then begin print_string ";"; print_space() end; + open_hovbox 1; + print_label env path lbl_name; + print_string "="; print_cut(); + let ty_arg = + Ctype.substitute decl.type_params ty_list lbl_arg in + cautious (print_val 0 (depth - 1) (Obj.field obj pos)) + ty_arg; + close_box(); + print_fields (depth - 1) (pos + 1) remainder in + open_hovbox 1; + print_string "{"; + cautious (print_fields depth 0) lbl_list; + print_string "}"; + close_box() + + and print_val_list prio depth obj ty_list = + let rec print_list depth i = function + [] -> () + | ty :: ty_list -> + if i > 0 then begin print_string ","; print_space() end; + print_val prio (depth - 1) (Obj.field obj i) ty; + print_list (depth - 1) (i + 1) ty_list in + cautious (print_list depth 0) ty_list + +in print_val 0 !max_printer_depth obj ty diff --git a/toplevel/printval.mli b/toplevel/printval.mli new file mode 100644 index 0000000000..324bf9d058 --- /dev/null +++ b/toplevel/printval.mli @@ -0,0 +1,10 @@ +(* Printing of values *) + +open Typedtree + +val print_exception: Obj.t -> unit +val print_value: Env.t -> Obj.t -> type_expr -> unit + +val printers: (Path.t * type_expr * (Obj.t -> unit)) list ref +val max_printer_depth: int ref +val max_printer_steps: int ref diff --git a/toplevel/topdirs.ml b/toplevel/topdirs.ml new file mode 100644 index 0000000000..9a01212652 --- /dev/null +++ b/toplevel/topdirs.ml @@ -0,0 +1,262 @@ +(* Toplevel directives *) + +open Format +open Misc +open Longident +open Path +open Typedtree +open Emitcode +open Printval +open Toploop + +(* Temporary assignment to a reference *) + +let protect r newval body = + let oldval = !r in + try + r := newval; + let res = body() in + r := oldval; + res + with x -> + r := oldval; + raise x + +(* Return the val referred to by a path *) + +let rec eval_path = function + Pident id -> Symtable.get_global_value id + | Pdot(p, s, pos) -> Obj.field (eval_path p) pos + +(* To quit *) + +let dir_quit () = exit 0; () + +let _ = Hashtbl.add directive_table "quit" (Directive_none dir_quit) + +(* To add a directory to the load path *) + +let dir_directory s = + Config.load_path := s :: !Config.load_path; + Env.reset_cache() + +let _ = Hashtbl.add directive_table "directory" (Directive_string dir_directory) + +(* To change the current directory *) + +let dir_cd s = + Sys.chdir s + +let _ = Hashtbl.add directive_table "cd" (Directive_string dir_cd) + +(* Load in-core a .cmo file *) + +let dir_load name = + try + let filename = find_in_path !Config.load_path name in + let ic = open_in_bin filename in + let buffer = String.create (String.length Config.cmo_magic_number) in + really_input ic buffer 0 (String.length Config.cmo_magic_number); + if buffer <> Config.cmo_magic_number then begin + print_string "File "; print_string name; + print_string " is not a bytecode object file."; print_newline() + end else begin + let compunit_pos = input_binary_int ic in (* Go to descriptor *) + seek_in ic compunit_pos; + let compunit = (input_value ic : compilation_unit) in + Linker.check_consistency filename compunit; + seek_in ic compunit.cu_pos; + let code_size = compunit.cu_codesize + 4 in + let code = Meta.static_alloc code_size in + unsafe_really_input ic code 0 compunit.cu_codesize; + String.unsafe_set code compunit.cu_codesize + (Char.chr Opcodes.opSTOP); + String.unsafe_set code (compunit.cu_codesize + 1) '\000'; + String.unsafe_set code (compunit.cu_codesize + 2) '\000'; + String.unsafe_set code (compunit.cu_codesize + 3) '\000'; + Symtable.patch_object code compunit.cu_reloc; + Symtable.update_global_table(); + begin try + Meta.execute_bytecode code code_size; () + with exn -> + print_exception_outcome exn + end + end; + close_in ic + with Not_found -> + print_string "Cannot find file "; print_string name; print_newline() + +let _ = Hashtbl.add directive_table "load" (Directive_string dir_load) + +(* Load commands from a file *) + +let dir_use name = + try + let filename = find_in_path !Config.load_path name in + let ic = open_in_bin filename in + let lb = Lexing.from_channel ic in + protect Location.input_name filename (fun () -> + try + while true do + execute_phrase (Parse.toplevel_phrase lb) + done + with End_of_file -> ()); + close_in ic + with Not_found -> + print_string "Cannot find file "; print_string name; print_newline() + +let _ = Hashtbl.add directive_table "use" (Directive_string dir_use) + +(* Install, remove a printer *) + +let find_printer_type lid = + try + let (path, desc) = Env.lookup_value lid !toplevel_env in + Ctype.begin_def(); + let ty_arg = Ctype.newvar() in + Ctype.unify !toplevel_env (Tarrow(ty_arg, Predef.type_unit)) + (Ctype.instance desc.val_type); + Ctype.end_def(); + Ctype.generalize ty_arg; + (ty_arg, path) + with + Not_found -> + print_string "Unbound val "; Printtyp.longident lid; + print_newline(); raise Exit + | Ctype.Unify -> + Printtyp.longident lid; + print_string " has the wrong type for a printing function"; + print_newline(); raise Exit + +let dir_install_printer lid = + try + let (ty_arg, path) = find_printer_type lid in + let v = eval_path path in + Printval.printers := + (path, ty_arg, (Obj.magic v : Obj.t -> unit)) :: !Printval.printers + with Exit -> + () + +let dir_remove_printer lid = + try + let (ty_arg, path) = find_printer_type lid in + let rec remove = function + [] -> + print_string "No printer named "; Printtyp.longident lid; + print_newline(); + [] + | (p, ty, fn as printer) :: rem -> + if Path.same p path then rem else printer :: remove rem in + Printval.printers := remove !Printval.printers + with Exit -> + () + +let _ = Hashtbl.add directive_table "install_printer" + (Directive_ident dir_install_printer) +let _ = Hashtbl.add directive_table "remove_printer" + (Directive_ident dir_remove_printer) + +(* The trace *) + +let rec trace_closure name clos_typ = + match Ctype.repr clos_typ with + Tarrow(t1, t2) -> + let starred_name = + match name with + Lident s -> Lident(s ^ "*") + | Ldot(lid, s) -> Ldot(lid, s ^ "*") in + let trace_res = trace_closure starred_name t2 in + (fun clos_val -> + Obj.repr(fun arg -> + open_hovbox 2; + Printtyp.longident name; print_string " <--"; print_space(); + print_value !toplevel_env arg t1; close_box(); print_newline(); + try + let res = (Obj.magic clos_val : Obj.t -> Obj.t)(arg) in + open_hovbox 2; + Printtyp.longident name; print_string " -->"; print_space(); + print_value !toplevel_env res t2; close_box(); print_newline(); + trace_res res + with exn -> + open_hovbox 2; + Printtyp.longident name; print_string " raises"; print_space(); + print_exception (Obj.repr exn); close_box(); print_newline(); + raise exn)) + | _ -> + (fun v -> v) + +let trace_env = ref ([] : (Path.t * Obj.t) list) + +let dir_trace lid = + try + let (path, desc) = Env.lookup_value lid !toplevel_env in + let clos = eval_path path in + (* Nothing to do if it's not a closure *) + if Obj.is_block clos & Obj.tag clos = 251 then begin + (* Make a copy of the closure *) + let old_clos = Obj.new_block 251 2 in + Obj.set_field old_clos 0 (Obj.field clos 0); + Obj.set_field old_clos 1 (Obj.field clos 1); + (* Instrument the old closure *) + let new_clos = + trace_closure lid (Ctype.instance desc.val_type) old_clos in + trace_env := (path, old_clos) :: !trace_env; + (* Overwrite the old closure *) + Obj.update clos new_clos; + match desc.val_prim with + Not_prim -> + Printtyp.longident lid; print_string " is now traced."; + print_newline() + | Primitive(_,_) -> + open_hovbox 0; + print_string "Warning: "; Printtyp.longident lid; + print_string " is an external function."; print_space(); + print_string "Direct calls will not be traced."; + close_box(); print_newline() + end else begin + Printtyp.longident lid; print_string " is not a function."; + print_newline() + end + with Not_found -> + print_string "Unbound val "; Printtyp.longident lid; + print_newline() + +let dir_untrace lid = + try + let (path, desc) = Env.lookup_value lid !toplevel_env in + let rec remove = function + [] -> + Printtyp.longident lid; print_string " was not traced."; + print_newline(); + [] + | (p, oldval) :: rem -> + if Path.same p path then begin + Obj.update (eval_path path) oldval; + Printtyp.longident lid; print_string " is no longer traced."; + print_newline(); + rem + end else remove rem in + trace_env := remove !trace_env + with Not_found -> + print_string "Unbound val "; Printtyp.longident lid; + print_newline() + +let dir_untrace_all () = + List.iter + (fun (path, oldval) -> + Obj.update (eval_path path) oldval; + Printtyp.path path; print_string " is no longer traced."; + print_newline()) + !trace_env; + trace_env := [] + +let _ = Hashtbl.add directive_table "trace" (Directive_ident dir_trace) +let _ = Hashtbl.add directive_table "untrace" (Directive_ident dir_untrace) +let _ = Hashtbl.add directive_table "untrace_all" (Directive_none dir_untrace_all) + +(* Control the printing of values *) + +let _ = Hashtbl.add directive_table "print_depth" + (Directive_int(fun n -> max_printer_depth := n)) +let _ = Hashtbl.add directive_table "print_length" + (Directive_int(fun n -> max_printer_steps := n)) diff --git a/toplevel/topdirs.mli b/toplevel/topdirs.mli new file mode 100644 index 0000000000..e5b9e7b4d7 --- /dev/null +++ b/toplevel/topdirs.mli @@ -0,0 +1,12 @@ +(* The toplevel directives. *) + +val dir_quit : unit -> unit +val dir_directory : string -> unit +val dir_cd : string -> unit +val dir_load : string -> unit +val dir_use : string -> unit +val dir_install_printer : Longident.t -> unit +val dir_remove_printer : Longident.t -> unit +val dir_trace : Longident.t -> unit +val dir_untrace : Longident.t -> unit +val dir_untrace_all : unit -> unit diff --git a/toplevel/toploop.ml b/toplevel/toploop.ml new file mode 100644 index 0000000000..a7e3741874 --- /dev/null +++ b/toplevel/toploop.ml @@ -0,0 +1,183 @@ +(* The interactive toplevel loop *) + +open Lexing +open Format +open Misc +open Parsetree +open Typedtree +open Printval + +type directive_fun = + Directive_none of (unit -> unit) + | Directive_string of (string -> unit) + | Directive_int of (int -> unit) + | Directive_ident of (Longident.t -> unit) + +(* Load in-core and execute a lambda term *) + +type evaluation_outcome = Result of Obj.t | Exception of exn + +let load_lambda lam = + if !Clflags.dump_lambda then begin + Printlambda.lambda lam; print_newline() + end; + let (init_code, fun_code) = Codegen.compile_phrase lam in + if !Clflags.dump_instr then begin + Printinstr.instrlist init_code; + Printinstr.instrlist fun_code; + print_newline() + end; + let (code, code_size, reloc) = Emitcode.to_memory init_code fun_code in + let can_free = (fun_code = []) in + let initial_symtable = Symtable.current_state() in + Symtable.patch_object code reloc; + Symtable.update_global_table(); + try + let retval = Meta.execute_bytecode code code_size in + if can_free then Meta.static_free code; + Result retval + with x -> + if can_free then Meta.static_free code; + Symtable.restore_state initial_symtable; + Exception x + +(* Print the outcome of an evaluation *) + +let print_item env = function + Tsig_value(id, decl) -> + open_hovbox 2; + begin match decl.val_prim with + Not_prim -> + print_string "val "; Printtyp.ident id; + print_string " :"; print_space(); + Printtyp.type_scheme decl.val_type; + print_string " ="; print_space(); + print_value env (Symtable.get_global_value id) decl.val_type + | Primitive(p, ar) -> + print_string "external "; Printtyp.ident id; + print_string " :"; print_space(); + Printtyp.type_scheme decl.val_type; print_space(); + print_string "= \""; print_string p; print_string "\"" + end; + close_box() + | Tsig_type(id, decl) -> + Printtyp.type_declaration id decl + | Tsig_exception(id, decl) -> + Printtyp.exception_declaration id decl + | Tsig_module(id, mty) -> + open_hovbox 2; print_string "module "; Printtyp.ident id; + print_string " :"; print_space(); Printtyp.modtype mty; close_box() + | Tsig_modtype(id, decl) -> + Printtyp.modtype_declaration id decl + +(* Print an exception produced by an evaluation *) + +let print_exception_outcome = function + Sys.Break -> + print_string "Interrupted."; print_newline() + | Out_of_memory -> + Gc.full_major(); + print_string "Out of memory during evaluation"; + print_newline() + | exn -> + open_hovbox 0; + print_string "Uncaught exception: "; + print_exception (Obj.repr exn); + print_newline() + +(* The table of toplevel directives. + Filled by functions from module topdirs. *) + +let directive_table = (Hashtbl.new 13 : (string, directive_fun) Hashtbl.t) + +(* Execute a toplevel phrase *) + +let toplevel_env = ref Env.empty + +let execute_phrase phr = + match phr with + Ptop_def sstr -> + let (str, sg, newenv) = Typemod.type_structure !toplevel_env sstr in + let lam = Translmod.transl_toplevel_definition str in + let res = load_lambda lam in + begin match res with + Result v -> + begin match str with + [Tstr_eval exp] -> + open_hovbox 0; + print_string "- : "; + Printtyp.type_scheme exp.exp_type; + print_space(); print_string "="; print_space(); + print_value newenv v exp.exp_type; + close_box(); + print_newline() + | _ -> + open_vbox 0; + List.iter (fun item -> print_item newenv item; print_space()) sg; + close_box(); + print_flush() + end; + toplevel_env := newenv + | Exception exn -> + print_exception_outcome exn + end + | Ptop_dir(dir_name, dir_arg) -> + try + match (Hashtbl.find directive_table dir_name, dir_arg) with + (Directive_none f, Pdir_none) -> f () + | (Directive_string f, Pdir_string s) -> f s + | (Directive_int f, Pdir_int n) -> f n + | (Directive_ident f, Pdir_ident lid) -> f lid + | (_, _) -> + print_string "Wrong type of argument for directive `"; + print_string dir_name; print_string "'"; print_newline() + with Not_found -> + print_string "Unknown directive `"; print_string dir_name; + print_string "'"; print_newline() + +(* Reading function -- should use input_scan_line directly... *) + +let refill_lexbuf buffer len = + output_char stdout '#'; flush stdout; + let line = input_line stdin in + let linelen = String.length line in + if linelen + 1 <= len then begin + String.blit line 0 buffer 0 linelen; + buffer.[linelen] <- '\n'; + linelen + 1 + end else begin + String.blit line 0 buffer 0 len; + len + end + +(* Discard everything already in a lexer buffer *) + +let empty_lexbuf lb = + let l = String.length lb.lex_buffer in + lb.lex_abs_pos <- (-l); + lb.lex_curr_pos <- l + +(* The loop *) + +let loop() = + print_string "\tCaml Special Light version "; + print_string Config.version; + print_newline(); print_newline(); + let lb = Lexing.from_function refill_lexbuf in + Location.input_name := ""; + Location.input_lexbuf := Some lb; + Symtable.init_toplevel(); + toplevel_env := Compile.initial_env (); + Sys.catch_break true; + while true do + try + empty_lexbuf lb; + execute_phrase (Parse.toplevel_phrase lb) + with + End_of_file -> + print_newline(); exit 0 + | Sys.Break -> + print_string "Interrupted."; print_newline() + | x -> + Errors.report_error x + done diff --git a/toplevel/toploop.mli b/toplevel/toploop.mli new file mode 100644 index 0000000000..8196ac1107 --- /dev/null +++ b/toplevel/toploop.mli @@ -0,0 +1,20 @@ +(* The interactive toplevel loop *) + +val loop: unit -> unit + +(* Interface with toplevel directives *) + +type directive_fun = + Directive_none of (unit -> unit) + | Directive_string of (string -> unit) + | Directive_int of (int -> unit) + | Directive_ident of (Longident.t -> unit) + +val directive_table: (string, directive_fun) Hashtbl.t + (* Table of known directives, with their execution function *) +val execute_phrase: Parsetree.toplevel_phrase -> unit + (* Execute the given toplevel phrase *) +val print_exception_outcome: exn -> unit + (* Print an exception resulting from the evaluation of user code. *) +val toplevel_env: Env.t ref + (* Typing environment for the toplevel *) diff --git a/toplevel/topmain.ml b/toplevel/topmain.ml new file mode 100644 index 0000000000..50ebfa1b7c --- /dev/null +++ b/toplevel/topmain.ml @@ -0,0 +1,12 @@ +open Clflags + +let main () = + Arg.parse + ["-I", Arg.String(fun dir -> include_dirs := dir :: !include_dirs); + "-fast", Arg.Unit(fun () -> fast := true); + "-dlambda", Arg.Unit(fun () -> dump_lambda := true); + "-dinstr", Arg.Unit(fun () -> dump_instr := true)] + (fun name -> raise(Arg.Bad("don't know what to do with " ^ name))); + Toploop.loop() + +let _ = Printexc.catch main () diff --git a/typing/ctype.ml b/typing/ctype.ml new file mode 100644 index 0000000000..e8b42ed38a --- /dev/null +++ b/typing/ctype.ml @@ -0,0 +1,344 @@ +(* Operations on core types *) + +open Misc +open Typedtree + + +exception Unify + +let current_level = ref 0 + +let generic_level = (-1) + +let begin_def () = incr current_level +and end_def () = decr current_level + +let newvar () = + Tvar { tvar_level = !current_level; tvar_link = None } + +let rec repr = function + Tvar({tvar_link = Some ty} as v) -> + let r = repr ty in + if r != ty then v.tvar_link <- Some r; + r + | t -> t + +let rec generalize ty = + match repr ty with + Tvar v -> + if v.tvar_level > !current_level then v.tvar_level <- generic_level + | Tarrow(t1, t2) -> + generalize t1; generalize t2 + | Ttuple tl -> + List.iter generalize tl + | Tconstr(p, []) -> + () + | Tconstr(p, tl) -> + List.iter generalize tl + +let rec make_nongen ty = + match repr ty with + Tvar v -> + if v.tvar_level > !current_level then v.tvar_level <- !current_level + | Tarrow(t1, t2) -> + make_nongen t1; make_nongen t2 + | Ttuple tl -> + List.iter make_nongen tl + | Tconstr(p, []) -> + () + | Tconstr(p, tl) -> + List.iter make_nongen tl + +let inst_subst = ref ([] : (type_expr * type_expr) list) + +let rec copy ty = + match repr ty with + Tvar v as t -> + if v.tvar_level = generic_level then begin + try + List.assq t !inst_subst + with Not_found -> + let t' = newvar() in + inst_subst := (t, t') :: !inst_subst; + t' + end else t + | Tarrow(t1, t2) -> + Tarrow(copy t1, copy t2) + | Ttuple tl -> + Ttuple(List.map copy tl) + | Tconstr(p, []) as t -> + t + | Tconstr(p, tl) -> + Tconstr(p, List.map copy tl) + +let instance sch = + inst_subst := []; + let ty = copy sch in + inst_subst := []; + ty + +let instance_constructor cstr = + inst_subst := []; + let ty_res = copy cstr.cstr_res in + let ty_args = List.map copy cstr.cstr_args in + inst_subst := []; + (ty_args, ty_res) + +let instance_label lbl = + inst_subst := []; + let ty_res = copy lbl.lbl_res in + let ty_arg = copy lbl.lbl_arg in + inst_subst := []; + (ty_arg, ty_res) + +let substitute params args body = + inst_subst := List.combine(params, args); + let ty = copy body in + inst_subst := []; + ty + +exception Cannot_expand + +let expand_abbrev env path args = + let decl = Env.find_type path env in + match decl.type_kind with + Type_manifest body -> substitute decl.type_params args body + | _ -> raise Cannot_expand + +let rec occur tvar ty = + match repr ty with + Tvar v -> + if v == tvar then raise Unify; + if v.tvar_level > tvar.tvar_level then v.tvar_level <- tvar.tvar_level + | Tarrow(t1, t2) -> + occur tvar t1; occur tvar t2 + | Ttuple tl -> + List.iter (occur tvar) tl + | Tconstr(p, []) -> + () + | Tconstr(p, tl) -> + List.iter (occur tvar) tl + +let rec unify env t1 t2 = + if t1 == t2 then () else begin + let t1 = repr t1 in + let t2 = repr t2 in + if t1 == t2 then () else begin + match (t1, t2) with + (Tvar v, _) -> + occur v t2; v.tvar_link <- Some t2 + | (_, Tvar v) -> + occur v t1; v.tvar_link <- Some t1 + | (Tarrow(t1, u1), Tarrow(t2, u2)) -> + unify env t1 t2; unify env u1 u2 + | (Ttuple tl1, Ttuple tl2) -> + unify_list env tl1 tl2 + | (Tconstr(p1, tl1), Tconstr(p2, tl2)) -> + if Path.same p1 p2 then + unify_list env tl1 tl2 + else begin + try + unify env (expand_abbrev env p1 tl1) t2 + with Cannot_expand -> + try + unify env t1 (expand_abbrev env p2 tl2) + with Cannot_expand -> + raise Unify + end + | (Tconstr(p1, tl1), _) -> + begin try + unify env (expand_abbrev env p1 tl1) t2 + with Cannot_expand -> + raise Unify + end + | (_, Tconstr(p2, tl2)) -> + begin try + unify env t1 (expand_abbrev env p2 tl2) + with Cannot_expand -> + raise Unify + end + | (_, _) -> + raise Unify + end + end + +and unify_list env tl1 tl2 = + match (tl1, tl2) with + ([], []) -> () + | (t1::r1, t2::r2) -> unify env t1 t2; unify_list env r1 r2 + | (_, _) -> raise Unify + +let rec filter_arrow env t = + match repr t with + Tvar v -> + let t1 = Tvar { tvar_level = v.tvar_level; tvar_link = None } + and t2 = Tvar { tvar_level = v.tvar_level; tvar_link = None } in + v.tvar_link <- Some(Tarrow(t1, t2)); + (t1, t2) + | Tarrow(t1, t2) -> + (t1, t2) + | Tconstr(p, tl) -> + begin try + filter_arrow env (expand_abbrev env p tl) + with Cannot_expand -> + raise Unify + end + | _ -> + raise Unify + +let rec filter env t1 t2 = + if t1 == t2 then () else begin + let t1 = repr t1 in + let t2 = repr t2 in + if t1 == t2 then () else begin + match (t1, t2) with + (Tvar v, _) -> + if v.tvar_level = generic_level then raise Unify; + occur v t2; + v.tvar_link <- Some t2 + | (Tarrow(t1, u1), Tarrow(t2, u2)) -> + filter env t1 t2; filter env u1 u2 + | (Ttuple tl1, Ttuple tl2) -> + filter_list env tl1 tl2 + | (Tconstr(p1, tl1), Tconstr(p2, tl2)) -> + if Path.same p1 p2 then + filter_list env tl1 tl2 + else begin + try + filter env (expand_abbrev env p1 tl1) t2 + with Cannot_expand -> + try + filter env t1 (expand_abbrev env p2 tl2) + with Cannot_expand -> + raise Unify + end + | (Tconstr(p1, tl1), _) -> + begin try + filter env (expand_abbrev env p1 tl1) t2 + with Cannot_expand -> + raise Unify + end + | (_, Tconstr(p2, tl2)) -> + begin try + filter env t1 (expand_abbrev env p2 tl2) + with Cannot_expand -> + raise Unify + end + | (_, _) -> + raise Unify + end + end + +and filter_list env tl1 tl2 = + match (tl1, tl2) with + ([], []) -> () + | (t1::r1, t2::r2) -> filter env t1 t2; filter_list env r1 r2 + | (_, _) -> raise Unify + +let moregeneral env sch1 sch2 = + try + filter env (instance sch1) sch2; true + with Unify -> + false + +let equal env params1 ty1 params2 ty2 = + let subst = List.combine (params1, params2) in + let rec eqtype t1 t2 = + let t1 = repr t1 in + let t2 = repr t2 in + match (t1, t2) with + (Tvar _, Tvar _) -> + begin try + List.assq t1 subst == t2 + with Not_found -> + fatal_error "Ctype.equal" + end + | (Tarrow(t1, u1), Tarrow(t2, u2)) -> + eqtype t1 t2 & eqtype u1 u2 + | (Ttuple tl1, Ttuple tl2) -> + eqtype_list tl1 tl2 + | (Tconstr(p1, tl1), Tconstr(p2, tl2)) -> + if Path.same p1 p2 then + eqtype_list tl1 tl2 + else begin + try + eqtype (expand_abbrev env p1 tl1) t2 + with Cannot_expand -> + try + eqtype t1 (expand_abbrev env p2 tl2) + with Cannot_expand -> + false + end + | (Tconstr(p1, tl1), _) -> + begin try + eqtype (expand_abbrev env p1 tl1) t2 + with Cannot_expand -> + false + end + | (_, Tconstr(p2, tl2)) -> + begin try + eqtype t1 (expand_abbrev env p2 tl2) + with Cannot_expand -> + false + end + | (_, _) -> + false + and eqtype_list tl1 tl2 = + match (tl1, tl2) with + ([], []) -> true + | (t1::r1, t2::r2) -> eqtype t1 t2 & eqtype_list r1 r2 + | (_, _) -> false + in + eqtype ty1 ty2 + +let rec closed_schema ty = + match repr ty with + Tvar v -> v.tvar_level = generic_level + | Tarrow(t1, t2) -> closed_schema t1 & closed_schema t2 + | Ttuple tl -> List.for_all closed_schema tl + | Tconstr(p, tl) -> List.for_all closed_schema tl + +let rec nondep_type env id ty = + match repr ty with + Tvar v as tvar -> tvar + | Tarrow(t1, t2) -> + Tarrow(nondep_type env id t1, nondep_type env id t2) + | Ttuple tl -> + Ttuple(List.map (nondep_type env id) tl) + | Tconstr(p, tl) -> + if Path.isfree id p then begin + let ty' = + try + expand_abbrev env p tl + with Cannot_expand -> + raise Not_found in + nondep_type env id ty' + end else + Tconstr(p, List.map (nondep_type env id) tl) + +let rec free_type_ident env id ty = + match repr ty with + Tvar _ -> false + | Tarrow(t1, t2) -> + free_type_ident env id t1 or free_type_ident env id t2 + | Ttuple tl -> + List.exists (free_type_ident env id) tl + | Tconstr(p, tl) -> + if Path.isfree id p then true else begin + try + free_type_ident env id (expand_abbrev env p tl) + with Cannot_expand -> + List.exists (free_type_ident env id) tl + end + +let is_generic ty = + match repr ty with + Tvar v -> v.tvar_level = generic_level + | _ -> fatal_error "Ctype.is_generic" + +let rec arity ty = + match repr ty with + Tarrow(t1, t2) -> 1 + arity t2 + | _ -> 0 + +let none = Ttuple [] (* Clearly ill-formed type *) diff --git a/typing/ctype.mli b/typing/ctype.mli new file mode 100644 index 0000000000..96a310137e --- /dev/null +++ b/typing/ctype.mli @@ -0,0 +1,58 @@ +(* Operations on core types *) + +open Typedtree + +val begin_def: unit -> unit + (* Raise the variable level by one at the beginning of a definition. *) +val end_def: unit -> unit + (* Lower the variable level by one at the end of a definition *) +val newvar: unit -> type_expr + (* Return a fresh variable *) +val repr: type_expr -> type_expr + (* Return the canonical representative of a type. *) +val generalize: type_expr -> unit + (* Generalize in-place the given type *) +val make_nongen: type_expr -> unit + (* Make non-generalizable the given type *) +val instance: type_expr -> type_expr + (* Take an instance of a type scheme *) +val instance_constructor: + constructor_description -> type_expr list * type_expr + (* Same, for a constructor *) +val instance_label: label_description -> type_expr * type_expr + (* Same, for a label *) +val unify: Env.t -> type_expr -> type_expr -> unit + (* Unify the two types given. Raise [Unify] if not possible. *) +val filter_arrow: Env.t -> type_expr -> type_expr * type_expr + (* A special case of unification (with 'a -> 'b). *) +val moregeneral: Env.t -> type_expr -> type_expr -> bool + (* Check if the first type scheme is more general than the second. *) +val equal: Env.t -> type_expr list -> type_expr -> + type_expr list -> type_expr -> bool + (* [equal env [x1...xn] tau [y1...yn] sigma] + checks whether the parameterized types + [/\x1.../\xn.tau] and [/\y1.../\yn.sigma] are equivalent. *) +val closed_schema: type_expr -> bool + (* Check whether the given type scheme contains no non-generic + type variables *) +val nondep_type: Env.t -> Ident.t -> type_expr -> type_expr + (* Return a type equivalent to the given type but without + references to the given module identifier. Raise [Not_found] + if no such type List.exists. *) +val free_type_ident: Env.t -> Ident.t -> type_expr -> bool + (* Test whether the given type identifier occurs free + in the given type expression. *) +val is_generic: type_expr -> bool + (* Test whether the given type variable is generic *) +val arity: type_expr -> int + (* Return the arity (as for curried functions) of the given type. *) +val none: type_expr + (* A dummy type expression *) +val substitute: + type_expr list -> type_expr list -> type_expr -> type_expr + (* [substitute [v1...vN] [t1...tN] t] + returns a copy of [t] where the [vi] are replaced + by the [ti]. *) + +exception Unify + diff --git a/typing/env.ml b/typing/env.ml new file mode 100644 index 0000000000..1998a81cb9 --- /dev/null +++ b/typing/env.ml @@ -0,0 +1,509 @@ +(* Environment handling *) + +open Format +open Config +open Misc +open Asttypes +open Longident +open Path +open Typedtree + + +type error = + Not_an_interface of string + | Corrupted_interface of string + | Illegal_renaming of string * string + +exception Error of error + +type t = { + values: (Path.t * value_description) Ident.tbl; + constrs: constructor_description Ident.tbl; + labels: label_description Ident.tbl; + types: (Path.t * type_declaration) Ident.tbl; + modules: (Path.t * module_type) Ident.tbl; + modtypes: (Path.t * modtype_declaration) Ident.tbl; + components: (Path.t * structure_components) Ident.tbl +} + +and structure_components = { + mutable comp_values: (string, (value_description * int)) Tbl.t; + mutable comp_constrs: (string, (constructor_description * int)) Tbl.t; + mutable comp_labels: (string, (label_description * int)) Tbl.t; + mutable comp_types: (string, (type_declaration * int)) Tbl.t; + mutable comp_modules: (string, (module_type * int)) Tbl.t; + mutable comp_modtypes: (string, (modtype_declaration * int)) Tbl.t; + mutable comp_components: (string, (structure_components * int)) Tbl.t +} + +let empty = { + values = Ident.empty; constrs = Ident.empty; + labels = Ident.empty; types = Ident.empty; + modules = Ident.empty; modtypes = Ident.empty; + components = Ident.empty } + +(* Persistent structure descriptions *) + +type pers_struct = + { ps_name: string; + ps_crc: int; + ps_sig: signature; + ps_comps: structure_components } + +let persistent_structures = + (Hashtbl.new 17 : (string, pers_struct) Hashtbl.t) + +let read_pers_struct modname filename = + let ic = open_in_bin filename in + try + let buffer = String.create (String.length cmi_magic_number) in + really_input ic buffer 0 (String.length cmi_magic_number); + if buffer <> cmi_magic_number then raise(Error(Not_an_interface filename)); + let ps = (input_value ic : pers_struct) in + if ps.ps_name <> modname then + raise(Error(Illegal_renaming(modname, filename))); + ps + with End_of_file | Failure _ -> + raise(Error(Corrupted_interface(filename))) + +let find_pers_struct name = + try + Hashtbl.find persistent_structures name + with Not_found -> + let ps = + read_pers_struct name + (find_in_path !load_path (lowercase name ^ ".cmi")) in + Hashtbl.add persistent_structures name ps; + ps + +let reset_cache() = + Hashtbl.clear persistent_structures + +(* Lookup by identifier *) + +let rec find_module_descr path env = + match path with + Pident id -> + begin try + let (p, desc) = Ident.find_same id env.components + in desc + with Not_found -> + if Ident.persistent id + then (find_pers_struct (Ident.name id)).ps_comps + else raise Not_found + end + | Pdot(p, s, pos) -> + let descr_p = find_module_descr p env in + let (descr, pos) = Tbl.find s descr_p.comp_components in + descr + +let find proj1 proj2 path env = + try + match path with + Pident id -> + let (p, data) = Ident.find_same id (proj1 env) + in data + | Pdot(p, s, pos) -> + let (data, pos) = Tbl.find s (proj2 (find_module_descr p env)) + in data + with Not_found -> + fatal_error "Env.find" + +let find_value = find (fun env -> env.values) (fun sc -> sc.comp_values) +and find_type = find (fun env -> env.types) (fun sc -> sc.comp_types) +and find_modtype = find (fun env -> env.modtypes) (fun sc -> sc.comp_modtypes) + +(* Lookup by name *) + +let rec lookup_module_descr lid env = + match lid with + Lident s -> + begin try + Ident.find_name s env.components + with Not_found -> + (Pident(Ident.new_persistent s), (find_pers_struct s).ps_comps) + end + | Ldot(p, s) -> + let (path, descr_p) = lookup_module_descr p env in + let (descr, pos) = Tbl.find s descr_p.comp_components in + (Pdot(path, s, pos), descr) + +let lookup proj1 proj2 lid env = + match lid with + Lident s -> + Ident.find_name s (proj1 env) + | Ldot(p, s) -> + let (path, descr) = lookup_module_descr p env in + let (data, pos) = Tbl.find s (proj2 descr) in + (Pdot(path, s, pos), data) + +let lookup_simple proj1 proj2 lid env = + match lid with + Lident s -> + Ident.find_name s (proj1 env) + | Ldot(p, s) -> + let (path, descr) = lookup_module_descr p env in + let (data, pos) = Tbl.find s (proj2 descr) in + data + +let lookup_value = + lookup (fun env -> env.values) (fun sc -> sc.comp_values) +and lookup_constructor = + lookup_simple (fun env -> env.constrs) (fun sc -> sc.comp_constrs) +and lookup_label = + lookup_simple (fun env -> env.labels) (fun sc -> sc.comp_labels) +and lookup_type = + lookup (fun env -> env.types) (fun sc -> sc.comp_types) +and lookup_modtype = + lookup (fun env -> env.modtypes) (fun sc -> sc.comp_modtypes) + +let lookup_module lid env = + match lid with + Lident s -> + begin try + Ident.find_name s env.modules + with Not_found -> + (Pident(Ident.new_persistent s), + Tmty_signature(find_pers_struct s).ps_sig) + end + | Ldot(p, s) -> + let (path, descr) = lookup_module_descr p env in + let (data, pos) = Tbl.find s descr.comp_modules in + (Pdot(path, s, pos), data) + +(* Scrape a module type *) + +let rec scrape_modtype mty env = + match mty with + Tmty_ident path -> + begin match find_modtype path env with + Tmodtype_manifest mty -> scrape_modtype mty env + | Tmodtype_abstract -> mty + end + | _ -> mty + +(* Compute constructor descriptions *) + +let constructors_of_type ty_path decl = + match decl.type_kind with + Type_variant cstrs -> + let ty_res = Tconstr(ty_path, decl.type_params) in + let num_constrs = List.length cstrs in + let rec describe_constructors num = function + [] -> [] + | (name, ty_args) :: rest -> + let cstr = + { cstr_res = ty_res; + cstr_args = ty_args; + cstr_arity = List.length ty_args; + cstr_tag = Cstr_tag num; + cstr_span = num_constrs } in + (name, cstr) :: describe_constructors (num+1) rest in + describe_constructors 0 cstrs + | _ -> [] + +(* Compute a constructor description for an exception *) + +let constructor_exception path_exc decl = + { cstr_res = Predef.type_exn; + cstr_args = decl; + cstr_arity = List.length decl; + cstr_tag = Cstr_exception path_exc; + cstr_span = -1 } + +(* Compute label descriptions *) + +let dummy_label = + { lbl_res = Ttuple []; lbl_arg = Ttuple []; lbl_mut = Immutable; + lbl_pos = (-1); lbl_all = [||] } + +let labels_of_type ty_path decl = + match decl.type_kind with + Type_record labels -> + let ty_res = Tconstr(ty_path, decl.type_params) in + let all_labels = Array.new (List.length labels) dummy_label in + let rec describe_labels num = function + [] -> [] + | (name, mut_flag, ty_arg) :: rest -> + let lbl = + { lbl_res = ty_res; + lbl_arg = ty_arg; + lbl_mut = mut_flag; + lbl_pos = num; + lbl_all = all_labels } in + all_labels.(num) <- lbl; + (name, lbl) :: describe_labels (num+1) rest in + describe_labels 0 labels + | _ -> [] + +(* Given a signature and a root path, prefix all idents in the signature + by the root path and build the corresponding substitution. *) + +let rec prefix_idents root pos sub = function + [] -> ([], sub) + | Tsig_value(id, decl) :: rem -> + let p = Pdot(root, Ident.name id, pos) in + let (pl, final_sub) = prefix_idents root (pos+1) sub rem in + (p::pl, final_sub) + | Tsig_type(id, decl) :: rem -> + let p = Pdot(root, Ident.name id, nopos) in + let (pl, final_sub) = + prefix_idents root pos (Subst.add_type id p sub) rem in + (p::pl, final_sub) + | Tsig_exception(id, decl) :: rem -> + let p = Pdot(root, Ident.name id, pos) in + let (pl, final_sub) = prefix_idents root (pos+1) sub rem in + (p::pl, final_sub) + | Tsig_module(id, mty) :: rem -> + let p = Pdot(root, Ident.name id, pos) in + let (pl, final_sub) = + prefix_idents root (pos+1) (Subst.add_module id p sub) rem in + (p::pl, final_sub) + | Tsig_modtype(id, decl) :: rem -> + let p = Pdot(root, Ident.name id, nopos) in + let (pl, final_sub) = + prefix_idents root pos + (Subst.add_modtype id (Tmty_ident p) sub) rem in + (p::pl, final_sub) + +(* Compute structure descriptions *) + +let rec components_of_module env path mty = + let c = + { comp_values = Tbl.empty; comp_constrs = Tbl.empty; + comp_labels = Tbl.empty; comp_types = Tbl.empty; + comp_modules = Tbl.empty; comp_modtypes = Tbl.empty; + comp_components = Tbl.empty } in + begin match scrape_modtype mty env with + Tmty_signature sg -> + let (pl, sub) = prefix_idents path 0 Subst.identity sg in + let env = ref env in + let pos = ref 0 in + List.iter2 (fun item path -> + match item with + Tsig_value(id, decl) -> + let decl' = Subst.value_description sub decl in + c.comp_values <- + Tbl.add (Ident.name id) (decl', !pos) c.comp_values; + incr pos + | Tsig_type(id, decl) -> + let decl' = Subst.type_declaration sub decl in + c.comp_types <- + Tbl.add (Ident.name id) (decl', nopos) c.comp_types; + List.iter + (fun (name, descr) -> + c.comp_constrs <- Tbl.add name (descr, nopos) c.comp_constrs) + (constructors_of_type path decl'); + List.iter + (fun (name, descr) -> + c.comp_labels <- Tbl.add name (descr, nopos) c.comp_labels) + (labels_of_type path decl') + | Tsig_exception(id, decl) -> + let decl' = Subst.exception_declaration sub decl in + let cstr = constructor_exception path decl' in + c.comp_constrs <- + Tbl.add (Ident.name id) (cstr, !pos) c.comp_constrs; + incr pos + | Tsig_module(id, mty) -> + let mty' = Subst.modtype sub mty in + c.comp_modules <- + Tbl.add (Ident.name id) (mty', !pos) c.comp_modules; + let comps = components_of_module !env path mty' in + c.comp_components <- + Tbl.add (Ident.name id) (comps, !pos) c.comp_components; + env := store_components id path comps !env; + incr pos + | Tsig_modtype(id, decl) -> + let decl' = Subst.modtype_declaration sub decl in + c.comp_modtypes <- + Tbl.add (Ident.name id) (decl', nopos) c.comp_modtypes; + env := store_modtype id path decl' !env) + sg pl + | _ -> () + end; + c + +(* Insertion of bindings by identifier + path *) + +and store_value id path decl env = + { values = Ident.add id (path, decl) env.values; + constrs = env.constrs; + labels = env.labels; + types = env.types; + modules = env.modules; + modtypes = env.modtypes; + components = env.components } + +and store_type id path info env = + { values = env.values; + constrs = + List.fold_right + (fun (name, descr) constrs -> + Ident.add (Ident.new name) descr constrs) + (constructors_of_type path info) + env.constrs; + labels = + List.fold_right + (fun (name, descr) labels -> + Ident.add (Ident.new name) descr labels) + (labels_of_type path info) + env.labels; + types = Ident.add id (path, info) env.types; + modules = env.modules; + modtypes = env.modtypes; + components = env.components } + +and store_exception id path decl env = + { values = env.values; + constrs = Ident.add id (constructor_exception path decl) env.constrs; + labels = env.labels; + types = env.types; + modules = env.modules; + modtypes = env.modtypes; + components = env.components } + +and store_module id path mty env = + { values = env.values; + constrs = env.constrs; + labels = env.labels; + types = env.types; + modules = Ident.add id (path, mty) env.modules; + modtypes = env.modtypes; + components = Ident.add id (path, components_of_module env path mty) + env.components } + +and store_modtype id path info env = + { values = env.values; + constrs = env.constrs; + labels = env.labels; + types = env.types; + modules = env.modules; + modtypes = Ident.add id (path, info) env.modtypes; + components = env.components } + +and store_components id path comps env = + { values = env.values; + constrs = env.constrs; + labels = env.labels; + types = env.types; + modules = env.modules; + modtypes = env.modtypes; + components = Ident.add id (path, comps) env.components } + +(* Insertion of bindings by identifier *) + +let add_value id desc env = + store_value id (Pident id) desc env + +and add_type id info env = + store_type id (Pident id) info env + +and add_exception id decl env = + store_exception id (Pident id) decl env + +and add_module id mty env = + store_module id (Pident id) mty env + +and add_modtype id info env = + store_modtype id (Pident id) info env + +(* Insertion of bindings by name *) + +let enter store_fun name data env = + let id = Ident.new name in (id, store_fun id (Pident id) data env) + +let enter_value = enter store_value +and enter_type = enter store_type +and enter_exception = enter store_exception +and enter_module = enter store_module +and enter_modtype = enter store_modtype + +(* Insertion of all components of a signature *) + +let add_signature_component comp env = + match comp with + Tsig_value(id, decl) -> add_value id decl env + | Tsig_type(id, decl) -> add_type id decl env + | Tsig_exception(id, decl) -> add_exception id decl env + | Tsig_module(id, mty) -> add_module id mty env + | Tsig_modtype(id, decl) -> add_modtype id decl env + +let add_signature = List.fold_right add_signature_component + +(* Open a signature path *) + +let open_signature root sg env = + (* First build the paths and substitution *) + let (pl, sub) = prefix_idents root 0 Subst.identity sg in + (* Then enter the components in the environment after substitution *) + List.fold_left2 + (fun env item p -> + match item with + Tsig_value(id, decl) -> + store_value (Ident.hide id) p + (Subst.value_description sub decl) env + | Tsig_type(id, decl) -> + store_type (Ident.hide id) p + (Subst.type_declaration sub decl) env + | Tsig_exception(id, decl) -> + store_exception (Ident.hide id) p + (Subst.exception_declaration sub decl) env + | Tsig_module(id, mty) -> + store_module (Ident.hide id) p (Subst.modtype sub mty) env + | Tsig_modtype(id, decl) -> + store_modtype (Ident.hide id) p + (Subst.modtype_declaration sub decl) env) + env sg pl + +(* Open a signature from a file *) + +let open_pers_signature name env = + let ps = find_pers_struct name in + open_signature (Pident(Ident.new_persistent name)) ps.ps_sig env + +(* Read a signature from a file *) + +let read_signature modname filename = + let ps = read_pers_struct modname filename in (ps.ps_sig, ps.ps_crc) + +(* Save a signature to a file *) + +let save_signature sg modname crc filename = + let ps = + { ps_name = modname; + ps_crc = crc; + ps_sig = sg; + ps_comps = + components_of_module empty (Pident(Ident.new_persistent modname)) + (Tmty_signature sg) } in + let oc = open_out_bin filename in + output_string oc cmi_magic_number; + output_value oc ps; + close_out oc + +(* Make the initial environment *) + +let initial = Predef.build_initial_env add_type add_exception empty + +(* Return the list of imported interfaces with their CRCs *) + +let imported_units() = + let l = ref [] in + Hashtbl.iter + (fun name ps -> l := (ps.ps_name, ps.ps_crc) :: !l) persistent_structures; + !l + +(* Error report *) + +let report_error = function + Not_an_interface filename -> + print_string filename; print_space(); + print_string "is not a compiled interface." + | Corrupted_interface filename -> + print_string "Corrupted compiled interface"; print_space(); + print_string filename + | Illegal_renaming(modname, filename) -> + print_string filename; print_space(); + print_string "contains the compiled interface for"; print_space(); + print_string modname + diff --git a/typing/env.mli b/typing/env.mli new file mode 100644 index 0000000000..1003457016 --- /dev/null +++ b/typing/env.mli @@ -0,0 +1,78 @@ +(* Environment handling *) + +open Typedtree + +type t + +val empty: t +val initial: t + +(* Lookup by paths *) + +val find_value: Path.t -> t -> value_description +val find_type: Path.t -> t -> type_declaration +val find_modtype: Path.t -> t -> modtype_declaration + +(* Lookup by long identifiers *) + +val lookup_value: Longident.t -> t -> Path.t * value_description +val lookup_constructor: Longident.t -> t -> constructor_description +val lookup_label: Longident.t -> t -> label_description +val lookup_type: Longident.t -> t -> Path.t * type_declaration +val lookup_module: Longident.t -> t -> Path.t * module_type +val lookup_modtype: Longident.t -> t -> Path.t * modtype_declaration + +(* Insertion by identifier *) + +val add_value: Ident.t -> value_description -> t -> t +val add_type: Ident.t -> type_declaration -> t -> t +val add_exception: Ident.t -> exception_declaration -> t -> t +val add_module: Ident.t -> module_type -> t -> t +val add_modtype: Ident.t -> modtype_declaration -> t -> t + +(* Insertion of all fields of a signature. *) + +val add_signature: signature -> t -> t + +(* Insertion of all fields of a signature, relative to the given path. + Used to implement open. *) + +val open_signature: Path.t -> signature -> t -> t +val open_pers_signature: string -> t -> t + +(* Insertion by name *) + +val enter_value: string -> value_description -> t -> Ident.t * t +val enter_type: string -> type_declaration -> t -> Ident.t * t +val enter_exception: string -> exception_declaration -> t -> Ident.t * t +val enter_module: string -> module_type -> t -> Ident.t * t +val enter_modtype: string -> modtype_declaration -> t -> Ident.t * t + +(* Reset the cache of in-core module interfaces. + To be called in particular when load_path changes. *) + +val reset_cache: unit -> unit + +(* Read, save a signature to/from a file *) + +val read_signature: string -> string -> signature * int + (* Arguments: module name, file name. + Results: signature, CRC. *) +val save_signature: signature -> string -> int -> string -> unit + (* Arguments: signature, module name, CRC, file name. *) + +(* Return the set of compilation units imported, with their CRC *) + +val imported_units: unit -> (string * int) list + +(* Error report *) + +type error = + Not_an_interface of string + | Corrupted_interface of string + | Illegal_renaming of string * string + +exception Error of error + +val report_error: error -> unit + diff --git a/typing/ident.ml b/typing/ident.ml new file mode 100644 index 0000000000..599b855929 --- /dev/null +++ b/typing/ident.ml @@ -0,0 +1,157 @@ +open Misc +open Format + +type t = { mutable stamp: int; name: string; mutable global: bool } + +(* A stamp of 0 denotes a persistent identifier *) + +let currentstamp = ref 0 + +let new s = + incr currentstamp; + { name = s; stamp = !currentstamp; global = false } + +let new_persistent s = + { name = s; stamp = 0; global = true } + +let name i = i.name + +let persistent i = (i.stamp = 0) + +let equal i1 i2 = i1.name = i2.name + +let same i1 i2 = i1 = i2 + (* Possibly more efficient version (with a real compiler, at least): + if i1.stamp <> 0 + then i1.stamp = i2.stamp + else i2.stamp = 0 & i1.name = i2.name *) + +let identify i1 i2 f = + let stamp1 = i1.stamp in + try + i1.stamp <- i2.stamp; + let res = f () in + i1.stamp <- stamp1; + res + with x -> + i1.stamp <- stamp1; + raise x + +let hide i = + { stamp = -1; name = i.name; global = i.global } + +let make_global i = + i.global <- true + +let global i = + i.global + +let print i = + print_string i.name; + match i.stamp with + 0 -> print_string "!" + | -1 -> print_string "#" + | n -> print_string "/"; print_int n; if i.global then print_string "g" + +type 'a tbl = + Empty + | Node of 'a tbl * 'a data * 'a tbl * int + +and 'a data = + { ident: t; + data: 'a; + previous: 'a data option } + +let empty = Empty + +(* Inline expansion of height for better speed + * let height = function + * Empty -> 0 + * | Node(_,_,_,h) -> h + *) + +let mknode l d r = + let hl = match l with Empty -> 0 | Node(_,_,_,h) -> h + and hr = match r with Empty -> 0 | Node(_,_,_,h) -> h in + Node(l, d, r, (if hl >= hr then hl + 1 else hr + 1)) + +let balance l d r = + let hl = match l with Empty -> 0 | Node(_,_,_,h) -> h + and hr = match r with Empty -> 0 | Node(_,_,_,h) -> h in + if hl > hr + 1 then + let (Node(ll, ld, lr, _)) = l in + if (match ll with Empty -> 0 | Node(_,_,_,h) -> h) >= + (match lr with Empty -> 0 | Node(_,_,_,h) -> h) then + mknode ll ld (mknode lr d r) + else + let (Node(lrl, lrd, lrr, _)) = lr in + mknode (mknode ll ld lrl) lrd (mknode lrr d r) + else if hr > hl + 1 then + let (Node(rl, rd, rr, _)) = r in + if (match rr with Empty -> 0 | Node(_,_,_,h) -> h) >= + (match rl with Empty -> 0 | Node(_,_,_,h) -> h) then + mknode (mknode l d rl) rd rr + else + let (Node(rll, rld, rlr, _)) = rl in + mknode (mknode l d rll) rld (mknode rlr rd rr) + else + mknode l d r + +let rec add id data = function + Empty -> + Node(Empty, {ident = id; data = data; previous = None}, Empty, 1) + | Node(l, k, r, h) -> + let c = compare id.name k.ident.name in + if c = 0 then + Node(l, {ident = id; data = data; previous = Some k}, r, h) + else if c < 0 then + balance (add id data l) k r + else + balance l k (add id data r) + +let rec find_stamp s = function + None -> + raise Not_found + | Some k -> + if k.ident.stamp = s then k.data else find_stamp s k.previous + +let rec find_same id = function + Empty -> + raise Not_found + | Node(l, k, r, _) -> + let c = compare id.name k.ident.name in + if c = 0 then + if id.stamp = k.ident.stamp + then k.data + else find_stamp id.stamp k.previous + else + find_same id (if c < 0 then l else r) + +let rec find_name name = function + Empty -> + raise Not_found + | Node(l, k, r, _) -> + let c = compare name k.ident.name in + if c = 0 then + k.data + else + find_name name (if c < 0 then l else r) + +let print_tbl print_elt tbl = + open_hovbox 2; + print_string "[["; + let rec print_tbl = function + Empty -> () + | Node(l, k, r, _) -> + print_tbl l; + print_entry k; + print_tbl r + and print_entry k = + open_hovbox 2; + print k.ident; print_string " ->"; print_space(); print_elt k.data; + print_string ";"; close_box(); print_space(); + match k.previous with None -> () | Some k -> print_entry k in + print_tbl tbl; + print_string "]]"; + close_box() + diff --git a/typing/ident.mli b/typing/ident.mli new file mode 100644 index 0000000000..8497451da9 --- /dev/null +++ b/typing/ident.mli @@ -0,0 +1,39 @@ +(* Identifiers (unique names) *) + +type t + +val new: string -> t +val new_persistent: string -> t +val name: t -> string +val persistent: t -> bool +val equal: t -> t -> bool + (* Compare identifiers by name. *) +val same: t -> t -> bool + (* Compare identifiers by binding location. + Two identifiers are the same either if they are both + non-persistent and have been created by the same call to + [new], or if they are both persistent and have the same + name. *) +val identify: t -> t -> (unit -> 'a) -> 'a + (* [identify id1 id2 f] temporarily makes [id1] and [id2] the same + during the evaluation of [f ()]. *) +val hide: t -> t + (* Return an identifier with same name as the given identifier, + but stamp different from any stamp returns by new. + When put in a 'a tbl, this identifier can only be looked + up by name. *) + +val make_global: t -> unit +val global: t -> bool + +val print: t -> unit + +type 'a tbl + (* Association tables from identifiers to type 'a. *) + +val empty: 'a tbl +val add: t -> 'a -> 'a tbl -> 'a tbl +val find_same: t -> 'a tbl -> 'a +val find_name: string -> 'a tbl -> 'a + +val print_tbl: ('a -> unit) -> 'a tbl -> unit diff --git a/typing/includecore.ml b/typing/includecore.ml new file mode 100644 index 0000000000..45ca237364 --- /dev/null +++ b/typing/includecore.ml @@ -0,0 +1,53 @@ +(* Inclusion checks for the core language *) + +open Misc +open Path +open Typedtree + + +(* Inclusion between val descriptions *) + +let value_descriptions env vd1 vd2 = + Ctype.moregeneral env vd1.val_type vd2.val_type & + begin match (vd1.val_prim, vd2.val_prim) with + (Primitive(p1, ar1), Primitive(p2, ar2)) -> p1 = p2 & ar1 = ar2 + | (Not_prim, Primitive(p, ar)) -> false + | _ -> true + end + +(* Inclusion between type declarations *) + +let type_declarations env id decl1 decl2 = + decl1.type_arity = decl2.type_arity & + begin match (decl1.type_kind, decl2.type_kind) with + (_, Type_abstract) -> + true + | (Type_manifest ty1, Type_manifest ty2) -> + Ctype.equal env decl1.type_params ty1 decl2.type_params ty2 + | (Type_variant cstrs1, Type_variant cstrs2) -> + for_all2 + (fun (cstr1, arg1) (cstr2, arg2) -> + cstr1 = cstr2 & + for_all2 + (fun ty1 ty2 -> + Ctype.equal env decl1.type_params ty1 decl2.type_params ty2) + arg1 arg2) + cstrs1 cstrs2 + | (Type_record labels1, Type_record labels2) -> + for_all2 + (fun (lbl1, mut1, ty1) (lbl2, mut2, ty2) -> + lbl1 = lbl2 & mut1 = mut2 & + Ctype.equal env decl1.type_params ty1 decl2.type_params ty2) + labels1 labels2 + | (_, Type_manifest ty2) -> + let ty1 = Tconstr(Pident id, decl2.type_params) in + Ctype.equal env [] ty1 [] ty2 + | (_, _) -> + false + end + +(* Inclusion between exception declarations *) + +let exception_declarations env ed1 ed2 = + for_all2 (fun ty1 ty2 -> Ctype.equal env [] ty1 [] ty2) ed1 ed2 + diff --git a/typing/includecore.mli b/typing/includecore.mli new file mode 100644 index 0000000000..6c6e6417c2 --- /dev/null +++ b/typing/includecore.mli @@ -0,0 +1,10 @@ +(* Inclusion checks for the core language *) + +open Typedtree + +val value_descriptions: + Env.t -> value_description -> value_description -> bool +val type_declarations: + Env.t -> Ident.t -> type_declaration -> type_declaration -> bool +val exception_declarations: + Env.t -> exception_declaration -> exception_declaration -> bool diff --git a/typing/includemod.ml b/typing/includemod.ml new file mode 100644 index 0000000000..efc1e0c19a --- /dev/null +++ b/typing/includemod.ml @@ -0,0 +1,256 @@ +(* Inclusion checks for the module language *) + +open Misc +open Path +open Typedtree + + +type error = + Missing_field of Ident.t + | Value_descriptions of Ident.t * value_description * value_description + | Type_declarations of Ident.t * type_declaration * type_declaration + | Exception_declarations of + Ident.t * exception_declaration * exception_declaration + | Module_types of module_type * module_type + | Modtype_infos of Ident.t * modtype_declaration * modtype_declaration + +exception Error of error list + +(* All functions "blah env x1 x2" check that x1 is included in x2, + i.e. that x1 is the type of an implementation that fulfills the + specification x2. If not, Error is raised with a backtrace of the error. *) + +(* Inclusion between val descriptions *) + +let value_descriptions env id vd1 vd2 = + if Includecore.value_descriptions env vd1 vd2 + then () + else raise(Error[Value_descriptions(id, vd1, vd2)]) + +(* Inclusion between type declarations *) + +let type_declarations env id decl1 decl2 = + if Includecore.type_declarations env id decl1 decl2 + then () + else raise(Error[Type_declarations(id, decl1, decl2)]) + +(* Inclusion between exception declarations *) + +let exception_declarations env id decl1 decl2 = + if Includecore.exception_declarations env decl1 decl2 + then () + else raise(Error[Exception_declarations(id, decl1, decl2)]) + +(* Expand a module type identifier when possible *) + +exception Dont_match + +let expand_module_path env path = + match Env.find_modtype path env with + Tmodtype_abstract -> raise Dont_match + | Tmodtype_manifest mty -> mty + +(* Extract name, kind and ident from a signature item *) + +type field_desc = + Field_value of string + | Field_type of string + | Field_exception of string + | Field_module of string + | Field_modtype of string + +let item_ident_name = function + Tsig_value(id, _) -> (id, Field_value(Ident.name id)) + | Tsig_type(id, _) -> (id, Field_type(Ident.name id)) + | Tsig_exception(id, _) -> (id, Field_exception(Ident.name id)) + | Tsig_module(id, _) -> (id, Field_module(Ident.name id)) + | Tsig_modtype(id, _) -> (id, Field_modtype(Ident.name id)) + +(* Simplify a structure coercion *) + +let simplify_structure_coercion cc = + let pos = ref 0 in + try + List.iter + (fun (n, c) -> + if n <> !pos or c <> Tcoerce_none then raise Exit; + incr pos) + cc; + Tcoerce_none + with Exit -> + Tcoerce_structure cc + +(* Inclusion between module types. + Return the restriction that transforms a val of the smaller type + into a val of the bigger type. *) + +let rec modtypes env mty1 mty2 = + try + try_modtypes env mty1 mty2 + with + Dont_match -> + raise(Error[Module_types(mty1, mty2)]) + | Error reasons -> + raise(Error(Module_types(mty1, mty2) :: reasons)) + +and try_modtypes env mty1 mty2 = + match (mty1, mty2) with + (Tmty_ident p1, Tmty_ident p2) when Path.same p1 p2 -> + Tcoerce_none + | (Tmty_ident p1, _) -> + try_modtypes env (expand_module_path env p1) mty2 + | (_, Tmty_ident p2) -> + try_modtypes env mty1 (expand_module_path env p2) + | (Tmty_signature sig1, Tmty_signature sig2) -> + signatures env sig1 sig2 + | (Tmty_functor(param1, arg1, res1), Tmty_functor(param2, arg2, res2)) -> + let cc_arg = + modtypes env arg2 arg1 in + let cc_res = + Ident.identify param2 param1 + (fun () -> modtypes (Env.add_module param1 arg1 env) res1 res2) in + begin match (cc_arg, cc_res) with + (Tcoerce_none, Tcoerce_none) -> Tcoerce_none + | _ -> Tcoerce_functor(cc_arg, cc_res) + end + | (_, _) -> + raise Dont_match + +(* Inclusion between signatures *) + +and signatures env sig1 sig2 = + (* Environment used to check inclusion of components *) + let new_env = + Env.add_signature sig1 env in + (* Build a table of the components of sig1, along with their positions. + The table is indexed by kind and name of component *) + let rec build_component_table pos tbl = function + [] -> tbl + | item :: rem -> + let (id, name) = item_ident_name item in + let nextpos = + match item with + Tsig_value(_,_) | Tsig_exception(_,_) | Tsig_module(_,_) -> pos+1 + | Tsig_type(_,_) | Tsig_modtype(_,_) -> pos in + build_component_table nextpos + (Tbl.add name (id, item, pos) tbl) rem in + let comps1 = + build_component_table 0 Tbl.empty sig1 in + (* Pair each component of sig2 with a component of sig1, + identifying the names along the way. + Return a coercion list indicating, for all run-time components + of sig2, the position of the matching run-time components of sig1 + and the coercion to be applied to it. *) + let rec pair_components paired unpaired = function + [] -> + begin match unpaired with + [] -> signature_components new_env (List.rev paired) + | _ -> raise(Error unpaired) + end + | item2 :: rem -> + let (id2, name2) = item_ident_name item2 in + begin try + let (id1, item1, pos1) = Tbl.find name2 comps1 in + Ident.identify id1 id2 + (fun () -> + pair_components ((item1, item2, pos1) :: paired) unpaired rem) + with Not_found -> + pair_components paired (Missing_field id2 :: unpaired) rem + end in + (* Do the pairing and checking, and return the final coercion *) + simplify_structure_coercion(pair_components [] [] sig2) + +(* Inclusion between signature components *) + +and signature_components env = function + [] -> [] + | (Tsig_value(id1, valdecl1), Tsig_value(id2, valdecl2), pos) :: rem -> + value_descriptions env id1 valdecl1 valdecl2; + (pos, Tcoerce_none) :: signature_components env rem + | (Tsig_type(id1, tydecl1), Tsig_type(id2, tydecl2), pos) :: rem -> + type_declarations env id1 tydecl1 tydecl2; + signature_components env rem + | (Tsig_exception(id1, excdecl1), Tsig_exception(id2, excdecl2), pos) + :: rem -> + exception_declarations env id1 excdecl1 excdecl2; + (pos, Tcoerce_none) :: signature_components env rem + | (Tsig_module(id1, mty1), Tsig_module(id2, mty2), pos) :: rem -> + let cc = modtypes env mty1 mty2 in + (pos, cc) :: signature_components env rem + | (Tsig_modtype(id1, info1), Tsig_modtype(id2, info2), pos) :: rem -> + modtype_infos env id1 info1 info2; + signature_components env rem + | _ -> + fatal_error "Includemod.signature_components" + +(* Inclusion between module type specifications *) + +and modtype_infos env id info1 info2 = + try + match (info1, info2) with + (Tmodtype_abstract, Tmodtype_abstract) -> () + | (Tmodtype_manifest mty1, Tmodtype_abstract) -> () + | (Tmodtype_manifest mty1, Tmodtype_manifest mty2) -> + modtypes env mty1 mty2; modtypes env mty2 mty1; () + | (_, Tmodtype_manifest mty2) -> + let mty1 = Tmty_ident(Pident id) in + modtypes env mty1 mty2; modtypes env mty2 mty1; () + with Error reasons -> + raise(Error(Modtype_infos(id, info1, info2) :: reasons)) + +(* Error report *) + +open Format +open Printtyp + +let include_err = function + Missing_field id -> + print_string "Missing field "; ident id + | Value_descriptions(id, d1, d2) -> + open_hvbox 2; + print_string "Values do not match:"; print_space(); + value_description id d1; + print_break(1, -2); + print_string "is not included in"; print_space(); + value_description id d2; + close_box() + | Type_declarations(id, d1, d2) -> + open_hvbox 2; + print_string "Type declarations do not match:"; print_space(); + type_declaration id d1; + print_break(1, -2); + print_string "is not included in"; print_space(); + type_declaration id d2; + close_box() + | Exception_declarations(id, d1, d2) -> + open_hvbox 2; + print_string "Exception declarations do not match:"; print_space(); + exception_declaration id d1; + print_break(1, -2); + print_string "is not included in"; print_space(); + exception_declaration id d2; + close_box() + | Module_types(mty1, mty2)-> + open_hvbox 2; + print_string "Modules do not match:"; print_space(); + modtype mty1; + print_break(1, -2); + print_string "is not included in"; print_space(); + modtype mty2; + close_box() + | Modtype_infos(id, d1, d2) -> + open_hvbox 2; + print_string "Module type declarations do not match:"; print_space(); + modtype_declaration id d1; + print_break(1, -2); + print_string "is not included in"; print_space(); + modtype_declaration id d2; + close_box() + +let report_error errlist = + match List.rev errlist with + [] -> () + | err :: rem -> + include_err err; + List.iter (fun err -> print_space(); include_err err) rem + diff --git a/typing/includemod.mli b/typing/includemod.mli new file mode 100644 index 0000000000..2a6ca3e0a4 --- /dev/null +++ b/typing/includemod.mli @@ -0,0 +1,19 @@ +(* Inclusion checks for the module language *) + +open Typedtree + +val modtypes: Env.t -> module_type -> module_type -> module_coercion +val signatures: Env.t -> signature -> signature -> module_coercion + +type error = + Missing_field of Ident.t + | Value_descriptions of Ident.t * value_description * value_description + | Type_declarations of Ident.t * type_declaration * type_declaration + | Exception_declarations of + Ident.t * exception_declaration * exception_declaration + | Module_types of module_type * module_type + | Modtype_infos of Ident.t * modtype_declaration * modtype_declaration + +exception Error of error list + +val report_error: error list -> unit diff --git a/typing/mtype.ml b/typing/mtype.ml new file mode 100644 index 0000000000..96d364fd98 --- /dev/null +++ b/typing/mtype.ml @@ -0,0 +1,147 @@ +(* Operations on module types *) + +open Path +open Typedtree + + +let rec scrape env mty = + match mty with + Tmty_ident p -> + begin match Env.find_modtype p env with + Tmodtype_abstract -> mty + | Tmodtype_manifest mty' -> scrape env mty' + end + | _ -> mty + +let rec strengthen env mty p = + match scrape env mty with + Tmty_signature sg -> + Tmty_signature(strengthen_sig env sg p) + | mty -> + mty + +and strengthen_sig env sg p = + match sg with + [] -> [] + | (Tsig_value(id, desc) as sigelt) :: rem -> + sigelt :: strengthen_sig env rem p + | Tsig_type(id, decl) :: rem -> + let newdecl = + match decl.type_kind with + Type_abstract -> + { type_params = decl.type_params; + type_arity = decl.type_arity; + type_kind = Type_manifest(Tconstr(Pdot(p, Ident.name id, nopos), + decl.type_params)) } + | _ -> decl in + Tsig_type(id, newdecl) :: strengthen_sig env rem p + | (Tsig_exception(id, d) as sigelt) :: rem -> + sigelt :: strengthen_sig env rem p + | Tsig_module(id, mty) :: rem -> + Tsig_module(id, strengthen env mty (Pdot(p, Ident.name id, nopos))) :: + strengthen_sig (Env.add_module id mty env) rem p + (* Need to add the module in case it defines manifest module types *) + | Tsig_modtype(id, decl) :: rem -> + let newdecl = + match decl with + Tmodtype_abstract -> + Tmodtype_manifest(Tmty_ident(Pdot(p, Ident.name id, nopos))) + | Tmodtype_manifest _ -> + decl in + Tsig_modtype(id, newdecl) :: + strengthen_sig (Env.add_modtype id decl env) rem p + (* Need to add the module type in case it is manifest *) + +(* In nondep_supertype, env is only used for the type it assigns to id. + Hence there is no need to keep env up-to-date by adding the bindings + traversed. *) + +type variance = Co | Contra | Strict + +let nondep_supertype env id mty = + + let rec nondep_mty var mty = + match mty with + Tmty_ident p -> + if Path.isfree id p then begin + match Env.find_modtype p env with + Tmodtype_abstract -> raise Not_found + | Tmodtype_manifest mty -> nondep_mty var mty + end else mty + | Tmty_signature sg -> + Tmty_signature(nondep_sig var sg) + | Tmty_functor(param, arg, res) -> + let var_inv = + match var with Co -> Contra | Contra -> Co | Strict -> Strict in + Tmty_functor(param, nondep_mty var_inv arg, nondep_mty var res) + + and nondep_sig var = function + [] -> [] + | item :: rem -> + let rem' = nondep_sig var rem in + match item with + Tsig_value(id, d) -> + begin try + Tsig_value(id, {val_type = Ctype.nondep_type env id d.val_type; + val_prim = d.val_prim}) :: rem' + with Not_found -> + match var with Co -> rem' | _ -> raise Not_found + end + | Tsig_type(id, d) -> + begin try + Tsig_type(id, nondep_type_decl d) :: rem' + with Not_found -> + match var with + Co -> Tsig_type(id, abstract_type_decl d) :: rem' + | _ -> raise Not_found + end + | Tsig_exception(id, d) -> + begin try + Tsig_exception(id, List.map (Ctype.nondep_type env id) d) :: rem' + with Not_found -> + match var with Co -> rem' | _ -> raise Not_found + end + | Tsig_module(id, mty) -> + begin try + Tsig_module(id, nondep_mty var mty) :: rem' + with Not_found -> + match var with Co -> rem' | _ -> raise Not_found + end + | Tsig_modtype(id, d) -> + begin try + Tsig_modtype(id, nondep_modtype_decl d) :: rem' + with Not_found -> + match var with + Co -> Tsig_modtype(id, Tmodtype_abstract) :: rem' + | _ -> raise Not_found + end + + and nondep_type_decl d = + {type_params = d.type_params; + type_arity = d.type_arity; + type_kind = + match d.type_kind with + Type_abstract -> + Type_abstract + | Type_manifest ty -> + Type_manifest(Ctype.nondep_type env id ty) + | Type_variant cstrs -> + Type_variant(List.map + (fun (c, tl) -> (c, List.map (Ctype.nondep_type env id) tl)) + cstrs) + | Type_record lbls -> + Type_record(List.map + (fun (c, mut, t) -> (c, mut, Ctype.nondep_type env id t)) + lbls)} + + and abstract_type_decl d = + {type_params = d.type_params; + type_arity = d.type_arity; + type_kind = Type_abstract} + + and nondep_modtype_decl = function + Tmodtype_abstract -> Tmodtype_abstract + | Tmodtype_manifest mty -> Tmodtype_manifest(nondep_mty Strict mty) + + in + nondep_mty Co mty diff --git a/typing/mtype.mli b/typing/mtype.mli new file mode 100644 index 0000000000..ca2016fb57 --- /dev/null +++ b/typing/mtype.mli @@ -0,0 +1,15 @@ +(* Operations on module types *) + +open Typedtree + +val scrape: Env.t -> module_type -> module_type + (* Expand toplevel module type abbreviations + till hitting a "hard" module type (signature, functor, + or abstract module type ident. *) +val strengthen: Env.t -> module_type -> Path.t -> module_type + (* Strengthen abstract type components relative to the + given path. *) +val nondep_supertype: Env.t -> Ident.t -> module_type -> module_type + (* Return the smallest supertype of the given type + in which the given ident does not appear. + Raise [Not_found] if no such type List.exists. *) diff --git a/typing/parmatch.ml b/typing/parmatch.ml new file mode 100644 index 0000000000..9b09624e65 --- /dev/null +++ b/typing/parmatch.ml @@ -0,0 +1,263 @@ +(* Detection of partial matches and unused match cases. *) + +open Misc +open Asttypes +open Typedtree + + +let make_pat desc ty = + {pat_desc = desc; pat_loc = Location.none; pat_type = ty} + +let omega = make_pat Tpat_any Ctype.none + +let rec omegas i = + if i <= 0 then [] else omega :: omegas (i-1) + +let omega_list l = omegas(List.length l) + +let has_guard act = + match act.exp_desc with + Texp_when(_, _) -> true + | _ -> false + +let simple_match p1 p2 = + match p1.pat_desc, p2.pat_desc with + Tpat_construct(c1, _), Tpat_construct(c2, _) -> + c1.cstr_tag = c2.cstr_tag + | Tpat_constant(c1), Tpat_constant(c2) -> + c1 = c2 + | Tpat_tuple(_), Tpat_tuple(_) -> true + | Tpat_record(_), Tpat_record(_) -> true + | _, (Tpat_any | Tpat_var(_)) -> true + | _, _ -> false + +(* Return the set of labels and number of fields for a record pattern. *) + +let record_labels p = + match p.pat_desc with + Tpat_record((lbl1, pat1) :: rem) -> Array.to_list lbl1.lbl_all + | _ -> fatal_error "Parmatch.record_labels" + +let record_num_fields p = + match p.pat_desc with + Tpat_record((lbl1, pat1) :: rem) -> Array.length lbl1.lbl_all + | _ -> fatal_error "Parmatch.record_num_fields" + +let set_fields size l = + let v = Array.new size omega in + let rec change_rec l = match l with + (lbl,p)::l -> v.(lbl.lbl_pos) <- p ; change_rec l + | [] -> () in + change_rec l; + Array.to_list v + +let simple_match_args p1 p2 = + match p2.pat_desc with + Tpat_construct(cstr, args) -> args + | Tpat_tuple(args) -> args + | Tpat_record(args) -> set_fields (record_num_fields p1) args + | (Tpat_any | Tpat_var(_)) -> + begin match p1.pat_desc with + Tpat_construct(_, args) -> omega_list args + | Tpat_tuple(args) -> omega_list args + | Tpat_record(args) -> omega_list args + | _ -> [] + end + | _ -> [] + +(* + Computes the discriminating pattern for matching by the first + column of pss, that is: + checks for a tuple or a record when q is a variable. +*) + +let rec simple_pat q pss = match pss with + ({pat_desc = Tpat_alias(p,_)}::ps)::pss -> + simple_pat q ((p::ps)::pss) + | ({pat_desc = Tpat_or(p1,p2)}::ps)::pss -> + simple_pat q ((p1::ps)::(p2::ps)::pss) + | ({pat_desc = (Tpat_any | Tpat_var(_))}::_)::pss -> + simple_pat q pss + | (({pat_desc = Tpat_tuple(args)} as p)::_)::_ -> + make_pat (Tpat_tuple(omega_list args)) p.pat_type + | (({pat_desc = Tpat_record(args)} as p)::_)::pss -> + make_pat (Tpat_record (List.map (fun lbl -> (lbl,omega)) (record_labels p))) + p.pat_type + | _ -> q + +let filter_one q pss = + let rec filter_rec = function + ({pat_desc = Tpat_alias(p,_)}::ps)::pss -> + filter_rec ((p::ps)::pss) + | ({pat_desc = Tpat_or(p1,p2)}::ps)::pss -> + filter_rec ((p1::ps)::(p2::ps)::pss) + | (p::ps)::pss -> + if simple_match q p + then (simple_match_args q p @ ps) :: filter_rec pss + else filter_rec pss + | _ -> [] in + filter_rec pss + +let filter_extra pss = + let rec filter_rec = function + ({pat_desc = Tpat_alias(p,_)}::ps)::pss -> + filter_rec ((p::ps)::pss) + | ({pat_desc = Tpat_or(p1,p2)}::ps)::pss -> + filter_rec ((p1::ps)::(p2::ps)::pss) + | ({pat_desc = (Tpat_any | Tpat_var(_))} :: qs) :: pss -> + qs :: filter_rec pss + | _::pss -> filter_rec pss + | [] -> [] in + filter_rec pss + +let filter_all pat0 pss = + + let rec insert q qs env = + match env with + [] -> [q, [simple_match_args q q @ qs]] + | ((p,pss) as c)::env -> + if simple_match q p + then (p, ((simple_match_args p q @ qs) :: pss)) :: env + else c :: insert q qs env in + + let rec filter_rec env = function + ({pat_desc = Tpat_alias(p,_)}::ps)::pss -> + filter_rec env ((p::ps)::pss) + | ({pat_desc = Tpat_or(p1,p2)}::ps)::pss -> + filter_rec env ((p1::ps)::(p2::ps)::pss) + | ({pat_desc = (Tpat_any | Tpat_var(_))}::_)::pss -> + filter_rec env pss + | (p::ps)::pss -> + filter_rec (insert p ps env) pss + | _ -> env + + and filter_omega env = function + ({pat_desc = Tpat_alias(p,_)}::ps)::pss -> + filter_omega env ((p::ps)::pss) + | ({pat_desc = Tpat_or(p1,p2)}::ps)::pss -> + filter_omega env ((p1::ps)::(p2::ps)::pss) + | ({pat_desc = (Tpat_any | Tpat_var(_))}::ps)::pss -> + filter_omega + (List.map (fun (q,qss) -> (q,(simple_match_args q omega @ ps) :: qss)) env) + pss + | _::pss -> filter_omega env pss + | [] -> env in + + filter_omega + (filter_rec + (match pat0.pat_desc with + (Tpat_record(_) | Tpat_tuple(_)) -> [pat0,[]] + | _ -> []) + pss) + pss + + +let full_match env = + match env with + ({pat_desc = Tpat_construct(c,_)},_) :: _ -> + List.length env = c.cstr_span + | ({pat_desc = Tpat_constant(Const_char _)},_) :: _ -> + List.length env = 256 + | ({pat_desc = Tpat_constant(_)},_) :: _ -> false + | ({pat_desc = Tpat_tuple(_)},_) :: _ -> true + | ({pat_desc = Tpat_record(_)},_) :: _ -> true + | _ -> fatal_error "Parmatch.full_match" + +(* + Is the last row of pattern matrix pss + qs satisfiable ? + That is : + Does there List.exists at least one val vector, es such that : + 1/ for all ps in pss ps # es (ps and es are not compatible) + 2/ qs <= es (es matches qs) +*) + +let rec satisfiable pss qs = + match pss with + [] -> true + | _ -> + match qs with + [] -> false + | {pat_desc = Tpat_or(q1,q2)}::qs -> + satisfiable pss (q1::qs) or satisfiable pss (q2::qs) + | {pat_desc = Tpat_alias(q,_)}::qs -> + satisfiable pss (q::qs) + | {pat_desc = (Tpat_any | Tpat_var(_))}::qs -> + let q0 = simple_pat omega pss in + begin match filter_all q0 pss with + (* first column of pss is made of variables only *) + [] -> satisfiable (filter_extra pss) qs + | constrs -> + let try_non_omega (p,pss) = + satisfiable pss (simple_match_args p omega @ qs) in + if full_match constrs + then List.exists try_non_omega constrs + else satisfiable (filter_extra pss) qs or + List.exists try_non_omega constrs + end + | q::qs -> + let q0 = simple_pat q pss in + satisfiable (filter_one q0 pss) (simple_match_args q0 q @ qs) + +let rec initial_matrix = function + [] -> [] + | (pat, act) :: rem -> + if has_guard act + then initial_matrix rem + else [pat] :: initial_matrix rem + +let rec le_pat p q = + match (p.pat_desc, q.pat_desc) with + (Tpat_var _ | Tpat_any), _ -> true + | Tpat_alias(p,_), _ -> le_pat p q + | _, Tpat_alias(q,_) -> le_pat p q + | Tpat_or(p1,p2), _ -> le_pat p1 q or le_pat p2 q + | _, Tpat_or(q1,q2) -> le_pat p q1 & le_pat p q2 + | Tpat_constant(c1), Tpat_constant(c2) -> c1 = c2 + | Tpat_construct(c1,ps), Tpat_construct(c2,qs) -> + c1.cstr_tag = c2.cstr_tag & le_pats ps qs + | Tpat_tuple(ps), Tpat_tuple(qs) -> le_pats ps qs + | Tpat_record(l1), Tpat_record(l2) -> + let size = record_num_fields p in + le_pats (set_fields size l1) (set_fields size l2) + | _, _ -> false + +and le_pats ps qs = + match ps,qs with + p::ps, q::qs -> le_pat p q & le_pats ps qs + | _, _ -> true + +let get_mins ps = + let rec select_rec r = function + [] -> r + | p::ps -> + if List.exists (fun p0 -> le_pats p0 p) ps + then select_rec r ps + else select_rec (p::r) ps in + select_rec [] (select_rec [] ps) + +let check_partial loc casel = + let pss = get_mins (initial_matrix casel) in + if match pss with + [] -> true + | ps::_ -> satisfiable pss (List.map (fun _ -> omega) ps) + then Location.print_warning loc "this pattern-matching is not exhaustive" + +let location_of_clause = function + pat :: _ -> pat.pat_loc + | _ -> fatal_error "Parmatch.location_of_clause" + +let check_unused casel = + let prefs = + List.fold_right + (fun (pat,act as clause) r -> + if has_guard act + then ([], ([pat], act)) :: r + else ([], ([pat], act)) :: + List.map (fun (pss,clause) -> [pat]::pss,clause) r) + casel [] in + List.iter + (fun (pss, ((qs, _) as clause)) -> + if not (satisfiable pss qs) then + Location.print_warning (location_of_clause qs) + "this match case is unused.") + prefs diff --git a/typing/parmatch.mli b/typing/parmatch.mli new file mode 100644 index 0000000000..93aa55957c --- /dev/null +++ b/typing/parmatch.mli @@ -0,0 +1,6 @@ +(* Detection of partial matches and unused match cases. *) + +open Typedtree + +val check_partial: Location.t -> (pattern * expression) list -> unit +val check_unused: (pattern * expression) list -> unit diff --git a/typing/path.ml b/typing/path.ml new file mode 100644 index 0000000000..d67a619454 --- /dev/null +++ b/typing/path.ml @@ -0,0 +1,17 @@ +type t = + Pident of Ident.t + | Pdot of t * string * int + +let nopos = -1 + +let rec same p1 p2 = + match (p1, p2) with + (Pident id1, Pident id2) -> Ident.same id1 id2 + | (Pdot(p1, s1, pos1), Pdot(p2, s2, pos2)) -> s1 = s2 & same p1 p2 + | (_, _) -> false + +let rec root = function + Pident id -> id + | Pdot(p, s, pos) -> root p + +let isfree id p = Ident.same id (root p) diff --git a/typing/path.mli b/typing/path.mli new file mode 100644 index 0000000000..51a0ac96cd --- /dev/null +++ b/typing/path.mli @@ -0,0 +1,10 @@ +(* Access paths *) + +type t = + Pident of Ident.t + | Pdot of t * string * int + +val same: t -> t -> bool +val isfree: Ident.t -> t -> bool + +val nopos: int diff --git a/typing/predef.ml b/typing/predef.ml new file mode 100644 index 0000000000..05ea8f1328 --- /dev/null +++ b/typing/predef.ml @@ -0,0 +1,98 @@ +(* Predefined type constructors (with special typing rules in typecore) *) + +open Misc +open Path +open Typedtree + + +let ident_int = Ident.new "int" +and ident_char = Ident.new "char" +and ident_string = Ident.new "string" +and ident_float = Ident.new "float" +and ident_bool = Ident.new "bool" +and ident_unit = Ident.new "unit" +and ident_exn = Ident.new "exn" +and ident_array = Ident.new "array" +and ident_list = Ident.new "list" +and ident_format = Ident.new "format" + +let path_int = Pident ident_int +and path_char = Pident ident_char +and path_string = Pident ident_string +and path_float = Pident ident_float +and path_bool = Pident ident_bool +and path_unit = Pident ident_unit +and path_exn = Pident ident_exn +and path_array = Pident ident_array +and path_list = Pident ident_list +and path_format = Pident ident_format + +let type_int = Tconstr(Pident ident_int, []) +and type_char = Tconstr(Pident ident_char, []) +and type_string = Tconstr(Pident ident_string, []) +and type_float = Tconstr(Pident ident_float, []) +and type_bool = Tconstr(Pident ident_bool, []) +and type_unit = Tconstr(Pident ident_unit, []) +and type_exn = Tconstr(Pident ident_exn, []) +and type_array t = Tconstr(path_array, [t]) +and type_list t = Tconstr(path_list, [t]) + +let ident_match_failure = Ident.new "Match_failure" +and ident_out_of_memory = Ident.new "Out_of_memory" +and ident_invalid_argument = Ident.new "Invalid_argument" +and ident_failure = Ident.new "Failure" +and ident_not_found = Ident.new "Not_found" +and ident_sys_error = Ident.new "Sys_error" +and ident_end_of_file = Ident.new "End_of_file" +and ident_division_by_zero = Ident.new "Division_by_zero" + +let path_match_failure = Pident ident_match_failure + +let build_initial_env add_type add_exception empty_env = + let newvar() = + (* Cannot call Ctype.newvar here because ctype imports predef via env *) + Tvar{tvar_level = -1 (*generic_level*); tvar_link = None} in + let decl_abstr = + {type_params = []; type_arity = 0; type_kind = Type_abstract} + and decl_bool = + {type_params = []; type_arity = 0; + type_kind = Type_variant["false",[]; "true",[]]} + and decl_unit = + {type_params = []; type_arity = 0; type_kind = Type_variant["()",[]]} + and decl_exn = + {type_params = []; type_arity = 0; type_kind = Type_variant[]} + and decl_array = + let tvar = newvar() in + {type_params = [tvar]; type_arity = 1; type_kind = Type_abstract} + and decl_list = + let tvar = newvar() in + {type_params = [tvar]; type_arity = 1; + type_kind = Type_variant["[]", []; "::", [tvar; type_list tvar]]} + and decl_format = + {type_params = [newvar(); newvar(); newvar()]; type_arity = 3; + type_kind = Type_abstract} in + add_exception ident_match_failure [type_string; type_int; type_int] ( + add_exception ident_out_of_memory [] ( + add_exception ident_invalid_argument [type_string] ( + add_exception ident_failure [type_string] ( + add_exception ident_not_found [] ( + add_exception ident_sys_error [type_string] ( + add_exception ident_end_of_file [] ( + add_exception ident_division_by_zero [] ( + add_type ident_format decl_format ( + add_type ident_list decl_list ( + add_type ident_array decl_array ( + add_type ident_exn decl_exn ( + add_type ident_unit decl_unit ( + add_type ident_bool decl_bool ( + add_type ident_float decl_abstr ( + add_type ident_string decl_abstr ( + add_type ident_char decl_abstr ( + add_type ident_int decl_abstr ( + empty_env)))))))))))))))))) + +let builtin_values = + List.map (fun id -> Ident.make_global id; (Ident.name id, id)) + [ident_match_failure; ident_out_of_memory; ident_invalid_argument; + ident_failure; ident_not_found; ident_sys_error; ident_end_of_file; + ident_division_by_zero] diff --git a/typing/predef.mli b/typing/predef.mli new file mode 100644 index 0000000000..da2d17ae0e --- /dev/null +++ b/typing/predef.mli @@ -0,0 +1,39 @@ +(* Predefined type constructors (with special typing rules in typecore) *) + +open Typedtree + +val type_int: type_expr +val type_char: type_expr +val type_string: type_expr +val type_float: type_expr +val type_bool: type_expr +val type_unit: type_expr +val type_exn: type_expr +val type_array: type_expr -> type_expr +val type_list: type_expr -> type_expr + +val path_int: Path.t +val path_char: Path.t +val path_string: Path.t +val path_float: Path.t +val path_bool: Path.t +val path_unit: Path.t +val path_exn: Path.t +val path_array: Path.t +val path_list: Path.t +val path_format: Path.t + +val path_match_failure: Path.t + +(* To build the initial environment. Since there is a nasty mutual + recursion between predef and env, we break it by parameterizing + over Env.t, Env.add_type and Env.add_exception. *) + +val build_initial_env: + (Ident.t -> type_declaration -> 'a -> 'a) -> + (Ident.t -> exception_declaration -> 'a -> 'a) -> + 'a -> 'a + +(* To initialize linker tables *) + +val builtin_values: (string * Ident.t) list diff --git a/typing/printtyp.ml b/typing/printtyp.ml new file mode 100644 index 0000000000..c1b82d7acc --- /dev/null +++ b/typing/printtyp.ml @@ -0,0 +1,215 @@ +(* Printing functions *) + +open Format +open Misc +open Longident +open Path +open Asttypes +open Typedtree + + +(* Print a long identifier *) + +let rec longident = function + Lident s -> print_string s + | Ldot(p, s) -> longident p; print_string "."; print_string s + +(* Print an identifier *) + +let ident id = + print_string(Ident.name id) + +(* Print a path *) + +let ident_pervasive = Ident.new_persistent "Pervasives" + +let rec path = function + Pident id -> + ident id + | Pdot(Pident id, s, pos) when Ident.same id ident_pervasive -> + print_string s + | Pdot(p, s, pos) -> + path p; print_string "."; print_string s + +(* Print a type expression *) + +let var_names = ref ([] : (type_expr * string) list) +let var_counter = ref 0 + +let reset_var_names () = var_names := []; var_counter := 0 + +let name_of_var v = + try + List.assq v !var_names + with Not_found -> + let name = + if !var_counter < 26 + then String.make 1 (Char.chr(97 + !var_counter)) + else String.make 1 (Char.chr(97 + !var_counter mod 26)) ^ + string_of_int(!var_counter / 26) in + var_names := (v, name) :: !var_names; + incr var_counter; + name + +let rec typexp sch prio = function + Tvar {tvar_link = Some ty} -> + typexp sch prio ty + | Tvar {tvar_link = None; tvar_level = lvl} as v -> + if not sch or lvl = -1 (* generic *) + then print_string "'" + else print_string "'_"; + print_string(name_of_var v) + | Tarrow(ty1, ty2) -> + if prio >= 1 then begin open_hovbox 1; print_string "(" end + else open_hovbox 0; + typexp sch 1 ty1; + print_string " ->"; print_space(); + typexp sch 0 ty2; + if prio >= 1 then print_string ")"; + close_box() + | Ttuple tyl -> + if prio >= 2 then begin open_hovbox 1; print_string "(" end + else open_hovbox 0; + typlist sch 2 " *" tyl; + if prio >= 2 then print_string ")"; + close_box() + | Tconstr(p, tyl) -> + open_hovbox 0; + begin match tyl with + [] -> () + | [ty1] -> + typexp sch 2 ty1; print_space() + | tyl -> + open_hovbox 1; print_string "("; typlist sch 0 "," tyl; + print_string ")"; close_box(); print_space() + end; + path p; + close_box() + +and typlist sch prio sep = function + [] -> () + | [ty] -> typexp sch prio ty + | ty::tyl -> + typexp sch prio ty; print_string sep; print_space(); + typlist sch prio sep tyl + +let type_expr ty = typexp false 0 ty +and type_scheme ty = reset_var_names(); typexp true 0 ty + +(* Print one type declaration *) + +let rec type_declaration id decl = + reset_var_names(); + open_hvbox 2; + print_string "type "; + type_expr (Tconstr(Pident id, decl.type_params)); + begin match decl.type_kind with + Type_abstract -> () + | Type_manifest ty -> + print_string " ="; print_space(); type_expr ty + | Type_variant (cstr1 :: cstrs) -> + print_string " ="; print_break(1,2); + constructor cstr1; + List.iter (fun cstr -> print_space(); print_string "| "; constructor cstr) + cstrs + | Type_record (lbl1 :: lbls) -> + print_string " ="; print_space(); + print_string "{ "; label lbl1; + List.iter (fun lbl -> print_string ";"; print_break(1,2); label lbl) + lbls; + print_string " }" + | _ -> + () (* A fatal error actually, except when printing type exn... *) + end; + close_box() + +and constructor (name, args) = + print_string name; + match args with + [] -> () + | _ -> print_string " of "; + open_hovbox 2; typlist false 2 " *" args; close_box() + +and label (name, mut, arg) = + begin match mut with + Immutable -> () + | Mutable -> print_string "mutable " + end; + print_string name; + print_string ": "; + type_expr arg + +(* Print an exception declaration *) + +let exception_declaration id decl = + print_string "exception "; constructor (Ident.name id, decl) + +(* Print a val declaration *) + +let value_description id decl = + open_hovbox 2; + begin match decl.val_prim with + Not_prim -> + print_string "val "; ident id; print_string " :"; print_space(); + type_scheme decl.val_type + | Primitive(p, ar) -> + print_string "val "; ident id; print_string " :"; print_space(); + type_scheme decl.val_type; print_space(); + print_string "= \""; print_string p; print_string "\"" + end; + close_box() + +(* Print a module type *) + +let rec modtype = function + Tmty_ident p -> + path p + | Tmty_signature [] -> + print_string "sig end" + | Tmty_signature(item :: rem) -> + open_hvbox 2; + print_string "sig"; print_space(); + signature_item item; + List.iter + (fun item -> print_space(); signature_item item) + rem; + print_break(1, -2); print_string "end"; + close_box() + | Tmty_functor(param, ty_arg, ty_res) -> + open_hovbox 2; + print_string "functor"; print_cut(); + print_string "("; ident param; print_string " : "; + modtype ty_arg; + print_string ") ->"; print_space(); + modtype ty_res; + close_box() + +and signature_item = function + Tsig_value(id, decl) -> + value_description id decl + | Tsig_type(id, decl) -> + type_declaration id decl + | Tsig_exception(id, decl) -> + exception_declaration id decl + | Tsig_module(id, mty) -> + open_hovbox 2; print_string "module "; ident id; print_string " :"; + print_space(); modtype mty; close_box() + | Tsig_modtype(id, decl) -> + modtype_declaration id decl + +and modtype_declaration id decl = + open_hovbox 2; print_string "module type "; ident id; + begin match decl with + Tmodtype_abstract -> () + | Tmodtype_manifest mty -> + print_string " ="; print_space(); modtype mty + end; + close_box() + +(* Print a signature body (used when compiling a .mli and printing results + in interactive use). *) + +let signature sg = + open_vbox 0; + List.iter (fun item -> signature_item item; print_space()) sg; + close_box() diff --git a/typing/printtyp.mli b/typing/printtyp.mli new file mode 100644 index 0000000000..2a99c48d7e --- /dev/null +++ b/typing/printtyp.mli @@ -0,0 +1,17 @@ +(* Printing functions *) + +open Typedtree + +val longident: Longident.t -> unit +val ident: Ident.t -> unit +val path: Path.t -> unit +val reset_var_names: unit -> unit +val type_expr: type_expr -> unit +val type_scheme: type_expr -> unit +val value_description: Ident.t -> value_description -> unit +val type_declaration: Ident.t -> type_declaration -> unit +val exception_declaration: Ident.t -> exception_declaration -> unit +val modtype: module_type -> unit +val signature: signature -> unit +val signature_item: signature_item -> unit +val modtype_declaration: Ident.t -> modtype_declaration -> unit diff --git a/typing/subst.ml b/typing/subst.ml new file mode 100644 index 0000000000..b888ceee8c --- /dev/null +++ b/typing/subst.ml @@ -0,0 +1,97 @@ +(* Substitutions *) + +open Misc +open Path +open Typedtree + + +type t = + { types: Path.t Ident.tbl; + modules: Path.t Ident.tbl; + modtypes: module_type Ident.tbl } + +let identity = + { types = Ident.empty; modules = Ident.empty; modtypes = Ident.empty } + +let add_type id p s = + { types = Ident.add id p s.types; + modules = s.modules; + modtypes = s.modtypes } + +let add_module id p s = + { types = s.types; + modules = Ident.add id p s.modules; + modtypes = s.modtypes } + +let add_modtype id ty s = + { types = s.types; + modules = s.modules; + modtypes = Ident.add id ty s.modtypes } + +let rec module_path s = function + Pident id as p -> + begin try Ident.find_same id s.modules with Not_found -> p end + | Pdot(p, n, pos) -> + Pdot(module_path s p, n, pos) + +let type_path s = function + Pident id as p -> + begin try Ident.find_same id s.types with Not_found -> p end + | Pdot(p, n, pos) -> + Pdot(module_path s p, n, pos) + +let rec type_expr s = function + Tvar{tvar_link = None} as ty -> ty + | Tvar{tvar_link = Some ty} -> type_expr s ty + | Tarrow(t1, t2) -> Tarrow(type_expr s t1, type_expr s t2) + | Ttuple tl -> Ttuple(List.map (type_expr s) tl) + | Tconstr(p, []) -> Tconstr(type_path s p, []) + | Tconstr(p, tl) -> Tconstr(type_path s p, List.map (type_expr s) tl) + +let value_description s descr = + { val_type = type_expr s descr.val_type; + val_prim = descr.val_prim } + +let type_declaration s decl = + { type_params = decl.type_params; + type_arity = decl.type_arity; + type_kind = + match decl.type_kind with + Type_abstract -> Type_abstract + | Type_manifest ty -> Type_manifest(type_expr s ty) + | Type_variant cstrs -> + Type_variant(List.map (fun (n, args) -> (n, List.map (type_expr s) args)) + cstrs) + | Type_record lbls -> + Type_record(List.map (fun (n, mut, arg) -> (n, mut, type_expr s arg)) + lbls) + } + +let exception_declaration s tyl = + List.map (type_expr s) tyl + +let rec modtype s = function + Tmty_ident p as mty -> + begin match p with + Pident id -> + begin try Ident.find_same id s.modtypes with Not_found -> mty end + | Pdot(p, n, pos) -> + Tmty_ident(Pdot(module_path s p, n, pos)) + end + | Tmty_signature sg -> + Tmty_signature(signature s sg) + | Tmty_functor(id, arg, res) -> + Tmty_functor(id, modtype s arg, modtype s res) + +and signature s sg = List.map (signature_item s) sg + +and signature_item s = function + Tsig_value(id, d) -> Tsig_value(id, value_description s d) + | Tsig_type(id, d) -> Tsig_type(id, type_declaration s d) + | Tsig_exception(id, d) -> Tsig_exception(id, exception_declaration s d) + | Tsig_module(id, mty) -> Tsig_module(id, modtype s mty) + | Tsig_modtype(id, d) -> Tsig_modtype(id, modtype_declaration s d) + +and modtype_declaration s = function + Tmodtype_abstract -> Tmodtype_abstract + | Tmodtype_manifest mty -> Tmodtype_manifest(modtype s mty) diff --git a/typing/subst.mli b/typing/subst.mli new file mode 100644 index 0000000000..52caf5238e --- /dev/null +++ b/typing/subst.mli @@ -0,0 +1,21 @@ +(* Substitutions *) + +open Typedtree + +type t + +val identity: t + +val add_type: Ident.t -> Path.t -> t -> t +val add_module: Ident.t -> Path.t -> t -> t +val add_modtype: Ident.t -> module_type -> t -> t + +val type_expr: t -> type_expr -> type_expr +val value_description: t -> value_description -> value_description +val type_declaration: t -> type_declaration -> type_declaration +val exception_declaration: + t -> exception_declaration -> exception_declaration +val modtype: t -> module_type -> module_type +val signature: t -> signature -> signature +val modtype_declaration: t -> modtype_declaration -> modtype_declaration + diff --git a/typing/typecore.ml b/typing/typecore.ml new file mode 100644 index 0000000000..882748fab0 --- /dev/null +++ b/typing/typecore.ml @@ -0,0 +1,601 @@ +(* Typechecking for the core language *) + +open Misc +open Asttypes +open Parsetree +open Typedtree +open Ctype + + +type error = + Unbound_value of Longident.t + | Unbound_constructor of Longident.t + | Unbound_label of Longident.t + | Constructor_arity_mismatch of Longident.t * int * int + | Label_mismatch of Longident.t * type_expr * type_expr + | Pattern_type_clash of type_expr * type_expr + | Multiply_bound_variable + | Orpat_not_closed + | Expr_type_clash of type_expr * type_expr + | Apply_non_function of type_expr + | Label_multiply_defined of Longident.t + | Label_missing + | Label_not_mutable of Longident.t + | Non_generalizable of type_expr + | Bad_format_letter of char + +exception Error of Location.t * error + +(* Typing of constants *) + +let type_constant = function + Const_int _ -> Predef.type_int + | Const_char _ -> Predef.type_char + | Const_string _ -> Predef.type_string + | Const_float _ -> Predef.type_float + +(* Typing of patterns *) + +let unify_pat env pat expected_ty = + try + unify env pat.pat_type expected_ty + with Unify -> + raise(Error(pat.pat_loc, Pattern_type_clash(pat.pat_type, expected_ty))) + +let pattern_variables = ref ([]: (Ident.t * type_expr) list) + +let enter_variable loc name ty = + if List.exists (fun (id, ty) -> Ident.name id = name) !pattern_variables + then raise(Error(loc, Multiply_bound_variable)); + let id = Ident.new name in + pattern_variables := (id, ty) :: !pattern_variables; + id + +let rec type_pat env sp = + match sp.ppat_desc with + Ppat_any -> + { pat_desc = Tpat_any; + pat_loc = sp.ppat_loc; + pat_type = newvar() } + | Ppat_var name -> + let ty = newvar() in + let id = enter_variable sp.ppat_loc name ty in + { pat_desc = Tpat_var id; + pat_loc = sp.ppat_loc; + pat_type = ty } + | Ppat_alias(sp, name) -> + let p = type_pat env sp in + let id = enter_variable sp.ppat_loc name p.pat_type in + { pat_desc = Tpat_alias(p, id); + pat_loc = sp.ppat_loc; + pat_type = p.pat_type } + | Ppat_constant cst -> + { pat_desc = Tpat_constant cst; + pat_loc = sp.ppat_loc; + pat_type = type_constant cst } + | Ppat_tuple spl -> + let pl = List.map (type_pat env) spl in + { pat_desc = Tpat_tuple pl; + pat_loc = sp.ppat_loc; + pat_type = Ttuple(List.map (fun p -> p.pat_type) pl) } + | Ppat_construct(lid, sarg) -> + let constr = + try + Env.lookup_constructor lid env + with Not_found -> + raise(Error(sp.ppat_loc, Unbound_constructor lid)) in + let sargs = + match sarg with + None -> [] + | Some {ppat_desc = Ppat_tuple spl} when constr.cstr_arity > 1 -> spl + | Some sp -> [sp] in + if List.length sargs <> constr.cstr_arity then + raise(Error(sp.ppat_loc, Constructor_arity_mismatch(lid, + constr.cstr_arity, List.length sargs))); + let args = List.map (type_pat env) sargs in + let (ty_args, ty_res) = instance_constructor constr in + List.iter2 (unify_pat env) args ty_args; + { pat_desc = Tpat_construct(constr, args); + pat_loc = sp.ppat_loc; + pat_type = ty_res } + | Ppat_record lid_sp_list -> + let ty = newvar() in + let type_label_pat (lid, sarg) = + let label = + try + Env.lookup_label lid env + with Not_found -> + raise(Error(sp.ppat_loc, Unbound_label lid)) in + let (ty_arg, ty_res) = instance_label label in + begin try + unify env ty_res ty + with Unify -> + raise(Error(sp.ppat_loc, Label_mismatch(lid, ty_res, ty))) + end; + let arg = type_pat env sarg in + unify_pat env arg ty_arg; + (label, arg) + in + { pat_desc = Tpat_record(List.map type_label_pat lid_sp_list); + pat_loc = sp.ppat_loc; + pat_type = ty } + | Ppat_or(sp1, sp2) -> + let initial_pattern_variables = !pattern_variables in + let p1 = type_pat env sp1 in + let p2 = type_pat env sp2 in + if !pattern_variables != initial_pattern_variables then + raise(Error(sp.ppat_loc, Orpat_not_closed)); + unify_pat env p2 p1.pat_type; + { pat_desc = Tpat_or(p1, p2); + pat_loc = sp.ppat_loc; + pat_type = p1.pat_type } + | Ppat_constraint(sp, sty) -> + let p = type_pat env sp in + let ty = Typetexp.transl_simple_type env false sty in + unify_pat env p ty; + p + +let add_pattern_variables env = + let pv = !pattern_variables in + pattern_variables := []; + List.fold_right + (fun (id, ty) env -> + Env.add_value id {val_type = ty; val_prim = Not_prim} env) + pv env + +let type_pattern env spat = + pattern_variables := []; + let pat = type_pat env spat in + let new_env = add_pattern_variables env in + (pat, new_env) + +let type_pattern_list env spatl = + pattern_variables := []; + let patl = List.map (type_pat env) spatl in + let new_env = add_pattern_variables env in + (patl, new_env) + +(* Generalization criterion for expressions *) + +let rec is_nonexpansive exp = + match exp.exp_desc with + Texp_ident(_,_) -> true + | Texp_constant _ -> true + | Texp_let(rec_flag, pat_exp_list, body) -> + List.for_all (fun (pat, exp) -> is_nonexpansive exp) pat_exp_list & + is_nonexpansive body + | Texp_function _ -> true + | Texp_tuple el -> + List.for_all is_nonexpansive el + | Texp_construct(_, el) -> + List.for_all is_nonexpansive el + | Texp_record lbl_exp_list -> + List.for_all (fun (lbl, exp) -> lbl.lbl_mut = Immutable & is_nonexpansive exp) + lbl_exp_list + | Texp_field(exp, lbl) -> is_nonexpansive exp + | _ -> false + +(* Typing of printf formats *) + +let type_format loc fmt = + let len = String.length fmt in + let ty_input = newvar() + and ty_result = newvar() in + let rec skip_args j = + if j >= len then j else + match fmt.[j] with + '0' .. '9' | ' ' | '.' | '-' -> skip_args (j+1) + | _ -> j in + let rec scan_format i = + if i >= len then ty_result else + match fmt.[i] with + '%' -> + let j = skip_args(i+1) in + begin match fmt.[j] with + '%' -> + scan_format (j+1) + | 's' -> + Tarrow(Predef.type_string, scan_format (j+1)) + | 'c' -> + Tarrow(Predef.type_char, scan_format (j+1)) + | 'd' | 'o' | 'x' | 'X' | 'u' -> + Tarrow(Predef.type_int, scan_format (j+1)) + | 'f' | 'e' | 'E' | 'g' | 'G' -> + Tarrow(Predef.type_float, scan_format (j+1)) + | 'b' -> + Tarrow(Predef.type_bool, scan_format (j+1)) + | 'a' -> + let ty_arg = newvar() in + Tarrow (Tarrow(ty_input, Tarrow (ty_arg, ty_result)), + Tarrow (ty_arg, scan_format (j+1))) + | 't' -> + Tarrow(Tarrow(ty_input, ty_result), scan_format (j+1)) + | c -> + raise(Error(loc, Bad_format_letter c)) + end + | _ -> scan_format (i+1) in + Tconstr(Predef.path_format, [scan_format 0; ty_input; ty_result]) + +(* Typing of expressions *) + +let unify_exp env exp expected_ty = + try + unify env exp.exp_type expected_ty + with Unify -> + raise(Error(exp.exp_loc, Expr_type_clash(exp.exp_type, expected_ty))) + +let rec type_exp env sexp = + match sexp.pexp_desc with + Pexp_ident lid -> + begin try + let (path, desc) = Env.lookup_value lid env in + { exp_desc = Texp_ident(path, desc); + exp_loc = sexp.pexp_loc; + exp_type = instance desc.val_type } + with Not_found -> + raise(Error(sexp.pexp_loc, Unbound_value lid)) + end + | Pexp_constant cst -> + { exp_desc = Texp_constant cst; + exp_loc = sexp.pexp_loc; + exp_type = type_constant cst } + | Pexp_let(rec_flag, spat_sexp_list, sbody) -> + let (pat_exp_list, new_env) = type_let env rec_flag spat_sexp_list in + let body = type_exp new_env sbody in + { exp_desc = Texp_let(rec_flag, pat_exp_list, body); + exp_loc = sexp.pexp_loc; + exp_type = body.exp_type } + | Pexp_function caselist -> + let ty_arg = newvar() and ty_res = newvar() in + let cases = type_cases env ty_arg ty_res caselist in + Parmatch.check_unused cases; + Parmatch.check_partial sexp.pexp_loc cases; + { exp_desc = Texp_function cases; + exp_loc = sexp.pexp_loc; + exp_type = Tarrow(ty_arg, ty_res) } + | Pexp_apply(sfunct, sargs) -> + let funct = type_exp env sfunct in + let rec type_args ty_fun = function + [] -> + ([], ty_fun) + | sarg1 :: sargl -> + let (ty1, ty2) = + try + filter_arrow env ty_fun + with Unify -> + raise(Error(sfunct.pexp_loc, + Apply_non_function funct.exp_type)) in + let arg1 = type_expect env sarg1 ty1 in + let (argl, ty_res) = type_args ty2 sargl in + (arg1 :: argl, ty_res) in + let (args, ty_res) = type_args funct.exp_type sargs in + { exp_desc = Texp_apply(funct, args); + exp_loc = sexp.pexp_loc; + exp_type = ty_res } + | Pexp_match(sarg, caselist) -> + let arg = type_exp env sarg in + let ty_res = newvar() in + let cases = type_cases env arg.exp_type ty_res caselist in + Parmatch.check_unused cases; + Parmatch.check_partial sexp.pexp_loc cases; + { exp_desc = Texp_match(arg, cases); + exp_loc = sexp.pexp_loc; + exp_type = ty_res } + | Pexp_try(sbody, caselist) -> + let body = type_exp env sbody in + let cases = type_cases env Predef.type_exn body.exp_type caselist in + Parmatch.check_unused cases; + { exp_desc = Texp_try(body, cases); + exp_loc = sexp.pexp_loc; + exp_type = body.exp_type } + | Pexp_tuple sexpl -> + let expl = List.map (type_exp env) sexpl in + { exp_desc = Texp_tuple expl; + exp_loc = sexp.pexp_loc; + exp_type = Ttuple(List.map (fun exp -> exp.exp_type) expl) } + | Pexp_construct(lid, sarg) -> + let constr = + try + Env.lookup_constructor lid env + with Not_found -> + raise(Error(sexp.pexp_loc, Unbound_constructor lid)) in + let sargs = + match sarg with + None -> [] + | Some {pexp_desc = Pexp_tuple sel} when constr.cstr_arity > 1 -> sel + | Some se -> [se] in + if List.length sargs <> constr.cstr_arity then + raise(Error(sexp.pexp_loc, Constructor_arity_mismatch(lid, + constr.cstr_arity, List.length sargs))); + let (ty_args, ty_res) = instance_constructor constr in + let args = List.map2 (type_expect env) sargs ty_args in + { exp_desc = Texp_construct(constr, args); + exp_loc = sexp.pexp_loc; + exp_type = ty_res } + | Pexp_record lid_sexp_list -> + let ty = newvar() in + let num_fields = ref 0 in + let type_label_exp (lid, sarg) = + let label = + try + Env.lookup_label lid env + with Not_found -> + raise(Error(sexp.pexp_loc, Unbound_label lid)) in + let (ty_arg, ty_res) = instance_label label in + begin try + unify env ty_res ty + with Unify -> + raise(Error(sexp.pexp_loc, Label_mismatch(lid, ty_res, ty))) + end; + let arg = type_expect env sarg ty_arg in + num_fields := Array.length label.lbl_all; + (label, arg) in + let lbl_exp_list = List.map type_label_exp lid_sexp_list in + let rec check_duplicates = function + [] -> () + | (lid, sarg) :: remainder -> + if List.mem_assoc lid remainder + then raise(Error(sexp.pexp_loc, Label_multiply_defined lid)) + else check_duplicates remainder in + check_duplicates lid_sexp_list; + if List.length lid_sexp_list <> !num_fields then + raise(Error(sexp.pexp_loc, Label_missing)); + { exp_desc = Texp_record lbl_exp_list; + exp_loc = sexp.pexp_loc; + exp_type = ty } + | Pexp_field(sarg, lid) -> + let arg = type_exp env sarg in + let label = + try + Env.lookup_label lid env + with Not_found -> + raise(Error(sexp.pexp_loc, Unbound_label lid)) in + let (ty_arg, ty_res) = instance_label label in + unify_exp env arg ty_res; + { exp_desc = Texp_field(arg, label); + exp_loc = sexp.pexp_loc; + exp_type = ty_arg } + | Pexp_setfield(srecord, lid, snewval) -> + let record = type_exp env srecord in + let label = + try + Env.lookup_label lid env + with Not_found -> + raise(Error(sexp.pexp_loc, Unbound_label lid)) in + if label.lbl_mut = Immutable then + raise(Error(sexp.pexp_loc, Label_not_mutable lid)); + let (ty_arg, ty_res) = instance_label label in + unify_exp env record ty_res; + let newval = type_expect env snewval ty_arg in + { exp_desc = Texp_setfield(record, label, newval); + exp_loc = sexp.pexp_loc; + exp_type = Predef.type_unit } + | Pexp_array(sargl) -> + let ty = newvar() in + let argl = List.map (fun sarg -> type_expect env sarg ty) sargl in + { exp_desc = Texp_array argl; + exp_loc = sexp.pexp_loc; + exp_type = Predef.type_array ty } + | Pexp_ifthenelse(scond, sifso, sifnot) -> + let cond = type_expect env scond Predef.type_bool in + begin match sifnot with + None -> + let ifso = type_expect env sifso Predef.type_unit in + { exp_desc = Texp_ifthenelse(cond, ifso, None); + exp_loc = sexp.pexp_loc; + exp_type = Predef.type_unit } + | Some sexp -> + let ifso = type_exp env sifso in + let ifnot = type_expect env sexp ifso.exp_type in + { exp_desc = Texp_ifthenelse(cond, ifso, Some ifnot); + exp_loc = sexp.pexp_loc; + exp_type = ifso.exp_type } + end + | Pexp_sequence(sexp1, sexp2) -> + let exp1 = type_statement env sexp1 in + let exp2 = type_exp env sexp2 in + { exp_desc = Texp_sequence(exp1, exp2); + exp_loc = sexp.pexp_loc; + exp_type = exp2.exp_type } + | Pexp_while(scond, sbody) -> + let cond = type_expect env scond Predef.type_bool in + let body = type_statement env sbody in + { exp_desc = Texp_while(cond, body); + exp_loc = sexp.pexp_loc; + exp_type = Predef.type_unit } + | Pexp_for(param, slow, shigh, dir, sbody) -> + let low = type_expect env slow Predef.type_int in + let high = type_expect env shigh Predef.type_int in + let (id, new_env) = + Env.enter_value param {val_type = Predef.type_int; + val_prim = Not_prim} env in + let body = type_statement new_env sbody in + { exp_desc = Texp_for(id, low, high, dir, body); + exp_loc = sexp.pexp_loc; + exp_type = Predef.type_unit } + | Pexp_constraint(sarg, sty) -> + let ty = Typetexp.transl_simple_type env false sty in + let arg = type_expect env sarg ty in + { exp_desc = arg.exp_desc; + exp_loc = arg.exp_loc; + exp_type = ty } + | Pexp_when(scond, sbody) -> + let cond = type_expect env scond Predef.type_bool in + let body = type_exp env sbody in + { exp_desc = Texp_when(cond, body); + exp_loc = sexp.pexp_loc; + exp_type = body.exp_type } + +(* Typing of an expression with an expected type. + Some constructs are treated specially to provide better error messages. *) + +and type_expect env sexp ty_expected = + match sexp.pexp_desc with + Pexp_constant(Const_string s as cst) -> + let exp = + { exp_desc = Texp_constant cst; + exp_loc = sexp.pexp_loc; + exp_type = + (* Terrible hack for format strings *) + match Ctype.repr ty_expected with + Tconstr(path, _) when Path.same path Predef.path_format -> + type_format sexp.pexp_loc s + | _ -> Predef.type_string } in + unify_exp env exp ty_expected; + exp + | Pexp_let(rec_flag, spat_sexp_list, sbody) -> + let (pat_exp_list, new_env) = type_let env rec_flag spat_sexp_list in + let body = type_expect new_env sbody ty_expected in + { exp_desc = Texp_let(rec_flag, pat_exp_list, body); + exp_loc = sexp.pexp_loc; + exp_type = body.exp_type } + | Pexp_sequence(sexp1, sexp2) -> + let exp1 = type_statement env sexp1 in + let exp2 = type_expect env sexp2 ty_expected in + { exp_desc = Texp_sequence(exp1, exp2); + exp_loc = sexp.pexp_loc; + exp_type = exp2.exp_type } + | _ -> + let exp = type_exp env sexp in + unify_exp env exp ty_expected; + exp + +(* Typing of statements (expressions whose values are discarded) *) + +and type_statement env sexp = + let exp = type_exp env sexp in + match Ctype.repr exp.exp_type with + Tarrow(_, _) -> + Location.print_warning sexp.pexp_loc + "this function application is partial,\n\ + maybe some arguments are missing."; + exp + | _ -> exp + +(* Typing of match cases *) + +and type_cases env ty_arg ty_res caselist = + List.map + (fun (spat, sexp) -> + let (pat, ext_env) = type_pattern env spat in + unify_pat env pat ty_arg; + let exp = type_expect ext_env sexp ty_res in + (pat, exp)) + caselist + +(* Typing of let bindings *) + +and type_let env rec_flag spat_sexp_list = + begin_def(); + let (pat_list, new_env) = + type_pattern_list env (List.map (fun (spat, sexp) -> spat) spat_sexp_list) in + let exp_env = + match rec_flag with Nonrecursive -> env | Recursive -> new_env in + let exp_list = + List.map (fun (spat, sexp) -> type_exp exp_env sexp) spat_sexp_list in + List.iter2 + (fun pat exp -> unify_pat env pat exp.exp_type) + pat_list exp_list; + List.iter2 + (fun pat exp -> Parmatch.check_partial pat.pat_loc [pat, exp]) + pat_list exp_list; + end_def(); + List.iter + (fun exp -> if not (is_nonexpansive exp) then make_nongen exp.exp_type) + exp_list; + List.iter + (fun exp -> generalize exp.exp_type) + exp_list; + (List.combine(pat_list, exp_list), new_env) + +(* Typing of toplevel bindings *) + +let type_binding env rec_flag spat_sexp_list = + Typetexp.reset_type_variables(); + let (pat_exp_list, new_env as result) = + type_let env rec_flag spat_sexp_list in + List.iter + (fun (pat, exp) -> + if not (closed_schema exp.exp_type) then + raise(Error(exp.exp_loc, Non_generalizable exp.exp_type))) + pat_exp_list; + result + +(* Typing of toplevel expressions *) + +let type_expression env sexp = + Typetexp.reset_type_variables(); + begin_def(); + let exp = type_exp env sexp in + end_def(); + if is_nonexpansive exp then generalize exp.exp_type; + exp + +(* Error report *) + +open Format +open Printtyp + +let report_error = function + Unbound_value lid -> + print_string "Unbound val "; longident lid + | Unbound_constructor lid -> + print_string "Unbound constructor "; longident lid + | Unbound_label lid -> + print_string "Unbound label "; longident lid + | Constructor_arity_mismatch(lid, expected, provided) -> + open_hovbox 0; + print_string "The constructor "; longident lid; + print_space(); print_string "expects "; print_int expected; + print_string " argument(s),"; print_space(); + print_string "but is here applied to "; print_int provided; + print_string " argument(s)"; + close_box() + | Label_mismatch(lid, actual, expected) -> + open_hovbox 0; + print_string "The label "; longident lid; + print_space(); print_string "belongs to the type"; print_space(); + type_expr actual; print_space(); + print_string "but is here mixed with labels of type"; print_space(); + type_expr expected; + close_box() + | Pattern_type_clash(inferred, expected) -> + open_hovbox 0; + print_string "This pattern matches values of type"; print_space(); + type_expr inferred; print_space(); + print_string "but is here used to match values of type"; print_space(); + type_expr expected; + close_box() + | Multiply_bound_variable -> + print_string "This variable is bound several times in this matching" + | Orpat_not_closed -> + print_string "A pattern with | must not bind variables" + | Expr_type_clash(inferred, expected) -> + open_hovbox 0; + print_string "This expression has type"; print_space(); + type_expr inferred; print_space(); + print_string "but is here used with type"; print_space(); + type_expr expected; + close_box() + | Apply_non_function typ -> + begin match Ctype.repr typ with + Tarrow(_, _) -> + print_string "This function is applied to too many arguments" + | _ -> + print_string "This expression is not a function, it cannot be applied" + end + | Label_multiply_defined lid -> + print_string "The label "; longident lid; + print_string " is defined several times" + | Label_missing -> + print_string "Some labels are undefined" + | Label_not_mutable lid -> + print_string "The label "; longident lid; + print_string " is not mutable" + | Non_generalizable typ -> + open_hovbox 0; + print_string "The type of this expression,"; print_space(); + type_scheme typ; print_string ","; print_space(); + print_string "contains type variables that cannot be generalized" + | Bad_format_letter c -> + print_string "Bad format letter `%"; print_char c; print_string "'" diff --git a/typing/typecore.mli b/typing/typecore.mli new file mode 100644 index 0000000000..56fa562c62 --- /dev/null +++ b/typing/typecore.mli @@ -0,0 +1,32 @@ +(* Type inference for the core language *) + +open Asttypes +open Typedtree + +val type_binding: + Env.t -> rec_flag -> + (Parsetree.pattern * Parsetree.expression) list -> + (Typedtree.pattern * Typedtree.expression) list * Env.t +val type_expression: + Env.t -> Parsetree.expression -> Typedtree.expression + +type error = + Unbound_value of Longident.t + | Unbound_constructor of Longident.t + | Unbound_label of Longident.t + | Constructor_arity_mismatch of Longident.t * int * int + | Label_mismatch of Longident.t * type_expr * type_expr + | Pattern_type_clash of type_expr * type_expr + | Multiply_bound_variable + | Orpat_not_closed + | Expr_type_clash of type_expr * type_expr + | Apply_non_function of type_expr + | Label_multiply_defined of Longident.t + | Label_missing + | Label_not_mutable of Longident.t + | Non_generalizable of type_expr + | Bad_format_letter of char + +exception Error of Location.t * error + +val report_error: error -> unit diff --git a/typing/typedecl.ml b/typing/typedecl.ml new file mode 100644 index 0000000000..3b813b6ab8 --- /dev/null +++ b/typing/typedecl.ml @@ -0,0 +1,119 @@ +(* Typing of type definitions *) + +open Parsetree +open Typedtree +open Typetexp + + +type error = + Repeated_parameter + | Duplicate_constructor of string + | Duplicate_label of string + | Recursive_abbrev of string + +exception Error of Location.t * error + +(* Enter all declared types in the environment as abstract types *) + +let rec enter_types env = function + [] -> + ([], env) + | (name, sdecl) :: srem -> + let decl = + { type_params = []; (*this field is unused when kind = Type_abstract*) + type_arity = List.length sdecl.ptype_params; + type_kind = Type_abstract } in + let (id, extenv) = Env.enter_type name decl env in + let (rem_id, final_env) = enter_types extenv srem in + (id :: rem_id, final_env) + +(* Translate one type declaration *) + +let transl_declaration env (name, sdecl) id = + Ctype.begin_def(); + reset_type_variables(); + let params = + try + List.map enter_type_variable sdecl.ptype_params + with Already_bound -> + raise(Error(sdecl.ptype_loc, Repeated_parameter)) in + let kind = + match sdecl.ptype_kind with + Ptype_abstract -> + Type_abstract + | Ptype_manifest sty -> + Type_manifest(transl_simple_type env true sty) + | Ptype_variant cstrs -> + let all_constrs = ref Cset.empty in + List.iter + (fun (name, args) -> + if Cset.mem name !all_constrs then + raise(Error(sdecl.ptype_loc, Duplicate_constructor name)); + all_constrs := Cset.add name !all_constrs) + cstrs; + Type_variant(List.map + (fun (name, args) -> + (name, List.map (transl_simple_type env true) args)) + cstrs) + | Ptype_record lbls -> + let all_labels = ref Cset.empty in + List.iter + (fun (name, mut, arg) -> + if Cset.mem name !all_labels then + raise(Error(sdecl.ptype_loc, Duplicate_label name)); + all_labels := Cset.add name !all_labels) + lbls; + Type_record(List.map + (fun (name, mut, arg) -> + (name, mut, transl_simple_type env true arg)) + lbls) in + Ctype.end_def(); + List.iter Ctype.generalize params; + (id, + {type_params = params; type_arity = List.length params; type_kind = kind}) + +(* Check for recursive abbrevs *) + +let check_recursive_abbrev env (name, sdecl) (id, decl) = + match decl.type_kind with + Type_manifest ty -> + if Ctype.free_type_ident env id ty + then raise(Error(sdecl.ptype_loc, Recursive_abbrev name)) + | _ -> () + +(* Translate a set of mutually recursive type declarations *) + +let transl_type_decl env name_sdecl_list = + (* Enter the types as abstract *) + let (id_list, temp_env) = enter_types env name_sdecl_list in + (* Translate each declaration *) + let decls = List.map2 (transl_declaration temp_env) name_sdecl_list id_list in + (* Build the final env *) + let newenv = + List.fold_right (fun (id, decl) env -> Env.add_type id decl env) decls env in + (* Check for recursive abbrevs *) + List.iter2 (check_recursive_abbrev newenv) name_sdecl_list decls; + (* Done *) + (decls, newenv) + +(* Translate an exception declaration *) + +let transl_exception env excdecl = + reset_type_variables(); + List.map (transl_simple_type env true) excdecl + +(* Error report *) + +open Format + +let report_error = function + Repeated_parameter -> + print_string "A type parameter occurs several times" + | Duplicate_constructor s -> + print_string "Two constructors are named "; print_string s + | Duplicate_label s -> + print_string "Two labels are named "; print_string s + | Recursive_abbrev s -> + print_string "The type abbreviation "; print_string s; + print_string " is cyclic" + diff --git a/typing/typedecl.mli b/typing/typedecl.mli new file mode 100644 index 0000000000..8404925057 --- /dev/null +++ b/typing/typedecl.mli @@ -0,0 +1,19 @@ +(* Typing of type definitions *) + +open Typedtree + +val transl_type_decl: + Env.t -> (string * Parsetree.type_declaration) list -> + (Ident.t * type_declaration) list * Env.t +val transl_exception: + Env.t -> Parsetree.exception_declaration -> exception_declaration + +type error = + Repeated_parameter + | Duplicate_constructor of string + | Duplicate_label of string + | Recursive_abbrev of string + +exception Error of Location.t * error + +val report_error: error -> unit diff --git a/typing/typedtree.ml b/typing/typedtree.ml new file mode 100644 index 0000000000..c795799846 --- /dev/null +++ b/typing/typedtree.ml @@ -0,0 +1,186 @@ +(* Abstract syntax tree after typing *) + +open Misc +open Asttypes + +(* Type expressions for the core language *) + +type type_expr = + Tvar of type_variable + | Tarrow of type_expr * type_expr + | Ttuple of type_expr list + | Tconstr of Path.t * type_expr list + +and type_variable = + { mutable tvar_level: int; + mutable tvar_link: type_expr option } + +(* Value descriptions *) + +type value_description = + { val_type: type_expr; (* Type of the val *) + val_prim: primitive_description } (* Is this a primitive? *) +and primitive_description = + Not_prim + | Primitive of string * int + +(* Constructor descriptions *) + +type constructor_description = + { cstr_res: type_expr; (* Type of the result *) + cstr_args: type_expr list; (* Type of the arguments *) + cstr_arity: int; (* Number of arguments *) + cstr_tag: constructor_tag; (* Tag for heap blocks *) + cstr_span: int } (* Number of constructors in type *) + +and constructor_tag = + Cstr_tag of int (* Regular constructor *) + | Cstr_exception of Path.t (* Exception constructor *) + +(* Record label descriptions *) + +type label_description = + { lbl_res: type_expr; (* Type of the result *) + lbl_arg: type_expr; (* Type of the argument *) + lbl_mut: mutable_flag; (* Is this a mutable field? *) + lbl_pos: int; (* Position in block *) + lbl_all: label_description array (* All the labels in this type *) + } + +(* Value expressions for the core language *) + +type pattern = + { pat_desc: pattern_desc; + pat_loc: Location.t; + pat_type: type_expr } + +and pattern_desc = + Tpat_any + | Tpat_var of Ident.t + | Tpat_alias of pattern * Ident.t + | Tpat_constant of constant + | Tpat_tuple of pattern list + | Tpat_construct of constructor_description * pattern list + | Tpat_record of (label_description * pattern) list + | Tpat_or of pattern * pattern + +type expression = + { exp_desc: expression_desc; + exp_loc: Location.t; + exp_type: type_expr } + +and expression_desc = + Texp_ident of Path.t * value_description + | Texp_constant of constant + | Texp_let of rec_flag * (pattern * expression) list * expression + | Texp_function of (pattern * expression) list + | Texp_apply of expression * expression list + | Texp_match of expression * (pattern * expression) list + | Texp_try of expression * (pattern * expression) list + | Texp_tuple of expression list + | Texp_construct of constructor_description * expression list + | Texp_record of (label_description * expression) list + | Texp_field of expression * label_description + | Texp_setfield of expression * label_description * expression + | Texp_array of expression list + | Texp_ifthenelse of expression * expression * expression option + | Texp_sequence of expression * expression + | Texp_while of expression * expression + | Texp_for of + Ident.t * expression * expression * direction_flag * expression + | Texp_when of expression * expression + +(* Type definitions *) + +type type_declaration = + { mutable type_params: type_expr list; + type_arity: int; + mutable type_kind: type_kind } + +and type_kind = + Type_abstract + | Type_manifest of type_expr + | Type_variant of (string * type_expr list) list + | Type_record of (string * mutable_flag * type_expr) list + +type exception_declaration = type_expr list + +(* Type expressions for the module language *) + +type module_type = + Tmty_ident of Path.t + | Tmty_signature of signature + | Tmty_functor of Ident.t * module_type * module_type + +and signature = signature_item list + +and signature_item = + Tsig_value of Ident.t * value_description + | Tsig_type of Ident.t * type_declaration + | Tsig_exception of Ident.t * exception_declaration + | Tsig_module of Ident.t * module_type + | Tsig_modtype of Ident.t * modtype_declaration + +and modtype_declaration = + Tmodtype_abstract + | Tmodtype_manifest of module_type + +(* Value expressions for the module language *) + +type module_expr = + { mod_desc: module_expr_desc; + mod_loc: Location.t; + mod_type: module_type } + +and module_expr_desc = + Tmod_ident of Path.t + | Tmod_structure of structure + | Tmod_functor of Ident.t * module_type * module_expr + | Tmod_apply of module_expr * module_expr * module_coercion + | Tmod_constraint of module_expr * module_type * module_coercion + +and structure = structure_item list + +and structure_item = + Tstr_eval of expression + | Tstr_value of rec_flag * (pattern * expression) list + | Tstr_primitive of Ident.t * value_description + | Tstr_type of (Ident.t * type_declaration) list + | Tstr_exception of Ident.t * exception_declaration + | Tstr_module of Ident.t * module_expr + | Tstr_modtype of Ident.t * module_type + | Tstr_open of Path.t + +and module_coercion = + Tcoerce_none + | Tcoerce_structure of (int * module_coercion) list + | Tcoerce_functor of module_coercion * module_coercion + +(* Auxiliary functions over the a.s.t. *) + +(* List the identifiers bound by a pattern or a let *) + +let idents = ref([]: Ident.t list) + +let rec bound_idents pat = + match pat.pat_desc with + Tpat_any -> () + | Tpat_var id -> idents := id :: !idents + | Tpat_alias(p, id) -> bound_idents p; idents := id :: !idents + | Tpat_constant cst -> () + | Tpat_tuple patl -> List.iter bound_idents patl + | Tpat_construct(cstr, patl) -> List.iter bound_idents patl + | Tpat_record lbl_pat_list -> + List.iter (fun (lbl, pat) -> bound_idents pat) lbl_pat_list + | Tpat_or(p1, p2) -> bound_idents p1; bound_idents p2 + +let pat_bound_idents pat = + idents := []; bound_idents pat; let res = !idents in idents := []; res + +let let_bound_idents pat_expr_list = + idents := []; + List.iter (fun (pat, expr) -> bound_idents pat) pat_expr_list; + let res = !idents in idents := []; res + + + diff --git a/typing/typedtree.mli b/typing/typedtree.mli new file mode 100644 index 0000000000..1c52b126fd --- /dev/null +++ b/typing/typedtree.mli @@ -0,0 +1,162 @@ +(* Abstract syntax tree after typing *) + +open Misc +open Asttypes + +(* Type expressions for the core language *) + +type type_expr = + Tvar of type_variable + | Tarrow of type_expr * type_expr + | Ttuple of type_expr list + | Tconstr of Path.t * type_expr list + +and type_variable = + { mutable tvar_level: int; + mutable tvar_link: type_expr option } + +(* Value descriptions *) + +type value_description = + { val_type: type_expr; (* Type of the val *) + val_prim: primitive_description } (* Is this a primitive? *) +and primitive_description = + Not_prim + | Primitive of string * int + +(* Constructor descriptions *) + +type constructor_description = + { cstr_res: type_expr; (* Type of the result *) + cstr_args: type_expr list; (* Type of the arguments *) + cstr_arity: int; (* Number of arguments *) + cstr_tag: constructor_tag; (* Tag for heap blocks *) + cstr_span: int } (* Number of constructors in type *) + +and constructor_tag = + Cstr_tag of int (* Regular constructor *) + | Cstr_exception of Path.t (* Exception constructor *) + +(* Record label descriptions *) + +type label_description = + { lbl_res: type_expr; (* Type of the result *) + lbl_arg: type_expr; (* Type of the argument *) + lbl_mut: mutable_flag; (* Is this a mutable field? *) + lbl_pos: int; (* Position in block *) + lbl_all: label_description array (* All the labels in this type *) + } + +(* Value expressions for the core language *) + +type pattern = + { pat_desc: pattern_desc; + pat_loc: Location.t; + pat_type: type_expr } + +and pattern_desc = + Tpat_any + | Tpat_var of Ident.t + | Tpat_alias of pattern * Ident.t + | Tpat_constant of constant + | Tpat_tuple of pattern list + | Tpat_construct of constructor_description * pattern list + | Tpat_record of (label_description * pattern) list + | Tpat_or of pattern * pattern + +type expression = + { exp_desc: expression_desc; + exp_loc: Location.t; + exp_type: type_expr } + +and expression_desc = + Texp_ident of Path.t * value_description + | Texp_constant of constant + | Texp_let of rec_flag * (pattern * expression) list * expression + | Texp_function of (pattern * expression) list + | Texp_apply of expression * expression list + | Texp_match of expression * (pattern * expression) list + | Texp_try of expression * (pattern * expression) list + | Texp_tuple of expression list + | Texp_construct of constructor_description * expression list + | Texp_record of (label_description * expression) list + | Texp_field of expression * label_description + | Texp_setfield of expression * label_description * expression + | Texp_array of expression list + | Texp_ifthenelse of expression * expression * expression option + | Texp_sequence of expression * expression + | Texp_while of expression * expression + | Texp_for of + Ident.t * expression * expression * direction_flag * expression + | Texp_when of expression * expression + +(* Type definitions *) + +type type_declaration = + { mutable type_params: type_expr list; + type_arity: int; + mutable type_kind: type_kind } + +and type_kind = + Type_abstract + | Type_manifest of type_expr + | Type_variant of (string * type_expr list) list + | Type_record of (string * mutable_flag * type_expr) list + +type exception_declaration = type_expr list + +(* Type expressions for the module language *) + +type module_type = + Tmty_ident of Path.t + | Tmty_signature of signature + | Tmty_functor of Ident.t * module_type * module_type + +and signature = signature_item list + +and signature_item = + Tsig_value of Ident.t * value_description + | Tsig_type of Ident.t * type_declaration + | Tsig_exception of Ident.t * exception_declaration + | Tsig_module of Ident.t * module_type + | Tsig_modtype of Ident.t * modtype_declaration + +and modtype_declaration = + Tmodtype_abstract + | Tmodtype_manifest of module_type + +(* Value expressions for the module language *) + +type module_expr = + { mod_desc: module_expr_desc; + mod_loc: Location.t; + mod_type: module_type } + +and module_expr_desc = + Tmod_ident of Path.t + | Tmod_structure of structure + | Tmod_functor of Ident.t * module_type * module_expr + | Tmod_apply of module_expr * module_expr * module_coercion + | Tmod_constraint of module_expr * module_type * module_coercion + +and structure = structure_item list + +and structure_item = + Tstr_eval of expression + | Tstr_value of rec_flag * (pattern * expression) list + | Tstr_primitive of Ident.t * value_description + | Tstr_type of (Ident.t * type_declaration) list + | Tstr_exception of Ident.t * exception_declaration + | Tstr_module of Ident.t * module_expr + | Tstr_modtype of Ident.t * module_type + | Tstr_open of Path.t + +and module_coercion = + Tcoerce_none + | Tcoerce_structure of (int * module_coercion) list + | Tcoerce_functor of module_coercion * module_coercion + +(* Auxiliary functions over the a.s.t. *) + +val pat_bound_idents: pattern -> Ident.t list +val let_bound_idents: (pattern * expression) list -> Ident.t list diff --git a/typing/typemod.ml b/typing/typemod.ml new file mode 100644 index 0000000000..f0e6fbf535 --- /dev/null +++ b/typing/typemod.ml @@ -0,0 +1,306 @@ +(* Type-checking of the module language *) + +open Misc +open Path +open Parsetree +open Typedtree + + +type error = + Unbound_module of Longident.t + | Unbound_modtype of Longident.t + | Cannot_apply of module_type + | Not_included of Includemod.error list + | Cannot_eliminate_dependency of module_type + | Signature_expected + | Structure_expected of module_type + | With_not_abstract of string + | With_arity_mismatch of string + +exception Error of Location.t * error + +(* Merge a set of type definitions in a signature *) + +let merge_constraints loc env sg decls = + let sub = ref Subst.identity in + let rec merge_one_constraint id decl = function + [] -> + [Tsig_type(id, decl)] + | (Tsig_type(id', decl') as item) :: rem -> + if Ident.equal id id' then begin + if decl'.type_kind <> Type_abstract then + raise(Error(loc, With_not_abstract(Ident.name id))); + if decl'.type_arity <> decl.type_arity then + raise(Error(loc, With_arity_mismatch(Ident.name id))); + sub := Subst.add_type id (Pident id') !sub; + Tsig_type(id', decl) :: rem + end else + item :: merge_one_constraint id decl rem + | item :: rem -> + item :: merge_one_constraint id decl rem in + let rec merge_all_constraints sg = function + [] -> + sg + | (id, decl) :: rem -> + merge_all_constraints (merge_one_constraint id decl sg) rem in + let newsig = merge_all_constraints sg decls in + Subst.signature !sub newsig + +(* Lookup and strengthen the type of a module path *) + +let type_module_path env loc lid = + try + Env.lookup_module lid env + with Not_found -> + raise(Error(loc, Unbound_module lid)) + +(* Extract a signature from a module type *) + +let extract_sig env loc mty = + match Mtype.scrape env mty with + Tmty_signature sg -> sg + | _ -> raise(Error(loc, Signature_expected)) + +let extract_sig_open env loc mty = + match Mtype.scrape env mty with + Tmty_signature sg -> sg + | _ -> raise(Error(loc, Structure_expected mty)) + +(* Check and translate a module type expression *) + +let rec transl_modtype env smty = + match smty.pmty_desc with + Pmty_ident lid -> + begin try + let (path, info) = Env.lookup_modtype lid env in + Tmty_ident path + with Not_found -> + raise(Error(smty.pmty_loc, Unbound_modtype lid)) + end + | Pmty_signature sg -> + Tmty_signature (transl_signature env sg) + | Pmty_functor(param, sarg, sres) -> + let arg = transl_modtype env sarg in + let (id, newenv) = Env.enter_module param arg env in + let res = transl_modtype newenv sres in + Tmty_functor(id, arg, res) + | Pmty_with(sbody, sdecls) -> + let body = transl_modtype env sbody in + let sg = extract_sig env sbody.pmty_loc body in + let (decls, newenv) = + Typedecl.transl_type_decl env sdecls in + Tmty_signature(merge_constraints smty.pmty_loc env sg decls) + +and transl_signature env sg = + match sg with + [] -> [] + | Psig_value(name, sdesc) :: srem -> + let ty = Typetexp.transl_type_scheme env sdesc.pval_type in + let prim = + match sdesc.pval_prim with + None -> Not_prim + | Some p -> Primitive(p, Ctype.arity ty) in + let desc = { val_type = ty; val_prim = prim } in + let (id, newenv) = Env.enter_value name desc env in + let rem = transl_signature newenv srem in + Tsig_value(id, desc) :: rem + | Psig_type sdecls :: srem -> + let (decls, newenv) = Typedecl.transl_type_decl env sdecls in + let rem = transl_signature newenv srem in + map_end (fun (id, info) -> Tsig_type(id, info)) decls rem + | Psig_exception(name, sarg) :: srem -> + let arg = Typedecl.transl_exception env sarg in + let (id, newenv) = Env.enter_exception name arg env in + let rem = transl_signature newenv srem in + Tsig_exception(id, arg) :: rem + | Psig_module(name, smty) :: srem -> + let mty = transl_modtype env smty in + let (id, newenv) = Env.enter_module name mty env in + let rem = transl_signature newenv srem in + Tsig_module(id, mty) :: rem + | Psig_modtype(name, sinfo) :: srem -> + let info = transl_modtype_info env sinfo in + let (id, newenv) = Env.enter_modtype name info env in + let rem = transl_signature newenv srem in + Tsig_modtype(id, info) :: rem + | Psig_open(lid, loc) :: srem -> + let (path, mty) = type_module_path env loc lid in + let sg = extract_sig_open env loc mty in + let newenv = Env.open_signature path sg env in + transl_signature newenv srem + | Psig_include smty :: srem -> + let mty = transl_modtype env smty in + let sg = extract_sig env smty.pmty_loc mty in + let newenv = Env.add_signature sg env in + let rem = transl_signature newenv srem in + sg @ rem + +and transl_modtype_info env sinfo = + match sinfo with + Pmodtype_abstract -> + Tmodtype_abstract + | Pmodtype_manifest smty -> + Tmodtype_manifest(transl_modtype env smty) + +(* Type a module val expression *) + +let rec type_module env smod = + match smod.pmod_desc with + Pmod_ident lid -> + let (path, mty) = type_module_path env smod.pmod_loc lid in + { mod_desc = Tmod_ident path; + mod_type = Mtype.strengthen env mty path; + mod_loc = smod.pmod_loc } + | Pmod_structure sstr -> + let (str, sg, _) = type_structure env sstr in + { mod_desc = Tmod_structure str; + mod_type = Tmty_signature sg; + mod_loc = smod.pmod_loc } + | Pmod_functor(name, smty, sbody) -> + let mty = transl_modtype env smty in + let (id, newenv) = Env.enter_module name mty env in + let body = type_module newenv sbody in + { mod_desc = Tmod_functor(id, mty, body); + mod_type = Tmty_functor(id, mty, body.mod_type); + mod_loc = smod.pmod_loc } + | Pmod_apply(sfunct, sarg) -> + let funct = type_module env sfunct in + let arg = type_module env sarg in + begin match Mtype.scrape env funct.mod_type with + Tmty_functor(param, mty_param, mty_res) as mty_functor -> + let coercion = + try + Includemod.modtypes env arg.mod_type mty_param + with Includemod.Error msg -> + raise(Error(sarg.pmod_loc, Not_included msg)) in + let mty_appl = + match arg with + {mod_desc = Tmod_ident path} -> + Subst.modtype (Subst.add_module param path Subst.identity) + mty_res + | _ -> + try + Mtype.nondep_supertype + (Env.add_module param arg.mod_type env) param mty_res + with Not_found -> + raise(Error(smod.pmod_loc, + Cannot_eliminate_dependency mty_functor)) in + { mod_desc = Tmod_apply(funct, arg, coercion); + mod_type = mty_appl; + mod_loc = smod.pmod_loc } + | _ -> + raise(Error(sfunct.pmod_loc, Cannot_apply funct.mod_type)) + end + | Pmod_constraint(sarg, smty) -> + let arg = type_module env sarg in + let mty = transl_modtype env smty in + let coercion = + try + Includemod.modtypes env arg.mod_type mty + with Includemod.Error msg -> + raise(Error(sarg.pmod_loc, Not_included msg)) in + { mod_desc = Tmod_constraint(arg, mty, coercion); + mod_type = mty; + mod_loc = smod.pmod_loc } + +and type_structure env = function + [] -> + ([], [], env) + | Pstr_eval sexpr :: srem -> + let expr = Typecore.type_expression env sexpr in + let (str_rem, sig_rem, final_env) = type_structure env srem in + (Tstr_eval expr :: str_rem, sig_rem, final_env) + | Pstr_value(rec_flag, sdefs) :: srem -> + let (defs, newenv) = + Typecore.type_binding env rec_flag sdefs in + let (str_rem, sig_rem, final_env) = type_structure newenv srem in + let bound_idents = List.rev(let_bound_idents defs) in + let make_sig_value id = + Tsig_value(id, Env.find_value (Pident id) newenv) in + (Tstr_value(rec_flag, defs) :: str_rem, + map_end make_sig_value bound_idents sig_rem, + final_env) + | Pstr_primitive(name, sdesc) :: srem -> + let ty = Typetexp.transl_type_scheme env sdesc.pval_type in + let prim = + match sdesc.pval_prim with + None -> Not_prim + | Some p -> Primitive(p, Ctype.arity ty) in + let desc = { val_type = ty; val_prim = prim } in + let (id, newenv) = Env.enter_value name desc env in + let (str_rem, sig_rem, final_env) = type_structure newenv srem in + (Tstr_primitive(id, desc) :: str_rem, + Tsig_value(id, desc) :: sig_rem, + final_env) + | Pstr_type sdecls :: srem -> + let (decls, newenv) = Typedecl.transl_type_decl env sdecls in + let (str_rem, sig_rem, final_env) = type_structure newenv srem in + (Tstr_type decls :: str_rem, + map_end (fun (id, info) -> Tsig_type(id, info)) decls sig_rem, + final_env) + | Pstr_exception(name, sarg) :: srem -> + let arg = Typedecl.transl_exception env sarg in + let (id, newenv) = Env.enter_exception name arg env in + let (str_rem, sig_rem, final_env) = type_structure newenv srem in + (Tstr_exception(id, arg) :: str_rem, + Tsig_exception(id, arg) :: sig_rem, + final_env) + | Pstr_module(name, smodl) :: srem -> + let modl = type_module env smodl in + let (id, newenv) = Env.enter_module name modl.mod_type env in + let (str_rem, sig_rem, final_env) = type_structure newenv srem in + (Tstr_module(id, modl) :: str_rem, + Tsig_module(id, modl.mod_type) :: sig_rem, + final_env) + | Pstr_modtype(name, smty) :: srem -> + let mty = transl_modtype env smty in + let (id, newenv) = Env.enter_modtype name (Tmodtype_manifest mty) env in + let (str_rem, sig_rem, final_env) = type_structure newenv srem in + (Tstr_modtype(id, mty) :: str_rem, + Tsig_modtype(id, Tmodtype_manifest mty) :: sig_rem, + final_env) + | Pstr_open(lid, loc) :: srem -> + let (path, mty) = type_module_path env loc lid in + let sg = extract_sig_open env loc mty in + type_structure (Env.open_signature path sg env) srem + +(* Error report *) + +open Format +open Printtyp + +let report_error = function + Unbound_module lid -> + print_string "Unbound module "; longident lid + | Unbound_modtype lid -> + print_string "Unbound module type "; longident lid + | Cannot_apply mty -> + open_hovbox 0; + print_string "This module is not a functor; it has type"; + print_space(); modtype mty; + close_box() + | Not_included errs -> + open_vbox 0; + print_string "Signature mismatch:"; print_space(); + Includemod.report_error errs; + close_box() + | Cannot_eliminate_dependency mty -> + open_hovbox 0; + print_string "This functor has type"; + print_space(); modtype mty; print_space(); + print_string "The parameter cannot be eliminated in the result type."; + print_space(); + print_string "Please bind the argument to a module identifier."; + close_box() + | Signature_expected -> + print_string "This module type is not a signature" + | Structure_expected mty -> + open_hovbox 0; + print_string "This module is not a structure; it has type"; + print_space(); modtype mty; + close_box() + | With_not_abstract s -> + print_string "The type "; print_string s; print_string " is not abstract" + | With_arity_mismatch s -> + print_string "Arity mismatch in `with' constraint over type "; + print_string s diff --git a/typing/typemod.mli b/typing/typemod.mli new file mode 100644 index 0000000000..4a138eebe9 --- /dev/null +++ b/typing/typemod.mli @@ -0,0 +1,23 @@ +(* Type-checking of the module language *) + +open Typedtree + +val type_structure: + Env.t -> Parsetree.structure -> structure * signature * Env.t +val transl_signature: + Env.t -> Parsetree.signature -> signature + +type error = + Unbound_module of Longident.t + | Unbound_modtype of Longident.t + | Cannot_apply of module_type + | Not_included of Includemod.error list + | Cannot_eliminate_dependency of module_type + | Signature_expected + | Structure_expected of module_type + | With_not_abstract of string + | With_arity_mismatch of string + +exception Error of Location.t * error + +val report_error: error -> unit diff --git a/typing/typetexp.ml b/typing/typetexp.ml new file mode 100644 index 0000000000..e214682d93 --- /dev/null +++ b/typing/typetexp.ml @@ -0,0 +1,86 @@ +(* Typechecking of type expressions for the core language *) + +open Parsetree +open Typedtree +open Ctype + +exception Already_bound + +type error = + Unbound_type_variable of string + | Unbound_type_constructor of Longident.t + | Type_arity_mismatch of Longident.t * int * int + +exception Error of Location.t * error + +(* Translation of type expressions *) + +let type_variables = ref (Tbl.empty : (string, type_expr) Tbl.t) + +let reset_type_variables () = + type_variables := Tbl.empty + +let enter_type_variable name = + try + Tbl.find name !type_variables; raise Already_bound + with Not_found -> + let v = newvar() in + type_variables := Tbl.add name v !type_variables; + v + +let rec transl_simple_type env fixed styp = + match styp.ptyp_desc with + Ptyp_var name -> + begin try + Tbl.find name !type_variables + with Not_found -> + if fixed then + raise(Error(styp.ptyp_loc, Unbound_type_variable name)) + else begin + let v = newvar() in + type_variables := Tbl.add name v !type_variables; + v + end + end + | Ptyp_arrow(st1, st2) -> + Tarrow(transl_simple_type env fixed st1, + transl_simple_type env fixed st2) + | Ptyp_tuple stl -> + Ttuple(List.map (transl_simple_type env fixed) stl) + | Ptyp_constr(lid, stl) -> + let (path, decl) = + try + Env.lookup_type lid env + with Not_found -> + raise(Error(styp.ptyp_loc, Unbound_type_constructor lid)) in + if List.length stl <> decl.type_arity then + raise(Error(styp.ptyp_loc, Type_arity_mismatch(lid, decl.type_arity, + List.length stl))); + Tconstr(path, List.map (transl_simple_type env fixed) stl) + +let transl_type_scheme env styp = + reset_type_variables(); + begin_def(); + let typ = transl_simple_type env false styp in + end_def(); + generalize typ; + typ + +(* Error report *) + +open Format +open Printtyp + +let report_error = function + Unbound_type_variable name -> + print_string "Unbound type parameter "; print_string name + | Unbound_type_constructor lid -> + print_string "Unbound type constructor "; longident lid + | Type_arity_mismatch(lid, expected, provided) -> + open_hovbox 0; + print_string "The type constructor "; longident lid; + print_space(); print_string "expects "; print_int expected; + print_string " argument(s),"; print_space(); + print_string "but is here applied to "; print_int provided; + print_string " argument(s)"; + close_box() diff --git a/typing/typetexp.mli b/typing/typetexp.mli new file mode 100644 index 0000000000..57fb18d245 --- /dev/null +++ b/typing/typetexp.mli @@ -0,0 +1,19 @@ +(* Typechecking of type expressions for the core language *) + +val transl_simple_type: + Env.t -> bool -> Parsetree.core_type -> Typedtree.type_expr +val transl_type_scheme: + Env.t -> Parsetree.core_type -> Typedtree.type_expr +val reset_type_variables: unit -> unit +val enter_type_variable: string -> Typedtree.type_expr + +exception Already_bound + +type error = + Unbound_type_variable of string + | Unbound_type_constructor of Longident.t + | Type_arity_mismatch of Longident.t * int * int + +exception Error of Location.t * error + +val report_error: error -> unit diff --git a/utils/clflags.ml b/utils/clflags.ml new file mode 100644 index 0000000000..243e358ec5 --- /dev/null +++ b/utils/clflags.ml @@ -0,0 +1,19 @@ +(* Command-line parameters *) + +let objfiles = ref ([] : string list) (* .cmo and .cma files *) +and ccobjs = ref ([] : string list) (* .o, .a and -lxxx files *) + +let compile_only = ref false (* -c *) +and exec_name = ref "a.out" (* -o *) +and archive_name = ref "library.cma" (* -o *) +and include_dirs = ref ([] : string list)(* - I *) +and print_types = ref false (* -i *) +and make_archive = ref false (* -a *) +and fast = ref false (* -fast *) +and link_everything = ref false (* -linkall *) +and custom_runtime = ref false (* -custom *) +and ccopts = ref ([] : string list) (* -ccopt *) +and nopervasives = ref false (* -nopervasives *) + +let dump_lambda = ref false (* -dlambda *) +and dump_instr = ref false (* -dinstr *) diff --git a/utils/config.mli b/utils/config.mli new file mode 100644 index 0000000000..05e6528fc6 --- /dev/null +++ b/utils/config.mli @@ -0,0 +1,23 @@ +(* System configuration *) + +val version: string + (* The current version number of the system *) + +val standard_library: string + (* The directory containing the standard libraries *) +val c_compiler: string + (* The C compiler to use for custom runtime mode *) +val c_libraries: string + (* The C libraries to link with custom runtimes *) + +val load_path: string list ref + (* Directories in the search path for .cmi and .cmo files *) + +val exec_magic_number: string + (* Magic number for bytecode executable files *) +val cmi_magic_number: string + (* Magic number for compiled interface files *) +val cmo_magic_number: string + (* Magic number for object bytecode files *) +val cma_magic_number: string + (* Magic number for archive files *) diff --git a/utils/config.mlp b/utils/config.mlp new file mode 100644 index 0000000000..5cee742840 --- /dev/null +++ b/utils/config.mlp @@ -0,0 +1,15 @@ +let standard_library = "%%LIBDIR%%" + +let c_compiler = "%%CC%%" + +let c_libraries = "%%CCLIBS%%" + +let version = "1.00" + +let exec_magic_number = "Caml1999X001" +and cmi_magic_number = "Caml1999I001" +and cmo_magic_number = "Caml1999O001" +and cma_magic_number = "Caml1999A001" + +let load_path = ref ([] : string list) + diff --git a/utils/crc.ml b/utils/crc.ml new file mode 100644 index 0000000000..e296ac9122 --- /dev/null +++ b/utils/crc.ml @@ -0,0 +1,12 @@ +(* CRC computation *) + +external unsafe_for_string: string -> int -> int -> int = "crc_string" + +let for_string str ofs len = + if ofs < 0 or ofs + len > String.length str + then invalid_arg "Crc.for_string" + else unsafe_for_string str ofs len + +external for_channel: in_channel -> int -> int = "crc_chan" + + diff --git a/utils/crc.mli b/utils/crc.mli new file mode 100644 index 0000000000..0663267cbc --- /dev/null +++ b/utils/crc.mli @@ -0,0 +1,6 @@ +(* CRC computation *) + +val for_string: string -> int -> int -> int +val for_channel: in_channel -> int -> int = "crc_chan" + + diff --git a/utils/cset.ml b/utils/cset.ml new file mode 100644 index 0000000000..f9305e5036 --- /dev/null +++ b/utils/cset.ml @@ -0,0 +1,103 @@ +(* Sets over ordered types *) + +type 'a t = Empty | Node of 'a t * 'a * 'a t * int + +let empty = Empty + +(* Compute the size (number of nodes and leaves) of a tree. *) + +let size = function + Empty -> 1 + | Node(_, _, _, s) -> s + +(* Creates a new node with left son l, val x and right son r. + l and r must be balanced and size l / size r must be between 1/N and N. + Inline expansion of size for better speed. *) + +let new l x r = + let sl = match l with Empty -> 0 | Node(_,_,_,s) -> s in + let sr = match r with Empty -> 0 | Node(_,_,_,s) -> s in + Node(l, x, r, sl + sr + 1) + +(* Same as new, but performs rebalancing if necessary. + Assumes l and r balanced, and size l / size r "reasonable". + Inline expansion of new for better speed in the most frequent case + where no rebalancing is required. *) + +let bal l x r = + let sl = match l with Empty -> 0 | Node(_,_,_,s) -> s in + let sr = match r with Empty -> 0 | Node(_,_,_,s) -> s in + if sl > 3 * sr then begin + match l with + Empty -> invalid_arg "Cset.bal" + | Node(ll, lv, lr, _) -> + if size ll >= size lr then + new ll lv (new lr x r) + else begin + match lr with + Empty -> invalid_arg "Cset.bal" + | Node(lrl, lrv, lrr, _)-> + new (new ll lv lrl) lrv (new lrr x r) + end + end else if sr > 3 * sl then begin + match r with + Empty -> invalid_arg "Cset.bal" + | Node(rl, rv, rr, _) -> + if size rr >= size rl then + new (new l x rl) rv rr + else begin + match rl with + Empty -> invalid_arg "Cset.bal" + | Node(rll, rlv, rlr, _) -> + new (new l x rll) rlv (new rlr rv rr) + end + end else + Node(l, x, r, sl + sr + 1) + +(* Merge two trees l and r into one. + All elements of l must precede the elements of r. + Assumes size l / size r between 1/N and N. *) + +let rec merge l r = + match (l, r) with + (Empty, t) -> t + | (t, Empty) -> t + | (Node(l1, v1, r1, h1), Node(l2, v2, r2, h2)) -> + bal l1 v1 (bal (merge r1 l2) v2 r2) + +(* Insertion *) + +let rec add x = function + Empty -> + Node(Empty, x, Empty, 1) + | Node(l, v, r, _) as t -> + let c = compare x v in + if c = 0 then t else + if c < 0 then bal (add x l) v r else bal l v (add x r) + +(* Membership *) + +let rec mem x = function + Empty -> + false + | Node(l, v, r, _) -> + let c = compare x v in + c = 0 or mem x (if c < 0 then l else r) + +(* Removal *) + +let rec remove x = function + Empty -> + Empty + | Node(l, v, r, _) -> + let c = compare x v in + if c = 0 then merge l r else + if c < 0 then bal (remove x l) v r else bal l v (remove x r) + +(* Contents *) + +let elements s = + let rec elements accu = function + Empty -> accu + | Node(l, v, r, _) -> elements (v :: elements accu r) l + in elements [] s diff --git a/utils/cset.mli b/utils/cset.mli new file mode 100644 index 0000000000..a7f4cae9c7 --- /dev/null +++ b/utils/cset.mli @@ -0,0 +1,9 @@ +(* Sets over types ordered with the default ordering *) + +type 'a t + +val empty: 'a t +val mem: 'a -> 'a t -> bool +val add: 'a -> 'a t -> 'a t +val remove: 'a -> 'a t -> 'a t +val elements: 'a t -> 'a list diff --git a/utils/meta.ml b/utils/meta.ml new file mode 100644 index 0000000000..08077cf993 --- /dev/null +++ b/utils/meta.ml @@ -0,0 +1,7 @@ +external global_data : unit -> Obj.t array = "get_global_data" +external realloc_global_data : int -> unit = "realloc_global" +external static_alloc : int -> string = "static_alloc" +external static_free : string -> unit = "static_free" +external static_resize : string -> int -> string = "static_resize" +external execute_bytecode : string -> int -> Obj.t = "execute_bytecode" +external available_primitives : unit -> string array = "available_primitives" diff --git a/utils/meta.mli b/utils/meta.mli new file mode 100644 index 0000000000..1c6392a41b --- /dev/null +++ b/utils/meta.mli @@ -0,0 +1,9 @@ +(* To control the runtime system and bytecode interpreter *) + +val global_data : unit -> Obj.t array = "get_global_data" +val realloc_global_data : int -> unit = "realloc_global" +val static_alloc : int -> string = "static_alloc" +val static_free : string -> unit = "static_free" +val static_resize : string -> int -> string = "static_resize" +val execute_bytecode : string -> int -> Obj.t = "execute_bytecode" +val available_primitives : unit -> string array = "available_primitives" diff --git a/utils/misc.ml b/utils/misc.ml new file mode 100644 index 0000000000..0d1a909429 --- /dev/null +++ b/utils/misc.ml @@ -0,0 +1,112 @@ +(* Errors *) + +type 'a option = None | Some of 'a + +exception Fatal_error + +let fatal_error msg = + prerr_string ">> Fatal error: "; prerr_endline msg; raise Fatal_error + +(* List functions *) + +let rec map_end f l1 l2 = + match l1 with + [] -> l2 + | hd::tl -> f hd :: map_end f tl l2 + +let rec for_all2 pred l1 l2 = + match (l1, l2) with + ([], []) -> true + | (hd1::tl1, hd2::tl2) -> pred hd1 hd2 & for_all2 pred tl1 tl2 + | (_, _) -> false + +(* File functions *) + +let file_exists filename = + try + Sys.close_desc(Sys.open_desc filename [Sys.Open_rdonly] 0); true + with Sys_error msg -> + false + +let find_in_path path name = + if Filename.is_absolute name then + if file_exists name then name else raise Not_found + else begin + let rec try_dir = function + [] -> raise Not_found + | dir::rem -> + let fullname = Filename.concat dir name in + if file_exists fullname then fullname else try_dir rem + in try_dir path + end + +let remove_file filename = + try + Sys.remove filename + with Sys_error msg -> + () + +let temp_file base suffix = + let rec try_name counter = + let name = "/tmp/" ^ base ^ string_of_int counter ^ suffix in + if file_exists name then try_name (counter + 1) else name + in try_name 0 + +(* Hashtable functions *) + +let create_hashtable size init = + let tbl = Hashtbl.new size in + List.iter (fun (key, data) -> Hashtbl.add tbl key data) init; + tbl + +(* String functions *) + +let capitalize s = + let r = String.create (String.length s) in + String.blit s 0 r 0 (String.length s); + let c = s.[0] in + if c >= 'a' & c <= 'z' then r.[0] <- Char.chr(Char.code c - 32); + r + +let lowercase s = + let r = String.create (String.length s) in + String.blit s 0 r 0 (String.length s); + let c = s.[0] in + if c >= 'A' & c <= 'Z' then r.[0] <- Char.chr(Char.code c + 32); + r + +let concat_strings sep l = + match l with + [] -> "" + | hd :: tl -> + let num = ref 0 and len = ref 0 in + List.iter (fun s -> incr num; len := !len + String.length s) l; + let r = String.create (!len + String.length sep * (!num - 1)) in + String.blit hd 0 r 0 (String.length hd); + let pos = ref(String.length hd) in + List.iter + (fun s -> + String.blit sep 0 r !pos (String.length sep); + pos := !pos + String.length sep; + String.blit s 0 r !pos (String.length s); + pos := !pos + String.length s) + tl; + r + +(* File copy *) + +let copy_file ic oc = + let buff = String.create 0x1000 in + let rec copy () = + let n = input ic buff 0 0x1000 in + if n = 0 then () else (output oc buff 0 n; copy()) + in copy() + +let copy_file_chunk ic oc len = + let buff = String.create 0x1000 in + let rec copy n = + if n <= 0 then () else begin + let r = input ic buff 0 (min n 0x1000) in + if r = 0 then raise End_of_file else (output oc buff 0 r; copy(n-r)) + end + in copy len diff --git a/utils/misc.mli b/utils/misc.mli new file mode 100644 index 0000000000..a8411f3352 --- /dev/null +++ b/utils/misc.mli @@ -0,0 +1,35 @@ +(* Miscellaneous useful types and functions *) + +type 'a option = None | Some of 'a + +val fatal_error: string -> 'a +exception Fatal_error + +val map_end: ('a -> 'b) -> 'a list -> 'b list -> 'b list +val for_all2: ('a -> 'b -> bool) -> 'a list -> 'b list -> bool + +val file_exists: string -> bool + (* Check if the given file name List.exists. *) +val find_in_path: string list -> string -> string + (* Search a file in a list of directories. *) +val remove_file: string -> unit + (* Delete the given file if it List.exists. Never raise an error. *) +val temp_file: string -> string -> string + (* Return the name of a non-existent temporary file in [/tmp]. *) + +val create_hashtable: int -> ('a * 'b) list -> ('a, 'b) Hashtbl.t + (* Create a hashtable of the given size and fills it with the + given bindings. *) + +val capitalize: string -> string +val lowercase: string -> string + +val concat_strings: string -> string list -> string + +val copy_file: in_channel -> out_channel -> unit + (* [copy_file ic oc] reads the contents of file [ic] and copies + them to [oc]. It stops when encountering EOF on [ic]. *) +val copy_file_chunk: in_channel -> out_channel -> int -> unit + (* [copy_file_chunk ic oc n] reads [n] bytes from [ic] and copies + them to [oc]. It raises [End_of_file] when encountering + EOF on [ic]. *) diff --git a/utils/tbl.ml b/utils/tbl.ml new file mode 100644 index 0000000000..a921b693b4 --- /dev/null +++ b/utils/tbl.ml @@ -0,0 +1,71 @@ +type ('a, 'b) t = + Empty + | Node of ('a, 'b) t * 'a * 'b * ('a, 'b) t * int + +let empty = Empty + +let height = function + Empty -> 0 + | Node(_,_,_,_,h) -> h + +let new l x d r = + let hl = height l and hr = height r in + Node(l, x, d, r, (if hl >= hr then hl + 1 else hr + 1)) + +let bal l x d r = + let hl = height l and hr = height r in + if hl > hr + 1 then + let (Node(ll, lv, ld, lr, _)) = l in + if height ll >= height lr then + new ll lv ld (new lr x d r) + else + let (Node(lrl, lrv, lrd, lrr, _)) = lr in + new (new ll lv ld lrl) lrv lrd (new lrr x d r) + else if hr > hl + 1 then + let (Node(rl, rv, rd, rr, _)) = r in + if height rr >= height rl then + new (new l x d rl) rv rd rr + else + let (Node(rll, rlv, rld, rlr, _)) = rl in + new (new l x d rll) rlv rld (new rlr rv rd rr) + else + new l x d r + +let rec add x data = function + Empty -> + Node(Empty, x, data, Empty, 1) + | Node(l, v, d, r, h) as t -> + let c = compare x v in + if c = 0 then + Node(l, x, data, r, h) + else if c < 0 then + bal (add x data l) v d r + else + bal l v d (add x data r) + +let rec find x = function + Empty -> + raise Not_found + | Node(l, v, d, r, _) -> + let c = compare x v in + if c = 0 then d + else find x (if c < 0 then l else r) + +let rec iter f = function + Empty -> () + | Node(l, v, d, r, _) -> + iter f l; f v d; iter f r + +open Format + +let print print_key print_data tbl = + open_hovbox 2; + print_string "[["; + iter (fun k d -> + open_hovbox 2; + print_key k; print_string " ->"; print_space(); + print_data d; print_string ";"; + close_box()) + tbl; + print_string "]]"; + close_box() diff --git a/utils/tbl.mli b/utils/tbl.mli new file mode 100644 index 0000000000..9ab22e0cba --- /dev/null +++ b/utils/tbl.mli @@ -0,0 +1,12 @@ +(* Association tables from any ordered type to any type. + We use the generic ordering to compare keys. *) + +type ('a, 'b) t + +val empty: ('a, 'b) t +val add: 'a -> 'b -> ('a, 'b) t -> ('a, 'b) t +val find: 'a -> ('a, 'b) t -> 'b + +val iter: ('a -> 'b -> 'c) -> ('a, 'b) t -> unit + +val print: ('a -> unit) -> ('b -> unit) -> ('a, 'b) t -> unit diff --git a/utils/terminfo.ml b/utils/terminfo.ml new file mode 100644 index 0000000000..682edaa81f --- /dev/null +++ b/utils/terminfo.ml @@ -0,0 +1,7 @@ +(* Basic interface to the terminfo database *) + +external setupterm: unit -> unit = "terminfo_setup" +external getstr: string -> string = "terminfo_getstr" +external getnum: string -> int = "terminfo_getnum" +external puts: out_channel -> string -> int -> unit = "terminfo_puts" + diff --git a/utils/terminfo.mli b/utils/terminfo.mli new file mode 100644 index 0000000000..1989b424d5 --- /dev/null +++ b/utils/terminfo.mli @@ -0,0 +1,7 @@ +(* Basic interface to the terminfo database *) + +val setupterm: unit -> unit = "terminfo_setup" +val getstr: string -> string = "terminfo_getstr" +val getnum: string -> int = "terminfo_getnum" +val puts: out_channel -> string -> int -> unit = "terminfo_puts" + diff --git a/yacc/Makefile b/yacc/Makefile new file mode 100644 index 0000000000..c044a512d8 --- /dev/null +++ b/yacc/Makefile @@ -0,0 +1,31 @@ +# Makefile for the parser generator. + +include ../Makefile.config + +CFLAGS=-O -DNDEBUG $(CCCOMPFLAGS) + +OBJS= closure.o error.o lalr.o lr0.o main.o mkpar.o output.o reader.o \ + skeleton.o symtab.o verbose.o warshall.o + +all: camlyacc + +camlyacc: $(OBJS) + $(CC) $(CCCOMPFLAGS) $(CCLINKFLAGS) -o camlyacc $(OBJS) + +clean: + rm -f *.o camlyacc *~ + +depend: + +closure.o: defs.h +error.o: defs.h +lalr.o: defs.h +lr0.o: defs.h +main.o: defs.h +mkpar.o: defs.h +output.o: defs.h +reader.o: defs.h +skeleton.o: defs.h +symtab.o: defs.h +verbose.o: defs.h +warshall.o: defs.h diff --git a/yacc/closure.c b/yacc/closure.c new file mode 100644 index 0000000000..c69457c5df --- /dev/null +++ b/yacc/closure.c @@ -0,0 +1,265 @@ +#include "defs.h" + +short *itemset; +short *itemsetend; +unsigned *ruleset; + +static unsigned *first_derives; +static unsigned *EFF; + + +set_EFF() +{ + register unsigned *row; + register int symbol; + register short *sp; + register int rowsize; + register int i; + register int rule; + + rowsize = WORDSIZE(nvars); + EFF = NEW2(nvars * rowsize, unsigned); + + row = EFF; + for (i = start_symbol; i < nsyms; i++) + { + sp = derives[i]; + for (rule = *sp; rule > 0; rule = *++sp) + { + symbol = ritem[rrhs[rule]]; + if (ISVAR(symbol)) + { + symbol -= start_symbol; + SETBIT(row, symbol); + } + } + row += rowsize; + } + + reflexive_transitive_closure(EFF, nvars); + +#ifdef DEBUG + print_EFF(); +#endif +} + + +set_first_derives() +{ + register unsigned *rrow; + register unsigned *vrow; + register int j; + register unsigned mask; + register unsigned cword; + register short *rp; + + int rule; + int i; + int rulesetsize; + int varsetsize; + + rulesetsize = WORDSIZE(nrules); + varsetsize = WORDSIZE(nvars); + first_derives = NEW2(nvars * rulesetsize, unsigned) - ntokens * rulesetsize; + + set_EFF(); + + rrow = first_derives + ntokens * rulesetsize; + for (i = start_symbol; i < nsyms; i++) + { + vrow = EFF + ((i - ntokens) * varsetsize); + cword = *vrow++; + mask = 1; + for (j = start_symbol; j < nsyms; j++) + { + if (cword & mask) + { + rp = derives[j]; + while ((rule = *rp++) >= 0) + { + SETBIT(rrow, rule); + } + } + + mask <<= 1; + if (mask == 0) + { + cword = *vrow++; + mask = 1; + } + } + + vrow += varsetsize; + rrow += rulesetsize; + } + +#ifdef DEBUG + print_first_derives(); +#endif + + FREE(EFF); +} + + +closure(nucleus, n) +short *nucleus; +int n; +{ + register int ruleno; + register unsigned word; + register unsigned mask; + register short *csp; + register unsigned *dsp; + register unsigned *rsp; + register int rulesetsize; + + short *csend; + unsigned *rsend; + int symbol; + int itemno; + + rulesetsize = WORDSIZE(nrules); + rsp = ruleset; + rsend = ruleset + rulesetsize; + for (rsp = ruleset; rsp < rsend; rsp++) + *rsp = 0; + + csend = nucleus + n; + for (csp = nucleus; csp < csend; ++csp) + { + symbol = ritem[*csp]; + if (ISVAR(symbol)) + { + dsp = first_derives + symbol * rulesetsize; + rsp = ruleset; + while (rsp < rsend) + *rsp++ |= *dsp++; + } + } + + ruleno = 0; + itemsetend = itemset; + csp = nucleus; + for (rsp = ruleset; rsp < rsend; ++rsp) + { + word = *rsp; + if (word == 0) + ruleno += BITS_PER_WORD; + else + { + mask = 1; + while (mask) + { + if (word & mask) + { + itemno = rrhs[ruleno]; + while (csp < csend && *csp < itemno) + *itemsetend++ = *csp++; + *itemsetend++ = itemno; + while (csp < csend && *csp == itemno) + ++csp; + } + + mask <<= 1; + ++ruleno; + } + } + } + + while (csp < csend) + *itemsetend++ = *csp++; + +#ifdef DEBUG + print_closure(n); +#endif +} + + + +finalize_closure() +{ + FREE(itemset); + FREE(ruleset); + FREE(first_derives + ntokens * WORDSIZE(nrules)); +} + + +#ifdef DEBUG + +print_closure(n) +int n; +{ + register short *isp; + + printf("\n\nn = %d\n\n", n); + for (isp = itemset; isp < itemsetend; isp++) + printf(" %d\n", *isp); +} + + +print_EFF() +{ + register int i, j, k; + register unsigned *rowp; + register unsigned word; + register unsigned mask; + + printf("\n\nEpsilon Free Firsts\n"); + + for (i = start_symbol; i < nsyms; i++) + { + printf("\n%s", symbol_name[i]); + rowp = EFF + ((i - start_symbol) * WORDSIZE(nvars)); + word = *rowp++; + + mask = 1; + for (j = 0; j < nvars; j++) + { + if (word & mask) + printf(" %s", symbol_name[start_symbol + j]); + + mask <<= 1; + if (mask == 0) + { + word = *rowp++; + mask = 1; + } + } + } +} + + +print_first_derives() +{ + register int i; + register int j; + register unsigned *rp; + register unsigned cword; + register unsigned mask; + + printf("\n\n\nFirst Derives\n"); + + for (i = start_symbol; i < nsyms; i++) + { + printf("\n%s derives\n", symbol_name[i]); + rp = first_derives + i * WORDSIZE(nrules); + cword = *rp++; + mask = 1; + for (j = 0; j <= nrules; j++) + { + if (cword & mask) + printf(" %d\n", j); + + mask <<= 1; + if (mask == 0) + { + cword = *rp++; + mask = 1; + } + } + } + + fflush(stdout); +} + +#endif diff --git a/yacc/defs.h b/yacc/defs.h new file mode 100644 index 0000000000..4a6eaca29c --- /dev/null +++ b/yacc/defs.h @@ -0,0 +1,310 @@ +#include <assert.h> +#include <ctype.h> +#include <stdio.h> +#ifdef ANSI +#include <stdlib.h> +#endif + +#ifdef macintosh +#include <CursorCtl.h> +#endif + +/* machine-dependent definitions */ +/* the following definitions are for the Tahoe */ +/* they might have to be changed for other machines */ + +/* MAXCHAR is the largest unsigned character value */ +/* MAXSHORT is the largest value of a C short */ +/* MINSHORT is the most negative value of a C short */ +/* MAXTABLE is the maximum table size */ +/* BITS_PER_WORD is the number of bits in a C unsigned */ +/* WORDSIZE computes the number of words needed to */ +/* store n bits */ +/* BIT returns the value of the n-th bit starting */ +/* from r (0-indexed) */ +/* SETBIT sets the n-th bit starting from r */ + +#define MAXCHAR 255 +#define MAXSHORT 32767 +#define MINSHORT -32768 +#define MAXTABLE 32500 + +#define BITS_PER_WORD 32 +#define WORDSIZE(n) (((n)+(BITS_PER_WORD-1))/BITS_PER_WORD) +#define BIT(r, n) ((((r)[(n)>>5])>>((n)&31))&1) +#define SETBIT(r, n) ((r)[(n)>>5]|=((unsigned)1<<((n)&31))) + +/* character names */ + +#define NUL '\0' /* the null character */ +#define NEWLINE '\n' /* line feed */ +#define SP ' ' /* space */ +#define BS '\b' /* backspace */ +#define HT '\t' /* horizontal tab */ +#define VT '\013' /* vertical tab */ +#define CR '\r' /* carriage return */ +#define FF '\f' /* form feed */ +#define QUOTE '\'' /* single quote */ +#define DOUBLE_QUOTE '\"' /* double quote */ +#define BACKSLASH '\\' /* backslash */ + + +/* defines for constructing filenames */ + +#define CODE_SUFFIX ".code.c" +#define DEFINES_SUFFIX ".tab.h" +#define OUTPUT_SUFFIX ".ml" +#define VERBOSE_SUFFIX ".output" +#define INTERFACE_SUFFIX ".mli" + +/* keyword codes */ + +#define TOKEN 0 +#define LEFT 1 +#define RIGHT 2 +#define NONASSOC 3 +#define MARK 4 +#define TEXT 5 +#define TYPE 6 +#define START 7 +#define UNION 8 +#define IDENT 9 + +/* symbol classes */ + +#define UNKNOWN 0 +#define TERM 1 +#define NONTERM 2 + + +/* the undefined value */ + +#define UNDEFINED (-1) + + +/* action codes */ + +#define SHIFT 1 +#define REDUCE 2 + + +/* character macros */ + +#define IS_IDENT(c) (isalnum(c) || (c) == '_' || (c) == '.' || (c) == '$') +#define IS_OCTAL(c) ((c) >= '0' && (c) <= '7') +#define NUMERIC_VALUE(c) ((c) - '0') + + +/* symbol macros */ + +#define ISTOKEN(s) ((s) < start_symbol) +#define ISVAR(s) ((s) >= start_symbol) + + +/* storage allocation macros */ + +#define CALLOC(k,n) (calloc((unsigned)(k),(unsigned)(n))) +#ifdef macintosh +#define FREE(x) (SpinCursor ((short) 1), free((char*)(x))) +#else +#define FREE(x) (free((char*)(x))) +#endif +#define MALLOC(n) (malloc((unsigned)(n))) +#define NEW(t) ((t*)allocate(sizeof(t))) +#define NEW2(n,t) ((t*)allocate((unsigned)((n)*sizeof(t)))) +#define REALLOC(p,n) (realloc((char*)(p),(unsigned)(n))) + + +/* the structure of a symbol table entry */ + +typedef struct bucket bucket; +struct bucket +{ + struct bucket *link; + struct bucket *next; + char *name; + char *tag; + short value; + short index; + short prec; + char class; + char assoc; + char entry; + char true_token; +}; + +/* TABLE_SIZE is the number of entries in the symbol table. */ +/* TABLE_SIZE must be a power of two. */ + +#define TABLE_SIZE 1024 + +/* the structure of the LR(0) state machine */ + +typedef struct core core; +struct core +{ + struct core *next; + struct core *link; + short number; + short accessing_symbol; + short nitems; + short items[1]; +}; + + +/* the structure used to record shifts */ + +typedef struct shifts shifts; +struct shifts +{ + struct shifts *next; + short number; + short nshifts; + short shift[1]; +}; + + +/* the structure used to store reductions */ + +typedef struct reductions reductions; +struct reductions +{ + struct reductions *next; + short number; + short nreds; + short rules[1]; +}; + + +/* the structure used to represent parser actions */ + +typedef struct action action; +struct action +{ + struct action *next; + short symbol; + short number; + short prec; + char action_code; + char assoc; + char suppressed; +}; + + +/* global variables */ + +extern char dflag; +extern char lflag; +extern char rflag; +extern char tflag; +extern char vflag; +extern char sflag; +extern char big_endian; + +extern char *myname; +extern char *cptr; +extern char *line; +extern int lineno; +extern int outline; + +extern char *action_file_name; +extern char *entry_file_name; +extern char *code_file_name; +extern char *defines_file_name; +extern char *input_file_name; +extern char *output_file_name; +extern char *text_file_name; +extern char *union_file_name; +extern char *verbose_file_name; +extern char *interface_file_name; + +extern FILE *action_file; +extern FILE *entry_file; +extern FILE *code_file; +extern FILE *defines_file; +extern FILE *input_file; +extern FILE *output_file; +extern FILE *text_file; +extern FILE *union_file; +extern FILE *verbose_file; +extern FILE *interface_file; + +extern int nitems; +extern int nrules; +extern int ntotalrules; +extern int nsyms; +extern int ntokens; +extern int nvars; +extern int ntags; + +extern char unionized; +extern char line_format[]; + +extern int start_symbol; +extern char **symbol_name; +extern short *symbol_value; +extern short *symbol_prec; +extern char *symbol_assoc; +extern char **symbol_tag; +extern char *symbol_true_token; + +extern short *ritem; +extern short *rlhs; +extern short *rrhs; +extern short *rprec; +extern char *rassoc; + +extern short **derives; +extern char *nullable; + +extern bucket *first_symbol; +extern bucket *last_symbol; + +extern int nstates; +extern core *first_state; +extern shifts *first_shift; +extern reductions *first_reduction; +extern short *accessing_symbol; +extern core **state_table; +extern shifts **shift_table; +extern reductions **reduction_table; +extern unsigned *LA; +extern short *LAruleno; +extern short *lookaheads; +extern short *goto_map; +extern short *from_state; +extern short *to_state; + +extern action **parser; +extern int SRtotal; +extern int RRtotal; +extern short *SRconflicts; +extern short *RRconflicts; +extern short *defred; +extern short *rules_used; +extern short nunused; +extern short final_state; + +/* global functions */ + +extern char *allocate(); +extern bucket *lookup(); +extern bucket *make_bucket(); + + +/* system variables */ + +extern int errno; + + +/* system functions */ + +#ifndef ANSI + +extern void free(); +extern char *calloc(); +extern char *malloc(); +extern char *realloc(); +extern char *strcpy(); + +#endif diff --git a/yacc/error.c b/yacc/error.c new file mode 100644 index 0000000000..231768cafb --- /dev/null +++ b/yacc/error.c @@ -0,0 +1,335 @@ +/* routines for printing error messages */ + +#include "defs.h" + + +fatal(msg) +char *msg; +{ + fprintf(stderr, "%s: f - %s\n", myname, msg); + done(2); +} + + +no_space() +{ + fprintf(stderr, "%s: f - out of space\n", myname); + done(2); +} + + +open_error(filename) +char *filename; +{ + fprintf(stderr, "%s: f - cannot open \"%s\"\n", myname, filename); + done(2); +} + + +unexpected_EOF() +{ + fprintf(stderr, "%s: e - line %d of \"%s\", unexpected end-of-file\n", + myname, lineno, input_file_name); + done(1); +} + + +print_pos(st_line, st_cptr) +char *st_line; +char *st_cptr; +{ + register char *s; + + if (st_line == 0) return; + for (s = st_line; *s != '\n'; ++s) + { + if (isprint(*s) || *s == '\t') + putc(*s, stderr); + else + putc('?', stderr); + } + putc('\n', stderr); + for (s = st_line; s < st_cptr; ++s) + { + if (*s == '\t') + putc('\t', stderr); + else + putc(' ', stderr); + } + putc('^', stderr); + putc('\n', stderr); +} + + +syntax_error(st_lineno, st_line, st_cptr) +int st_lineno; +char *st_line; +char *st_cptr; +{ + fprintf(stderr, "%s: e - line %d of \"%s\", syntax error\n", + myname, st_lineno, input_file_name); + print_pos(st_line, st_cptr); + done(1); +} + + +unterminated_comment(c_lineno, c_line, c_cptr) +int c_lineno; +char *c_line; +char *c_cptr; +{ + fprintf(stderr, "%s: e - line %d of \"%s\", unmatched /*\n", + myname, c_lineno, input_file_name); + print_pos(c_line, c_cptr); + done(1); +} + + +unterminated_string(s_lineno, s_line, s_cptr) +int s_lineno; +char *s_line; +char *s_cptr; +{ + fprintf(stderr, "%s: e - line %d of \"%s\", unterminated string\n", + myname, s_lineno, input_file_name); + print_pos(s_line, s_cptr); + done(1); +} + + +unterminated_text(t_lineno, t_line, t_cptr) +int t_lineno; +char *t_line; +char *t_cptr; +{ + fprintf(stderr, "%s: e - line %d of \"%s\", unmatched %%{\n", + myname, t_lineno, input_file_name); + print_pos(t_line, t_cptr); + done(1); +} + + +unterminated_union(u_lineno, u_line, u_cptr) +int u_lineno; +char *u_line; +char *u_cptr; +{ + fprintf(stderr, "%s: e - line %d of \"%s\", unterminated %%union \ +declaration\n", myname, u_lineno, input_file_name); + print_pos(u_line, u_cptr); + done(1); +} + + +over_unionized(u_cptr) +char *u_cptr; +{ + fprintf(stderr, "%s: e - line %d of \"%s\", too many %%union \ +declarations\n", myname, lineno, input_file_name); + print_pos(line, u_cptr); + done(1); +} + + +illegal_tag(t_lineno, t_line, t_cptr) +int t_lineno; +char *t_line; +char *t_cptr; +{ + fprintf(stderr, "%s: e - line %d of \"%s\", illegal tag\n", + myname, t_lineno, input_file_name); + print_pos(t_line, t_cptr); + done(1); +} + + +illegal_character(c_cptr) +char *c_cptr; +{ + fprintf(stderr, "%s: e - line %d of \"%s\", illegal character\n", + myname, lineno, input_file_name); + print_pos(line, c_cptr); + done(1); +} + + +used_reserved(s) +char *s; +{ + fprintf(stderr, "%s: e - line %d of \"%s\", illegal use of reserved symbol \ +%s\n", myname, lineno, input_file_name, s); + done(1); +} + + +tokenized_start(s) +char *s; +{ + fprintf(stderr, "%s: e - line %d of \"%s\", the start symbol %s cannot be \ +declared to be a token\n", myname, lineno, input_file_name, s); + done(1); +} + + +retyped_warning(s) +char *s; +{ + fprintf(stderr, "%s: w - line %d of \"%s\", the type of %s has been \ +redeclared\n", myname, lineno, input_file_name, s); +} + + +reprec_warning(s) +char *s; +{ + fprintf(stderr, "%s: w - line %d of \"%s\", the precedence of %s has been \ +redeclared\n", myname, lineno, input_file_name, s); +} + + +revalued_warning(s) +char *s; +{ + fprintf(stderr, "%s: w - line %d of \"%s\", the value of %s has been \ +redeclared\n", myname, lineno, input_file_name, s); +} + + +terminal_start(s) +char *s; +{ + fprintf(stderr, "%s: e - line %d of \"%s\", the entry point %s is a \ +token\n", myname, lineno, input_file_name, s); + done(1); +} + +too_many_entries() +{ + fprintf(stderr, "%s: e - line %d of \"%s\", more than 256 entry points\n", + myname, lineno, input_file_name); + done(1); +} + + +no_grammar() +{ + fprintf(stderr, "%s: e - line %d of \"%s\", no grammar has been \ +specified\n", myname, lineno, input_file_name); + done(1); +} + + +terminal_lhs(s_lineno) +int s_lineno; +{ + fprintf(stderr, "%s: e - line %d of \"%s\", a token appears on the lhs \ +of a production\n", myname, s_lineno, input_file_name); + done(1); +} + + +prec_redeclared() +{ + fprintf(stderr, "%s: w - line %d of \"%s\", conflicting %%prec \ +specifiers\n", myname, lineno, input_file_name); +} + + +unterminated_action(a_lineno, a_line, a_cptr) +int a_lineno; +char *a_line; +char *a_cptr; +{ + fprintf(stderr, "%s: e - line %d of \"%s\", unterminated action\n", + myname, a_lineno, input_file_name); + print_pos(a_line, a_cptr); + done(1); +} + + +dollar_warning(a_lineno, i) +int a_lineno; +int i; +{ + fprintf(stderr, "%s: w - line %d of \"%s\", $%d references beyond the \ +end of the current rule\n", myname, a_lineno, input_file_name, i); +} + + +dollar_error(a_lineno, a_line, a_cptr) +int a_lineno; +char *a_line; +char *a_cptr; +{ + fprintf(stderr, "%s: e - line %d of \"%s\", illegal $-name\n", + myname, a_lineno, input_file_name); + print_pos(a_line, a_cptr); + done(1); +} + + +untyped_lhs() +{ + fprintf(stderr, "%s: e - line %d of \"%s\", $$ is untyped\n", + myname, lineno, input_file_name); + done(1); +} + + +untyped_rhs(i, s) +int i; +char *s; +{ + fprintf(stderr, "%s: e - line %d of \"%s\", $%d (%s) is untyped\n", + myname, lineno, input_file_name, i, s); + done(1); +} + + +unknown_rhs(i) +int i; +{ + fprintf(stderr, "%s: e - line %d of \"%s\", $%d is unbound\n", + myname, lineno, input_file_name, i); + done(1); +} + +illegal_token_ref(i, name) +int i; +char *name; +{ + fprintf(stderr, "%s: e - line %d of \"%s\", $%d refers to terminal `%s', which has no argument\n", + myname, lineno, input_file_name, i, name); + done(1); +} + +default_action_warning() +{ + fprintf(stderr, "%s: w - line %d of \"%s\", the default action assigns an \ +undefined value to $$\n", myname, lineno, input_file_name); +} + + +undefined_goal(s) +char *s; +{ + fprintf(stderr, "%s: e - the start symbol %s is undefined\n", myname, s); + done(1); +} + + +undefined_symbol_warning(s) +char *s; +{ + fprintf(stderr, "%s: w - the symbol %s is undefined\n", myname, s); +} + + +entry_without_type(s) +char *s; +{ + fprintf(stderr, + "%s: e - no type has been declared for the start symbol %s\n", + myname, s); + done(1); +} diff --git a/yacc/lalr.c b/yacc/lalr.c new file mode 100644 index 0000000000..640ddc4ca7 --- /dev/null +++ b/yacc/lalr.c @@ -0,0 +1,638 @@ +#include "defs.h" + +typedef + struct shorts + { + struct shorts *next; + short value; + } + shorts; + +int tokensetsize; +short *lookaheads; +short *LAruleno; +unsigned *LA; +short *accessing_symbol; +core **state_table; +shifts **shift_table; +reductions **reduction_table; +short *goto_map; +short *from_state; +short *to_state; + +short **transpose(); + +static int infinity; +static int maxrhs; +static int ngotos; +static unsigned *F; +static short **includes; +static shorts **lookback; +static short **R; +static short *INDEX; +static short *VERTICES; +static int top; + + +lalr() +{ + tokensetsize = WORDSIZE(ntokens); + + set_state_table(); + set_accessing_symbol(); + set_shift_table(); + set_reduction_table(); + set_maxrhs(); + initialize_LA(); + set_goto_map(); + initialize_F(); + build_relations(); + compute_FOLLOWS(); + compute_lookaheads(); +} + + + +set_state_table() +{ + register core *sp; + + state_table = NEW2(nstates, core *); + for (sp = first_state; sp; sp = sp->next) + state_table[sp->number] = sp; +} + + + +set_accessing_symbol() +{ + register core *sp; + + accessing_symbol = NEW2(nstates, short); + for (sp = first_state; sp; sp = sp->next) + accessing_symbol[sp->number] = sp->accessing_symbol; +} + + + +set_shift_table() +{ + register shifts *sp; + + shift_table = NEW2(nstates, shifts *); + for (sp = first_shift; sp; sp = sp->next) + shift_table[sp->number] = sp; +} + + + +set_reduction_table() +{ + register reductions *rp; + + reduction_table = NEW2(nstates, reductions *); + for (rp = first_reduction; rp; rp = rp->next) + reduction_table[rp->number] = rp; +} + + + +set_maxrhs() +{ + register short *itemp; + register short *item_end; + register int length; + register int max; + + length = 0; + max = 0; + item_end = ritem + nitems; + for (itemp = ritem; itemp < item_end; itemp++) + { + if (*itemp >= 0) + { + length++; + } + else + { + if (length > max) max = length; + length = 0; + } + } + + maxrhs = max; +} + + + +initialize_LA() +{ + register int i, j, k; + register reductions *rp; + + lookaheads = NEW2(nstates + 1, short); + + k = 0; + for (i = 0; i < nstates; i++) + { + lookaheads[i] = k; + rp = reduction_table[i]; + if (rp) + k += rp->nreds; + } + lookaheads[nstates] = k; + + LA = NEW2(k * tokensetsize, unsigned); + LAruleno = NEW2(k, short); + lookback = NEW2(k, shorts *); + + k = 0; + for (i = 0; i < nstates; i++) + { + rp = reduction_table[i]; + if (rp) + { + for (j = 0; j < rp->nreds; j++) + { + LAruleno[k] = rp->rules[j]; + k++; + } + } + } +} + + +set_goto_map() +{ + register shifts *sp; + register int i; + register int symbol; + register int k; + register short *temp_map; + register int state2; + register int state1; + + goto_map = NEW2(nvars + 1, short) - ntokens; + temp_map = NEW2(nvars + 1, short) - ntokens; + + ngotos = 0; + for (sp = first_shift; sp; sp = sp->next) + { + for (i = sp->nshifts - 1; i >= 0; i--) + { + symbol = accessing_symbol[sp->shift[i]]; + + if (ISTOKEN(symbol)) break; + + if (ngotos == MAXSHORT) + fatal("too many gotos"); + + ngotos++; + goto_map[symbol]++; + } + } + + k = 0; + for (i = ntokens; i < nsyms; i++) + { + temp_map[i] = k; + k += goto_map[i]; + } + + for (i = ntokens; i < nsyms; i++) + goto_map[i] = temp_map[i]; + + goto_map[nsyms] = ngotos; + temp_map[nsyms] = ngotos; + + from_state = NEW2(ngotos, short); + to_state = NEW2(ngotos, short); + + for (sp = first_shift; sp; sp = sp->next) + { + state1 = sp->number; + for (i = sp->nshifts - 1; i >= 0; i--) + { + state2 = sp->shift[i]; + symbol = accessing_symbol[state2]; + + if (ISTOKEN(symbol)) break; + + k = temp_map[symbol]++; + from_state[k] = state1; + to_state[k] = state2; + } + } + + FREE(temp_map + ntokens); +} + + + +/* Map_goto maps a state/symbol pair into its numeric representation. */ + +int +map_goto(state, symbol) +int state; +int symbol; +{ + register int high; + register int low; + register int middle; + register int s; + + low = goto_map[symbol]; + high = goto_map[symbol + 1]; + + for (;;) + { + assert(low <= high); + middle = (low + high) >> 1; + s = from_state[middle]; + if (s == state) + return (middle); + else if (s < state) + low = middle + 1; + else + high = middle - 1; + } +} + + + +initialize_F() +{ + register int i; + register int j; + register int k; + register shifts *sp; + register short *edge; + register unsigned *rowp; + register short *rp; + register short **reads; + register int nedges; + register int stateno; + register int symbol; + register int nwords; + + nwords = ngotos * tokensetsize; + F = NEW2(nwords, unsigned); + + reads = NEW2(ngotos, short *); + edge = NEW2(ngotos + 1, short); + nedges = 0; + + rowp = F; + for (i = 0; i < ngotos; i++) + { + stateno = to_state[i]; + sp = shift_table[stateno]; + + if (sp) + { + k = sp->nshifts; + + for (j = 0; j < k; j++) + { + symbol = accessing_symbol[sp->shift[j]]; + if (ISVAR(symbol)) + break; + SETBIT(rowp, symbol); + } + + for (; j < k; j++) + { + symbol = accessing_symbol[sp->shift[j]]; + if (nullable[symbol]) + edge[nedges++] = map_goto(stateno, symbol); + } + + if (nedges) + { + reads[i] = rp = NEW2(nedges + 1, short); + + for (j = 0; j < nedges; j++) + rp[j] = edge[j]; + + rp[nedges] = -1; + nedges = 0; + } + } + + rowp += tokensetsize; + } + + SETBIT(F, 0); + digraph(reads); + + for (i = 0; i < ngotos; i++) + { + if (reads[i]) + FREE(reads[i]); + } + + FREE(reads); + FREE(edge); +} + + + +build_relations() +{ + register int i; + register int j; + register int k; + register short *rulep; + register short *rp; + register shifts *sp; + register int length; + register int nedges; + register int done; + register int state1; + register int stateno; + register int symbol1; + register int symbol2; + register short *shortp; + register short *edge; + register short *states; + register short **new_includes; + + includes = NEW2(ngotos, short *); + edge = NEW2(ngotos + 1, short); + states = NEW2(maxrhs + 1, short); + + for (i = 0; i < ngotos; i++) + { + nedges = 0; + state1 = from_state[i]; + symbol1 = accessing_symbol[to_state[i]]; + + for (rulep = derives[symbol1]; *rulep >= 0; rulep++) + { + length = 1; + states[0] = state1; + stateno = state1; + + for (rp = ritem + rrhs[*rulep]; *rp >= 0; rp++) + { + symbol2 = *rp; + sp = shift_table[stateno]; + k = sp->nshifts; + + for (j = 0; j < k; j++) + { + stateno = sp->shift[j]; + if (accessing_symbol[stateno] == symbol2) break; + } + + states[length++] = stateno; + } + + add_lookback_edge(stateno, *rulep, i); + + length--; + done = 0; + while (!done) + { + done = 1; + rp--; + if (ISVAR(*rp)) + { + stateno = states[--length]; + edge[nedges++] = map_goto(stateno, *rp); + if (nullable[*rp] && length > 0) done = 0; + } + } + } + + if (nedges) + { + includes[i] = shortp = NEW2(nedges + 1, short); + for (j = 0; j < nedges; j++) + shortp[j] = edge[j]; + shortp[nedges] = -1; + } + } + + new_includes = transpose(includes, ngotos); + + for (i = 0; i < ngotos; i++) + if (includes[i]) + FREE(includes[i]); + + FREE(includes); + + includes = new_includes; + + FREE(edge); + FREE(states); +} + + +add_lookback_edge(stateno, ruleno, gotono) +int stateno, ruleno, gotono; +{ + register int i, k; + register int found; + register shorts *sp; + + i = lookaheads[stateno]; + k = lookaheads[stateno + 1]; + found = 0; + while (!found && i < k) + { + if (LAruleno[i] == ruleno) + found = 1; + else + ++i; + } + assert(found); + + sp = NEW(shorts); + sp->next = lookback[i]; + sp->value = gotono; + lookback[i] = sp; +} + + + +short ** +transpose(R, n) +short **R; +int n; +{ + register short **new_R; + register short **temp_R; + register short *nedges; + register short *sp; + register int i; + register int k; + + nedges = NEW2(n, short); + + for (i = 0; i < n; i++) + { + sp = R[i]; + if (sp) + { + while (*sp >= 0) + nedges[*sp++]++; + } + } + + new_R = NEW2(n, short *); + temp_R = NEW2(n, short *); + + for (i = 0; i < n; i++) + { + k = nedges[i]; + if (k > 0) + { + sp = NEW2(k + 1, short); + new_R[i] = sp; + temp_R[i] = sp; + sp[k] = -1; + } + } + + FREE(nedges); + + for (i = 0; i < n; i++) + { + sp = R[i]; + if (sp) + { + while (*sp >= 0) + *temp_R[*sp++]++ = i; + } + } + + FREE(temp_R); + + return (new_R); +} + + + +compute_FOLLOWS() +{ + digraph(includes); +} + + +compute_lookaheads() +{ + register int i, n; + register unsigned *fp1, *fp2, *fp3; + register shorts *sp, *next; + register unsigned *rowp; + + rowp = LA; + n = lookaheads[nstates]; + for (i = 0; i < n; i++) + { + fp3 = rowp + tokensetsize; + for (sp = lookback[i]; sp; sp = sp->next) + { + fp1 = rowp; + fp2 = F + tokensetsize * sp->value; + while (fp1 < fp3) + *fp1++ |= *fp2++; + } + rowp = fp3; + } + + for (i = 0; i < n; i++) + for (sp = lookback[i]; sp; sp = next) + { + next = sp->next; + FREE(sp); + } + + FREE(lookback); + FREE(F); +} + + +digraph(relation) +short **relation; +{ + register int i; + + infinity = ngotos + 2; + INDEX = NEW2(ngotos + 1, short); + VERTICES = NEW2(ngotos + 1, short); + top = 0; + + R = relation; + + for (i = 0; i < ngotos; i++) + INDEX[i] = 0; + + for (i = 0; i < ngotos; i++) + { + if (INDEX[i] == 0 && R[i]) + traverse(i); + } + + FREE(INDEX); + FREE(VERTICES); +} + + + +traverse(i) +register int i; +{ + register unsigned *fp1; + register unsigned *fp2; + register unsigned *fp3; + register int j; + register short *rp; + + int height; + unsigned *base; + + VERTICES[++top] = i; + INDEX[i] = height = top; + + base = F + i * tokensetsize; + fp3 = base + tokensetsize; + + rp = R[i]; + if (rp) + { + while ((j = *rp++) >= 0) + { + if (INDEX[j] == 0) + traverse(j); + + if (INDEX[i] > INDEX[j]) + INDEX[i] = INDEX[j]; + + fp1 = base; + fp2 = F + j * tokensetsize; + + while (fp1 < fp3) + *fp1++ |= *fp2++; + } + } + + if (INDEX[i] == height) + { + for (;;) + { + j = VERTICES[top--]; + INDEX[j] = infinity; + + if (i == j) + break; + + fp1 = base; + fp2 = F + j * tokensetsize; + + while (fp1 < fp3) + *fp2++ = *fp1++; + } + } +} diff --git a/yacc/lr0.c b/yacc/lr0.c new file mode 100644 index 0000000000..3ee42a8840 --- /dev/null +++ b/yacc/lr0.c @@ -0,0 +1,598 @@ + +#include "defs.h" + +extern short *itemset; +extern short *itemsetend; +extern unsigned *ruleset; + +int nstates; +core *first_state; +shifts *first_shift; +reductions *first_reduction; + +int get_state(); +core *new_state(); + +static core **state_set; +static core *this_state; +static core *last_state; +static shifts *last_shift; +static reductions *last_reduction; + +static int nshifts; +static short *shift_symbol; + +static short *redset; +static short *shiftset; + +static short **kernel_base; +static short **kernel_end; +static short *kernel_items; + + +allocate_itemsets() +{ + register short *itemp; + register short *item_end; + register int symbol; + register int i; + register int count; + register int max; + register short *symbol_count; + + count = 0; + symbol_count = NEW2(nsyms, short); + + item_end = ritem + nitems; + for (itemp = ritem; itemp < item_end; itemp++) + { + symbol = *itemp; + if (symbol >= 0) + { + count++; + symbol_count[symbol]++; + } + } + + kernel_base = NEW2(nsyms, short *); + kernel_items = NEW2(count, short); + + count = 0; + max = 0; + for (i = 0; i < nsyms; i++) + { + kernel_base[i] = kernel_items + count; + count += symbol_count[i]; + if (max < symbol_count[i]) + max = symbol_count[i]; + } + + shift_symbol = symbol_count; + kernel_end = NEW2(nsyms, short *); +} + + +allocate_storage() +{ + allocate_itemsets(); + shiftset = NEW2(nsyms, short); + redset = NEW2(nrules + 1, short); + state_set = NEW2(nitems, core *); +} + + +append_states() +{ + register int i; + register int j; + register int symbol; + +#ifdef TRACE + fprintf(stderr, "Entering append_states()\n"); +#endif + for (i = 1; i < nshifts; i++) + { + symbol = shift_symbol[i]; + j = i; + while (j > 0 && shift_symbol[j - 1] > symbol) + { + shift_symbol[j] = shift_symbol[j - 1]; + j--; + } + shift_symbol[j] = symbol; + } + + for (i = 0; i < nshifts; i++) + { + symbol = shift_symbol[i]; + shiftset[i] = get_state(symbol); + } +} + + +free_storage() +{ + FREE(shift_symbol); + FREE(redset); + FREE(shiftset); + FREE(kernel_base); + FREE(kernel_end); + FREE(kernel_items); + FREE(state_set); +} + + + +generate_states() +{ + allocate_storage(); + itemset = NEW2(nitems, short); + ruleset = NEW2(WORDSIZE(nrules), unsigned); + set_first_derives(); + initialize_states(); + + while (this_state) + { + closure(this_state->items, this_state->nitems); + save_reductions(); + new_itemsets(); + append_states(); + + if (nshifts > 0) + save_shifts(); + + this_state = this_state->next; + } + + finalize_closure(); + free_storage(); +} + + + +int +get_state(symbol) +int symbol; +{ + register int key; + register short *isp1; + register short *isp2; + register short *iend; + register core *sp; + register int found; + register int n; + +#ifdef TRACE + fprintf(stderr, "Entering get_state(%d)\n", symbol); +#endif + + isp1 = kernel_base[symbol]; + iend = kernel_end[symbol]; + n = iend - isp1; + + key = *isp1; + assert(0 <= key && key < nitems); + sp = state_set[key]; + if (sp) + { + found = 0; + while (!found) + { + if (sp->nitems == n) + { + found = 1; + isp1 = kernel_base[symbol]; + isp2 = sp->items; + + while (found && isp1 < iend) + { + if (*isp1++ != *isp2++) + found = 0; + } + } + + if (!found) + { + if (sp->link) + { + sp = sp->link; + } + else + { + sp = sp->link = new_state(symbol); + found = 1; + } + } + } + } + else + { + state_set[key] = sp = new_state(symbol); + } + + return (sp->number); +} + + + +initialize_states() +{ + register int i; + register short *start_derives; + register core *p; + + start_derives = derives[start_symbol]; + for (i = 0; start_derives[i] >= 0; ++i) + continue; + + p = (core *) MALLOC(sizeof(core) + i*sizeof(short)); + if (p == 0) no_space(); + + p->next = 0; + p->link = 0; + p->number = 0; + p->accessing_symbol = 0; + p->nitems = i; + + for (i = 0; start_derives[i] >= 0; ++i) + p->items[i] = rrhs[start_derives[i]]; + + first_state = last_state = this_state = p; + nstates = 1; +} + + +new_itemsets() +{ + register int i; + register int shiftcount; + register short *isp; + register short *ksp; + register int symbol; + + for (i = 0; i < nsyms; i++) + kernel_end[i] = 0; + + shiftcount = 0; + isp = itemset; + while (isp < itemsetend) + { + i = *isp++; + symbol = ritem[i]; + if (symbol > 0) + { + ksp = kernel_end[symbol]; + if (!ksp) + { + shift_symbol[shiftcount++] = symbol; + ksp = kernel_base[symbol]; + } + + *ksp++ = i + 1; + kernel_end[symbol] = ksp; + } + } + + nshifts = shiftcount; +} + + + +core * +new_state(symbol) +int symbol; +{ + register int n; + register core *p; + register short *isp1; + register short *isp2; + register short *iend; + +#ifdef TRACE + fprintf(stderr, "Entering new_state(%d)\n", symbol); +#endif + + if (nstates >= MAXSHORT) + fatal("too many states"); + + isp1 = kernel_base[symbol]; + iend = kernel_end[symbol]; + n = iend - isp1; + + p = (core *) allocate((unsigned) (sizeof(core) + (n - 1) * sizeof(short))); + p->accessing_symbol = symbol; + p->number = nstates; + p->nitems = n; + + isp2 = p->items; + while (isp1 < iend) + *isp2++ = *isp1++; + + last_state->next = p; + last_state = p; + + nstates++; + + return (p); +} + + +/* show_cores is used for debugging */ + +show_cores() +{ + core *p; + int i, j, k, n; + int itemno; + + k = 0; + for (p = first_state; p; ++k, p = p->next) + { + if (k) printf("\n"); + printf("state %d, number = %d, accessing symbol = %s\n", + k, p->number, symbol_name[p->accessing_symbol]); + n = p->nitems; + for (i = 0; i < n; ++i) + { + itemno = p->items[i]; + printf("%4d ", itemno); + j = itemno; + while (ritem[j] >= 0) ++j; + printf("%s :", symbol_name[rlhs[-ritem[j]]]); + j = rrhs[-ritem[j]]; + while (j < itemno) + printf(" %s", symbol_name[ritem[j++]]); + printf(" ."); + while (ritem[j] >= 0) + printf(" %s", symbol_name[ritem[j++]]); + printf("\n"); + fflush(stdout); + } + } +} + + +/* show_ritems is used for debugging */ + +show_ritems() +{ + int i; + + for (i = 0; i < nitems; ++i) + printf("ritem[%d] = %d\n", i, ritem[i]); +} + + +/* show_rrhs is used for debugging */ +show_rrhs() +{ + int i; + + for (i = 0; i < nrules; ++i) + printf("rrhs[%d] = %d\n", i, rrhs[i]); +} + + +/* show_shifts is used for debugging */ + +show_shifts() +{ + shifts *p; + int i, j, k; + + k = 0; + for (p = first_shift; p; ++k, p = p->next) + { + if (k) printf("\n"); + printf("shift %d, number = %d, nshifts = %d\n", k, p->number, + p->nshifts); + j = p->nshifts; + for (i = 0; i < j; ++i) + printf("\t%d\n", p->shift[i]); + } +} + + +save_shifts() +{ + register shifts *p; + register short *sp1; + register short *sp2; + register short *send; + + p = (shifts *) allocate((unsigned) (sizeof(shifts) + + (nshifts - 1) * sizeof(short))); + + p->number = this_state->number; + p->nshifts = nshifts; + + sp1 = shiftset; + sp2 = p->shift; + send = shiftset + nshifts; + + while (sp1 < send) + *sp2++ = *sp1++; + + if (last_shift) + { + last_shift->next = p; + last_shift = p; + } + else + { + first_shift = p; + last_shift = p; + } +} + + + +save_reductions() +{ + register short *isp; + register short *rp1; + register short *rp2; + register int item; + register int count; + register reductions *p; + register short *rend; + + count = 0; + for (isp = itemset; isp < itemsetend; isp++) + { + item = ritem[*isp]; + if (item < 0) + { + redset[count++] = -item; + } + } + + if (count) + { + p = (reductions *) allocate((unsigned) (sizeof(reductions) + + (count - 1) * sizeof(short))); + + p->number = this_state->number; + p->nreds = count; + + rp1 = redset; + rp2 = p->rules; + rend = rp1 + count; + + while (rp1 < rend) + *rp2++ = *rp1++; + + if (last_reduction) + { + last_reduction->next = p; + last_reduction = p; + } + else + { + first_reduction = p; + last_reduction = p; + } + } +} + + +set_derives() +{ + register int i, k; + register int lhs; + register short *rules; + + derives = NEW2(nsyms, short *); + rules = NEW2(nvars + nrules, short); + + k = 0; + for (lhs = start_symbol; lhs < nsyms; lhs++) + { + derives[lhs] = rules + k; + for (i = 0; i < nrules; i++) + { + if (rlhs[i] == lhs) + { + rules[k] = i; + k++; + } + } + rules[k] = -1; + k++; + } + +#ifdef DEBUG + print_derives(); +#endif +} + +free_derives() +{ + FREE(derives[start_symbol]); + FREE(derives); +} + +#ifdef DEBUG +print_derives() +{ + register int i; + register short *sp; + + printf("\nDERIVES\n\n"); + + for (i = start_symbol; i < nsyms; i++) + { + printf("%s derives ", symbol_name[i]); + for (sp = derives[i]; *sp >= 0; sp++) + { + printf(" %d", *sp); + } + putchar('\n'); + } + + putchar('\n'); +} +#endif + + +set_nullable() +{ + register int i, j; + register int empty; + int done; + + nullable = MALLOC(nsyms); + if (nullable == 0) no_space(); + + for (i = 0; i < nsyms; ++i) + nullable[i] = 0; + + done = 0; + while (!done) + { + done = 1; + for (i = 1; i < nitems; i++) + { + empty = 1; + while ((j = ritem[i]) >= 0) + { + if (!nullable[j]) + empty = 0; + ++i; + } + if (empty) + { + j = rlhs[-j]; + if (!nullable[j]) + { + nullable[j] = 1; + done = 0; + } + } + } + } + +#ifdef DEBUG + for (i = 0; i < nsyms; i++) + { + if (nullable[i]) + printf("%s is nullable\n", symbol_name[i]); + else + printf("%s is not nullable\n", symbol_name[i]); + } +#endif +} + + +free_nullable() +{ + FREE(nullable); +} + + +lr0() +{ + set_derives(); + set_nullable(); + generate_states(); +} diff --git a/yacc/main.c b/yacc/main.c new file mode 100644 index 0000000000..cb93a3d294 --- /dev/null +++ b/yacc/main.c @@ -0,0 +1,388 @@ +#include <signal.h> +#include "defs.h" + +char dflag; +char lflag; +char rflag; +char tflag; +char vflag; +char sflag; +char big_endian; + +char *file_prefix = 0; +char *myname = "yacc"; +#ifdef NO_UNIX +char temp_form[] = "yacc.X"; +#else +char temp_form[] = "yacc.XXXXXXX"; +#endif + +int lineno; +int outline; + +char *action_file_name; +char *entry_file_name; +char *code_file_name; +char *interface_file_name; +char *defines_file_name; +char *input_file_name = ""; +char *output_file_name; +char *text_file_name; +char *union_file_name; +char *verbose_file_name; + +FILE *action_file; /* a temp file, used to save actions associated */ + /* with rules until the parser is written */ +FILE *entry_file; +FILE *code_file; /* y.code.c (used when the -r option is specified) */ +FILE *defines_file; /* y.tab.h */ +FILE *input_file; /* the input file */ +FILE *output_file; /* y.tab.c */ +FILE *text_file; /* a temp file, used to save text until all */ + /* symbols have been defined */ +FILE *union_file; /* a temp file, used to save the union */ + /* definition until all symbol have been */ + /* defined */ +FILE *verbose_file; /* y.output */ +FILE *interface_file; + +int nitems; +int nrules; +int ntotalrules; +int nsyms; +int ntokens; +int nvars; + +int start_symbol; +char **symbol_name; +short *symbol_value; +short *symbol_prec; +char *symbol_assoc; +char **symbol_tag; +char *symbol_true_token; + +short *ritem; +short *rlhs; +short *rrhs; +short *rprec; +char *rassoc; +short **derives; +char *nullable; + +extern char *mktemp(); +extern char *getenv(); + + +done(k) +int k; +{ + if (action_file) { fclose(action_file); unlink(action_file_name); } + if (entry_file) { fclose(entry_file); unlink(entry_file_name); } + if (text_file) { fclose(text_file); unlink(text_file_name); } + if (union_file) { fclose(union_file); unlink(union_file_name); } + if (output_file && k > 0) { + fclose(output_file); unlink(output_file_name); + } + if (interface_file && k > 0) { + fclose(interface_file); unlink(interface_file_name); + } + exit(k); +} + + +void onintr(dummy) + int dummy; +{ + done(1); +} + + +set_signals() +{ +#ifdef SIGINT + if (signal(SIGINT, SIG_IGN) != SIG_IGN) + signal(SIGINT, onintr); +#endif +#ifdef SIGTERM + if (signal(SIGTERM, SIG_IGN) != SIG_IGN) + signal(SIGTERM, onintr); +#endif +#ifdef SIGHUP + if (signal(SIGHUP, SIG_IGN) != SIG_IGN) + signal(SIGHUP, onintr); +#endif +} + + +usage() +{ + fprintf(stderr, "usage: %s [-vs] [-b file_prefix] [-el|-eb] filename\n", + myname); + exit(1); +} + +getargs(argc, argv) +int argc; +char *argv[]; +{ + register int i; + register char *s; + + if (argc > 0) myname = argv[0]; + for (i = 1; i < argc; ++i) + { + s = argv[i]; + if (*s != '-') break; + switch (*++s) + { + case '\0': + input_file = stdin; + if (i + 1 < argc) usage(); + return; + + case '-': + ++i; + goto no_more_options; + + case 'b': + if (*++s) + file_prefix = s; + else if (++i < argc) + file_prefix = argv[i]; + else + usage(); + continue; + + case 'v': + vflag = 1; + break; + + case 's': + sflag = 1; + break; + + default: + usage(); + } + + for (;;) + { + switch (*++s) + { + case '\0': + goto end_of_option; + + case 'v': + vflag = 1; + break; + + case 's': + sflag = 1; + break; + + default: + usage(); + } + } +end_of_option:; + } + +no_more_options:; + if (i + 1 != argc) usage(); + input_file_name = argv[i]; + if (file_prefix == 0) { + int len; + len = strlen(argv[i]); + file_prefix = malloc(len + 1); + if (file_prefix == 0) no_space(); + strcpy(file_prefix, argv[i]); + while (len > 0) { + len--; + if (file_prefix[len] == '.') { + file_prefix[len] = 0; + break; + } + } + } +} + + +char * +allocate(n) +unsigned n; +{ + register char *p; + + p = NULL; + if (n) + { + p = CALLOC(1, n); + if (!p) no_space(); + } + return (p); +} + + +create_file_names() +{ + int i, len; + char *tmpdir; + +#ifdef NO_UNIX + len = 0; + i = sizeof(temp_form); +#else + tmpdir = getenv("TMPDIR"); + if (tmpdir == 0) tmpdir = "/tmp"; + len = strlen(tmpdir); + i = len + sizeof(temp_form); + if (len && tmpdir[len-1] != '/') + ++i; +#endif + + action_file_name = MALLOC(i); + if (action_file_name == 0) no_space(); + entry_file_name = MALLOC(i); + if (entry_file_name == 0) no_space(); + text_file_name = MALLOC(i); + if (text_file_name == 0) no_space(); + union_file_name = MALLOC(i); + if (union_file_name == 0) no_space(); + +#ifndef NO_UNIX + strcpy(action_file_name, tmpdir); + strcpy(entry_file_name, tmpdir); + strcpy(text_file_name, tmpdir); + strcpy(union_file_name, tmpdir); + + if (len && tmpdir[len - 1] != '/') + { + action_file_name[len] = '/'; + entry_file_name[len] = '/'; + text_file_name[len] = '/'; + union_file_name[len] = '/'; + ++len; + } +#endif + + strcpy(action_file_name + len, temp_form); + strcpy(entry_file_name + len, temp_form); + strcpy(text_file_name + len, temp_form); + strcpy(union_file_name + len, temp_form); + + action_file_name[len + 5] = 'a'; + entry_file_name[len + 5] = 'e'; + text_file_name[len + 5] = 't'; + union_file_name[len + 5] = 'u'; + +#ifndef NO_UNIX + mktemp(action_file_name); + mktemp(entry_file_name); + mktemp(text_file_name); + mktemp(union_file_name); +#endif + + len = strlen(file_prefix); + + output_file_name = MALLOC(len + 7); + if (output_file_name == 0) + no_space(); + strcpy(output_file_name, file_prefix); + strcpy(output_file_name + len, OUTPUT_SUFFIX); + + code_file_name = output_file_name; + + if (vflag) + { + verbose_file_name = MALLOC(len + 8); + if (verbose_file_name == 0) + no_space(); + strcpy(verbose_file_name, file_prefix); + strcpy(verbose_file_name + len, VERBOSE_SUFFIX); + } + + interface_file_name = MALLOC(len + 8); + if (interface_file_name == 0) + no_space(); + strcpy(interface_file_name, file_prefix); + strcpy(interface_file_name + len, INTERFACE_SUFFIX); + +} + + +open_files() +{ + create_file_names(); + + if (input_file == 0) + { + input_file = fopen(input_file_name, "r"); + if (input_file == 0) + open_error(input_file_name); + } + + action_file = fopen(action_file_name, "w"); + if (action_file == 0) + open_error(action_file_name); + + entry_file = fopen(entry_file_name, "w"); + if (entry_file == 0) + open_error(entry_file_name); + + text_file = fopen(text_file_name, "w"); + if (text_file == 0) + open_error(text_file_name); + + if (vflag) + { + verbose_file = fopen(verbose_file_name, "w"); + if (verbose_file == 0) + open_error(verbose_file_name); + } + + if (dflag) + { + defines_file = fopen(defines_file_name, "w"); + if (defines_file == 0) + open_error(defines_file_name); + union_file = fopen(union_file_name, "w"); + if (union_file == 0) + open_error(union_file_name); + } + + output_file = fopen(output_file_name, "w"); + if (output_file == 0) + open_error(output_file_name); + + if (rflag) + { + code_file = fopen(code_file_name, "w"); + if (code_file == 0) + open_error(code_file_name); + } + else + code_file = output_file; + + + interface_file = fopen(interface_file_name, "w"); + if (interface_file == 0) + open_error(interface_file_name); +} + + +main(argc, argv) +int argc; +char *argv[]; +{ + set_signals(); + getargs(argc, argv); + open_files(); + reader(); + lr0(); + lalr(); + make_parser(); + verbose(); + output(); + done(0); + /*NOTREACHED*/ +} diff --git a/yacc/mkpar.c b/yacc/mkpar.c new file mode 100644 index 0000000000..e1aef60fec --- /dev/null +++ b/yacc/mkpar.c @@ -0,0 +1,357 @@ + +#include "defs.h" + +action **parser; +int SRtotal; +int RRtotal; +short *SRconflicts; +short *RRconflicts; +short *defred; +short *rules_used; +short nunused; +short final_state; + +static int SRcount; +static int RRcount; + +extern action *parse_actions(); +extern action *get_shifts(); +extern action *add_reductions(); +extern action *add_reduce(); + + +make_parser() +{ + register int i; + + parser = NEW2(nstates, action *); + for (i = 0; i < nstates; i++) + parser[i] = parse_actions(i); + + find_final_state(); + remove_conflicts(); + unused_rules(); + if (SRtotal + RRtotal > 0) total_conflicts(); + defreds(); +} + + +action * +parse_actions(stateno) +register int stateno; +{ + register action *actions; + + actions = get_shifts(stateno); + actions = add_reductions(stateno, actions); + return (actions); +} + + +action * +get_shifts(stateno) +int stateno; +{ + register action *actions, *temp; + register shifts *sp; + register short *to_state; + register int i, k; + register int symbol; + + actions = 0; + sp = shift_table[stateno]; + if (sp) + { + to_state = sp->shift; + for (i = sp->nshifts - 1; i >= 0; i--) + { + k = to_state[i]; + symbol = accessing_symbol[k]; + if (ISTOKEN(symbol)) + { + temp = NEW(action); + temp->next = actions; + temp->symbol = symbol; + temp->number = k; + temp->prec = symbol_prec[symbol]; + temp->action_code = SHIFT; + temp->assoc = symbol_assoc[symbol]; + actions = temp; + } + } + } + return (actions); +} + +action * +add_reductions(stateno, actions) +int stateno; +register action *actions; +{ + register int i, j, m, n; + register int ruleno, tokensetsize; + register unsigned *rowp; + + tokensetsize = WORDSIZE(ntokens); + m = lookaheads[stateno]; + n = lookaheads[stateno + 1]; + for (i = m; i < n; i++) + { + ruleno = LAruleno[i]; + rowp = LA + i * tokensetsize; + for (j = ntokens - 1; j >= 0; j--) + { + if (BIT(rowp, j)) + actions = add_reduce(actions, ruleno, j); + } + } + return (actions); +} + + +action * +add_reduce(actions, ruleno, symbol) +register action *actions; +register int ruleno, symbol; +{ + register action *temp, *prev, *next; + + prev = 0; + for (next = actions; next && next->symbol < symbol; next = next->next) + prev = next; + + while (next && next->symbol == symbol && next->action_code == SHIFT) + { + prev = next; + next = next->next; + } + + while (next && next->symbol == symbol && + next->action_code == REDUCE && next->number < ruleno) + { + prev = next; + next = next->next; + } + + temp = NEW(action); + temp->next = next; + temp->symbol = symbol; + temp->number = ruleno; + temp->prec = rprec[ruleno]; + temp->action_code = REDUCE; + temp->assoc = rassoc[ruleno]; + + if (prev) + prev->next = temp; + else + actions = temp; + + return (actions); +} + + +find_final_state() +{ + register int goal, i; + register short *to_state; + register shifts *p; + + p = shift_table[0]; + to_state = p->shift; + goal = ritem[1]; + for (i = p->nshifts - 1; i >= 0; --i) + { + final_state = to_state[i]; + if (accessing_symbol[final_state] == goal) break; + } +} + + +unused_rules() +{ + register int i; + register action *p; + + rules_used = (short *) MALLOC(nrules*sizeof(short)); + if (rules_used == 0) no_space(); + + for (i = 0; i < nrules; ++i) + rules_used[i] = 0; + + for (i = 0; i < nstates; ++i) + { + for (p = parser[i]; p; p = p->next) + { + if (p->action_code == REDUCE && p->suppressed == 0) + rules_used[p->number] = 1; + } + } + + nunused = 0; + for (i = 3; i < nrules; ++i) + if (!rules_used[i]) ++nunused; + + if (nunused) + if (nunused == 1) + fprintf(stderr, "%s: 1 rule never reduced\n", myname); + else + fprintf(stderr, "%s: %d rules never reduced\n", myname, nunused); +} + + +remove_conflicts() +{ + register int i; + register int symbol; + register action *p, *pref; + + SRtotal = 0; + RRtotal = 0; + SRconflicts = NEW2(nstates, short); + RRconflicts = NEW2(nstates, short); + for (i = 0; i < nstates; i++) + { + SRcount = 0; + RRcount = 0; + symbol = -1; + for (p = parser[i]; p; p = p->next) + { + if (p->symbol != symbol) + { + pref = p; + symbol = p->symbol; + } + else if (i == final_state && symbol == 0) + { + SRcount++; + p->suppressed = 1; + } + else if (pref->action_code == SHIFT) + { + if (pref->prec > 0 && p->prec > 0) + { + if (pref->prec < p->prec) + { + pref->suppressed = 2; + pref = p; + } + else if (pref->prec > p->prec) + { + p->suppressed = 2; + } + else if (pref->assoc == LEFT) + { + pref->suppressed = 2; + pref = p; + } + else if (pref->assoc == RIGHT) + { + p->suppressed = 2; + } + else + { + pref->suppressed = 2; + p->suppressed = 2; + } + } + else + { + SRcount++; + p->suppressed = 1; + } + } + else + { + RRcount++; + p->suppressed = 1; + } + } + SRtotal += SRcount; + RRtotal += RRcount; + SRconflicts[i] = SRcount; + RRconflicts[i] = RRcount; + } +} + + +total_conflicts() +{ + fprintf(stderr, "%s: ", myname); + if (SRtotal == 1) + fprintf(stderr, "1 shift/reduce conflict"); + else if (SRtotal > 1) + fprintf(stderr, "%d shift/reduce conflicts", SRtotal); + + if (SRtotal && RRtotal) + fprintf(stderr, ", "); + + if (RRtotal == 1) + fprintf(stderr, "1 reduce/reduce conflict"); + else if (RRtotal > 1) + fprintf(stderr, "%d reduce/reduce conflicts", RRtotal); + + fprintf(stderr, ".\n"); +} + + +int +sole_reduction(stateno) +int stateno; +{ + register int count, ruleno; + register action *p; + + count = 0; + ruleno = 0; + for (p = parser[stateno]; p; p = p->next) + { + if (p->action_code == SHIFT && p->suppressed == 0) + return (0); + else if (p->action_code == REDUCE && p->suppressed == 0) + { + if (ruleno > 0 && p->number != ruleno) + return (0); + if (p->symbol != 1) + ++count; + ruleno = p->number; + } + } + + if (count == 0) + return (0); + return (ruleno); +} + + +defreds() +{ + register int i; + + defred = NEW2(nstates, short); + for (i = 0; i < nstates; i++) + defred[i] = sole_reduction(i); +} + +free_action_row(p) +register action *p; +{ + register action *q; + + while (p) + { + q = p->next; + FREE(p); + p = q; + } +} + +free_parser() +{ + register int i; + + for (i = 0; i < nstates; i++) + free_action_row(parser[i]); + + FREE(parser); +} + diff --git a/yacc/output.c b/yacc/output.c new file mode 100644 index 0000000000..5ebc5b26ba --- /dev/null +++ b/yacc/output.c @@ -0,0 +1,900 @@ +#include "defs.h" + +static int nvectors; +static int nentries; +static short **froms; +static short **tos; +static short *tally; +static short *width; +static short *state_count; +static short *order; +static short *base; +static short *pos; +static int maxtable; +static short *table; +static short *check; +static int lowzero; +static int high; + + +output() +{ + extern char *header[], *define_tables[]; + + free_itemsets(); + free_shifts(); + free_reductions(); + write_section(header); + output_stored_text(); + output_transl(); + output_rule_data(); + output_yydefred(); + output_actions(); + free_parser(); + output_debug(); + output_trailing_text(); + if (sflag) + fprintf(output_file, + "let yyact = Array.new %d (fun () -> (failwith \"parser\" : Obj.t))\n", + ntotalrules); + else + fprintf(output_file, + "let yyact = [|\n (fun () -> failwith \"parser\")\n"); + output_semantic_actions(); + if (!sflag) + fprintf(output_file, "|]\n"); + write_section(define_tables); + output_entries(); +} + + +static void output_char(n) + unsigned n; +{ + n = n & 0xFF; + putc('\\', output_file); + putc('0' + n / 100, output_file); + putc('0' + (n / 10) % 10, output_file); + putc('0' + n % 10, output_file); +} + +static void output_short(n) + int n; +{ + output_char(n); + output_char(n >> 8); +} + +output_rule_data() +{ + register int i; + register int j; + + + fprintf(output_file, "let yylhs = \""); + output_short(symbol_value[start_symbol]); + + j = 8; + for (i = 3; i < nrules; i++) + { + if (j >= 8) + { + if (!rflag) ++outline; + fprintf(output_file, "\\\n"); + j = 1; + } + else + ++j; + + output_short(symbol_value[rlhs[i]]); + } + if (!rflag) outline += 2; + fprintf(output_file, "\"\n\n"); + + fprintf(output_file, "let yylen = \""); + output_short(2); + + j = 8; + for (i = 3; i < nrules; i++) + { + if (j >= 8) + { + if (!rflag) ++outline; + fprintf(output_file, "\\\n"); + j = 1; + } + else + j++; + + output_short(rrhs[i + 1] - rrhs[i] - 1); + } + if (!rflag) outline += 2; + fprintf(output_file, "\"\n\n"); +} + + +output_yydefred() +{ + register int i, j; + + fprintf(output_file, "let yydefred = \""); + output_short(defred[0] ? defred[0] - 2 : 0); + + j = 8; + for (i = 1; i < nstates; i++) + { + if (j < 8) + ++j; + else + { + if (!rflag) ++outline; + fprintf(output_file, "\\\n"); + j = 1; + } + + output_short(defred[i] ? defred[i] - 2 : 0); + } + + if (!rflag) outline += 2; + fprintf(output_file, "\"\n\n"); +} + + +output_actions() +{ + nvectors = 2*nstates + nvars; + + froms = NEW2(nvectors, short *); + tos = NEW2(nvectors, short *); + tally = NEW2(nvectors, short); + width = NEW2(nvectors, short); + + token_actions(); + FREE(lookaheads); + FREE(LA); + FREE(LAruleno); + FREE(accessing_symbol); + + goto_actions(); + FREE(goto_map + ntokens); + FREE(from_state); + FREE(to_state); + + sort_actions(); + pack_table(); + output_base(); + output_table(); + output_check(); +} + + +token_actions() +{ + register int i, j; + register int shiftcount, reducecount; + register int max, min; + register short *actionrow, *r, *s; + register action *p; + + actionrow = NEW2(2*ntokens, short); + for (i = 0; i < nstates; ++i) + { + if (parser[i]) + { + for (j = 0; j < 2*ntokens; ++j) + actionrow[j] = 0; + + shiftcount = 0; + reducecount = 0; + for (p = parser[i]; p; p = p->next) + { + if (p->suppressed == 0) + { + if (p->action_code == SHIFT) + { + ++shiftcount; + actionrow[p->symbol] = p->number; + } + else if (p->action_code == REDUCE && p->number != defred[i]) + { + ++reducecount; + actionrow[p->symbol + ntokens] = p->number; + } + } + } + + tally[i] = shiftcount; + tally[nstates+i] = reducecount; + width[i] = 0; + width[nstates+i] = 0; + if (shiftcount > 0) + { + froms[i] = r = NEW2(shiftcount, short); + tos[i] = s = NEW2(shiftcount, short); + min = MAXSHORT; + max = 0; + for (j = 0; j < ntokens; ++j) + { + if (actionrow[j]) + { + if (min > symbol_value[j]) + min = symbol_value[j]; + if (max < symbol_value[j]) + max = symbol_value[j]; + *r++ = symbol_value[j]; + *s++ = actionrow[j]; + } + } + width[i] = max - min + 1; + } + if (reducecount > 0) + { + froms[nstates+i] = r = NEW2(reducecount, short); + tos[nstates+i] = s = NEW2(reducecount, short); + min = MAXSHORT; + max = 0; + for (j = 0; j < ntokens; ++j) + { + if (actionrow[ntokens+j]) + { + if (min > symbol_value[j]) + min = symbol_value[j]; + if (max < symbol_value[j]) + max = symbol_value[j]; + *r++ = symbol_value[j]; + *s++ = actionrow[ntokens+j] - 2; + } + } + width[nstates+i] = max - min + 1; + } + } + } + FREE(actionrow); +} + +goto_actions() +{ + register int i, j, k; + + state_count = NEW2(nstates, short); + + k = default_goto(start_symbol + 1); + fprintf(output_file, "let yydgoto = \""); + output_short(k); + + save_column(start_symbol + 1, k); + + j = 8; + for (i = start_symbol + 2; i < nsyms; i++) + { + if (j >= 8) + { + if (!rflag) ++outline; + fprintf(output_file, "\\\n"); + j = 1; + } + else + ++j; + + k = default_goto(i); + output_short(k); + save_column(i, k); + } + + if (!rflag) outline += 2; + fprintf(output_file, "\"\n\n"); + FREE(state_count); +} + +int +default_goto(symbol) +int symbol; +{ + register int i; + register int m; + register int n; + register int default_state; + register int max; + + m = goto_map[symbol]; + n = goto_map[symbol + 1]; + + if (m == n) return (0); + + for (i = 0; i < nstates; i++) + state_count[i] = 0; + + for (i = m; i < n; i++) + state_count[to_state[i]]++; + + max = 0; + default_state = 0; + for (i = 0; i < nstates; i++) + { + if (state_count[i] > max) + { + max = state_count[i]; + default_state = i; + } + } + + return (default_state); +} + + + +save_column(symbol, default_state) +int symbol; +int default_state; +{ + register int i; + register int m; + register int n; + register short *sp; + register short *sp1; + register short *sp2; + register int count; + register int symno; + + m = goto_map[symbol]; + n = goto_map[symbol + 1]; + + count = 0; + for (i = m; i < n; i++) + { + if (to_state[i] != default_state) + ++count; + } + if (count == 0) return; + + symno = symbol_value[symbol] + 2*nstates; + + froms[symno] = sp1 = sp = NEW2(count, short); + tos[symno] = sp2 = NEW2(count, short); + + for (i = m; i < n; i++) + { + if (to_state[i] != default_state) + { + *sp1++ = from_state[i]; + *sp2++ = to_state[i]; + } + } + + tally[symno] = count; + width[symno] = sp1[-1] - sp[0] + 1; +} + +sort_actions() +{ + register int i; + register int j; + register int k; + register int t; + register int w; + + order = NEW2(nvectors, short); + nentries = 0; + + for (i = 0; i < nvectors; i++) + { + if (tally[i] > 0) + { + t = tally[i]; + w = width[i]; + j = nentries - 1; + + while (j >= 0 && (width[order[j]] < w)) + j--; + + while (j >= 0 && (width[order[j]] == w) && (tally[order[j]] < t)) + j--; + + for (k = nentries - 1; k > j; k--) + order[k + 1] = order[k]; + + order[j + 1] = i; + nentries++; + } + } +} + + +pack_table() +{ + register int i; + register int place; + register int state; + + base = NEW2(nvectors, short); + pos = NEW2(nentries, short); + + maxtable = 1000; + table = NEW2(maxtable, short); + check = NEW2(maxtable, short); + + lowzero = 0; + high = 0; + + for (i = 0; i < maxtable; i++) + check[i] = -1; + + for (i = 0; i < nentries; i++) + { + state = matching_vector(i); + + if (state < 0) + place = pack_vector(i); + else + place = base[state]; + + pos[i] = place; + base[order[i]] = place; + } + + for (i = 0; i < nvectors; i++) + { + if (froms[i]) + FREE(froms[i]); + if (tos[i]) + FREE(tos[i]); + } + + FREE(froms); + FREE(tos); + FREE(pos); +} + + +/* The function matching_vector determines if the vector specified by */ +/* the input parameter matches a previously considered vector. The */ +/* test at the start of the function checks if the vector represents */ +/* a row of shifts over terminal symbols or a row of reductions, or a */ +/* column of shifts over a nonterminal symbol. Berkeley Yacc does not */ +/* check if a column of shifts over a nonterminal symbols matches a */ +/* previously considered vector. Because of the nature of LR parsing */ +/* tables, no two columns can match. Therefore, the only possible */ +/* match would be between a row and a column. Such matches are */ +/* unlikely. Therefore, to save time, no attempt is made to see if a */ +/* column matches a previously considered vector. */ +/* */ +/* Matching_vector is poorly designed. The test could easily be made */ +/* faster. Also, it depends on the vectors being in a specific */ +/* order. */ + +int +matching_vector(vector) +int vector; +{ + register int i; + register int j; + register int k; + register int t; + register int w; + register int match; + register int prev; + + i = order[vector]; + if (i >= 2*nstates) + return (-1); + + t = tally[i]; + w = width[i]; + + for (prev = vector - 1; prev >= 0; prev--) + { + j = order[prev]; + if (width[j] != w || tally[j] != t) + return (-1); + + match = 1; + for (k = 0; match && k < t; k++) + { + if (tos[j][k] != tos[i][k] || froms[j][k] != froms[i][k]) + match = 0; + } + + if (match) + return (j); + } + + return (-1); +} + + + +int +pack_vector(vector) +int vector; +{ + register int i, j, k, l; + register int t; + register int loc; + register int ok; + register short *from; + register short *to; + int newmax; + + i = order[vector]; + t = tally[i]; + assert(t); + + from = froms[i]; + to = tos[i]; + + j = lowzero - from[0]; + for (k = 1; k < t; ++k) + if (lowzero - from[k] > j) + j = lowzero - from[k]; + for (;; ++j) + { + if (j == 0) + continue; + ok = 1; + for (k = 0; ok && k < t; k++) + { + loc = j + from[k]; + if (loc >= maxtable) + { + if (loc >= MAXTABLE) + fatal("maximum table size exceeded"); + + newmax = maxtable; + do { newmax += 200; } while (newmax <= loc); + table = (short *) REALLOC(table, newmax*sizeof(short)); + if (table == 0) no_space(); + check = (short *) REALLOC(check, newmax*sizeof(short)); + if (check == 0) no_space(); + for (l = maxtable; l < newmax; ++l) + { + table[l] = 0; + check[l] = -1; + } + maxtable = newmax; + } + + if (check[loc] != -1) + ok = 0; + } + for (k = 0; ok && k < vector; k++) + { + if (pos[k] == j) + ok = 0; + } + if (ok) + { + for (k = 0; k < t; k++) + { + loc = j + from[k]; + table[loc] = to[k]; + check[loc] = from[k]; + if (loc > high) high = loc; + } + + while (check[lowzero] != -1) + ++lowzero; + + return (j); + } + } +} + + + +output_base() +{ + register int i, j; + + fprintf(output_file, "let yysindex = \""); + output_short(base[0]); + + j = 8; + for (i = 1; i < nstates; i++) + { + if (j >= 8) + { + if (!rflag) ++outline; + fprintf(output_file, "\\\n"); + j = 1; + } + else + ++j; + + output_short(base[i]); + } + + if (!rflag) outline += 2; + fprintf(output_file, "\"\n\n"); + + fprintf(output_file, "let yyrindex = \""); + output_short(base[nstates]); + + j = 8; + for (i = nstates + 1; i < 2*nstates; i++) + { + if (j >= 8) + { + if (!rflag) ++outline; + fprintf(output_file, "\\\n"); + j = 1; + } + else + ++j; + + output_short(base[i]); + } + + if (!rflag) outline += 2; + fprintf(output_file, "\"\n\n"); + + fprintf(output_file, "let yygindex = \""); + output_short(base[2*nstates]); + + j = 8; + for (i = 2*nstates + 1; i < nvectors - 1; i++) + { + if (j >= 8) + { + if (!rflag) ++outline; + fprintf(output_file, "\\\n"); + j = 1; + } + else + ++j; + + output_short(base[i]); + } + + if (!rflag) outline += 2; + fprintf(output_file, "\"\n\n"); + FREE(base); +} + + + +output_table() +{ + register int i; + register int j; + + ++outline; + fprintf(code_file, "let yytablesize = %d\n", high); + fprintf(output_file, "let yytable = \""); + output_short(table[0]); + + j = 8; + for (i = 1; i <= high; i++) + { + if (j >= 8) + { + if (!rflag) ++outline; + fprintf(output_file, "\\\n"); + j = 1; + } + else + ++j; + + output_short(table[i]); + } + + if (!rflag) outline += 2; + fprintf(output_file, "\"\n\n"); + FREE(table); +} + + + +output_check() +{ + register int i; + register int j; + + fprintf(output_file, "let yycheck = \""); + output_short(check[0]); + + j = 8; + for (i = 1; i <= high; i++) + { + if (j >= 8) + { + if (!rflag) ++outline; + fprintf(output_file, "\\\n"); + j = 1; + } + else + ++j; + + output_short(check[i]); + } + + if (!rflag) outline += 2; + fprintf(output_file, "\"\n\n"); + FREE(check); +} + + +output_transl() +{ + int i; + + fprintf(code_file, "let yytransl = [|\n"); + for (i = 0; i < ntokens; i++) { + if (symbol_true_token[i]) { + fprintf(code_file, " %3d (* %s *);\n", symbol_value[i], symbol_name[i]); + } + } + fprintf(code_file, " 0|]\n\n"); +} + +output_stored_text() +{ + register int c; + register FILE *in, *out; + + fclose(text_file); + text_file = fopen(text_file_name, "r"); + if (text_file == NULL) + open_error(text_file_name); + in = text_file; + if ((c = getc(in)) == EOF) + return; + out = code_file; + if (c == '\n') + ++outline; + putc(c, out); + while ((c = getc(in)) != EOF) + { + if (c == '\n') + ++outline; + putc(c, out); + } + if (!lflag) + fprintf(out, line_format, ++outline + 1, code_file_name); +} + + +output_debug() +{ +} + +output_trailing_text() +{ + register int c, last; + register FILE *in, *out; + + if (line == 0) + return; + + in = input_file; + out = code_file; + c = *cptr; + if (c == '\n') + { + ++lineno; + if ((c = getc(in)) == EOF) + return; + if (!lflag) + { + ++outline; + fprintf(out, line_format, lineno, input_file_name); + } + if (c == '\n') + ++outline; + putc(c, out); + last = c; + } + else + { + if (!lflag) + { + ++outline; + fprintf(out, line_format, lineno, input_file_name); + } + do { putc(c, out); } while ((c = *++cptr) != '\n'); + ++outline; + putc('\n', out); + last = '\n'; + } + + while ((c = getc(in)) != EOF) + { + if (c == '\n') + ++outline; + putc(c, out); + last = c; + } + + if (last != '\n') + { + ++outline; + putc('\n', out); + } + if (!lflag) + fprintf(out, line_format, ++outline + 1, code_file_name); +} + + +copy_file(file, file_name) + FILE ** file; + char * file_name; +{ + register int c, last; + register FILE *out; + + fclose(*file); + *file = fopen(file_name, "r"); + if (*file == NULL) + open_error(file_name); + + if ((c = getc(*file)) == EOF) + return; + + out = code_file; + last = c; + if (c == '\n') + ++outline; + putc(c, out); + while ((c = getc(*file)) != EOF) + { + if (c == '\n') + ++outline; + putc(c, out); + last = c; + } + + if (last != '\n') + { + ++outline; + putc('\n', out); + } + +} + +output_semantic_actions() +{ + copy_file (&action_file, action_file_name); +} + +output_entries() +{ + copy_file (&entry_file, entry_file_name); +} + +free_itemsets() +{ + register core *cp, *next; + + FREE(state_table); + for (cp = first_state; cp; cp = next) + { + next = cp->next; + FREE(cp); + } +} + + +free_shifts() +{ + register shifts *sp, *next; + + FREE(shift_table); + for (sp = first_shift; sp; sp = next) + { + next = sp->next; + FREE(sp); + } +} + + + +free_reductions() +{ + register reductions *rp, *next; + + FREE(reduction_table); + for (rp = first_reduction; rp; rp = next) + { + next = rp->next; + FREE(rp); + } +} diff --git a/yacc/parsing.c b/yacc/parsing.c new file mode 100644 index 0000000000..1b60993229 --- /dev/null +++ b/yacc/parsing.c @@ -0,0 +1,136 @@ +int yydebug; +int yynerrs; +int yyerrflag; +int yychar; +short *yyssp; +YYSTYPE *yyvsp; +YYSTYPE yyval; +YYSTYPE yylval; +short yyss[YYSTACKSIZE]; +YYSTYPE yyvs[YYSTACKSIZE]; +#define yystacksize YYSTACKSIZE +#define YYABORT goto yyabort +#define YYACCEPT goto yyaccept +#define YYERROR goto yyerrlab + +value yyparse(tables, entrypoint, lexbuf) + value tables, entrypoint, lexbuf; +{ + register int yym, yyn, yystate; + +#define yyact FIELD(tables,0) +#define yytransl FIELD(tables,1) +#define yylhs FIELD(tables, 2) +#define yylen FIELD(tables, 3) +#define yydefred FIELD(tables, 4) +#define yydgoto FIELD(tables, 5) +#define yysindex FIELD(tables, 6) +#define yyrindex FIELD(tables, 7) +#define yygindex FIELD(tables, 8) +#define YYTABLESIZE CINT(FIELD(tables, 9)) +#define yytable FIELD(tables, 10) +#define yycheck FIELD(tables, 11) + + yynerrs = 0; + yyerrflag = 0; + yychar = (-1); + + yyssp = yyss; + yyvsp = yyvs; + *yyssp = yystate = 0; + + yychar = CINT(entrypoint); + +yyloop: + if (yyn = yydefred[yystate]) goto yyreduce; + if (yychar < 0) { + token = yylex(lexbuf); + yychar = CINT(yytransl[TAG(token)]); + yylval = FIELD(token, 0); + } + if ((yyn = CINT(yysindex[yystate])) && (yyn += yychar) >= 0 && + yyn <= YYTABLESIZE && CINT(yycheck[yyn]) == yychar) + { + if (yyssp >= yyss + yystacksize - 1) grow_stacks(); + + *++yyssp = yystate = CINT(yytable[yyn]); + *++yyvsp = yylval; + yychar = (-1); + if (yyerrflag > 0) --yyerrflag; + goto yyloop; + } + if ((yyn = CINT(yyrindex[yystate])) && (yyn += yychar) >= 0 && + yyn <= YYTABLESIZE && CINT(yycheck[yyn]) == yychar) + { + yyn = yytable[yyn]; + goto yyreduce; + } + if (yyerrflag) goto yyinrecovery; + +yynewerror: + v = alloc(1, EXN_PARSING); + FIELD(v, 0) = MLINT(yychar); + mlraise(v); + +yyerrlab: + ++yynerrs; + +yyinrecovery: + if (yyerrflag < 3) + { + yyerrflag = 3; + for (;;) + { + if ((yyn = CINT(yysindex[*yyssp])) && (yyn += YYERRCODE) >= 0 && + yyn <= YYTABLESIZE && CINT(yycheck[yyn]) == YYERRCODE) + { + if (yyssp >= yyss + yystacksize - 1) grow_stacks(); + + *++yyssp = yystate = yytable[yyn]; + *++yyvsp = yylval; + goto yyloop; + } + else + { + if (yyssp <= yyss) goto yyabort; + --yyssp; + --yyvsp; + } + } + } + else + { + if (yychar == 0) goto yyabort; + yychar = (-1); + goto yyloop; + } + +yyreduce: + yym = yylen[yyn]; + yyval = mlapply(FIELD(yyact, yyn), atom(0)); + yyssp -= yym; + yystate = *yyssp; + yyvsp -= yym; + yym = yylhs[yyn]; + if (yystate == 0 && yym == 0) + { + yystate = YYFINAL; + *++yyssp = YYFINAL; + *++yyvsp = yyval; + if (yychar < 0) + { + if ((yychar = yylex()) < 0) yychar = 0; + } + if (yychar == 0) goto yyaccept; + goto yyloop; + } + if ((yyn = CINT(yygindex[yym])) && (yyn += yystate) >= 0 && + yyn <= YYTABLESIZE && CINT(yycheck[yyn]) == yystate) + yystate = CINT(yytable[yyn]); + else + yystate = CINT(yydgoto[yym]); + if (yyssp >= yyss + yystacksize - 1) grow_stacks(); + *++yyssp = yystate; + *++yyvsp = yyval; + goto yyloop; +} diff --git a/yacc/reader.c b/yacc/reader.c new file mode 100644 index 0000000000..621f9ad8fc --- /dev/null +++ b/yacc/reader.c @@ -0,0 +1,1763 @@ +#include "defs.h" + +/* The line size must be a positive integer. One hundred was chosen */ +/* because few lines in Yacc input grammars exceed 100 characters. */ +/* Note that if a line exceeds LINESIZE characters, the line buffer */ +/* will be expanded to accomodate it. */ + +#define LINESIZE 100 + +char *cache; +int cinc, cache_size; + +int ntags, tagmax; +char **tag_table; + +char saw_eof, unionized; +char *cptr, *line; +int linesize; + +bucket *goal; +int prec; +int gensym; +char last_was_action; + +int maxitems; +bucket **pitem; + +int maxrules; +bucket **plhs; + +int name_pool_size; +char *name_pool; + +char line_format[] = "(* Line %d, file %s *)\n"; + + +cachec(c) +int c; +{ + assert(cinc >= 0); + if (cinc >= cache_size) + { + cache_size += 256; + cache = REALLOC(cache, cache_size); + if (cache == 0) no_space(); + } + cache[cinc] = c; + ++cinc; +} + + +get_line() +{ + register FILE *f = input_file; + register int c; + register int i; + + if (saw_eof || (c = getc(f)) == EOF) + { + if (line) { FREE(line); line = 0; } + cptr = 0; + saw_eof = 1; + return; + } + + if (line == 0 || linesize != (LINESIZE + 1)) + { + if (line) FREE(line); + linesize = LINESIZE + 1; + line = MALLOC(linesize); + if (line == 0) no_space(); + } + + i = 0; + ++lineno; + for (;;) + { + line[i] = c; + if (c == '\n') { cptr = line; return; } + if (++i >= linesize) + { + linesize += LINESIZE; + line = REALLOC(line, linesize); + if (line == 0) no_space(); + } + c = getc(f); + if (c == EOF) + { + line[i] = '\n'; + saw_eof = 1; + cptr = line; + return; + } + } +} + + +char * +dup_line() +{ + register char *p, *s, *t; + + if (line == 0) return (0); + s = line; + while (*s != '\n') ++s; + p = MALLOC(s - line + 1); + if (p == 0) no_space(); + + s = line; + t = p; + while ((*t++ = *s++) != '\n') continue; + return (p); +} + + +skip_comment() +{ + register char *s; + + int st_lineno = lineno; + char *st_line = dup_line(); + char *st_cptr = st_line + (cptr - line); + + s = cptr + 2; + for (;;) + { + if (*s == '*' && s[1] == '/') + { + cptr = s + 2; + FREE(st_line); + return; + } + if (*s == '\n') + { + get_line(); + if (line == 0) + unterminated_comment(st_lineno, st_line, st_cptr); + s = cptr; + } + else + ++s; + } +} + + +int +nextc() +{ + register char *s; + + if (line == 0) + { + get_line(); + if (line == 0) + return (EOF); + } + + s = cptr; + for (;;) + { + switch (*s) + { + case '\n': + get_line(); + if (line == 0) return (EOF); + s = cptr; + break; + + case ' ': + case '\t': + case '\f': + case '\r': + case '\v': + case ',': + case ';': + ++s; + break; + + case '\\': + cptr = s; + return ('%'); + + case '/': + if (s[1] == '*') + { + cptr = s; + skip_comment(); + s = cptr; + break; + } + else if (s[1] == '/') + { + get_line(); + if (line == 0) return (EOF); + s = cptr; + break; + } + /* fall through */ + + default: + cptr = s; + return (*s); + } + } +} + + +int +keyword() +{ + register int c; + char *t_cptr = cptr; + + c = *++cptr; + if (isalpha(c)) + { + cinc = 0; + for (;;) + { + if (isalpha(c)) + { + if (isupper(c)) c = tolower(c); + cachec(c); + } + else if (isdigit(c) || c == '_' || c == '.' || c == '$') + cachec(c); + else + break; + c = *++cptr; + } + cachec(NUL); + + if (strcmp(cache, "token") == 0 || strcmp(cache, "term") == 0) + return (TOKEN); + if (strcmp(cache, "type") == 0) + return (TYPE); + if (strcmp(cache, "left") == 0) + return (LEFT); + if (strcmp(cache, "right") == 0) + return (RIGHT); + if (strcmp(cache, "nonassoc") == 0 || strcmp(cache, "binary") == 0) + return (NONASSOC); + if (strcmp(cache, "start") == 0) + return (START); + if (strcmp(cache, "union") == 0) + return (UNION); + if (strcmp(cache, "ident") == 0) + return (IDENT); + } + else + { + ++cptr; + if (c == '{') + return (TEXT); + if (c == '%' || c == '\\') + return (MARK); + if (c == '<') + return (LEFT); + if (c == '>') + return (RIGHT); + if (c == '0') + return (TOKEN); + if (c == '2') + return (NONASSOC); + } + syntax_error(lineno, line, t_cptr); + /*NOTREACHED*/ +} + + +copy_ident() +{ + register int c; + register FILE *f = output_file; + + c = nextc(); + if (c == EOF) unexpected_EOF(); + if (c != '"') syntax_error(lineno, line, cptr); + ++outline; + fprintf(f, "#ident \""); + for (;;) + { + c = *++cptr; + if (c == '\n') + { + fprintf(f, "\"\n"); + return; + } + putc(c, f); + if (c == '"') + { + putc('\n', f); + ++cptr; + return; + } + } +} + + +copy_text() +{ + register int c; + int quote; + register FILE *f = text_file; + int need_newline = 0; + int t_lineno = lineno; + char *t_line = dup_line(); + char *t_cptr = t_line + (cptr - line - 2); + + if (*cptr == '\n') + { + get_line(); + if (line == 0) + unterminated_text(t_lineno, t_line, t_cptr); + } + +loop: + c = *cptr++; + switch (c) + { + case '\n': + next_line: + putc('\n', f); + need_newline = 0; + get_line(); + if (line) goto loop; + unterminated_text(t_lineno, t_line, t_cptr); + + case '`': + case '"': + { + int s_lineno = lineno; + char *s_line = dup_line(); + char *s_cptr = s_line + (cptr - line - 1); + + quote = c; + putc(c, f); + for (;;) + { + c = *cptr++; + putc(c, f); + if (c == quote) + { + need_newline = 1; + FREE(s_line); + goto loop; + } + if (c == '\n') + unterminated_string(s_lineno, s_line, s_cptr); + if (c == '\\') + { + c = *cptr++; + putc(c, f); + if (c == '\n') + { + get_line(); + if (line == 0) + unterminated_string(s_lineno, s_line, s_cptr); + } + } + } + } + + case '(': + putc(c, f); + need_newline = 1; + c = *cptr; + if (c == '*') + { + int c_lineno = lineno; + char *c_line = dup_line(); + char *c_cptr = c_line + (cptr - line - 1); + + putc('*', f); + ++cptr; + for (;;) + { + c = *cptr++; + putc(c, f); + if (c == '*' && *cptr == ')') + { + putc(')', f); + ++cptr; + FREE(c_line); + goto loop; + } + if (c == '\n') + { + get_line(); + if (line == 0) + unterminated_comment(c_lineno, c_line, c_cptr); + } + } + } + need_newline = 1; + goto loop; + + case '%': + case '\\': + if (*cptr == '}') + { + if (need_newline) putc('\n', f); + ++cptr; + FREE(t_line); + return; + } + /* fall through */ + + default: + putc(c, f); + need_newline = 1; + goto loop; + } +} + + +copy_union() +{ + register int c; + int quote; + int depth; + int u_lineno = lineno; + char *u_line = dup_line(); + char *u_cptr = u_line + (cptr - line - 6); + + if (unionized) over_unionized(cptr - 6); + unionized = 1; + + if (!lflag) + fprintf(text_file, line_format, lineno, input_file_name); + + fprintf(text_file, "typedef union"); + if (dflag) fprintf(union_file, "typedef union"); + + depth = 1; + cptr++; + +loop: + c = *cptr++; + putc(c, text_file); + if (dflag) putc(c, union_file); + switch (c) + { + case '\n': + next_line: + get_line(); + if (line == 0) unterminated_union(u_lineno, u_line, u_cptr); + goto loop; + + case '{': + ++depth; + goto loop; + + case '}': + --depth; + if (c == '}' && depth == 0) { + fprintf(text_file, " YYSTYPE;\n"); + FREE(u_line); + return; + } + goto loop; + + case '\'': + case '"': + { + int s_lineno = lineno; + char *s_line = dup_line(); + char *s_cptr = s_line + (cptr - line - 1); + + quote = c; + for (;;) + { + c = *cptr++; + putc(c, text_file); + if (dflag) putc(c, union_file); + if (c == quote) + { + FREE(s_line); + goto loop; + } + if (c == '\n') + unterminated_string(s_lineno, s_line, s_cptr); + if (c == '\\') + { + c = *cptr++; + putc(c, text_file); + if (dflag) putc(c, union_file); + if (c == '\n') + { + get_line(); + if (line == 0) + unterminated_string(s_lineno, s_line, s_cptr); + } + } + } + } + + case '(': + c = *cptr; + if (c == '*') + { + int c_lineno = lineno; + char *c_line = dup_line(); + char *c_cptr = c_line + (cptr - line - 1); + + putc('*', text_file); + if (dflag) putc('*', union_file); + ++cptr; + for (;;) + { + c = *cptr++; + putc(c, text_file); + if (dflag) putc(c, union_file); + if (c == '*' && *cptr == ')') + { + putc(')', text_file); + if (dflag) putc(')', union_file); + ++cptr; + FREE(c_line); + goto loop; + } + if (c == '\n') + { + get_line(); + if (line == 0) + unterminated_comment(c_lineno, c_line, c_cptr); + } + } + } + goto loop; + + default: + goto loop; + } +} + + +int +hexval(c) +int c; +{ + if (c >= '0' && c <= '9') + return (c - '0'); + if (c >= 'A' && c <= 'F') + return (c - 'A' + 10); + if (c >= 'a' && c <= 'f') + return (c - 'a' + 10); + return (-1); +} + + +bucket * +get_literal() +{ + register int c, quote; + register int i; + register int n; + register char *s; + register bucket *bp; + int s_lineno = lineno; + char *s_line = dup_line(); + char *s_cptr = s_line + (cptr - line); + + quote = *cptr++; + cinc = 0; + for (;;) + { + c = *cptr++; + if (c == quote) break; + if (c == '\n') unterminated_string(s_lineno, s_line, s_cptr); + if (c == '\\') + { + char *c_cptr = cptr - 1; + + c = *cptr++; + switch (c) + { + case '\n': + get_line(); + if (line == 0) unterminated_string(s_lineno, s_line, s_cptr); + continue; + + case '0': case '1': case '2': case '3': + case '4': case '5': case '6': case '7': + n = c - '0'; + c = *cptr; + if (IS_OCTAL(c)) + { + n = (n << 3) + (c - '0'); + c = *++cptr; + if (IS_OCTAL(c)) + { + n = (n << 3) + (c - '0'); + ++cptr; + } + } + if (n > MAXCHAR) illegal_character(c_cptr); + c = n; + break; + + case 'x': + c = *cptr++; + n = hexval(c); + if (n < 0 || n >= 16) + illegal_character(c_cptr); + for (;;) + { + c = *cptr; + i = hexval(c); + if (i < 0 || i >= 16) break; + ++cptr; + n = (n << 4) + i; + if (n > MAXCHAR) illegal_character(c_cptr); + } + c = n; + break; + + case 'a': c = 7; break; + case 'b': c = '\b'; break; + case 'f': c = '\f'; break; + case 'n': c = '\n'; break; + case 'r': c = '\r'; break; + case 't': c = '\t'; break; + case 'v': c = '\v'; break; + } + } + cachec(c); + } + FREE(s_line); + + n = cinc; + s = MALLOC(n); + if (s == 0) no_space(); + + for (i = 0; i < n; ++i) + s[i] = cache[i]; + + cinc = 0; + if (n == 1) + cachec('\''); + else + cachec('"'); + + for (i = 0; i < n; ++i) + { + c = ((unsigned char *)s)[i]; + if (c == '\\' || c == cache[0]) + { + cachec('\\'); + cachec(c); + } + else if (isprint(c)) + cachec(c); + else + { + cachec('\\'); + switch (c) + { + case 7: cachec('a'); break; + case '\b': cachec('b'); break; + case '\f': cachec('f'); break; + case '\n': cachec('n'); break; + case '\r': cachec('r'); break; + case '\t': cachec('t'); break; + case '\v': cachec('v'); break; + default: + cachec(((c >> 6) & 7) + '0'); + cachec(((c >> 3) & 7) + '0'); + cachec((c & 7) + '0'); + break; + } + } + } + + if (n == 1) + cachec('\''); + else + cachec('"'); + + cachec(NUL); + bp = lookup(cache); + bp->class = TERM; + if (n == 1 && bp->value == UNDEFINED) + bp->value = *(unsigned char *)s; + FREE(s); + + return (bp); +} + + +int +is_reserved(name) +char *name; +{ + char *s; + + if (strcmp(name, ".") == 0 || + strcmp(name, "$accept") == 0 || + strcmp(name, "$end") == 0) + return (1); + + if (name[0] == '$' && name[1] == '$' && isdigit(name[2])) + { + s = name + 3; + while (isdigit(*s)) ++s; + if (*s == NUL) return (1); + } + + return (0); +} + + +bucket * +get_name() +{ + register int c; + + cinc = 0; + for (c = *cptr; IS_IDENT(c); c = *++cptr) + cachec(c); + cachec(NUL); + + if (is_reserved(cache)) used_reserved(cache); + + return (lookup(cache)); +} + + +int +get_number() +{ + register int c; + register int n; + + n = 0; + for (c = *cptr; isdigit(c); c = *++cptr) + n = 10*n + (c - '0'); + + return (n); +} + + +char * +get_tag() +{ + register int c; + register int i; + register char *s; + int t_lineno = lineno; + char *t_line = dup_line(); + char *t_cptr = t_line + (cptr - line); + + cinc = 0; + while (1) { + c = *++cptr; + if (c == EOF) unexpected_EOF(); + if (c == '>') break; + cachec(c); + } + ++cptr; + cachec(NUL); + + for (i = 0; i < ntags; ++i) + { + if (strcmp(cache, tag_table[i]) == 0) + return (tag_table[i]); + } + + if (ntags >= tagmax) + { + tagmax += 16; + tag_table = (char **) + (tag_table ? REALLOC(tag_table, tagmax*sizeof(char *)) + : MALLOC(tagmax*sizeof(char *))); + if (tag_table == 0) no_space(); + } + + s = MALLOC(cinc); + if (s == 0) no_space(); + strcpy(s, cache); + tag_table[ntags] = s; + ++ntags; + FREE(t_line); + return (s); +} + + +declare_tokens(assoc) +int assoc; +{ + register int c; + register bucket *bp; + int value; + char *tag = 0; + + if (assoc != TOKEN) ++prec; + + c = nextc(); + if (c == EOF) unexpected_EOF(); + if (c == '<') + { + tag = get_tag(); + c = nextc(); + if (c == EOF) unexpected_EOF(); + } + + for (;;) + { + if (isalpha(c) || c == '_' || c == '.' || c == '$') + bp = get_name(); + else if (c == '\'' || c == '"') + bp = get_literal(); + else + return; + + if (bp == goal) tokenized_start(bp->name); + bp->class = TERM; + + if (tag) + { + if (bp->tag && tag != bp->tag) + retyped_warning(bp->name); + bp->tag = tag; + } + + if (assoc == TOKEN) + { + bp->true_token = 1; + } + else + { + if (bp->prec && prec != bp->prec) + reprec_warning(bp->name); + bp->assoc = assoc; + bp->prec = prec; + } + + + c = nextc(); + if (c == EOF) unexpected_EOF(); + value = UNDEFINED; + if (isdigit(c)) + { + value = get_number(); + if (bp->value != UNDEFINED && value != bp->value) + revalued_warning(bp->name); + bp->value = value; + c = nextc(); + if (c == EOF) unexpected_EOF(); + } + } +} + + +declare_types() +{ + register int c; + register bucket *bp; + char *tag; + + c = nextc(); + if (c == EOF) unexpected_EOF(); + if (c != '<') syntax_error(lineno, line, cptr); + tag = get_tag(); + + for (;;) + { + c = nextc(); + if (isalpha(c) || c == '_' || c == '.' || c == '$') + bp = get_name(); + else if (c == '\'' || c == '"') + bp = get_literal(); + else + return; + + if (bp->tag && tag != bp->tag) + retyped_warning(bp->name); + bp->tag = tag; + } +} + + +declare_start() +{ + register int c; + register bucket *bp; + static int entry_counter = 0; + + c = nextc(); + if (c == EOF) unexpected_EOF(); + if (!isalpha(c) && c != '_' && c != '.' && c != '$') + syntax_error(lineno, line, cptr); + bp = get_name(); + + if (bp->class == TERM) + terminal_start(bp->name); + bp->entry = ++entry_counter; + if (entry_counter == 256) + too_many_entries(); +} + + +read_declarations() +{ + register int c, k; + + cache_size = 256; + cache = MALLOC(cache_size); + if (cache == 0) no_space(); + + for (;;) + { + c = nextc(); + if (c == EOF) unexpected_EOF(); + if (c != '%') syntax_error(lineno, line, cptr); + switch (k = keyword()) + { + case MARK: + return; + + case IDENT: + copy_ident(); + break; + + case TEXT: + copy_text(); + break; + + case UNION: + copy_union(); + break; + + case TOKEN: + case LEFT: + case RIGHT: + case NONASSOC: + declare_tokens(k); + break; + + case TYPE: + declare_types(); + break; + + case START: + declare_start(); + break; + } + } +} + +output_token_type() +{ + bucket * bp; + int n; + + fprintf(interface_file, "type token =\n"); + fprintf(output_file, "type token =\n"); + n = 0; + for (bp = first_symbol; bp; bp = bp->next) { + if (bp->class == TERM && bp->true_token) { + fprintf(interface_file, " %c %s", n == 0 ? ' ' : '|', bp->name); + fprintf(output_file, " %c %s", n == 0 ? ' ' : '|', bp->name); + if (bp->tag) { + fprintf(interface_file, " of %s", bp->tag); + fprintf(output_file, " of %s", bp->tag); + } + fprintf(interface_file, "\n"); + fprintf(output_file, "\n"); + n++; + } + } + fprintf(interface_file, "\n"); + fprintf(output_file, "\n"); +} + +initialize_grammar() +{ + nitems = 4; + maxitems = 300; + pitem = (bucket **) MALLOC(maxitems*sizeof(bucket *)); + if (pitem == 0) no_space(); + pitem[0] = 0; + pitem[1] = 0; + pitem[2] = 0; + pitem[3] = 0; + + nrules = 3; + maxrules = 100; + plhs = (bucket **) MALLOC(maxrules*sizeof(bucket *)); + if (plhs == 0) no_space(); + plhs[0] = 0; + plhs[1] = 0; + plhs[2] = 0; + rprec = (short *) MALLOC(maxrules*sizeof(short)); + if (rprec == 0) no_space(); + rprec[0] = 0; + rprec[1] = 0; + rprec[2] = 0; + rassoc = (char *) MALLOC(maxrules*sizeof(char)); + if (rassoc == 0) no_space(); + rassoc[0] = TOKEN; + rassoc[1] = TOKEN; + rassoc[2] = TOKEN; +} + + +expand_items() +{ + maxitems += 300; + pitem = (bucket **) REALLOC(pitem, maxitems*sizeof(bucket *)); + if (pitem == 0) no_space(); +} + + +expand_rules() +{ + maxrules += 100; + plhs = (bucket **) REALLOC(plhs, maxrules*sizeof(bucket *)); + if (plhs == 0) no_space(); + rprec = (short *) REALLOC(rprec, maxrules*sizeof(short)); + if (rprec == 0) no_space(); + rassoc = (char *) REALLOC(rassoc, maxrules*sizeof(char)); + if (rassoc == 0) no_space(); +} + + +advance_to_start() +{ + register int c; + register bucket *bp; + char *s_cptr; + int s_lineno; + + for (;;) + { + c = nextc(); + if (c != '%') break; + s_cptr = cptr; + switch (keyword()) + { + case MARK: + no_grammar(); + + case TEXT: + copy_text(); + break; + + case START: + declare_start(); + break; + + default: + syntax_error(lineno, line, s_cptr); + } + } + + c = nextc(); + if (!isalpha(c) && c != '_' && c != '.' && c != '_') + syntax_error(lineno, line, cptr); + bp = get_name(); + if (goal == 0) + { + if (bp->class == TERM) + terminal_start(bp->name); + goal = bp; + } + + s_lineno = lineno; + c = nextc(); + if (c == EOF) unexpected_EOF(); + if (c != ':') syntax_error(lineno, line, cptr); + start_rule(bp, s_lineno); + ++cptr; +} + + +start_rule(bp, s_lineno) +register bucket *bp; +int s_lineno; +{ + if (bp->class == TERM) + terminal_lhs(s_lineno); + bp->class = NONTERM; + if (nrules >= maxrules) + expand_rules(); + plhs[nrules] = bp; + rprec[nrules] = UNDEFINED; + rassoc[nrules] = TOKEN; +} + + +end_rule() +{ + register int i; + + if (!last_was_action && plhs[nrules]->tag) + { + for (i = nitems - 1; pitem[i]; --i) continue; + if (pitem[i+1] == 0 || pitem[i+1]->tag != plhs[nrules]->tag) + default_action_warning(); + } + + last_was_action = 0; + if (nitems >= maxitems) expand_items(); + pitem[nitems] = 0; + ++nitems; + ++nrules; +} + + +insert_empty_rule() +{ + register bucket *bp, **bpp; + + assert(cache); + sprintf(cache, "$$%d", ++gensym); + bp = make_bucket(cache); + last_symbol->next = bp; + last_symbol = bp; + bp->tag = plhs[nrules]->tag; + bp->class = NONTERM; + + if ((nitems += 2) > maxitems) + expand_items(); + bpp = pitem + nitems - 1; + *bpp-- = bp; + while (bpp[0] = bpp[-1]) --bpp; + + if (++nrules >= maxrules) + expand_rules(); + plhs[nrules] = plhs[nrules-1]; + plhs[nrules-1] = bp; + rprec[nrules] = rprec[nrules-1]; + rprec[nrules-1] = 0; + rassoc[nrules] = rassoc[nrules-1]; + rassoc[nrules-1] = TOKEN; +} + + +add_symbol() +{ + register int c; + register bucket *bp; + int s_lineno = lineno; + + c = *cptr; + if (c == '\'' || c == '"') + bp = get_literal(); + else + bp = get_name(); + + c = nextc(); + if (c == ':') + { + end_rule(); + start_rule(bp, s_lineno); + ++cptr; + return; + } + + if (last_was_action) + insert_empty_rule(); + last_was_action = 0; + + if (++nitems > maxitems) + expand_items(); + pitem[nitems-1] = bp; +} + + +copy_action() +{ + register int c; + register int i, n; + int depth; + int quote; + bucket *item; + char *tagres; + register FILE *f = action_file; + int a_lineno = lineno; + char *a_line = dup_line(); + char *a_cptr = a_line + (cptr - line); + + if (last_was_action) + insert_empty_rule(); + last_was_action = 1; + + fprintf(f, "(* Rule %d, file %s, line %d *)\n", + nrules-2, input_file_name, lineno); + if (sflag) + fprintf(f, "yyact.(%d) <- (fun () -> Obj.repr((", nrules-2); + else + fprintf(f, "; (fun () -> Obj.repr(("); + + n = 0; + for (i = nitems - 1; pitem[i]; --i) ++n; + + depth = 1; + cptr++; + +loop: + c = *cptr; + if (c == '$') + { + if (isdigit(cptr[1])) + { + ++cptr; + i = get_number(); + + if (i <= 0 || i > n) + unknown_rhs(i); + item = pitem[nitems + i - n - 1]; + if (item->tag) { + fprintf(f, "(peek_val %d : %s)", n - i, item->tag); + } else { + if (item->class == TERM) + illegal_token_ref(i, item->name); + if (sflag) + fprintf(f, "(peek_val %d)", n - i); + else + fprintf(f, "(peek_val %d : '%s)", n - i, item->name); + } + goto loop; + } + } + if (isalpha(c) || c == '_' || c == '$') + { + do + { + putc(c, f); + c = *++cptr; + } while (isalnum(c) || c == '_' || c == '$'); + goto loop; + } + if (c == '}' && depth == 1) { + cptr++; + tagres = plhs[nrules]->tag; + if (tagres) + fprintf(f, ") : %s))\n", tagres); + else if (sflag) + fprintf(f, ")))\n"); + else + fprintf(f, ") : '%s))\n", plhs[nrules]->name); + if (sflag) + fprintf(f, "\n"); + return; + } + putc(c, f); + ++cptr; + switch (c) + { + case '\n': + next_line: + get_line(); + if (line) goto loop; + unterminated_action(a_lineno, a_line, a_cptr); + + case '{': + ++depth; + goto loop; + + case '}': + --depth; + goto loop; + + case '`': + case '"': + { + int s_lineno = lineno; + char *s_line = dup_line(); + char *s_cptr = s_line + (cptr - line - 1); + + quote = c; + for (;;) + { + c = *cptr++; + putc(c, f); + if (c == quote) + { + FREE(s_line); + goto loop; + } + if (c == '\n') + unterminated_string(s_lineno, s_line, s_cptr); + if (c == '\\') + { + c = *cptr++; + putc(c, f); + if (c == '\n') + { + get_line(); + if (line == 0) + unterminated_string(s_lineno, s_line, s_cptr); + } + } + } + } + + case '(': + c = *cptr; + if (c == '*') + { + int c_lineno = lineno; + char *c_line = dup_line(); + char *c_cptr = c_line + (cptr - line - 1); + + putc('*', f); + ++cptr; + for (;;) + { + c = *cptr++; + putc(c, f); + if (c == '*' && *cptr == ')') + { + putc(')', f); + ++cptr; + FREE(c_line); + goto loop; + } + if (c == '\n') + { + get_line(); + if (line == 0) + unterminated_comment(c_lineno, c_line, c_cptr); + } + } + } + goto loop; + + default: + goto loop; + } +} + + +int +mark_symbol() +{ + register int c; + register bucket *bp; + + c = cptr[1]; + if (c == '%' || c == '\\') + { + cptr += 2; + return (1); + } + + if (c == '=') + cptr += 2; + else if ((c == 'p' || c == 'P') && + ((c = cptr[2]) == 'r' || c == 'R') && + ((c = cptr[3]) == 'e' || c == 'E') && + ((c = cptr[4]) == 'c' || c == 'C') && + ((c = cptr[5], !IS_IDENT(c)))) + cptr += 5; + else + syntax_error(lineno, line, cptr); + + c = nextc(); + if (isalpha(c) || c == '_' || c == '.' || c == '$') + bp = get_name(); + else if (c == '\'' || c == '"') + bp = get_literal(); + else + { + syntax_error(lineno, line, cptr); + /*NOTREACHED*/ + } + + if (rprec[nrules] != UNDEFINED && bp->prec != rprec[nrules]) + prec_redeclared(); + + rprec[nrules] = bp->prec; + rassoc[nrules] = bp->assoc; + return (0); +} + + +read_grammar() +{ + register int c; + + initialize_grammar(); + advance_to_start(); + + for (;;) + { + c = nextc(); + if (c == EOF) break; + if (isalpha(c) || c == '_' || c == '.' || c == '$' || c == '\'' || + c == '"') + add_symbol(); + else if (c == '{' || c == '=') + copy_action(); + else if (c == '|') + { + end_rule(); + start_rule(plhs[nrules-1], 0); + ++cptr; + } + else if (c == '%') + { + if (mark_symbol()) break; + } + else + syntax_error(lineno, line, cptr); + } + end_rule(); +} + + +free_tags() +{ + register int i; + + if (tag_table == 0) return; + + for (i = 0; i < ntags; ++i) + { + assert(tag_table[i]); + FREE(tag_table[i]); + } + FREE(tag_table); +} + + +pack_names() +{ + register bucket *bp; + register char *p, *s, *t; + + name_pool_size = 13; /* 13 == sizeof("$end") + sizeof("$accept") */ + for (bp = first_symbol; bp; bp = bp->next) + name_pool_size += strlen(bp->name) + 1; + name_pool = MALLOC(name_pool_size); + if (name_pool == 0) no_space(); + + strcpy(name_pool, "$accept"); + strcpy(name_pool+8, "$end"); + t = name_pool + 13; + for (bp = first_symbol; bp; bp = bp->next) + { + p = t; + s = bp->name; + while (*t++ = *s++) continue; + FREE(bp->name); + bp->name = p; + } +} + + +check_symbols() +{ + register bucket *bp; + + if (goal->class == UNKNOWN) + undefined_goal(goal->name); + + for (bp = first_symbol; bp; bp = bp->next) + { + if (bp->class == UNKNOWN) + { + undefined_symbol_warning(bp->name); + bp->class = TERM; + } + } +} + + +pack_symbols() +{ + register bucket *bp; + register bucket **v; + register int i, j, k, n; + + nsyms = 2; + ntokens = 1; + for (bp = first_symbol; bp; bp = bp->next) + { + ++nsyms; + if (bp->class == TERM) ++ntokens; + } + start_symbol = ntokens; + nvars = nsyms - ntokens; + + symbol_name = (char **) MALLOC(nsyms*sizeof(char *)); + if (symbol_name == 0) no_space(); + symbol_value = (short *) MALLOC(nsyms*sizeof(short)); + if (symbol_value == 0) no_space(); + symbol_prec = (short *) MALLOC(nsyms*sizeof(short)); + if (symbol_prec == 0) no_space(); + symbol_assoc = MALLOC(nsyms); + if (symbol_assoc == 0) no_space(); + symbol_tag = (char **) MALLOC(nsyms*sizeof(char *)); + if (symbol_tag == 0) no_space(); + symbol_true_token = (char *) MALLOC(nsyms*sizeof(char)); + if (symbol_true_token == 0) no_space(); + + v = (bucket **) MALLOC(nsyms*sizeof(bucket *)); + if (v == 0) no_space(); + + v[0] = 0; + v[start_symbol] = 0; + + i = 1; + j = start_symbol + 1; + for (bp = first_symbol; bp; bp = bp->next) + { + if (bp->class == TERM) + v[i++] = bp; + else + v[j++] = bp; + } + assert(i == ntokens && j == nsyms); + + for (i = 1; i < ntokens; ++i) + v[i]->index = i; + + goal->index = start_symbol + 1; + k = start_symbol + 2; + while (++i < nsyms) + if (v[i] != goal) + { + v[i]->index = k; + ++k; + } + + goal->value = 0; + k = 1; + for (i = start_symbol + 1; i < nsyms; ++i) + { + if (v[i] != goal) + { + v[i]->value = k; + ++k; + } + } + + k = 0; + for (i = 1; i < ntokens; ++i) + { + n = v[i]->value; + if (n > 256) + { + for (j = k++; j > 0 && symbol_value[j-1] > n; --j) + symbol_value[j] = symbol_value[j-1]; + symbol_value[j] = n; + } + } + + if (v[1]->value == UNDEFINED) + v[1]->value = 256; + + j = 0; + n = 257; + for (i = 2; i < ntokens; ++i) + { + if (v[i]->value == UNDEFINED) + { + while (j < k && n == symbol_value[j]) + { + while (++j < k && n == symbol_value[j]) continue; + ++n; + } + v[i]->value = n; + ++n; + } + } + + symbol_name[0] = name_pool + 8; + symbol_value[0] = 0; + symbol_prec[0] = 0; + symbol_assoc[0] = TOKEN; + symbol_tag[0] = ""; + symbol_true_token[0] = 0; + for (i = 1; i < ntokens; ++i) + { + symbol_name[i] = v[i]->name; + symbol_value[i] = v[i]->value; + symbol_prec[i] = v[i]->prec; + symbol_assoc[i] = v[i]->assoc; + symbol_tag[i] = v[i]->tag; + symbol_true_token[i] = v[i]->true_token; + } + symbol_name[start_symbol] = name_pool; + symbol_value[start_symbol] = -1; + symbol_prec[start_symbol] = 0; + symbol_assoc[start_symbol] = TOKEN; + symbol_tag[start_symbol] = ""; + symbol_true_token[start_symbol] = 0; + for (++i; i < nsyms; ++i) + { + k = v[i]->index; + symbol_name[k] = v[i]->name; + symbol_value[k] = v[i]->value; + symbol_prec[k] = v[i]->prec; + symbol_assoc[k] = v[i]->assoc; + symbol_tag[i] = v[i]->tag; + symbol_true_token[i] = v[i]->true_token; + } + + FREE(v); +} + + +make_goal() +{ + static char name[7] = "'\\xxx'"; + bucket * bp; + bucket * bc; + + goal = lookup("%entry%"); + ntotalrules = nrules - 2; + for(bp = first_symbol; bp != 0; bp = bp->next) { + if (bp->entry) { + start_rule(goal, 0); + if (nitems + 2> maxitems) + expand_items(); + name[2] = '0' + ((bp->entry >> 6) & 7); + name[3] = '0' + ((bp->entry >> 3) & 7); + name[4] = '0' + (bp->entry & 7); + bc = lookup(name); + bc->class = TERM; + bc->value = (unsigned char) bp->entry; + pitem[nitems++] = bc; + pitem[nitems++] = bp; + fprintf(entry_file, + "let %s lexfun lexbuf = yyparse yytables %d lexfun lexbuf\n", + bp->name, bp->entry); + if (bp->tag == NULL) + entry_without_type(bp->name); + fprintf(interface_file, + "val %s :\n (Lexing.lexbuf -> token) -> Lexing.lexbuf -> %s\n", + bp->name, + bp->tag); + fprintf(action_file, + "(* Entry %s *)\n", bp->name); + if (sflag) + fprintf(action_file, + "yyact.(%d) <- (fun () -> raise (YYexit (peek_val 0)))\n", + ntotalrules); + else + fprintf(action_file, + "; (fun () -> raise (YYexit (peek_val 0)))\n"); + ntotalrules++; + last_was_action = 1; + end_rule(); + } + } +} + +pack_grammar() +{ + register int i, j; + int assoc, prec; + + ritem = (short *) MALLOC(nitems*sizeof(short)); + if (ritem == 0) no_space(); + rlhs = (short *) MALLOC(nrules*sizeof(short)); + if (rlhs == 0) no_space(); + rrhs = (short *) MALLOC((nrules+1)*sizeof(short)); + if (rrhs == 0) no_space(); + rprec = (short *) REALLOC(rprec, nrules*sizeof(short)); + if (rprec == 0) no_space(); + rassoc = REALLOC(rassoc, nrules); + if (rassoc == 0) no_space(); + + ritem[0] = -1; + ritem[1] = goal->index; + ritem[2] = 0; + ritem[3] = -2; + rlhs[0] = 0; + rlhs[1] = 0; + rlhs[2] = start_symbol; + rrhs[0] = 0; + rrhs[1] = 0; + rrhs[2] = 1; + + j = 4; + for (i = 3; i < nrules; ++i) + { + rlhs[i] = plhs[i]->index; + rrhs[i] = j; + assoc = TOKEN; + prec = 0; + while (pitem[j]) + { + ritem[j] = pitem[j]->index; + if (pitem[j]->class == TERM) + { + prec = pitem[j]->prec; + assoc = pitem[j]->assoc; + } + ++j; + } + ritem[j] = -i; + ++j; + if (rprec[i] == UNDEFINED) + { + rprec[i] = prec; + rassoc[i] = assoc; + } + } + rrhs[i] = j; + + FREE(plhs); + FREE(pitem); +} + + +print_grammar() +{ + register int i, j, k; + int spacing; + register FILE *f = verbose_file; + + if (!vflag) return; + + k = 1; + for (i = 2; i < nrules; ++i) + { + if (rlhs[i] != rlhs[i-1]) + { + if (i != 2) fprintf(f, "\n"); + fprintf(f, "%4d %s :", i - 2, symbol_name[rlhs[i]]); + spacing = strlen(symbol_name[rlhs[i]]) + 1; + } + else + { + fprintf(f, "%4d ", i - 2); + j = spacing; + while (--j >= 0) putc(' ', f); + putc('|', f); + } + + while (ritem[k] >= 0) + { + fprintf(f, " %s", symbol_name[ritem[k]]); + ++k; + } + ++k; + putc('\n', f); + } +} + + +reader() +{ + create_symbol_table(); + read_declarations(); + output_token_type(); + read_grammar(); + make_goal(); + free_symbol_table(); + free_tags(); + pack_names(); + check_symbols(); + pack_symbols(); + pack_grammar(); + free_symbols(); + print_grammar(); +} diff --git a/yacc/skeleton.c b/yacc/skeleton.c new file mode 100644 index 0000000000..41ecb4e5c2 --- /dev/null +++ b/yacc/skeleton.c @@ -0,0 +1,39 @@ +#include "defs.h" + +char *header[] = +{ + "open Parsing", + 0 +}; + +char *define_tables[] = +{ + "let yytables =", + " { actions=yyact;", + " transl=yytransl;", + " lhs=yylhs;", + " len=yylen;", + " defred=yydefred;", + " dgoto=yydgoto;", + " sindex=yysindex;", + " rindex=yyrindex;", + " gindex=yygindex;", + " tablesize=yytablesize;", + " table=yytable;", + " check=yycheck }", + 0 +}; + +write_section(section) +char *section[]; +{ + register int i; + register FILE *fp; + + fp = code_file; + for (i = 0; section[i]; ++i) + { + ++outline; + fprintf(fp, "%s\n", section[i]); + } +} diff --git a/yacc/symtab.c b/yacc/symtab.c new file mode 100644 index 0000000000..91e6bf1000 --- /dev/null +++ b/yacc/symtab.c @@ -0,0 +1,115 @@ +#include "defs.h" + + +bucket **symbol_table; +bucket *first_symbol; +bucket *last_symbol; + + +int +hash(name) +char *name; +{ + register char *s; + register int c, k; + + assert(name && *name); + s = name; + k = *s; + while (c = *++s) + k = (31*k + c) & (TABLE_SIZE - 1); + + return (k); +} + + +bucket * +make_bucket(name) +char *name; +{ + register bucket *bp; + + assert(name); + bp = (bucket *) MALLOC(sizeof(bucket)); + if (bp == 0) no_space(); + bp->link = 0; + bp->next = 0; + bp->name = MALLOC(strlen(name) + 1); + if (bp->name == 0) no_space(); + bp->tag = 0; + bp->value = UNDEFINED; + bp->index = 0; + bp->prec = 0; + bp-> class = UNKNOWN; + bp->assoc = TOKEN; + bp->entry = 0; + bp->true_token = 0; + + if (bp->name == 0) no_space(); + strcpy(bp->name, name); + + return (bp); +} + + +bucket * +lookup(name) +char *name; +{ + register bucket *bp, **bpp; + + bpp = symbol_table + hash(name); + bp = *bpp; + + while (bp) + { + if (strcmp(name, bp->name) == 0) return (bp); + bpp = &bp->link; + bp = *bpp; + } + + *bpp = bp = make_bucket(name); + last_symbol->next = bp; + last_symbol = bp; + + return (bp); +} + + +create_symbol_table() +{ + register int i; + register bucket *bp; + + symbol_table = (bucket **) MALLOC(TABLE_SIZE*sizeof(bucket *)); + if (symbol_table == 0) no_space(); + for (i = 0; i < TABLE_SIZE; i++) + symbol_table[i] = 0; + + bp = make_bucket("error"); + bp->index = 1; + bp->class = TERM; + + first_symbol = bp; + last_symbol = bp; + symbol_table[hash("error")] = bp; +} + + +free_symbol_table() +{ + FREE(symbol_table); + symbol_table = 0; +} + + +free_symbols() +{ + register bucket *p, *q; + + for (p = first_symbol; p; p = q) + { + q = p->next; + FREE(p); + } +} diff --git a/yacc/verbose.c b/yacc/verbose.c new file mode 100644 index 0000000000..2c7cc52c77 --- /dev/null +++ b/yacc/verbose.c @@ -0,0 +1,329 @@ + +#include "defs.h" + + +static short *null_rules; + +verbose() +{ + register int i; + + if (!vflag) return; + + null_rules = (short *) MALLOC(nrules*sizeof(short)); + if (null_rules == 0) no_space(); + fprintf(verbose_file, "\f\n"); + for (i = 0; i < nstates; i++) + print_state(i); + FREE(null_rules); + + if (nunused) + log_unused(); + if (SRtotal || RRtotal) + log_conflicts(); + + fprintf(verbose_file, "\n\n%d terminals, %d nonterminals\n", ntokens, + nvars); + fprintf(verbose_file, "%d grammar rules, %d states\n", nrules - 2, nstates); +} + + +log_unused() +{ + register int i; + register short *p; + + fprintf(verbose_file, "\n\nRules never reduced:\n"); + for (i = 3; i < nrules; ++i) + { + if (!rules_used[i]) + { + fprintf(verbose_file, "\t%s :", symbol_name[rlhs[i]]); + for (p = ritem + rrhs[i]; *p >= 0; ++p) + fprintf(verbose_file, " %s", symbol_name[*p]); + fprintf(verbose_file, " (%d)\n", i - 2); + } + } +} + + +log_conflicts() +{ + register int i; + + fprintf(verbose_file, "\n\n"); + for (i = 0; i < nstates; i++) + { + if (SRconflicts[i] || RRconflicts[i]) + { + fprintf(verbose_file, "State %d contains ", i); + if (SRconflicts[i] == 1) + fprintf(verbose_file, "1 shift/reduce conflict"); + else if (SRconflicts[i] > 1) + fprintf(verbose_file, "%d shift/reduce conflicts", + SRconflicts[i]); + if (SRconflicts[i] && RRconflicts[i]) + fprintf(verbose_file, ", "); + if (RRconflicts[i] == 1) + fprintf(verbose_file, "1 reduce/reduce conflict"); + else if (RRconflicts[i] > 1) + fprintf(verbose_file, "%d reduce/reduce conflicts", + RRconflicts[i]); + fprintf(verbose_file, ".\n"); + } + } +} + + +print_state(state) +int state; +{ + if (state) + fprintf(verbose_file, "\n\n"); + if (SRconflicts[state] || RRconflicts[state]) + print_conflicts(state); + fprintf(verbose_file, "state %d\n", state); + print_core(state); + print_nulls(state); + print_actions(state); +} + + +print_conflicts(state) +int state; +{ + register int symbol, act, number; + register action *p; + + symbol = -1; + for (p = parser[state]; p; p = p->next) + { + if (p->suppressed == 2) + continue; + + if (p->symbol != symbol) + { + symbol = p->symbol; + number = p->number; + if (p->action_code == SHIFT) + act = SHIFT; + else + act = REDUCE; + } + else if (p->suppressed == 1) + { + if (state == final_state && symbol == 0) + { + fprintf(verbose_file, "%d: shift/reduce conflict \ +(accept, reduce %d) on $end\n", state, p->number - 2); + } + else + { + if (act == SHIFT) + { + fprintf(verbose_file, "%d: shift/reduce conflict \ +(shift %d, reduce %d) on %s\n", state, number, p->number - 2, + symbol_name[symbol]); + } + else + { + fprintf(verbose_file, "%d: reduce/reduce conflict \ +(reduce %d, reduce %d) on %s\n", state, number - 2, p->number - 2, + symbol_name[symbol]); + } + } + } + } +} + + +print_core(state) +int state; +{ + register int i; + register int k; + register int rule; + register core *statep; + register short *sp; + register short *sp1; + + statep = state_table[state]; + k = statep->nitems; + + for (i = 0; i < k; i++) + { + sp1 = sp = ritem + statep->items[i]; + + while (*sp >= 0) ++sp; + rule = -(*sp); + fprintf(verbose_file, "\t%s : ", symbol_name[rlhs[rule]]); + + for (sp = ritem + rrhs[rule]; sp < sp1; sp++) + fprintf(verbose_file, "%s ", symbol_name[*sp]); + + putc('.', verbose_file); + + while (*sp >= 0) + { + fprintf(verbose_file, " %s", symbol_name[*sp]); + sp++; + } + fprintf(verbose_file, " (%d)\n", -2 - *sp); + } +} + + +print_nulls(state) +int state; +{ + register action *p; + register int i, j, k, nnulls; + + nnulls = 0; + for (p = parser[state]; p; p = p->next) + { + if (p->action_code == REDUCE && + (p->suppressed == 0 || p->suppressed == 1)) + { + i = p->number; + if (rrhs[i] + 1 == rrhs[i+1]) + { + for (j = 0; j < nnulls && i > null_rules[j]; ++j) + continue; + + if (j == nnulls) + { + ++nnulls; + null_rules[j] = i; + } + else if (i != null_rules[j]) + { + ++nnulls; + for (k = nnulls - 1; k > j; --k) + null_rules[k] = null_rules[k-1]; + null_rules[j] = i; + } + } + } + } + + for (i = 0; i < nnulls; ++i) + { + j = null_rules[i]; + fprintf(verbose_file, "\t%s : . (%d)\n", symbol_name[rlhs[j]], + j - 2); + } + fprintf(verbose_file, "\n"); +} + + +print_actions(stateno) +int stateno; +{ + register action *p; + register shifts *sp; + register int as; + + if (stateno == final_state) + fprintf(verbose_file, "\t$end accept\n"); + + p = parser[stateno]; + if (p) + { + print_shifts(p); + print_reductions(p, defred[stateno]); + } + + sp = shift_table[stateno]; + if (sp && sp->nshifts > 0) + { + as = accessing_symbol[sp->shift[sp->nshifts - 1]]; + if (ISVAR(as)) + print_gotos(stateno); + } +} + + +print_shifts(p) +register action *p; +{ + register int count; + register action *q; + + count = 0; + for (q = p; q; q = q->next) + { + if (q->suppressed < 2 && q->action_code == SHIFT) + ++count; + } + + if (count > 0) + { + for (; p; p = p->next) + { + if (p->action_code == SHIFT && p->suppressed == 0) + fprintf(verbose_file, "\t%s shift %d\n", + symbol_name[p->symbol], p->number); + } + } +} + + +print_reductions(p, defred) +register action *p; +register int defred; +{ + register int k, anyreds; + register action *q; + + anyreds = 0; + for (q = p; q ; q = q->next) + { + if (q->action_code == REDUCE && q->suppressed < 2) + { + anyreds = 1; + break; + } + } + + if (anyreds == 0) + fprintf(verbose_file, "\t. error\n"); + else + { + for (; p; p = p->next) + { + if (p->action_code == REDUCE && p->number != defred) + { + k = p->number - 2; + if (p->suppressed == 0) + fprintf(verbose_file, "\t%s reduce %d\n", + symbol_name[p->symbol], k); + } + } + + if (defred > 0) + fprintf(verbose_file, "\t. reduce %d\n", defred - 2); + } +} + + +print_gotos(stateno) +int stateno; +{ + register int i, k; + register int as; + register short *to_state; + register shifts *sp; + + putc('\n', verbose_file); + sp = shift_table[stateno]; + to_state = sp->shift; + for (i = 0; i < sp->nshifts; ++i) + { + k = to_state[i]; + as = accessing_symbol[k]; + if (ISVAR(as)) + fprintf(verbose_file, "\t%s goto %d\n", symbol_name[as], k); + } +} + diff --git a/yacc/warshall.c b/yacc/warshall.c new file mode 100644 index 0000000000..4d22ad7414 --- /dev/null +++ b/yacc/warshall.c @@ -0,0 +1,84 @@ +#include "defs.h" + +transitive_closure(R, n) +unsigned *R; +int n; +{ + register int rowsize; + register unsigned mask; + register unsigned *rowj; + register unsigned *rp; + register unsigned *rend; + register unsigned *ccol; + register unsigned *relend; + register unsigned *cword; + register unsigned *rowi; + + rowsize = WORDSIZE(n); + relend = R + n*rowsize; + + cword = R; + mask = 1; + rowi = R; + while (rowi < relend) + { + ccol = cword; + rowj = R; + + while (rowj < relend) + { + if (*ccol & mask) + { + rp = rowi; + rend = rowj + rowsize; + while (rowj < rend) + *rowj++ |= *rp++; + } + else + { + rowj += rowsize; + } + + ccol += rowsize; + } + + mask <<= 1; + if (mask == 0) + { + mask = 1; + cword++; + } + + rowi += rowsize; + } +} + +reflexive_transitive_closure(R, n) +unsigned *R; +int n; +{ + register int rowsize; + register unsigned mask; + register unsigned *rp; + register unsigned *relend; + + transitive_closure(R, n); + + rowsize = WORDSIZE(n); + relend = R + n*rowsize; + + mask = 1; + rp = R; + while (rp < relend) + { + *rp |= mask; + mask <<= 1; + if (mask == 0) + { + mask = 1; + rp++; + } + + rp += rowsize; + } +} |