diff options
Diffstat (limited to 'test/Lex/lexgen.ml')
-rw-r--r-- | test/Lex/lexgen.ml | 252 |
1 files changed, 0 insertions, 252 deletions
diff --git a/test/Lex/lexgen.ml b/test/Lex/lexgen.ml deleted file mode 100644 index 73d011577f..0000000000 --- a/test/Lex/lexgen.ml +++ /dev/null @@ -1,252 +0,0 @@ -(* Compiling a lexer definition *) - -open Syntax - -(* Deep abstract syntax for regular expressions *) - -type regexp = - Empty - | Chars of int - | Action of int - | Seq of regexp * regexp - | Alt of regexp * regexp - | Star of regexp - -(* From shallow to deep syntax *) - -(*** - -let print_char_class c = - let print_interval low high = - prerr_int low; - if high - 1 > low then begin - prerr_char '-'; - prerr_int (high-1) - end; - prerr_char ' ' in - let rec print_class first next = function - [] -> print_interval first next - | c::l -> - if char.code c = next - then print_class first (next+1) l - else begin - print_interval first next; - print_class (char.code c) (char.code c + 1) l - end in - match c with - [] -> prerr_newline() - | c::l -> print_class (char.code c) (char.code c + 1) l; prerr_newline() - - -let rec print_regexp = function - Empty -> prerr_string "Empty" - | Chars n -> prerr_string "Chars "; prerr_int n - | Action n -> prerr_string "Action "; prerr_int n - | Seq(r1,r2) -> print_regexp r1; prerr_string "; "; print_regexp r2 - | Alt(r1,r2) -> prerr_string "("; print_regexp r1; prerr_string " | "; print_regexp r2; prerr_string ")" - | Star r -> prerr_string "("; print_regexp r; prerr_string ")*" - -***) - -let chars = ref ([] : char list list) -let chars_count = ref 0 -let actions = ref ([] : (int * location) list) -let actions_count = ref 0 - -let rec encode_regexp = function - Epsilon -> Empty - | Characters cl -> - let n = !chars_count in -(*** prerr_int n; prerr_char ' '; print_char_class cl; ***) - chars := cl :: !chars; - chars_count := !chars_count + 1; - Chars(n) - | Sequence(r1,r2) -> - Seq(encode_regexp r1, encode_regexp r2) - | Alternative(r1,r2) -> - Alt(encode_regexp r1, encode_regexp r2) - | Repetition r -> - Star (encode_regexp r) - - -let encode_casedef = - List.fold_left - (fun reg (expr,act) -> - let act_num = !actions_count in - actions_count := !actions_count + 1; - actions := (act_num, act) :: !actions; - Alt(reg, Seq(encode_regexp expr, Action act_num))) - Empty - - -let encode_lexdef (Lexdef(_, ld)) = - chars := []; - chars_count := 0; - actions := []; - actions_count := 0; - let name_regexp_list = - List.map (fun (name, casedef) -> (name, encode_casedef casedef)) ld in -(* List.iter print_char_class chars; *) - let chr = Array.of_list (List.rev !chars) - and act = !actions in - chars := []; - actions := []; - (chr, name_regexp_list, act) - - -(* To generate directly a NFA from a regular expression. - Confer Aho-Sethi-Ullman, dragon book, chap. 3 *) - -type transition = - OnChars of int - | ToAction of int - - -let rec merge_trans l1 l2 = - match (l1, l2) with - ([], s2) -> s2 - | (s1, []) -> s1 - | ((OnChars n1 as t1) :: r1 as s1), ((OnChars n2 as t2) :: r2 as s2) -> - if n1 = n2 then t1 :: merge_trans r1 r2 else - if n1 < n2 then t1 :: merge_trans r1 s2 else - t2 :: merge_trans s1 r2 - | ((ToAction n1 as t1) :: r1 as s1), ((ToAction n2 as t2) :: r2 as s2) -> - if n1 = n2 then t1 :: merge_trans r1 r2 else - if n1 < n2 then t1 :: merge_trans r1 s2 else - t2 :: merge_trans s1 r2 - | ((OnChars n1 as t1) :: r1 as s1), ((ToAction n2 as t2) :: r2 as s2) -> - t1 :: merge_trans r1 s2 - | ((ToAction n1 as t1) :: r1 as s1), ((OnChars n2 as t2) :: r2 as s2) -> - t2 :: merge_trans s1 r2 - - -let rec nullable = function - Empty -> true - | Chars _ -> false - | Action _ -> false - | Seq(r1,r2) -> nullable r1 & nullable r2 - | Alt(r1,r2) -> nullable r1 or nullable r2 - | Star r -> true - - -let rec firstpos = function - Empty -> [] - | Chars pos -> [OnChars pos] - | Action act -> [ToAction act] - | Seq(r1,r2) -> if nullable r1 - then merge_trans (firstpos r1) (firstpos r2) - else firstpos r1 - | Alt(r1,r2) -> merge_trans (firstpos r1) (firstpos r2) - | Star r -> firstpos r - - -let rec lastpos = function - Empty -> [] - | Chars pos -> [OnChars pos] - | Action act -> [ToAction act] - | Seq(r1,r2) -> if nullable r2 - then merge_trans (lastpos r1) (lastpos r2) - else lastpos r2 - | Alt(r1,r2) -> merge_trans (lastpos r1) (lastpos r2) - | Star r -> lastpos r - - -let followpos size name_regexp_list = - let v = Array.new size [] in - let fill_pos first = function - OnChars pos -> v.(pos) <- merge_trans first v.(pos); () - | ToAction _ -> () in - let rec fill = function - Seq(r1,r2) -> - fill r1; fill r2; - List.iter (fill_pos (firstpos r2)) (lastpos r1) - | Alt(r1,r2) -> - fill r1; fill r2 - | Star r -> - fill r; - List.iter (fill_pos (firstpos r)) (lastpos r) - | _ -> () in - List.iter (fun (name, regexp) -> fill regexp) name_regexp_list; - v - - -let no_action = 0x3FFFFFFF - -let split_trans_set = - List.fold_left - (fun (act, pos_set as act_pos_set) trans -> - match trans with - OnChars pos -> (act, pos :: pos_set) - | ToAction act1 -> if act1 < act then (act1, pos_set) - else act_pos_set) - (no_action, []) - - -let memory = (Hashtbl.new 131 : (transition list, int) Hashtbl.t) -let todo = ref ([] : (transition list * int) list) -let next = ref 0 - -let get_state st = - try - Hashtbl.find memory st - with Not_found -> - let nbr = !next in - next := !next + 1; - Hashtbl.add memory st nbr; - todo := (st, nbr) :: !todo; - nbr - -let rec map_on_states f = - match !todo with - [] -> [] - | (st,i)::r -> todo := r; let res = f st in (res,i) :: map_on_states f - -let number_of_states () = !next - -let goto_state = function - [] -> Backtrack - | ps -> Goto (get_state ps) - - -let transition_from chars follow pos_set = - let tr = Array.new 256 [] - and shift = Array.new 256 Backtrack in - List.iter - (fun pos -> - List.iter - (fun c -> - tr.(Char.code c) <- - merge_trans tr.(Char.code c) follow.(pos)) - chars.(pos)) - pos_set; - for i = 0 to 255 do - shift.(i) <- goto_state tr.(i) - done; - shift - - -let translate_state chars follow state = - match split_trans_set state with - n, [] -> Perform n - | n, ps -> Shift( (if n = no_action then No_remember else Remember n), - transition_from chars follow ps) - - -let make_dfa lexdef = - let (chars, name_regexp_list, actions) = - encode_lexdef lexdef in -(** - List.iter (fun (name, regexp) -> prerr_string name; prerr_string " = "; print_regexp regexp; prerr_newline()) name_regexp_list; -**) - let follow = - followpos (Array.length chars) name_regexp_list in - let initial_states = - List.map (fun (name, regexp) -> (name, get_state(firstpos regexp))) - name_regexp_list in - let states = - map_on_states (translate_state chars follow) in - let v = - Array.new (number_of_states()) (Perform 0) in - List.iter (fun (auto, i) -> v.(i) <- auto) states; - (initial_states, v, actions) - |