diff options
Diffstat (limited to 'camlp4/ocaml_src/lib/plexer.ml')
-rw-r--r-- | camlp4/ocaml_src/lib/plexer.ml | 596 |
1 files changed, 213 insertions, 383 deletions
diff --git a/camlp4/ocaml_src/lib/plexer.ml b/camlp4/ocaml_src/lib/plexer.ml index 4b5dcca151..43d5c8d95a 100644 --- a/camlp4/ocaml_src/lib/plexer.ml +++ b/camlp4/ocaml_src/lib/plexer.ml @@ -92,6 +92,9 @@ and digits_under kind len (strm__ : _ Stream.t) = | _ -> match Stream.peek strm__ with Some '_' -> Stream.junk strm__; digits_under kind len strm__ + | Some 'l' -> Stream.junk strm__; "INT32", get_buff len + | Some 'L' -> Stream.junk strm__; "INT64", get_buff len + | Some 'n' -> Stream.junk strm__; "NATIVEINT", get_buff len | _ -> "INT", get_buff len and octal (strm__ : _ Stream.t) = match Stream.peek strm__ with @@ -145,371 +148,85 @@ and end_exponent_part_under len (strm__ : _ Stream.t) = let error_on_unknown_keywords = ref false;; let err loc msg = raise_with_loc loc (Token.Error msg);; -(* -value next_token_fun dfa find_kwd = - let keyword_or_error loc s = - try (("", find_kwd s), loc) with - [ Not_found -> - if error_on_unknown_keywords.val then err loc ("illegal token: " ^ s) - else (("", s), loc) ] - in - let rec next_token = - parser bp - [ [: `' ' | '\010' | '\013' | '\t' | '\026' | '\012'; s :] -> - next_token s - | [: `'('; s :] -> left_paren bp s - | [: `'#'; s :] -> do { spaces_tabs s; linenum bp s } - | [: `('A'..'Z' | '\192'..'\214' | '\216'..'\222' as c); s :] -> - let id = get_buff (ident (store 0 c) s) in - let loc = (bp, Stream.count s) in - (try ("", find_kwd id) with [ Not_found -> ("UIDENT", id) ], loc) - | [: `('a'..'z' | '\223'..'\246' | '\248'..'\255' | '_' as c); s :] -> - let id = get_buff (ident (store 0 c) s) in - let loc = (bp, Stream.count s) in - (try ("", find_kwd id) with [ Not_found -> ("LIDENT", id) ], loc) - | [: `('1'..'9' as c); s :] -> - let tok = number (store 0 c) s in - let loc = (bp, Stream.count s) in - (tok, loc) - | [: `'0'; s :] -> - let tok = base_number (store 0 '0') s in - let loc = (bp, Stream.count s) in - (tok, loc) - | [: `'''; s :] -> - match Stream.npeek 3 s with - [ [_; '''; _] | ['\\'; _; _] | ['\x0D'; '\x0A'; '''] -> - let tok = ("CHAR", get_buff (char bp 0 s)) in - let loc = (bp, Stream.count s) in - (tok, loc) - | _ -> keyword_or_error (bp, Stream.count s) "'" ] - | [: `'"'; s :] -> - let tok = ("STRING", get_buff (string bp 0 s)) in - let loc = (bp, Stream.count s) in - (tok, loc) - | [: `'$'; s :] -> - let tok = dollar bp 0 s in - let loc = (bp, Stream.count s) in - (tok, loc) - | [: `('!' | '=' | '@' | '^' | '&' | '+' | '-' | '*' | '/' | '%' as c); - s :] -> - let id = get_buff (ident2 (store 0 c) s) in - keyword_or_error (bp, Stream.count s) id - | [: `('~' as c); - a = - parser - [ [: `('a'..'z' as c); len = ident (store 0 c) :] ep -> - (("TILDEIDENT", get_buff len), (bp, ep)) - | [: s :] -> - let id = get_buff (ident2 (store 0 c) s) in - keyword_or_error (bp, Stream.count s) id ] :] -> - a - | [: `('?' as c); - a = - parser - [ [: `('a'..'z' as c); len = ident (store 0 c) :] ep -> - (("QUESTIONIDENT", get_buff len), (bp, ep)) - | [: s :] -> - let id = get_buff (ident2 (store 0 c) s) in - keyword_or_error (bp, Stream.count s) id ] :] -> - a - | [: `'<'; s :] -> less bp s - | [: `(':' as c1); - len = - parser - [ [: `(']' | ':' | '=' | '>' as c2) :] -> store (store 0 c1) c2 - | [: :] -> store 0 c1 ] :] ep -> - let id = get_buff len in - keyword_or_error (bp, ep) id - | [: `('>' | '|' as c1); - len = - parser - [ [: `(']' | '}' as c2) :] -> store (store 0 c1) c2 - | [: a = ident2 (store 0 c1) :] -> a ] :] ep -> - let id = get_buff len in - keyword_or_error (bp, ep) id - | [: `('[' | '{' as c1); s :] -> - let len = - match Stream.npeek 2 s with - [ ['<'; '<' | ':'] -> store 0 c1 - | _ -> - match s with parser - [ [: `('|' | '<' | ':' as c2) :] -> store (store 0 c1) c2 - | [: :] -> store 0 c1 ] ] - in - let ep = Stream.count s in - let id = get_buff len in - keyword_or_error (bp, ep) id - | [: `'.'; - id = - parser - [ [: `'.' :] -> ".." - | [: :] -> if ssd && after_space then " ." else "." ] :] ep -> - keyword_or_error (bp, ep) id - | [: `';'; - id = - parser - [ [: `';' :] -> ";;" - | [: :] -> ";" ] :] ep -> - keyword_or_error (bp, ep) id - | [: `'\\'; s :] ep -> (("LIDENT", get_buff (ident3 0 s)), (bp, ep)) - | [: `c :] ep -> keyword_or_error (bp, ep) (String.make 1 c) - | [: _ = Stream.empty :] -> (("EOI", ""), (bp, succ bp)) ] - and less bp strm = - if no_quotations.val then - match strm with parser - [ [: len = ident2 (store 0 '<') :] ep -> - let id = get_buff len in - keyword_or_error (bp, ep) id ] - else - match strm with parser - [ [: `'<'; len = quotation bp 0 :] ep -> - (("QUOTATION", ":" ^ get_buff len), (bp, ep)) - | [: `':'; i = parser [: len = ident 0 :] -> get_buff len; - `'<' ? "character '<' expected"; len = quotation bp 0 :] ep -> - (("QUOTATION", i ^ ":" ^ get_buff len), (bp, ep)) - | [: len = ident2 (store 0 '<') :] ep -> - let id = get_buff len in - keyword_or_error (bp, ep) id ] - and string bp len = - parser - [ [: `'"' :] -> len - | [: `'\\'; `c; s :] -> string bp (store (store len '\\') c) s - | [: `c; s :] -> string bp (store len c) s - | [: :] ep -> err (bp, ep) "string not terminated" ] - and char bp len = - parser - [ [: `'''; s :] -> if len = 0 then char bp (store len ''') s else len - | [: `'\\'; `c; s :] -> char bp (store (store len '\\') c) s - | [: `c; s :] -> char bp (store len c) s - | [: :] ep -> err (bp, ep) "char not terminated" ] - and dollar bp len = - parser - [ [: `'$' :] -> ("ANTIQUOT", ":" ^ get_buff len) - | [: `('a'..'z' | 'A'..'Z' as c); s :] -> antiquot bp (store len c) s - | [: `('0'..'9' as c); s :] -> maybe_locate bp (store len c) s - | [: `':'; s :] -> - let k = get_buff len in - ("ANTIQUOT", k ^ ":" ^ locate_or_antiquot_rest bp 0 s) - | [: `'\\'; `c; s :] -> - ("ANTIQUOT", ":" ^ locate_or_antiquot_rest bp (store len c) s) - | [: s :] -> - if dfa then - match s with parser - [ [: `c :] -> - ("ANTIQUOT", ":" ^ locate_or_antiquot_rest bp (store len c) s) - | [: :] ep -> err (bp, ep) "antiquotation not terminated" ] - else ("", get_buff (ident2 (store 0 '$') s)) ] - and maybe_locate bp len = - parser - [ [: `'$' :] -> ("ANTIQUOT", ":" ^ get_buff len) - | [: `('0'..'9' as c); s :] -> maybe_locate bp (store len c) s - | [: `':'; s :] -> - ("LOCATE", get_buff len ^ ":" ^ locate_or_antiquot_rest bp 0 s) - | [: `'\\'; `c; s :] -> - ("ANTIQUOT", ":" ^ locate_or_antiquot_rest bp (store len c) s) - | [: `c; s :] -> - ("ANTIQUOT", ":" ^ locate_or_antiquot_rest bp (store len c) s) - | [: :] ep -> err (bp, ep) "antiquotation not terminated" ] - and antiquot bp len = - parser - [ [: `'$' :] -> ("ANTIQUOT", ":" ^ get_buff len) - | [: `('a'..'z' | 'A'..'Z' | '0'..'9' as c); s :] -> - antiquot bp (store len c) s - | [: `':'; s :] -> - let k = get_buff len in - ("ANTIQUOT", k ^ ":" ^ locate_or_antiquot_rest bp 0 s) - | [: `'\\'; `c; s :] -> - ("ANTIQUOT", ":" ^ locate_or_antiquot_rest bp (store len c) s) - | [: `c; s :] -> - ("ANTIQUOT", ":" ^ locate_or_antiquot_rest bp (store len c) s) - | [: :] ep -> err (bp, ep) "antiquotation not terminated" ] - and locate_or_antiquot_rest bp len = - parser - [ [: `'$' :] -> get_buff len - | [: `'\\'; `c; s :] -> locate_or_antiquot_rest bp (store len c) s - | [: `c; s :] -> locate_or_antiquot_rest bp (store len c) s - | [: :] ep -> err (bp, ep) "antiquotation not terminated" ] - and quotation bp len = - parser - [ [: `'>'; s :] -> maybe_end_quotation bp len s - | [: `'<'; s :] -> - quotation bp (maybe_nested_quotation bp (store len '<') s) s - | [: `'\\'; - len = - parser - [ [: `('>' | '<' | '\\' as c) :] -> store len c - | [: :] -> store len '\\' ]; - s :] -> - quotation bp len s - | [: `c; s :] -> quotation bp (store len c) s - | [: :] ep -> err (bp, ep) "quotation not terminated" ] - and maybe_nested_quotation bp len = - parser - [ [: `'<'; s :] -> mstore (quotation bp (store len '<') s) ">>" - | [: `':'; len = ident (store len ':'); - a = - parser - [ [: `'<'; s :] -> mstore (quotation bp (store len '<') s) ">>" - | [: :] -> len ] :] -> - a - | [: :] -> len ] - and maybe_end_quotation bp len = - parser - [ [: `'>' :] -> len - | [: a = quotation bp (store len '>') :] -> a ] - and left_paren bp = - parser - [ [: `'*'; _ = comment bp; a = next_token True :] -> a - | [: :] ep -> keyword_or_error (bp, ep) "(" ] - and comment bp = - parser - [ [: `'('; s :] -> left_paren_in_comment bp s - | [: `'*'; s :] -> star_in_comment bp s - | [: `'"'; _ = string bp 0; s :] -> comment bp s - | [: `'''; s :] -> quote_in_comment bp s - | [: `c; s :] -> comment bp s - | [: :] ep -> err (bp, ep) "comment not terminated" ] - and quote_in_comment bp = - parser - [ [: `'''; s :] -> comment bp s - | [: `'\013'; s :] -> quote_cr_in_comment bp s - | [: `'\\'; s :] -> quote_antislash_in_comment bp s - | [: `'('; s :] -> quote_left_paren_in_comment bp s - | [: `'*'; s :] -> quote_star_in_comment bp s - | [: `'"'; s :] -> quote_doublequote_in_comment bp s - | [: `_; s :] -> quote_any_in_comment bp s - | [: s :] -> comment bp s ] - and quote_any_in_comment bp = - parser - [ [: `'''; s :] -> comment bp s - | [: s :] -> comment bp s ] - and quote_cr_in_comment bp = - parser - [ [: `'\010'; s :] -> quote_any_in_comment bp s - | [: s :] -> quote_any_in_comment bp s ] - and quote_left_paren_in_comment bp = - parser - [ [: `'''; s :] -> comment bp s - | [: s :] -> left_paren_in_comment bp s ] - and quote_star_in_comment bp = - parser - [ [: `'''; s :] -> comment bp s - | [: s :] -> star_in_comment bp s ] - and quote_doublequote_in_comment bp = - parser - [ [: `'''; s :] -> comment bp s - | [: _ = string bp 0; s :] -> comment bp s ] - and quote_antislash_in_comment bp = - parser - [ [: `'''; s :] -> quote_antislash_quote_in_comment bp s - | [: `('\\' | '"' | 'n' | 't' | 'b' | 'r'); s :] -> - quote_any_in_comment bp s - | [: `('0'..'9'); s :] -> quote_antislash_digit_in_comment bp s - | [: `'x'; s :] -> quote_antislash_x_in_comment bp s - | [: s :] -> comment bp s ] - and quote_antislash_quote_in_comment bp = - parser - [ [: `'''; s :] -> comment bp s - | [: s :] -> quote_in_comment bp s ] - and quote_antislash_digit_in_comment bp = - parser - [ [: `('0'..'9'); s :] -> quote_antislash_digit2_in_comment bp s - | [: s :] -> comment bp s ] - and quote_antislash_digit2_in_comment bp = - parser - [ [: `('0'..'9'); s :] -> quote_any_in_comment bp s - | [: s :] -> comment bp s ] - and quote_antislash_x_in_comment bp = - parser - [ [: _ = hexa; s :] -> quote_antislash_x_digit_in_comment bp s - | [: s :] -> comment bp s ] - and quote_antislash_x_digit_in_comment bp = - parser - [ [: _ = hexa; s :] -> quote_any_in_comment bp s - | [: s :] -> comment bp s ] - and left_paren_in_comment bp = - parser - [ [: `'*'; s :] -> do { comment bp s; comment bp s } - | [: a = comment bp :] -> a ] - and star_in_comment bp = - parser - [ [: `')' :] -> () - | [: a = comment bp :] -> a ] - and linedir n s = - match stream_peek_nth n s with - [ Some (' ' | '\t') -> linedir (n + 1) s - | Some ('0'..'9') -> linedir_digits (n + 1) s - | _ -> False ] - and linedir_digits n s = - match stream_peek_nth n s with - [ Some ('0'..'9') -> linedir_digits (n + 1) s - | _ -> linedir_quote n s ] - and linedir_quote n s = - match stream_peek_nth n s with - [ Some (' ' | '\t') -> linedir_quote (n + 1) s - | Some '"' -> True - | _ -> False ] - and any_to_nl = - parser - [ [: `'\013' | '\010' :] ep -> bolpos.val := ep - | [: `_; s :] -> any_to_nl s - | [: :] -> () ] +(* Debugging positions and locations *) +let eprint_pos msg p = + Printf.eprintf "%s: fname=%s; lnum=%d; bol=%d; cnum=%d\n%!" msg + p.Lexing.pos_fname p.Lexing.pos_lnum p.Lexing.pos_bol p.Lexing.pos_cnum +;; + +let eprint_loc (bp, ep) = eprint_pos "P1" bp; eprint_pos "P2" ep;; + +let check_location msg (bp, ep as loc) = + let ok = + if bp.Lexing.pos_lnum > ep.Lexing.pos_lnum || + bp.Lexing.pos_bol > ep.Lexing.pos_bol || + bp.Lexing.pos_cnum > ep.Lexing.pos_cnum || bp.Lexing.pos_lnum < 0 || + ep.Lexing.pos_lnum < 0 || bp.Lexing.pos_bol < 0 || + ep.Lexing.pos_bol < 0 || bp.Lexing.pos_cnum < 0 || + ep.Lexing.pos_cnum < 0 + then + begin + Printf.eprintf "*** Warning: (%s) strange positions ***\n" msg; + eprint_loc loc; + false + end + else true in - fun cstrm -> - try - let glex = glexr.val in - let comm_bp = Stream.count cstrm in - let r = next_token False cstrm in - do { - match glex.tok_comm with - [ Some list -> - if fst (snd r) > comm_bp then - let comm_loc = (comm_bp, fst (snd r)) in - glex.tok_comm := Some [comm_loc :: list] - else () - | None -> () ]; - r - } - with - [ Stream.Error str -> - err (Stream.count cstrm, Stream.count cstrm + 1) str ] -; -*) + ok, loc +;; -let next_token_fun dfa ssd find_kwd bolpos glexr = - let keyword_or_error loc s = +let next_token_fun dfa ssd find_kwd fname lnum bolpos glexr = + let make_pos p = + {Lexing.pos_fname = !fname; Lexing.pos_lnum = !lnum; + Lexing.pos_bol = !bolpos; Lexing.pos_cnum = p} + in + let mkloc (bp, ep) = make_pos bp, make_pos ep in + let keyword_or_error (bp, ep) s = + let loc = mkloc (bp, ep) in try ("", find_kwd s), loc with Not_found -> if !error_on_unknown_keywords then err loc ("illegal token: " ^ s) else ("", s), loc in - let error_if_keyword ((_, id), loc as a) = + let error_if_keyword ((_, id as a), bep) = + let loc = mkloc bep in try ignore (find_kwd id); err loc ("illegal use of a keyword as a label: " ^ id) with - Not_found -> a + Not_found -> a, loc in let rec next_token after_space (strm__ : _ Stream.t) = let bp = Stream.count strm__ in match Stream.peek strm__ with - Some ('\010' | '\013') -> + Some '\010' -> + Stream.junk strm__; + let s = strm__ in + let ep = Stream.count strm__ in + bolpos := ep; incr lnum; next_token true s + | Some '\013' -> Stream.junk strm__; let s = strm__ in - let ep = Stream.count strm__ in bolpos := ep; next_token true s + let ep = Stream.count strm__ in + let ep = + match Stream.peek s with + Some '\010' -> Stream.junk s; ep + 1 + | _ -> ep + in + bolpos := ep; incr lnum; next_token true s | Some (' ' | '\t' | '\026' | '\012') -> Stream.junk strm__; next_token true strm__ | Some '#' when bp = !bolpos -> Stream.junk strm__; let s = strm__ in - if linedir 1 s then begin any_to_nl s; next_token true s end + if linedir 1 s then begin line_directive s; next_token true s end else keyword_or_error (bp, bp + 1) "#" | Some '(' -> Stream.junk strm__; left_paren bp strm__ | Some ('A'..'Z' | '\192'..'\214' | '\216'..'\222' as c) -> Stream.junk strm__; let s = strm__ in let id = get_buff (ident (store 0 c) s) in - let loc = bp, Stream.count s in + let loc = mkloc (bp, Stream.count s) in (try "", find_kwd id with Not_found -> "UIDENT", id), loc @@ -517,35 +234,35 @@ let next_token_fun dfa ssd find_kwd bolpos glexr = Stream.junk strm__; let s = strm__ in let id = get_buff (ident (store 0 c) s) in - let loc = bp, Stream.count s in + let loc = mkloc (bp, Stream.count s) in (try "", find_kwd id with Not_found -> "LIDENT", id), loc | Some ('1'..'9' as c) -> Stream.junk strm__; let tok = number (store 0 c) strm__ in - let loc = bp, Stream.count strm__ in tok, loc + let loc = mkloc (bp, Stream.count strm__) in tok, loc | Some '0' -> Stream.junk strm__; let tok = base_number (store 0 '0') strm__ in - let loc = bp, Stream.count strm__ in tok, loc + let loc = mkloc (bp, Stream.count strm__) in tok, loc | Some '\'' -> Stream.junk strm__; let s = strm__ in begin match Stream.npeek 2 s with [_; '\''] | ['\\'; _] -> let tok = "CHAR", get_buff (char bp 0 s) in - let loc = bp, Stream.count s in tok, loc + let loc = mkloc (bp, Stream.count s) in tok, loc | _ -> keyword_or_error (bp, Stream.count s) "'" end | Some '\"' -> Stream.junk strm__; let tok = "STRING", get_buff (string bp 0 strm__) in - let loc = bp, Stream.count strm__ in tok, loc + let loc = mkloc (bp, Stream.count strm__) in tok, loc | Some '$' -> Stream.junk strm__; let tok = dollar bp 0 strm__ in - let loc = bp, Stream.count strm__ in tok, loc + let loc = mkloc (bp, Stream.count strm__) in tok, loc | Some ('!' | '=' | '@' | '^' | '&' | '+' | '-' | '*' | '/' | '%' as c) -> Stream.junk strm__; let id = get_buff (ident2 (store 0 c) strm__) in @@ -671,12 +388,12 @@ let next_token_fun dfa ssd find_kwd bolpos glexr = | Some '\\' -> Stream.junk strm__; let ep = Stream.count strm__ in - ("LIDENT", get_buff (ident3 0 strm__)), (bp, ep) + ("LIDENT", get_buff (ident3 0 strm__)), mkloc (bp, ep) | Some c -> Stream.junk strm__; let ep = Stream.count strm__ in keyword_or_error (bp, ep) (String.make 1 c) - | _ -> let _ = Stream.empty strm__ in ("EOI", ""), (bp, succ bp) + | _ -> let _ = Stream.empty strm__ in ("EOI", ""), mkloc (bp, succ bp) and less bp strm = if !no_quotations then let (strm__ : _ Stream.t) = strm in @@ -693,7 +410,7 @@ let next_token_fun dfa ssd find_kwd bolpos glexr = Stream.Failure -> raise (Stream.Error "") in let ep = Stream.count strm__ in - ("QUOTATION", ":" ^ get_buff len), (bp, ep) + ("QUOTATION", ":" ^ get_buff len), mkloc (bp, ep) | Some ':' -> Stream.junk strm__; let i = @@ -708,7 +425,7 @@ let next_token_fun dfa ssd find_kwd bolpos glexr = Stream.Failure -> raise (Stream.Error "") in let ep = Stream.count strm__ in - ("QUOTATION", i ^ ":" ^ get_buff len), (bp, ep) + ("QUOTATION", i ^ ":" ^ get_buff len), mkloc (bp, ep) | _ -> raise (Stream.Error "character '<' expected") end | _ -> @@ -727,9 +444,26 @@ let next_token_fun dfa ssd find_kwd bolpos glexr = string bp (store (store len '\\') c) strm__ | _ -> raise (Stream.Error "") end + | Some '\010' -> + Stream.junk strm__; + let s = strm__ in + let ep = Stream.count strm__ in + bolpos := ep; incr lnum; string bp len s + | Some '\013' -> + Stream.junk strm__; + let s = strm__ in + let ep = Stream.count strm__ in + let (len, ep) = + match Stream.peek s with + Some '\010' -> + Stream.junk s; store (store len '\013') '\010', ep + 1 + | _ -> store len '\013', ep + in + bolpos := ep; incr lnum; string bp len s | Some c -> Stream.junk strm__; string bp (store len c) strm__ | _ -> - let ep = Stream.count strm__ in err (bp, ep) "string not terminated" + let ep = Stream.count strm__ in + err (mkloc (bp, ep)) "string not terminated" and char bp len (strm__ : _ Stream.t) = match Stream.peek strm__ with Some '\'' -> @@ -742,8 +476,23 @@ let next_token_fun dfa ssd find_kwd bolpos glexr = Stream.junk strm__; char bp (store (store len '\\') c) strm__ | _ -> raise (Stream.Error "") end + | Some '\010' -> + Stream.junk strm__; + let s = strm__ in + bolpos := bp + 1; incr lnum; char bp (store len '\010') s + | Some '\013' -> + Stream.junk strm__; + let s = strm__ in + let bol = + match Stream.peek s with + Some '\010' -> Stream.junk s; bp + 2 + | _ -> bp + 1 + in + bolpos := bol; incr lnum; char bp (store len '\013') s | Some c -> Stream.junk strm__; char bp (store len c) strm__ - | _ -> let ep = Stream.count strm__ in err (bp, ep) "char not terminated" + | _ -> + let ep = Stream.count strm__ in + err (mkloc (bp, ep)) "char not terminated" and dollar bp len (strm__ : _ Stream.t) = match Stream.peek strm__ with Some '$' -> Stream.junk strm__; "ANTIQUOT", ":" ^ get_buff len @@ -773,7 +522,7 @@ let next_token_fun dfa ssd find_kwd bolpos glexr = "ANTIQUOT", ":" ^ locate_or_antiquot_rest bp (store len c) s | _ -> let ep = Stream.count strm__ in - err (bp, ep) "antiquotation not terminated" + err (mkloc (bp, ep)) "antiquotation not terminated" else "", get_buff (ident2 (store 0 '$') s) and maybe_locate bp len (strm__ : _ Stream.t) = match Stream.peek strm__ with @@ -796,7 +545,7 @@ let next_token_fun dfa ssd find_kwd bolpos glexr = "ANTIQUOT", ":" ^ locate_or_antiquot_rest bp (store len c) strm__ | _ -> let ep = Stream.count strm__ in - err (bp, ep) "antiquotation not terminated" + err (mkloc (bp, ep)) "antiquotation not terminated" and antiquot bp len (strm__ : _ Stream.t) = match Stream.peek strm__ with Some '$' -> Stream.junk strm__; "ANTIQUOT", ":" ^ get_buff len @@ -819,7 +568,7 @@ let next_token_fun dfa ssd find_kwd bolpos glexr = "ANTIQUOT", ":" ^ locate_or_antiquot_rest bp (store len c) strm__ | _ -> let ep = Stream.count strm__ in - err (bp, ep) "antiquotation not terminated" + err (mkloc (bp, ep)) "antiquotation not terminated" and locate_or_antiquot_rest bp len (strm__ : _ Stream.t) = match Stream.peek strm__ with Some '$' -> Stream.junk strm__; get_buff len @@ -835,7 +584,7 @@ let next_token_fun dfa ssd find_kwd bolpos glexr = Stream.junk strm__; locate_or_antiquot_rest bp (store len c) strm__ | _ -> let ep = Stream.count strm__ in - err (bp, ep) "antiquotation not terminated" + err (mkloc (bp, ep)) "antiquotation not terminated" and quotation bp len (strm__ : _ Stream.t) = match Stream.peek strm__ with Some '>' -> Stream.junk strm__; maybe_end_quotation bp len strm__ @@ -856,7 +605,7 @@ let next_token_fun dfa ssd find_kwd bolpos glexr = | Some c -> Stream.junk strm__; quotation bp (store len c) strm__ | _ -> let ep = Stream.count strm__ in - err (bp, ep) "quotation not terminated" + err (mkloc (bp, ep)) "quotation not terminated" and maybe_nested_quotation bp len (strm__ : _ Stream.t) = match Stream.peek strm__ with Some '<' -> @@ -905,9 +654,24 @@ let next_token_fun dfa ssd find_kwd bolpos glexr = in comment bp strm__ | Some '\'' -> Stream.junk strm__; quote_in_comment bp strm__ + | Some '\010' -> + Stream.junk strm__; + let s = strm__ in + let ep = Stream.count strm__ in bolpos := ep; incr lnum; comment bp s + | Some '\013' -> + Stream.junk strm__; + let s = strm__ in + let ep = Stream.count strm__ in + let ep = + match Stream.peek s with + Some '\010' -> Stream.junk s; ep + 1 + | _ -> ep + in + bolpos := ep; incr lnum; comment bp s | Some c -> Stream.junk strm__; comment bp strm__ | _ -> - let ep = Stream.count strm__ in err (bp, ep) "comment not terminated" + let ep = Stream.count strm__ in + err (mkloc (bp, ep)) "comment not terminated" and quote_in_comment bp (strm__ : _ Stream.t) = match Stream.peek strm__ with Some '\'' -> Stream.junk strm__; comment bp strm__ @@ -915,7 +679,19 @@ let next_token_fun dfa ssd find_kwd bolpos glexr = | _ -> let s = strm__ in begin match Stream.npeek 2 s with - [_; '\''] -> Stream.junk s; Stream.junk s + ['\013' | '\010'; '\''] -> + bolpos := bp + 1; incr lnum; Stream.junk s; Stream.junk s + | ['\013'; '\010'] -> + begin match Stream.npeek 3 s with + [_; _; '\''] -> + bolpos := bp + 2; + incr lnum; + Stream.junk s; + Stream.junk s; + Stream.junk s + | _ -> () + end + | [_; '\''] -> Stream.junk s; Stream.junk s | _ -> () end; comment bp s @@ -952,23 +728,73 @@ let next_token_fun dfa ssd find_kwd bolpos glexr = and linedir n s = match stream_peek_nth n s with Some (' ' | '\t') -> linedir (n + 1) s - | Some ('0'..'9') -> linedir_digits (n + 1) s - | _ -> false - and linedir_digits n s = - match stream_peek_nth n s with - Some ('0'..'9') -> linedir_digits (n + 1) s - | _ -> linedir_quote n s - and linedir_quote n s = - match stream_peek_nth n s with - Some (' ' | '\t') -> linedir_quote (n + 1) s - | Some '\"' -> true + | Some ('0'..'9') -> true | _ -> false and any_to_nl (strm__ : _ Stream.t) = match Stream.peek strm__ with - Some ('\013' | '\010') -> - Stream.junk strm__; let ep = Stream.count strm__ in bolpos := ep + Some '\010' -> + Stream.junk strm__; + let s = strm__ in + let ep = Stream.count strm__ in bolpos := ep; incr lnum + | Some '\013' -> + Stream.junk strm__; + let s = strm__ in + let ep = Stream.count strm__ in + let ep = + match Stream.peek s with + Some '\010' -> Stream.junk s; ep + 1 + | _ -> ep + in + bolpos := ep; incr lnum | Some _ -> Stream.junk strm__; any_to_nl strm__ | _ -> () + and line_directive (strm__ : _ Stream.t) = + let _ = skip_spaces strm__ in + let n = + try line_directive_number 0 strm__ with + Stream.Failure -> raise (Stream.Error "") + in + let _ = + try skip_spaces strm__ with + Stream.Failure -> raise (Stream.Error "") + in + let _ = + try line_directive_string strm__ with + Stream.Failure -> raise (Stream.Error "") + in + let _ = + try any_to_nl strm__ with + Stream.Failure -> raise (Stream.Error "") + in + let ep = Stream.count strm__ in bolpos := ep; lnum := n + and skip_spaces (strm__ : _ Stream.t) = + match Stream.peek strm__ with + Some (' ' | '\t') -> Stream.junk strm__; skip_spaces strm__ + | _ -> () + and line_directive_number n (strm__ : _ Stream.t) = + match Stream.peek strm__ with + Some ('0'..'9' as c) -> + Stream.junk strm__; + line_directive_number (10 * n + (Char.code c - Char.code '0')) strm__ + | _ -> n + and line_directive_string (strm__ : _ Stream.t) = + match Stream.peek strm__ with + Some '\"' -> + Stream.junk strm__; + let _ = + try line_directive_string_contents 0 strm__ with + Stream.Failure -> raise (Stream.Error "") + in + () + | _ -> () + and line_directive_string_contents len (strm__ : _ Stream.t) = + match Stream.peek strm__ with + Some ('\010' | '\013') -> Stream.junk strm__; () + | Some '\"' -> Stream.junk strm__; fname := get_buff len + | Some c -> + Stream.junk strm__; + line_directive_string_contents (store len c) strm__ + | _ -> raise Stream.Failure in fun cstrm -> try @@ -977,14 +803,16 @@ let next_token_fun dfa ssd find_kwd bolpos glexr = let r = next_token false cstrm in begin match glex.tok_comm with Some list -> - if fst (snd r) > comm_bp then - let comm_loc = comm_bp, fst (snd r) in + let next_bp = (fst (snd r)).Lexing.pos_cnum in + if next_bp > comm_bp then + let comm_loc = mkloc (comm_bp, next_bp) in glex.tok_comm <- Some (comm_loc :: list) | None -> () end; r with - Stream.Error str -> err (Stream.count cstrm, Stream.count cstrm + 1) str + Stream.Error str -> + err (mkloc (Stream.count cstrm, Stream.count cstrm + 1)) str ;; @@ -993,10 +821,13 @@ let specific_space_dot = ref false;; let func kwd_table glexr = let bolpos = ref 0 in + let lnum = ref 1 in + let fname = ref "" in let find = Hashtbl.find kwd_table in let dfa = !dollar_for_antiquotation in let ssd = !specific_space_dot in - Token.lexer_func_of_parser (next_token_fun dfa ssd find bolpos glexr) + Token.lexer_func_of_parser + (next_token_fun dfa ssd find fname lnum bolpos glexr) ;; let rec check_keyword_stream (strm__ : _ Stream.t) = @@ -1211,11 +1042,11 @@ let gmake () = let id_table = Hashtbl.create 301 in let glexr = ref - {tok_func = (fun _ -> raise (Match_failure ("plexer.ml", 972, 17))); - tok_using = (fun _ -> raise (Match_failure ("plexer.ml", 972, 37))); - tok_removing = (fun _ -> raise (Match_failure ("plexer.ml", 972, 60))); - tok_match = (fun _ -> raise (Match_failure ("plexer.ml", 973, 18))); - tok_text = (fun _ -> raise (Match_failure ("plexer.ml", 973, 37))); + {tok_func = (fun _ -> raise (Match_failure ("", 741, 17))); + tok_using = (fun _ -> raise (Match_failure ("", 741, 37))); + tok_removing = (fun _ -> raise (Match_failure ("", 741, 60))); + tok_match = (fun _ -> raise (Match_failure ("", 742, 18))); + tok_text = (fun _ -> raise (Match_failure ("", 742, 37))); tok_comm = None} in let glex = @@ -1245,12 +1076,11 @@ let make () = let id_table = Hashtbl.create 301 in let glexr = ref - {tok_func = (fun _ -> raise (Match_failure ("plexer.ml", 1001, 17))); - tok_using = (fun _ -> raise (Match_failure ("plexer.ml", 1001, 37))); - tok_removing = - (fun _ -> raise (Match_failure ("plexer.ml", 1001, 60))); - tok_match = (fun _ -> raise (Match_failure ("plexer.ml", 1002, 18))); - tok_text = (fun _ -> raise (Match_failure ("plexer.ml", 1002, 37))); + {tok_func = (fun _ -> raise (Match_failure ("", 770, 17))); + tok_using = (fun _ -> raise (Match_failure ("", 770, 37))); + tok_removing = (fun _ -> raise (Match_failure ("", 770, 60))); + tok_match = (fun _ -> raise (Match_failure ("", 771, 18))); + tok_text = (fun _ -> raise (Match_failure ("", 771, 37))); tok_comm = None} in {func = func kwd_table glexr; using = using_token kwd_table id_table; |