diff options
Diffstat (limited to 'lex/lexer.mll')
-rw-r--r-- | lex/lexer.mll | 108 |
1 files changed, 68 insertions, 40 deletions
diff --git a/lex/lexer.mll b/lex/lexer.mll index 5249ca8bc0..fa1d15bda6 100644 --- a/lex/lexer.mll +++ b/lex/lexer.mll @@ -25,7 +25,7 @@ and comment_depth = ref 0 let in_pattern () = !brace_depth = 0 && !comment_depth = 0 -exception Lexical_error of string * int * int +exception Lexical_error of string * string * int * int let string_buff = Buffer.create 256 @@ -42,24 +42,32 @@ let char_for_backslash = function | 'r' -> '\r' | c -> c - -let line_num = ref 1 -let line_start_pos = ref 0 +let raise_lexical_error lexbuf msg = + let p = Lexing.lexeme_start_p lexbuf in + raise (Lexical_error (msg, + p.Lexing.pos_fname, + p.Lexing.pos_lnum, + p.Lexing.pos_cnum - p.Lexing.pos_bol + 1)) +;; let handle_lexical_error fn lexbuf = - let line = !line_num - and column = Lexing.lexeme_start lexbuf - !line_start_pos + 1 in + let p = Lexing.lexeme_start_p lexbuf in + let line = p.Lexing.pos_lnum + and column = p.Lexing.pos_cnum - p.Lexing.pos_bol + 1 + and file = p.Lexing.pos_fname + in try fn lexbuf - with Lexical_error (msg, 0, 0) -> - raise(Lexical_error(msg, line, column)) + with Lexical_error (msg, "", 0, 0) -> + raise(Lexical_error(msg, file, line, column)) let get_input_name () = Sys.argv.(Array.length Sys.argv - 1) let warning lexbuf msg = + let p = Lexing.lexeme_start_p lexbuf in Printf.eprintf "ocamllex warning:\nFile \"%s\", line %d, character %d: %s.\n" - (get_input_name ()) !line_num - (Lexing.lexeme_start lexbuf - !line_start_pos+1) msg; + p.Lexing.pos_fname p.Lexing.pos_lnum + (p.Lexing.pos_cnum - p.Lexing.pos_bol + 1) msg; flush stderr let decimal_code c d u = @@ -78,6 +86,27 @@ let char_for_hexadecimal_code d u = in Char.chr (val1 * 16 + val2) +let incr_loc lexbuf delta = + let pos = lexbuf.Lexing.lex_curr_p in + lexbuf.Lexing.lex_curr_p <- { pos with + Lexing.pos_lnum = pos.Lexing.pos_lnum + 1; + Lexing.pos_bol = pos.Lexing.pos_cnum - delta; + } +;; + +let update_loc lexbuf opt_file line = + let pos = lexbuf.Lexing.lex_curr_p in + let new_file = match opt_file with + | None -> pos.Lexing.pos_fname + | Some f -> f + in + lexbuf.Lexing.lex_curr_p <- { pos with + Lexing.pos_fname = new_file; + Lexing.pos_lnum = line; + Lexing.pos_bol = pos.Lexing.pos_cnum; + } +;; + } let identstart = @@ -91,9 +120,14 @@ rule main = parse [' ' '\013' '\009' '\012' ] + { main lexbuf } | '\010' - { line_start_pos := Lexing.lexeme_end lexbuf; - incr line_num; + { incr_loc lexbuf 0; main lexbuf } + | "#" [' ' '\t']* (['0'-'9']+ as num) [' ' '\t']* + ('\"' ([^ '\010' '\013' '\"']* as name) '\"')? + [^ '\010' '\013']* '\010' + { update_loc lexbuf name (int_of_string num); + main lexbuf + } | "(*" { comment_depth := 1; handle_lexical_error comment lexbuf; @@ -121,25 +155,22 @@ rule main = parse | "'" '\\' (['0'-'9'] as c) (['0'-'9'] as d) (['0'-'9'] as u)"'" { let v = decimal_code c d u in if v > 255 then - raise - (Lexical_error - (Printf.sprintf "illegal escape sequence \\%c%c%c" c d u, - !line_num, Lexing.lexeme_start lexbuf - !line_start_pos+1)) + raise_lexical_error lexbuf + (Printf.sprintf "illegal escape sequence \\%c%c%c" c d u) else Tchar v } | "'" '\\' 'x' (['0'-'9' 'a'-'f' 'A'-'F'] as d) (['0'-'9' 'a'-'f' 'A'-'F'] as u) "'" { Tchar(Char.code(char_for_hexadecimal_code d u)) } | "'" '\\' (_ as c) - { raise - (Lexical_error - (Printf.sprintf "illegal escape sequence \\%c" c, - !line_num, Lexing.lexeme_start lexbuf - !line_start_pos+1)) + { raise_lexical_error lexbuf + (Printf.sprintf "illegal escape sequence \\%c" c) } | '{' - { let n1 = Lexing.lexeme_end lexbuf - and l1 = !line_num - and s1 = !line_start_pos in + { let p = Lexing.lexeme_end_p lexbuf in + let n1 = p.Lexing.pos_cnum + and l1 = p.Lexing.pos_lnum + and s1 = p.Lexing.pos_bol in brace_depth := 1; let n2 = handle_lexical_error action lexbuf in Taction({start_pos = n1; end_pos = n2; @@ -155,20 +186,20 @@ rule main = parse | ')' { Trparen } | '^' { Tcaret } | '-' { Tdash } + | '#' { Tsharp } | eof { Tend } | _ - { raise(Lexical_error - ("illegal character " ^ String.escaped(Lexing.lexeme lexbuf), - !line_num, Lexing.lexeme_start lexbuf - !line_start_pos+1)) } + { raise_lexical_error lexbuf + ("illegal character " ^ String.escaped(Lexing.lexeme lexbuf)) + } (* String parsing comes from the compiler lexer *) and string = parse '"' { () } - | '\\' ("\010" | "\013" | "\013\010") [' ' '\009'] * - { line_start_pos := Lexing.lexeme_end lexbuf; - incr line_num; + | '\\' ("\010" | "\013" | "\013\010") ([' ' '\009'] * as spaces) + { incr_loc lexbuf (String.length spaces); string lexbuf } | '\\' (backslash_escapes as c) { store_string_char(char_for_backslash c); @@ -192,11 +223,10 @@ and string = parse store_string_char c ; string lexbuf } | eof - { raise(Lexical_error("unterminated string", 0, 0)) } + { raise(Lexical_error("unterminated string", "", 0, 0)) } | '\010' { store_string_char '\010'; - line_start_pos := Lexing.lexeme_end lexbuf; - incr line_num; + incr_loc lexbuf 0; string lexbuf } | _ as c { store_string_char c; @@ -223,10 +253,9 @@ and comment = parse { skip_char lexbuf ; comment lexbuf } | eof - { raise(Lexical_error("unterminated comment", 0, 0)) } + { raise(Lexical_error("unterminated comment", "", 0, 0)) } | '\010' - { line_start_pos := Lexing.lexeme_end lexbuf; - incr line_num; + { incr_loc lexbuf 0; comment lexbuf } | _ { comment lexbuf } @@ -251,18 +280,17 @@ and action = parse comment lexbuf; action lexbuf } | eof - { raise (Lexical_error("unterminated action", 0, 0)) } + { raise (Lexical_error("unterminated action", "", 0, 0)) } | '\010' - { line_start_pos := Lexing.lexeme_end lexbuf; - incr line_num; + { incr_loc lexbuf 0; action lexbuf } | _ { action lexbuf } and skip_char = parse | '\\'? '\010' "'" - { line_start_pos := Lexing.lexeme_end lexbuf; - incr line_num } + { incr_loc lexbuf 1; + } | [^ '\\' '\''] "'" (* regular character *) (* one character and numeric escape sequences *) | '\\' _ "'" |