summaryrefslogtreecommitdiff
path: root/lex
diff options
context:
space:
mode:
authorJun FURUSE / 古瀬 淳 <jun.furuse@gmail.com>2004-06-18 05:04:14 +0000
committerJun FURUSE / 古瀬 淳 <jun.furuse@gmail.com>2004-06-18 05:04:14 +0000
commit5e1bf20850aaa9b1ceb86a971848609ee9e84c47 (patch)
treef3a6e5b5c38263fe527e6275ff95425f12637226 /lex
parent8ec769214e067da9ee8b33d05f4ef275e9269dd5 (diff)
downloadocaml-gcaml.tar.gz
port to the latest ocaml (2004/06/18)gcaml
git-svn-id: http://caml.inria.fr/svn/ocaml/branches/gcaml@6419 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02
Diffstat (limited to 'lex')
-rw-r--r--lex/cset.ml5
-rw-r--r--lex/cset.mli5
-rw-r--r--lex/lexer.mli4
-rw-r--r--lex/lexer.mll108
-rw-r--r--lex/lexgen.ml2
-rw-r--r--lex/main.ml20
-rw-r--r--lex/parser.mly13
7 files changed, 109 insertions, 48 deletions
diff --git a/lex/cset.ml b/lex/cset.ml
index 84c2a77142..ec68ee1c8b 100644
--- a/lex/cset.ml
+++ b/lex/cset.ml
@@ -11,6 +11,11 @@
(* *)
(***********************************************************************)
+(* $Id$ *)
+
+
+exception Bad
+
type t = (int * int) list
diff --git a/lex/cset.mli b/lex/cset.mli
index 0ebcac0e5f..fc2c9930c3 100644
--- a/lex/cset.mli
+++ b/lex/cset.mli
@@ -11,13 +11,18 @@
(* *)
(***********************************************************************)
+(* $Id$ *)
+
(* Set of characters encoded as list of intervals *)
type t
+exception Bad
val empty : t
val is_empty : t -> bool
val all_chars : t
+exception Bad
+
val all_chars_eof : t
val eof : t
val singleton : int -> t
diff --git a/lex/lexer.mli b/lex/lexer.mli
index 569a5b266f..be34674eb3 100644
--- a/lex/lexer.mli
+++ b/lex/lexer.mli
@@ -14,7 +14,9 @@
val main: Lexing.lexbuf -> Parser.token
-exception Lexical_error of string * int * int
+exception Lexical_error of string * string * int * int
+(*n
val line_num: int ref
val line_start_pos: int ref
+*)
diff --git a/lex/lexer.mll b/lex/lexer.mll
index 5249ca8bc0..fa1d15bda6 100644
--- a/lex/lexer.mll
+++ b/lex/lexer.mll
@@ -25,7 +25,7 @@ and comment_depth = ref 0
let in_pattern () = !brace_depth = 0 && !comment_depth = 0
-exception Lexical_error of string * int * int
+exception Lexical_error of string * string * int * int
let string_buff = Buffer.create 256
@@ -42,24 +42,32 @@ let char_for_backslash = function
| 'r' -> '\r'
| c -> c
-
-let line_num = ref 1
-let line_start_pos = ref 0
+let raise_lexical_error lexbuf msg =
+ let p = Lexing.lexeme_start_p lexbuf in
+ raise (Lexical_error (msg,
+ p.Lexing.pos_fname,
+ p.Lexing.pos_lnum,
+ p.Lexing.pos_cnum - p.Lexing.pos_bol + 1))
+;;
let handle_lexical_error fn lexbuf =
- let line = !line_num
- and column = Lexing.lexeme_start lexbuf - !line_start_pos + 1 in
+ let p = Lexing.lexeme_start_p lexbuf in
+ let line = p.Lexing.pos_lnum
+ and column = p.Lexing.pos_cnum - p.Lexing.pos_bol + 1
+ and file = p.Lexing.pos_fname
+ in
try
fn lexbuf
- with Lexical_error (msg, 0, 0) ->
- raise(Lexical_error(msg, line, column))
+ with Lexical_error (msg, "", 0, 0) ->
+ raise(Lexical_error(msg, file, line, column))
let get_input_name () = Sys.argv.(Array.length Sys.argv - 1)
let warning lexbuf msg =
+ let p = Lexing.lexeme_start_p lexbuf in
Printf.eprintf "ocamllex warning:\nFile \"%s\", line %d, character %d: %s.\n"
- (get_input_name ()) !line_num
- (Lexing.lexeme_start lexbuf - !line_start_pos+1) msg;
+ p.Lexing.pos_fname p.Lexing.pos_lnum
+ (p.Lexing.pos_cnum - p.Lexing.pos_bol + 1) msg;
flush stderr
let decimal_code c d u =
@@ -78,6 +86,27 @@ let char_for_hexadecimal_code d u =
in
Char.chr (val1 * 16 + val2)
+let incr_loc lexbuf delta =
+ let pos = lexbuf.Lexing.lex_curr_p in
+ lexbuf.Lexing.lex_curr_p <- { pos with
+ Lexing.pos_lnum = pos.Lexing.pos_lnum + 1;
+ Lexing.pos_bol = pos.Lexing.pos_cnum - delta;
+ }
+;;
+
+let update_loc lexbuf opt_file line =
+ let pos = lexbuf.Lexing.lex_curr_p in
+ let new_file = match opt_file with
+ | None -> pos.Lexing.pos_fname
+ | Some f -> f
+ in
+ lexbuf.Lexing.lex_curr_p <- { pos with
+ Lexing.pos_fname = new_file;
+ Lexing.pos_lnum = line;
+ Lexing.pos_bol = pos.Lexing.pos_cnum;
+ }
+;;
+
}
let identstart =
@@ -91,9 +120,14 @@ rule main = parse
[' ' '\013' '\009' '\012' ] +
{ main lexbuf }
| '\010'
- { line_start_pos := Lexing.lexeme_end lexbuf;
- incr line_num;
+ { incr_loc lexbuf 0;
main lexbuf }
+ | "#" [' ' '\t']* (['0'-'9']+ as num) [' ' '\t']*
+ ('\"' ([^ '\010' '\013' '\"']* as name) '\"')?
+ [^ '\010' '\013']* '\010'
+ { update_loc lexbuf name (int_of_string num);
+ main lexbuf
+ }
| "(*"
{ comment_depth := 1;
handle_lexical_error comment lexbuf;
@@ -121,25 +155,22 @@ rule main = parse
| "'" '\\' (['0'-'9'] as c) (['0'-'9'] as d) (['0'-'9'] as u)"'"
{ let v = decimal_code c d u in
if v > 255 then
- raise
- (Lexical_error
- (Printf.sprintf "illegal escape sequence \\%c%c%c" c d u,
- !line_num, Lexing.lexeme_start lexbuf - !line_start_pos+1))
+ raise_lexical_error lexbuf
+ (Printf.sprintf "illegal escape sequence \\%c%c%c" c d u)
else
Tchar v }
| "'" '\\' 'x'
(['0'-'9' 'a'-'f' 'A'-'F'] as d) (['0'-'9' 'a'-'f' 'A'-'F'] as u) "'"
{ Tchar(Char.code(char_for_hexadecimal_code d u)) }
| "'" '\\' (_ as c)
- { raise
- (Lexical_error
- (Printf.sprintf "illegal escape sequence \\%c" c,
- !line_num, Lexing.lexeme_start lexbuf - !line_start_pos+1))
+ { raise_lexical_error lexbuf
+ (Printf.sprintf "illegal escape sequence \\%c" c)
}
| '{'
- { let n1 = Lexing.lexeme_end lexbuf
- and l1 = !line_num
- and s1 = !line_start_pos in
+ { let p = Lexing.lexeme_end_p lexbuf in
+ let n1 = p.Lexing.pos_cnum
+ and l1 = p.Lexing.pos_lnum
+ and s1 = p.Lexing.pos_bol in
brace_depth := 1;
let n2 = handle_lexical_error action lexbuf in
Taction({start_pos = n1; end_pos = n2;
@@ -155,20 +186,20 @@ rule main = parse
| ')' { Trparen }
| '^' { Tcaret }
| '-' { Tdash }
+ | '#' { Tsharp }
| eof { Tend }
| _
- { raise(Lexical_error
- ("illegal character " ^ String.escaped(Lexing.lexeme lexbuf),
- !line_num, Lexing.lexeme_start lexbuf - !line_start_pos+1)) }
+ { raise_lexical_error lexbuf
+ ("illegal character " ^ String.escaped(Lexing.lexeme lexbuf))
+ }
(* String parsing comes from the compiler lexer *)
and string = parse
'"'
{ () }
- | '\\' ("\010" | "\013" | "\013\010") [' ' '\009'] *
- { line_start_pos := Lexing.lexeme_end lexbuf;
- incr line_num;
+ | '\\' ("\010" | "\013" | "\013\010") ([' ' '\009'] * as spaces)
+ { incr_loc lexbuf (String.length spaces);
string lexbuf }
| '\\' (backslash_escapes as c)
{ store_string_char(char_for_backslash c);
@@ -192,11 +223,10 @@ and string = parse
store_string_char c ;
string lexbuf }
| eof
- { raise(Lexical_error("unterminated string", 0, 0)) }
+ { raise(Lexical_error("unterminated string", "", 0, 0)) }
| '\010'
{ store_string_char '\010';
- line_start_pos := Lexing.lexeme_end lexbuf;
- incr line_num;
+ incr_loc lexbuf 0;
string lexbuf }
| _ as c
{ store_string_char c;
@@ -223,10 +253,9 @@ and comment = parse
{ skip_char lexbuf ;
comment lexbuf }
| eof
- { raise(Lexical_error("unterminated comment", 0, 0)) }
+ { raise(Lexical_error("unterminated comment", "", 0, 0)) }
| '\010'
- { line_start_pos := Lexing.lexeme_end lexbuf;
- incr line_num;
+ { incr_loc lexbuf 0;
comment lexbuf }
| _
{ comment lexbuf }
@@ -251,18 +280,17 @@ and action = parse
comment lexbuf;
action lexbuf }
| eof
- { raise (Lexical_error("unterminated action", 0, 0)) }
+ { raise (Lexical_error("unterminated action", "", 0, 0)) }
| '\010'
- { line_start_pos := Lexing.lexeme_end lexbuf;
- incr line_num;
+ { incr_loc lexbuf 0;
action lexbuf }
| _
{ action lexbuf }
and skip_char = parse
| '\\'? '\010' "'"
- { line_start_pos := Lexing.lexeme_end lexbuf;
- incr line_num }
+ { incr_loc lexbuf 1;
+ }
| [^ '\\' '\''] "'" (* regular character *)
(* one character and numeric escape sequences *)
| '\\' _ "'"
diff --git a/lex/lexgen.ml b/lex/lexgen.ml
index 050c00b4cc..8d665e77ee 100644
--- a/lex/lexgen.ml
+++ b/lex/lexgen.ml
@@ -155,7 +155,7 @@ let rec do_find_opt = function
let opt1,all1 = do_find_opt e1
and opt2,all2 = do_find_opt e2 in
StringSet.union
- (stringset_delta opt1 opt2)
+ (StringSet.union opt1 opt2)
(stringset_delta all1 all2),
StringSet.union all1 all2
| Repetition e ->
diff --git a/lex/main.ml b/lex/main.ml
index d97820151f..03b9ac91ef 100644
--- a/lex/main.ml
+++ b/lex/main.ml
@@ -55,6 +55,9 @@ let main () =
let oc = open_out dest_name in
let tr = Common.open_tracker dest_name oc in
let lexbuf = Lexing.from_channel ic in
+ lexbuf.Lexing.lex_curr_p <-
+ {Lexing.pos_fname = source_name; Lexing.pos_lnum = 1;
+ Lexing.pos_bol = 0; Lexing.pos_cnum = 0};
try
let def = Parser.lexer_definition Lexer.main lexbuf in
let (entries, transitions) = Lexgen.make_dfa def.entrypoints in
@@ -76,15 +79,22 @@ let main () =
Common.close_tracker tr;
Sys.remove dest_name;
begin match exn with
- Parsing.Parse_error ->
+ | Cset.Bad ->
+ let p = Lexing.lexeme_start_p lexbuf in
+ Printf.fprintf stderr
+ "File \"%s\", line %d, character %d: character set expected.\n"
+ p.Lexing.pos_fname p.Lexing.pos_lnum
+ (p.Lexing.pos_cnum - p.Lexing.pos_bol)
+ | Parsing.Parse_error ->
+ let p = Lexing.lexeme_start_p lexbuf in
Printf.fprintf stderr
"File \"%s\", line %d, character %d: syntax error.\n"
- source_name !Lexer.line_num
- (Lexing.lexeme_start lexbuf - !Lexer.line_start_pos)
- | Lexer.Lexical_error(msg, line, col) ->
+ p.Lexing.pos_fname p.Lexing.pos_lnum
+ (p.Lexing.pos_cnum - p.Lexing.pos_bol)
+ | Lexer.Lexical_error(msg, file, line, col) ->
Printf.fprintf stderr
"File \"%s\", line %d, character %d: %s.\n"
- source_name line col msg
+ file line col msg
| Lexgen.Memory_overflow ->
Printf.fprintf stderr
"File \"%s\":\n Position memory overflow, too many bindings\n"
diff --git a/lex/parser.mly b/lex/parser.mly
index a1921309b2..8f6ff70525 100644
--- a/lex/parser.mly
+++ b/lex/parser.mly
@@ -40,6 +40,10 @@ let rec remove_as = function
| Alternative (e1, e2) -> Alternative (remove_as e1, remove_as e2)
| Repetition e -> Repetition (remove_as e)
+let as_cset = function
+ | Characters s -> s
+ | _ -> raise Cset.Bad
+
%}
%token <string> Tident
@@ -47,9 +51,10 @@ let rec remove_as = function
%token <string> Tstring
%token <Syntax.location> Taction
%token Trule Tparse Tparse_shortest Tand Tequal Tend Tor Tunderscore Teof Tlbracket Trbracket
-%token Tstar Tmaybe Tplus Tlparen Trparen Tcaret Tdash Tlet Tas
+%token Tstar Tmaybe Tplus Tlparen Trparen Tcaret Tdash Tlet Tas Tsharp
%right Tas
+%left Tsharp
%left Tor
%nonassoc CONCAT
%nonassoc Tmaybe Tstar Tplus
@@ -131,6 +136,12 @@ regexp:
{ Alternative(Epsilon, $1) }
| regexp Tplus
{ Sequence(Repetition (remove_as $1), $1) }
+ | regexp Tsharp regexp
+ {
+ let s1 = as_cset $1
+ and s2 = as_cset $3 in
+ Characters (Cset.diff s1 s2)
+ }
| regexp Tor regexp
{ Alternative($1,$3) }
| regexp regexp %prec CONCAT