7 files changed, 109 insertions, 48 deletions
diff --git a/lex/cset.ml b/lex/cset.ml
index 84c2a77142..ec68ee1c8b 100644
--- a/lex/cset.ml
+++ b/lex/cset.ml
@@ -11,6 +11,11 @@
 (*                                                                     *)
 (***********************************************************************)
 
+(* $Id$ *)
+
+
+exception Bad
+
 type t = (int * int) list
 
 
diff --git a/lex/cset.mli b/lex/cset.mli
index 0ebcac0e5f..fc2c9930c3 100644
--- a/lex/cset.mli
+++ b/lex/cset.mli
@@ -11,13 +11,18 @@
 (*                                                                     *)
 (***********************************************************************)
 
+(* $Id$ *)
+
 (* Set of characters encoded as list of intervals *)
 
 type t 
+exception Bad
 
 val empty : t
 val is_empty : t -> bool
 val all_chars : t
+exception Bad
+
 val all_chars_eof : t
 val eof : t
 val singleton : int ->  t
diff --git a/lex/lexer.mli b/lex/lexer.mli
index 569a5b266f..be34674eb3 100644
--- a/lex/lexer.mli
+++ b/lex/lexer.mli
@@ -14,7 +14,9 @@
 
 val main: Lexing.lexbuf -> Parser.token
 
-exception Lexical_error of string * int * int
+exception Lexical_error of string * string * int * int
 
+(*n
 val line_num: int ref
 val line_start_pos: int ref
+*)
diff --git a/lex/lexer.mll b/lex/lexer.mll
index 5249ca8bc0..fa1d15bda6 100644
--- a/lex/lexer.mll
+++ b/lex/lexer.mll
@@ -25,7 +25,7 @@ and comment_depth = ref 0
 
 let in_pattern () = !brace_depth = 0 && !comment_depth = 0
 
-exception Lexical_error of string * int * int
+exception Lexical_error of string * string * int * int
 
 let string_buff = Buffer.create 256
 
@@ -42,24 +42,32 @@ let char_for_backslash = function
   | 'r' -> '\r'
   | c   -> c
 
-
-let line_num = ref 1
-let line_start_pos = ref 0
+let raise_lexical_error lexbuf msg =
+  let p = Lexing.lexeme_start_p lexbuf in
+  raise (Lexical_error (msg,
+                        p.Lexing.pos_fname,
+                        p.Lexing.pos_lnum,
+                        p.Lexing.pos_cnum - p.Lexing.pos_bol + 1))
+;;
 
 let handle_lexical_error fn lexbuf =
-  let line = !line_num
-  and column = Lexing.lexeme_start lexbuf - !line_start_pos + 1 in
+  let p = Lexing.lexeme_start_p lexbuf in
+  let line = p.Lexing.pos_lnum
+  and column = p.Lexing.pos_cnum - p.Lexing.pos_bol + 1
+  and file = p.Lexing.pos_fname
+  in
   try
     fn lexbuf
-  with Lexical_error (msg, 0, 0) ->
-    raise(Lexical_error(msg, line, column))
+  with Lexical_error (msg, "", 0, 0) ->
+    raise(Lexical_error(msg, file, line, column))
 
 let get_input_name () = Sys.argv.(Array.length Sys.argv - 1)
   
 let warning lexbuf msg =
+  let p = Lexing.lexeme_start_p lexbuf in
   Printf.eprintf "ocamllex warning:\nFile \"%s\", line %d, character %d: %s.\n"
-    (get_input_name ()) !line_num
-    (Lexing.lexeme_start lexbuf - !line_start_pos+1) msg;
+    p.Lexing.pos_fname p.Lexing.pos_lnum
+    (p.Lexing.pos_cnum - p.Lexing.pos_bol + 1) msg;
   flush stderr
 
 let decimal_code  c d u =
@@ -78,6 +86,27 @@ let char_for_hexadecimal_code d u =
   in
   Char.chr (val1 * 16 + val2)
 
+let incr_loc lexbuf delta =
+  let pos = lexbuf.Lexing.lex_curr_p in
+  lexbuf.Lexing.lex_curr_p <- { pos with
+    Lexing.pos_lnum = pos.Lexing.pos_lnum + 1;
+    Lexing.pos_bol = pos.Lexing.pos_cnum - delta;
+  }
+;;
+
+let update_loc lexbuf opt_file line =
+  let pos = lexbuf.Lexing.lex_curr_p in
+  let new_file = match opt_file with
+                 | None -> pos.Lexing.pos_fname
+                 | Some f -> f
+  in
+  lexbuf.Lexing.lex_curr_p <- { pos with
+    Lexing.pos_fname = new_file;
+    Lexing.pos_lnum = line;
+    Lexing.pos_bol = pos.Lexing.pos_cnum;
+  }
+;;
+
 }
 
 let identstart =
@@ -91,9 +120,14 @@ rule main = parse
     [' ' '\013' '\009' '\012' ] + 
     { main lexbuf }
   | '\010'
-    { line_start_pos := Lexing.lexeme_end lexbuf;
-      incr line_num;
+    { incr_loc lexbuf 0;
       main lexbuf }
+  | "#" [' ' '\t']* (['0'-'9']+ as num) [' ' '\t']*
+    ('\"' ([^ '\010' '\013' '\"']* as name) '\"')?
+    [^ '\010' '\013']* '\010'
+    { update_loc lexbuf name (int_of_string num);
+      main lexbuf
+    }
   | "(*" 
     { comment_depth := 1;
       handle_lexical_error comment lexbuf;
@@ -121,25 +155,22 @@ rule main = parse
   | "'" '\\' (['0'-'9'] as c) (['0'-'9'] as d) (['0'-'9'] as u)"'" 
     { let v = decimal_code c d u in
       if v > 255 then
-        raise
-          (Lexical_error
-            (Printf.sprintf "illegal escape sequence \\%c%c%c" c d u,
-            !line_num, Lexing.lexeme_start lexbuf - !line_start_pos+1))
+        raise_lexical_error lexbuf
+          (Printf.sprintf "illegal escape sequence \\%c%c%c" c d u)
       else
         Tchar v }
   | "'" '\\' 'x'
        (['0'-'9' 'a'-'f' 'A'-'F'] as d) (['0'-'9' 'a'-'f' 'A'-'F'] as u) "'"
        { Tchar(Char.code(char_for_hexadecimal_code d u)) }
   | "'" '\\' (_ as c)
-    { raise
-       (Lexical_error
-         (Printf.sprintf "illegal escape sequence \\%c" c,
-          !line_num, Lexing.lexeme_start lexbuf - !line_start_pos+1))
+    { raise_lexical_error lexbuf
+        (Printf.sprintf "illegal escape sequence \\%c" c)
     }
   | '{' 
-    { let n1 = Lexing.lexeme_end lexbuf
-      and l1 = !line_num
-      and s1 = !line_start_pos in
+    { let p = Lexing.lexeme_end_p lexbuf in
+      let n1 = p.Lexing.pos_cnum
+      and l1 = p.Lexing.pos_lnum
+      and s1 = p.Lexing.pos_bol in
       brace_depth := 1;
       let n2 = handle_lexical_error action lexbuf in
       Taction({start_pos = n1; end_pos = n2;
@@ -155,20 +186,20 @@ rule main = parse
   | ')'  { Trparen }
   | '^'  { Tcaret }
   | '-'  { Tdash }
+  | '#'  { Tsharp }
   | eof  { Tend }
   | _
-    { raise(Lexical_error
-             ("illegal character " ^ String.escaped(Lexing.lexeme lexbuf),
-              !line_num, Lexing.lexeme_start lexbuf - !line_start_pos+1)) }
+    { raise_lexical_error lexbuf
+        ("illegal character " ^ String.escaped(Lexing.lexeme lexbuf))
+    }
 
 
 (* String parsing comes from the compiler lexer *)
 and string = parse
     '"' 
     { () }
-   | '\\' ("\010" | "\013" | "\013\010") [' ' '\009'] *
-    { line_start_pos := Lexing.lexeme_end lexbuf;
-      incr line_num;
+   | '\\' ("\010" | "\013" | "\013\010") ([' ' '\009'] * as spaces)
+    { incr_loc lexbuf (String.length spaces);
       string lexbuf }
   | '\\' (backslash_escapes as c)
     { store_string_char(char_for_backslash c);
@@ -192,11 +223,10 @@ and string = parse
       store_string_char c ;
       string lexbuf }
   | eof 
-    { raise(Lexical_error("unterminated string", 0, 0)) }
+    { raise(Lexical_error("unterminated string", "", 0, 0)) }
   | '\010'
     { store_string_char '\010';
-      line_start_pos := Lexing.lexeme_end lexbuf;
-      incr line_num;
+      incr_loc lexbuf 0;
       string lexbuf }
   | _ as c
     { store_string_char c;
@@ -223,10 +253,9 @@ and comment = parse
     { skip_char lexbuf ;
       comment lexbuf }
   | eof 
-    { raise(Lexical_error("unterminated comment", 0, 0)) }
+    { raise(Lexical_error("unterminated comment", "", 0, 0)) }
   | '\010'
-    { line_start_pos := Lexing.lexeme_end lexbuf;
-      incr line_num;
+    { incr_loc lexbuf 0;
       comment lexbuf }
   | _ 
     { comment lexbuf }
@@ -251,18 +280,17 @@ and action = parse
       comment lexbuf;
       action lexbuf }
   | eof 
-    { raise (Lexical_error("unterminated action", 0, 0)) }
+    { raise (Lexical_error("unterminated action", "", 0, 0)) }
   | '\010'
-    { line_start_pos := Lexing.lexeme_end lexbuf;
-      incr line_num;
+    { incr_loc lexbuf 0;
       action lexbuf }
   | _ 
     { action lexbuf }
 
 and skip_char = parse
   | '\\'? '\010' "'"
-     { line_start_pos := Lexing.lexeme_end lexbuf;
-       incr line_num }
+     { incr_loc lexbuf 1;
+     }
   | [^ '\\' '\''] "'" (* regular character *)
 (* one character and numeric escape sequences *)
   | '\\' _ "'"
diff --git a/lex/lexgen.ml b/lex/lexgen.ml
index 050c00b4cc..8d665e77ee 100644
--- a/lex/lexgen.ml
+++ b/lex/lexgen.ml
@@ -155,7 +155,7 @@ let rec do_find_opt = function
       let opt1,all1 = do_find_opt e1
       and opt2,all2 = do_find_opt e2 in
       StringSet.union
-        (stringset_delta opt1 opt2)
+        (StringSet.union opt1 opt2)
         (stringset_delta all1 all2),
       StringSet.union all1 all2
   | Repetition e  ->
diff --git a/lex/main.ml b/lex/main.ml
index d97820151f..03b9ac91ef 100644
--- a/lex/main.ml
+++ b/lex/main.ml
@@ -55,6 +55,9 @@ let main () =
   let oc = open_out dest_name in
   let tr = Common.open_tracker dest_name oc in
   let lexbuf = Lexing.from_channel ic in
+  lexbuf.Lexing.lex_curr_p <-
+    {Lexing.pos_fname = source_name; Lexing.pos_lnum = 1;
+     Lexing.pos_bol = 0; Lexing.pos_cnum = 0};
   try
     let def = Parser.lexer_definition Lexer.main lexbuf in
     let (entries, transitions) = Lexgen.make_dfa def.entrypoints in
@@ -76,15 +79,22 @@ let main () =
     Common.close_tracker tr;
     Sys.remove dest_name;
     begin match exn with
-      Parsing.Parse_error ->
+    | Cset.Bad ->
+        let p = Lexing.lexeme_start_p lexbuf in
+        Printf.fprintf stderr
+          "File \"%s\", line %d, character %d: character set expected.\n"
+          p.Lexing.pos_fname p.Lexing.pos_lnum
+          (p.Lexing.pos_cnum - p.Lexing.pos_bol)
+    | Parsing.Parse_error ->
+        let p = Lexing.lexeme_start_p lexbuf in
         Printf.fprintf stderr
           "File \"%s\", line %d, character %d: syntax error.\n"
-          source_name !Lexer.line_num
-          (Lexing.lexeme_start lexbuf - !Lexer.line_start_pos)
-    | Lexer.Lexical_error(msg, line, col) ->
+          p.Lexing.pos_fname p.Lexing.pos_lnum
+          (p.Lexing.pos_cnum - p.Lexing.pos_bol)
+    | Lexer.Lexical_error(msg, file, line, col) ->
         Printf.fprintf stderr
           "File \"%s\", line %d, character %d: %s.\n"
-          source_name line col msg
+          file line col msg
     | Lexgen.Memory_overflow ->
         Printf.fprintf stderr
           "File \"%s\":\n Position memory overflow, too many bindings\n"
diff --git a/lex/parser.mly b/lex/parser.mly
index a1921309b2..8f6ff70525 100644
--- a/lex/parser.mly
+++ b/lex/parser.mly
@@ -40,6 +40,10 @@ let rec remove_as = function
   | Alternative (e1, e2) -> Alternative (remove_as e1, remove_as e2)
   | Repetition e -> Repetition (remove_as e)
 
+let as_cset = function
+  | Characters s -> s
+  | _ -> raise Cset.Bad
+
 %}
 
 %token <string> Tident
@@ -47,9 +51,10 @@ let rec remove_as = function
 %token <string> Tstring
 %token <Syntax.location> Taction
 %token Trule Tparse Tparse_shortest Tand Tequal Tend Tor Tunderscore Teof Tlbracket Trbracket
-%token Tstar Tmaybe Tplus Tlparen Trparen Tcaret Tdash Tlet Tas
+%token Tstar Tmaybe Tplus Tlparen Trparen Tcaret Tdash Tlet Tas Tsharp
 
 %right Tas
+%left Tsharp
 %left Tor
 %nonassoc CONCAT
 %nonassoc Tmaybe Tstar Tplus
@@ -131,6 +136,12 @@ regexp:
         { Alternative(Epsilon, $1) }
   | regexp Tplus
         { Sequence(Repetition (remove_as $1), $1) }
+  | regexp Tsharp regexp
+        {
+          let s1 = as_cset $1
+          and s2 = as_cset $3 in
+          Characters (Cset.diff s1 s2)
+        } 
   | regexp Tor regexp
         { Alternative($1,$3) }
   | regexp regexp %prec CONCAT