diff options
author | Pierre Weis <Pierre.Weis@inria.fr> | 2002-07-28 21:29:42 +0000 |
---|---|---|
committer | Pierre Weis <Pierre.Weis@inria.fr> | 2002-07-28 21:29:42 +0000 |
commit | 6c15753d0f24be0ce559b8e34e7f89da56f8156a (patch) | |
tree | c0f14980650f10a79bfdfb4ca185812f09e8d849 | |
parent | 8bf3b5a36a518b5c5d548015020159888ba29ee1 (diff) | |
download | ocaml-6c15753d0f24be0ce559b8e34e7f89da56f8156a.tar.gz |
Revu le traitement de %S (les @c ne sont pas nécessaires vu les
guillemets délimiteurs).
Revu le traitement des blancs en fin de fichier et les formats à
indication de scanning erronée (format terminé par @).
Revu le type de kscanf.
Revu la documentation des indications de scanning.
git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@5051 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02
-rw-r--r-- | stdlib/scanf.ml | 82 | ||||
-rw-r--r-- | stdlib/scanf.mli | 33 |
2 files changed, 61 insertions, 54 deletions
diff --git a/stdlib/scanf.ml b/stdlib/scanf.ml index 1b699e080f..a6fca39707 100644 --- a/stdlib/scanf.ml +++ b/stdlib/scanf.ml @@ -15,7 +15,7 @@ (* The run-time library for scanners. *) -(* {6 Scanning buffers} *) +(* Scanning buffers. *) module type SCANNING = sig type scanbuf;; @@ -163,22 +163,21 @@ let token_bool ib = | "false" -> false | s -> bad_input ("invalid boolean " ^ s);; -(* All the functions that convert a string to a number raise the exception - Failure when the convertion is not possible. - This exception is then trapped in kscanf. *) - let token_int_literal conv ib = match conv with - 'd' | 'i' | 'u' -> Scanning.token ib + | 'd' | 'i' | 'u' -> Scanning.token ib | 'o' -> "0o" ^ Scanning.token ib | 'x' | 'X' -> "0x" ^ Scanning.token ib | _ -> assert false +(* All the functions that convert a string to a number raise the exception + Failure when the conversion is not possible. + This exception is then trapped in kscanf. *) let token_int conv ib = int_of_string (token_int_literal conv ib);; let token_float ib = float_of_string (Scanning.token ib);; (* To scan native ints, int32 and int64 integers. - We cannot access to convertions to/from strings for those types, + We cannot access to conversions to/from strings for those types, Nativeint.of_string, Int32.of_string, and Int64.of_string, since those modules are not available to scanf. However, we can bind and use the corresponding primitives that are @@ -245,7 +244,7 @@ let scan_optionally_signed_decimal_int max ib = scan_unsigned_decimal_int max ib;; (* Scan an unsigned integer that could be given in any (common) basis. - If digits are prefixed by one of 0x, 0X, 0o, 0b, the number is + If digits are prefixed by one of 0x, 0X, 0o, or 0b, the number is assumed to be written respectively in hexadecimal, hexadecimal, octal, or binary. 
*) let scan_unsigned_int max ib = @@ -337,8 +336,9 @@ let char_for_backslash = let char_for_decimal_code c0 c1 c2 = let c = - 100 * (int_of_char c0 - 48) + 10 * (int_of_char c1 - 48) + - (int_of_char c2 - 48) in + 100 * (int_of_char c0 - 48) + + 10 * (int_of_char c1 - 48) + + (int_of_char c2 - 48) in if c < 0 || c > 255 then bad_input (Printf.sprintf "bad char \\%c%c%c" c0 c1 c2) else char_of_int c;; @@ -377,21 +377,19 @@ let scan_Char max ib = | c, _ -> bad_input_escape c in loop 3 max;; -let scan_String stp max ib = +let scan_String max ib = let rec loop s max = if max = 0 || Scanning.end_of_input ib then bad_input "a string" else let c = Scanning.peek_char ib in - if stp = [] then - match c, s with - | '"', true (* '"' helping Emacs *) -> - Scanning.next_char ib; loop false (max - 1) - | '"', false (* '"' helping Emacs *) -> - Scanning.next_char ib; max - 1 - | '\\', false -> - Scanning.next_char ib; loop false (scan_backslash_char (max - 1) ib) - | c, false -> loop false (Scanning.store_char ib c max) - | c, _ -> bad_input_char c else - if List.mem c stp then max else loop s (Scanning.store_char ib c max) in + match c, s with + | '"', true (* '"' helping Emacs *) -> + Scanning.next_char ib; loop false (max - 1) + | '"', false (* '"' helping Emacs *) -> + Scanning.next_char ib; max - 1 + | '\\', false -> + Scanning.next_char ib; loop false (scan_backslash_char (max - 1) ib) + | c, false -> loop false (Scanning.store_char ib c max) + | c, _ -> bad_input_char c in loop true max;; let scan_bool max ib = @@ -480,10 +478,15 @@ external string_of_format : ('a, 'b, 'c) format -> string = "%identity";; tokens as specified by the format. When it founds one token, it converts it as specified, remembers the converted value as a future argument to the function [f], and continues scanning. - If the scanning or some convertion fails, the scanning function + + If the entire scanning succeeds (i.e. 
the format string has been + exhausted and the buffer has provided tokens according to the + format string), the tokens are applied to [f]. + + If the scanning or some conversion fails, the scanning function aborts and applies the scanning buffer and a string that explains the error to the error continuation [ef]. *) -let kscanf ib (fmt : ('a, 'b, 'c) format) f ef = +let kscanf ib ef fmt f = let fmt = string_of_format fmt in let lim = String.length fmt - 1 in @@ -494,6 +497,7 @@ let kscanf ib (fmt : ('a, 'b, 'c) format) f ef = let rec scan f i = if i > lim then f else match fmt.[i] with + | ' ' | '\t' | '\r' | '\n' -> skip_whites ib; scan f (i + 1) | c when Scanning.end_of_input ib -> raise End_of_file | '%' -> scan_width f (i + 1) | '@' as t -> @@ -505,7 +509,6 @@ let kscanf ib (fmt : ('a, 'b, 'c) format) f ef = | c when Scanning.peek_char ib = c -> Scanning.next_char ib; scan f (i + 1) | c -> bad_input_char (Scanning.peek_char ib) end - | ' ' | '\r' | '\t' | '\n' -> skip_whites ib; scan f (i + 1) | c when Scanning.peek_char ib = c -> Scanning.next_char ib; scan f (i + 1) | c -> bad_input_char (Scanning.peek_char ib) @@ -542,31 +545,31 @@ let kscanf ib (fmt : ('a, 'b, 'c) format) f ef = | 'f' | 'g' | 'G' | 'e' | 'E' -> let x = scan_float max ib in scan (stack f (token_float ib)) (i + 1) - | 's' | 'S' as conv -> + | 's' -> let i, stp = scan_stoppers (i + 1) in - let x = - if conv = 's' - then scan_string stp max ib - else scan_String stp max ib in + let x = scan_string stp max ib in scan (stack f (token_string ib)) (i + 1) - | 'b' -> - let x = scan_bool max ib in - scan (stack f (token_bool ib)) (i + 1) | '[' -> let i, char_set = read_char_set fmt (i + 1) in let i, stp = scan_stoppers (i + 1) in let x = scan_chars_in_char_set stp char_set max ib in scan (stack f (token_string ib)) (i + 1) + | 'S' -> + let x = scan_String max ib in + scan (stack f (token_string ib)) (i + 1) + | 'b' -> + let x = scan_bool max ib in + scan (stack f (token_bool ib)) (i + 1) | 'l' | 'n' 
| 'L' as t -> let i = i + 1 in if i > lim then bad_format fmt (i - 1) t else begin match fmt.[i] with | 'd' | 'i' | 'o' | 'u' | 'x' | 'X' as conv -> - let x = scan_int conv max ib in - begin match t with - | 'l' -> scan (stack f (token_int32 conv ib)) (i + 1) - | 'L' -> scan (stack f (token_int64 conv ib)) (i + 1) - | _ -> scan (stack f (token_nativeint conv ib)) (i + 1) end + let x = scan_int conv max ib in + begin match t with + | 'l' -> scan (stack f (token_int32 conv ib)) (i + 1) + | 'L' -> scan (stack f (token_int64 conv ib)) (i + 1) + | _ -> scan (stack f (token_nativeint conv ib)) (i + 1) end | c -> bad_format fmt i c end | 'N' -> let x = Scanning.char_count ib in @@ -581,6 +584,7 @@ let kscanf ib (fmt : ('a, 'b, 'c) format) f ef = if i > lim then i - 1, [] else match fmt.[i] with | '@' when i < lim -> let i = i + 1 in i, [fmt.[i]] + | '@' as c when i = lim -> bad_format fmt i c | _ -> i - 1, [] in Scanning.reset_token ib; @@ -591,7 +595,7 @@ let kscanf ib (fmt : ('a, 'b, 'c) format) f ef = stack (delay ef ib) exc in return v;; -let bscanf ib fmt f = kscanf ib fmt f scanf_bad_input;; +let bscanf ib = kscanf ib scanf_bad_input;; let fscanf ic = bscanf (Scanning.from_channel ic);; diff --git a/stdlib/scanf.mli b/stdlib/scanf.mli index 7eb4725203..f2cedebc2f 100644 --- a/stdlib/scanf.mli +++ b/stdlib/scanf.mli @@ -40,7 +40,7 @@ val from_function : (unit -> char) -> scanbuf;; the given function as its reading method. When scanning needs one more character, the given function is called. When the function has no more character to provide, it must set - an end of input condition by raising the exception [End_of_file]. *) + an end-of-input condition by raising the exception [End_of_file]. *) end;; @@ -57,6 +57,8 @@ val bscanf : Raise [Scanf.Scan_failure] if the given input does not match the format. + Raise [Failure] if a conversion to a number is not possible. 
+ Raise [End_of_file] if the end of input is encountered while scanning and the input matches the given format so far. @@ -66,8 +68,7 @@ val bscanf : characters of the input, - conversion specifications, each of which causes reading and conversion of one argument for [f], - - scanning indications to specify boundaries of tokens and the - amount of space to skip between tokens. + - scanning indications to specify boundaries of tokens. Among plain characters the space character (ASCII code 32) has a special meaning: it matches ``whitespace'', that is any number of tab, @@ -86,10 +87,10 @@ val bscanf : - [o]: reads an unsigned octal integer. - [s]: reads a string argument (by default strings end with a space). - [S]: reads a delimited string argument (delimiters and special - escaped characters follow the lexical conventions of Objective Caml). + escaped characters follow the lexical conventions of Caml). - [c]: reads a single character. - [C]: reads a single delimited character (delimiters and special - escaped characters follow the lexical conventions of Objective Caml). + escaped characters follow the lexical conventions of Caml). - [f], [e], [E], [g], [G]: reads an optionally signed floating-point number in decimal notation, in the style [dddd.ddd e/E+-dd]. - [b]: reads a boolean argument ([true] or [false]). @@ -111,17 +112,19 @@ val bscanf : For instance, [%6d] reads an integer, having at most 6 decimal digits; and [%4f] reads a float with 4 characters. - The scanning indications are introduced by a [@] character, followed - by any character [c]. Its effect is to skip input characters - until a matching [c] is found. - If a scanning indication immediately follows a [s] - conversion specification, it specifies the boundary of the token - (that is the character immediately after the end of the token). 
For - instance, ["%s@\t"] reads a string up to the next tabulation + Scanning indications appear just after string conversions [s] and + [\[ range \]] to delimit the end of the token. A scanning + indication is introduced by a [@] character, followed by some + constant character [c]. It means that the string token should end + just before the next matching [c]. If no [c] character is + encountered, the string token spreads as much as possible. + For instance, ["%s@\t"] reads a string up to the next tabulation + character. If the scanning indication [\@c] does not follow a + string conversion, it is ignored and treated as a plain [c] character. Note: the [scanf] facility is not intended for heavy duty - lexical anaysis and parsing. If it appears too slow or not expressive + lexical analysis and parsing. If it appears not expressive enough for your needs, several alternative exists: regular expressions (module [Str]), stream parsers, [ocamllex]-generated lexers, [ocamlyacc]-generated parsers. *) @@ -137,8 +140,8 @@ val scanf : ('a, Scanning.scanbuf, 'b) format -> 'a -> 'b;; (the standard input channel). *) val kscanf : - Scanning.scanbuf -> ('a, 'b, 'c) format -> 'a -> - (Scanning.scanbuf -> exn -> 'c) -> 'c;; + Scanning.scanbuf -> (Scanning.scanbuf -> exn -> 'a) -> + ('b, Scanning.scanbuf, 'a) format -> 'b -> 'a;; (** Same as {!Scanf.bscanf}, but takes an additional function argument [ef] that is called in case of error: if the scanning process or some convertion fails, the scanning function aborts and applies the |