summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPierre Weis <Pierre.Weis@inria.fr>2002-07-28 21:29:42 +0000
committerPierre Weis <Pierre.Weis@inria.fr>2002-07-28 21:29:42 +0000
commit6c15753d0f24be0ce559b8e34e7f89da56f8156a (patch)
treec0f14980650f10a79bfdfb4ca185812f09e8d849
parent8bf3b5a36a518b5c5d548015020159888ba29ee1 (diff)
downloadocaml-6c15753d0f24be0ce559b8e34e7f89da56f8156a.tar.gz
Revu le traitement de %S (les @c ne sont pas nécessaires vus les
guillemets délimiteurs). Revu le traitement des blancs en fin de fichier et les formats à indication de scanning erronées (format terminé par @). Revu le type de kscanf. Revu la documentation des indications de scanning. git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@5051 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02
-rw-r--r--stdlib/scanf.ml82
-rw-r--r--stdlib/scanf.mli33
2 files changed, 61 insertions, 54 deletions
diff --git a/stdlib/scanf.ml b/stdlib/scanf.ml
index 1b699e080f..a6fca39707 100644
--- a/stdlib/scanf.ml
+++ b/stdlib/scanf.ml
@@ -15,7 +15,7 @@
(* The run-time library for scanners. *)
-(* {6 Scanning buffers} *)
+(* Scanning buffers. *)
module type SCANNING = sig
type scanbuf;;
@@ -163,22 +163,21 @@ let token_bool ib =
| "false" -> false
| s -> bad_input ("invalid boolean " ^ s);;
-(* All the functions that convert a string to a number raise the exception
- Failure when the convertion is not possible.
- This exception is then trapped in kscanf. *)
-
let token_int_literal conv ib =
match conv with
- 'd' | 'i' | 'u' -> Scanning.token ib
+ | 'd' | 'i' | 'u' -> Scanning.token ib
| 'o' -> "0o" ^ Scanning.token ib
| 'x' | 'X' -> "0x" ^ Scanning.token ib
| _ -> assert false
+(* All the functions that convert a string to a number raise the exception
+ Failure when the conversion is not possible.
+ This exception is then trapped in kscanf. *)
let token_int conv ib = int_of_string (token_int_literal conv ib);;
let token_float ib = float_of_string (Scanning.token ib);;
(* To scan native ints, int32 and int64 integers.
- We cannot access to convertions to/from strings for those types,
+ We cannot access the conversions to/from strings for those types,
Nativeint.of_string, Int32.of_string, and Int64.of_string,
since those modules are not available to scanf.
However, we can bind and use the corresponding primitives that are
@@ -245,7 +244,7 @@ let scan_optionally_signed_decimal_int max ib =
scan_unsigned_decimal_int max ib;;
(* Scan an unsigned integer that could be given in any (common) basis.
- If digits are prefixed by one of 0x, 0X, 0o, 0b, the number is
+ If digits are prefixed by one of 0x, 0X, 0o, or 0b, the number is
assumed to be written respectively in hexadecimal, hexadecimal,
octal, or binary. *)
let scan_unsigned_int max ib =
@@ -337,8 +336,9 @@ let char_for_backslash =
let char_for_decimal_code c0 c1 c2 =
let c =
- 100 * (int_of_char c0 - 48) + 10 * (int_of_char c1 - 48) +
- (int_of_char c2 - 48) in
+ 100 * (int_of_char c0 - 48) +
+ 10 * (int_of_char c1 - 48) +
+ (int_of_char c2 - 48) in
if c < 0 || c > 255
then bad_input (Printf.sprintf "bad char \\%c%c%c" c0 c1 c2)
else char_of_int c;;
@@ -377,21 +377,19 @@ let scan_Char max ib =
| c, _ -> bad_input_escape c in
loop 3 max;;
-let scan_String stp max ib =
+let scan_String max ib =
let rec loop s max =
if max = 0 || Scanning.end_of_input ib then bad_input "a string" else
let c = Scanning.peek_char ib in
- if stp = [] then
- match c, s with
- | '"', true (* '"' helping Emacs *) ->
- Scanning.next_char ib; loop false (max - 1)
- | '"', false (* '"' helping Emacs *) ->
- Scanning.next_char ib; max - 1
- | '\\', false ->
- Scanning.next_char ib; loop false (scan_backslash_char (max - 1) ib)
- | c, false -> loop false (Scanning.store_char ib c max)
- | c, _ -> bad_input_char c else
- if List.mem c stp then max else loop s (Scanning.store_char ib c max) in
+ match c, s with
+ | '"', true (* '"' helping Emacs *) ->
+ Scanning.next_char ib; loop false (max - 1)
+ | '"', false (* '"' helping Emacs *) ->
+ Scanning.next_char ib; max - 1
+ | '\\', false ->
+ Scanning.next_char ib; loop false (scan_backslash_char (max - 1) ib)
+ | c, false -> loop false (Scanning.store_char ib c max)
+ | c, _ -> bad_input_char c in
loop true max;;
let scan_bool max ib =
@@ -480,10 +478,15 @@ external string_of_format : ('a, 'b, 'c) format -> string = "%identity";;
tokens as specified by the format. When it founds one token, it converts
it as specified, remembers the converted value as a future
argument to the function [f], and continues scanning.
- If the scanning or some convertion fails, the scanning function
+
+ If the entire scanning succeeds (i.e. the format string has been
+ exhausted and the buffer has provided tokens according to the
+ format string), the tokens are applied to [f].
+
+ If the scanning or some conversion fails, the scanning function
aborts and applies the scanning buffer and a string that explains
the error to the error continuation [ef]. *)
-let kscanf ib (fmt : ('a, 'b, 'c) format) f ef =
+let kscanf ib ef fmt f =
let fmt = string_of_format fmt in
let lim = String.length fmt - 1 in
@@ -494,6 +497,7 @@ let kscanf ib (fmt : ('a, 'b, 'c) format) f ef =
let rec scan f i =
if i > lim then f else
match fmt.[i] with
+ | ' ' | '\t' | '\r' | '\n' -> skip_whites ib; scan f (i + 1)
| c when Scanning.end_of_input ib -> raise End_of_file
| '%' -> scan_width f (i + 1)
| '@' as t ->
@@ -505,7 +509,6 @@ let kscanf ib (fmt : ('a, 'b, 'c) format) f ef =
| c when Scanning.peek_char ib = c ->
Scanning.next_char ib; scan f (i + 1)
| c -> bad_input_char (Scanning.peek_char ib) end
- | ' ' | '\r' | '\t' | '\n' -> skip_whites ib; scan f (i + 1)
| c when Scanning.peek_char ib = c ->
Scanning.next_char ib; scan f (i + 1)
| c -> bad_input_char (Scanning.peek_char ib)
@@ -542,31 +545,31 @@ let kscanf ib (fmt : ('a, 'b, 'c) format) f ef =
| 'f' | 'g' | 'G' | 'e' | 'E' ->
let x = scan_float max ib in
scan (stack f (token_float ib)) (i + 1)
- | 's' | 'S' as conv ->
+ | 's' ->
let i, stp = scan_stoppers (i + 1) in
- let x =
- if conv = 's'
- then scan_string stp max ib
- else scan_String stp max ib in
+ let x = scan_string stp max ib in
scan (stack f (token_string ib)) (i + 1)
- | 'b' ->
- let x = scan_bool max ib in
- scan (stack f (token_bool ib)) (i + 1)
| '[' ->
let i, char_set = read_char_set fmt (i + 1) in
let i, stp = scan_stoppers (i + 1) in
let x = scan_chars_in_char_set stp char_set max ib in
scan (stack f (token_string ib)) (i + 1)
+ | 'S' ->
+ let x = scan_String max ib in
+ scan (stack f (token_string ib)) (i + 1)
+ | 'b' ->
+ let x = scan_bool max ib in
+ scan (stack f (token_bool ib)) (i + 1)
| 'l' | 'n' | 'L' as t ->
let i = i + 1 in
if i > lim then bad_format fmt (i - 1) t else begin
match fmt.[i] with
| 'd' | 'i' | 'o' | 'u' | 'x' | 'X' as conv ->
- let x = scan_int conv max ib in
- begin match t with
- | 'l' -> scan (stack f (token_int32 conv ib)) (i + 1)
- | 'L' -> scan (stack f (token_int64 conv ib)) (i + 1)
- | _ -> scan (stack f (token_nativeint conv ib)) (i + 1) end
+ let x = scan_int conv max ib in
+ begin match t with
+ | 'l' -> scan (stack f (token_int32 conv ib)) (i + 1)
+ | 'L' -> scan (stack f (token_int64 conv ib)) (i + 1)
+ | _ -> scan (stack f (token_nativeint conv ib)) (i + 1) end
| c -> bad_format fmt i c end
| 'N' ->
let x = Scanning.char_count ib in
@@ -581,6 +584,7 @@ let kscanf ib (fmt : ('a, 'b, 'c) format) f ef =
if i > lim then i - 1, [] else
match fmt.[i] with
| '@' when i < lim -> let i = i + 1 in i, [fmt.[i]]
+ | '@' as c when i = lim -> bad_format fmt i c
| _ -> i - 1, [] in
Scanning.reset_token ib;
@@ -591,7 +595,7 @@ let kscanf ib (fmt : ('a, 'b, 'c) format) f ef =
stack (delay ef ib) exc in
return v;;
-let bscanf ib fmt f = kscanf ib fmt f scanf_bad_input;;
+let bscanf ib = kscanf ib scanf_bad_input;;
let fscanf ic = bscanf (Scanning.from_channel ic);;
diff --git a/stdlib/scanf.mli b/stdlib/scanf.mli
index 7eb4725203..f2cedebc2f 100644
--- a/stdlib/scanf.mli
+++ b/stdlib/scanf.mli
@@ -40,7 +40,7 @@ val from_function : (unit -> char) -> scanbuf;;
the given function as its reading method.
When scanning needs one more character, the given function is called.
When the function has no more character to provide, it must set
- an end of input condition by raising the exception [End_of_file]. *)
+ an end-of-input condition by raising the exception [End_of_file]. *)
end;;
@@ -57,6 +57,8 @@ val bscanf :
Raise [Scanf.Scan_failure] if the given input does not match the format.
+ Raise [Failure] if a conversion to a number is not possible.
+
Raise [End_of_file] if the end of input is encountered while scanning
and the input matches the given format so far.
@@ -66,8 +68,7 @@ val bscanf :
characters of the input,
- conversion specifications, each of which causes reading and
conversion of one argument for [f],
- - scanning indications to specify boundaries of tokens and the
- amount of space to skip between tokens.
+ - scanning indications to specify boundaries of tokens.
Among plain characters the space character (ASCII code 32) has a
special meaning: it matches ``whitespace'', that is any number of tab,
@@ -86,10 +87,10 @@ val bscanf :
- [o]: reads an unsigned octal integer.
- [s]: reads a string argument (by default strings end with a space).
- [S]: reads a delimited string argument (delimiters and special
- escaped characters follow the lexical conventions of Objective Caml).
+ escaped characters follow the lexical conventions of Caml).
- [c]: reads a single character.
- [C]: reads a single delimited character (delimiters and special
- escaped characters follow the lexical conventions of Objective Caml).
+ escaped characters follow the lexical conventions of Caml).
- [f], [e], [E], [g], [G]: reads an optionally signed floating-point number
in decimal notation, in the style [dddd.ddd e/E+-dd].
- [b]: reads a boolean argument ([true] or [false]).
@@ -111,17 +112,19 @@ val bscanf :
For instance, [%6d] reads an integer, having at most 6 decimal digits;
and [%4f] reads a float with 4 characters.
- The scanning indications are introduced by a [@] character, followed
- by any character [c]. Its effect is to skip input characters
- until a matching [c] is found.
- If a scanning indication immediately follows a [s]
- conversion specification, it specifies the boundary of the token
- (that is the character immediately after the end of the token). For
- instance, ["%s@\t"] reads a string up to the next tabulation
+ Scanning indications appear just after string conversions [s] and
+ [\[ range \]] to delimit the end of the token. A scanning
+ indication is introduced by a [@] character, followed by some
+ constant character [c]. It means that the string token should end
+ just before the next matching [c]. If no [c] character is
+ encountered, the string token extends as far as possible.
+ For instance, ["%s@\t"] reads a string up to the next tabulation
+ character. If the scanning indication [\@c] does not follow a
+ string conversion, it is ignored and treated as a plain [c]
character.
Note: the [scanf] facility is not intended for heavy duty
- lexical anaysis and parsing. If it appears too slow or not expressive
+ lexical analysis and parsing. If it appears not expressive
enough for your needs, several alternative exists: regular expressions
(module [Str]), stream parsers, [ocamllex]-generated lexers,
[ocamlyacc]-generated parsers. *)
@@ -137,8 +140,8 @@ val scanf : ('a, Scanning.scanbuf, 'b) format -> 'a -> 'b;;
(the standard input channel). *)
val kscanf :
- Scanning.scanbuf -> ('a, 'b, 'c) format -> 'a ->
- (Scanning.scanbuf -> exn -> 'c) -> 'c;;
+ Scanning.scanbuf -> (Scanning.scanbuf -> exn -> 'a) ->
+ ('b, Scanning.scanbuf, 'a) format -> 'b -> 'a;;
(** Same as {!Scanf.bscanf}, but takes an additional function argument
[ef] that is called in case of error: if the scanning process or
some convertion fails, the scanning function aborts and applies the