diff options
Diffstat (limited to 'stdlib/scanf.ml')
-rw-r--r-- | stdlib/scanf.ml | 268 |
1 files changed, 160 insertions, 108 deletions
diff --git a/stdlib/scanf.ml b/stdlib/scanf.ml index 88a0f97f82..7cd018d3fb 100644 --- a/stdlib/scanf.ml +++ b/stdlib/scanf.ml @@ -47,16 +47,16 @@ val checked_peek_char : scanbuf -> char;; input buffer has reached an end of file, the function raises exception [End_of_file]. *) -val store_char : scanbuf -> char -> int -> int;; -(* [Scanning.store_char ib c lim] adds [c] to the token buffer +val store_char : int -> scanbuf -> char -> int;; +(* [Scanning.store_char lim ib c] adds [c] to the token buffer of the scanning buffer. It also advances the scanning buffer for one character and returns [lim - 1], indicating the new limit for the length of the current token. *) -val skip_char : scanbuf -> int -> int;; -(* [Scanning.skip_char ib lim] ignores the current character. *) +val skip_char : int -> scanbuf -> int;; +(* [Scanning.skip_char lim ib] ignores the current character. *) -val ignore_char : scanbuf -> int -> int;; +val ignore_char : int -> scanbuf -> int;; (* [Scanning.ignore_char ib lim] ignores the current character and decrements the limit. *) @@ -186,16 +186,16 @@ let token ib = let token_count ib = ib.token_count;; -let skip_char ib max = +let skip_char max ib = invalidate_current_char ib; max ;; -let ignore_char ib max = skip_char ib (max - 1);; +let ignore_char max ib = skip_char (max - 1) ib;; -let store_char ib c max = +let store_char max ib c = Buffer.add_char ib.tokbuf c; - ignore_char ib max + ignore_char max ib ;; let default_token_buffer_size = 1024;; @@ -379,8 +379,8 @@ let incomplete_format fmt = (Sformat.to_string fmt)) ;; -let bad_float () = bad_input "no dot or exponent part found in -float token" +let bad_float () = + bad_input "no dot or exponent part found in float token" ;; let character_mismatch_err c ci = @@ -407,11 +407,18 @@ let compatible_format_type fmt1 fmt2 = In this case, the character c has been explicitely specified in the format as being mandatory in the input; hence we should fail with End_of_file in case of end_of_input. - That's why we use checked_peek_char here. *) -let check_char ib c = + That's why we use checked_peek_char here. + We are also careful to treat "\r\n" in the input as a end of line marker: it + always matches a '\n' specification in the input format string. + *) +let rec check_char ib c = let ci = Scanning.checked_peek_char ib in - if ci = c then Scanning.invalidate_current_char ib else - character_mismatch c ci + if ci = c then Scanning.invalidate_current_char ib else begin + match ci with + | '\r' when c = '\n' -> + Scanning.invalidate_current_char ib; check_char ib '\n' + | _ -> character_mismatch c ci + end ;; (* Checks that the current char is indeed one of the stopper characters, @@ -500,10 +507,10 @@ let rec scan_decimal_digits max ib = if Scanning.eof ib then max else match c with | '0' .. '9' as c -> - let max = Scanning.store_char ib c max in + let max = Scanning.store_char max ib c in scan_decimal_digits max ib | '_' -> - let max = Scanning.ignore_char ib max in + let max = Scanning.ignore_char max ib in scan_decimal_digits max ib | _ -> max ;; @@ -512,7 +519,7 @@ let scan_decimal_digits_plus max ib = let c = Scanning.checked_peek_char ib in match c with | '0' .. '9' -> - let max = Scanning.store_char ib c max in + let max = Scanning.store_char max ib c in scan_decimal_digits max ib | c -> bad_input_char c ;; @@ -526,16 +533,16 @@ let scan_digits_plus digitp max ib = if Scanning.eof ib then max else match c with | c when digitp c -> - let max = Scanning.store_char ib c max in + let max = Scanning.store_char max ib c in scan_digits max | '_' -> - let max = Scanning.ignore_char ib max in + let max = Scanning.ignore_char max ib in scan_digits max | _ -> max in let c = Scanning.checked_peek_char ib in if digitp c then - let max = Scanning.store_char ib c max in + let max = Scanning.store_char max ib c in scan_digits max else bad_input_char c ;; @@ -567,8 +574,8 @@ let scan_unsigned_decimal_int = scan_decimal_digits_plus;; let scan_sign max ib = let c = Scanning.checked_peek_char ib in match c with - | '+' -> Scanning.store_char ib c max - | '-' -> Scanning.store_char ib c max + | '+' -> Scanning.store_char max ib c + | '-' -> Scanning.store_char max ib c | c -> max ;; @@ -584,14 +591,14 @@ let scan_optionally_signed_decimal_int max ib = let scan_unsigned_int max ib = match Scanning.checked_peek_char ib with | '0' as c -> - let max = Scanning.store_char ib c max in + let max = Scanning.store_char max ib c in if max = 0 then max else let c = Scanning.peek_char ib in if Scanning.eof ib then max else begin match c with - | 'x' | 'X' -> scan_hexadecimal_int (Scanning.store_char ib c max) ib - | 'o' -> scan_octal_int (Scanning.store_char ib c max) ib - | 'b' -> scan_binary_int (Scanning.store_char ib c max) ib + | 'x' | 'X' -> scan_hexadecimal_int (Scanning.store_char max ib c) ib + | 'o' -> scan_octal_int (Scanning.store_char max ib c) ib + | 'b' -> scan_binary_int (Scanning.store_char max ib c) ib | c -> scan_decimal_digits max ib end | c -> scan_unsigned_decimal_int max ib ;; @@ -620,7 +627,7 @@ let scan_frac_part max ib = if Scanning.eof ib then max else match c with | '0' .. '9' as c -> - scan_decimal_digits (Scanning.store_char ib c max) ib + scan_decimal_digits (Scanning.store_char max ib c) ib | _ -> max ;; @@ -631,7 +638,7 @@ let scan_exp_part max ib = if Scanning.eof ib then max else match c with | 'e' | 'E' as c -> - scan_optionally_signed_decimal_int (Scanning.store_char ib c max) ib + scan_optionally_signed_decimal_int (Scanning.store_char max ib c) ib | _ -> max ;; @@ -651,7 +658,7 @@ let scan_float max ib = if Scanning.eof ib then max else match c with | '.' -> - let max = Scanning.store_char ib c max in + let max = Scanning.store_char max ib c in let max = scan_frac_part max ib in scan_exp_part max ib | c -> scan_exp_part max ib @@ -664,7 +671,7 @@ let scan_Float max ib = if Scanning.eof ib then bad_float () else match c with | '.' -> - let max = Scanning.store_char ib c max in + let max = Scanning.store_char max ib c in let max = scan_frac_part max ib in scan_exp_part max ib | 'e' | 'E' -> @@ -683,15 +690,15 @@ let scan_string stp max ib = if stp = [] then match c with | ' ' | '\t' | '\n' | '\r' -> max - | c -> loop (Scanning.store_char ib c max) else - if List.memq c stp then Scanning.skip_char ib max else - loop (Scanning.store_char ib c max) in + | c -> loop (Scanning.store_char max ib c) else + if List.memq c stp then Scanning.skip_char max ib else + loop (Scanning.store_char max ib c) in loop max ;; (* Scan a char: peek strictly one character in the input, whatsoever. *) let scan_char max ib = - Scanning.store_char ib (Scanning.checked_peek_char ib) max + Scanning.store_char max ib (Scanning.checked_peek_char ib) ;; let char_for_backslash = function @@ -704,27 +711,58 @@ let char_for_backslash = function (* The integer value corresponding to the facial value of a valid decimal digit character. *) -let int_value_of_char c = int_of_char c - 48;; +let decimal_value_of_char c = int_of_char c - int_of_char '0';; let char_for_decimal_code c0 c1 c2 = let c = - 100 * int_value_of_char c0 + - 10 * int_value_of_char c1 + - int_value_of_char c2 in + 100 * decimal_value_of_char c0 + + 10 * decimal_value_of_char c1 + + decimal_value_of_char c2 in if c < 0 || c > 255 then bad_input (Printf.sprintf "bad char \\%c%c%c" c0 c1 c2) else char_of_int c ;; +(* The integer value corresponding to the facial value of a valid + hexadecimal digit character. *) +let hexadecimal_value_of_char c = + let d = int_of_char c in + (* Could also be: + if d <= int_of_char '9' then d - int_of_char '0' else + if d <= int_of_char 'F' then 10 + d - int_of_char 'A' else + if d <= int_of_char 'f' then 10 + d - int_of_char 'a' else assert false + *) + if d >= int_of_char 'a' then + d - 87 (* 10 + int_of_char c - int_of_char 'a' *) else + if d >= int_of_char 'A' then + d - 55 (* 10 + int_of_char c - int_of_char 'A' *) else + d - int_of_char '0' +;; + +let char_for_hexadecimal_code c1 c2 = + let c = + 16 * hexadecimal_value_of_char c1 + + hexadecimal_value_of_char c2 in + if c < 0 || c > 255 + then bad_input (Printf.sprintf "bad char \\%c%c" c1 c2) + else char_of_int c +;; + (* Called when encountering '\\' as starter of a char. Stops before the corresponding '\''. *) -let scan_backslash_char max ib = - if max = 0 then bad_input "a char" else +let check_next_char message max ib = + if max = 0 then bad_input message else let c = Scanning.peek_char ib in - if Scanning.eof ib then bad_input "a char" else - match c with - | '\\' | '\'' | '"' | 'n' | 't' | 'b' | 'r' (* '"' helping Emacs *) -> - Scanning.store_char ib (char_for_backslash c) max + if Scanning.eof ib then bad_input message else c +;; + +let check_next_char_for_char = check_next_char "a char";; +let check_next_char_for_string = check_next_char "a string";; + +let scan_backslash_char max ib = + match check_next_char_for_char max ib with + | '\\' | '\'' | '\"' | 'n' | 't' | 'b' | 'r' as c -> + Scanning.store_char max ib (char_for_backslash c) | '0' .. '9' as c -> let get_digit () = let c = Scanning.next_char ib in @@ -734,57 +772,69 @@ let scan_backslash_char max ib = let c0 = c in let c1 = get_digit () in let c2 = get_digit () in - Scanning.store_char ib (char_for_decimal_code c0 c1 c2) (max - 2) + Scanning.store_char (max - 2) ib (char_for_decimal_code c0 c1 c2) + | 'x' -> + let get_digit () = + let c = Scanning.next_char ib in + match c with + | '0' .. '9' | 'A' .. 'F' | 'a' .. 'f' as c -> c + | c -> bad_input_escape c in + let c1 = get_digit () in + let c2 = get_digit () in + Scanning.store_char (max - 2) ib (char_for_hexadecimal_code c1 c2) | c -> bad_input_char c ;; let scan_Char max ib = - let rec loop s max = - if max = 0 then bad_input "a char" else - let c = Scanning.checked_peek_char ib in - if Scanning.eof ib then bad_input "a char" else - match c, s with - (* Looking for the '\'' at the beginning of the delimited char. *) - | '\'', 3 -> loop 2 (Scanning.ignore_char ib max) - (* Looking for the '\'' at the end of the delimited char. *) - | '\'', 1 -> Scanning.ignore_char ib max - (* Any other char at the beginning or end of the delimited char should be - '\''. *) - | c, (3 | 1) -> character_mismatch '\'' c - (* Found a '\\': check and read this escape char. *) - | '\\', 2 -> loop 1 (scan_backslash_char (Scanning.ignore_char ib max) ib) - (* The regular case, remember the char, then look for the terminal '\\'. *) - | c, 2 -> loop 1 (Scanning.store_char ib c max) - (* Any other case is an error, *) - | c, _ -> bad_input_char c in - loop 3 max + + let rec find_start max = + match check_next_char_for_char max ib with + | '\'' -> find_char (Scanning.ignore_char max ib) + | c -> character_mismatch '\'' c + + and find_char max = + match check_next_char_for_char max ib with + | '\\' -> find_stop (scan_backslash_char (Scanning.ignore_char max ib) ib) + | c -> find_stop (Scanning.store_char max ib c) + + and find_stop max = + match check_next_char_for_char max ib with + | '\'' -> Scanning.ignore_char max ib + | c -> character_mismatch '\'' c in + + find_start max ;; let scan_String max ib = - let rec loop s max = - if max = 0 then bad_input "a string" else - let c = Scanning.checked_peek_char ib in - if Scanning.eof ib then bad_input "a string" else - match c, s with - | '"', true (* '"' helping Emacs *) -> - loop false (Scanning.ignore_char ib max) - | '"', false (* '"' helping Emacs *) -> - Scanning.ignore_char ib max - | '\\', false -> - skip_spaces true (Scanning.ignore_char ib max) - | c, false -> loop false (Scanning.store_char ib c max) - | c, _ -> bad_input_char c - and skip_spaces s max = - if max = 0 then bad_input "a string" else - let c = Scanning.checked_peek_char ib in - if Scanning.eof ib then bad_input "a string" else - match c, s with - | '\n', true - | ' ', false -> - skip_spaces false (Scanning.ignore_char ib max) - | c, false -> loop false max - | _, _ -> loop false (scan_backslash_char (max - 1) ib) in - loop true max + + let rec find_start max = + match check_next_char_for_string max ib with + | '\"' -> find_stop (Scanning.ignore_char max ib) + | c -> character_mismatch '\"' c + + and find_stop max = + match check_next_char_for_string max ib with + | '\"' -> Scanning.ignore_char max ib + | '\\' -> scan_backslash (Scanning.ignore_char max ib) + | c -> find_stop (Scanning.store_char max ib c) + + and scan_backslash max = + match check_next_char_for_string max ib with + | '\r' -> skip_newline (Scanning.ignore_char max ib) + | '\n' -> skip_spaces (Scanning.ignore_char max ib) + | c -> find_stop (scan_backslash_char max ib) + + and skip_newline max = + match check_next_char_for_string max ib with + | '\n' -> skip_spaces (Scanning.ignore_char max ib) + | _ -> find_stop (Scanning.store_char max ib '\r') + + and skip_spaces max = + match check_next_char_for_string max ib with + | ' ' -> skip_spaces (Scanning.ignore_char max ib) + | _ -> find_stop max in + + find_start max ;; let scan_bool max ib = @@ -964,49 +1014,49 @@ let scan_chars_in_char_set stp char_set max ib = let c = Scanning.peek_char ib in if Scanning.eof ib then max else if c == cp1 - then loop_pos1 cp1 (Scanning.store_char ib c max) + then loop_pos1 cp1 (Scanning.store_char max ib c) else max and loop_pos2 cp1 cp2 max = if max = 0 then max else let c = Scanning.peek_char ib in if Scanning.eof ib then max else if c == cp1 || c == cp2 - then loop_pos2 cp1 cp2 (Scanning.store_char ib c max) + then loop_pos2 cp1 cp2 (Scanning.store_char max ib c) else max and loop_pos3 cp1 cp2 cp3 max = if max = 0 then max else let c = Scanning.peek_char ib in if Scanning.eof ib then max else if c == cp1 || c == cp2 || c == cp3 - then loop_pos3 cp1 cp2 cp3 (Scanning.store_char ib c max) + then loop_pos3 cp1 cp2 cp3 (Scanning.store_char max ib c) else max and loop_neg1 cp1 max = if max = 0 then max else let c = Scanning.peek_char ib in if Scanning.eof ib then max else if c != cp1 - then loop_neg1 cp1 (Scanning.store_char ib c max) + then loop_neg1 cp1 (Scanning.store_char max ib c) else max and loop_neg2 cp1 cp2 max = if max = 0 then max else let c = Scanning.peek_char ib in if Scanning.eof ib then max else if c != cp1 && c != cp2 - then loop_neg2 cp1 cp2 (Scanning.store_char ib c max) + then loop_neg2 cp1 cp2 (Scanning.store_char max ib c) else max and loop_neg3 cp1 cp2 cp3 max = if max = 0 then max else let c = Scanning.peek_char ib in if Scanning.eof ib then max else if c != cp1 && c != cp2 && c != cp3 - then loop_neg3 cp1 cp2 cp3 (Scanning.store_char ib c max) + then loop_neg3 cp1 cp2 cp3 (Scanning.store_char max ib c) else max and loop setp max = if max = 0 then max else let c = Scanning.peek_char ib in if Scanning.eof ib then max else if setp c == 1 - then loop setp (Scanning.store_char ib c max) + then loop setp (Scanning.store_char max ib c) else max in let max = @@ -1175,25 +1225,27 @@ let scan_format ib ef fmt rv f = if ir > limr then assert false else let token = Obj.magic rv.(ir) ib in scan_fmt (succ ir) (stack f token) (succ i) - | 'l' | 'n' | 'L' as conv -> + | 'l' | 'n' | 'L' as conv0 -> let i = succ i in - if i > lim then scan_fmt ir (stack f (get_count conv ib)) i else begin + if i > lim then scan_fmt ir (stack f (get_count conv0 ib)) i else begin match Sformat.get fmt i with (* This is in fact an integer conversion (e.g. %ld, %ni, or %Lo). *) - | 'd' | 'i' | 'o' | 'u' | 'x' | 'X' as conv -> - let _x = scan_int_conv conv max ib in + | 'd' | 'i' | 'o' | 'u' | 'x' | 'X' as conv1 -> + let _x = scan_int_conv conv1 max ib in (* Look back to the character that triggered the integer conversion - (this character is either 'l', 'n' or 'L'), to find the + (this character is either 'l', 'n' or 'L') to find the conversion to apply to the integer token read. *) - begin match Sformat.get fmt (i - 1) with - | 'l' -> scan_fmt ir (stack f (token_int32 conv ib)) (succ i) - | 'n' -> scan_fmt ir (stack f (token_nativeint conv ib)) (succ i) - | _ -> scan_fmt ir (stack f (token_int64 conv ib)) (succ i) end + begin match conv0 with + | 'l' -> scan_fmt ir (stack f (token_int32 conv1 ib)) (succ i) + | 'n' -> scan_fmt ir (stack f (token_nativeint conv1 ib)) (succ i) + | _ -> scan_fmt ir (stack f (token_int64 conv1 ib)) (succ i) end (* This is not an integer conversion, but a regular %l, %n or %L. *) - | _ -> scan_fmt ir (stack f (get_count conv ib)) i end + | _ -> scan_fmt ir (stack f (get_count conv0 ib)) i end | '!' -> if Scanning.end_of_input ib then scan_fmt ir f (succ i) else bad_input "end of input not found" + | ',' -> + scan_fmt ir f (succ i) | '_' -> if i > lim then incomplete_format fmt else scan_conversion true max ir f (succ i) @@ -1202,10 +1254,10 @@ let scan_format ib ef fmt rv f = if i > lim then accu, i else match Sformat.get fmt i with | '0' .. '9' as c -> - let accu = 10 * accu + int_value_of_char c in + let accu = 10 * accu + decimal_value_of_char c in read_width accu (succ i) | _ -> accu, i in - let max, i = read_width (int_value_of_char conv) (succ i) in + let max, i = read_width (decimal_value_of_char conv) (succ i) in if i > lim then incomplete_format fmt else begin match Sformat.get fmt i with | '.' -> |