diff options
Diffstat (limited to 'stdlib')
-rw-r--r-- | stdlib/arg.ml | 13 | ||||
-rw-r--r-- | stdlib/arg.mli | 2 | ||||
-rw-r--r-- | stdlib/array.ml | 72 | ||||
-rw-r--r-- | stdlib/callback.ml | 2 | ||||
-rw-r--r-- | stdlib/callback.mli | 6 | ||||
-rw-r--r-- | stdlib/camlinternalOO.ml | 3 | ||||
-rw-r--r-- | stdlib/digest.ml | 3 | ||||
-rw-r--r-- | stdlib/digest.mli | 12 | ||||
-rw-r--r-- | stdlib/filename.ml | 86 | ||||
-rw-r--r-- | stdlib/filename.mli | 14 | ||||
-rw-r--r-- | stdlib/gc.mli | 4 | ||||
-rw-r--r-- | stdlib/genlex.mli | 2 | ||||
-rw-r--r-- | stdlib/hashtbl.mli | 13 | ||||
-rw-r--r-- | stdlib/list.mli | 12 | ||||
-rw-r--r-- | stdlib/map.ml | 48 | ||||
-rw-r--r-- | stdlib/marshal.mli | 2 | ||||
-rw-r--r-- | stdlib/oo.mli | 10 | ||||
-rw-r--r-- | stdlib/pervasives.ml | 10 | ||||
-rw-r--r-- | stdlib/pervasives.mli | 11 | ||||
-rw-r--r-- | stdlib/printf.ml | 22 | ||||
-rw-r--r-- | stdlib/printf.mli | 42 | ||||
-rw-r--r-- | stdlib/queue.ml | 2 | ||||
-rw-r--r-- | stdlib/scanf.ml | 114 | ||||
-rw-r--r-- | stdlib/scanf.mli | 298 | ||||
-rw-r--r-- | stdlib/set.ml | 51 | ||||
-rw-r--r-- | stdlib/string.mli | 7 | ||||
-rw-r--r-- | stdlib/sys.mli | 4 |
27 files changed, 467 insertions, 398 deletions
diff --git a/stdlib/arg.ml b/stdlib/arg.ml index d5d1bdf74d..8453058e4e 100644 --- a/stdlib/arg.ml +++ b/stdlib/arg.ml @@ -64,10 +64,11 @@ let make_symlist prefix sep suffix l = ;; let print_spec buf (key, spec, doc) = - match spec with - | Symbol (l, _) -> bprintf buf " %s %s%s\n" key (make_symlist "{" "|" "}" l) - doc - | _ -> bprintf buf " %s %s\n" key doc + if String.length doc > 0 then + match spec with + | Symbol (l, _) -> bprintf buf " %s %s%s\n" key (make_symlist "{" "|" "}" l) + doc + | _ -> bprintf buf " %s %s\n" key doc ;; let help_action () = raise (Stop (Unknown "-help"));; @@ -237,6 +238,10 @@ let max_arg_len cur (kwd, spec, doc) = let add_padding len ksd = match ksd with + | (_, _, "") -> + (* Do not pad undocumented options, so that they still don't show up when + * run through [usage] or [parse]. *) + ksd | (kwd, (Symbol (l, _) as spec), msg) -> let cutcol = second_word msg in let spaces = String.make (len - cutcol + 3) ' ' in diff --git a/stdlib/arg.mli b/stdlib/arg.mli index 1fff78f190..d6e0210aa1 100644 --- a/stdlib/arg.mli +++ b/stdlib/arg.mli @@ -83,6 +83,8 @@ val parse : - The reason for the error: unknown option, invalid or missing argument, etc. - [usage_msg] - The list of options, each followed by the corresponding [doc] string. + Beware: options that have an empty [doc] string will not be included in the + list. For the user to be able to specify anonymous arguments starting with a [-], include for example [("-", String anon_fun, doc)] in [speclist]. diff --git a/stdlib/array.ml b/stdlib/array.ml index e29b2fe831..076a3af031 100644 --- a/stdlib/array.ml +++ b/stdlib/array.ml @@ -1,6 +1,6 @@ (***********************************************************************) (* *) -(* OCaml *) +(* OCaml *) (* *) (* Xavier Leroy, projet Cristal, INRIA Rocquencourt *) (* *) @@ -22,6 +22,10 @@ external unsafe_get: 'a array -> int -> 'a = "%array_unsafe_get" external unsafe_set: 'a array -> int -> 'a -> unit = "%array_unsafe_set" external make: int -> 'a -> 'a array = "caml_make_vect" external create: int -> 'a -> 'a array = "caml_make_vect" +external sub : 'a array -> int -> int -> 'a array = "caml_array_sub" +external append_prim : 'a array -> 'a array -> 'a array = "caml_array_append" +external concat : 'a array list -> 'a array = "caml_array_concat" +external unsafe_blit : 'a array -> int -> 'a array -> int -> int -> unit = "caml_array_blit" let init l f = if l = 0 then [||] else @@ -41,58 +45,13 @@ let make_matrix sx sy init = let create_matrix = make_matrix let copy a = - let l = length a in - if l = 0 then [||] else begin - let res = create l (unsafe_get a 0) in - for i = 1 to pred l do - unsafe_set res i (unsafe_get a i) - done; - res - end + let l = length a in if l = 0 then [||] else sub a 0 l let append a1 a2 = - let l1 = length a1 and l2 = length a2 in - if l1 = 0 && l2 = 0 then [||] else begin - let r = create (l1 + l2) (unsafe_get (if l1 > 0 then a1 else a2) 0) in - for i = 0 to l1 - 1 do unsafe_set r i (unsafe_get a1 i) done; - for i = 0 to l2 - 1 do unsafe_set r (i + l1) (unsafe_get a2 i) done; - r - end - -let concat_aux init al = - let rec size accu = function - | [] -> accu - | h::t -> size (accu + length h) t - in - let res = create (size 0 al) init in - let rec fill pos = function - | [] -> () - | h::t -> - for i = 0 to length h - 1 do - unsafe_set res (pos + i) (unsafe_get h i); - done; - fill (pos + length h) t; - in - fill 0 al; - res -;; - -let concat al = - let rec find_init aa = - match aa with - | [] -> [||] - | a :: rem -> - if length a > 0 then concat_aux (unsafe_get a 0) aa else find_init rem - in find_init al - -let sub a ofs len = - if ofs < 0 || len < 0 || ofs > length a - len then invalid_arg "Array.sub" - else if len = 0 then [||] - else begin - let r = create len (unsafe_get a ofs) in - for i = 1 to len - 1 do unsafe_set r i (unsafe_get a (ofs + i)) done; - r - end + let l1 = length a1 in + if l1 = 0 then copy a2 + else if length a2 = 0 then sub a1 0 l1 + else append_prim a1 a2 let fill a ofs len v = if ofs < 0 || len < 0 || ofs > length a - len @@ -103,16 +62,7 @@ let blit a1 ofs1 a2 ofs2 len = if len < 0 || ofs1 < 0 || ofs1 > length a1 - len || ofs2 < 0 || ofs2 > length a2 - len then invalid_arg "Array.blit" - else if ofs1 < ofs2 then - (* Top-down copy *) - for i = len - 1 downto 0 do - unsafe_set a2 (ofs2 + i) (unsafe_get a1 (ofs1 + i)) - done - else - (* Bottom-up copy *) - for i = 0 to len - 1 do - unsafe_set a2 (ofs2 + i) (unsafe_get a1 (ofs1 + i)) - done + else unsafe_blit a1 ofs1 a2 ofs2 len let iter f a = for i = 0 to length a - 1 do f(unsafe_get a i) done diff --git a/stdlib/callback.ml b/stdlib/callback.ml index a8a490c46b..5dd7894846 100644 --- a/stdlib/callback.ml +++ b/stdlib/callback.ml @@ -13,7 +13,7 @@ (* $Id$ *) -(* Registering Caml values with the C runtime for later callbacks *) +(* Registering OCaml values with the C runtime for later callbacks *) external register_named_value : string -> Obj.t -> unit = "caml_register_named_value" diff --git a/stdlib/callback.mli b/stdlib/callback.mli index dfb31617a2..ca5f1f073d 100644 --- a/stdlib/callback.mli +++ b/stdlib/callback.mli @@ -13,11 +13,11 @@ (* $Id$ *) -(** Registering Caml values with the C runtime. +(** Registering OCaml values with the C runtime. - This module allows Caml values to be registered with the C runtime + This module allows OCaml values to be registered with the C runtime under a symbolic name, so that C code can later call back registered - Caml functions, or raise registered Caml exceptions. + OCaml functions, or raise registered OCaml exceptions. *) val register : string -> 'a -> unit diff --git a/stdlib/camlinternalOO.ml b/stdlib/camlinternalOO.ml index db3aef73df..6d78714655 100644 --- a/stdlib/camlinternalOO.ml +++ b/stdlib/camlinternalOO.ml @@ -18,8 +18,7 @@ open Obj (**** Object representation ****) let last_id = ref 0 -let new_id () = - let id = !last_id in incr last_id; id +let () = Callback.register "CamlinternalOO.last_id" last_id let set_id o id = let id0 = !id in diff --git a/stdlib/digest.ml b/stdlib/digest.ml index 7d69141561..47c7e65a7a 100644 --- a/stdlib/digest.ml +++ b/stdlib/digest.ml @@ -17,6 +17,8 @@ type t = string +let compare = String.compare + external unsafe_string: string -> int -> int -> t = "caml_md5_string" external channel: in_channel -> int -> t = "caml_md5_chan" @@ -48,4 +50,3 @@ let to_hex d = String.blit (Printf.sprintf "%02x" (int_of_char d.[i])) 0 result (2*i) 2; done; result -;; diff --git a/stdlib/digest.mli b/stdlib/digest.mli index 14f27e2630..01a5f8ba8a 100644 --- a/stdlib/digest.mli +++ b/stdlib/digest.mli @@ -18,12 +18,22 @@ This module provides functions to compute 128-bit ``digests'' of arbitrary-length strings or files. The digests are of cryptographic quality: it is very hard, given a digest, to forge a string having - that digest. The algorithm used is MD5. + that digest. The algorithm used is MD5. This module should not be + used for secure and sensitive cryptographic applications. For these + kind of applications more recent and stronger cryptographic + primitives should be used instead. *) type t = string (** The type of digests: 16-character strings. *) +val compare : t -> t -> int +(** The comparison function for 16-character digest, with the same + specification as {!Pervasives.compare} and the implementation + shared with {!String.compare}. Along with the type [t], this + function [compare] allows the module [Digest] to be passed as + argument to the functors {!Set.Make} and {!Map.Make}. *) + val string : string -> t (** Return the digest of the given string. *) diff --git a/stdlib/filename.ml b/stdlib/filename.ml index 92bd217188..8c3ad53155 100644 --- a/stdlib/filename.ml +++ b/stdlib/filename.ml @@ -25,30 +25,55 @@ let generic_quote quotequote s = Buffer.add_char b '\''; Buffer.contents b -let generic_basename rindex_dir_sep current_dir_name name = - let raw_name = - try - let p = rindex_dir_sep name + 1 in - String.sub name p (String.length name - p) - with Not_found -> - name +(* This function implements the Open Group specification found here: + [[1]] http://pubs.opengroup.org/onlinepubs/9699919799/utilities/basename.html + In step 1 of [[1]], we choose to return "." for empty input. + (for compatibility with previous versions of OCaml) + In step 2, we choose to process "//" normally. + Step 6 is not implemented: we consider that the [suffix] operand is + always absent. Suffixes are handled by [chop_suffix] and [chop_extension]. +*) +let generic_basename is_dir_sep current_dir_name name = + let rec find_end n = + if n < 0 then String.sub name 0 1 + else if is_dir_sep name n then find_end (n - 1) + else find_beg n (n + 1) + and find_beg n p = + if n < 0 then String.sub name 0 p + else if is_dir_sep name n then String.sub name (n + 1) (p - n - 1) + else find_beg (n - 1) p in - if raw_name = "" then current_dir_name else raw_name - -let generic_dirname rindex_dir_sep current_dir_name dir_sep name = - try - match rindex_dir_sep name with - 0 -> dir_sep - | n -> String.sub name 0 n - with Not_found -> - current_dir_name + if name = "" + then current_dir_name + else find_end (String.length name - 1) + +(* This function implements the Open Group specification found here: + [[2]] http://pubs.opengroup.org/onlinepubs/9699919799/utilities/dirname.html + In step 6 of [[2]], we choose to process "//" normally. +*) +let generic_dirname is_dir_sep current_dir_name name = + let rec trailing_sep n = + if n < 0 then String.sub name 0 1 + else if is_dir_sep name n then trailing_sep (n - 1) + else base n + and base n = + if n < 0 then current_dir_name + else if is_dir_sep name n then intermediate_sep n + else base (n - 1) + and intermediate_sep n = + if n < 0 then String.sub name 0 1 + else if is_dir_sep name n then intermediate_sep (n - 1) + else String.sub name 0 (n + 1) + in + if name = "" + then current_dir_name + else trailing_sep (String.length name - 1) module Unix = struct let current_dir_name = "." let parent_dir_name = ".." let dir_sep = "/" let is_dir_sep s i = s.[i] = '/' - let rindex_dir_sep s = String.rindex s '/' let is_relative n = String.length n < 1 || n.[0] <> '/';; let is_implicit n = is_relative n @@ -61,8 +86,8 @@ module Unix = struct let temp_dir_name = try Sys.getenv "TMPDIR" with Not_found -> "/tmp" let quote = generic_quote "'\\''" - let basename = generic_basename rindex_dir_sep current_dir_name - let dirname = generic_dirname rindex_dir_sep current_dir_name dir_sep + let basename = generic_basename is_dir_sep current_dir_name + let dirname = generic_dirname is_dir_sep current_dir_name end module Win32 = struct @@ -70,12 +95,6 @@ module Win32 = struct let parent_dir_name = ".." let dir_sep = "\\" let is_dir_sep s i = let c = s.[i] in c = '/' || c = '\\' || c = ':' - let rindex_dir_sep s = - let rec pos i = - if i < 0 then raise Not_found - else if is_dir_sep s i then i - else pos (i - 1) - in pos (String.length s - 1) let is_relative n = (String.length n < 1 || n.[0] <> '/') && (String.length n < 1 || n.[0] <> '\\') @@ -129,11 +148,11 @@ module Win32 = struct else ("", s) let dirname s = let (drive, path) = drive_and_path s in - let dir = generic_dirname rindex_dir_sep current_dir_name dir_sep path in + let dir = generic_dirname is_dir_sep current_dir_name path in drive ^ dir let basename s = let (drive, path) = drive_and_path s in - generic_basename rindex_dir_sep current_dir_name path + generic_basename is_dir_sep current_dir_name path end module Cygwin = struct @@ -141,33 +160,32 @@ module Cygwin = struct let parent_dir_name = ".." let dir_sep = "/" let is_dir_sep = Win32.is_dir_sep - let rindex_dir_sep = Win32.rindex_dir_sep let is_relative = Win32.is_relative let is_implicit = Win32.is_implicit let check_suffix = Win32.check_suffix let temp_dir_name = Unix.temp_dir_name let quote = Unix.quote - let basename = generic_basename rindex_dir_sep current_dir_name - let dirname = generic_dirname rindex_dir_sep current_dir_name dir_sep + let basename = generic_basename is_dir_sep current_dir_name + let dirname = generic_dirname is_dir_sep current_dir_name end -let (current_dir_name, parent_dir_name, dir_sep, is_dir_sep, rindex_dir_sep, +let (current_dir_name, parent_dir_name, dir_sep, is_dir_sep, is_relative, is_implicit, check_suffix, temp_dir_name, quote, basename, dirname) = match Sys.os_type with "Unix" -> (Unix.current_dir_name, Unix.parent_dir_name, Unix.dir_sep, - Unix.is_dir_sep, Unix.rindex_dir_sep, + Unix.is_dir_sep, Unix.is_relative, Unix.is_implicit, Unix.check_suffix, Unix.temp_dir_name, Unix.quote, Unix.basename, Unix.dirname) | "Win32" -> (Win32.current_dir_name, Win32.parent_dir_name, Win32.dir_sep, - Win32.is_dir_sep, Win32.rindex_dir_sep, + Win32.is_dir_sep, Win32.is_relative, Win32.is_implicit, Win32.check_suffix, Win32.temp_dir_name, Win32.quote, Win32.basename, Win32.dirname) | "Cygwin" -> (Cygwin.current_dir_name, Cygwin.parent_dir_name, Cygwin.dir_sep, - Cygwin.is_dir_sep, Cygwin.rindex_dir_sep, + Cygwin.is_dir_sep, Cygwin.is_relative, Cygwin.is_implicit, Cygwin.check_suffix, Cygwin.temp_dir_name, Cygwin.quote, Cygwin.basename, Cygwin.dirname) | _ -> assert false diff --git a/stdlib/filename.mli b/stdlib/filename.mli index 7e447585ca..b4644ad67d 100644 --- a/stdlib/filename.mli +++ b/stdlib/filename.mli @@ -59,17 +59,19 @@ val chop_extension : string -> string val basename : string -> string (** Split a file name into directory name / base file name. - [concat (dirname name) (basename name)] returns a file name - which is equivalent to [name]. Moreover, after setting the - current directory to [dirname name] (with {!Sys.chdir}), + If [name] is a valid file name, then [concat (dirname name) (basename name)] + returns a file name which is equivalent to [name]. Moreover, + after setting the current directory to [dirname name] (with {!Sys.chdir}), references to [basename name] (which is a relative file name) designate the same file as [name] before the call to {!Sys.chdir}. - The result is not specified if the argument is not a valid file name - (for example, under Unix if there is a NUL character in the string). *) + This function conforms to the specification of POSIX.1-2008 for the + [basename] utility. *) val dirname : string -> string -(** See {!Filename.basename}. *) +(** See {!Filename.basename}. + This function conforms to the specification of POSIX.1-2008 for the + [dirname] utility. *) val temp_file : ?temp_dir: string -> string -> string -> string (** [temp_file prefix suffix] returns the name of a diff --git a/stdlib/gc.mli b/stdlib/gc.mli index ec6cc17c30..71b8ffa783 100644 --- a/stdlib/gc.mli +++ b/stdlib/gc.mli @@ -123,6 +123,8 @@ type control = compaction is triggered at the end of each major GC cycle (this setting is intended for testing purposes only). If [max_overhead >= 1000000], compaction is never triggered. + If compaction is permanently disabled, it is strongly suggested + to set [allocation_policy] to 1. Default: 500. *) mutable stack_limit : int; @@ -221,7 +223,7 @@ val finalise : ('a -> unit) -> 'a -> unit - [ let f = fun x -> ... ;; let v = ... in Gc.finalise f v ] - The [f] function can use all features of O'Caml, including + The [f] function can use all features of OCaml, including assignments that make the value reachable again. It can also loop forever (in this case, the other finalisation functions will not be called during the execution of f, diff --git a/stdlib/genlex.mli b/stdlib/genlex.mli index d7cecc781e..41ce68d0fd 100644 --- a/stdlib/genlex.mli +++ b/stdlib/genlex.mli @@ -18,7 +18,7 @@ This module implements a simple ``standard'' lexical analyzer, presented as a function from character streams to token streams. It implements - roughly the lexical conventions of Caml, but is parameterized by the + roughly the lexical conventions of OCaml, but is parameterized by the set of keywords of your language. diff --git a/stdlib/hashtbl.mli b/stdlib/hashtbl.mli index 6dcfdafcfa..3f1a77d54d 100644 --- a/stdlib/hashtbl.mli +++ b/stdlib/hashtbl.mli @@ -35,7 +35,7 @@ val create : ?seed:int -> int -> ('a, 'b) t The optional [seed] parameter (an integer) can be given to diversify the hash function used to access the returned table. With high probability, hash tables created with different seeds - have different collision patterns. In Web-facing applications + have different collision patterns. In Web-facing applications for instance, it is recommended to create hash tables with a randomly-chosen seed. This prevents a denial-of-service attack whereas a malicious user sends input crafted to create many @@ -124,7 +124,8 @@ type statistics = { val stats : ('a, 'b) t -> statistics (** [Hashtbl.stats tbl] returns statistics about the table [tbl]: number of buckets, size of the biggest bucket, distribution of - buckets by size. *) + buckets by size. + @since 3.13.0 *) (** {6 Functorial interface} *) @@ -226,7 +227,7 @@ module MakeSeeded (H : SeededHashedType) : SeededS with type key = H.t The operations perform similarly to those of the generic interface, but use the seeded hashing and equality functions specified in the functor argument [H] instead of generic - equality and hashing. + equality and hashing. @since 3.13.0 *) @@ -263,10 +264,6 @@ val hash_param : int -> int -> 'a -> int val seeded_hash_param : int -> int -> int -> 'a -> int (** A variant of {!Hashtbl.hash_param} that is further parameterized by - an integer seed. Usage: + an integer seed. Usage: [Hashtbl.seeded_hash_param meaningful total seed x]. @since 3.13.0 *) - - - - diff --git a/stdlib/list.mli b/stdlib/list.mli index 996436ef50..96166e25d9 100644 --- a/stdlib/list.mli +++ b/stdlib/list.mli @@ -76,9 +76,9 @@ val iter : ('a -> unit) -> 'a list -> unit [begin f a1; f a2; ...; f an; () end]. *) val iteri : (int -> 'a -> unit) -> 'a list -> unit -(** Same as {!List.iter}, but the - function is applied to the index of the element as first argument (counting from 0), - and the element itself as second argument. +(** Same as {!List.iter}, but the function is applied to the index of + the element as first argument (counting from 0), and the element + itself as second argument. @since 3.13.0 *) @@ -88,9 +88,9 @@ val map : ('a -> 'b) -> 'a list -> 'b list with the results returned by [f]. Not tail-recursive. *) val mapi : (int -> 'a -> 'b) -> 'a list -> 'b list -(** Same as {!List.map}, but the - function is applied to the index of the element as first argument (counting from 0), - and the element itself as second argument. +(** Same as {!List.map}, but the function is applied to the index of + the element as first argument (counting from 0), and the element + itself as second argument. Not tail-recursive. @since 3.13.0 *) diff --git a/stdlib/map.ml b/stdlib/map.ml index 8f658b2126..519ef824e7 100644 --- a/stdlib/map.ml +++ b/stdlib/map.ml @@ -200,27 +200,31 @@ module Make(Ord: OrderedType) = struct Empty -> false | Node(l, v, d, r, _) -> p v d || exists p l || exists p r - let filter p s = - let rec filt accu = function - | Empty -> accu - | Node(l, v, d, r, _) -> - filt (filt (if p v d then add v d accu else accu) l) r in - filt Empty s - - let partition p s = - let rec part (t, f as accu) = function - | Empty -> accu - | Node(l, v, d, r, _) -> - part (part (if p v d then (add v d t, f) else (t, add v d f)) l) r in - part (Empty, Empty) s + (* Beware: those two functions assume that the added k is *strictly* + smaller (or bigger) than all the present keys in the tree; it + does not test for equality with the current min (or max) key. + + Indeed, they are only used during the "join" operation which + respects this precondition. + *) + + let rec add_min_binding k v = function + | Empty -> singleton k v + | Node (l, x, d, r, h) -> + bal (add_min_binding k v l) x d r + + let rec add_max_binding k v = function + | Empty -> singleton k v + | Node (l, x, d, r, h) -> + bal l x d (add_max_binding k v r) (* Same as create and bal, but no assumptions are made on the relative heights of l and r. *) let rec join l v d r = match (l, r) with - (Empty, _) -> add v d r - | (_, Empty) -> add v d l + (Empty, _) -> add_min_binding v d r + | (_, Empty) -> add_max_binding v d l | (Node(ll, lv, ld, lr, lh), Node(rl, rv, rd, rr, rh)) -> if lh > rh + 2 then bal ll lv ld (join lr v d r) else if rh > lh + 2 then bal (join l v d rl) rv rd rr else @@ -266,6 +270,20 @@ module Make(Ord: OrderedType) = struct | _ -> assert false + let rec filter p = function + Empty -> Empty + | Node(l, v, d, r, _) -> + let l' = filter p l and r' = filter p r in + if p v d then join l' v d r' else concat l' r' + + let rec partition p = function + Empty -> (Empty, Empty) + | Node(l, v, d, r, _) -> + let (lt, lf) = partition p l and (rt, rf) = partition p r in + if p v d + then (join lt v d rt, concat lf rf) + else (concat lt rt, join lf v d rf) + type 'a enumeration = End | More of key * 'a * 'a t * 'a enumeration let rec cons_enum m e = diff --git a/stdlib/marshal.mli b/stdlib/marshal.mli index 90e6dbffe1..86e1ebd199 100644 --- a/stdlib/marshal.mli +++ b/stdlib/marshal.mli @@ -27,7 +27,7 @@ making it impossible to check that the data read back possesses the type expected by the context. In particular, the result type of the [Marshal.from_*] functions is given as ['a], but this is - misleading: the returned Caml value does not possess type ['a] + misleading: the returned OCaml value does not possess type ['a] for all ['a]; it has one, unique type which cannot be determined at compile-type. The programmer should explicitly give the expected type of the returned value, using the following syntax: diff --git a/stdlib/oo.mli b/stdlib/oo.mli index cd83151bb8..508217228b 100644 --- a/stdlib/oo.mli +++ b/stdlib/oo.mli @@ -17,11 +17,17 @@ val copy : (< .. > as 'a) -> 'a (** [Oo.copy o] returns a copy of object [o], that is a fresh - object with the same methods and instance variables as [o] *) + object with the same methods and instance variables as [o]. *) external id : < .. > -> int = "%field1" (** Return an integer identifying this object, unique for - the current execution of the program. *) + the current execution of the program. The generic comparison + and hashing functions are based on this integer. When an object + is obtained by unmarshaling, the id is refreshed, and thus + different from the original object. As a consequence, the internal + invariants of data structures such as hash table or sets containing + objects are broken after unmarshaling the data structures. + *) (**/**) (** For internal use (CamlIDL) *) diff --git a/stdlib/pervasives.ml b/stdlib/pervasives.ml index 5148f228fa..17a1a9c1a8 100644 --- a/stdlib/pervasives.ml +++ b/stdlib/pervasives.ml @@ -52,7 +52,6 @@ external ( || ) : bool -> bool -> bool = "%sequor" external ( ~- ) : int -> int = "%negint" external ( ~+ ) : int -> int = "%identity" -external (~+) : int -> int = "%identity" external succ : int -> int = "%succint" external pred : int -> int = "%predint" external ( + ) : int -> int -> int = "%addint" @@ -91,7 +90,8 @@ external acos : float -> float = "caml_acos_float" "acos" "float" external asin : float -> float = "caml_asin_float" "asin" "float" external atan : float -> float = "caml_atan_float" "atan" "float" external atan2 : float -> float -> float = "caml_atan2_float" "atan2" "float" -external hypot : float -> float -> float = "caml_hypot_float" "caml_hypot" "float" +external hypot : float -> float -> float + = "caml_hypot_float" "caml_hypot" "float" external cos : float -> float = "caml_cos_float" "cos" "float" external cosh : float -> float = "caml_cosh_float" "cosh" "float" external log : float -> float = "caml_log_float" "log" "float" @@ -105,7 +105,8 @@ external tanh : float -> float = "caml_tanh_float" "tanh" "float" external ceil : float -> float = "caml_ceil_float" "ceil" "float" external floor : float -> float = "caml_floor_float" "floor" "float" external abs_float : float -> float = "%absfloat" -external copysign : float -> float -> float = "caml_copysign_float" "caml_copysign" "float" +external copysign : float -> float -> float + = "caml_copysign_float" "caml_copysign" "float" external mod_float : float -> float -> float = "caml_fmod_float" "fmod" "float" external frexp : float -> float * int = "caml_frexp_float" external ldexp : float -> int -> float = "caml_ldexp_float" @@ -214,7 +215,8 @@ let rec ( @ ) l1 l2 = type in_channel type out_channel -external open_descriptor_out : int -> out_channel = "caml_ml_open_descriptor_out" +external open_descriptor_out : int -> out_channel + = "caml_ml_open_descriptor_out" external open_descriptor_in : int -> in_channel = "caml_ml_open_descriptor_in" let stdin = open_descriptor_in 0 diff --git a/stdlib/pervasives.mli b/stdlib/pervasives.mli index d2141cd74a..9da56a2615 100644 --- a/stdlib/pervasives.mli +++ b/stdlib/pervasives.mli @@ -229,7 +229,7 @@ external ( asr ) : int -> int -> int = "%asrint" (** {6 Floating-point arithmetic} - Caml's floating-point numbers follow the + OCaml's floating-point numbers follow the IEEE 754 standard, using double precision (64 bits) numbers. Floating-point operations never raise an exception on overflow, underflow, division by zero, etc. Instead, special IEEE numbers @@ -314,7 +314,8 @@ external atan2 : float -> float -> float = "caml_atan2_float" "atan2" "float" and [y] are used to determine the quadrant of the result. Result is in radians and is between [-pi] and [pi]. *) -external hypot : float -> float -> float = "caml_hypot_float" "caml_hypot" "float" +external hypot : float -> float -> float + = "caml_hypot_float" "caml_hypot" "float" (** [hypot x y] returns [sqrt(x *. x + y *. y)], that is, the length of the hypotenuse of a right-angled triangle with sides of length [x] and [y], or, equivalently, the distance of the point [(x,y)] @@ -344,11 +345,13 @@ external floor : float -> float = "caml_floor_float" "floor" "float" external abs_float : float -> float = "%absfloat" (** [abs_float f] returns the absolute value of [f]. *) -external copysign : float -> float -> float = "caml_copysign_float" "caml_copysign" "float" +external copysign : float -> float -> float + = "caml_copysign_float" "caml_copysign" "float" (** [copysign x y] returns a float whose absolute value is that of [x] and whose sign is that of [y]. If [x] is [nan], returns [nan]. If [y] is [nan], returns either [x] or [-. x], but it is not - specified which. *) + specified which. + @since 3.13.0 *) external mod_float : float -> float -> float = "caml_fmod_float" "fmod" "float" (** [mod_float a b] returns the remainder of [a] with respect to diff --git a/stdlib/printf.ml b/stdlib/printf.ml index afca2034ee..c55c64d367 100644 --- a/stdlib/printf.ml +++ b/stdlib/printf.ml @@ -217,7 +217,7 @@ let iter_on_format_args fmt add_conv add_char = and scan_conv skip i = if i > lim then incomplete_format fmt else match Sformat.unsafe_get fmt i with - | '%' | '!' | ',' -> succ i + | '%' | '@' | '!' | ',' -> succ i | 's' | 'S' | '[' -> add_conv skip i 's' | 'c' | 'C' -> add_conv skip i 'c' | 'd' | 'i' |'o' | 'u' | 'x' | 'X' | 'N' -> add_conv skip i 'i' @@ -391,7 +391,7 @@ type positional_specification = with $n$ being the {\em value} of the integer argument defining [*]; we clearly cannot statically guess the value of this parameter in the general case. Put it another way: this means type dependency, which is completely - out of scope of the Caml type algebra. *) + out of scope of the OCaml type algebra. *) let scan_positional_spec fmt got_spec i = match Sformat.unsafe_get fmt i with @@ -430,7 +430,7 @@ let get_index spec n = | Spec_index p -> p ;; -(* Format a float argument as a valid Caml lexeme. *) +(* Format a float argument as a valid OCaml lexeme. *) let format_float_lexeme = (* To be revised: this procedure should be a unique loop that performs the @@ -443,7 +443,7 @@ let format_float_lexeme = let make_valid_float_lexeme s = (* Check if s is already a valid lexeme: in this case do nothing, - otherwise turn s into a valid Caml lexeme. *) + otherwise turn s into a valid OCaml lexeme. *) let l = String.length s in let rec valid_float_loop i = if i >= l then s ^ "." else @@ -505,8 +505,10 @@ let scan_format fmt args n pos cont_s cont_a cont_t cont_f cont_m = and scan_conv spec n widths i = match Sformat.unsafe_get fmt i with - | '%' -> - cont_s n "%" (succ i) + | '%' | '@' as c -> + cont_s n (String.make 1 c) (succ i) + | '!' -> cont_f n (succ i) + | ',' -> cont_s n "" (succ i) | 's' | 'S' as conv -> let (x : string) = get_arg spec n in let x = if conv = 's' then x else "\"" ^ String.escaped x ^ "\"" in @@ -515,6 +517,8 @@ let scan_format fmt args n pos cont_s cont_a cont_t cont_f cont_m = if i = succ pos then x else format_string (extract_format fmt pos i widths) x in cont_s (next_index spec n) s (succ i) + | '[' as conv -> + bad_conversion_format fmt i conv | 'c' | 'C' as conv -> let (x : char) = get_arg spec n in let s = @@ -546,6 +550,8 @@ let scan_format fmt args n pos cont_s cont_a cont_t cont_f cont_m = let n = Sformat.succ_index (get_index spec n) in let arg = get_arg Spec_none n in cont_a (next_index spec n) printer arg (succ i) + | 'r' as conv -> + bad_conversion_format fmt i conv | 't' -> let printer = get_arg spec n in cont_t (next_index spec n) printer (succ i) @@ -570,8 +576,6 @@ let scan_format fmt args n pos cont_s cont_a cont_t cont_f cont_m = let s = format_int (extract_format_int 'n' fmt pos i widths) x in cont_s (next_index spec n) s (succ i) end - | ',' -> cont_s n "" (succ i) - | '!' -> cont_f n (succ i) | '{' | '(' as conv (* ')' '}' *) -> let (xf : ('a, 'b, 'c, 'd, 'e, 'f) format6) = get_arg spec n in let i = succ i in @@ -670,7 +674,7 @@ let sprintf fmt = ksprintf (fun s -> s) fmt;; (* Obsolete and deprecated. *) let kprintf = ksprintf;; -(* For Caml system internal use only: needed to implement modules [Format] +(* For OCaml system internal use only: needed to implement modules [Format] and [Scanf]. *) module CamlinternalPr = struct diff --git a/stdlib/printf.mli b/stdlib/printf.mli index 797a354dd0..942ec49b05 100644 --- a/stdlib/printf.mli +++ b/stdlib/printf.mli @@ -20,7 +20,7 @@ val fprintf : out_channel -> ('a, out_channel, unit) format -> 'a [arg1] to [argN] according to the format string [format], and outputs the resulting string on the channel [outchan]. - The format is a character string which contains two types of + The format string is a character string which contains two types of objects: plain characters, which are simply copied to the output channel, and conversion specifications, each of which causes conversion and printing of arguments. @@ -31,60 +31,66 @@ val fprintf : out_channel -> ('a, out_channel, unit) format -> 'a In short, a conversion specification consists in the [%] character, followed by optional modifiers and a type which is made of one or - two characters. The types and their meanings are: + two characters. - - [d], [i], [n], [l], [L], or [N]: convert an integer argument to - signed decimal. - - [u]: convert an integer argument to unsigned decimal. + The types and their meanings are: + + - [d], [i]: convert an integer argument to signed decimal. + - [u], [n], [l], [L], or [N]: convert an integer argument to + unsigned decimal. Warning: [n], [l], [L], and [N] are + used for [scanf], and should not be used for [printf]. - [x]: convert an integer argument to unsigned hexadecimal, using lowercase letters. - [X]: convert an integer argument to unsigned hexadecimal, using uppercase letters. - [o]: convert an integer argument to unsigned octal. - [s]: insert a string argument. - - [S]: insert a string argument in Caml syntax (double quotes, escapes). + - [S]: convert a string argument to OCaml syntax (double quotes, escapes). - [c]: insert a character argument. - - [C]: insert a character argument in Caml syntax (single quotes, escapes). + - [C]: convert a character argument to OCaml syntax (single quotes, escapes). - [f]: convert a floating-point argument to decimal notation, in the style [dddd.ddd]. - - [F]: convert a floating-point argument to Caml syntax ([dddd.] + - [F]: convert a floating-point argument to OCaml syntax ([dddd.] or [dddd.ddd] or [d.ddd e+-dd]). - [e] or [E]: convert a floating-point argument to decimal notation, in the style [d.ddd e+-dd] (mantissa and exponent). - [g] or [G]: convert a floating-point argument to decimal notation, in style [f] or [e], [E] (whichever is more compact). - [B]: convert a boolean argument to the string [true] or [false] - - [b]: convert a boolean argument (for backward compatibility; do not - use in new programs). + - [b]: convert a boolean argument (deprecated; do not use in new + programs). - [ld], [li], [lu], [lx], [lX], [lo]: convert an [int32] argument to the format specified by the second letter (decimal, hexadecimal, etc). - [nd], [ni], [nu], [nx], [nX], [no]: convert a [nativeint] argument to the format specified by the second letter. - [Ld], [Li], [Lu], [Lx], [LX], [Lo]: convert an [int64] argument to the format specified by the second letter. - - [a]: user-defined printer. Takes two arguments and applies the + - [a]: user-defined printer. Take two arguments and apply the first one to [outchan] (the current output channel) and to the second argument. The first argument must therefore have type [out_channel -> 'b -> unit] and the second ['b]. The output produced by the function is inserted in the output of [fprintf] at the current point. - - [t]: same as [%a], but takes only one argument (with type + - [t]: same as [%a], but take only one argument (with type [out_channel -> unit]) and apply it to [outchan]. - [\{ fmt %\}]: convert a format string argument. The argument must have the same type as the internal format string [fmt]. - - [( fmt %)]: format string substitution. Takes a format string - argument and substitutes it to the internal format string [fmt] + - [( fmt %)]: format string substitution. Take a format string + argument and substitute it to the internal format string [fmt] to print following arguments. The argument must have the same type as the internal format string [fmt]. - [!]: take no argument and flush the output. - [%]: take no argument and output one [%] character. - - [,]: the no-op delimiter for conversion specifications. + - [\@]: take no argument and output one [\@] character. + - [,]: take no argument and do nothing. The optional [flags] are: - [-]: left-justify the output (default is right justification). - [0]: for numerical conversions, pad with zeroes instead of spaces. - - [+]: for numerical conversions, prefix number with a [+] sign if positive. - - space: for numerical conversions, prefix number with a space if positive. + - [+]: for signed numerical conversions, prefix number with a [+] + sign if positive. + - space: for signed numerical conversions, prefix number with a + space if positive. - [#]: request an alternate formatting style for numbers. The optional [width] is an integer indicating the minimal @@ -153,7 +159,7 @@ val kprintf : (string -> 'a) -> ('b, unit, string, 'a) format4 -> 'b;; (**/**) -(* For Caml system internal use only. Don't call directly. *) +(* For OCaml system internal use only. Don't call directly. *) module CamlinternalPr : sig diff --git a/stdlib/queue.ml b/stdlib/queue.ml index 605a892046..4e12eb3d2f 100644 --- a/stdlib/queue.ml +++ b/stdlib/queue.ml @@ -15,7 +15,7 @@ exception Empty -(* O'Caml currently does not allow the components of a sum type to be +(* OCaml currently does not allow the components of a sum type to be mutable. Yet, for optimal space efficiency, we must have cons cells whose [next] field is mutable. This leads us to define a type of cyclic lists, so as to eliminate the [Nil] case and the sum diff --git a/stdlib/scanf.ml b/stdlib/scanf.ml index ee80f5e7a6..9c6ecef62f 100644 --- a/stdlib/scanf.ml +++ b/stdlib/scanf.ml @@ -438,7 +438,7 @@ let int_of_width_opt = function ;; let int_of_prec_opt = function - | None -> 0 + | None -> max_int | Some prec -> prec ;; @@ -737,7 +737,7 @@ let scan_exp_part width ib = ;; (* Scan the integer part of a floating point number, (not using the - Caml lexical convention since the integer part can be empty): + OCaml lexical convention since the integer part can be empty): an optional sign, followed by a possibly empty sequence of decimal digits (e.g. -.1). *) let scan_int_part width ib = @@ -925,7 +925,7 @@ let scan_backslash_char width ib = bad_input_escape c ;; -(* Scan a character (a Caml token). *) +(* Scan a character (an OCaml token). *) let scan_Char width ib = let rec find_start width = @@ -946,7 +946,7 @@ let scan_Char width ib = find_start width ;; -(* Scan a delimited string (a Caml token). *) +(* Scan a delimited string (an OCaml token). *) let scan_String width ib = let rec find_start width = @@ -979,7 +979,7 @@ let scan_String width ib = find_start width ;; -(* Scan a boolean (a Caml token). *) +(* Scan a boolean (an OCaml token). *) let scan_bool width ib = if width < 4 then bad_token_length "a boolean" else let c = Scanning.checked_peek_char ib in @@ -999,31 +999,51 @@ type char_set = | Neg_set of string (* Negative (complementary) set. *) ;; + (* Char sets are read as sub-strings in the format string. *) -let read_char_set fmt i = - let lim = Sformat.length fmt - 1 in +let scan_range fmt j = + + let len = Sformat.length fmt in + + let buffer = Buffer.create len in - let rec find_in_set j = - if j > lim then incomplete_format fmt else + let rec scan_closing j = + if j >= len then incomplete_format fmt else match Sformat.get fmt j with - | ']' -> j - | _ -> find_in_set (succ j) - - and find_set i = - if i > lim then incomplete_format fmt else - match Sformat.get fmt i with - | ']' -> find_in_set (succ i) - | _ -> find_in_set i in - - if i > lim then incomplete_format fmt else - match Sformat.get fmt i with - | '^' -> - let i = succ i in - let j = find_set i in - j, Neg_set (Sformat.sub fmt (Sformat.index_of_int i) (j - i)) - | _ -> - let j = find_set i in - j, Pos_set (Sformat.sub fmt (Sformat.index_of_int i) (j - i)) + | ']' -> j, Buffer.contents buffer + | '%' -> + let j = j + 1 in + if j >= len then incomplete_format fmt else + begin match Sformat.get fmt j with + | '%' | '@' as c -> + Buffer.add_char buffer c; + scan_closing (j + 1) + | c -> bad_conversion fmt j c + end + | c -> + Buffer.add_char buffer c; + scan_closing (j + 1) in + + let scan_first_pos j = + if j >= len then incomplete_format fmt else + match Sformat.get fmt j with + | ']' as c -> + Buffer.add_char buffer c; + scan_closing (j + 1) + | _ -> scan_closing j in + + let rec scan_first_neg j = + if j >= len then incomplete_format fmt else + match Sformat.get fmt j with + | '^' -> + let j = j + 1 in + let k, char_set = scan_first_pos j in + k, Neg_set char_set + | _ -> + let k, char_set = scan_first_pos j in + k, Pos_set char_set in + + scan_first_neg j ;; (* Char sets are now represented as bit vectors that are represented as @@ -1370,18 +1390,19 @@ let scan_format ib ef fmt rv f = let width = int_of_width_opt width_opt in let prec = int_of_prec_opt prec_opt in match Sformat.get fmt i with - | '%' as conv -> - check_char ib conv; scan_fmt ir f (succ i) + | '%' | '@' as c -> + check_char ib c; + scan_fmt ir f (succ i) | 's' -> - let i, stp = scan_fmt_stoppers (succ i) in + let i, stp = scan_indication (succ i) in let _x = scan_string stp width ib in scan_fmt ir (stack f (token_string ib)) (succ i) | 'S' -> let _x = scan_String width ib in scan_fmt ir (stack f (token_string ib)) (succ i) | '[' (* ']' *) -> - let i, char_set = read_char_set fmt (succ i) in - let i, stp = scan_fmt_stoppers (succ i) in + let i, char_set = scan_range fmt (succ i) in + let i, stp = scan_indication (succ i) in let _x = scan_chars_in_char_set stp char_set width ib in scan_fmt ir (stack f (token_string ib)) (succ i) | ('c' | 'C') when width = 0 -> @@ -1458,12 +1479,23 @@ let scan_format ib ef fmt rv f = | c -> bad_conversion fmt i c - and scan_fmt_stoppers i = - if i > lim then i - 1, [] else - match Sformat.get fmt i with - | '@' when i < lim -> let i = succ i in i, [Sformat.get fmt i] - | '@' when i = lim -> incomplete_format fmt - | _ -> i - 1, [] in + and scan_indication j = + if j > lim then j - 1, [] else + match Sformat.get fmt j with + | '@' -> + let k = j + 1 in + if k > lim then j - 1, [] else + begin match Sformat.get fmt k with + | '%' -> + let k = k + 1 in + if k > lim then j - 1, [] else + begin match Sformat.get fmt k with + | '%' | '@' as c -> k, [ c ] + | _c -> j - 1, [] + end + | c -> k, [ c ] + end + | _c -> j - 1, [] in scan_fmt in @@ -1488,7 +1520,8 @@ let bscanf ib = kscanf ib scanf_bad_input;; let fscanf ic = bscanf (Scanning.from_channel ic);; -let sscanf s = bscanf (Scanning.from_string s);; +let sscanf : string -> ('a, 'b, 'c, 'd) scanner + = fun s -> bscanf (Scanning.from_string s);; let scanf fmt = bscanf Scanning.stdib fmt;; @@ -1521,6 +1554,9 @@ let format_from_string s fmt = sscanf_format (string_to_String s) fmt (fun x -> x) ;; +let unescaped s = + sscanf ("\"" ^ s ^ "\"") "%S%!" (fun x -> x) + (* Local Variables: compile-command: "cd ..; make world" diff --git a/stdlib/scanf.mli b/stdlib/scanf.mli index 1e8a744840..53317d66d8 100644 --- a/stdlib/scanf.mli +++ b/stdlib/scanf.mli @@ -65,16 +65,16 @@ (** {7 Formatted input as a functional feature} *) -(** The Caml scanning facility is reminiscent of the corresponding C feature. +(** The OCaml scanning facility is reminiscent of the corresponding C feature. However, it is also largely different, simpler, and yet more powerful: the formatted input functions are higher-order functionals and the parameter passing mechanism is just the regular function application not the variable assignment based mechanism which is typical for formatted - input in imperative languages; the Caml format strings also feature + input in imperative languages; the OCaml format strings also feature useful additions to easily define complex tokens; as expected within a functional programming language, the formatted input functions also support polymorphism, in particular arbitrary interaction with - polymorphic user-defined scanners. Furthermore, the Caml formatted input + polymorphic user-defined scanners. Furthermore, the OCaml formatted input facility is fully type-checked at compile time. *) (** {6 Formatted input channel} *) @@ -232,21 +232,14 @@ val bscanf : Scanning.in_channel -> ('a, 'b, 'c, 'd) scanner;; (** {6 Format string description} *) -(** The format string is a character string which contains three types of +(** The format is a character string which contains three types of objects: - plain characters, which are simply matched with the characters of the input (with a special case for space and line feed, see {!Scanf.space}), - conversion specifications, each of which causes reading and conversion of one argument for the function [f] (see {!Scanf.conversion}), - scanning indications to specify boundaries of tokens - (see scanning {!Scanf.indication}). - - As a special convention for format strings, the [\@] character introduces - an escape for both characters [\@] and [%]: in a format string, - [\@\@] and [\@%] are respectively equivalent to the plain characters [\@] - and [%]. - @since 3.13 -*) + (see scanning {!Scanf.indication}). *) (** {7:space The space character in format strings} *) @@ -269,157 +262,148 @@ val bscanf : Scanning.in_channel -> ('a, 'b, 'c, 'd) scanner;; (** {7:conversion Conversion specifications in format strings} *) -(** Conversion specifications have the following form: - - [% \[flags\] \[width\] \[.precision\] type] - - In short, a conversion specification consists in the [%] character, - followed by optional modifiers, and a type which is made of one or - several characters. - - The types and their meanings are: - - - [d]: reads an optionally signed decimal integer. - - [i]: reads an optionally signed integer - (usual input conventions for decimal ([0-9]+), hexadecimal - ([0x[0-9a-f]+] and [0X[0-9A-F]+]), octal ([0o[0-7]+]), and binary - ([0b[0-1]+]) notations are understood). - - [u]: reads an unsigned decimal integer. - - [x] or [X]: reads an unsigned hexadecimal integer ([[0-9a-f]+] or [[0-9A-F]+]). - - [o]: reads an unsigned octal integer ([[0-7]+]). - - [s]: reads a string argument that spreads as much as possible, until - the following bounding conditions holds: - {ul - {- a whitespace has been found (see {!Scanf.space}),} - {- a scanning indication has been encountered - (see scanning {!Scanf.indication}),} - {- the end-of-input has been reached.} - } - Hence, the [%s] conversion always succeeds: it returns an empty - string, if the bounding condition holds when the scan begins. - - [S]: reads a delimited string argument (delimiters and special - escaped characters follow the lexical conventions of Caml). - - [c]: reads a single character. To test the current input character - without reading it, specify a null field width, i.e. use - specification [%0c]. Raise [Invalid_argument], if the field width - specification is greater than 1. - - [C]: reads a single delimited character (delimiters and special - escaped characters follow the lexical conventions of Caml). - - [f], [e], [E], [g], [G]: reads an optionally signed - floating-point number in decimal notation, in the style [dddd.ddd - e/E+-dd]. - - [F]: reads a floating point number according to the lexical - conventions of Caml (hence the decimal point is mandatory if the - exponent part is not mentioned). - - [B]: reads a boolean argument ([true] or [false]). - - [b]: reads a boolean argument (for backward compatibility; do not use - in new programs). - - [ld], [li], [lu], [lx], [lX], [lo]: reads an [int32] argument to - the format specified by the second letter for regular integers. - - [nd], [ni], [nu], [nx], [nX], [no]: reads a [nativeint] argument to - the format specified by the second letter for regular integers. - - [Ld], [Li], [Lu], [Lx], [LX], [Lo]: reads an [int64] argument to - the format specified by the second letter for regular integers. - - [\[ range \]]: reads characters that matches one of the characters - mentioned in the range of characters [range] (or not mentioned in - it, if the range starts with [^]). Reads a [string] that can be - empty, if the next input character does not match the range. The set of - characters from [c1] to [c2] (inclusively) is denoted by [c1-c2]. - Hence, [%\[0-9\]] returns a string representing a decimal number - or an empty string if no decimal digit is found; similarly, - [%\[\\048-\\057\\065-\\070\]] returns a string of hexadecimal digits. - If a closing bracket appears in a range, it must occur as the - first character of the range (or just after the [^] in case of - range negation); hence [\[\]\]] matches a [\]] character and - [\[^\]\]] matches any character that is not [\]]. - - [r]: user-defined reader. Takes the next [ri] formatted input function and - applies it to the scanning buffer [ib] to read the next argument. The - input function [ri] must therefore have type [Scanning.in_channel -> 'a] and - the argument read has type ['a]. - - [\{ fmt %\}]: reads a format string argument. - The format string read must have the same type as the format string - specification [fmt]. - For instance, ["%{ %i %}"] reads any format string that can read a value of - type [int]; hence, if [s] is the string ["fmt:\"number is %u\""], then - [Scanf.sscanf s "fmt: %{%i%}"] succeeds and returns the format string - ["number is %u"]. - - [\( fmt %\)]: scanning format substitution. - Reads a format string and then goes on scanning with the format string - read, instead of using [fmt]. - The format string read must have the same type as the format string - specification [fmt] that it replaces. - For instance, ["%( %i %)"] reads any format string that can read a value - of type [int]. - Returns the format string read, and the value read using the format - string read. - Hence, if [s] is the string ["\"%4d\"1234.00"], then - [Scanf.sscanf s "%(%i%)" (fun fmt i -> fmt, i)] evaluates to - [("%4d", 1234)]. - If the special flag [_] is used, the conversion discards the - format string read and only returns the value read with the format - string read. - Hence, if [s] is the string ["\"%4d\"1234.00"], then - [Scanf.sscanf s "%_(%i%)"] is simply equivalent to - [Scanf.sscanf "1234.00" "%4d"]. - - [l]: returns the number of lines read so far. - - [n]: returns the number of characters read so far. - - [N] or [L]: returns the number of tokens read so far. - - [!]: matches the end of input condition. - - [%]: matches one [%] character in the input. - - [,]: the no-op delimiter for conversion specifications. - - Following the [%] character that introduces a conversion, there may be - the special flag [_]: the conversion that follows occurs as usual, - but the resulting value is discarded. - For instance, if [f] is the function [fun i -> i + 1], and [s] is the - string ["x = 1"], then [Scanf.sscanf s "%_s = %i" f] returns [2]. - - The optional [width] is an integer literal indicating the maximal width - of the token to read. - For instance, [%6d] reads an integer, having at most 6 decimal digits; - [%4f] reads a float with at most 4 characters; and [%8\[\\000-\\255\]] - returns the next 8 characters (or all the characters still available, - if fewer than 8 characters are available in the input). - - The optional [precision] is a dot [.] followed by an integer literal - indicating the maximum number of digits that follow the decimal point in - the [%f], [%e], and [%E] conversions. For instance, [%.4f] reads a - [float] with at most 4 fractional digits. - - Notes: - - - as mentioned above, the [%s] conversion always succeeds, even if there is - nothing to read in the input: in this case, it simply returns [""]. - - - in addition to the relevant digits, ['_'] characters may appear - inside numbers (this is reminiscent to the usual Caml lexical - conventions). If stricter scanning is desired, use the range - conversion facility instead of the number conversions. - - - the [scanf] facility is not intended for heavy duty lexical - analysis and parsing. If it appears not expressive enough for your - needs, several alternative exists: regular expressions (module - [Str]), stream parsers, [ocamllex]-generated lexers, - [ocamlyacc]-generated parsers. *) +(** Conversion specifications consist in the [%] character, followed by + an optional flag, an optional field width, and followed by one or + two conversion characters. The conversion characters and their + meanings are: + + - [d]: reads an optionally signed decimal integer. + - [i]: reads an optionally signed integer + (usual input conventions for decimal ([0-9]+), hexadecimal + ([0x[0-9a-f]+] and [0X[0-9A-F]+]), octal ([0o[0-7]+]), and binary + ([0b[0-1]+]) notations are understood). + - [u]: reads an unsigned decimal integer. + - [x] or [X]: reads an unsigned hexadecimal integer ([[0-9a-fA-F]+]). + - [o]: reads an unsigned octal integer ([[0-7]+]). + - [s]: reads a string argument that spreads as much as possible, until the + following bounding condition holds: {ul + {- a whitespace has been found (see {!Scanf.space}),} + {- a scanning indication (see scanning {!Scanf.indication}) has been + encountered,} + {- the end-of-input has been reached.}} + Hence, this conversion always succeeds: it returns an empty + string, if the bounding condition holds when the scan begins. + - [S]: reads a delimited string argument (delimiters and special + escaped characters follow the lexical conventions of Caml). + - [c]: reads a single character. To test the current input character + without reading it, specify a null field width, i.e. use + specification [%0c]. Raise [Invalid_argument], if the field width + specification is greater than 1. + - [C]: reads a single delimited character (delimiters and special + escaped characters follow the lexical conventions of Caml). + - [f], [e], [E], [g], [G]: reads an optionally signed + floating-point number in decimal notation, in the style [dddd.ddd + e/E+-dd]. + - [F]: reads a floating point number according to the lexical + conventions of Caml (hence the decimal point is mandatory if the + exponent part is not mentioned). + - [B]: reads a boolean argument ([true] or [false]). + - [b]: reads a boolean argument (for backward compatibility; do not use + in new programs). + - [ld], [li], [lu], [lx], [lX], [lo]: reads an [int32] argument to + the format specified by the second letter for regular integers. + - [nd], [ni], [nu], [nx], [nX], [no]: reads a [nativeint] argument to + the format specified by the second letter for regular integers. + - [Ld], [Li], [Lu], [Lx], [LX], [Lo]: reads an [int64] argument to + the format specified by the second letter for regular integers. + - [\[ range \]]: reads characters that matches one of the characters + mentioned in the range of characters [range] (or not mentioned in + it, if the range starts with [^]). Reads a [string] that can be + empty, if the next input character does not match the range. The set of + characters from [c1] to [c2] (inclusively) is denoted by [c1-c2]. + Hence, [%\[0-9\]] returns a string representing a decimal number + or an empty string if no decimal digit is found; similarly, + [%\[\\048-\\057\\065-\\070\]] returns a string of hexadecimal digits. + If a closing bracket appears in a range, it must occur as the + first character of the range (or just after the [^] in case of + range negation); hence [\[\]\]] matches a [\]] character and + [\[^\]\]] matches any character that is not [\]]. + Use [%%] and [%\@] to include a [%] or a [\@] in a range. + - [r]: user-defined reader. Takes the next [ri] formatted input function and + applies it to the scanning buffer [ib] to read the next argument. The + input function [ri] must therefore have type [Scanning.in_channel -> 'a] and + the argument read has type ['a]. + - [\{ fmt %\}]: reads a format string argument. + The format string read must have the same type as the format string + specification [fmt]. + For instance, ["%{ %i %}"] reads any format string that can read a value of + type [int]; hence, if [s] is the string ["fmt:\"number is %u\""], then + [Scanf.sscanf s "fmt: %{%i%}"] succeeds and returns the format string + ["number is %u"]. + - [\( fmt %\)]: scanning format substitution. + Reads a format string and then goes on scanning with the format string + read, instead of using [fmt]. + The format string read must have the same type as the format string + specification [fmt] that it replaces. + For instance, ["%( %i %)"] reads any format string that can read a value + of type [int]. + Returns the format string read, and the value read using the format + string read. + Hence, if [s] is the string ["\"%4d\"1234.00"], then + [Scanf.sscanf s "%(%i%)" (fun fmt i -> fmt, i)] evaluates to + [("%4d", 1234)]. + If the special flag [_] is used, the conversion discards the + format string read and only returns the value read with the format + string read. + Hence, if [s] is the string ["\"%4d\"1234.00"], then + [Scanf.sscanf s "%_(%i%)"] is simply equivalent to + [Scanf.sscanf "1234.00" "%4d"]. + - [l]: returns the number of lines read so far. + - [n]: returns the number of characters read so far. + - [N] or [L]: returns the number of tokens read so far. + - [!]: matches the end of input condition. + - [%]: matches one [%] character in the input. + - [\@]: matches one [\@] character in the input. + - [,]: does nothing. + + Following the [%] character that introduces a conversion, there may be + the special flag [_]: the conversion that follows occurs as usual, + but the resulting value is discarded. + For instance, if [f] is the function [fun i -> i + 1], and [s] is the + string ["x = 1"], then [Scanf.sscanf s "%_s = %i" f] returns [2]. + + The field width is composed of an optional integer literal + indicating the maximal width of the token to read. + For instance, [%6d] reads an integer, having at most 6 decimal digits; + [%4f] reads a float with at most 4 characters; and [%8[\\000-\\255]] + returns the next 8 characters (or all the characters still available, + if fewer than 8 characters are available in the input). + + Notes: + + - as mentioned above, a [%s] conversion always succeeds, even if there is + nothing to read in the input: in this case, it simply returns [""]. + + - in addition to the relevant digits, ['_'] characters may appear + inside numbers (this is reminiscent to the usual Caml lexical + conventions). If stricter scanning is desired, use the range + conversion facility instead of the number conversions. + + - the [scanf] facility is not intended for heavy duty lexical + analysis and parsing. If it appears not expressive enough for your + needs, several alternative exists: regular expressions (module + [Str]), stream parsers, [ocamllex]-generated lexers, + [ocamlyacc]-generated parsers. *) (** {7:indication Scanning indications in format strings} *) (** Scanning indications appear just after the string conversions [%s] - and [%\[ range \]] to delimit the end of the token. A scanning + and [%[ range ]] to delimit the end of the token. A scanning indication is introduced by a [\@] character, followed by some - literal character [c]. It means that the string token should end + plain character [c]. It means that the string token should end just before the next matching [c] (which is skipped). If no [c] character is encountered, the string token spreads as much as possible. For instance, ["%s@\t"] reads a string up to the next - tab character or up to the end of input. - - When it does not introduce a scanning indication, the [\@] character - introduces an escape for the next character: [\@c] is treated as a plain - [c] character. + tab character or to the end of input. If a [\@] character appears + anywhere else in the format string, it is treated as a plain character. Note: - - the scanning indications introduce slight differences in the syntax of + - As usual in format strings, [%] characters must be escaped using [%%] + and [%\@] is equivalent to [\@]; this rule still holds within range + specifications and scanning indications. + For instance, ["%s@%%"] reads a string up to the next [%] character. + - The scanning indications introduce slight differences in the syntax of [Scanf] format strings, compared to those used for the [Printf] module. However, the scanning indications are similar to those used in the [Format] module; hence, when producing formatted text to be scanned @@ -509,8 +493,10 @@ val format_from_string : @since 3.10.0 *) -(* - Local Variables: - compile-command: "cd ..; make world" - End: +val unescaped : string -> string +(** Return a copy of the argument with escape sequences, following the + lexical conventions of OCaml, replaced by their corresponding + special characters. If there is no escape sequence in the + argument, still return a copy, contrary to String.escaped. + @since 3.13.0 *) diff --git a/stdlib/set.ml b/stdlib/set.ml index 63e965fa4f..e61fd24b6a 100644 --- a/stdlib/set.ml +++ b/stdlib/set.ml @@ -117,13 +117,32 @@ module Make(Ord: OrderedType) = if c = 0 then t else if c < 0 then bal (add x l) v r else bal l v (add x r) + let singleton x = Node(Empty, x, Empty, 1) + + (* Beware: those two functions assume that the added v is *strictly* + smaller (or bigger) than all the present elements in the tree; it + does not test for equality with the current min (or max) element. + Indeed, they are only used during the "join" operation which + respects this precondition. + *) + + let rec add_min_element v = function + | Empty -> singleton v + | Node (l, x, r, h) -> + bal (add_min_element v l) x r + + let rec add_max_element v = function + | Empty -> singleton v + | Node (l, x, r, h) -> + bal l x (add_max_element v r) + (* Same as create and bal, but no assumptions are made on the relative heights of l and r. *) let rec join l v r = match (l, r) with - (Empty, _) -> add v r - | (_, Empty) -> add v l + (Empty, _) -> add_min_element v r + | (_, Empty) -> add_max_element v l | (Node(ll, lv, lr, lh), Node(rl, rv, rr, rh)) -> if lh > rh + 2 then bal ll lv (join lr v r) else if rh > lh + 2 then bal (join l v rl) rv rr else @@ -197,8 +216,6 @@ module Make(Ord: OrderedType) = let c = Ord.compare x v in c = 0 || mem x (if c < 0 then l else r) - let singleton x = Node(Empty, x, Empty, 1) - let rec remove x = function Empty -> Empty | Node(l, v, r, _) -> @@ -300,19 +317,19 @@ module Make(Ord: OrderedType) = Empty -> false | Node(l, v, r, _) -> p v || exists p l || exists p r - let filter p s = - let rec filt accu = function - | Empty -> accu - | Node(l, v, r, _) -> - filt (filt (if p v then add v accu else accu) l) r in - filt Empty s - - let partition p s = - let rec part (t, f as accu) = function - | Empty -> accu - | Node(l, v, r, _) -> - part (part (if p v then (add v t, f) else (t, add v f)) l) r in - part (Empty, Empty) s + let rec filter p = function + Empty -> Empty + | Node(l, v, r, _) -> + let l' = filter p l and r' = filter p r in + if p v then join l' v r' else concat l' r' + + let rec partition p = function + Empty -> (Empty, Empty) + | Node(l, v, r, _) -> + let (lt, lf) = partition p l and (rt, rf) = partition p r in + if p v + then (join lt v rt, concat lf rf) + else (concat lt rt, join lf v rf) let rec cardinal = function Empty -> 0 diff --git a/stdlib/string.mli b/stdlib/string.mli index 3a7976bd20..501fb181c0 100644 --- a/stdlib/string.mli +++ b/stdlib/string.mli @@ -101,12 +101,17 @@ val iteri : (int -> char -> unit) -> string -> unit @since 3.13.0 *) +val map : (char -> char) -> string -> string +(** [String.map f s] applies function [f] in turn to all + the characters of [s] and stores the results in a new string that + is returned. *) + val escaped : string -> string (** Return a copy of the argument, with special characters represented by escape sequences, following the lexical conventions of OCaml. If there is no special character in the argument, return the original string itself, - not a copy. *) + not a copy. Its inverse function is Scanf.unescaped. *) val index : string -> char -> int (** [String.index s c] returns the character number of the first diff --git a/stdlib/sys.mli b/stdlib/sys.mli index ee5021dcc1..4913bef8ee 100644 --- a/stdlib/sys.mli +++ b/stdlib/sys.mli @@ -75,13 +75,13 @@ val interactive : bool ref the interactive toplevel system [ocaml]. *) val os_type : string -(** Operating system currently executing the Caml program. One of +(** Operating system currently executing the OCaml program. One of - ["Unix"] (for all Unix versions, including Linux and Mac OS X), - ["Win32"] (for MS-Windows, OCaml compiled with MSVC++ or Mingw), - ["Cygwin"] (for MS-Windows, OCaml compiled with Cygwin). *) val word_size : int -(** Size of one word on the machine currently executing the Caml +(** Size of one word on the machine currently executing the OCaml program, in bits: 32 or 64. *) val max_string_length : int |