diff options
Diffstat (limited to 'otherlibs/bigarray')
-rw-r--r-- | otherlibs/bigarray/bigarray.h | 4 | ||||
-rw-r--r-- | otherlibs/bigarray/bigarray.ml | 8 | ||||
-rw-r--r-- | otherlibs/bigarray/bigarray.mli | 58 | ||||
-rw-r--r-- | otherlibs/bigarray/bigarray_stubs.c | 20 | ||||
-rw-r--r-- | otherlibs/bigarray/mmap_unix.c | 95 |
5 files changed, 144 insertions, 41 deletions
diff --git a/otherlibs/bigarray/bigarray.h b/otherlibs/bigarray/bigarray.h index 8625b0d9fe..f6552107a6 100644 --- a/otherlibs/bigarray/bigarray.h +++ b/otherlibs/bigarray/bigarray.h @@ -73,8 +73,8 @@ struct caml_ba_array { intnat num_dims; /* Number of dimensions */ intnat flags; /* Kind of element array + memory layout + allocation status */ struct caml_ba_proxy * proxy; /* The proxy for sub-arrays, or NULL */ - /* PR#5516: use C99's / gcc's flexible array types if possible */ -#if (__STDC_VERSION__ >= 199901L) || defined(__GNUC__) + /* PR#5516: use C99's flexible array types if possible */ +#if (__STDC_VERSION__ >= 199901L) intnat dim[] /*[num_dims]*/; /* Size in each dimension */ #else intnat dim[1] /*[num_dims]*/; /* Size in each dimension */ diff --git a/otherlibs/bigarray/bigarray.ml b/otherlibs/bigarray/bigarray.ml index 1d3dbcf971..b9f22b1828 100644 --- a/otherlibs/bigarray/bigarray.ml +++ b/otherlibs/bigarray/bigarray.ml @@ -99,6 +99,8 @@ module Genarray = struct = "caml_ba_map_file_bytecode" "caml_ba_map_file" let map_file fd ?(pos = 0L) kind layout shared dims = map_internal fd kind layout shared dims pos + external release: ('a, 'b, 'c) t -> unit + = "caml_ba_release" end module Array1 = struct @@ -122,6 +124,8 @@ module Array1 = struct ba let map_file fd ?pos kind layout shared dim = Genarray.map_file fd ?pos kind layout shared [|dim|] + external release: ('a, 'b, 'c) t -> unit + = "caml_ba_release" end module Array2 = struct @@ -161,6 +165,8 @@ module Array2 = struct ba let map_file fd ?pos kind layout shared dim1 dim2 = Genarray.map_file fd ?pos kind layout shared [|dim1;dim2|] + external release: ('a, 'b, 'c) t -> unit + = "caml_ba_release" end module Array3 = struct @@ -210,6 +216,8 @@ module Array3 = struct ba let map_file fd ?pos kind layout shared dim1 dim2 dim3 = Genarray.map_file fd ?pos kind layout shared [|dim1;dim2;dim3|] + external release: ('a, 'b, 'c) t -> unit + = "caml_ba_release" end external genarray_of_array1: ('a, 'b, 'c) Array1.t -> ('a, 'b, 'c) Genarray.t diff --git a/otherlibs/bigarray/bigarray.mli b/otherlibs/bigarray/bigarray.mli index 8b260bf790..73c27b5750 100644 --- a/otherlibs/bigarray/bigarray.mli +++ b/otherlibs/bigarray/bigarray.mli @@ -426,7 +426,27 @@ module Genarray : or a SIGBUS signal may be raised. This happens, for instance, if the file is shrinked. *) - end + val release: ('a, 'b, 'c) t -> unit + (** Release the resources associated with the given big array, + then set all of its dimensions to 0, causing subsequent accesses + to the big array to fail. This releasing of resources is performed + automatically by the garbage collector when the big array is no longer + referenced by the program. However, memory behavior of the program + can be improved by releasing the resources explicitly via + [Genarray.release] as soon as the big array is no longer useful. + + If the big array was created with [Genarray.create], the memory + space occupied by its data is freed. If the big array was + created with [Genarray.map_file], updates performed on the array + are flushed to the file (if the mapping is shared), then the + mapping is removed, freeing the corresponding virtual memory + space. If several views on the big array data were created + using [Genarray.sub_*] or [Genarray.slice_*], data release occurs + when the last not-yet-released view is released. Multiple calls + to [Genarray.release] on the same big array are safe: the second + and subsequent calls have no effect. *) + +end (** {6 One-dimensional arrays} *) @@ -496,16 +516,20 @@ module Array1 : sig (** Memory mapping of a file as a one-dimensional big array. See {!Bigarray.Genarray.map_file} for more details. *) + val release: ('a, 'b, 'c) t -> unit + (** Explicit release of the resources associated with the big array. + See {!Bigarray.Genarray.release} for more details. *) + external unsafe_get: ('a, 'b, 'c) t -> int -> 'a = "%caml_ba_unsafe_ref_1" (** Like {!Bigarray.Array1.get}, but bounds checking is not always performed. Use with caution and only when the program logic guarantees that - the access is within bounds. *) + the access is within bounds and the big array has not been released. *) external unsafe_set: ('a, 'b, 'c) t -> int -> 'a -> unit = "%caml_ba_unsafe_set_1" (** Like {!Bigarray.Array1.set}, but bounds checking is not always performed. Use with caution and only when the program logic guarantees that - the access is within bounds. *) + the access is within bounds and the big array has not been released. *) end @@ -601,15 +625,21 @@ module Array2 : (** Memory mapping of a file as a two-dimensional big array. See {!Bigarray.Genarray.map_file} for more details. *) + val release: ('a, 'b, 'c) t -> unit + (** Explicit release of the resources associated with the big array. + See {!Bigarray.Genarray.release} for more details. *) + external unsafe_get: ('a, 'b, 'c) t -> int -> int -> 'a = "%caml_ba_unsafe_ref_2" - (** Like {!Bigarray.Array2.get}, but bounds checking is not always - performed. *) + (** Like {!Bigarray.Array2.get}, but bounds checking is not always performed. + Use with caution and only when the program logic guarantees that + the access is within bounds and the big array has not been released. *) external unsafe_set: ('a, 'b, 'c) t -> int -> int -> 'a -> unit = "%caml_ba_unsafe_set_2" - (** Like {!Bigarray.Array2.set}, but bounds checking is not always - performed. *) + (** Like {!Bigarray.Array2.set}, but bounds checking is not always performed. + Use with caution and only when the program logic guarantees that + the access is within bounds and the big array has not been released. *) end @@ -729,15 +759,21 @@ module Array3 : (** Memory mapping of a file as a three-dimensional big array. See {!Bigarray.Genarray.map_file} for more details. *) + val release: ('a, 'b, 'c) t -> unit + (** Explicit release of the resources associated with the big array. + See {!Bigarray.Genarray.release} for more details. *) + external unsafe_get: ('a, 'b, 'c) t -> int -> int -> int -> 'a = "%caml_ba_unsafe_ref_3" - (** Like {!Bigarray.Array3.get}, but bounds checking is not always - performed. *) + (** Like {!Bigarray.Array3.get}, but bounds checking is not always performed. + Use with caution and only when the program logic guarantees that + the access is within bounds and the big array has not been released. *) external unsafe_set: ('a, 'b, 'c) t -> int -> int -> int -> 'a -> unit = "%caml_ba_unsafe_set_3" - (** Like {!Bigarray.Array3.set}, but bounds checking is not always - performed. *) + (** Like {!Bigarray.Array3.set}, but bounds checking is not always performed. + Use with caution and only when the program logic guarantees that + the access is within bounds and the big array has not been released. *) end diff --git a/otherlibs/bigarray/bigarray_stubs.c b/otherlibs/bigarray/bigarray_stubs.c index 8afdc0df6e..4021b74aee 100644 --- a/otherlibs/bigarray/bigarray_stubs.c +++ b/otherlibs/bigarray/bigarray_stubs.c @@ -160,8 +160,8 @@ caml_ba_alloc(int flags, int num_dims, void * data, intnat * dim) if (data == NULL && size != 0) caml_raise_out_of_memory(); flags |= CAML_BA_MANAGED; } - /* PR#5516: use C99's / gcc's flexible array types if possible */ -#if (__STDC_VERSION__ >= 199901L) || defined(__GNUC__) + /* PR#5516: use C99's flexible array types if possible */ +#if (__STDC_VERSION__ >= 199901L) asize = sizeof(struct caml_ba_array) + num_dims * sizeof(intnat); #else asize = sizeof(struct caml_ba_array) + (num_dims - 1) * sizeof(intnat); @@ -496,18 +496,19 @@ CAMLprim value caml_ba_layout(value vb) return Val_int(Caml_ba_array_val(vb)->flags & CAML_BA_LAYOUT_MASK); } -/* Finalization of a big array */ +/* Finalization / release of a big array */ static void caml_ba_finalize(value v) { struct caml_ba_array * b = Caml_ba_array_val(v); + intnat i; switch (b->flags & CAML_BA_MANAGED_MASK) { case CAML_BA_EXTERNAL: break; case CAML_BA_MANAGED: if (b->proxy == NULL) { - free(b->data); + free(b->data); /* no op if b->data = NULL */ } else { if (-- b->proxy->refcount == 0) { free(b->proxy->data); @@ -526,6 +527,17 @@ static void caml_ba_finalize(value v) } break; } + /* Make sure that subsequent accesses to the bigarray fail (empty bounds) + and that subsequent calls to caml_ba_finalize do nothing. */ + for (i = 0; i < b->num_dims; i++) b->dim[i] = 0; + b->data = NULL; + b->proxy = NULL; +} + +CAMLprim value caml_ba_release(value v) +{ + caml_ba_finalize(v); + return Val_unit; } /* Comparison of two big arrays */ diff --git a/otherlibs/bigarray/mmap_unix.c b/otherlibs/bigarray/mmap_unix.c index 8e71664ab0..30294cc4bb 100644 --- a/otherlibs/bigarray/mmap_unix.c +++ b/otherlibs/bigarray/mmap_unix.c @@ -25,12 +25,14 @@ extern int caml_ba_element_size[]; /* from bigarray_stubs.c */ +#include <errno.h> #ifdef HAS_UNISTD #include <unistd.h> #endif #ifdef HAS_MMAP #include <sys/types.h> #include <sys/mman.h> +#include <sys/stat.h> #endif #if defined(HAS_MMAP) @@ -39,15 +41,61 @@ extern int caml_ba_element_size[]; /* from bigarray_stubs.c */ #define MAP_FAILED ((void *) -1) #endif +/* [caml_grow_file] function contributed by Gerd Stolpmann (PR#5543). */ + +static int caml_grow_file(int fd, file_offset size) +{ + char c; + int p; + + /* First use pwrite for growing - it is a conservative method, as it + can never happen that we shrink by accident + */ +#ifdef HAS_PWRITE + c = 0; + p = pwrite(fd, &c, 1, size - 1); +#else + + /* Emulate pwrite with lseek. This should only be necessary on ancient + systems nowadays + */ + file_offset currpos; + currpos = lseek(fd, 0, SEEK_CUR); + if (currpos != -1) { + p = lseek(fd, size - 1, SEEK_SET); + if (p != -1) { + c = 0; + p = write(fd, &c, 1); + if (p != -1) + p = lseek(fd, currpos, SEEK_SET); + } + } + else p=-1; +#endif +#ifdef HAS_TRUNCATE + if (p == -1 && errno == ESPIPE) { + /* Plan B. Check if at least ftruncate is possible. There are + some non-seekable descriptor types that do not support pwrite + but ftruncate, like shared memory. We never get into this case + for real files, so there is no danger of truncating persistent + data by accident + */ + p = ftruncate(fd, size); + } +#endif + return p; +} + + CAMLprim value caml_ba_map_file(value vfd, value vkind, value vlayout, value vshared, value vdim, value vstart) { int fd, flags, major_dim, shared; intnat num_dims, i; intnat dim[CAML_BA_MAX_NUM_DIMS]; - file_offset currpos, startpos, file_size, data_size; + file_offset startpos, file_size, data_size; + struct stat st; uintnat array_size, page, delta; - char c; void * addr; fd = Int_val(vfd); @@ -65,18 +113,15 @@ CAMLprim value caml_ba_map_file(value vfd, value vkind, value vlayout, if (dim[i] < 0) caml_invalid_argument("Bigarray.create: negative dimension"); } - /* Determine file size */ + /* Determine file size. We avoid lseek here because it is fragile, + and because some mappable file types do not support it + */ caml_enter_blocking_section(); - currpos = lseek(fd, 0, SEEK_CUR); - if (currpos == -1) { - caml_leave_blocking_section(); - caml_sys_error(NO_ARG); - } - file_size = lseek(fd, 0, SEEK_END); - if (file_size == -1) { + if (fstat(fd, &st) == -1) { caml_leave_blocking_section(); caml_sys_error(NO_ARG); } + file_size = st.st_size; /* Determine array size in bytes (or size of array without the major dimension if that dimension wasn't specified) */ array_size = caml_ba_element_size[flags & CAML_BA_KIND_MASK]; @@ -99,26 +144,22 @@ CAMLprim value caml_ba_map_file(value vfd, value vkind, value vlayout, } else { /* Check that file is large enough, and grow it otherwise */ if (file_size < startpos + array_size) { - if (lseek(fd, startpos + array_size - 1, SEEK_SET) == -1) { - caml_leave_blocking_section(); - caml_sys_error(NO_ARG); - } - c = 0; - if (write(fd, &c, 1) != 1) { + if (caml_grow_file(fd, startpos + array_size) == -1) { /* PR#5543 */ caml_leave_blocking_section(); caml_sys_error(NO_ARG); } } } - /* Restore original file position */ - lseek(fd, currpos, SEEK_SET); /* Determine offset so that the mapping starts at the given file pos */ page = getpagesize(); - delta = (uintnat) (startpos % page); + delta = (uintnat) startpos % page; /* Do the mmap */ shared = Bool_val(vshared) ? MAP_SHARED : MAP_PRIVATE; - addr = mmap(NULL, array_size + delta, PROT_READ | PROT_WRITE, - shared, fd, startpos - delta); + if (array_size > 0) + addr = mmap(NULL, array_size + delta, PROT_READ | PROT_WRITE, + shared, fd, startpos - delta); + else + addr = NULL; /* PR#5463 - mmap fails on empty region */ caml_leave_blocking_section(); if (addr == (void *) MAP_FAILED) caml_sys_error(NO_ARG); addr = (void *) ((uintnat) addr + delta); @@ -128,8 +169,8 @@ CAMLprim value caml_ba_map_file(value vfd, value vkind, value vlayout, #else -value caml_ba_map_file(value vfd, value vkind, value vlayout, - value vshared, value vdim, value vpos) +CAMLprim value caml_ba_map_file(value vfd, value vkind, value vlayout, + value vshared, value vdim, value vpos) { caml_invalid_argument("Bigarray.map_file: not supported"); return Val_unit; @@ -148,6 +189,12 @@ void caml_ba_unmap_file(void * addr, uintnat len) #if defined(HAS_MMAP) uintnat page = getpagesize(); uintnat delta = (uintnat) addr % page; - munmap((void *)((uintnat)addr - delta), len + delta); + if (len == 0) return; /* PR#5463 */ + addr = (void *)((uintnat)addr - delta); + len = len + delta; +#if defined(_POSIX_SYNCHRONIZED_IO) + msync(addr, len, MS_ASYNC); /* PR#3571 */ +#endif + munmap(addr, len); #endif } |