diff options
author | Andrew Stubbs <ams@codesourcery.com> | 2021-12-03 17:46:41 +0000 |
---|---|---|
committer | Andrew Stubbs <ams@codesourcery.com> | 2021-12-22 10:47:37 +0000 |
commit | 4da9ae405b0b6c6b08bb0d0605da0a96c02babb7 (patch) | |
tree | 48c5f2038a1200bfc481b70269dc09dbebda87f4 /libgomp/testsuite | |
parent | 5f702eb7ad1e50bc3ca37e247d8097a8b15d5606 (diff) | |
download | gcc-4da9ae405b0b6c6b08bb0d0605da0a96c02babb7.tar.gz |
libgomp, nvptx: low-latency memory allocator
This patch adds support for allocating low-latency ".shared" memory on
NVPTX GPU device, via the omp_low_lat_mem_space and omp_alloc. The memory
can be allocated, reallocated, and freed using a basic but fast algorithm,
is thread safe and the size of the low-latency heap can be configured using
the GOMP_NVPTX_LOWLAT_POOL environment variable.
The use of the PTX dynamic_smem_size feature means that the minimum version
requirement is now bumped to 4.1 (still old at this point).
libgomp/ChangeLog:
* allocator.c (MEMSPACE_ALLOC): New macro.
(MEMSPACE_CALLOC): New macro.
(MEMSPACE_REALLOC): New macro.
(MEMSPACE_FREE): New macro.
(dynamic_smem_size): New constants.
(omp_alloc): Use MEMSPACE_ALLOC.
Implement fall-backs for predefined allocators.
(omp_free): Use MEMSPACE_FREE.
(omp_calloc): Use MEMSPACE_CALLOC.
Implement fall-backs for predefined allocators.
(omp_realloc): Use MEMSPACE_REALLOC.
Implement fall-backs for predefined allocators.
* config/nvptx/team.c (__nvptx_lowlat_heap_root): New variable.
(__nvptx_lowlat_pool): New asm varaible.
(gomp_nvptx_main): Initialize the low-latency heap.
* plugin/plugin-nvptx.c (lowlat_pool_size): New variable.
(GOMP_OFFLOAD_init_device): Read the GOMP_NVPTX_LOWLAT_POOL envvar.
(GOMP_OFFLOAD_run): Apply lowlat_pool_size.
* config/nvptx/allocator.c: New file.
* testsuite/libgomp.c/allocators-1.c: New test.
* testsuite/libgomp.c/allocators-2.c: New test.
* testsuite/libgomp.c/allocators-3.c: New test.
* testsuite/libgomp.c/allocators-4.c: New test.
* testsuite/libgomp.c/allocators-5.c: New test.
* testsuite/libgomp.c/allocators-6.c: New test.
Diffstat (limited to 'libgomp/testsuite')
-rw-r--r-- | libgomp/testsuite/libgomp.c/allocators-1.c | 56 | ||||
-rw-r--r-- | libgomp/testsuite/libgomp.c/allocators-2.c | 64 | ||||
-rw-r--r-- | libgomp/testsuite/libgomp.c/allocators-3.c | 42 | ||||
-rw-r--r-- | libgomp/testsuite/libgomp.c/allocators-4.c | 196 | ||||
-rw-r--r-- | libgomp/testsuite/libgomp.c/allocators-5.c | 63 | ||||
-rw-r--r-- | libgomp/testsuite/libgomp.c/allocators-6.c | 117 |
6 files changed, 538 insertions, 0 deletions
diff --git a/libgomp/testsuite/libgomp.c/allocators-1.c b/libgomp/testsuite/libgomp.c/allocators-1.c new file mode 100644 index 00000000000..04968e4c83d --- /dev/null +++ b/libgomp/testsuite/libgomp.c/allocators-1.c @@ -0,0 +1,56 @@ +/* { dg-do run } */ + +/* Test that omp_alloc returns usable memory. */ + +#include <omp.h> + +#pragma omp requires dynamic_allocators + +void +test (int n, omp_allocator_handle_t allocator) +{ + #pragma omp target map(to:n) map(to:allocator) + { + int *a; + a = (int *) omp_alloc(n*sizeof(int), allocator); + + #pragma omp parallel + for (int i = 0; i < n; i++) + a[i] = i; + + for (int i = 0; i < n; i++) + if (a[i] != i) + { + __builtin_printf ("data mismatch at %i\n", i); + __builtin_abort (); + } + + omp_free(a, allocator); + } +} + +int +main () +{ + // Smaller than low-latency memory limit + test (10, omp_default_mem_alloc); + test (10, omp_large_cap_mem_alloc); + test (10, omp_const_mem_alloc); + test (10, omp_high_bw_mem_alloc); + test (10, omp_low_lat_mem_alloc); + test (10, omp_cgroup_mem_alloc); + test (10, omp_pteam_mem_alloc); + test (10, omp_thread_mem_alloc); + + // Larger than low-latency memory limit + test (100000, omp_default_mem_alloc); + test (100000, omp_large_cap_mem_alloc); + test (100000, omp_const_mem_alloc); + test (100000, omp_high_bw_mem_alloc); + test (100000, omp_low_lat_mem_alloc); + test (100000, omp_cgroup_mem_alloc); + test (100000, omp_pteam_mem_alloc); + test (100000, omp_thread_mem_alloc); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/allocators-2.c b/libgomp/testsuite/libgomp.c/allocators-2.c new file mode 100644 index 00000000000..a98f1b4c05e --- /dev/null +++ b/libgomp/testsuite/libgomp.c/allocators-2.c @@ -0,0 +1,64 @@ +/* { dg-do run } */ + +/* Test concurrent and repeated allocations. */ + +#include <omp.h> + +#pragma omp requires dynamic_allocators + +void +test (int n, omp_allocator_handle_t allocator) +{ + #pragma omp target map(to:n) map(to:allocator) + { + int **a; + a = (int **) omp_alloc(n*sizeof(int*), allocator); + + #pragma omp parallel for + for (int i = 0; i < n; i++) + { + /*Use 10x to ensure we do activate low-latency fall-back. */ + a[i] = omp_alloc(sizeof(int)*10, allocator); + a[i][0] = i; + } + + for (int i = 0; i < n; i++) + if (a[i][0] != i) + { + __builtin_printf ("data mismatch at %i\n", i); + __builtin_abort (); + } + + #pragma omp parallel for + for (int i = 0; i < n; i++) + omp_free(a[i], allocator); + + omp_free (a, allocator); + } +} + +int +main () +{ + // Smaller than low-latency memory limit + test (10, omp_default_mem_alloc); + test (10, omp_large_cap_mem_alloc); + test (10, omp_const_mem_alloc); + test (10, omp_high_bw_mem_alloc); + test (10, omp_low_lat_mem_alloc); + test (10, omp_cgroup_mem_alloc); + test (10, omp_pteam_mem_alloc); + test (10, omp_thread_mem_alloc); + + // Larger than low-latency memory limit (on aggregate) + test (1000, omp_default_mem_alloc); + test (1000, omp_large_cap_mem_alloc); + test (1000, omp_const_mem_alloc); + test (1000, omp_high_bw_mem_alloc); + test (1000, omp_low_lat_mem_alloc); + test (1000, omp_cgroup_mem_alloc); + test (1000, omp_pteam_mem_alloc); + test (1000, omp_thread_mem_alloc); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/allocators-3.c b/libgomp/testsuite/libgomp.c/allocators-3.c new file mode 100644 index 00000000000..45514c2a088 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/allocators-3.c @@ -0,0 +1,42 @@ +/* { dg-do run } */ + +/* Stress-test omp_alloc/omp_malloc under concurrency. */ + +#include <omp.h> +#include <stdio.h> +#include <stdlib.h> + +#pragma omp requires dynamic_allocators + +#define N 1000 + +void +test (omp_allocator_handle_t allocator) +{ + #pragma omp target map(to:allocator) + { + #pragma omp parallel for + for (int i = 0; i < N; i++) + for (int j = 0; j < N; j++) + { + int *p = omp_alloc(sizeof(int), allocator); + omp_free(p, allocator); + } + } +} + +int +main () +{ + // Smaller than low-latency memory limit + test (omp_default_mem_alloc); + test (omp_large_cap_mem_alloc); + test (omp_const_mem_alloc); + test (omp_high_bw_mem_alloc); + test (omp_low_lat_mem_alloc); + test (omp_cgroup_mem_alloc); + test (omp_pteam_mem_alloc); + test (omp_thread_mem_alloc); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/allocators-4.c b/libgomp/testsuite/libgomp.c/allocators-4.c new file mode 100644 index 00000000000..9fa6aa1624f --- /dev/null +++ b/libgomp/testsuite/libgomp.c/allocators-4.c @@ -0,0 +1,196 @@ +/* { dg-do run } */ + +/* Test that low-latency free chains are sound. */ + +#include <stddef.h> +#include <omp.h> + +#pragma omp requires dynamic_allocators + +void +check (int cond, const char *msg) +{ + if (!cond) + { + __builtin_printf ("%s\n", msg); + __builtin_abort (); + } +} + +int +main () +{ + #pragma omp target + { + /* Ensure that the memory we get *is* low-latency with a null-fallback. */ + omp_alloctrait_t traits[1] + = { { omp_atk_fallback, omp_atv_null_fb } }; + omp_allocator_handle_t lowlat = omp_init_allocator (omp_low_lat_mem_space, + 1, traits); + + int size = 4; + + char *a = omp_alloc(size, lowlat); + char *b = omp_alloc(size, lowlat); + char *c = omp_alloc(size, lowlat); + char *d = omp_alloc(size, lowlat); + + /* There are headers and padding to account for. */ + int size2 = size + (b-a); + int size3 = size + (c-a); + int size4 = size + (d-a) + 100; /* Random larger amount. */ + + check (a != NULL && b != NULL && c != NULL && d != NULL, + "omp_alloc returned NULL\n"); + + omp_free(a, lowlat); + char *p = omp_alloc (size, lowlat); + check (p == a, "allocate did not reuse first chunk"); + + omp_free(b, lowlat); + p = omp_alloc (size, lowlat); + check (p == b, "allocate did not reuse second chunk"); + + omp_free(c, lowlat); + p = omp_alloc (size, lowlat); + check (p == c, "allocate did not reuse third chunk"); + + omp_free(a, lowlat); + omp_free(b, lowlat); + p = omp_alloc (size2, lowlat); + check (p == a, "allocate did not coalesce first two chunks"); + + omp_free(p, lowlat); + p = omp_alloc (size, lowlat); + check (p == a, "allocate did not split first chunk (1)"); + p = omp_alloc (size, lowlat); + check (p == b, "allocate did not split first chunk (2)"); + + omp_free(b, lowlat); + omp_free(c, lowlat); + p = omp_alloc (size2, lowlat); + check (p == b, "allocate did not coalesce middle two chunks"); + + omp_free(p, lowlat); + p = omp_alloc (size, lowlat); + check (p == b, "allocate did not split second chunk (1)"); + p = omp_alloc (size, lowlat); + check (p == c, "allocate did not split second chunk (2)"); + + omp_free(b, lowlat); + omp_free(a, lowlat); + p = omp_alloc (size2, lowlat); + check (p == a, "allocate did not coalesce first two chunks, reverse free"); + + omp_free(p, lowlat); + p = omp_alloc (size, lowlat); + check (p == a, "allocate did not split first chunk (1), reverse free"); + p = omp_alloc (size, lowlat); + check (p == b, "allocate did not split first chunk (2), reverse free"); + + omp_free(c, lowlat); + omp_free(b, lowlat); + p = omp_alloc (size2, lowlat); + check (p == b, "allocate did not coalesce second two chunks, reverse free"); + + omp_free(p, lowlat); + p = omp_alloc (size, lowlat); + check (p == b, "allocate did not split second chunk (1), reverse free"); + p = omp_alloc (size, lowlat); + check (p == c, "allocate did not split second chunk (2), reverse free"); + + omp_free(a, lowlat); + omp_free(b, lowlat); + omp_free(c, lowlat); + p = omp_alloc (size3, lowlat); + check (p == a, "allocate did not coalesce first three chunks"); + + omp_free(p, lowlat); + p = omp_alloc (size, lowlat); + check (p == a, "allocate did not split first chunk (1)"); + p = omp_alloc (size, lowlat); + check (p == b, "allocate did not split first chunk (2)"); + p = omp_alloc (size, lowlat); + check (p == c, "allocate did not split first chunk (3)"); + + omp_free(b, lowlat); + omp_free(c, lowlat); + omp_free(d, lowlat); + p = omp_alloc (size3, lowlat); + check (p == b, "allocate did not coalesce last three chunks"); + + omp_free(p, lowlat); + p = omp_alloc (size, lowlat); + check (p == b, "allocate did not split second chunk (1)"); + p = omp_alloc (size, lowlat); + check (p == c, "allocate did not split second chunk (2)"); + p = omp_alloc (size, lowlat); + check (p == d, "allocate did not split second chunk (3)"); + + omp_free(c, lowlat); + omp_free(b, lowlat); + omp_free(a, lowlat); + p = omp_alloc (size3, lowlat); + check (p == a, "allocate did not coalesce first three chunks, reverse free"); + + omp_free(p, lowlat); + p = omp_alloc (size, lowlat); + check (p == a, "allocate did not split first chunk (1), reverse free"); + p = omp_alloc (size, lowlat); + check (p == b, "allocate did not split first chunk (2), reverse free"); + p = omp_alloc (size, lowlat); + check (p == c, "allocate did not split first chunk (3), reverse free"); + + omp_free(d, lowlat); + omp_free(c, lowlat); + omp_free(b, lowlat); + p = omp_alloc (size3, lowlat); + check (p == b, "allocate did not coalesce second three chunks, reverse free"); + + omp_free(p, lowlat); + p = omp_alloc (size, lowlat); + check (p == b, "allocate did not split second chunk (1), reverse free"); + p = omp_alloc (size, lowlat); + check (p == c, "allocate did not split second chunk (2), reverse free"); + p = omp_alloc (size, lowlat); + check (p == d, "allocate did not split second chunk (3), reverse free"); + + omp_free(c, lowlat); + omp_free(a, lowlat); + omp_free(b, lowlat); + p = omp_alloc (size3, lowlat); + check (p == a, "allocate did not coalesce first three chunks, mixed free"); + + omp_free(p, lowlat); + p = omp_alloc (size, lowlat); + check (p == a, "allocate did not split first chunk (1), mixed free"); + p = omp_alloc (size, lowlat); + check (p == b, "allocate did not split first chunk (2), mixed free"); + p = omp_alloc (size, lowlat); + check (p == c, "allocate did not split first chunk (3), mixed free"); + + omp_free(d, lowlat); + omp_free(b, lowlat); + omp_free(c, lowlat); + p = omp_alloc (size3, lowlat); + check (p == b, "allocate did not coalesce second three chunks, mixed free"); + + omp_free(p, lowlat); + p = omp_alloc (size, lowlat); + check (p == b, "allocate did not split second chunk (1), mixed free"); + p = omp_alloc (size, lowlat); + check (p == c, "allocate did not split second chunk (2), mixed free"); + p = omp_alloc (size, lowlat); + check (p == d, "allocate did not split second chunk (3), mixed free"); + + omp_free(a, lowlat); + omp_free(b, lowlat); + omp_free(c, lowlat); + omp_free(d, lowlat); + p = omp_alloc(size4, lowlat); + check (p == a, "allocate did not coalesce all memory"); + } + +return 0; +} + diff --git a/libgomp/testsuite/libgomp.c/allocators-5.c b/libgomp/testsuite/libgomp.c/allocators-5.c new file mode 100644 index 00000000000..9694010cf1f --- /dev/null +++ b/libgomp/testsuite/libgomp.c/allocators-5.c @@ -0,0 +1,63 @@ +/* { dg-do run } */ + +/* Test calloc with omp_alloc. */ + +#include <omp.h> + +#pragma omp requires dynamic_allocators + +void +test (int n, omp_allocator_handle_t allocator) +{ + #pragma omp target map(to:n) map(to:allocator) + { + int *a; + a = (int *) omp_calloc(n, sizeof(int), allocator); + + for (int i = 0; i < n; i++) + if (a[i] != 0) + { + __builtin_printf ("memory not zeroed at %i\n", i); + __builtin_abort (); + } + + #pragma omp parallel + for (int i = 0; i < n; i++) + a[i] = i; + + for (int i = 0; i < n; i++) + if (a[i] != i) + { + __builtin_printf ("data mismatch at %i\n", i); + __builtin_abort (); + } + + omp_free(a, allocator); + } +} + +int +main () +{ + // Smaller than low-latency memory limit + test (10, omp_default_mem_alloc); + test (10, omp_large_cap_mem_alloc); + test (10, omp_const_mem_alloc); + test (10, omp_high_bw_mem_alloc); + test (10, omp_low_lat_mem_alloc); + test (10, omp_cgroup_mem_alloc); + test (10, omp_pteam_mem_alloc); + test (10, omp_thread_mem_alloc); + + // Larger than low-latency memory limit + test (100000, omp_default_mem_alloc); + test (100000, omp_large_cap_mem_alloc); + test (100000, omp_const_mem_alloc); + test (100000, omp_high_bw_mem_alloc); + test (100000, omp_low_lat_mem_alloc); + test (100000, omp_cgroup_mem_alloc); + test (100000, omp_pteam_mem_alloc); + test (100000, omp_thread_mem_alloc); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/allocators-6.c b/libgomp/testsuite/libgomp.c/allocators-6.c new file mode 100644 index 00000000000..90bf73095ef --- /dev/null +++ b/libgomp/testsuite/libgomp.c/allocators-6.c @@ -0,0 +1,117 @@ +/* { dg-do run } */ + +/* Test that low-latency realloc and free chains are sound. */ + +#include <stddef.h> +#include <omp.h> + +#pragma omp requires dynamic_allocators + +void +check (int cond, const char *msg) +{ + if (!cond) + { + __builtin_printf ("%s\n", msg); + __builtin_abort (); + } +} + +int +main () +{ + #pragma omp target + { + /* Ensure that the memory we get *is* low-latency with a null-fallback. */ + omp_alloctrait_t traits[1] + = { { omp_atk_fallback, omp_atv_null_fb } }; + omp_allocator_handle_t lowlat = omp_init_allocator (omp_low_lat_mem_space, + 1, traits); + + int size = 16; + + char *a = (char *)omp_alloc(size, lowlat); + char *b = (char *)omp_alloc(size, lowlat); + char *c = (char *)omp_alloc(size, lowlat); + char *d = (char *)omp_alloc(size, lowlat); + + /* There are headers and padding to account for. */ + int size2 = size + (b-a); + int size3 = size + (c-a); + int size4 = size + (d-a) + 100; /* Random larger amount. */ + + check (a != NULL && b != NULL && c != NULL && d != NULL, + "omp_alloc returned NULL\n"); + + char *p = omp_realloc (b, size, lowlat, lowlat); + check (p == b, "realloc did not reuse same size chunk, no space after"); + + p = omp_realloc (b, size-8, lowlat, lowlat); + check (p == b, "realloc did not reuse smaller chunk, no space after"); + + p = omp_realloc (b, size, lowlat, lowlat); + check (p == b, "realloc did not reuse original size chunk, no space after"); + + /* Make space after b. */ + omp_free(c, lowlat); + + p = omp_realloc (b, size, lowlat, lowlat); + check (p == b, "realloc did not reuse same size chunk"); + + p = omp_realloc (b, size-8, lowlat, lowlat); + check (p == b, "realloc did not reuse smaller chunk"); + + p = omp_realloc (b, size, lowlat, lowlat); + check (p == b, "realloc did not reuse original size chunk"); + + p = omp_realloc (b, size+8, lowlat, lowlat); + check (p == b, "realloc did not extend in place by a little"); + + p = omp_realloc (b, size2, lowlat, lowlat); + check (p == b, "realloc did not extend into whole next chunk"); + + p = omp_realloc (b, size3, lowlat, lowlat); + check (p != b, "realloc did not move b elsewhere"); + omp_free (p, lowlat); + + + p = omp_realloc (a, size, lowlat, lowlat); + check (p == a, "realloc did not reuse same size chunk, first position"); + + p = omp_realloc (a, size-8, lowlat, lowlat); + check (p == a, "realloc did not reuse smaller chunk, first position"); + + p = omp_realloc (a, size, lowlat, lowlat); + check (p == a, "realloc did not reuse original size chunk, first position"); + + p = omp_realloc (a, size+8, lowlat, lowlat); + check (p == a, "realloc did not extend in place by a little, first position"); + + p = omp_realloc (a, size3, lowlat, lowlat); + check (p == a, "realloc did not extend into whole next chunk, first position"); + + p = omp_realloc (a, size4, lowlat, lowlat); + check (p != a, "realloc did not move a elsewhere, first position"); + omp_free (p, lowlat); + + + p = omp_realloc (d, size, lowlat, lowlat); + check (p == d, "realloc did not reuse same size chunk, last position"); + + p = omp_realloc (d, size-8, lowlat, lowlat); + check (p == d, "realloc did not reuse smaller chunk, last position"); + + p = omp_realloc (d, size, lowlat, lowlat); + check (p == d, "realloc did not reuse original size chunk, last position"); + + p = omp_realloc (d, size+8, lowlat, lowlat); + check (p == d, "realloc did not extend in place by d little, last position"); + + /* Larger than low latency memory. */ + p = omp_realloc(d, 100000000, lowlat, lowlat); + check (p == NULL, "realloc did not fail on OOM"); + } + +return 0; +} + |