summaryrefslogtreecommitdiff
path: root/libgomp/testsuite
diff options
context:
space:
mode:
authorAndrew Stubbs <ams@codesourcery.com>2021-12-03 17:46:41 +0000
committerAndrew Stubbs <ams@codesourcery.com>2021-12-22 10:47:37 +0000
commit4da9ae405b0b6c6b08bb0d0605da0a96c02babb7 (patch)
tree48c5f2038a1200bfc481b70269dc09dbebda87f4 /libgomp/testsuite
parent5f702eb7ad1e50bc3ca37e247d8097a8b15d5606 (diff)
downloadgcc-4da9ae405b0b6c6b08bb0d0605da0a96c02babb7.tar.gz
libgomp, nvptx: low-latency memory allocator
This patch adds support for allocating low-latency ".shared" memory on NVPTX GPU device, via the omp_low_lat_mem_space and omp_alloc. The memory can be allocated, reallocated, and freed using a basic but fast algorithm, is thread safe and the size of the low-latency heap can be configured using the GOMP_NVPTX_LOWLAT_POOL environment variable. The use of the PTX dynamic_smem_size feature means that the minimum version requirement is now bumped to 4.1 (still old at this point). libgomp/ChangeLog: * allocator.c (MEMSPACE_ALLOC): New macro. (MEMSPACE_CALLOC): New macro. (MEMSPACE_REALLOC): New macro. (MEMSPACE_FREE): New macro. (dynamic_smem_size): New constants. (omp_alloc): Use MEMSPACE_ALLOC. Implement fall-backs for predefined allocators. (omp_free): Use MEMSPACE_FREE. (omp_calloc): Use MEMSPACE_CALLOC. Implement fall-backs for predefined allocators. (omp_realloc): Use MEMSPACE_REALLOC. Implement fall-backs for predefined allocators. * config/nvptx/team.c (__nvptx_lowlat_heap_root): New variable. (__nvptx_lowlat_pool): New asm varaible. (gomp_nvptx_main): Initialize the low-latency heap. * plugin/plugin-nvptx.c (lowlat_pool_size): New variable. (GOMP_OFFLOAD_init_device): Read the GOMP_NVPTX_LOWLAT_POOL envvar. (GOMP_OFFLOAD_run): Apply lowlat_pool_size. * config/nvptx/allocator.c: New file. * testsuite/libgomp.c/allocators-1.c: New test. * testsuite/libgomp.c/allocators-2.c: New test. * testsuite/libgomp.c/allocators-3.c: New test. * testsuite/libgomp.c/allocators-4.c: New test. * testsuite/libgomp.c/allocators-5.c: New test. * testsuite/libgomp.c/allocators-6.c: New test.
Diffstat (limited to 'libgomp/testsuite')
-rw-r--r--libgomp/testsuite/libgomp.c/allocators-1.c56
-rw-r--r--libgomp/testsuite/libgomp.c/allocators-2.c64
-rw-r--r--libgomp/testsuite/libgomp.c/allocators-3.c42
-rw-r--r--libgomp/testsuite/libgomp.c/allocators-4.c196
-rw-r--r--libgomp/testsuite/libgomp.c/allocators-5.c63
-rw-r--r--libgomp/testsuite/libgomp.c/allocators-6.c117
6 files changed, 538 insertions, 0 deletions
diff --git a/libgomp/testsuite/libgomp.c/allocators-1.c b/libgomp/testsuite/libgomp.c/allocators-1.c
new file mode 100644
index 00000000000..04968e4c83d
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/allocators-1.c
@@ -0,0 +1,56 @@
+/* { dg-do run } */
+
+/* Test that omp_alloc returns usable memory. */
+
+#include <omp.h>
+
+#pragma omp requires dynamic_allocators
+
+void
+test (int n, omp_allocator_handle_t allocator)
+{
+ #pragma omp target map(to:n) map(to:allocator)
+ {
+ int *a;
+ a = (int *) omp_alloc(n*sizeof(int), allocator);
+
+ #pragma omp parallel
+ for (int i = 0; i < n; i++)
+ a[i] = i;
+
+ for (int i = 0; i < n; i++)
+ if (a[i] != i)
+ {
+ __builtin_printf ("data mismatch at %i\n", i);
+ __builtin_abort ();
+ }
+
+ omp_free(a, allocator);
+ }
+}
+
+int
+main ()
+{
+ // Smaller than low-latency memory limit
+ test (10, omp_default_mem_alloc);
+ test (10, omp_large_cap_mem_alloc);
+ test (10, omp_const_mem_alloc);
+ test (10, omp_high_bw_mem_alloc);
+ test (10, omp_low_lat_mem_alloc);
+ test (10, omp_cgroup_mem_alloc);
+ test (10, omp_pteam_mem_alloc);
+ test (10, omp_thread_mem_alloc);
+
+ // Larger than low-latency memory limit
+ test (100000, omp_default_mem_alloc);
+ test (100000, omp_large_cap_mem_alloc);
+ test (100000, omp_const_mem_alloc);
+ test (100000, omp_high_bw_mem_alloc);
+ test (100000, omp_low_lat_mem_alloc);
+ test (100000, omp_cgroup_mem_alloc);
+ test (100000, omp_pteam_mem_alloc);
+ test (100000, omp_thread_mem_alloc);
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c/allocators-2.c b/libgomp/testsuite/libgomp.c/allocators-2.c
new file mode 100644
index 00000000000..a98f1b4c05e
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/allocators-2.c
@@ -0,0 +1,64 @@
+/* { dg-do run } */
+
+/* Test concurrent and repeated allocations. */
+
+#include <omp.h>
+
+#pragma omp requires dynamic_allocators
+
+void
+test (int n, omp_allocator_handle_t allocator)
+{
+ #pragma omp target map(to:n) map(to:allocator)
+ {
+ int **a;
+ a = (int **) omp_alloc(n*sizeof(int*), allocator);
+
+ #pragma omp parallel for
+ for (int i = 0; i < n; i++)
+ {
+ /*Use 10x to ensure we do activate low-latency fall-back. */
+ a[i] = omp_alloc(sizeof(int)*10, allocator);
+ a[i][0] = i;
+ }
+
+ for (int i = 0; i < n; i++)
+ if (a[i][0] != i)
+ {
+ __builtin_printf ("data mismatch at %i\n", i);
+ __builtin_abort ();
+ }
+
+ #pragma omp parallel for
+ for (int i = 0; i < n; i++)
+ omp_free(a[i], allocator);
+
+ omp_free (a, allocator);
+ }
+}
+
+int
+main ()
+{
+ // Smaller than low-latency memory limit
+ test (10, omp_default_mem_alloc);
+ test (10, omp_large_cap_mem_alloc);
+ test (10, omp_const_mem_alloc);
+ test (10, omp_high_bw_mem_alloc);
+ test (10, omp_low_lat_mem_alloc);
+ test (10, omp_cgroup_mem_alloc);
+ test (10, omp_pteam_mem_alloc);
+ test (10, omp_thread_mem_alloc);
+
+ // Larger than low-latency memory limit (on aggregate)
+ test (1000, omp_default_mem_alloc);
+ test (1000, omp_large_cap_mem_alloc);
+ test (1000, omp_const_mem_alloc);
+ test (1000, omp_high_bw_mem_alloc);
+ test (1000, omp_low_lat_mem_alloc);
+ test (1000, omp_cgroup_mem_alloc);
+ test (1000, omp_pteam_mem_alloc);
+ test (1000, omp_thread_mem_alloc);
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c/allocators-3.c b/libgomp/testsuite/libgomp.c/allocators-3.c
new file mode 100644
index 00000000000..45514c2a088
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/allocators-3.c
@@ -0,0 +1,42 @@
+/* { dg-do run } */
+
+/* Stress-test omp_alloc/omp_malloc under concurrency. */
+
+#include <omp.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#pragma omp requires dynamic_allocators
+
+#define N 1000
+
+void
+test (omp_allocator_handle_t allocator)
+{
+ #pragma omp target map(to:allocator)
+ {
+ #pragma omp parallel for
+ for (int i = 0; i < N; i++)
+ for (int j = 0; j < N; j++)
+ {
+ int *p = omp_alloc(sizeof(int), allocator);
+ omp_free(p, allocator);
+ }
+ }
+}
+
+int
+main ()
+{
+ // Smaller than low-latency memory limit
+ test (omp_default_mem_alloc);
+ test (omp_large_cap_mem_alloc);
+ test (omp_const_mem_alloc);
+ test (omp_high_bw_mem_alloc);
+ test (omp_low_lat_mem_alloc);
+ test (omp_cgroup_mem_alloc);
+ test (omp_pteam_mem_alloc);
+ test (omp_thread_mem_alloc);
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c/allocators-4.c b/libgomp/testsuite/libgomp.c/allocators-4.c
new file mode 100644
index 00000000000..9fa6aa1624f
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/allocators-4.c
@@ -0,0 +1,196 @@
+/* { dg-do run } */
+
+/* Test that low-latency free chains are sound. */
+
+#include <stddef.h>
+#include <omp.h>
+
+#pragma omp requires dynamic_allocators
+
+void
+check (int cond, const char *msg)
+{
+ if (!cond)
+ {
+ __builtin_printf ("%s\n", msg);
+ __builtin_abort ();
+ }
+}
+
+int
+main ()
+{
+ #pragma omp target
+ {
+ /* Ensure that the memory we get *is* low-latency with a null-fallback. */
+ omp_alloctrait_t traits[1]
+ = { { omp_atk_fallback, omp_atv_null_fb } };
+ omp_allocator_handle_t lowlat = omp_init_allocator (omp_low_lat_mem_space,
+ 1, traits);
+
+ int size = 4;
+
+ char *a = omp_alloc(size, lowlat);
+ char *b = omp_alloc(size, lowlat);
+ char *c = omp_alloc(size, lowlat);
+ char *d = omp_alloc(size, lowlat);
+
+ /* There are headers and padding to account for. */
+ int size2 = size + (b-a);
+ int size3 = size + (c-a);
+ int size4 = size + (d-a) + 100; /* Random larger amount. */
+
+ check (a != NULL && b != NULL && c != NULL && d != NULL,
+ "omp_alloc returned NULL\n");
+
+ omp_free(a, lowlat);
+ char *p = omp_alloc (size, lowlat);
+ check (p == a, "allocate did not reuse first chunk");
+
+ omp_free(b, lowlat);
+ p = omp_alloc (size, lowlat);
+ check (p == b, "allocate did not reuse second chunk");
+
+ omp_free(c, lowlat);
+ p = omp_alloc (size, lowlat);
+ check (p == c, "allocate did not reuse third chunk");
+
+ omp_free(a, lowlat);
+ omp_free(b, lowlat);
+ p = omp_alloc (size2, lowlat);
+ check (p == a, "allocate did not coalesce first two chunks");
+
+ omp_free(p, lowlat);
+ p = omp_alloc (size, lowlat);
+ check (p == a, "allocate did not split first chunk (1)");
+ p = omp_alloc (size, lowlat);
+ check (p == b, "allocate did not split first chunk (2)");
+
+ omp_free(b, lowlat);
+ omp_free(c, lowlat);
+ p = omp_alloc (size2, lowlat);
+ check (p == b, "allocate did not coalesce middle two chunks");
+
+ omp_free(p, lowlat);
+ p = omp_alloc (size, lowlat);
+ check (p == b, "allocate did not split second chunk (1)");
+ p = omp_alloc (size, lowlat);
+ check (p == c, "allocate did not split second chunk (2)");
+
+ omp_free(b, lowlat);
+ omp_free(a, lowlat);
+ p = omp_alloc (size2, lowlat);
+ check (p == a, "allocate did not coalesce first two chunks, reverse free");
+
+ omp_free(p, lowlat);
+ p = omp_alloc (size, lowlat);
+ check (p == a, "allocate did not split first chunk (1), reverse free");
+ p = omp_alloc (size, lowlat);
+ check (p == b, "allocate did not split first chunk (2), reverse free");
+
+ omp_free(c, lowlat);
+ omp_free(b, lowlat);
+ p = omp_alloc (size2, lowlat);
+ check (p == b, "allocate did not coalesce second two chunks, reverse free");
+
+ omp_free(p, lowlat);
+ p = omp_alloc (size, lowlat);
+ check (p == b, "allocate did not split second chunk (1), reverse free");
+ p = omp_alloc (size, lowlat);
+ check (p == c, "allocate did not split second chunk (2), reverse free");
+
+ omp_free(a, lowlat);
+ omp_free(b, lowlat);
+ omp_free(c, lowlat);
+ p = omp_alloc (size3, lowlat);
+ check (p == a, "allocate did not coalesce first three chunks");
+
+ omp_free(p, lowlat);
+ p = omp_alloc (size, lowlat);
+ check (p == a, "allocate did not split first chunk (1)");
+ p = omp_alloc (size, lowlat);
+ check (p == b, "allocate did not split first chunk (2)");
+ p = omp_alloc (size, lowlat);
+ check (p == c, "allocate did not split first chunk (3)");
+
+ omp_free(b, lowlat);
+ omp_free(c, lowlat);
+ omp_free(d, lowlat);
+ p = omp_alloc (size3, lowlat);
+ check (p == b, "allocate did not coalesce last three chunks");
+
+ omp_free(p, lowlat);
+ p = omp_alloc (size, lowlat);
+ check (p == b, "allocate did not split second chunk (1)");
+ p = omp_alloc (size, lowlat);
+ check (p == c, "allocate did not split second chunk (2)");
+ p = omp_alloc (size, lowlat);
+ check (p == d, "allocate did not split second chunk (3)");
+
+ omp_free(c, lowlat);
+ omp_free(b, lowlat);
+ omp_free(a, lowlat);
+ p = omp_alloc (size3, lowlat);
+ check (p == a, "allocate did not coalesce first three chunks, reverse free");
+
+ omp_free(p, lowlat);
+ p = omp_alloc (size, lowlat);
+ check (p == a, "allocate did not split first chunk (1), reverse free");
+ p = omp_alloc (size, lowlat);
+ check (p == b, "allocate did not split first chunk (2), reverse free");
+ p = omp_alloc (size, lowlat);
+ check (p == c, "allocate did not split first chunk (3), reverse free");
+
+ omp_free(d, lowlat);
+ omp_free(c, lowlat);
+ omp_free(b, lowlat);
+ p = omp_alloc (size3, lowlat);
+ check (p == b, "allocate did not coalesce second three chunks, reverse free");
+
+ omp_free(p, lowlat);
+ p = omp_alloc (size, lowlat);
+ check (p == b, "allocate did not split second chunk (1), reverse free");
+ p = omp_alloc (size, lowlat);
+ check (p == c, "allocate did not split second chunk (2), reverse free");
+ p = omp_alloc (size, lowlat);
+ check (p == d, "allocate did not split second chunk (3), reverse free");
+
+ omp_free(c, lowlat);
+ omp_free(a, lowlat);
+ omp_free(b, lowlat);
+ p = omp_alloc (size3, lowlat);
+ check (p == a, "allocate did not coalesce first three chunks, mixed free");
+
+ omp_free(p, lowlat);
+ p = omp_alloc (size, lowlat);
+ check (p == a, "allocate did not split first chunk (1), mixed free");
+ p = omp_alloc (size, lowlat);
+ check (p == b, "allocate did not split first chunk (2), mixed free");
+ p = omp_alloc (size, lowlat);
+ check (p == c, "allocate did not split first chunk (3), mixed free");
+
+ omp_free(d, lowlat);
+ omp_free(b, lowlat);
+ omp_free(c, lowlat);
+ p = omp_alloc (size3, lowlat);
+ check (p == b, "allocate did not coalesce second three chunks, mixed free");
+
+ omp_free(p, lowlat);
+ p = omp_alloc (size, lowlat);
+ check (p == b, "allocate did not split second chunk (1), mixed free");
+ p = omp_alloc (size, lowlat);
+ check (p == c, "allocate did not split second chunk (2), mixed free");
+ p = omp_alloc (size, lowlat);
+ check (p == d, "allocate did not split second chunk (3), mixed free");
+
+ omp_free(a, lowlat);
+ omp_free(b, lowlat);
+ omp_free(c, lowlat);
+ omp_free(d, lowlat);
+ p = omp_alloc(size4, lowlat);
+ check (p == a, "allocate did not coalesce all memory");
+ }
+
+return 0;
+}
+
diff --git a/libgomp/testsuite/libgomp.c/allocators-5.c b/libgomp/testsuite/libgomp.c/allocators-5.c
new file mode 100644
index 00000000000..9694010cf1f
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/allocators-5.c
@@ -0,0 +1,63 @@
+/* { dg-do run } */
+
+/* Test calloc with omp_alloc. */
+
+#include <omp.h>
+
+#pragma omp requires dynamic_allocators
+
+void
+test (int n, omp_allocator_handle_t allocator)
+{
+ #pragma omp target map(to:n) map(to:allocator)
+ {
+ int *a;
+ a = (int *) omp_calloc(n, sizeof(int), allocator);
+
+ for (int i = 0; i < n; i++)
+ if (a[i] != 0)
+ {
+ __builtin_printf ("memory not zeroed at %i\n", i);
+ __builtin_abort ();
+ }
+
+ #pragma omp parallel
+ for (int i = 0; i < n; i++)
+ a[i] = i;
+
+ for (int i = 0; i < n; i++)
+ if (a[i] != i)
+ {
+ __builtin_printf ("data mismatch at %i\n", i);
+ __builtin_abort ();
+ }
+
+ omp_free(a, allocator);
+ }
+}
+
+int
+main ()
+{
+ // Smaller than low-latency memory limit
+ test (10, omp_default_mem_alloc);
+ test (10, omp_large_cap_mem_alloc);
+ test (10, omp_const_mem_alloc);
+ test (10, omp_high_bw_mem_alloc);
+ test (10, omp_low_lat_mem_alloc);
+ test (10, omp_cgroup_mem_alloc);
+ test (10, omp_pteam_mem_alloc);
+ test (10, omp_thread_mem_alloc);
+
+ // Larger than low-latency memory limit
+ test (100000, omp_default_mem_alloc);
+ test (100000, omp_large_cap_mem_alloc);
+ test (100000, omp_const_mem_alloc);
+ test (100000, omp_high_bw_mem_alloc);
+ test (100000, omp_low_lat_mem_alloc);
+ test (100000, omp_cgroup_mem_alloc);
+ test (100000, omp_pteam_mem_alloc);
+ test (100000, omp_thread_mem_alloc);
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c/allocators-6.c b/libgomp/testsuite/libgomp.c/allocators-6.c
new file mode 100644
index 00000000000..90bf73095ef
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/allocators-6.c
@@ -0,0 +1,117 @@
+/* { dg-do run } */
+
+/* Test that low-latency realloc and free chains are sound. */
+
+#include <stddef.h>
+#include <omp.h>
+
+#pragma omp requires dynamic_allocators
+
+void
+check (int cond, const char *msg)
+{
+ if (!cond)
+ {
+ __builtin_printf ("%s\n", msg);
+ __builtin_abort ();
+ }
+}
+
+int
+main ()
+{
+ #pragma omp target
+ {
+ /* Ensure that the memory we get *is* low-latency with a null-fallback. */
+ omp_alloctrait_t traits[1]
+ = { { omp_atk_fallback, omp_atv_null_fb } };
+ omp_allocator_handle_t lowlat = omp_init_allocator (omp_low_lat_mem_space,
+ 1, traits);
+
+ int size = 16;
+
+ char *a = (char *)omp_alloc(size, lowlat);
+ char *b = (char *)omp_alloc(size, lowlat);
+ char *c = (char *)omp_alloc(size, lowlat);
+ char *d = (char *)omp_alloc(size, lowlat);
+
+ /* There are headers and padding to account for. */
+ int size2 = size + (b-a);
+ int size3 = size + (c-a);
+ int size4 = size + (d-a) + 100; /* Random larger amount. */
+
+ check (a != NULL && b != NULL && c != NULL && d != NULL,
+ "omp_alloc returned NULL\n");
+
+ char *p = omp_realloc (b, size, lowlat, lowlat);
+ check (p == b, "realloc did not reuse same size chunk, no space after");
+
+ p = omp_realloc (b, size-8, lowlat, lowlat);
+ check (p == b, "realloc did not reuse smaller chunk, no space after");
+
+ p = omp_realloc (b, size, lowlat, lowlat);
+ check (p == b, "realloc did not reuse original size chunk, no space after");
+
+ /* Make space after b. */
+ omp_free(c, lowlat);
+
+ p = omp_realloc (b, size, lowlat, lowlat);
+ check (p == b, "realloc did not reuse same size chunk");
+
+ p = omp_realloc (b, size-8, lowlat, lowlat);
+ check (p == b, "realloc did not reuse smaller chunk");
+
+ p = omp_realloc (b, size, lowlat, lowlat);
+ check (p == b, "realloc did not reuse original size chunk");
+
+ p = omp_realloc (b, size+8, lowlat, lowlat);
+ check (p == b, "realloc did not extend in place by a little");
+
+ p = omp_realloc (b, size2, lowlat, lowlat);
+ check (p == b, "realloc did not extend into whole next chunk");
+
+ p = omp_realloc (b, size3, lowlat, lowlat);
+ check (p != b, "realloc did not move b elsewhere");
+ omp_free (p, lowlat);
+
+
+ p = omp_realloc (a, size, lowlat, lowlat);
+ check (p == a, "realloc did not reuse same size chunk, first position");
+
+ p = omp_realloc (a, size-8, lowlat, lowlat);
+ check (p == a, "realloc did not reuse smaller chunk, first position");
+
+ p = omp_realloc (a, size, lowlat, lowlat);
+ check (p == a, "realloc did not reuse original size chunk, first position");
+
+ p = omp_realloc (a, size+8, lowlat, lowlat);
+ check (p == a, "realloc did not extend in place by a little, first position");
+
+ p = omp_realloc (a, size3, lowlat, lowlat);
+ check (p == a, "realloc did not extend into whole next chunk, first position");
+
+ p = omp_realloc (a, size4, lowlat, lowlat);
+ check (p != a, "realloc did not move a elsewhere, first position");
+ omp_free (p, lowlat);
+
+
+ p = omp_realloc (d, size, lowlat, lowlat);
+ check (p == d, "realloc did not reuse same size chunk, last position");
+
+ p = omp_realloc (d, size-8, lowlat, lowlat);
+ check (p == d, "realloc did not reuse smaller chunk, last position");
+
+ p = omp_realloc (d, size, lowlat, lowlat);
+ check (p == d, "realloc did not reuse original size chunk, last position");
+
+ p = omp_realloc (d, size+8, lowlat, lowlat);
+ check (p == d, "realloc did not extend in place by d little, last position");
+
+ /* Larger than low latency memory. */
+ p = omp_realloc(d, 100000000, lowlat, lowlat);
+ check (p == NULL, "realloc did not fail on OOM");
+ }
+
+return 0;
+}
+