summaryrefslogtreecommitdiff
path: root/libgomp/config
diff options
context:
space:
mode:
authorTobias Burnus <tobias@codesourcery.com>2022-10-24 16:58:43 +0200
committerTobias Burnus <tobias@codesourcery.com>2022-10-24 17:04:08 +0200
commit131d18e928a3ea1ab2d3bf61aa92d68a8a254609 (patch)
tree9c379ef9c639a56d0b1146aada7cef937328a89e /libgomp/config
parenta096036589d82175a0f729c2dab73c9a527d075d (diff)
downloadgcc-131d18e928a3ea1ab2d3bf61aa92d68a8a254609.tar.gz
libgomp/nvptx: Prepare for reverse-offload callback handling
This patch adds a stub 'gomp_target_rev' in the host's target.c, which will later handle the reverse offload. For nvptx, it adds support for forwarding the offload gomp_target_ext call to the host by setting values in a struct on the device and querying it on the host - invoking gomp_target_rev on the result. include/ChangeLog: * cuda/cuda.h (enum CUdevice_attribute): Add CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING. (CU_MEMHOSTALLOC_DEVICEMAP): Define. (cuMemHostAlloc): Add prototype. libgomp/ChangeLog: * config/nvptx/icv-device.c (GOMP_DEVICE_NUM_VAR): Remove 'static' for this variable. * config/nvptx/libgomp-nvptx.h: New file. * config/nvptx/target.c: Include it. (GOMP_ADDITIONAL_ICVS): Declare extern var. (GOMP_REV_OFFLOAD_VAR): Declare var. (GOMP_target_ext): Handle reverse offload. * libgomp-plugin.h (GOMP_PLUGIN_target_rev): New prototype. * libgomp-plugin.c (GOMP_PLUGIN_target_rev): New, call ... * target.c (gomp_target_rev): ... this new stub function. * libgomp.h (gomp_target_rev): Declare. * libgomp.map (GOMP_PLUGIN_1.4): New; add GOMP_PLUGIN_target_rev. * plugin/cuda-lib.def (cuMemHostAlloc): Add. * plugin/plugin-nvptx.c: Include libgomp-nvptx.h. (struct ptx_device): Add rev_data member. (nvptx_open_device): Remove async_engines query, last used in r10-304-g1f4c5b9b; add unified-address assert check. (GOMP_OFFLOAD_get_num_devices): Claim unified address support. (GOMP_OFFLOAD_load_image): Free rev_fn_table if no offload functions exist. Make offload var available on host and device. (rev_off_dev_to_host_cpy, rev_off_host_to_dev_cpy): New. (GOMP_OFFLOAD_run): Handle reverse offload.
Diffstat (limited to 'libgomp/config')
-rw-r--r--libgomp/config/nvptx/icv-device.c2
-rw-r--r--libgomp/config/nvptx/libgomp-nvptx.h51
-rw-r--r--libgomp/config/nvptx/target.c54
3 files changed, 99 insertions, 8 deletions
diff --git a/libgomp/config/nvptx/icv-device.c b/libgomp/config/nvptx/icv-device.c
index 6f869beadce..eef151c23c7 100644
--- a/libgomp/config/nvptx/icv-device.c
+++ b/libgomp/config/nvptx/icv-device.c
@@ -30,7 +30,7 @@
/* This is set to the ICV values of current GPU during device initialization,
when the offload image containing this libgomp portion is loaded. */
-static volatile struct gomp_offload_icvs GOMP_ADDITIONAL_ICVS;
+volatile struct gomp_offload_icvs GOMP_ADDITIONAL_ICVS;
void
omp_set_default_device (int device_num __attribute__((unused)))
diff --git a/libgomp/config/nvptx/libgomp-nvptx.h b/libgomp/config/nvptx/libgomp-nvptx.h
new file mode 100644
index 00000000000..5da9aae2531
--- /dev/null
+++ b/libgomp/config/nvptx/libgomp-nvptx.h
@@ -0,0 +1,51 @@
+/* Copyright (C) 2022 Free Software Foundation, Inc.
+ Contributed by Tobias Burnus <tobias@codesourcery.com>.
+
+ This file is part of the GNU Offloading and Multi Processing Library
+ (libgomp).
+
+ Libgomp is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* This file contains defines and type definitions that are shared between
+ the nvptx target's libgomp.a and plugin-nvptx.c, but that are only
+ needed for this target. */
+
+#ifndef LIBGOMP_NVPTX_H
+#define LIBGOMP_NVPTX_H 1
+
+#define GOMP_REV_OFFLOAD_VAR __gomp_rev_offload_var
+
+struct rev_offload { /* Reverse-offload request filled in by the nvptx GOMP_target_ext and queried on the host. */
+ uint64_t fn; /* Stored last: the 'fn' argument, which triggers host processing; the host sets it back to 0 on completion. */
+ uint64_t mapnum; /* The 'mapnum' argument of GOMP_target_ext. */
+ uint64_t addrs; /* The 'hostaddrs' pointer argument, cast to uint64_t. */
+ uint64_t sizes; /* The 'sizes' pointer argument, cast to uint64_t. */
+ uint64_t kinds; /* The 'kinds' pointer argument, cast to uint64_t. */
+ int32_t dev_num; /* Copied from GOMP_ADDITIONAL_ICVS.device_num. */
+};
+
+#if (__SIZEOF_SHORT__ != 2 \
+ || __SIZEOF_SIZE_T__ != 8 \
+ || __SIZEOF_POINTER__ != 8)
+#error "Data-type conversion required for rev_offload"
+#endif
+
+#endif /* LIBGOMP_NVPTX_H */
+
diff --git a/libgomp/config/nvptx/target.c b/libgomp/config/nvptx/target.c
index 11108d20e15..0e79388fbba 100644
--- a/libgomp/config/nvptx/target.c
+++ b/libgomp/config/nvptx/target.c
@@ -24,9 +24,12 @@
<http://www.gnu.org/licenses/>. */
#include "libgomp.h"
+#include "libgomp-nvptx.h" /* For struct rev_offload + GOMP_REV_OFFLOAD_VAR. */
#include <limits.h>
extern int __gomp_team_num __attribute__((shared));
+extern volatile struct gomp_offload_icvs GOMP_ADDITIONAL_ICVS;
+volatile struct rev_offload *GOMP_REV_OFFLOAD_VAR;
bool
GOMP_teams4 (unsigned int num_teams_lower, unsigned int num_teams_upper,
@@ -88,16 +91,53 @@ GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
void **hostaddrs, size_t *sizes, unsigned short *kinds,
unsigned int flags, void **depend, void **args)
{
- (void) device;
- (void) fn;
- (void) mapnum;
- (void) hostaddrs;
- (void) sizes;
- (void) kinds;
+ static int lock = 0; /* == gomp_mutex_t lock; gomp_mutex_init (&lock); */
(void) flags;
(void) depend;
(void) args;
- __builtin_unreachable ();
+
+ if (device != GOMP_DEVICE_HOST_FALLBACK
+ || fn == NULL
+ || GOMP_REV_OFFLOAD_VAR == NULL)
+ return;
+
+ gomp_mutex_lock (&lock);
+
+ GOMP_REV_OFFLOAD_VAR->mapnum = mapnum;
+ GOMP_REV_OFFLOAD_VAR->addrs = (uint64_t) hostaddrs;
+ GOMP_REV_OFFLOAD_VAR->sizes = (uint64_t) sizes;
+ GOMP_REV_OFFLOAD_VAR->kinds = (uint64_t) kinds;
+ GOMP_REV_OFFLOAD_VAR->dev_num = GOMP_ADDITIONAL_ICVS.device_num;
+
+ /* Set 'fn' to trigger processing on the host; wait for completion,
+ which is flagged by setting 'fn' back to 0 on the host. */
+ uint64_t addr_struct_fn = (uint64_t) &GOMP_REV_OFFLOAD_VAR->fn;
+#if __PTX_SM__ >= 700
+ asm volatile ("st.global.release.sys.u64 [%0], %1;"
+ : : "r"(addr_struct_fn), "r" (fn) : "memory");
+#else
+ __sync_synchronize (); /* membar.sys */
+ asm volatile ("st.volatile.global.u64 [%0], %1;"
+ : : "r"(addr_struct_fn), "r" (fn) : "memory");
+#endif
+
+#if __PTX_SM__ >= 700
+ uint64_t fn2;
+ do
+ {
+ asm volatile ("ld.acquire.sys.global.u64 %0, [%1];"
+ : "=r" (fn2) : "r" (addr_struct_fn) : "memory");
+ }
+ while (fn2 != 0);
+#else
+ /* ld.global.u64 %r64,[__gomp_rev_offload_var];
+ ld.u64 %r36,[%r64];
+ membar.sys; */
+ while (__atomic_load_n (&GOMP_REV_OFFLOAD_VAR->fn, __ATOMIC_ACQUIRE) != 0)
+ ; /* spin */
+#endif
+
+ gomp_mutex_unlock (&lock);
}
void