diff options
author | Tobias Burnus <tobias@codesourcery.com> | 2022-10-24 16:58:43 +0200 |
---|---|---|
committer | Tobias Burnus <tobias@codesourcery.com> | 2022-10-24 17:04:08 +0200 |
commit | 131d18e928a3ea1ab2d3bf61aa92d68a8a254609 (patch) | |
tree | 9c379ef9c639a56d0b1146aada7cef937328a89e /libgomp/config | |
parent | a096036589d82175a0f729c2dab73c9a527d075d (diff) | |
download | gcc-131d18e928a3ea1ab2d3bf61aa92d68a8a254609.tar.gz |
libgomp/nvptx: Prepare for reverse-offload callback handling
This patch adds a stub 'gomp_target_rev' in the host's target.c, which will
later handle the reverse offload.
For nvptx, it adds support for forwarding the offload gomp_target_ext call
to the host by setting values in a struct on the device and querying it on
the host - invoking gomp_target_rev on the result.
include/ChangeLog:
* cuda/cuda.h (enum CUdevice_attribute): Add
CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING.
(CU_MEMHOSTALLOC_DEVICEMAP): Define.
(cuMemHostAlloc): Add prototype.
libgomp/ChangeLog:
* config/nvptx/icv-device.c (GOMP_DEVICE_NUM_VAR): Remove
'static' for this variable.
* config/nvptx/libgomp-nvptx.h: New file.
* config/nvptx/target.c: Include it.
(GOMP_ADDITIONAL_ICVS): Declare extern var.
(GOMP_REV_OFFLOAD_VAR): Declare var.
(GOMP_target_ext): Handle reverse offload.
* libgomp-plugin.h (GOMP_PLUGIN_target_rev): New prototype.
* libgomp-plugin.c (GOMP_PLUGIN_target_rev): New, call ...
* target.c (gomp_target_rev): ... this new stub function.
* libgomp.h (gomp_target_rev): Declare.
* libgomp.map (GOMP_PLUGIN_1.4): New; add GOMP_PLUGIN_target_rev.
* plugin/cuda-lib.def (cuMemHostAlloc): Add.
* plugin/plugin-nvptx.c: Include libgomp-nvptx.h.
(struct ptx_device): Add rev_data member.
(nvptx_open_device): Remove async_engines query, last used in
r10-304-g1f4c5b9b; add unified-address assert check.
(GOMP_OFFLOAD_get_num_devices): Claim unified address
support.
(GOMP_OFFLOAD_load_image): Free rev_fn_table if no
offload functions exist. Make offload var available
on host and device.
(rev_off_dev_to_host_cpy, rev_off_host_to_dev_cpy): New.
(GOMP_OFFLOAD_run): Handle reverse offload.
Diffstat (limited to 'libgomp/config')
-rw-r--r-- | libgomp/config/nvptx/icv-device.c | 2 | ||||
-rw-r--r-- | libgomp/config/nvptx/libgomp-nvptx.h | 51 | ||||
-rw-r--r-- | libgomp/config/nvptx/target.c | 54 |
3 files changed, 99 insertions, 8 deletions
diff --git a/libgomp/config/nvptx/icv-device.c b/libgomp/config/nvptx/icv-device.c index 6f869beadce..eef151c23c7 100644 --- a/libgomp/config/nvptx/icv-device.c +++ b/libgomp/config/nvptx/icv-device.c @@ -30,7 +30,7 @@ /* This is set to the ICV values of current GPU during device initialization, when the offload image containing this libgomp portion is loaded. */ -static volatile struct gomp_offload_icvs GOMP_ADDITIONAL_ICVS; +volatile struct gomp_offload_icvs GOMP_ADDITIONAL_ICVS; void omp_set_default_device (int device_num __attribute__((unused))) diff --git a/libgomp/config/nvptx/libgomp-nvptx.h b/libgomp/config/nvptx/libgomp-nvptx.h new file mode 100644 index 00000000000..5da9aae2531 --- /dev/null +++ b/libgomp/config/nvptx/libgomp-nvptx.h @@ -0,0 +1,51 @@ +/* Copyright (C) 2022 Free Software Foundation, Inc. + Contributed by Tobias Burnus <tobias@codesourcery.com>. + + This file is part of the GNU Offloading and Multi Processing Library + (libgomp). + + Libgomp is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. 
*/ + +/* This file contains defines and type definitions shared between the + nvptx target's libgomp.a and the plugin-nvptx.c, but that is only + needed for this target. */ + +#ifndef LIBGOMP_NVPTX_H +#define LIBGOMP_NVPTX_H 1 + +#define GOMP_REV_OFFLOAD_VAR __gomp_rev_offload_var + +struct rev_offload { + uint64_t fn; + uint64_t mapnum; + uint64_t addrs; + uint64_t sizes; + uint64_t kinds; + int32_t dev_num; +}; + +#if (__SIZEOF_SHORT__ != 2 \ + || __SIZEOF_SIZE_T__ != 8 \ + || __SIZEOF_POINTER__ != 8) +#error "Data-type conversion required for rev_offload" +#endif + +#endif /* LIBGOMP_NVPTX_H */ + diff --git a/libgomp/config/nvptx/target.c b/libgomp/config/nvptx/target.c index 11108d20e15..0e79388fbba 100644 --- a/libgomp/config/nvptx/target.c +++ b/libgomp/config/nvptx/target.c @@ -24,9 +24,12 @@ <http://www.gnu.org/licenses/>. */ #include "libgomp.h" +#include "libgomp-nvptx.h" /* For struct rev_offload + GOMP_REV_OFFLOAD_VAR. */ #include <limits.h> extern int __gomp_team_num __attribute__((shared)); +extern volatile struct gomp_offload_icvs GOMP_ADDITIONAL_ICVS; +volatile struct rev_offload *GOMP_REV_OFFLOAD_VAR; bool GOMP_teams4 (unsigned int num_teams_lower, unsigned int num_teams_upper, @@ -88,16 +91,53 @@ GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum, void **hostaddrs, size_t *sizes, unsigned short *kinds, unsigned int flags, void **depend, void **args) { - (void) device; - (void) fn; - (void) mapnum; - (void) hostaddrs; - (void) sizes; - (void) kinds; + static int lock = 0; /* == gomp_mutex_t lock; gomp_mutex_init (&lock); */ (void) flags; (void) depend; (void) args; - __builtin_unreachable (); + + if (device != GOMP_DEVICE_HOST_FALLBACK + || fn == NULL + || GOMP_REV_OFFLOAD_VAR == NULL) + return; + + gomp_mutex_lock (&lock); + + GOMP_REV_OFFLOAD_VAR->mapnum = mapnum; + GOMP_REV_OFFLOAD_VAR->addrs = (uint64_t) hostaddrs; + GOMP_REV_OFFLOAD_VAR->sizes = (uint64_t) sizes; + GOMP_REV_OFFLOAD_VAR->kinds = (uint64_t) kinds; + 
GOMP_REV_OFFLOAD_VAR->dev_num = GOMP_ADDITIONAL_ICVS.device_num; + + /* Set 'fn' to trigger processing on the host; wait for completion, + which is flagged by setting 'fn' back to 0 on the host. */ + uint64_t addr_struct_fn = (uint64_t) &GOMP_REV_OFFLOAD_VAR->fn; +#if __PTX_SM__ >= 700 + asm volatile ("st.global.release.sys.u64 [%0], %1;" + : : "r"(addr_struct_fn), "r" (fn) : "memory"); +#else + __sync_synchronize (); /* membar.sys */ + asm volatile ("st.volatile.global.u64 [%0], %1;" + : : "r"(addr_struct_fn), "r" (fn) : "memory"); +#endif + +#if __PTX_SM__ >= 700 + uint64_t fn2; + do + { + asm volatile ("ld.acquire.sys.global.u64 %0, [%1];" + : "=r" (fn2) : "r" (addr_struct_fn) : "memory"); + } + while (fn2 != 0); +#else + /* ld.global.u64 %r64,[__gomp_rev_offload_var]; + ld.u64 %r36,[%r64]; + membar.sys; */ + while (__atomic_load_n (&GOMP_REV_OFFLOAD_VAR->fn, __ATOMIC_ACQUIRE) != 0) + ; /* spin */ +#endif + + gomp_mutex_unlock (&lock); } void |