summary | refs | log | tree | commit | diff
path: root/libgomp
diff options
context:
space:
mode:
author cesar <cesar@138bc75d-0d04-0410-961f-82ee72b054a4> 2016-11-02 22:10:02 +0000
committer cesar <cesar@138bc75d-0d04-0410-961f-82ee72b054a4> 2016-11-02 22:10:02 +0000
commit b7aef2f7a0d2a3c20700edfbff2168a417d57e24 (patch)
tree 4b6752f6e52b937b8a95a6f6e537ef42923f160b /libgomp
parent 60cd8c97d99f66469d112a34a09b18e78ca193e6 (diff)
download gcc-b7aef2f7a0d2a3c20700edfbff2168a417d57e24.tar.gz
gcc/
* config/nvptx/nvptx.c (PTX_GANG_DEFAULT): Set to zero.

libgomp/
* plugin/plugin-nvptx.c (nvptx_exec): Interrogate board attributes to determine default geometry.
* testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c: Set gang dimension.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@241803 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'libgomp')
-rw-r--r-- libgomp/ChangeLog 8
-rw-r--r-- libgomp/plugin/plugin-nvptx.c 88
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c 2
3 files changed, 94 insertions, 4 deletions
diff --git a/libgomp/ChangeLog b/libgomp/ChangeLog
index 675c63ec1cd..a7a52f82f0c 100644
--- a/libgomp/ChangeLog
+++ b/libgomp/ChangeLog
@@ -1,3 +1,11 @@
+2016-11-02 Cesar Philippidis <cesar@codesourcery.com>
+ Nathan Sidwell <nathan@acm.org>
+
+ * plugin/plugin-nvptx.c (nvptx_exec): Interrogate board attributes
+ to determine default geometry.
+ * testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c: Set gang
+ dimension.
+
2016-11-01 Jakub Jelinek <jakub@redhat.com>
* hashtab.h: Use standard GPLv3 with runtime exception
diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c
index 327500c01aa..5ee350d4c1d 100644
--- a/libgomp/plugin/plugin-nvptx.c
+++ b/libgomp/plugin/plugin-nvptx.c
@@ -45,6 +45,7 @@
#include <stdio.h>
#include <unistd.h>
#include <assert.h>
+#include <errno.h>
static const char *
cuda_error (CUresult r)
@@ -932,9 +933,88 @@ nvptx_exec (void (*fn), size_t mapnum, void **hostaddrs, void **devaddrs,
if (seen_zero)
{
+ /* See if the user provided GOMP_OPENACC_DIM environment
+ variable to specify runtime defaults. */
+ static int default_dims[GOMP_DIM_MAX];
+
+ pthread_mutex_lock (&ptx_dev_lock);
+ if (!default_dims[0])
+ {
+ /* We only read the environment variable once. You can't
+ change it in the middle of execution. The syntax is
+ the same as for the -fopenacc-dim compilation option. */
+ const char *env_var = getenv ("GOMP_OPENACC_DIM");
+ if (env_var)
+ {
+ const char *pos = env_var;
+
+ for (i = 0; *pos && i != GOMP_DIM_MAX; i++)
+ {
+ if (i && *pos++ != ':')
+ break;
+ if (*pos != ':')
+ {
+ const char *eptr;
+
+ errno = 0;
+ long val = strtol (pos, (char **)&eptr, 10);
+ if (errno || val < 0 || (unsigned)val != val)
+ break;
+ default_dims[i] = (int)val;
+ pos = eptr;
+ }
+ }
+ }
+
+ int warp_size, block_size, dev_size, cpu_size;
+ CUdevice dev = nvptx_thread()->ptx_dev->dev;
+ /* 32 is the default for known hardware. */
+ int gang = 0, worker = 32, vector = 32;
+ CUdevice_attribute cu_tpb, cu_ws, cu_mpc, cu_tpm;
+
+ cu_tpb = CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK;
+ cu_ws = CU_DEVICE_ATTRIBUTE_WARP_SIZE;
+ cu_mpc = CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT;
+ cu_tpm = CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR;
+
+ if (cuDeviceGetAttribute (&block_size, cu_tpb, dev) == CUDA_SUCCESS
+ && cuDeviceGetAttribute (&warp_size, cu_ws, dev) == CUDA_SUCCESS
+ && cuDeviceGetAttribute (&dev_size, cu_mpc, dev) == CUDA_SUCCESS
+ && cuDeviceGetAttribute (&cpu_size, cu_tpm, dev) == CUDA_SUCCESS)
+ {
+ GOMP_PLUGIN_debug (0, " warp_size=%d, block_size=%d,"
+ " dev_size=%d, cpu_size=%d\n",
+ warp_size, block_size, dev_size, cpu_size);
+ gang = (cpu_size / block_size) * dev_size;
+ worker = block_size / warp_size;
+ vector = warp_size;
+ }
+
+ /* There is no upper bound on the gang size. The best size
+ matches the hardware configuration. Logical gangs are
+ scheduled onto physical hardware. To maximize usage, we
+ should guess a large number. */
+ if (default_dims[GOMP_DIM_GANG] < 1)
+ default_dims[GOMP_DIM_GANG] = gang ? gang : 1024;
+ /* The worker size must not exceed the hardware. */
+ if (default_dims[GOMP_DIM_WORKER] < 1
+ || (default_dims[GOMP_DIM_WORKER] > worker && gang))
+ default_dims[GOMP_DIM_WORKER] = worker;
+ /* The vector size must exactly match the hardware. */
+ if (default_dims[GOMP_DIM_VECTOR] < 1
+ || (default_dims[GOMP_DIM_VECTOR] != vector && gang))
+ default_dims[GOMP_DIM_VECTOR] = vector;
+
+ GOMP_PLUGIN_debug (0, " default dimensions [%d,%d,%d]\n",
+ default_dims[GOMP_DIM_GANG],
+ default_dims[GOMP_DIM_WORKER],
+ default_dims[GOMP_DIM_VECTOR]);
+ }
+ pthread_mutex_unlock (&ptx_dev_lock);
+
for (i = 0; i != GOMP_DIM_MAX; i++)
- if (!dims[i])
- dims[i] = /* TODO */ 32;
+ if (!dims[i])
+ dims[i] = default_dims[i];
}
/* This reserves a chunk of a pre-allocated page of memory mapped on both
@@ -954,8 +1034,8 @@ nvptx_exec (void (*fn), size_t mapnum, void **hostaddrs, void **devaddrs,
mapnum * sizeof (void *));
GOMP_PLUGIN_debug (0, " %s: kernel %s: launch"
" gangs=%u, workers=%u, vectors=%u\n",
- __FUNCTION__, targ_fn->launch->fn,
- dims[0], dims[1], dims[2]);
+ __FUNCTION__, targ_fn->launch->fn, dims[GOMP_DIM_GANG],
+ dims[GOMP_DIM_WORKER], dims[GOMP_DIM_VECTOR]);
// OpenACC CUDA
//
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c
index 8a755b88038..3ca9388d405 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c
@@ -2,6 +2,8 @@
not optimized away at -O0, and then confuses the target assembler.
{ dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
+/* { dg-additional-options "-fopenacc-dim=32" } */
+
#include <stdio.h>
#include <openacc.h>