summaryrefslogtreecommitdiff
path: root/libgomp/testsuite/libgomp.oacc-c-c++-common/async_queue-1.c
blob: 533d498bcf75b69001c4c4082fbd74276689867a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
/* Test mapping of async values to specific underlying queues.  */

#undef NDEBUG
#include <assert.h>
#include <openacc.h>

/* This is implemented in terms of the "acc_get_cuda_stream" interface.  */

/* Table of the async-arguments under test.  Each entry pairs one
   async-argument with a slot holding the handle most recently returned by
   "acc_get_cuda_stream" for it.

   Layout relied upon by "main": the special (negative) values come first,
   followed by at least five ordinary async-arguments.  */
struct
{
  int async;		/* The async-argument passed to the runtime.  */
  void *cuda_stream;	/* Handle last queried for "async"; starts as NULL.  */
} queues[] =
  {
    { acc_async_sync, NULL },
    { acc_async_noval, NULL },
    { 0, NULL },
    { 1, NULL },
    { 2, NULL },
    { 36, NULL },
    { 1982, NULL }
  };

/* Number of entries in "queues".  */
const size_t queues_n = sizeof (queues) / sizeof (queues[0]);

/* Test driver: for every async-argument listed in "queues", check which
   handle "acc_get_cuda_stream" reports for it at several points in time.

   Phases, in order:
     1. Right after "acc_init", every queried handle is asserted to be NULL.
     2. After a number of wait operations on queues that have not been used,
	every queried handle is still asserted to be NULL.
     3. After launching an empty async "parallel" region on each queue, the
	handle is checked against the expectation for the configured
	ACC_DEVICE_TYPE_* and remembered in "queues[].cuda_stream".
     4. Re-querying, before and after another launch, must yield the
	remembered handle.
     5. All remembered non-NULL handles must be pairwise different.

   Any failed expectation aborts through "assert" (NDEBUG is undefined at the
   top of the file).  Returns 0 if all checks pass.  */
int main(void)
{
  /* Explicitly initialize: it's not clear whether the following OpenACC
     runtime library calls implicitly initialize;
     <https://github.com/OpenACC/openacc-spec/issues/102>.  */
  acc_device_t d;
#if defined ACC_DEVICE_TYPE_nvidia
  d = acc_device_nvidia;
#elif defined ACC_DEVICE_TYPE_radeon
  d = acc_device_radeon;
#elif defined ACC_DEVICE_TYPE_host
  d = acc_device_host;
#else
# error Not ported to this ACC_DEVICE_TYPE
#endif
  acc_init (d);

  for (size_t i = 0; i < queues_n; ++i)
    {
      /* Before actually being used, they are all NULL.  */
      queues[i].cuda_stream = acc_get_cuda_stream (queues[i].async);
      assert (queues[i].cuda_stream == NULL);
    }

  /* No-ops still don't initialize them.  */
  {
    size_t i = 0;
    /* Find the first non-special async-argument.  The bound is checked
       before each read so that a table holding only special (negative)
       values cannot be indexed past its end.  */
    while (i < queues_n && queues[i].async < 0)
      ++i;
    assert (i < queues_n);

    /* Each of the following wait variants is applied to a different,
       so far unused, queue.  */
#pragma acc wait(queues[i].async) // no-op

    ++i;
    assert (i < queues_n);
#pragma acc parallel wait(queues[i].async) // no-op
    ;

    ++i;
    assert (i < queues_n);
    acc_wait(queues[i].async); // no-op

    /* Advance by two: "acc_wait_async" takes two distinct unused queues,
       "i - 1" to wait for and "i" to enqueue the wait on.  */
    i += 2;
    assert (i < queues_n);
    acc_wait_async(queues[i - 1].async, queues[i].async); // no-op, and async queue "i" does not get set up

    /* Re-query every queue; uses its own index name so as to not shadow
       "i" above.  */
    for (size_t k = 0; k < queues_n; ++k)
      {
	queues[k].cuda_stream = acc_get_cuda_stream (queues[k].async);
	assert (queues[k].cuda_stream == NULL);
      }
  }

  for (size_t i = 0; i < queues_n; ++i)
    {
      /* Use the queue to initialize it.  */
#pragma acc parallel async(queues[i].async)
      ;
#pragma acc wait

      /* Verify CUDA stream used.  */
      queues[i].cuda_stream = acc_get_cuda_stream (queues[i].async);
#if defined ACC_DEVICE_TYPE_nvidia
      /* "acc_async_sync" maps to the NULL CUDA default stream.  */
      if (queues[i].async == acc_async_sync)
	assert (queues[i].cuda_stream == NULL);
      else
	assert (queues[i].cuda_stream != NULL);
#elif defined ACC_DEVICE_TYPE_radeon
      /* For "acc_device_radeon" there are no CUDA streams.  */
      assert (queues[i].cuda_stream == NULL);
#elif defined ACC_DEVICE_TYPE_host
      /* For "acc_device_host" there are no CUDA streams.  */
      assert (queues[i].cuda_stream == NULL);
#else
# error Not ported to this ACC_DEVICE_TYPE
#endif
    }

  /* Verify same results.  */
  for (size_t i = 0; i < queues_n; ++i)
    {
      void *cuda_stream;

      /* The handle remembered above must be reported again...  */
      cuda_stream = acc_get_cuda_stream (queues[i].async);
      assert (cuda_stream == queues[i].cuda_stream);

#pragma acc parallel async(queues[i].async)
      ;
#pragma acc wait

      /* ... and must not change when the queue is used once more.  */
      cuda_stream = acc_get_cuda_stream (queues[i].async);
      assert (cuda_stream == queues[i].cuda_stream);
    }

  /* Verify individual underlying queues are all different.  NULL handles are
     skipped: they are legitimately shared (see the per-device expectations
     above).  */
  for (size_t i = 0; i < queues_n; ++i)
    {
      if (queues[i].cuda_stream == NULL)
	continue;
      for (size_t j = i + 1; j < queues_n; ++j)
	{
	  if (queues[j].cuda_stream == NULL)
	    continue;
	  assert (queues[j].cuda_stream != queues[i].cuda_stream);
	}
    }

  return 0;
}