1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
|
#include "utest_helper.hpp"
/* Number of worker threads; also sizes the per-thread queue and event arrays below. */
#define THREAD_SIZE 8
/* Thread handles, filled in by pthread_create() in multi_queue_events(). */
pthread_t tid[THREAD_SIZE];
/* One command queue per thread index; thread N enqueues on all_queues[N]. */
static cl_command_queue all_queues[THREAD_SIZE];
/* Event produced by the kernel enqueue performed for each thread index. */
static cl_event enqueue_events[THREAD_SIZE];
/* User event placed in every kernel enqueue's wait list; the test sets it to CL_COMPLETE. */
static cl_event user_event;
/* Kernel object shared by all threads (argument 1 is re-set by each thread). */
static cl_kernel the_kernel;
/*
 * OpenCL C source of the test kernel: for i == 0 it stores 10 into ret[0],
 * otherwise it stores ret[i - 1] + 1 into ret[i].
 */
static char source_str[] =
"kernel void assgin_work_dim( __global int *ret, int i) { \n"
"if (i == 0) ret[i] = 10; \n"
"else ret[i] = ret[i - 1] + 1; \n"
"}\n";
/*
 * Global work size, local work size and global work offset handed to
 * clEnqueueNDRangeKernel(). The enqueue uses work_dim == 1, so only the
 * first element of each array is consumed.
 */
static size_t the_globals[3] = {16, 1, 1};
static size_t the_locals[3] = {16, 1, 1};
static size_t the_goffsets[3] = {0, 0, 0};
static void *thread_function(void *arg)
{
int num = *((int *)arg);
cl_int ret;
cl_event dep_event[2];
ret = clSetKernelArg(the_kernel, 1, sizeof(cl_int), &num);
OCL_ASSERT(ret == CL_SUCCESS);
if (num == 0) {
dep_event[0] = user_event;
ret = clEnqueueNDRangeKernel(all_queues[num], the_kernel, 1, the_goffsets, the_globals, the_locals,
1, dep_event, &enqueue_events[num]);
} else {
dep_event[0] = user_event;
dep_event[1] = enqueue_events[num - 1];
ret = clEnqueueNDRangeKernel(all_queues[num], the_kernel, 1, the_goffsets, the_globals, the_locals,
2, dep_event, &enqueue_events[num]);
}
OCL_ASSERT(ret == CL_SUCCESS);
return NULL;
}
void multi_queue_events(void)
{
cl_int ret;
size_t source_size = sizeof(source_str);
const char *source = source_str;
cl_program program = NULL;
int i;
/* Create Kernel Program from the source */
program = clCreateProgramWithSource(ctx, 1, &source, &source_size, &ret);
OCL_ASSERT(ret == CL_SUCCESS);
/* Build Kernel Program */
ret = clBuildProgram(program, 1, &device, NULL, NULL, NULL);
OCL_ASSERT(ret == CL_SUCCESS);
the_kernel = clCreateKernel(program, "assgin_work_dim", NULL);
OCL_ASSERT(the_kernel != NULL);
int buffer_content[16] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
cl_mem buf = clCreateBuffer(ctx, CL_MEM_COPY_HOST_PTR, 16 * 4, buffer_content, &ret);
OCL_ASSERT(buf != NULL);
ret = clSetKernelArg(the_kernel, 0, sizeof(cl_mem), &buf);
OCL_ASSERT(ret == CL_SUCCESS);
for (i = 0; i < THREAD_SIZE; i++) {
all_queues[i] = clCreateCommandQueue(ctx, device, 0, &ret);
OCL_ASSERT(ret == CL_SUCCESS);
}
user_event = clCreateUserEvent(ctx, &ret);
OCL_ASSERT(ret == CL_SUCCESS);
for (i = 0; i < THREAD_SIZE; i++) {
pthread_create(&tid[i], NULL, thread_function, &i);
pthread_join(tid[i], NULL);
}
cl_event map_event;
void *map_ptr = clEnqueueMapBuffer(all_queues[0], buf, 0, CL_MAP_READ, 0, 32,
THREAD_SIZE, enqueue_events, &map_event, NULL);
OCL_ASSERT(map_ptr != NULL);
cl_event all_event[10];
for (i = 0; i < THREAD_SIZE; i++) {
all_event[i] = enqueue_events[i];
}
all_event[8] = user_event;
all_event[9] = map_event;
//printf("before Waitfor events ##\n");
clSetUserEventStatus(user_event, CL_COMPLETE);
ret = clWaitForEvents(10, all_event);
OCL_ASSERT(ret == CL_SUCCESS);
//printf("After Waitfor events ##\n");
//printf("############# Finish Setting ################\n");
printf("\n");
for (i = 0; i < 8; i++) {
//printf(" %d", ((int *)map_ptr)[i]);
OCL_ASSERT(((int *)map_ptr)[i] == 10 + i);
}
//printf("\n");
ret = clEnqueueUnmapMemObject(all_queues[0], buf, map_ptr, 1, &map_event, NULL);
OCL_ASSERT(ret == CL_SUCCESS);
//printf("------------------------- End -------------------------------\n");
clReleaseKernel(the_kernel);
clReleaseProgram(program);
clReleaseMemObject(buf);
for (i = 0; i < THREAD_SIZE; i++) {
clReleaseCommandQueue(all_queues[i]);
clReleaseEvent(enqueue_events[i]);
}
clReleaseEvent(user_event);
clReleaseEvent(map_event);
}
/* NOTE(review): macro is not defined in this file; presumably it registers
 * multi_queue_events with the unit-test runner -- confirm in the test helpers. */
MAKE_UTEST_FROM_FUNCTION(multi_queue_events);
|