/*
* Copyright © 2012 Intel Corporation
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library. If not, see <http://www.gnu.org/licenses/>.
*
* Author: Benjamin Segovia
*/
#include "utest_file_map.hpp"
#include "utest_helper.hpp"
#include "utest_error.h"
#include "CL/cl.h"
#include "CL/cl_intel.h"
#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <stdint.h>
/* Report an unrecoverable error: prints "error: " followed by the
 * printf-style message and a newline on stderr, then stops the program.
 * assert(0) fires first; exit(-1) terminates the process when assertions
 * are compiled out. */
#define FATAL(...) \
do { \
fprintf(stderr, "error: "); \
fprintf(stderr, __VA_ARGS__); \
fprintf(stderr, "\n");\
assert(0); \
exit(-1); \
} while (0)
/* Invoke FATAL with the given printf-style message when COND is true. */
#define FATAL_IF(COND, ...) \
do { \
if (COND) FATAL(__VA_ARGS__); \
} while (0)
/* Global OpenCL state shared by the test helpers defined in this file. */
cl_platform_id platform = NULL; /* filled in by cl_ocl_init() */
cl_device_id device = NULL; /* GPU device selected by cl_ocl_init() */
cl_context ctx = NULL; /* created by cl_ocl_init(), released by cl_ocl_destroy() */
cl_program program = NULL; /* created by cl_kernel_init(), released by cl_kernel_destroy() */
cl_kernel kernel = NULL; /* created by cl_kernel_init(), released by cl_kernel_destroy() */
cl_command_queue queue = NULL; /* created by cl_ocl_init(), released by cl_ocl_destroy() */
cl_mem buf[MAX_BUFFER_N] = {}; /* memory objects released by cl_buffer_destroy() */
void *buf_data[MAX_BUFFER_N] = {}; /* non-NULL entries make cl_buffer_destroy() unmap the matching buf[] */
/* NOTE(review): globals/locals are not used in this part of the file;
 * presumably the NDRange global/local work sizes - confirm against callers. */
size_t globals[3] = {};
size_t locals[3] = {};
/* Translate an image channel order enumerant into its symbolic name.
 * Returns a pointer to a string literal; values that are not listed yield
 * "Unsupported image channel order". */
static const char*
cl_test_channel_order_string(cl_channel_order order)
{
#define ORDER_ENTRY(WHICH) { CL_##WHICH, "CL_"#WHICH }
  static const struct {
    cl_channel_order value;
    const char *label;
  } known_orders[] = {
    ORDER_ENTRY(R),
    ORDER_ENTRY(A),
    ORDER_ENTRY(RG),
    ORDER_ENTRY(RA),
    ORDER_ENTRY(RGB),
    ORDER_ENTRY(RGBA),
    ORDER_ENTRY(BGRA),
    ORDER_ENTRY(ARGB),
    ORDER_ENTRY(INTENSITY),
    ORDER_ENTRY(LUMINANCE),
    ORDER_ENTRY(Rx),
    ORDER_ENTRY(RGx),
    ORDER_ENTRY(RGBx)
  };
#undef ORDER_ENTRY
  const size_t entry_n = sizeof(known_orders) / sizeof(known_orders[0]);
  size_t idx;

  for (idx = 0; idx < entry_n; ++idx) {
    if (known_orders[idx].value == order)
      return known_orders[idx].label;
  }
  return "Unsupported image channel order";
}
/* Translate an image channel data type enumerant into its symbolic name.
 * Returns a pointer to a string literal; values that are not listed yield
 * "Unsupported image channel type". */
static const char*
cl_test_channel_type_string(cl_channel_type type)
{
#define TYPE_ENTRY(WHICH) { CL_##WHICH, "CL_"#WHICH }
  static const struct {
    cl_channel_type value;
    const char *label;
  } known_types[] = {
    TYPE_ENTRY(SNORM_INT8),
    TYPE_ENTRY(SNORM_INT16),
    TYPE_ENTRY(UNORM_INT8),
    TYPE_ENTRY(UNORM_INT16),
    TYPE_ENTRY(UNORM_SHORT_565),
    TYPE_ENTRY(UNORM_SHORT_555),
    TYPE_ENTRY(UNORM_INT_101010),
    TYPE_ENTRY(SIGNED_INT8),
    TYPE_ENTRY(SIGNED_INT16),
    TYPE_ENTRY(SIGNED_INT32),
    TYPE_ENTRY(UNSIGNED_INT8),
    TYPE_ENTRY(UNSIGNED_INT16),
    TYPE_ENTRY(UNSIGNED_INT32),
    TYPE_ENTRY(HALF_FLOAT),
    TYPE_ENTRY(FLOAT)
  };
#undef TYPE_ENTRY
  const size_t entry_n = sizeof(known_types) / sizeof(known_types[0]);
  size_t idx;

  for (idx = 0; idx < entry_n; ++idx) {
    if (known_types[idx].value == type)
      return known_types[idx].label;
  }
  return "Unsupported image channel type";
}
/* Print "Failed: <msg> (<rval>)" on standard output and terminate the
 * process with exit code -1. Never returns. */
static void
clpanic(const char *msg, int rval)
{
  fprintf(stdout, "Failed: %s (%d)\n", msg, rval);
  exit(-1);
}
static char*
do_kiss_path(const char *file, cl_device_id device)
{
cl_int ver;
const char *sub_path = NULL;
char *ker_path = NULL;
const char *kiss_path = getenv("OCL_KERNEL_PATH");
size_t sz = strlen(file);
if (device == NULL)
sub_path = "";
else {
if (clIntelGetGenVersion(device, &ver) != CL_SUCCESS)
clpanic("Unable to get Gen version", -1);
sub_path = "";
}
if (kiss_path == NULL)
clpanic("set OCL_KERNEL_PATH. This is where the kiss kernels are", -1);
sz += strlen(kiss_path) + strlen(sub_path) + 2; /* +1 for end of string, +1 for '/' */
if ((ker_path = (char*) malloc(sz)) == NULL)
clpanic("Allocation failed", -1);
sprintf(ker_path, "%s/%s%s", kiss_path, sub_path, file);
return ker_path;
}
/* Create the global `program` from a kernel file and the global `kernel`
 * from it.
 *
 * file_name   - kernel file, resolved relative to OCL_KERNEL_PATH
 * kernel_name - name of the kernel function to instantiate
 * format      - LLVM or SOURCE; any other value is fatal
 *
 * Returns CL_SUCCESS, or the OpenCL status reported by the failing
 * program/kernel creation call. Failing to open a SOURCE file is fatal. */
int
cl_kernel_init(const char *file_name, const char *kernel_name, int format)
{
  char *ker_path = NULL;
  cl_int status = CL_SUCCESS;

  /* Load the program and build it */
  ker_path = do_kiss_path(file_name, device);
  if (format == LLVM)
    program = clCreateProgramWithLLVM(ctx, 1, &device, ker_path, &status);
  else if (format == SOURCE) {
    /* The mapping is only needed while the program object is created, so
     * it lives and dies inside this branch */
    cl_file_map_t *fm = cl_file_map_new();
    FATAL_IF (cl_file_map_open(fm, ker_path) != CL_FILE_MAP_SUCCESS,
              "Failed to open file \"%s\" with kernel \"%s\". Did you properly set OCL_KERNEL_PATH variable?",
              file_name, kernel_name);
    const char *src = cl_file_map_begin(fm);
    const size_t sz = cl_file_map_size(fm);
    program = clCreateProgramWithSource(ctx, 1, &src, &sz, &status);
    cl_file_map_delete(fm);
  } else
    FATAL("Not able to create program from binary");

  if (status != CL_SUCCESS) {
    fprintf(stderr, "error creating program from \"%s\"\n", ker_path);
    goto done;
  }

  /* OCL requires to build the program even if it is created from a binary */
  OCL_CALL (clBuildProgram, program, 1, &device, NULL, NULL, NULL);

  /* Create a kernel from the program */
  kernel = clCreateKernel(program, kernel_name, &status);
  if (status != CL_SUCCESS)
    fprintf(stderr, "error calling clCreateKernel\n");

done:
  free(ker_path);
  return status;
}
int
cl_ocl_init(void)
{
cl_int status = CL_SUCCESS;
char name[128];
cl_uint platform_n;
size_t i;
/* Get the platform number */
OCL_CALL (clGetPlatformIDs, 0, NULL, &platform_n);
printf("platform number %u\n", platform_n);
assert(platform_n >= 1);
/* Get a valid platform */
OCL_CALL (clGetPlatformIDs, 1, &platform, &platform_n);
OCL_CALL (clGetPlatformInfo, platform, CL_PLATFORM_PROFILE, sizeof(name), name, NULL);
printf("platform_profile \"%s\"\n", name);
OCL_CALL (clGetPlatformInfo, platform, CL_PLATFORM_NAME, sizeof(name), name, NULL);
printf("platform_name \"%s\"\n", name);
OCL_CALL (clGetPlatformInfo, platform, CL_PLATFORM_VENDOR, sizeof(name), name, NULL);
printf("platform_vendor \"%s\"\n", name);
OCL_CALL (clGetPlatformInfo, platform, CL_PLATFORM_VERSION, sizeof(name), name, NULL);
printf("platform_version \"%s\"\n", name);
/* Get the device (only GPU device is supported right now) */
OCL_CALL (clGetDeviceIDs, platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL);
OCL_CALL (clGetDeviceInfo, device, CL_DEVICE_PROFILE, sizeof(name), name, NULL);
printf("device_profile \"%s\"\n", name);
OCL_CALL (clGetDeviceInfo, device, CL_DEVICE_NAME, sizeof(name), name, NULL);
printf("device_name \"%s\"\n", name);
OCL_CALL (clGetDeviceInfo, device, CL_DEVICE_VENDOR, sizeof(name), name, NULL);
printf("device_vendor \"%s\"\n", name);
OCL_CALL (clGetDeviceInfo, device, CL_DEVICE_VERSION, sizeof(name), name, NULL);
printf("device_version \"%s\"\n", name);
/* Now create a context */
ctx = clCreateContext(0, 1, &device, NULL, NULL, &status);
if (status != CL_SUCCESS) {
fprintf(stderr, "error calling clCreateContext\n");
goto error;
}
/* All image types currently supported by the context */
cl_image_format fmt[256];
cl_uint fmt_n;
clGetSupportedImageFormats(ctx, 0, CL_MEM_OBJECT_IMAGE2D, 256, fmt, &fmt_n);
printf("%u image formats are supported\n", fmt_n);
for (i = 0; i < fmt_n; ++i)
printf("[%s %s]\n",
cl_test_channel_order_string(fmt[i].image_channel_order),
cl_test_channel_type_string(fmt[i].image_channel_data_type));
/* We are going to push NDRange kernels here */
queue = clCreateCommandQueue(ctx, device, 0, &status);
if (status != CL_SUCCESS) {
fprintf(stderr, "error calling clCreateCommandQueue\n");
goto error;
}
error:
return status;
}
int
cl_test_init(const char *file_name, const char *kernel_name, int format)
{
cl_int status = CL_SUCCESS;
/* Initialize OCL */
if ((status = cl_ocl_init()) != CL_SUCCESS)
goto error;
/* Load the kernel */
if ((status = cl_kernel_init(file_name, kernel_name, format)) != CL_SUCCESS)
goto error;
error:
return status;
}
/* Release the global kernel and program objects, if any, and reset both
 * handles to NULL. */
void
cl_kernel_destroy(void)
{
  if (kernel != NULL) {
    clReleaseKernel(kernel);
    kernel = NULL;
  }
  if (program != NULL) {
    clReleaseProgram(program);
    program = NULL;
  }
}
/* Release the global command queue and context created by cl_ocl_init().
 * Handles that were never created are skipped, and both globals are reset
 * to NULL afterwards (mirroring cl_kernel_destroy) so that a partially
 * failed initialisation or a repeated call does not release a stale
 * handle. */
void
cl_ocl_destroy(void)
{
  if (queue != NULL) {
    clReleaseCommandQueue(queue);
    queue = NULL;
  }
  if (ctx != NULL) {
    clReleaseContext(ctx);
    ctx = NULL;
  }
}
void
cl_test_destroy(void)
{
cl_kernel_destroy();
cl_ocl_destroy();
printf("%i memory leaks\n", clIntelReportUnfreed());
assert(clIntelReportUnfreed() == 0);
}
/* Walk every slot of the global buffer table: unmap the buffer when a
 * pointer is recorded for it in buf_data[], release the memory object when
 * one exists, and clear both entries. */
void
cl_buffer_destroy(void)
{
  for (int slot = 0; slot < MAX_BUFFER_N; ++slot) {
    /* Unmap before the memory object itself is released */
    if (buf_data[slot] != NULL) {
      clIntelUnmapBuffer(buf[slot]);
      buf_data[slot] = NULL;
    }
    if (buf[slot] == NULL)
      continue;
    clReleaseMemObject(buf[slot]);
    buf[slot] = NULL;
  }
}
/* Dump the performance counters stored in `perf` to standard output.
 *
 * The buffer is mapped and read as 32-bit words: the "BEFORE" snapshot
 * starts at word 0 and the "AFTER" snapshot 128 words later. The first 48
 * counters of each snapshot are printed, eight per row, followed by their
 * difference (AFTER - BEFORE) shown as a signed value. Does nothing when
 * `perf` is NULL. */
void
cl_report_perf_counters(cl_mem perf)
{
  static const unsigned counter_n = 6*8;     /* counters printed per section */
  static const unsigned per_row = 8;         /* counters per output row */
  static const unsigned after_offset = 128;  /* words between both snapshots */
  cl_int status = CL_SUCCESS;
  uint32_t *start = NULL, *end = NULL;
  unsigned i;

  if (perf == NULL)
    return;
  start = (uint32_t*) clIntelMapBuffer(perf, &status);
  assert(status == CL_SUCCESS && start != NULL);
  end = start + after_offset;

  printf("BEFORE\n");
  for (i = 0; i < counter_n; ++i) {
    if (i % per_row == 0) printf("\n");
    printf("[%3u 0x%8x] ", i, (unsigned) start[i]);
  }
  printf("\n\n");

  printf("AFTER\n");
  for (i = 0; i < counter_n; ++i) {
    if (i % per_row == 0) printf("\n");
    printf("[%3u 0x%8x] ", i, (unsigned) end[i]);
  }
  printf("\n\n");

  printf("DIFF\n");
  for (i = 0; i < counter_n; ++i) {
    if (i % per_row == 0) printf("\n");
    /* %i expects a signed int: convert the wrapped unsigned difference
     * explicitly instead of passing an unsigned value to a signed format */
    printf("[%3u %8i] ", i, (int) (end[i] - start[i]));
  }
  printf("\n\n");

  clIntelUnmapBuffer(perf);
}
/* BMP file header as stored on disk, minus the two leading magic bytes
 * "BM" (magic + this struct = 54 bytes). The struct is read and written
 * verbatim with fread/fwrite, so fixed-width types are used to keep the
 * layout independent of the platform's int/short sizes. The number at the
 * start of each field comment is the running byte count within the struct
 * after that field. */
struct bmphdr {
  // 2 bytes of magic here, "BM", total header size is 54 bytes!
  int32_t filesize;      //  4 total file size incl header
  int16_t as0, as1;      //  8 app specific
  int32_t bmpoffset;     // 12 offset of bmp data
  int32_t headerbytes;   // 16 bytes in header from this point (40 actually)
  int32_t width;         // 20
  int32_t height;        // 24
  int16_t nplanes;       // 26 no of color planes
  int16_t bpp;           // 28 bits/pixel
  int32_t compression;   // 32 BI_RGB = 0 = no compression
  int32_t sizeraw;       // 36 size of raw bmp file, excluding header, incl padding
  int32_t hres;          // 40 horz resolutions pixels/meter
  int32_t vres;          // 44
  int32_t npalcolors;    // 48 No of colors in palette
  int32_t nimportant;    // 52 No of important colors
  // raw b, g, r data here, dword aligned per scan line
};
/* Load an uncompressed 24-bit BMP located under OCL_KERNEL_PATH.
 *
 * filename - file name relative to OCL_KERNEL_PATH
 * width    - receives the image width in pixels
 * height   - receives the image height in pixels
 *
 * Returns a malloc'ed array of width*height pixels, in file row order,
 * each packed as r | g << 8 | b << 16 | 0xff000000. The caller owns the
 * array and must free() it. Unreadable or unsupported files trip an
 * assertion. */
int *cl_read_bmp(const char *filename, int *width, int *height)
{
  struct bmphdr hdr;
  char *bmppath = do_kiss_path(filename, device);
  FILE *fp = fopen(bmppath, "rb");
  assert(fp);

  char magic[2];
  size_t got = fread(&magic[0], 1, 2, fp);
  assert(got == 2);
  assert(magic[0] == 'B' && magic[1] == 'M');
  got = fread(&hdr, 1, sizeof(hdr), fp);
  assert(got == sizeof(hdr));
  (void) got; /* only inspected by the assertions */

  /* The decoding loop below reads exactly three bytes per pixel */
  assert(hdr.width > 0 && hdr.height > 0 && hdr.nplanes == 1 &&
         hdr.compression == 0 && hdr.bpp == 24);

  /* Size computed in size_t so that large images do not overflow int */
  int *rgb32 = (int *) malloc((size_t) hdr.width * (size_t) hdr.height * sizeof(int));
  assert(rgb32);

  /* Each scanline holds 3*width bytes and is padded to a multiple of 4.
   * Since 3*width == -width (mod 4), that padding is width % 4 bytes. */
  const int row_padding = hdr.width % 4;

  int *dst = rgb32;
  for (int y = 0; y < hdr.height; y++) {
    for (int x = 0; x < hdr.width; x++) {
      assert(!feof(fp));
      int b = (getc(fp) & 0x0ff);
      int g = (getc(fp) & 0x0ff);
      int r = (getc(fp) & 0x0ff);
      *dst++ = (r | (g << 8) | (b << 16) | 0xff000000); /* abgr */
    }
    for (int pad = 0; pad < row_padding; pad++)
      getc(fp); /* skip the dword-alignment bytes */
  }

  fclose(fp);
  *width = hdr.width;
  *height = hdr.height;
  free(bmppath);
  return rgb32;
}
/* Write `data` as an uncompressed 24-bit BMP file.
 *
 * data     - width*height pixels packed as r | g << 8 | b << 16, written
 *            in the order given (first row first)
 * width    - image width in pixels
 * height   - image height in pixels
 * filename - path of the file to create or overwrite
 *
 * Failing to open the file or to allocate the staging buffer trips an
 * assertion. */
void cl_write_bmp(const int *data, int width, int height, const char *filename)
{
  int x, y, pad;
  FILE *fp = fopen(filename, "wb");
  assert(fp);

  /* A scanline is 3 bytes per pixel, rounded up to a multiple of 4 bytes.
   * The padding is derived from the byte count, not the pixel count. */
  const int scanline = (width * 3 + 3) & ~3;
  const int row_padding = scanline - width * 3;

  char *raw = (char *) malloc((size_t) scanline * (size_t) height);
  assert(raw);
  char *p = raw;
  for (y = 0; y < height; y++) {
    for (x = 0; x < width; x++) {
      int c = *data++;
      *p++ = ((c >> 16) & 0xff);
      *p++ = ((c >> 8) & 0xff);
      *p++ = ((c >> 0) & 0xff);
    }
    for (pad = 0; pad < row_padding; pad++)
      *p++ = 0; // pad to dword
  }
  int sizeraw = p - raw;
  assert(sizeraw == scanline * height);

  struct bmphdr hdr;
  hdr.filesize = scanline * height + sizeof(hdr) + 2; /* +2 for the magic */
  hdr.as0 = 0;
  hdr.as1 = 0;
  hdr.bmpoffset = sizeof(hdr) + 2;
  hdr.headerbytes = 40;
  hdr.width = width;
  hdr.height = height;
  hdr.nplanes = 1;
  hdr.bpp = 24;
  hdr.compression = 0;
  hdr.sizeraw = sizeraw;
  hdr.hres = 0; // 2834;
  hdr.vres = 0; // 2834;
  hdr.npalcolors = 0;
  hdr.nimportant = 0;

  /* Now write bmp file */
  char magic[2] = { 'B', 'M' };
  fwrite(&magic[0], 1, 2, fp);
  fwrite(&hdr, 1, sizeof(hdr), fp);
  fwrite(raw, 1, hdr.sizeraw, fp);
  fclose(fp);
  free(raw);
}
/* A pixel counts as wrong when its combined relative channel error exceeds
 * this value */
static const float pixel_threshold = 0.05f;
/* Largest tolerated fraction of wrong pixels */
static const float max_error_ratio = 0.001f;

/* Compare an image with a reference BMP.
 *
 * img - w*h pixels packed as r | g << 8 | b << 16
 * w,h - image dimensions in pixels
 * bmp - reference file name, loaded through cl_read_bmp()
 *
 * Returns 1 when the fraction of pixels whose error exceeds
 * pixel_threshold stays within max_error_ratio. Returns 0 otherwise, or
 * when the reference is missing or has different dimensions. */
int cl_check_image(const int *img, int w, int h, const char *bmp)
{
  int refw, refh;
  int *ref = cl_read_bmp(bmp, &refw, &refh);
  if (ref == NULL || refw != w || refh != h) {
    free(ref); /* no-op for NULL; avoids leaking a mismatching reference */
    return 0;
  }
  const int n = w*h;
  int discrepancy = 0;
  for (int i = 0; i < n; ++i) {
    const float r = (float) (img[i] & 0xff);
    const float g = (float) ((img[i] >> 8) & 0xff);
    const float b = (float) ((img[i] >> 16) & 0xff);
    const float rr = (float) (ref[i] & 0xff);
    const float rg = (float) ((ref[i] >> 8) & 0xff);
    const float rb = (float) ((ref[i] >> 16) & 0xff);
    /* Per-channel error relative to the larger of the two values; the
     * 1/255 term keeps the denominator non-zero when both are 0 */
    const float dr = fabs(r-rr) / (1.f/255.f + std::max(r,rr));
    const float dg = fabs(g-rg) / (1.f/255.f + std::max(g,rg));
    const float db = fabs(b-rb) / (1.f/255.f + std::max(b,rb));
    const float err = sqrtf(dr*dr+dg*dg+db*db);
    if (err > pixel_threshold) discrepancy++;
  }
  free(ref);
  return (float(discrepancy) / float(n) > max_error_ratio) ? 0 : 1;
}