diff options
-rw-r--r-- | backend/src/ocl_stdlib.h | 18 | ||||
-rw-r--r-- | backend/src/ocl_stdlib_str.cpp | 18 | ||||
-rw-r--r-- | kernels/compiler_mandelbrot.cl (renamed from kernels/mandelbrot.cl) | 2 | ||||
-rw-r--r-- | kernels/compiler_mandelbrot_alternate.cl | 38 | ||||
-rw-r--r-- | utests/CMakeLists.txt | 3 | ||||
-rw-r--r-- | utests/Makefile | 3 | ||||
-rw-r--r-- | utests/compiler_mandelbrot.cpp (renamed from utests/app_mandelbrot.cpp) | 10 | ||||
-rw-r--r-- | utests/compiler_mandelbrot_alternate.cpp | 52 |
8 files changed, 118 insertions, 26 deletions
diff --git a/backend/src/ocl_stdlib.h b/backend/src/ocl_stdlib.h index 6e01a451..357aaf3c 100644 --- a/backend/src/ocl_stdlib.h +++ b/backend/src/ocl_stdlib.h @@ -1,5 +1,5 @@ /* - * Copyright © 2012 Intel Corporation +uint* Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public @@ -20,7 +20,7 @@ #ifndef __GEN_OCL_STDLIB_H__ #define __GEN_OCL_STDLIB_H__ -#define INLINE_OVERLOADABLE __attribute__((overloadable,always_inline)) +#define INLINE_OVERLOADABLE __attribute__((overloadable,always_inline)) inline #define OVERLOADABLE __attribute__((overloadable)) ///////////////////////////////////////////////////////////////////////////// @@ -138,11 +138,11 @@ inline uint get_global_id(uint dim) { __attribute__ ((pure, const, overloadable)) float mad(float a, float b, float c); __attribute__((overloadable, always_inline)) inline uint select(uint src0, uint src1, uint cond) { - return cond ? src0 : src1; + return cond ? src1 : src0; } __attribute__((overloadable, always_inline)) inline int select(int src0, int src1, int cond) { - return cond ? src0 : src1; + return cond ? src1 : src0; } // This will be optimized out by LLVM and will output LLVM select instructions @@ -168,22 +168,22 @@ DECL_SELECT4(int4, int, int4, 0x80000000) DECL_SELECT4(float4, float, int4, 0x80000000) #undef DECL_SELECT4 -INLINE_OVERLOADABLE inline float2 mad(float2 a, float2 b, float2 c) { +INLINE_OVERLOADABLE float2 mad(float2 a, float2 b, float2 c) { return (float2)(mad(a.x,b.x,c.x), mad(a.y,b.y,c.y)); } -INLINE_OVERLOADABLE inline float3 mad(float3 a, float3 b, float3 c) { +INLINE_OVERLOADABLE float3 mad(float3 a, float3 b, float3 c) { return (float3)(mad(a.x,b.x,c.x), mad(a.y,b.y,c.y), mad(a.z,b.z,c.z)); } -INLINE_OVERLOADABLE inline float4 mad(float4 a, float4 b, float4 c) { +INLINE_OVERLOADABLE float4 mad(float4 a, float4 b, float4 c) { return (float4)(mad(a.x,b.x,c.x), mad(a.y,b.y,c.y), mad(a.z,b.z,c.z), mad(a.w,b.w,c.w)); } #define DECL_MIN_MAX(TYPE) \ -INLINE_OVERLOADABLE inline TYPE max(TYPE a, TYPE b) { \ +INLINE_OVERLOADABLE TYPE max(TYPE a, TYPE b) { \ return a > b ? a : b; \ } \ -INLINE_OVERLOADABLE inline TYPE min(TYPE a, TYPE b) { \ +INLINE_OVERLOADABLE TYPE min(TYPE a, TYPE b) { \ return a < b ? a : b; \ } DECL_MIN_MAX(float) diff --git a/backend/src/ocl_stdlib_str.cpp b/backend/src/ocl_stdlib_str.cpp index 8fc1dba0..ceda95fe 100644 --- a/backend/src/ocl_stdlib_str.cpp +++ b/backend/src/ocl_stdlib_str.cpp @@ -2,7 +2,7 @@ namespace gbe { std::string ocl_stdlib_str = "/* \n" -" * Copyright © 2012 Intel Corporation\n" +"uint* Copyright © 2012 Intel Corporation\n" " *\n" " * This library is free software; you can redistribute it and/or\n" " * modify it under the terms of the GNU Lesser General Public\n" @@ -23,7 +23,7 @@ std::string ocl_stdlib_str = "#ifndef __GEN_OCL_STDLIB_H__\n" "#define __GEN_OCL_STDLIB_H__\n" "\n" -"#define INLINE_OVERLOADABLE __attribute__((overloadable,always_inline))\n" +"#define INLINE_OVERLOADABLE __attribute__((overloadable,always_inline)) inline\n" "#define OVERLOADABLE __attribute__((overloadable))\n" "\n" "/////////////////////////////////////////////////////////////////////////////\n" @@ -141,11 +141,11 @@ std::string ocl_stdlib_str = "__attribute__ ((pure, const, overloadable)) float mad(float a, float b, float c);\n" "__attribute__((overloadable, always_inline))\n" "inline uint select(uint src0, uint src1, uint cond) {\n" -" return cond ? src0 : src1;\n" +" return cond ? src1 : src0;\n" "}\n" "__attribute__((overloadable, always_inline))\n" "inline int select(int src0, int src1, int cond) {\n" -" return cond ? src0 : src1;\n" +" return cond ? src1 : src0;\n" "}\n" "\n" "// This will be optimized out by LLVM and will output LLVM select instructions\n" @@ -171,22 +171,22 @@ std::string ocl_stdlib_str = "DECL_SELECT4(float4, float, int4, 0x80000000)\n" "#undef DECL_SELECT4\n" "\n" -"INLINE_OVERLOADABLE inline float2 mad(float2 a, float2 b, float2 c) {\n" +"INLINE_OVERLOADABLE float2 mad(float2 a, float2 b, float2 c) {\n" " return (float2)(mad(a.x,b.x,c.x), mad(a.y,b.y,c.y));\n" "}\n" -"INLINE_OVERLOADABLE inline float3 mad(float3 a, float3 b, float3 c) {\n" +"INLINE_OVERLOADABLE float3 mad(float3 a, float3 b, float3 c) {\n" " return (float3)(mad(a.x,b.x,c.x), mad(a.y,b.y,c.y), mad(a.z,b.z,c.z));\n" "}\n" -"INLINE_OVERLOADABLE inline float4 mad(float4 a, float4 b, float4 c) {\n" +"INLINE_OVERLOADABLE float4 mad(float4 a, float4 b, float4 c) {\n" " return (float4)(mad(a.x,b.x,c.x), mad(a.y,b.y,c.y),\n" " mad(a.z,b.z,c.z), mad(a.w,b.w,c.w));\n" "}\n" "\n" "#define DECL_MIN_MAX(TYPE) \\\n" -"INLINE_OVERLOADABLE inline TYPE max(TYPE a, TYPE b) { \\\n" +"INLINE_OVERLOADABLE TYPE max(TYPE a, TYPE b) { \\\n" " return a > b ? a : b; \\\n" "} \\\n" -"INLINE_OVERLOADABLE inline TYPE min(TYPE a, TYPE b) { \\\n" +"INLINE_OVERLOADABLE TYPE min(TYPE a, TYPE b) { \\\n" " return a < b ? a : b; \\\n" "}\n" "DECL_MIN_MAX(float)\n" diff --git a/kernels/mandelbrot.cl b/kernels/compiler_mandelbrot.cl index f07e945e..42295ab8 100644 --- a/kernels/mandelbrot.cl +++ b/kernels/compiler_mandelbrot.cl @@ -4,7 +4,7 @@ int ID(int x, int y, int width) { return 4*width*y + x*4; } float mapX(float x) { return x*3.25f - 2.f; } float mapY(float y) { return y*2.5f - 1.25f; } -__kernel void mandelbrot(__global char *out) { +__kernel void compiler_mandelbrot(__global char *out) { int x_dim = get_global_id(0); int y_dim = get_global_id(1); int width = get_global_size(0); diff --git a/kernels/compiler_mandelbrot_alternate.cl b/kernels/compiler_mandelbrot_alternate.cl new file mode 100644 index 00000000..fc993267 --- /dev/null +++ b/kernels/compiler_mandelbrot_alternate.cl @@ -0,0 +1,38 @@ +int offset(int x, int y, int width) { return width*y + x; } +float mapX(float x) {return x*3.25f - 2.f;} +float mapY(float y) {return y*2.5f - 1.25f;} + +__kernel void compiler_mandelbrot_alternate(__global uint *out, + float rcpWidth, + float rcpHeight, + float criterium) +{ + int xDim = get_global_id(0); + int yDim = get_global_id(1); + int width = get_global_size(0); + int height = get_global_size(1); + int idx = offset(xDim, yDim, width); + + float xOrigin = mapX((float) xDim * rcpWidth); + float yOrigin = mapY((float) yDim * rcpHeight); + float x = 0.0f; + float y = 0.0f; + + float iteration = 256.f; + + bool breakCond = true; + while (breakCond) { + const float xtemp = mad(-y,y,mad(x,x,xOrigin)); + y = mad(2.f*x, y, yOrigin); + x = xtemp; + iteration -= 1.f; + breakCond = -mad(y,y,mad(x,x, -criterium)) * iteration > 0.f; + } + + const uint iIteration = 256 - (uint) iteration; + const uint isBlack = (iIteration == 256); + const uint black = 255 << 24; + const uint nonBlack = iIteration | (iIteration << 8) | (iIteration << 16) | (255 << 24); + out[idx] = select(nonBlack, black, isBlack); +} + diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt index 5fe86ad5..d3f02851 100644 --- a/utests/CMakeLists.txt +++ b/utests/CMakeLists.txt @@ -4,7 +4,8 @@ INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR} link_directories (${LLVM_LIBRARY_DIRS}) ADD_LIBRARY(utests SHARED utest_error.c - app_mandelbrot.cpp + compiler_mandelbrot.cpp + compiler_mandelbrot_alternate.cpp compiler_box_blur_float.cpp compiler_box_blur.cpp compiler_argument_structure.cpp diff --git a/utests/Makefile b/utests/Makefile index b2bade66..6c257758 100644 --- a/utests/Makefile +++ b/utests/Makefile @@ -9,7 +9,8 @@ SUBDIRS=. C_SRC=utest_error.c CPP_SRC=\ - app_mandelbrot.cpp \ + compiler_mandelbrot.cpp \ + compiler_mandelbrot_alternate.cpp \ compiler_box_blur.cpp \ compiler_box_blur_float.cpp \ compiler_argument_structure.cpp \ diff --git a/utests/app_mandelbrot.cpp b/utests/compiler_mandelbrot.cpp index 239b0b5d..61751500 100644 --- a/utests/app_mandelbrot.cpp +++ b/utests/compiler_mandelbrot.cpp @@ -20,17 +20,17 @@ #include "utest_helper.hpp" static int *dst = NULL; -static const size_t w = 64; -static const size_t h = 64; +static const size_t w = 16; +static const size_t h = 16; static const size_t iter = 4; -static void mandelbrot(void) +static void compiler_mandelbrot(void) { const size_t global[2] = {w, h}; const size_t local[2] = {16, 1}; const size_t sz = w * h * sizeof(char[4]); - OCL_CREATE_KERNEL("mandelbrot"); + OCL_CREATE_KERNEL("compiler_mandelbrot"); cl_mem cl_dst = clCreateBuffer(ctx, 0, sz, NULL, NULL); OCL_CALL (clSetKernelArg, kernel, 0, sizeof(cl_mem), &cl_dst); @@ -42,5 +42,5 @@ static void mandelbrot(void) OCL_CALL (clReleaseMemObject, cl_dst); } -MAKE_UTEST_FROM_FUNCTION(mandelbrot); +MAKE_UTEST_FROM_FUNCTION(compiler_mandelbrot); diff --git a/utests/compiler_mandelbrot_alternate.cpp b/utests/compiler_mandelbrot_alternate.cpp new file mode 100644 index 00000000..f1c2960b --- /dev/null +++ b/utests/compiler_mandelbrot_alternate.cpp @@ -0,0 +1,52 @@ +/* + * Copyright © 2012 Intel Corporation + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see <http://www.gnu.org/licenses/>. + * + * Author: Benjamin Segovia <benjamin.segovia@intel.com> + */ + +#include "utest_helper.hpp" + +static int *dst = NULL; +static const size_t w = 16; +static const size_t h = 16; +static const size_t iter = 4; +static const float criterium = 4.f; + +static void compiler_mandelbrot_alternate(void) +{ + const size_t global[2] = {w, h}; + const size_t local[2] = {16, 1}; + const size_t sz = w * h * sizeof(char[4]); + const float rcpW = 1.f / float(w); + const float rcpH = 1.f / float(h); + + OCL_CREATE_KERNEL("compiler_mandelbrot_alternate"); + + cl_mem cl_dst = clCreateBuffer(ctx, 0, sz, NULL, NULL); + OCL_CALL (clSetKernelArg, kernel, 0, sizeof(cl_mem), &cl_dst); + OCL_CALL (clSetKernelArg, kernel, 1, sizeof(float), &rcpW); + OCL_CALL (clSetKernelArg, kernel, 2, sizeof(float), &rcpH); + OCL_CALL (clSetKernelArg, kernel, 3, sizeof(float), &criterium); + OCL_CALL (clEnqueueNDRangeKernel, queue, kernel, 2, NULL, global, local, 0, NULL, NULL); + dst = (int *) clIntelMapBuffer(cl_dst, NULL); + + cl_write_bmp(dst, w, h, "mandelbrot.bmp"); + OCL_CALL (clIntelUnmapBuffer, cl_dst); + OCL_CALL (clReleaseMemObject, cl_dst); +} + +MAKE_UTEST_FROM_FUNCTION(compiler_mandelbrot_alternate); + |