diff options
-rw-r--r-- | src/cl_context.c | 1 | ||||
-rw-r--r-- | src/cl_device_id.c | 48 | ||||
-rw-r--r-- | src/cl_device_id.h | 1 | ||||
-rw-r--r-- | src/cl_driver.h | 9 | ||||
-rw-r--r-- | src/cl_driver_defs.c | 1 | ||||
-rw-r--r-- | src/intel/intel_defines.h | 3 | ||||
-rw-r--r-- | src/intel/intel_driver.c | 7 | ||||
-rw-r--r-- | src/intel/intel_driver.h | 1 | ||||
-rw-r--r-- | src/intel/intel_gpgpu.c | 18 |
9 files changed, 66 insertions, 23 deletions
diff --git a/src/cl_context.c b/src/cl_context.c index 0f08e6ad..773f545b 100644 --- a/src/cl_context.c +++ b/src/cl_context.c @@ -149,6 +149,7 @@ cl_create_context(const cl_context_properties * properties, /* Save the user callback and user data*/ ctx->pfn_notify = pfn_notify; ctx->user_data = user_data; + cl_driver_set_atomic_flag(ctx->drv, ctx->device->atomic_test_result); exit: if (errcode_ret != NULL) diff --git a/src/cl_device_id.c b/src/cl_device_id.c index 215f7f28..9d3ab2ff 100644 --- a/src/cl_device_id.c +++ b/src/cl_device_id.c @@ -197,7 +197,6 @@ static struct _cl_device_id intel_skl_gt4_device = { #include "cl_gen75_device.h" }; - LOCAL cl_device_id cl_get_gt_device(void) { @@ -546,8 +545,11 @@ skl_gt4_break: } /* Runs a small kernel to check that the device works; returns - * 0 for success, 1 for silently wrong result, 2 for error */ -LOCAL cl_int + * SELF_TEST_PASS: for success. + * SELF_TEST_SLM_FAIL: for SLM results mismatch; + * SELF_TEST_ATOMIC_FAIL: for hsw enqueue kernel failure to not enable atomics in L3. + * SELF_TEST_OTHER_FAIL: other fail like runtime API fail.*/ +LOCAL cl_self_test_res cl_self_test(cl_device_id device) { cl_int status; @@ -566,7 +568,7 @@ cl_self_test(cl_device_id device) " buf[get_global_id(0)] = tmp[2 - get_local_id(0)] + buf[get_global_id(0)];" "}"; // using __local to catch the "no SLM on Haswell" problem static int tested = 0; - static cl_int ret = 2; + static cl_self_test_res ret = SELF_TEST_OTHER_FAIL; if (tested != 0) return ret; tested = 1; @@ -589,14 +591,16 @@ cl_self_test(cl_device_id device) status = clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, n*4, test_data, 1, &kernel_finished, NULL); if (status == CL_SUCCESS) { if (test_data[0] == 8 && test_data[1] == 14 && test_data[2] == 8){ - ret = 0; + ret = SELF_TEST_PASS; } else { - ret = 1; + ret = SELF_TEST_SLM_FAIL; printf("Beignet: self-test failed: (3, 7, 5) + (5, 7, 3) returned (%i, %i, %i)\n" "See README.md or http://www.freedesktop.org/wiki/Software/Beignet/\n", test_data[0], test_data[1], test_data[2]); } } + } else{ + ret = SELF_TEST_ATOMIC_FAIL; } } } @@ -610,10 +614,6 @@ cl_self_test(cl_device_id device) clReleaseCommandQueue(queue); } clReleaseContext(ctx); - if (ret == 2) { - printf("Beignet: self-test failed: error %i\n" - "See README.md or http://www.freedesktop.org/wiki/Software/Beignet/\n", status); - } return ret; } @@ -628,18 +628,22 @@ cl_get_device_ids(cl_platform_id platform, /* Do we have a usable device? */ device = cl_get_gt_device(); - if (device && cl_self_test(device)) { - int disable_self_test = 0; - // can't use BVAR (backend/src/sys/cvar.hpp) here as it's C++ - const char *env = getenv("OCL_IGNORE_SELF_TEST"); - if (env != NULL) { - sscanf(env, "%i", &disable_self_test); - } - if (disable_self_test) { - printf("Beignet: Warning - overriding self-test failure\n"); - } else { - printf("Beignet: disabling non-working device\n"); - device = 0; + if (device) { + cl_self_test_res ret = cl_self_test(device); + device->atomic_test_result = ret; + if(ret == SELF_TEST_SLM_FAIL) { + int disable_self_test = 0; + // can't use BVAR (backend/src/sys/cvar.hpp) here as it's C++ + const char *env = getenv("OCL_IGNORE_SELF_TEST"); + if (env != NULL) { + sscanf(env, "%i", &disable_self_test); + } + if (disable_self_test) { + printf("Beignet: Warning - overriding self-test failure\n"); + } else { + printf("Beignet: disabling non-working device\n"); + device = 0; + } } } if (!device) { diff --git a/src/cl_device_id.h b/src/cl_device_id.h index ee6a8e67..1bd58069 100644 --- a/src/cl_device_id.h +++ b/src/cl_device_id.h @@ -113,6 +113,7 @@ struct _cl_device_id { cl_device_affinity_domain affinity_domain; cl_device_partition_property partition_type[3]; cl_uint device_reference_count; + uint32_t atomic_test_result; }; /* Get a device from the given platform */ diff --git a/src/cl_driver.h b/src/cl_driver.h index b2510dee..1ab4dff3 100644 --- a/src/cl_driver.h +++ b/src/cl_driver.h @@ -49,6 +49,15 @@ extern cl_driver_get_bufmgr_cb *cl_driver_get_bufmgr; typedef uint32_t (cl_driver_get_ver_cb)(cl_driver); extern cl_driver_get_ver_cb *cl_driver_get_ver; +typedef enum cl_self_test_res{ + SELF_TEST_PASS = 0, + SELF_TEST_SLM_FAIL = 1, + SELF_TEST_ATOMIC_FAIL = 2, + SELF_TEST_OTHER_FAIL = 3, +} cl_self_test_res; +/* Set the atomic enable/disable flag in the driver */ +typedef void (cl_driver_set_atomic_flag_cb)(cl_driver, int); +extern cl_driver_set_atomic_flag_cb *cl_driver_set_atomic_flag; /************************************************************************** * GPGPU command streamer **************************************************************************/ diff --git a/src/cl_driver_defs.c b/src/cl_driver_defs.c index 9a47210c..b77acdc4 100644 --- a/src/cl_driver_defs.c +++ b/src/cl_driver_defs.c @@ -25,6 +25,7 @@ LOCAL cl_driver_new_cb *cl_driver_new = NULL; LOCAL cl_driver_delete_cb *cl_driver_delete = NULL; LOCAL cl_driver_get_bufmgr_cb *cl_driver_get_bufmgr = NULL; LOCAL cl_driver_get_ver_cb *cl_driver_get_ver = NULL; +LOCAL cl_driver_set_atomic_flag_cb *cl_driver_set_atomic_flag = NULL; LOCAL cl_driver_get_device_id_cb *cl_driver_get_device_id = NULL; LOCAL cl_driver_update_device_info_cb *cl_driver_update_device_info = NULL; diff --git a/src/intel/intel_defines.h b/src/intel/intel_defines.h index 1080a91a..6ada30cd 100644 --- a/src/intel/intel_defines.h +++ b/src/intel/intel_defines.h @@ -304,6 +304,9 @@ #define URB_SIZE(intel) (IS_IGDNG(intel->device_id) ? 1024 : \ IS_G4X(intel->device_id) ? 384 : 256) +// HSW +#define HSW_SCRATCH1_OFFSET (0xB038) +#define HSW_ROW_CHICKEN3_HDC_OFFSET (0xE49C) // L3 cache stuff #define GEN7_L3_SQC_REG1_ADDRESS_OFFSET (0XB010) diff --git a/src/intel/intel_driver.c b/src/intel/intel_driver.c index 1bebd9a3..9c727774 100644 --- a/src/intel/intel_driver.c +++ b/src/intel/intel_driver.c @@ -448,6 +448,12 @@ intel_driver_get_ver(struct intel_driver *drv) return drv->gen_ver; } +static void +intel_driver_set_atomic_flag(intel_driver_t *drv, int atomic_flag) +{ + drv->atomic_test_result = atomic_flag; +} + static size_t drm_intel_bo_get_size(drm_intel_bo *bo) { return bo->size; } static void* drm_intel_bo_get_virtual(drm_intel_bo *bo) { return bo->virtual; } @@ -834,6 +840,7 @@ intel_setup_callbacks(void) cl_driver_new = (cl_driver_new_cb *) cl_intel_driver_new; cl_driver_delete = (cl_driver_delete_cb *) cl_intel_driver_delete; cl_driver_get_ver = (cl_driver_get_ver_cb *) intel_driver_get_ver; + cl_driver_set_atomic_flag = (cl_driver_set_atomic_flag_cb *) intel_driver_set_atomic_flag; cl_driver_get_bufmgr = (cl_driver_get_bufmgr_cb *) intel_driver_get_bufmgr; cl_driver_get_device_id = (cl_driver_get_device_id_cb *) intel_get_device_id; cl_driver_update_device_info = (cl_driver_update_device_info_cb *) intel_update_device_info; diff --git a/src/intel/intel_driver.h b/src/intel/intel_driver.h index f972ec82..51f0e0db 100644 --- a/src/intel/intel_driver.h +++ b/src/intel/intel_driver.h @@ -89,6 +89,7 @@ typedef struct intel_driver Display *x11_display; struct dri_state *dri_ctx; struct intel_gpgpu_node *gpgpu_list; + int atomic_test_result; } intel_driver_t; #define SET_BLOCKED_SIGSET(DRIVER) do { \ diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c index b083dab5..901bd98b 100644 --- a/src/intel/intel_gpgpu.c +++ b/src/intel/intel_gpgpu.c @@ -719,7 +719,23 @@ static void intel_gpgpu_set_L3_gen75(intel_gpgpu_t *gpgpu, uint32_t use_slm) { /* still set L3 in batch buffer for fulsim. */ - BEGIN_BATCH(gpgpu->batch, 9); + if(gpgpu->drv->atomic_test_result != SELF_TEST_ATOMIC_FAIL) + { + BEGIN_BATCH(gpgpu->batch, 15); + OUT_BATCH(gpgpu->batch, CMD_LOAD_REGISTER_IMM | 1); /* length - 2 */ + /* FIXME: KMD always disable the atomic in L3 for some reason. + I checked the spec, and don't think we need that workaround now. + Before I send a patch to kernel, let's just enable it here. */ + OUT_BATCH(gpgpu->batch, HSW_SCRATCH1_OFFSET); + OUT_BATCH(gpgpu->batch, 0); /* enable atomic in L3 */ + OUT_BATCH(gpgpu->batch, CMD_LOAD_REGISTER_IMM | 1); /* length - 2 */ + OUT_BATCH(gpgpu->batch, HSW_ROW_CHICKEN3_HDC_OFFSET); + OUT_BATCH(gpgpu->batch, (1 << 6ul) << 16); /* enable atomic in L3 */ + } + else + { + BEGIN_BATCH(gpgpu->batch, 9); + } OUT_BATCH(gpgpu->batch, CMD_LOAD_REGISTER_IMM | 1); /* length - 2 */ OUT_BATCH(gpgpu->batch, GEN7_L3_SQC_REG1_ADDRESS_OFFSET); OUT_BATCH(gpgpu->batch, 0x08800000); |