summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/cl_context.c1
-rw-r--r--src/cl_device_id.c48
-rw-r--r--src/cl_device_id.h1
-rw-r--r--src/cl_driver.h9
-rw-r--r--src/cl_driver_defs.c1
-rw-r--r--src/intel/intel_defines.h3
-rw-r--r--src/intel/intel_driver.c7
-rw-r--r--src/intel/intel_driver.h1
-rw-r--r--src/intel/intel_gpgpu.c18
9 files changed, 66 insertions, 23 deletions
diff --git a/src/cl_context.c b/src/cl_context.c
index 0f08e6ad..773f545b 100644
--- a/src/cl_context.c
+++ b/src/cl_context.c
@@ -149,6 +149,7 @@ cl_create_context(const cl_context_properties * properties,
/* Save the user callback and user data*/
ctx->pfn_notify = pfn_notify;
ctx->user_data = user_data;
+ cl_driver_set_atomic_flag(ctx->drv, ctx->device->atomic_test_result);
exit:
if (errcode_ret != NULL)
diff --git a/src/cl_device_id.c b/src/cl_device_id.c
index 215f7f28..9d3ab2ff 100644
--- a/src/cl_device_id.c
+++ b/src/cl_device_id.c
@@ -197,7 +197,6 @@ static struct _cl_device_id intel_skl_gt4_device = {
#include "cl_gen75_device.h"
};
-
LOCAL cl_device_id
cl_get_gt_device(void)
{
@@ -546,8 +545,11 @@ skl_gt4_break:
}
/* Runs a small kernel to check that the device works; returns
- * 0 for success, 1 for silently wrong result, 2 for error */
-LOCAL cl_int
+ * SELF_TEST_PASS: for success.
+ * SELF_TEST_SLM_FAIL: for SLM results mismatch;
+ * SELF_TEST_ATOMIC_FAIL: for hsw enqueue kernel failure to not enable atomics in L3.
+ * SELF_TEST_OTHER_FAIL: other fail like runtime API fail.*/
+LOCAL cl_self_test_res
cl_self_test(cl_device_id device)
{
cl_int status;
@@ -566,7 +568,7 @@ cl_self_test(cl_device_id device)
" buf[get_global_id(0)] = tmp[2 - get_local_id(0)] + buf[get_global_id(0)];"
"}"; // using __local to catch the "no SLM on Haswell" problem
static int tested = 0;
- static cl_int ret = 2;
+ static cl_self_test_res ret = SELF_TEST_OTHER_FAIL;
if (tested != 0)
return ret;
tested = 1;
@@ -589,14 +591,16 @@ cl_self_test(cl_device_id device)
status = clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, n*4, test_data, 1, &kernel_finished, NULL);
if (status == CL_SUCCESS) {
if (test_data[0] == 8 && test_data[1] == 14 && test_data[2] == 8){
- ret = 0;
+ ret = SELF_TEST_PASS;
} else {
- ret = 1;
+ ret = SELF_TEST_SLM_FAIL;
printf("Beignet: self-test failed: (3, 7, 5) + (5, 7, 3) returned (%i, %i, %i)\n"
"See README.md or http://www.freedesktop.org/wiki/Software/Beignet/\n",
test_data[0], test_data[1], test_data[2]);
}
}
+ } else{
+ ret = SELF_TEST_ATOMIC_FAIL;
}
}
}
@@ -610,10 +614,6 @@ cl_self_test(cl_device_id device)
clReleaseCommandQueue(queue);
}
clReleaseContext(ctx);
- if (ret == 2) {
- printf("Beignet: self-test failed: error %i\n"
- "See README.md or http://www.freedesktop.org/wiki/Software/Beignet/\n", status);
- }
return ret;
}
@@ -628,18 +628,22 @@ cl_get_device_ids(cl_platform_id platform,
/* Do we have a usable device? */
device = cl_get_gt_device();
- if (device && cl_self_test(device)) {
- int disable_self_test = 0;
- // can't use BVAR (backend/src/sys/cvar.hpp) here as it's C++
- const char *env = getenv("OCL_IGNORE_SELF_TEST");
- if (env != NULL) {
- sscanf(env, "%i", &disable_self_test);
- }
- if (disable_self_test) {
- printf("Beignet: Warning - overriding self-test failure\n");
- } else {
- printf("Beignet: disabling non-working device\n");
- device = 0;
+ if (device) {
+ cl_self_test_res ret = cl_self_test(device);
+ device->atomic_test_result = ret;
+ if(ret == SELF_TEST_SLM_FAIL) {
+ int disable_self_test = 0;
+ // can't use BVAR (backend/src/sys/cvar.hpp) here as it's C++
+ const char *env = getenv("OCL_IGNORE_SELF_TEST");
+ if (env != NULL) {
+ sscanf(env, "%i", &disable_self_test);
+ }
+ if (disable_self_test) {
+ printf("Beignet: Warning - overriding self-test failure\n");
+ } else {
+ printf("Beignet: disabling non-working device\n");
+ device = 0;
+ }
}
}
if (!device) {
diff --git a/src/cl_device_id.h b/src/cl_device_id.h
index ee6a8e67..1bd58069 100644
--- a/src/cl_device_id.h
+++ b/src/cl_device_id.h
@@ -113,6 +113,7 @@ struct _cl_device_id {
cl_device_affinity_domain affinity_domain;
cl_device_partition_property partition_type[3];
cl_uint device_reference_count;
+ uint32_t atomic_test_result;
};
/* Get a device from the given platform */
diff --git a/src/cl_driver.h b/src/cl_driver.h
index b2510dee..1ab4dff3 100644
--- a/src/cl_driver.h
+++ b/src/cl_driver.h
@@ -49,6 +49,15 @@ extern cl_driver_get_bufmgr_cb *cl_driver_get_bufmgr;
typedef uint32_t (cl_driver_get_ver_cb)(cl_driver);
extern cl_driver_get_ver_cb *cl_driver_get_ver;
+typedef enum cl_self_test_res{
+ SELF_TEST_PASS = 0,
+ SELF_TEST_SLM_FAIL = 1,
+ SELF_TEST_ATOMIC_FAIL = 2,
+ SELF_TEST_OTHER_FAIL = 3,
+} cl_self_test_res;
+/* Set the atomic enable/disable flag in the driver */
+typedef void (cl_driver_set_atomic_flag_cb)(cl_driver, int);
+extern cl_driver_set_atomic_flag_cb *cl_driver_set_atomic_flag;
/**************************************************************************
* GPGPU command streamer
**************************************************************************/
diff --git a/src/cl_driver_defs.c b/src/cl_driver_defs.c
index 9a47210c..b77acdc4 100644
--- a/src/cl_driver_defs.c
+++ b/src/cl_driver_defs.c
@@ -25,6 +25,7 @@ LOCAL cl_driver_new_cb *cl_driver_new = NULL;
LOCAL cl_driver_delete_cb *cl_driver_delete = NULL;
LOCAL cl_driver_get_bufmgr_cb *cl_driver_get_bufmgr = NULL;
LOCAL cl_driver_get_ver_cb *cl_driver_get_ver = NULL;
+LOCAL cl_driver_set_atomic_flag_cb *cl_driver_set_atomic_flag = NULL;
LOCAL cl_driver_get_device_id_cb *cl_driver_get_device_id = NULL;
LOCAL cl_driver_update_device_info_cb *cl_driver_update_device_info = NULL;
diff --git a/src/intel/intel_defines.h b/src/intel/intel_defines.h
index 1080a91a..6ada30cd 100644
--- a/src/intel/intel_defines.h
+++ b/src/intel/intel_defines.h
@@ -304,6 +304,9 @@
#define URB_SIZE(intel) (IS_IGDNG(intel->device_id) ? 1024 : \
IS_G4X(intel->device_id) ? 384 : 256)
+// HSW
+#define HSW_SCRATCH1_OFFSET (0xB038)
+#define HSW_ROW_CHICKEN3_HDC_OFFSET (0xE49C)
// L3 cache stuff
#define GEN7_L3_SQC_REG1_ADDRESS_OFFSET (0XB010)
diff --git a/src/intel/intel_driver.c b/src/intel/intel_driver.c
index 1bebd9a3..9c727774 100644
--- a/src/intel/intel_driver.c
+++ b/src/intel/intel_driver.c
@@ -448,6 +448,12 @@ intel_driver_get_ver(struct intel_driver *drv)
return drv->gen_ver;
}
+static void
+intel_driver_set_atomic_flag(intel_driver_t *drv, int atomic_flag)
+{
+ drv->atomic_test_result = atomic_flag;
+}
+
static size_t drm_intel_bo_get_size(drm_intel_bo *bo) { return bo->size; }
static void* drm_intel_bo_get_virtual(drm_intel_bo *bo) { return bo->virtual; }
@@ -834,6 +840,7 @@ intel_setup_callbacks(void)
cl_driver_new = (cl_driver_new_cb *) cl_intel_driver_new;
cl_driver_delete = (cl_driver_delete_cb *) cl_intel_driver_delete;
cl_driver_get_ver = (cl_driver_get_ver_cb *) intel_driver_get_ver;
+ cl_driver_set_atomic_flag = (cl_driver_set_atomic_flag_cb *) intel_driver_set_atomic_flag;
cl_driver_get_bufmgr = (cl_driver_get_bufmgr_cb *) intel_driver_get_bufmgr;
cl_driver_get_device_id = (cl_driver_get_device_id_cb *) intel_get_device_id;
cl_driver_update_device_info = (cl_driver_update_device_info_cb *) intel_update_device_info;
diff --git a/src/intel/intel_driver.h b/src/intel/intel_driver.h
index f972ec82..51f0e0db 100644
--- a/src/intel/intel_driver.h
+++ b/src/intel/intel_driver.h
@@ -89,6 +89,7 @@ typedef struct intel_driver
Display *x11_display;
struct dri_state *dri_ctx;
struct intel_gpgpu_node *gpgpu_list;
+ int atomic_test_result;
} intel_driver_t;
#define SET_BLOCKED_SIGSET(DRIVER) do { \
diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c
index b083dab5..901bd98b 100644
--- a/src/intel/intel_gpgpu.c
+++ b/src/intel/intel_gpgpu.c
@@ -719,7 +719,23 @@ static void
intel_gpgpu_set_L3_gen75(intel_gpgpu_t *gpgpu, uint32_t use_slm)
{
/* still set L3 in batch buffer for fulsim. */
- BEGIN_BATCH(gpgpu->batch, 9);
+ if(gpgpu->drv->atomic_test_result != SELF_TEST_ATOMIC_FAIL)
+ {
+ BEGIN_BATCH(gpgpu->batch, 15);
+ OUT_BATCH(gpgpu->batch, CMD_LOAD_REGISTER_IMM | 1); /* length - 2 */
+ /* FIXME: KMD always disable the atomic in L3 for some reason.
+ I checked the spec, and don't think we need that workaround now.
+ Before I send a patch to kernel, let's just enable it here. */
+ OUT_BATCH(gpgpu->batch, HSW_SCRATCH1_OFFSET);
+ OUT_BATCH(gpgpu->batch, 0); /* enable atomic in L3 */
+ OUT_BATCH(gpgpu->batch, CMD_LOAD_REGISTER_IMM | 1); /* length - 2 */
+ OUT_BATCH(gpgpu->batch, HSW_ROW_CHICKEN3_HDC_OFFSET);
+ OUT_BATCH(gpgpu->batch, (1 << 6ul) << 16); /* enable atomic in L3 */
+ }
+ else
+ {
+ BEGIN_BATCH(gpgpu->batch, 9);
+ }
OUT_BATCH(gpgpu->batch, CMD_LOAD_REGISTER_IMM | 1); /* length - 2 */
OUT_BATCH(gpgpu->batch, GEN7_L3_SQC_REG1_ADDRESS_OFFSET);
OUT_BATCH(gpgpu->batch, 0x08800000);