From bd8524f197a2faf0586d9abe97d42ea5a07cc62a Mon Sep 17 00:00:00 2001 From: Luo Xionghu Date: Thu, 19 May 2016 19:44:05 +0800 Subject: The dst of 3-op math functions needs 16-byte alignment when allocating registers. A GPU hang will happen for a uniform mad instruction whose dest register is not 16-byte aligned, so check for this and adjust the mad dst to 16-byte alignment before register allocation. This patch should fix "STRICT=0, opencv_test_video/OCL_Video/PyrLKOpticalFlow." Signed-off-by: Luo Xionghu --- backend/src/backend/gen_reg_allocation.cpp | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'backend') diff --git a/backend/src/backend/gen_reg_allocation.cpp b/backend/src/backend/gen_reg_allocation.cpp index da3dac04..1aaac588 100644 --- a/backend/src/backend/gen_reg_allocation.cpp +++ b/backend/src/backend/gen_reg_allocation.cpp @@ -49,10 +49,11 @@ namespace gbe */ struct GenRegInterval { INLINE GenRegInterval(ir::Register reg) : - reg(reg), minID(INT_MAX), maxID(-INT_MAX), conflictReg(0) {} + reg(reg), minID(INT_MAX), maxID(-INT_MAX), conflictReg(0), b3OpAlign(0) {} ir::Register reg; //!< (virtual) register of the interval int32_t minID, maxID; //!< Starting and ending points ir::Register conflictReg; // < has banck conflict with this register + bool b3OpAlign; }; typedef struct GenRegIntervalKey { @@ -1050,6 +1051,9 @@ namespace gbe } } } + if (interval.b3OpAlign != 0) { + alignment = (alignment + 15) & ~15; + } while ((grfOffset = ctx.allocate(size, alignment, direction)) == -1) { const bool success = this->expireGRF(interval); if (success == false) { @@ -1138,6 +1142,9 @@ namespace gbe reg == ir::ocl::groupid1 || reg == ir::ocl::groupid2) continue; + if (is3SrcOp) { + this->intervals[reg].b3OpAlign = 1; + } this->intervals[reg].minID = std::min(this->intervals[reg].minID, insnID); this->intervals[reg].maxID = std::max(this->intervals[reg].maxID, insnID); } -- cgit v1.2.1