GBE: adjust instruction order for load/function call for vector.

The previous implementation generated code like the following, where the extractelement instructions are emitted before the load that defines their vector operand:

  %33 = extractelement <4 x i8> %32, i32 0
  %34 = extractelement <4 x i8> %32, i32 1
  %35 = extractelement <4 x i8> %32, i32 2
  %36 = extractelement <4 x i8> %32, i32 3
  %32 = load <4 x i8> addrspace(1)* %31, align 4, !tbaa !3

This may cause problems in subsequent optimization passes. Fix it by inserting the extractelement instructions after the load instruction:

  %32 = load <4 x i8> addrspace(1)* %31, align 4, !tbaa !3
  %33 = extractelement <4 x i8> %32, i32 0
  %34 = extractelement <4 x i8> %32, i32 1
  %35 = extractelement <4 x i8> %32, i32 2
  %36 = extractelement <4 x i8> %32, i32 3

This patch also moves the dead code elimination pass after the scalarize pass, because after the scalarize pass there may be an opportunity to remove more dead instructions.

Signed-off-by: Zhigang Gong <zhigang.gong@intel.com>
Reviewed-by: "Yang, Rong R" <rong.r.yang@intel.com>
author: Zhigang Gong <zhigang.gong@intel.com> 2013-12-18 07:19:05 +0000
committer: Zhigang Gong <zhigang.gong@intel.com> 2013-12-23 18:41:35 +0800
commit: 6e2d5ebf0a09590a3365302b088de20841ebd97e (patch)
tree: eed6f6a61896d199fb2dbeacc3be798a42ce4963
parent: 64b4ceb4366a99ede6b1f7665166ce121f9aa2a9 (diff)
download: beignet-6e2d5ebf0a09590a3365302b088de20841ebd97e.tar.gz
2 files changed, 9 insertions, 1 deletions
diff --git a/backend/src/llvm/llvm_scalarize.cpp b/backend/src/llvm/llvm_scalarize.cpp
index 6394909f..35504f37 100644
--- a/backend/src/llvm/llvm_scalarize.cpp
+++ b/backend/src/llvm/llvm_scalarize.cpp
@@ -222,6 +222,12 @@ namespace gbe {
       return GetComponentCount(value->getType());
     }
 
+    /* set to insert new instructions after the specified instruction.*/
+    void setAppendPoint(Instruction *insn)  {
+      BasicBlock::iterator next(insn);
+      builder->SetInsertPoint(++next);
+    }
+
     DenseMap<Value*, VectorValues> vectorVals;
     Module* module;
     IRBuilder<>* builder;
@@ -649,6 +655,7 @@ namespace gbe {
           case GEN_OCL_GET_IMAGE_WIDTH:
           case GEN_OCL_GET_IMAGE_HEIGHT:
           {
+            setAppendPoint(call);
             extractFromVector(call);
             break;
           }
@@ -686,6 +693,7 @@ namespace gbe {
 
   bool Scalarize::scalarizeLoad(LoadInst* ld)
   {
+    setAppendPoint(ld);
     extractFromVector(ld);
     return false;
   }
diff --git a/backend/src/llvm/llvm_to_gen.cpp b/backend/src/llvm/llvm_to_gen.cpp
index ad07c5e3..e11e449a 100644
--- a/backend/src/llvm/llvm_to_gen.cpp
+++ b/backend/src/llvm/llvm_to_gen.cpp
@@ -83,11 +83,11 @@ namespace gbe
     passes.add(createScalarReplAggregatesPass()); // Break up allocas
     passes.add(createRemoveGEPPass(unit));
     passes.add(createConstantPropagationPass());
-    passes.add(createDeadInstEliminationPass());  // Remove simplified instructions
     passes.add(createLowerSwitchPass());
     passes.add(createPromoteMemoryToRegisterPass());
     passes.add(createGVNPass());                  // Remove redundancies
     passes.add(createScalarizePass());        // Expand all vector ops
+    passes.add(createDeadInstEliminationPass());  // Remove simplified instructions
     passes.add(createGenPass(unit));
 
     // Print the code extra optimization passes
author	Zhigang Gong <zhigang.gong@intel.com>	2013-12-18 07:19:05 +0000
committer	Zhigang Gong <zhigang.gong@intel.com>	2013-12-23 18:41:35 +0800
commit	6e2d5ebf0a09590a3365302b088de20841ebd97e (patch)
tree	eed6f6a61896d199fb2dbeacc3be798a42ce4963
parent	64b4ceb4366a99ede6b1f7665166ce121f9aa2a9 (diff)
download	beignet-6e2d5ebf0a09590a3365302b088de20841ebd97e.tar.gz