author     Nate Begeman <natebegeman@mac.com>    2010-06-10 00:17:56 +0000
committer  Nate Begeman <natebegeman@mac.com>    2010-06-10 00:17:56 +0000
commit     d075c01c359b9cc120c3accc7166990f9f4ac423 (patch)
tree       07d3885554dc6f769d80156bea8511920421668b
parent     c3926645d70842eae22641df1bf69da457a0ff11 (diff)
download   clang-d075c01c359b9cc120c3accc7166990f9f4ac423.tar.gz
Support _lane ops and multiplies by scalar.
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@105770 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--  include/clang/Basic/BuiltinsARM.def  |  46
-rw-r--r--  lib/CodeGen/CGBuiltin.cpp            |  28
-rw-r--r--  lib/CodeGen/CodeGenFunction.h        |   3
-rw-r--r--  lib/Headers/arm_neon.td              |  75
4 files changed, 63 insertions(+), 89 deletions(-)
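
For orientation, the snippet below shows the kind of arm_neon.h operations this patch wires up: a _lane multiply-accumulate and a multiply-accumulate by scalar. It is illustrative usage only, not code from the commit, and assumes a NEON-enabled build (e.g. -mfpu=neon).

#include <arm_neon.h>

int32x4_t widen_mla(int32x4_t acc, int16x4_t v, int16x4_t lanes, int16_t s) {
  // _lane form: multiply-accumulate by a single lane of another vector.
  acc = vmlal_lane_s16(acc, v, lanes, 1);
  // _n form: multiply-accumulate by a scalar broadcast across the vector.
  return vmlal_n_s16(acc, v, s);
}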
diff --git a/include/clang/Basic/BuiltinsARM.def b/include/clang/Basic/BuiltinsARM.def
index 26c5bec635..247cf7f5ab 100644
--- a/include/clang/Basic/BuiltinsARM.def
+++ b/include/clang/Basic/BuiltinsARM.def
@@ -16,8 +16,6 @@
// In libgcc
BUILTIN(__clear_cache, "vc*c*", "")
-
-// FIXME: This is just a placeholder. NEON intrinsics should be listed here.
BUILTIN(__builtin_thread_pointer, "v*", "")
// NEON
@@ -105,46 +103,16 @@ BUILTIN(__builtin_neon_vmin_v, "V8cV8cV8ci", "n")
BUILTIN(__builtin_neon_vminq_v, "V16cV16cV16ci", "n")
BUILTIN(__builtin_neon_vmlal_v, "V16cV16cV8cV8ci", "n")
BUILTIN(__builtin_neon_vmlal_lane_v, "V16cV16cV8cV8cii", "n")
-BUILTIN(__builtin_neon_vmlal_n_s16, "V4iV4iV4ss", "n")
-BUILTIN(__builtin_neon_vmlal_n_s32, "V2LLiV2LLiV2ii", "n")
-BUILTIN(__builtin_neon_vmlal_n_u16, "V4iV4iV4sUs", "n")
-BUILTIN(__builtin_neon_vmlal_n_u32, "V2LLiV2LLiV2iUi", "n")
BUILTIN(__builtin_neon_vmla_lane_v, "V8cV8cV8cV8cii", "n")
BUILTIN(__builtin_neon_vmlaq_lane_v, "V16cV16cV16cV16cii", "n")
-BUILTIN(__builtin_neon_vmla_n_i16, "V4sV4sV4sUs", "n")
-BUILTIN(__builtin_neon_vmla_n_i32, "V2iV2iV2iUi", "n")
-BUILTIN(__builtin_neon_vmla_n_f32, "V2fV2fV2ff", "n")
-BUILTIN(__builtin_neon_vmlaq_n_i16, "V8sV8sV8sUs", "n")
-BUILTIN(__builtin_neon_vmlaq_n_i32, "V4iV4iV4iUi", "n")
-BUILTIN(__builtin_neon_vmlaq_n_f32, "V4fV4fV4ff", "n")
BUILTIN(__builtin_neon_vmlsl_v, "V16cV16cV8cV8ci", "n")
BUILTIN(__builtin_neon_vmlsl_lane_v, "V16cV16cV8cV8cii", "n")
-BUILTIN(__builtin_neon_vmlsl_n_s16, "V4iV4iV4ss", "n")
-BUILTIN(__builtin_neon_vmlsl_n_s32, "V2LLiV2LLiV2ii", "n")
-BUILTIN(__builtin_neon_vmlsl_n_u16, "V4iV4iV4sUs", "n")
-BUILTIN(__builtin_neon_vmlsl_n_u32, "V2LLiV2LLiV2iUi", "n")
BUILTIN(__builtin_neon_vmls_lane_v, "V8cV8cV8cV8cii", "n")
BUILTIN(__builtin_neon_vmlsq_lane_v, "V16cV16cV16cV16cii", "n")
-BUILTIN(__builtin_neon_vmls_n_i16, "V4sV4sV4sUs", "n")
-BUILTIN(__builtin_neon_vmls_n_i32, "V2iV2iV2iUi", "n")
-BUILTIN(__builtin_neon_vmls_n_f32, "V2fV2fV2ff", "n")
-BUILTIN(__builtin_neon_vmlsq_n_i16, "V8sV8sV8sUs", "n")
-BUILTIN(__builtin_neon_vmlsq_n_i32, "V4iV4iV4iUi", "n")
-BUILTIN(__builtin_neon_vmlsq_n_f32, "V4fV4fV4ff", "n")
BUILTIN(__builtin_neon_vmovl_v, "V16cV8ci", "n")
BUILTIN(__builtin_neon_vmovn_v, "V8cV16ci", "n")
BUILTIN(__builtin_neon_vmull_v, "V16cV8cV8ci", "n")
BUILTIN(__builtin_neon_vmull_lane_v, "V16cV8cV8cii", "n")
-BUILTIN(__builtin_neon_vmull_n_s16, "V4iV4ss", "n")
-BUILTIN(__builtin_neon_vmull_n_s32, "V2LLiV2ii", "n")
-BUILTIN(__builtin_neon_vmull_n_u16, "V4iV4sUs", "n")
-BUILTIN(__builtin_neon_vmull_n_u32, "V2LLiV2iUi", "n")
-BUILTIN(__builtin_neon_vmul_n_i16, "V4sV4sUs", "n")
-BUILTIN(__builtin_neon_vmul_n_i32, "V2iV2iUi", "n")
-BUILTIN(__builtin_neon_vmul_n_f32, "V2fV2ff", "n")
-BUILTIN(__builtin_neon_vmulq_n_i16, "V8sV8sUs", "n")
-BUILTIN(__builtin_neon_vmulq_n_i32, "V4iV4iUi", "n")
-BUILTIN(__builtin_neon_vmulq_n_f32, "V4fV4ff", "n")
BUILTIN(__builtin_neon_vpadal_v, "V8cV8cV8ci", "n")
BUILTIN(__builtin_neon_vpadalq_v, "V16cV16cV16ci", "n")
BUILTIN(__builtin_neon_vpadd_v, "V8cV8cV8ci", "n")
@@ -158,24 +126,14 @@ BUILTIN(__builtin_neon_vqadd_v, "V8cV8cV8ci", "n")
BUILTIN(__builtin_neon_vqaddq_v, "V16cV16cV16ci", "n")
BUILTIN(__builtin_neon_vqdmlal_v, "V16cV16cV8cV8ci", "n")
BUILTIN(__builtin_neon_vqdmlal_lane_v, "V16cV16cV8cV8cii", "n")
-BUILTIN(__builtin_neon_vqdmlal_n_s16, "V4iV4iV4ss", "n")
-BUILTIN(__builtin_neon_vqdmlal_n_s32, "V2LLiV2LLiV2ii", "n")
BUILTIN(__builtin_neon_vqdmlsl_v, "V16cV16cV8cV8ci", "n")
BUILTIN(__builtin_neon_vqdmlsl_lane_v, "V16cV16cV8cV8cii", "n")
-BUILTIN(__builtin_neon_vqdmlsl_n_s16, "V4iV4iV4ss", "n")
-BUILTIN(__builtin_neon_vqdmlsl_n_s32, "V2LLiV2LLiV2ii", "n")
BUILTIN(__builtin_neon_vqdmulh_v, "V8cV8cV8ci", "n")
BUILTIN(__builtin_neon_vqdmulhq_v, "V16cV16cV16ci", "n")
BUILTIN(__builtin_neon_vqdmulh_lane_v, "V8cV8cV8cii", "n")
BUILTIN(__builtin_neon_vqdmulhq_lane_v, "V16cV16cV16cii", "n")
-BUILTIN(__builtin_neon_vqdmulh_n_s16, "V4sV4ss", "n")
-BUILTIN(__builtin_neon_vqdmulh_n_s32, "V2iV2ii", "n")
-BUILTIN(__builtin_neon_vqdmulhq_n_s16, "V8sV8ss", "n")
-BUILTIN(__builtin_neon_vqdmulhq_n_s32, "V4iV4ii", "n")
BUILTIN(__builtin_neon_vqdmull_v, "V16cV8cV8ci", "n")
BUILTIN(__builtin_neon_vqdmull_lane_v, "V16cV8cV8cii", "n")
-BUILTIN(__builtin_neon_vqdmull_n_s16, "V4iV4ss", "n")
-BUILTIN(__builtin_neon_vqdmull_n_s32, "V2LLiV2ii", "n")
BUILTIN(__builtin_neon_vqmovn_v, "V8cV16ci", "n")
BUILTIN(__builtin_neon_vqmovun_v, "V8cV16ci", "n")
BUILTIN(__builtin_neon_vqneg_v, "V8cV8ci", "n")
@@ -184,10 +142,6 @@ BUILTIN(__builtin_neon_vqrdmulh_v, "V8cV8cV8ci", "n")
BUILTIN(__builtin_neon_vqrdmulhq_v, "V16cV16cV16ci", "n")
BUILTIN(__builtin_neon_vqrdmulh_lane_v, "V8cV8cV8cii", "n")
BUILTIN(__builtin_neon_vqrdmulhq_lane_v, "V16cV16cV16cii", "n")
-BUILTIN(__builtin_neon_vqrdmulh_n_s16, "V4sV4ss", "n")
-BUILTIN(__builtin_neon_vqrdmulh_n_s32, "V2iV2ii", "n")
-BUILTIN(__builtin_neon_vqrdmulhq_n_s16, "V8sV8ss", "n")
-BUILTIN(__builtin_neon_vqrdmulhq_n_s32, "V4iV4ii", "n")
BUILTIN(__builtin_neon_vqrshl_v, "V8cV8cV8ci", "n")
BUILTIN(__builtin_neon_vqrshlq_v, "V16cV16cV16ci", "n")
BUILTIN(__builtin_neon_vqrshrn_n_v, "V8cV16cii", "n")
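
For reference, the signature strings above use Clang's builtin type encoding from Builtins.def. A rough decoding of one of the removed per-type builtins, to show why they become redundant (my annotation, not part of the commit):

// BUILTIN(__builtin_neon_vmlal_n_s16, "V4iV4iV4ss", "n")
//   V4i       -> returns a vector of 4 ints        (int32x4_t)
//   V4i, V4s  -> takes a 4 x int and a 4 x short   (int32x4_t, int16x4_t)
//   s         -> plus a scalar short               (int16_t)
// After this patch the _n and _lane forms are handled by splatting the scalar
// or lane and reusing the generic _v builtins, so these per-type entries are
// no longer needed.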
diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp
index 3e8fec5b35..dbf5352028 100644
--- a/lib/CodeGen/CGBuiltin.cpp
+++ b/lib/CodeGen/CGBuiltin.cpp
@@ -894,13 +894,24 @@ const llvm::Type *GetNeonType(LLVMContext &Ctx, unsigned type, bool q) {
return 0;
}
+Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
+ unsigned nElts = cast<llvm::VectorType>(V->getType())->getNumElements();
+ SmallVector<Constant*, 16> Indices(nElts, C);
+ Value* SV = llvm::ConstantVector::get(Indices.begin(), Indices.size());
+ return Builder.CreateShuffleVector(V, V, SV, "lane");
+}
+
Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
- const char *name) {
+ const char *name, bool splat) {
unsigned j = 0;
for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
ai != ae; ++ai, ++j)
Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);
+ if (splat) {
+ Ops[j-1] = EmitNeonSplat(Ops[j-1], cast<Constant>(Ops[j]));
+ Ops.resize(j);
+ }
return Builder.CreateCall(F, Ops.begin(), Ops.end(), name);
}
@@ -917,9 +928,10 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
a, b);
}
- llvm::SmallVector<Value*, 4> Ops;
// Determine the type of this overloaded NEON intrinsic.
assert(BuiltinID > ARM::BI__builtin_thread_pointer);
+
+ llvm::SmallVector<Value*, 4> Ops;
for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++)
Ops.push_back(EmitScalarExpr(E->getArg(i)));
@@ -931,11 +943,16 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
unsigned type = Result.getZExtValue();
bool usgn = type & 0x08;
bool quad = type & 0x10;
+ bool splat = false;
const llvm::Type *Ty = GetNeonType(VMContext, type & 0x7, quad);
if (!Ty)
return 0;
+ // FIXME: multiplies by scalar do not currently match their patterns because
+ // they are implemented via mul(splat(scalar_to_vector)) rather than
+ // mul(dup(scalar))
+
unsigned Int;
switch (BuiltinID) {
default: return 0;
@@ -1087,12 +1104,11 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
case ARM::BI__builtin_neon_vminq_v:
Int = usgn ? Intrinsic::arm_neon_vminu : Intrinsic::arm_neon_vmins;
return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vmin");
- // FIXME: vmlal_lane -> splat, drop imm
+ case ARM::BI__builtin_neon_vmlal_lane_v:
+ splat = true;
case ARM::BI__builtin_neon_vmlal_v:
Int = usgn ? Intrinsic::arm_neon_vmlalu : Intrinsic::arm_neon_vmlals;
- return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vmlal");
- // FIXME: vmlal_n, vmla_n, vmlsl_n, vmls_n, vmull_n, vmul_n,
- // vqdmlal_n, vqdmlsl_n, vqdmulh_n, vqdmull_n, vqrdmulh_n -> splat,-_n
+ return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vmlal", splat);
case ARM::BI__builtin_neon_vmovl_v:
Int = usgn ? Intrinsic::arm_neon_vmovlu : Intrinsic::arm_neon_vmovls;
return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vmovl");
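
In effect, the splat path added above lowers a _lane builtin by broadcasting the selected lane and then calling the plain vector intrinsic. At the source level the equivalence looks roughly like this (an illustration, not generated code from the patch):

#include <arm_neon.h>

int32x4_t mlal_lane_equiv(int32x4_t acc, int16x4_t v, int16x4_t lanes) {
  // vmlal_lane_s16(acc, v, lanes, 1) behaves like splatting lane 1 of 'lanes'
  // and doing an ordinary widening multiply-accumulate.
  return vmlal_s16(acc, v, vdup_lane_s16(lanes, 1));
}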
diff --git a/lib/CodeGen/CodeGenFunction.h b/lib/CodeGen/CodeGenFunction.h
index 0682a0474b..50e334061e 100644
--- a/lib/CodeGen/CodeGenFunction.h
+++ b/lib/CodeGen/CodeGenFunction.h
@@ -1147,7 +1147,8 @@ public:
llvm::Value *EmitARMBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
llvm::Value *EmitNeonCall(llvm::Function *F,
llvm::SmallVectorImpl<llvm::Value*> &O,
- const char *name);
+ const char *name, bool splat = false);
+ llvm::Value *EmitNeonSplat(llvm::Value *V, llvm::Constant *Idx);
llvm::Value *EmitX86BuiltinExpr(unsigned BuiltinID, const CallExpr *E);
llvm::Value *EmitPPCBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
diff --git a/lib/Headers/arm_neon.td b/lib/Headers/arm_neon.td
index 4dbecb66b9..fb298a67c6 100644
--- a/lib/Headers/arm_neon.td
+++ b/lib/Headers/arm_neon.td
@@ -14,29 +14,32 @@
class Op;
-def OP_NONE : Op;
-def OP_ADD : Op;
-def OP_SUB : Op;
-def OP_MUL : Op;
-def OP_MLA : Op;
-def OP_MLS : Op;
-def OP_EQ : Op;
-def OP_GE : Op;
-def OP_LE : Op;
-def OP_GT : Op;
-def OP_LT : Op;
-def OP_NEG : Op;
-def OP_NOT : Op;
-def OP_AND : Op;
-def OP_OR : Op;
-def OP_XOR : Op;
-def OP_ANDN : Op;
-def OP_ORN : Op;
-def OP_CAST : Op;
-def OP_HI : Op;
-def OP_LO : Op;
-def OP_CONC : Op;
-def OP_DUP : Op;
+def OP_NONE : Op;
+def OP_ADD : Op;
+def OP_SUB : Op;
+def OP_MUL : Op;
+def OP_MLA : Op;
+def OP_MLS : Op;
+def OP_MUL_N : Op;
+def OP_MLA_N : Op;
+def OP_MLS_N : Op;
+def OP_EQ : Op;
+def OP_GE : Op;
+def OP_LE : Op;
+def OP_GT : Op;
+def OP_LT : Op;
+def OP_NEG : Op;
+def OP_NOT : Op;
+def OP_AND : Op;
+def OP_OR : Op;
+def OP_XOR : Op;
+def OP_ANDN : Op;
+def OP_ORN : Op;
+def OP_CAST : Op;
+def OP_HI : Op;
+def OP_LO : Op;
+def OP_CONC : Op;
+def OP_DUP : Op;
class Inst <string p, string t, Op o> {
string Prototype = p;
@@ -48,7 +51,6 @@ class Inst <string p, string t, Op o> {
class SInst<string p, string t> : Inst<p, t, OP_NONE> {}
class IInst<string p, string t> : Inst<p, t, OP_NONE> {}
class WInst<string p, string t> : Inst<p, t, OP_NONE> {}
-class BInst<string p, string t> : Inst<p, t, OP_NONE> {}
// prototype: return (arg, arg, ...)
// v: void
@@ -64,6 +66,7 @@ class BInst<string p, string t> : Inst<p, t, OP_NONE> {}
// i: constant int
// l: constant uint64
// s: scalar of element type
+// a: scalar of element type (splat to vector type)
// k: default elt width, double num elts
// #: array of default vectors
// p: pointer type
@@ -273,21 +276,21 @@ def VQDMLAL_LANE : SInst<"wwddi", "si">;
def VMLS_LANE : IInst<"ddddi", "siUsUifQsQiQUsQUiQf">;
def VMLSL_LANE : SInst<"wwddi", "siUsUi">;
def VQDMLSL_LANE : SInst<"wwddi", "si">;
-def VMUL_N : IInst<"dds", "sifUsUiQsQiQfQUsQUi">;
-def VMULL_N : SInst<"wds", "siUsUi">;
+def VMUL_N : Inst<"dds", "sifUsUiQsQiQfQUsQUi", OP_MUL_N>;
+def VMULL_N : SInst<"wda", "siUsUi">;
def VMULL_LANE : SInst<"wddi", "siUsUi">;
-def VQDMULL_N : SInst<"wds", "si">;
+def VQDMULL_N : SInst<"wda", "si">;
def VQDMULL_LANE : SInst<"wddi", "si">;
-def VQDMULH_N : SInst<"dds", "siQsQi">;
+def VQDMULH_N : SInst<"dda", "siQsQi">;
def VQDMULH_LANE : SInst<"dddi", "siQsQi">;
-def VQRDMULH_N : SInst<"dds", "siQsQi">;
+def VQRDMULH_N : SInst<"dda", "siQsQi">;
def VQRDMULH_LANE : SInst<"dddi", "siQsQi">;
-def VMLA_N : IInst<"ddds", "siUsUifQsQiQUsQUiQf">;
-def VMLAL_N : SInst<"wwds", "siUsUi">;
-def VQDMLAL_N : SInst<"wwds", "si">;
-def VMLS_N : IInst<"ddds", "siUsUifQsQiQUsQUiQf">;
-def VMLSL_N : SInst<"wwds", "siUsUi">;
-def VQDMLSL_N : SInst<"wwds", "si">;
+def VMLA_N : Inst<"ddda", "siUsUifQsQiQUsQUiQf", OP_MLA_N>;
+def VMLAL_N : SInst<"wwda", "siUsUi">;
+def VQDMLAL_N : SInst<"wwda", "si">;
+def VMLS_N : Inst<"ddds", "siUsUifQsQiQUsQUiQf", OP_MLS_N>;
+def VMLSL_N : SInst<"wwda", "siUsUi">;
+def VQDMLSL_N : SInst<"wwda", "si">;
////////////////////////////////////////////////////////////////////////////////
// E.3.26 Vector Extract
@@ -319,7 +322,7 @@ def VORR : Inst<"ddd", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", OP_OR>;
def VEOR : Inst<"ddd", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", OP_XOR>;
def VBIC : Inst<"ddd", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", OP_ANDN>;
def VORN : Inst<"ddd", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", OP_ORN>;
-def VBSL : BInst<"dudd", "csilUcUsUiUlfPcPsQcQsQiQlQUcQUsQUiQUlQfQPcQPs">;
+def VBSL : SInst<"dudd", "csilUcUsUiUlfPcPsQcQsQiQlQUcQUsQUiQUlQfQPcQPs">;
////////////////////////////////////////////////////////////////////////////////
// E.3.30 Transposition operations
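
The new OP_MUL_N / OP_MLA_N / OP_MLS_N builders and the 'a' ("scalar, splat to vector") prototype modifier move the splat into the generated header. A hypothetical sketch of what the emitted arm_neon.h definitions would amount to (the real generated header may differ in naming and macro details):

#include <arm_neon.h>

// vmul_n: multiply every lane by the same scalar.
static inline float32x2_t mul_n_f32(float32x2_t v, float32_t s) {
  return vmul_f32(v, vdup_n_f32(s));
}

// vmla_n: acc + v * dup(s).
static inline float32x2_t mla_n_f32(float32x2_t acc, float32x2_t v,
                                    float32_t s) {
  return vmla_f32(acc, v, vdup_n_f32(s));
}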