summaryrefslogtreecommitdiff
path: root/compiler
diff options
context:
space:
mode:
Diffstat (limited to 'compiler')
-rw-r--r--compiler/cmm/CmmMachOp.hs10
-rw-r--r--compiler/cmm/PprC.hs9
-rw-r--r--compiler/codeGen/StgCmmPrim.hs288
-rw-r--r--compiler/ghc.mk16
-rw-r--r--compiler/llvmGen/LlvmCodeGen/CodeGen.hs6
-rw-r--r--compiler/nativeGen/X86/CodeGen.hs2
-rw-r--r--compiler/prelude/PrelNames.lhs15
-rw-r--r--compiler/prelude/PrimOp.lhs11
-rw-r--r--compiler/prelude/TysPrim.lhs40
-rw-r--r--compiler/prelude/primops.txt.pp518
10 files changed, 351 insertions, 564 deletions
diff --git a/compiler/cmm/CmmMachOp.hs b/compiler/cmm/CmmMachOp.hs
index 8d42bbd2cb..c009d15e25 100644
--- a/compiler/cmm/CmmMachOp.hs
+++ b/compiler/cmm/CmmMachOp.hs
@@ -118,6 +118,10 @@ data MachOp
| MO_VS_Rem Length Width
| MO_VS_Neg Length Width
+ -- Unsigned vector multiply/divide
+ | MO_VU_Quot Length Width
+ | MO_VU_Rem Length Width
+
-- Floting point vector element insertion and extraction operations
| MO_VF_Insert Length Width -- Insert scalar into vector
| MO_VF_Extract Length Width -- Extract scalar from vector
@@ -375,6 +379,9 @@ machOpResultType dflags mop tys =
MO_VS_Rem l w -> cmmVec l (cmmBits w)
MO_VS_Neg l w -> cmmVec l (cmmBits w)
+ MO_VU_Quot l w -> cmmVec l (cmmBits w)
+ MO_VU_Rem l w -> cmmVec l (cmmBits w)
+
MO_VF_Insert l w -> cmmVec l (cmmFloat w)
MO_VF_Extract _ w -> cmmFloat w
@@ -461,6 +468,9 @@ machOpArgReps dflags op =
MO_VS_Rem _ r -> [r,r]
MO_VS_Neg _ r -> [r]
+ MO_VU_Quot _ r -> [r,r]
+ MO_VU_Rem _ r -> [r,r]
+
MO_VF_Insert l r -> [typeWidth (vec l (cmmFloat r)),r,wordWidth dflags]
MO_VF_Extract l r -> [typeWidth (vec l (cmmFloat r)),wordWidth dflags]
diff --git a/compiler/cmm/PprC.hs b/compiler/cmm/PprC.hs
index d45b103954..c468161c73 100644
--- a/compiler/cmm/PprC.hs
+++ b/compiler/cmm/PprC.hs
@@ -651,6 +651,15 @@ pprMachOp_for_C mop = case mop of
(panic $ "PprC.pprMachOp_for_C: MO_VS_Neg"
++ " should have been handled earlier!")
+ MO_VU_Quot {} -> pprTrace "offending mop:"
+ (ptext $ sLit "MO_VU_Quot")
+ (panic $ "PprC.pprMachOp_for_C: MO_VU_Quot"
+ ++ " should have been handled earlier!")
+ MO_VU_Rem {} -> pprTrace "offending mop:"
+ (ptext $ sLit "MO_VU_Rem")
+ (panic $ "PprC.pprMachOp_for_C: MO_VU_Rem"
+ ++ " should have been handled earlier!")
+
MO_VF_Insert {} -> pprTrace "offending mop:"
(ptext $ sLit "MO_VF_Insert")
(panic $ "PprC.pprMachOp_for_C: MO_VF_Insert"
diff --git a/compiler/codeGen/StgCmmPrim.hs b/compiler/codeGen/StgCmmPrim.hs
index 8560f7cf1c..5250c9378e 100644
--- a/compiler/codeGen/StgCmmPrim.hs
+++ b/compiler/codeGen/StgCmmPrim.hs
@@ -40,7 +40,7 @@ import FastString
import Outputable
import Util
-import Control.Monad (liftM)
+import Control.Monad (liftM, when)
import Data.Bits
------------------------------------------------------------------------
@@ -380,14 +380,6 @@ emitPrimOp dflags res IndexOffAddrOp_Word8 args = doIndexOffAddrOp
emitPrimOp dflags res IndexOffAddrOp_Word16 args = doIndexOffAddrOp (Just (mo_u_16ToWord dflags)) b16 res args
emitPrimOp dflags res IndexOffAddrOp_Word32 args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
emitPrimOp _ res IndexOffAddrOp_Word64 args = doIndexOffAddrOp Nothing b64 res args
-emitPrimOp _ res IndexOffAddrOp_FloatX4 args = doIndexOffAddrOp Nothing vec4f32 res args
-emitPrimOp _ res IndexOffAddrOp_FloatAsFloatX4 args = doIndexOffAddrOpAs Nothing vec4f32 f32 res args
-emitPrimOp _ res IndexOffAddrOp_DoubleX2 args = doIndexOffAddrOp Nothing vec2f64 res args
-emitPrimOp _ res IndexOffAddrOp_DoubleAsDoubleX2 args = doIndexOffAddrOpAs Nothing vec2f64 f64 res args
-emitPrimOp _ res IndexOffAddrOp_Int32X4 args = doIndexOffAddrOp Nothing vec4b32 res args
-emitPrimOp _ res IndexOffAddrOp_Int32AsInt32X4 args = doIndexOffAddrOpAs Nothing vec4b32 b32 res args
-emitPrimOp _ res IndexOffAddrOp_Int64X2 args = doIndexOffAddrOp Nothing vec2b64 res args
-emitPrimOp _ res IndexOffAddrOp_Int64AsInt64X2 args = doIndexOffAddrOpAs Nothing vec2b64 b64 res args
-- ReadXXXoffAddr, which are identical, for our purposes, to IndexXXXoffAddr.
@@ -407,14 +399,6 @@ emitPrimOp dflags res ReadOffAddrOp_Word8 args = doIndexOffAddrOp (
emitPrimOp dflags res ReadOffAddrOp_Word16 args = doIndexOffAddrOp (Just (mo_u_16ToWord dflags)) b16 res args
emitPrimOp dflags res ReadOffAddrOp_Word32 args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
emitPrimOp _ res ReadOffAddrOp_Word64 args = doIndexOffAddrOp Nothing b64 res args
-emitPrimOp _ res ReadOffAddrOp_FloatX4 args = doIndexOffAddrOp Nothing vec4f32 res args
-emitPrimOp _ res ReadOffAddrOp_FloatAsFloatX4 args = doIndexOffAddrOpAs Nothing vec4f32 b32 res args
-emitPrimOp _ res ReadOffAddrOp_DoubleX2 args = doIndexOffAddrOp Nothing vec2f64 res args
-emitPrimOp _ res ReadOffAddrOp_DoubleAsDoubleX2 args = doIndexOffAddrOpAs Nothing vec2f64 b64 res args
-emitPrimOp _ res ReadOffAddrOp_Int32X4 args = doIndexOffAddrOp Nothing vec4b32 res args
-emitPrimOp _ res ReadOffAddrOp_Int32AsInt32X4 args = doIndexOffAddrOpAs Nothing vec4b32 b32 res args
-emitPrimOp _ res ReadOffAddrOp_Int64X2 args = doIndexOffAddrOp Nothing vec2b64 res args
-emitPrimOp _ res ReadOffAddrOp_Int64AsInt64X2 args = doIndexOffAddrOpAs Nothing vec2b64 b64 res args
-- IndexXXXArray
@@ -434,14 +418,6 @@ emitPrimOp dflags res IndexByteArrayOp_Word8 args = doIndexByteArrayO
emitPrimOp dflags res IndexByteArrayOp_Word16 args = doIndexByteArrayOp (Just (mo_u_16ToWord dflags)) b16 res args
emitPrimOp dflags res IndexByteArrayOp_Word32 args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
emitPrimOp _ res IndexByteArrayOp_Word64 args = doIndexByteArrayOp Nothing b64 res args
-emitPrimOp _ res IndexByteArrayOp_FloatX4 args = doIndexByteArrayOp Nothing vec4f32 res args
-emitPrimOp _ res IndexByteArrayOp_FloatAsFloatX4 args = doIndexByteArrayOpAs Nothing vec4f32 f32 res args
-emitPrimOp _ res IndexByteArrayOp_DoubleX2 args = doIndexByteArrayOp Nothing vec2f64 res args
-emitPrimOp _ res IndexByteArrayOp_DoubleAsDoubleX2 args = doIndexByteArrayOpAs Nothing vec2f64 f64 res args
-emitPrimOp _ res IndexByteArrayOp_Int32X4 args = doIndexByteArrayOp Nothing vec4b32 res args
-emitPrimOp _ res IndexByteArrayOp_Int32AsInt32X4 args = doIndexByteArrayOpAs Nothing vec4b32 b32 res args
-emitPrimOp _ res IndexByteArrayOp_Int64X2 args = doIndexByteArrayOp Nothing vec2b64 res args
-emitPrimOp _ res IndexByteArrayOp_Int64AsInt64X2 args = doIndexByteArrayOpAs Nothing vec2b64 b64 res args
-- ReadXXXArray, identical to IndexXXXArray.
@@ -461,14 +437,6 @@ emitPrimOp dflags res ReadByteArrayOp_Word8 args = doIndexByteArrayOp
emitPrimOp dflags res ReadByteArrayOp_Word16 args = doIndexByteArrayOp (Just (mo_u_16ToWord dflags)) b16 res args
emitPrimOp dflags res ReadByteArrayOp_Word32 args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
emitPrimOp _ res ReadByteArrayOp_Word64 args = doIndexByteArrayOp Nothing b64 res args
-emitPrimOp _ res ReadByteArrayOp_FloatX4 args = doIndexByteArrayOp Nothing vec4f32 res args
-emitPrimOp _ res ReadByteArrayOp_FloatAsFloatX4 args = doIndexByteArrayOpAs Nothing vec4f32 f32 res args
-emitPrimOp _ res ReadByteArrayOp_DoubleX2 args = doIndexByteArrayOp Nothing vec2f64 res args
-emitPrimOp _ res ReadByteArrayOp_DoubleAsDoubleX2 args = doIndexByteArrayOpAs Nothing vec2f64 f64 res args
-emitPrimOp _ res ReadByteArrayOp_Int32X4 args = doIndexByteArrayOp Nothing vec4b32 res args
-emitPrimOp _ res ReadByteArrayOp_Int32AsInt32X4 args = doIndexByteArrayOpAs Nothing vec4b32 b32 res args
-emitPrimOp _ res ReadByteArrayOp_Int64X2 args = doIndexByteArrayOp Nothing vec2b64 res args
-emitPrimOp _ res ReadByteArrayOp_Int64AsInt64X2 args = doIndexByteArrayOpAs Nothing vec2b64 b64 res args
-- WriteXXXoffAddr
@@ -488,14 +456,6 @@ emitPrimOp dflags res WriteOffAddrOp_Word8 args = doWriteOffAddrOp (J
emitPrimOp dflags res WriteOffAddrOp_Word16 args = doWriteOffAddrOp (Just (mo_WordTo16 dflags)) b16 res args
emitPrimOp dflags res WriteOffAddrOp_Word32 args = doWriteOffAddrOp (Just (mo_WordTo32 dflags)) b32 res args
emitPrimOp _ res WriteOffAddrOp_Word64 args = doWriteOffAddrOp Nothing b64 res args
-emitPrimOp _ res WriteOffAddrOp_FloatX4 args = doWriteOffAddrOp Nothing vec4f32 res args
-emitPrimOp _ res WriteOffAddrOp_FloatAsFloatX4 args = doWriteOffAddrOp Nothing f32 res args
-emitPrimOp _ res WriteOffAddrOp_DoubleX2 args = doWriteOffAddrOp Nothing vec2f64 res args
-emitPrimOp _ res WriteOffAddrOp_DoubleAsDoubleX2 args = doWriteOffAddrOp Nothing f64 res args
-emitPrimOp _ res WriteOffAddrOp_Int32X4 args = doWriteOffAddrOp Nothing vec4b32 res args
-emitPrimOp _ res WriteOffAddrOp_Int32AsInt32X4 args = doWriteOffAddrOp Nothing b32 res args
-emitPrimOp _ res WriteOffAddrOp_Int64X2 args = doWriteOffAddrOp Nothing vec2b64 res args
-emitPrimOp _ res WriteOffAddrOp_Int64AsInt64X2 args = doWriteOffAddrOp Nothing b64 res args
-- WriteXXXArray
@@ -515,14 +475,6 @@ emitPrimOp dflags res WriteByteArrayOp_Word8 args = doWriteByteArrayO
emitPrimOp dflags res WriteByteArrayOp_Word16 args = doWriteByteArrayOp (Just (mo_WordTo16 dflags)) b16 res args
emitPrimOp dflags res WriteByteArrayOp_Word32 args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b32 res args
emitPrimOp _ res WriteByteArrayOp_Word64 args = doWriteByteArrayOp Nothing b64 res args
-emitPrimOp _ res WriteByteArrayOp_FloatX4 args = doWriteByteArrayOp Nothing vec4f32 res args
-emitPrimOp _ res WriteByteArrayOp_FloatAsFloatX4 args = doWriteByteArrayOp Nothing f32 res args
-emitPrimOp _ res WriteByteArrayOp_DoubleX2 args = doWriteByteArrayOp Nothing vec2f64 res args
-emitPrimOp _ res WriteByteArrayOp_DoubleAsDoubleX2 args = doWriteByteArrayOp Nothing f64 res args
-emitPrimOp _ res WriteByteArrayOp_Int32X4 args = doWriteByteArrayOp Nothing vec4b32 res args
-emitPrimOp _ res WriteByteArrayOp_Int32AsInt32X4 args = doWriteByteArrayOp Nothing b32 res args
-emitPrimOp _ res WriteByteArrayOp_Int64X2 args = doWriteByteArrayOp Nothing vec2b64 res args
-emitPrimOp _ res WriteByteArrayOp_Int64AsInt64X2 args = doWriteByteArrayOp Nothing b64 res args
-- Copying and setting byte arrays
emitPrimOp _ [] CopyByteArrayOp [src,src_off,dst,dst_off,n] =
@@ -556,78 +508,136 @@ emitPrimOp _ [res] Word2FloatOp [w] = emitPrimCall [res]
emitPrimOp _ [res] Word2DoubleOp [w] = emitPrimCall [res]
(MO_UF_Conv W64) [w]
--- SIMD vector packing and unpacking
-emitPrimOp _ [res] FloatToFloatX4Op [e] =
- doVecPackOp Nothing vec4f32 zero [e,e,e,e] res
+-- SIMD primops
+emitPrimOp dflags [res] (VecBroadcastOp vcat n w) [e] =
+ doVecPackOp (vecElemInjectCast dflags vcat w) ty zeros (replicate n e) res
where
- zero :: CmmExpr
- zero = CmmLit $ CmmVec (replicate 4 (CmmFloat 0 W32))
+ zeros :: CmmExpr
+ zeros = CmmLit $ CmmVec (replicate n zero)
+
+ zero :: CmmLit
+ zero = case vcat of
+ IntVec -> CmmInt 0 w
+ WordVec -> CmmInt 0 w
+ FloatVec -> CmmFloat 0 w
+
+ ty :: CmmType
+ ty = vecVmmType vcat n w
+
+emitPrimOp dflags [res] (VecPackOp vcat n w) es = do
+ when (length es /= n) $
+ panic "emitPrimOp: VecPackOp has wrong number of arguments"
+ doVecPackOp (vecElemInjectCast dflags vcat w) ty zeros es res
+ where
+ zeros :: CmmExpr
+ zeros = CmmLit $ CmmVec (replicate n zero)
+
+ zero :: CmmLit
+ zero = case vcat of
+ IntVec -> CmmInt 0 w
+ WordVec -> CmmInt 0 w
+ FloatVec -> CmmFloat 0 w
+
+ ty :: CmmType
+ ty = vecVmmType vcat n w
+
+emitPrimOp dflags res (VecUnpackOp vcat n w) [arg] = do
+ when (length res /= n) $
+ panic "emitPrimOp: VecUnpackOp has wrong number of results"
+ doVecUnpackOp (vecElemProjectCast dflags vcat w) ty arg res
+ where
+ ty :: CmmType
+ ty = vecVmmType vcat n w
-emitPrimOp _ [res] FloatX4PackOp es@[_,_,_,_] =
- doVecPackOp Nothing vec4f32 zero es res
+emitPrimOp dflags [res] (VecInsertOp vcat n w) [v,e,i] =
+ doVecInsertOp (vecElemInjectCast dflags vcat w) ty v e i res
where
- zero :: CmmExpr
- zero = CmmLit $ CmmVec (replicate 4 (CmmFloat 0 W32))
+ ty :: CmmType
+ ty = vecVmmType vcat n w
-emitPrimOp _ res@[_,_,_,_] FloatX4UnpackOp [arg] =
- doVecUnpackOp Nothing vec4f32 arg res
+emitPrimOp _ res (VecIndexByteArrayOp vcat n w) args =
+ doIndexByteArrayOp Nothing ty res args
+ where
+ ty :: CmmType
+ ty = vecVmmType vcat n w
-emitPrimOp _ [res] FloatX4InsertOp [v,e,i] =
- doVecInsertOp Nothing vec4f32 v e i res
+emitPrimOp _ res (VecReadByteArrayOp vcat n w) args =
+ doIndexByteArrayOp Nothing ty res args
+ where
+ ty :: CmmType
+ ty = vecVmmType vcat n w
-emitPrimOp _ [res] DoubleToDoubleX2Op [e] =
- doVecPackOp Nothing vec2f64 zero [e,e] res
+emitPrimOp _ res (VecWriteByteArrayOp vcat n w) args =
+ doWriteByteArrayOp Nothing ty res args
where
- zero :: CmmExpr
- zero = CmmLit $ CmmVec (replicate 2 (CmmFloat 0 W64))
+ ty :: CmmType
+ ty = vecVmmType vcat n w
-emitPrimOp _ [res] DoubleX2PackOp es@[_,_] =
- doVecPackOp Nothing vec2f64 zero es res
+emitPrimOp _ res (VecIndexOffAddrOp vcat n w) args =
+ doIndexOffAddrOp Nothing ty res args
where
- zero :: CmmExpr
- zero = CmmLit $ CmmVec (replicate 2 (CmmFloat 0 W64))
+ ty :: CmmType
+ ty = vecVmmType vcat n w
-emitPrimOp _ res@[_,_] DoubleX2UnpackOp [arg] =
- doVecUnpackOp Nothing vec2f64 arg res
+emitPrimOp _ res (VecReadOffAddrOp vcat n w) args =
+ doIndexOffAddrOp Nothing ty res args
+ where
+ ty :: CmmType
+ ty = vecVmmType vcat n w
-emitPrimOp _ [res] DoubleX2InsertOp [v,e,i] =
- doVecInsertOp Nothing vec2f64 v e i res
+emitPrimOp _ res (VecWriteOffAddrOp vcat n w) args =
+ doWriteOffAddrOp Nothing ty res args
+ where
+ ty :: CmmType
+ ty = vecVmmType vcat n w
-emitPrimOp dflags [res] Int32ToInt32X4Op [e] =
- doVecPackOp (Just (mo_WordTo32 dflags)) vec4b32 zero [e,e,e,e] res
+emitPrimOp _ res (VecIndexScalarByteArrayOp vcat n w) args =
+ doIndexByteArrayOpAs Nothing vecty ty res args
where
- zero :: CmmExpr
- zero = CmmLit $ CmmVec (replicate 4 (CmmInt 0 W32))
+ vecty :: CmmType
+ vecty = vecVmmType vcat n w
-emitPrimOp dflags [res] Int32X4PackOp es@[_,_,_,_] =
- doVecPackOp (Just (mo_WordTo32 dflags)) vec4b32 zero es res
+ ty :: CmmType
+ ty = vecCmmCat vcat w
+
+emitPrimOp _ res (VecReadScalarByteArrayOp vcat n w) args =
+ doIndexByteArrayOpAs Nothing vecty ty res args
where
- zero :: CmmExpr
- zero = CmmLit $ CmmVec (replicate 4 (CmmInt 0 W32))
+ vecty :: CmmType
+ vecty = vecVmmType vcat n w
-emitPrimOp dflags res@[_,_,_,_] Int32X4UnpackOp [arg] =
- doVecUnpackOp (Just (mo_s_32ToWord dflags)) vec4b32 arg res
+ ty :: CmmType
+ ty = vecCmmCat vcat w
-emitPrimOp dflags [res] Int32X4InsertOp [v,e,i] =
- doVecInsertOp (Just (mo_WordTo32 dflags)) vec4b32 v e i res
+emitPrimOp _ res (VecWriteScalarByteArrayOp vcat _ w) args =
+ doWriteByteArrayOp Nothing ty res args
+ where
+ ty :: CmmType
+ ty = vecCmmCat vcat w
-emitPrimOp _ [res] Int64ToInt64X2Op [e] =
- doVecPackOp Nothing vec2b64 zero [e,e] res
+emitPrimOp _ res (VecIndexScalarOffAddrOp vcat n w) args =
+ doIndexOffAddrOpAs Nothing vecty ty res args
where
- zero :: CmmExpr
- zero = CmmLit $ CmmVec (replicate 2 (CmmInt 0 W64))
+ vecty :: CmmType
+ vecty = vecVmmType vcat n w
-emitPrimOp _ [res] Int64X2PackOp es@[_,_] =
- doVecPackOp Nothing vec2b64 zero es res
+ ty :: CmmType
+ ty = vecCmmCat vcat w
+
+emitPrimOp _ res (VecReadScalarOffAddrOp vcat n w) args =
+ doIndexOffAddrOpAs Nothing vecty ty res args
where
- zero :: CmmExpr
- zero = CmmLit $ CmmVec (replicate 2 (CmmInt 0 W64))
+ vecty :: CmmType
+ vecty = vecVmmType vcat n w
-emitPrimOp _ res@[_,_] Int64X2UnpackOp [arg] =
- doVecUnpackOp Nothing vec2b64 arg res
+ ty :: CmmType
+ ty = vecCmmCat vcat w
-emitPrimOp _ [res] Int64X2InsertOp [v,e,i] =
- doVecInsertOp Nothing vec2b64 v e i res
+emitPrimOp _ res (VecWriteScalarOffAddrOp vcat _ w) args =
+ doWriteOffAddrOp Nothing ty res args
+ where
+ ty :: CmmType
+ ty = vecCmmCat vcat w
-- Prefetch
emitPrimOp _ res PrefetchByteArrayOp args = doPrefetchByteArrayOp res args
@@ -944,33 +954,26 @@ translateOp _ FloatMulOp = Just (MO_F_Mul W32)
translateOp _ FloatDivOp = Just (MO_F_Quot W32)
translateOp _ FloatNegOp = Just (MO_F_Neg W32)
--- Floating point vector ops
-
-translateOp _ FloatX4AddOp = Just (MO_VF_Add 4 W32)
-translateOp _ FloatX4SubOp = Just (MO_VF_Sub 4 W32)
-translateOp _ FloatX4MulOp = Just (MO_VF_Mul 4 W32)
-translateOp _ FloatX4DivOp = Just (MO_VF_Quot 4 W32)
-translateOp _ FloatX4NegOp = Just (MO_VF_Neg 4 W32)
-
-translateOp _ DoubleX2AddOp = Just (MO_VF_Add 2 W64)
-translateOp _ DoubleX2SubOp = Just (MO_VF_Sub 2 W64)
-translateOp _ DoubleX2MulOp = Just (MO_VF_Mul 2 W64)
-translateOp _ DoubleX2DivOp = Just (MO_VF_Quot 2 W64)
-translateOp _ DoubleX2NegOp = Just (MO_VF_Neg 2 W64)
-
-translateOp _ Int32X4AddOp = Just (MO_V_Add 4 W32)
-translateOp _ Int32X4SubOp = Just (MO_V_Sub 4 W32)
-translateOp _ Int32X4MulOp = Just (MO_V_Mul 4 W32)
-translateOp _ Int32X4QuotOp = Just (MO_VS_Quot 4 W32)
-translateOp _ Int32X4RemOp = Just (MO_VS_Rem 4 W32)
-translateOp _ Int32X4NegOp = Just (MO_VS_Neg 4 W32)
-
-translateOp _ Int64X2AddOp = Just (MO_V_Add 2 W64)
-translateOp _ Int64X2SubOp = Just (MO_V_Sub 2 W64)
-translateOp _ Int64X2MulOp = Just (MO_V_Mul 2 W64)
-translateOp _ Int64X2QuotOp = Just (MO_VS_Quot 2 W64)
-translateOp _ Int64X2RemOp = Just (MO_VS_Rem 2 W64)
-translateOp _ Int64X2NegOp = Just (MO_VS_Neg 2 W64)
+-- Vector ops
+
+translateOp _ (VecAddOp FloatVec n w) = Just (MO_VF_Add n w)
+translateOp _ (VecSubOp FloatVec n w) = Just (MO_VF_Sub n w)
+translateOp _ (VecMulOp FloatVec n w) = Just (MO_VF_Mul n w)
+translateOp _ (VecDivOp FloatVec n w) = Just (MO_VF_Quot n w)
+translateOp _ (VecNegOp FloatVec n w) = Just (MO_VF_Neg n w)
+
+translateOp _ (VecAddOp IntVec n w) = Just (MO_V_Add n w)
+translateOp _ (VecSubOp IntVec n w) = Just (MO_V_Sub n w)
+translateOp _ (VecMulOp IntVec n w) = Just (MO_V_Mul n w)
+translateOp _ (VecQuotOp IntVec n w) = Just (MO_VS_Quot n w)
+translateOp _ (VecRemOp IntVec n w) = Just (MO_VS_Rem n w)
+translateOp _ (VecNegOp IntVec n w) = Just (MO_VS_Neg n w)
+
+translateOp _ (VecAddOp WordVec n w) = Just (MO_V_Add n w)
+translateOp _ (VecSubOp WordVec n w) = Just (MO_V_Sub n w)
+translateOp _ (VecMulOp WordVec n w) = Just (MO_V_Mul n w)
+translateOp _ (VecQuotOp WordVec n w) = Just (MO_VU_Quot n w)
+translateOp _ (VecRemOp WordVec n w) = Just (MO_VU_Rem n w)
-- Conversions
@@ -1183,6 +1186,41 @@ setInfo :: CmmExpr -> CmmExpr -> CmmAGraph
setInfo closure_ptr info_ptr = mkStore closure_ptr info_ptr
------------------------------------------------------------------------------
+-- Helpers for translating vector primops.
+
+vecVmmType :: PrimOpVecCat -> Length -> Width -> CmmType
+vecVmmType pocat n w = vec n (vecCmmCat pocat w)
+
+vecCmmCat :: PrimOpVecCat -> Width -> CmmType
+vecCmmCat IntVec = cmmBits
+vecCmmCat WordVec = cmmBits
+vecCmmCat FloatVec = cmmFloat
+
+vecElemInjectCast :: DynFlags -> PrimOpVecCat -> Width -> Maybe MachOp
+vecElemInjectCast _ FloatVec _ = Nothing
+vecElemInjectCast dflags IntVec W8 = Just (mo_WordTo8 dflags)
+vecElemInjectCast dflags IntVec W16 = Just (mo_WordTo16 dflags)
+vecElemInjectCast dflags IntVec W32 = Just (mo_WordTo32 dflags)
+vecElemInjectCast _ IntVec W64 = Nothing
+vecElemInjectCast dflags WordVec W8 = Just (mo_WordTo8 dflags)
+vecElemInjectCast dflags WordVec W16 = Just (mo_WordTo16 dflags)
+vecElemInjectCast dflags WordVec W32 = Just (mo_WordTo32 dflags)
+vecElemInjectCast _ WordVec W64 = Nothing
+vecElemInjectCast _ _ _ = Nothing
+
+vecElemProjectCast :: DynFlags -> PrimOpVecCat -> Width -> Maybe MachOp
+vecElemProjectCast _ FloatVec _ = Nothing
+vecElemProjectCast dflags IntVec W8 = Just (mo_s_8ToWord dflags)
+vecElemProjectCast dflags IntVec W16 = Just (mo_s_16ToWord dflags)
+vecElemProjectCast dflags IntVec W32 = Just (mo_s_32ToWord dflags)
+vecElemProjectCast _ IntVec W64 = Nothing
+vecElemProjectCast dflags WordVec W8 = Just (mo_u_8ToWord dflags)
+vecElemProjectCast dflags WordVec W16 = Just (mo_u_16ToWord dflags)
+vecElemProjectCast dflags WordVec W32 = Just (mo_u_32ToWord dflags)
+vecElemProjectCast _ WordVec W64 = Nothing
+vecElemProjectCast _ _ _ = Nothing
+
+------------------------------------------------------------------------------
-- Helpers for translating vector packing and unpacking.
doVecPackOp :: Maybe MachOp -- Cast from element to vector component
diff --git a/compiler/ghc.mk b/compiler/ghc.mk
index 4fdadd7c30..5b9610103b 100644
--- a/compiler/ghc.mk
+++ b/compiler/ghc.mk
@@ -250,8 +250,12 @@ PRIMOP_BITS_NAMES = primop-data-decl.hs-incl \
primop-code-size.hs-incl \
primop-can-fail.hs-incl \
primop-strictness.hs-incl \
- primop-fixity.hs-incl \
- primop-primop-info.hs-incl
+ primop-fixity.hs-incl \
+ primop-primop-info.hs-incl \
+ primop-vector-uniques.hs-incl \
+ primop-vector-tys.hs-incl \
+ primop-vector-tys-exports.hs-incl \
+ primop-vector-tycons.hs-incl
PRIMOP_BITS_STAGE1 = $(addprefix compiler/stage1/build/,$(PRIMOP_BITS_NAMES))
PRIMOP_BITS_STAGE2 = $(addprefix compiler/stage2/build/,$(PRIMOP_BITS_NAMES))
@@ -290,6 +294,14 @@ compiler/stage$1/build/primop-fixity.hs-incl: compiler/stage$1/build/primops.txt
"$$(genprimopcode_INPLACE)" --fixity < $$< > $$@
compiler/stage$1/build/primop-primop-info.hs-incl: compiler/stage$1/build/primops.txt $$$$(genprimopcode_INPLACE)
"$$(genprimopcode_INPLACE)" --primop-primop-info < $$< > $$@
+compiler/stage$1/build/primop-vector-uniques.hs-incl: compiler/stage$1/build/primops.txt $$$$(genprimopcode_INPLACE)
+ "$$(genprimopcode_INPLACE)" --primop-vector-uniques < $$< > $$@
+compiler/stage$1/build/primop-vector-tys.hs-incl: compiler/stage$1/build/primops.txt $$$$(genprimopcode_INPLACE)
+ "$$(genprimopcode_INPLACE)" --primop-vector-tys < $$< > $$@
+compiler/stage$1/build/primop-vector-tys-exports.hs-incl: compiler/stage$1/build/primops.txt $$$$(genprimopcode_INPLACE)
+ "$$(genprimopcode_INPLACE)" --primop-vector-tys-exports < $$< > $$@
+compiler/stage$1/build/primop-vector-tycons.hs-incl: compiler/stage$1/build/primops.txt $$$$(genprimopcode_INPLACE)
+ "$$(genprimopcode_INPLACE)" --primop-vector-tycons < $$< > $$@
# Usages aren't used any more; but the generator
# can still generate them if we want them back
diff --git a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs
index 33107c0b68..c52640b17f 100644
--- a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs
+++ b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs
@@ -967,6 +967,9 @@ genMachOp _ op [x] = case op of
MO_VS_Quot _ _ -> panicOp
MO_VS_Rem _ _ -> panicOp
+
+ MO_VU_Quot _ _ -> panicOp
+ MO_VU_Rem _ _ -> panicOp
MO_VF_Insert _ _ -> panicOp
MO_VF_Extract _ _ -> panicOp
@@ -1140,6 +1143,9 @@ genMachOp_slow opt op [x, y] = case op of
MO_VS_Quot l w -> genCastBinMach (LMVector l (widthToLlvmInt w)) LM_MO_SDiv
MO_VS_Rem l w -> genCastBinMach (LMVector l (widthToLlvmInt w)) LM_MO_SRem
+
+ MO_VU_Quot l w -> genCastBinMach (LMVector l (widthToLlvmInt w)) LM_MO_UDiv
+ MO_VU_Rem l w -> genCastBinMach (LMVector l (widthToLlvmInt w)) LM_MO_URem
MO_VF_Add l w -> genCastBinMach (LMVector l (widthToLlvmFloat w)) LM_MO_FAdd
MO_VF_Sub l w -> genCastBinMach (LMVector l (widthToLlvmFloat w)) LM_MO_FSub
diff --git a/compiler/nativeGen/X86/CodeGen.hs b/compiler/nativeGen/X86/CodeGen.hs
index f6143d3fb9..e18da25347 100644
--- a/compiler/nativeGen/X86/CodeGen.hs
+++ b/compiler/nativeGen/X86/CodeGen.hs
@@ -610,6 +610,8 @@ getRegister' dflags is32Bit (CmmMachOp mop [x]) = do -- unary MachOps
MO_VS_Quot {} -> needLlvm
MO_VS_Rem {} -> needLlvm
MO_VS_Neg {} -> needLlvm
+ MO_VU_Quot {} -> needLlvm
+ MO_VU_Rem {} -> needLlvm
MO_VF_Insert {} -> needLlvm
MO_VF_Extract {} -> needLlvm
MO_VF_Add {} -> needLlvm
diff --git a/compiler/prelude/PrelNames.lhs b/compiler/prelude/PrelNames.lhs
index 81fb9be52a..07730e653d 100644
--- a/compiler/prelude/PrelNames.lhs
+++ b/compiler/prelude/PrelNames.lhs
@@ -1474,15 +1474,6 @@ typeNatMulTyFamNameKey = mkPreludeTyConUnique 163
typeNatExpTyFamNameKey = mkPreludeTyConUnique 164
typeNatLeqTyFamNameKey = mkPreludeTyConUnique 165
--- SIMD vector types (Unique keys)
-floatX4PrimTyConKey, doubleX2PrimTyConKey, int32X4PrimTyConKey,
- int64X2PrimTyConKey :: Unique
-
-floatX4PrimTyConKey = mkPreludeTyConUnique 170
-doubleX2PrimTyConKey = mkPreludeTyConUnique 171
-int32X4PrimTyConKey = mkPreludeTyConUnique 172
-int64X2PrimTyConKey = mkPreludeTyConUnique 173
-
ntTyConKey:: Unique
ntTyConKey = mkPreludeTyConUnique 174
coercibleTyConKey :: Unique
@@ -1492,6 +1483,12 @@ coercibleTyConKey = mkPreludeTyConUnique 175
-- USES TyConUniques 200-299
-----------------------------------------------------
+----------------------- SIMD ------------------------
+-- USES TyConUniques 300-399
+-----------------------------------------------------
+
+#include "primop-vector-uniques.hs-incl"
+
unitTyConKey :: Unique
unitTyConKey = mkTupleTyConUnique BoxedTuple 0
\end{code}
diff --git a/compiler/prelude/PrimOp.lhs b/compiler/prelude/PrimOp.lhs
index 8b1970c37f..22753ee3ea 100644
--- a/compiler/prelude/PrimOp.lhs
+++ b/compiler/prelude/PrimOp.lhs
@@ -5,7 +5,7 @@
\begin{code}
module PrimOp (
- PrimOp(..), allThePrimOps,
+ PrimOp(..), PrimOpVecCat(..), allThePrimOps,
primOpType, primOpSig,
primOpTag, maxPrimOpTag, primOpOcc,
@@ -25,6 +25,7 @@ module PrimOp (
import TysPrim
import TysWiredIn
+import CmmType
import Demand
import Var ( TyVar )
import OccName ( OccName, pprOccName, mkVarOccFS )
@@ -64,6 +65,7 @@ primOpTag op = iBox (tagOf_PrimOp op)
-- supplies
-- tagOf_PrimOp :: PrimOp -> FastInt
#include "primop-tag.hs-incl"
+tagOf_PrimOp _ = error "tagOf_PrimOp: unknown primop"
instance Eq PrimOp where
@@ -82,6 +84,12 @@ instance Outputable PrimOp where
ppr op = pprPrimOp op
\end{code}
+\begin{code}
+data PrimOpVecCat = IntVec
+ | WordVec
+ | FloatVec
+\end{code}
+
An @Enum@-derived list would be better; meanwhile... (ToDo)
\begin{code}
@@ -173,6 +181,7 @@ else, notably a type, can be constructed) for each @PrimOp@.
\begin{code}
primOpInfo :: PrimOp -> PrimOpInfo
#include "primop-primop-info.hs-incl"
+primOpInfo _ = error "primOpInfo: unknown primop"
\end{code}
Here are a load of comments from the old primOp info:
diff --git a/compiler/prelude/TysPrim.lhs b/compiler/prelude/TysPrim.lhs
index f166065b22..b17f1a6f9a 100644
--- a/compiler/prelude/TysPrim.lhs
+++ b/compiler/prelude/TysPrim.lhs
@@ -76,11 +76,8 @@ module TysPrim(
-- * Any
anyTy, anyTyCon, anyTypeOfKind,
- -- * SIMD
- floatX4PrimTyCon, floatX4PrimTy,
- doubleX2PrimTyCon, doubleX2PrimTy,
- int32X4PrimTyCon, int32X4PrimTy,
- int64X2PrimTyCon, int64X2PrimTy
+ -- * SIMD
+#include "primop-vector-tys-exports.hs-incl"
) where
#include "HsVersions.h"
@@ -144,10 +141,7 @@ primTyCons
, superKindTyCon
, anyKindTyCon
- , floatX4PrimTyCon
- , doubleX2PrimTyCon
- , int32X4PrimTyCon
- , int64X2PrimTyCon
+#include "primop-vector-tycons.hs-incl"
]
mkPrimTc :: FastString -> Unique -> TyCon -> Name
@@ -157,7 +151,7 @@ mkPrimTc fs unique tycon
(ATyCon tycon) -- Relevant TyCon
UserSyntax -- None are built-in syntax
-charPrimTyConName, intPrimTyConName, int32PrimTyConName, int64PrimTyConName, wordPrimTyConName, word32PrimTyConName, word64PrimTyConName, addrPrimTyConName, floatPrimTyConName, doublePrimTyConName, statePrimTyConName, realWorldTyConName, arrayPrimTyConName, arrayArrayPrimTyConName, byteArrayPrimTyConName, mutableArrayPrimTyConName, mutableByteArrayPrimTyConName, mutableArrayArrayPrimTyConName, mutVarPrimTyConName, mVarPrimTyConName, tVarPrimTyConName, stablePtrPrimTyConName, stableNamePrimTyConName, bcoPrimTyConName, weakPrimTyConName, threadIdPrimTyConName, eqPrimTyConName, eqReprPrimTyConName, floatX4PrimTyConName, doubleX2PrimTyConName, int32X4PrimTyConName, int64X2PrimTyConName :: Name
+charPrimTyConName, intPrimTyConName, int32PrimTyConName, int64PrimTyConName, wordPrimTyConName, word32PrimTyConName, word64PrimTyConName, addrPrimTyConName, floatPrimTyConName, doublePrimTyConName, statePrimTyConName, realWorldTyConName, arrayPrimTyConName, arrayArrayPrimTyConName, byteArrayPrimTyConName, mutableArrayPrimTyConName, mutableByteArrayPrimTyConName, mutableArrayArrayPrimTyConName, mutVarPrimTyConName, mVarPrimTyConName, tVarPrimTyConName, stablePtrPrimTyConName, stableNamePrimTyConName, bcoPrimTyConName, weakPrimTyConName, threadIdPrimTyConName, eqPrimTyConName, eqReprPrimTyConName :: Name
charPrimTyConName = mkPrimTc (fsLit "Char#") charPrimTyConKey charPrimTyCon
intPrimTyConName = mkPrimTc (fsLit "Int#") intPrimTyConKey intPrimTyCon
int32PrimTyConName = mkPrimTc (fsLit "Int32#") int32PrimTyConKey int32PrimTyCon
@@ -186,10 +180,6 @@ stableNamePrimTyConName = mkPrimTc (fsLit "StableName#") stableNamePrimTyC
bcoPrimTyConName = mkPrimTc (fsLit "BCO#") bcoPrimTyConKey bcoPrimTyCon
weakPrimTyConName = mkPrimTc (fsLit "Weak#") weakPrimTyConKey weakPrimTyCon
threadIdPrimTyConName = mkPrimTc (fsLit "ThreadId#") threadIdPrimTyConKey threadIdPrimTyCon
-floatX4PrimTyConName = mkPrimTc (fsLit "FloatX4#") floatX4PrimTyConKey floatX4PrimTyCon
-doubleX2PrimTyConName = mkPrimTc (fsLit "DoubleX2#") doubleX2PrimTyConKey doubleX2PrimTyCon
-int32X4PrimTyConName = mkPrimTc (fsLit "Int32X4#") int32X4PrimTyConKey int32X4PrimTyCon
-int64X2PrimTyConName = mkPrimTc (fsLit "Int64X2#") int64X2PrimTyConKey int64X2PrimTyCon
\end{code}
%************************************************************************
@@ -766,28 +756,10 @@ anyTypeOfKind kind = TyConApp anyTyCon [kind]
%************************************************************************
%* *
-\subsection{SIMD vector type}
+\subsection{SIMD vector types}
%* *
%************************************************************************
\begin{code}
-floatX4PrimTy :: Type
-floatX4PrimTy = mkTyConTy floatX4PrimTyCon
-floatX4PrimTyCon :: TyCon
-floatX4PrimTyCon = pcPrimTyCon0 floatX4PrimTyConName (VecRep 4 FloatElemRep)
-
-doubleX2PrimTy :: Type
-doubleX2PrimTy = mkTyConTy doubleX2PrimTyCon
-doubleX2PrimTyCon :: TyCon
-doubleX2PrimTyCon = pcPrimTyCon0 doubleX2PrimTyConName (VecRep 2 DoubleElemRep)
-
-int32X4PrimTy :: Type
-int32X4PrimTy = mkTyConTy int32X4PrimTyCon
-int32X4PrimTyCon :: TyCon
-int32X4PrimTyCon = pcPrimTyCon0 int32X4PrimTyConName (VecRep 4 Int32ElemRep)
-
-int64X2PrimTy :: Type
-int64X2PrimTy = mkTyConTy int64X2PrimTyCon
-int64X2PrimTyCon :: TyCon
-int64X2PrimTyCon = pcPrimTyCon0 int64X2PrimTyConName (VecRep 2 Int64ElemRep)
+#include "primop-vector-tys.hs-incl"
\end{code}
diff --git a/compiler/prelude/primops.txt.pp b/compiler/prelude/primops.txt.pp
index cfd6afa4c6..f4b7b6c5d0 100644
--- a/compiler/prelude/primops.txt.pp
+++ b/compiler/prelude/primops.txt.pp
@@ -39,6 +39,22 @@
-- (eg, out_of_line), whilst avoiding parsing complex expressions
-- needed for strictness info.
+-- The vector attribute is rather special. It takes a list of 3-tuples, each of
+-- which is of the form <ELEM_TYPE,SCALAR_TYPE,LENGTH>. ELEM_TYPE is the type of
+-- the elements in the vector; LENGTH is the length of the vector; and
+-- SCALAR_TYPE is the scalar type used to inject to/project from vector
+-- element. Note that ELEM_TYPE and SCALAR_TYPE are not the same; for example,
+-- to broadcast a scalar value to a vector whose elements are of type Int8, we
+-- use an Int#.
+
+-- When a primtype or primop has a vector attribute, it is instantiated at each
+-- 3-tuple in the list of 3-tuples. That is, the vector attribute allows us to
+-- define a family of types or primops. Vector support also adds three new
+-- keywords: VECTOR, SCALAR, and VECTUPLE. These keywords are expanded to types
+-- derived from the 3-tuple. For the 3-tuple <Int64,INT64,2>, VECTOR expands to
+-- Int64X2#, SCALAR expands to INT64, and VECTUPLE expands to (# INT64, INT64
+-- #).
+
defaults
has_side_effects = False
out_of_line = False -- See Note Note [PrimOp can_fail and has_side_effects] in PrimOp
@@ -48,6 +64,7 @@ defaults
strictness = { \ arity -> mkStrictSig (mkTopDmdType (replicate arity topDmd) topRes) }
fixity = Nothing
llvm_only = False
+ vector = []
-- Currently, documentation is produced using latex, so contents of
-- description fields should be legal latex. Descriptions can contain
@@ -2373,479 +2390,194 @@ primclass Coercible a b
}
------------------------------------------------------------------------
-section "Float SIMD Vectors"
- {Operations on SIMD vectors of 4 single-precision (32-bit)
- floating-point numbers.}
+section "SIMD Vectors"
+ {Operations on SIMD vectors.}
------------------------------------------------------------------------
-primtype FloatX4#
- with llvm_only = True
+#define ALL_VECTOR_TYPES \
+ [<Int32,INT32,4>,<Int64,INT64,2> \
+ ,<Word32,WORD32,4>,<Word64,WORD64,2> \
+ ,<Float,Float#,4>,<Double,Double#,2>]
-primop FloatToFloatX4Op "floatToFloatX4#" GenPrimOp
- Float# -> FloatX4#
- with llvm_only = True
+#define SIGNED_VECTOR_TYPES \
+ [<Int32,INT32,4>,<Int64,INT64,2> \
+ ,<Float,Float#,4>,<Double,Double#,2>]
-primop FloatX4PackOp "packFloatX4#" GenPrimOp
- Float# -> Float# -> Float# -> Float# -> FloatX4#
- with llvm_only = True
+#define FLOAT_VECTOR_TYPES \
+ [<Float,Float#,4>,<Double,Double#,2>]
-primop FloatX4UnpackOp "unpackFloatX4#" GenPrimOp
- FloatX4# -> (# Float#, Float#, Float#, Float# #)
- with llvm_only = True
+#define INT_VECTOR_TYPES \
+ [<Int32,INT32,4>,<Int64,INT64,2> \
+ ,<Word32,WORD32,4>,<Word64,WORD64,2>]
-primop FloatX4InsertOp "insertFloatX4#" GenPrimOp
- FloatX4# -> Float# -> Int# -> FloatX4#
- with can_fail = True
- llvm_only = True
-
-primop FloatX4AddOp "plusFloatX4#" Dyadic
- FloatX4# -> FloatX4# -> FloatX4#
- with commutable = True
- llvm_only = True
-
-primop FloatX4SubOp "minusFloatX4#" Dyadic
- FloatX4# -> FloatX4# -> FloatX4#
+primtype VECTOR
with llvm_only = True
+ vector = ALL_VECTOR_TYPES
-primop FloatX4MulOp "timesFloatX4#" Dyadic
- FloatX4# -> FloatX4# -> FloatX4#
- with commutable = True
- llvm_only = True
-
-primop FloatX4DivOp "divideFloatX4#" Dyadic
- FloatX4# -> FloatX4# -> FloatX4#
- with can_fail = True
- llvm_only = True
-
-primop FloatX4NegOp "negateFloatX4#" Monadic
- FloatX4# -> FloatX4#
+primop VecBroadcastOp "broadcast#" GenPrimOp
+ SCALAR -> VECTOR
+ { Broadcast a scalar to all elements of a vector. }
with llvm_only = True
+ vector = ALL_VECTOR_TYPES
-primop IndexByteArrayOp_FloatX4 "indexFloatX4Array#" GenPrimOp
- ByteArray# -> Int# -> FloatX4#
- with can_fail = True
- llvm_only = True
-
-primop ReadByteArrayOp_FloatX4 "readFloatX4Array#" GenPrimOp
- MutableByteArray# s -> Int# -> State# s -> (# State# s, FloatX4# #)
- with has_side_effects = True
- can_fail = True
- llvm_only = True
-
-primop WriteByteArrayOp_FloatX4 "writeFloatX4Array#" GenPrimOp
- MutableByteArray# s -> Int# -> FloatX4# -> State# s -> State# s
- with has_side_effects = True
- can_fail = True
- llvm_only = True
-
-primop IndexOffAddrOp_FloatX4 "indexFloatX4OffAddr#" GenPrimOp
- Addr# -> Int# -> FloatX4#
- with can_fail = True
- llvm_only = True
-
-primop ReadOffAddrOp_FloatX4 "readFloatX4OffAddr#" GenPrimOp
- Addr# -> Int# -> State# s -> (# State# s, FloatX4# #)
- with has_side_effects = True
- can_fail = True
- llvm_only = True
-
-primop WriteOffAddrOp_FloatX4 "writeFloatX4OffAddr#" GenPrimOp
- Addr# -> Int# -> FloatX4# -> State# s -> State# s
- with has_side_effects = True
- can_fail = True
- llvm_only = True
-
-primop IndexByteArrayOp_FloatAsFloatX4 "indexFloatArrayAsFloatX4#" GenPrimOp
- ByteArray# -> Int# -> FloatX4#
- with can_fail = True
- llvm_only = True
-
-primop ReadByteArrayOp_FloatAsFloatX4 "readFloatArrayAsFloatX4#" GenPrimOp
- MutableByteArray# s -> Int# -> State# s -> (# State# s, FloatX4# #)
- with has_side_effects = True
- can_fail = True
- llvm_only = True
-
-primop WriteByteArrayOp_FloatAsFloatX4 "writeFloatArrayAsFloatX4#" GenPrimOp
- MutableByteArray# s -> Int# -> FloatX4# -> State# s -> State# s
- with has_side_effects = True
- can_fail = True
- llvm_only = True
-
-primop IndexOffAddrOp_FloatAsFloatX4 "indexFloatOffAddrAsFloatX4#" GenPrimOp
- Addr# -> Int# -> FloatX4#
- with can_fail = True
- llvm_only = True
-
-primop ReadOffAddrOp_FloatAsFloatX4 "readFloatOffAddrAsFloatX4#" GenPrimOp
- Addr# -> Int# -> State# s -> (# State# s, FloatX4# #)
- with has_side_effects = True
- can_fail = True
- llvm_only = True
-
-primop WriteOffAddrOp_FloatAsFloatX4 "writeFloatOffAddrAsFloatX4#" GenPrimOp
- Addr# -> Int# -> FloatX4# -> State# s -> State# s
- with has_side_effects = True
- can_fail = True
- llvm_only = True
-
-------------------------------------------------------------------------
-section "Double SIMD Vectors"
- {Operations on SIMD vectors of 2 double-precision (64-bit)
- floating-point numbers.}
-------------------------------------------------------------------------
-
-primtype DoubleX2#
+primop VecPackOp "pack#" GenPrimOp
+ VECTUPLE -> VECTOR
+ { Pack the elements of an unboxed tuple into a vector. }
with llvm_only = True
+ vector = ALL_VECTOR_TYPES
-primop DoubleToDoubleX2Op "doubleToDoubleX2#" GenPrimOp
- Double# -> DoubleX2#
+primop VecUnpackOp "unpack#" GenPrimOp
+ VECTOR -> VECTUPLE
+ { Unpack the elements of a vector into an unboxed tuple. #}
with llvm_only = True
+ vector = ALL_VECTOR_TYPES
-primop DoubleX2InsertOp "insertDoubleX2#" GenPrimOp
- DoubleX2# -> Double# -> Int# -> DoubleX2#
+primop VecInsertOp "insert#" GenPrimOp
+ VECTOR -> SCALAR -> Int# -> VECTOR
+ { Insert a scalar at the given position in a vector. }
with can_fail = True
llvm_only = True
+ vector = ALL_VECTOR_TYPES
-primop DoubleX2PackOp "packDoubleX2#" GenPrimOp
- Double# -> Double# -> DoubleX2#
- with llvm_only = True
-
-primop DoubleX2UnpackOp "unpackDoubleX2#" GenPrimOp
- DoubleX2# -> (# Double#, Double# #)
- with llvm_only = True
-
-primop DoubleX2AddOp "plusDoubleX2#" Dyadic
- DoubleX2# -> DoubleX2# -> DoubleX2#
+primop VecAddOp "plus#" Dyadic
+ VECTOR -> VECTOR -> VECTOR
+ { Add two vectors element-wise. }
with commutable = True
llvm_only = True
+ vector = ALL_VECTOR_TYPES
-primop DoubleX2SubOp "minusDoubleX2#" Dyadic
- DoubleX2# -> DoubleX2# -> DoubleX2#
+primop VecSubOp "minus#" Dyadic
+ VECTOR -> VECTOR -> VECTOR
+ { Subtract two vectors element-wise. }
with llvm_only = True
+ vector = ALL_VECTOR_TYPES
-primop DoubleX2MulOp "timesDoubleX2#" Dyadic
- DoubleX2# -> DoubleX2# -> DoubleX2#
+primop VecMulOp "times#" Dyadic
+ VECTOR -> VECTOR -> VECTOR
+ { Multiply two vectors element-wise. }
with commutable = True
llvm_only = True
+ vector = ALL_VECTOR_TYPES
-primop DoubleX2DivOp "divideDoubleX2#" Dyadic
- DoubleX2# -> DoubleX2# -> DoubleX2#
- with can_fail = True
- llvm_only = True
-
-primop DoubleX2NegOp "negateDoubleX2#" Monadic
- DoubleX2# -> DoubleX2#
- with llvm_only = True
-
-primop IndexByteArrayOp_DoubleX2 "indexDoubleX2Array#" GenPrimOp
- ByteArray# -> Int# -> DoubleX2#
- with can_fail = True
- llvm_only = True
-
-primop ReadByteArrayOp_DoubleX2 "readDoubleX2Array#" GenPrimOp
- MutableByteArray# s -> Int# -> State# s -> (# State# s, DoubleX2# #)
- with has_side_effects = True
- can_fail = True
- llvm_only = True
-
-primop WriteByteArrayOp_DoubleX2 "writeDoubleX2Array#" GenPrimOp
- MutableByteArray# s -> Int# -> DoubleX2# -> State# s -> State# s
- with has_side_effects = True
- can_fail = True
- llvm_only = True
-
-primop IndexOffAddrOp_DoubleX2 "indexDoubleX2OffAddr#" GenPrimOp
- Addr# -> Int# -> DoubleX2#
- with can_fail = True
- llvm_only = True
-
-primop ReadOffAddrOp_DoubleX2 "readDoubleX2OffAddr#" GenPrimOp
- Addr# -> Int# -> State# s -> (# State# s, DoubleX2# #)
- with has_side_effects = True
- can_fail = True
- llvm_only = True
-
-primop WriteOffAddrOp_DoubleX2 "writeDoubleX2OffAddr#" GenPrimOp
- Addr# -> Int# -> DoubleX2# -> State# s -> State# s
- with has_side_effects = True
- can_fail = True
- llvm_only = True
-
-primop IndexByteArrayOp_DoubleAsDoubleX2 "indexDoubleArrayAsDoubleX2#" GenPrimOp
- ByteArray# -> Int# -> DoubleX2#
- with can_fail = True
- llvm_only = True
-
-primop ReadByteArrayOp_DoubleAsDoubleX2 "readDoubleArrayAsDoubleX2#" GenPrimOp
- MutableByteArray# s -> Int# -> State# s -> (# State# s, DoubleX2# #)
- with has_side_effects = True
- can_fail = True
- llvm_only = True
-
-primop WriteByteArrayOp_DoubleAsDoubleX2 "writeDoubleArrayAsDoubleX2#" GenPrimOp
- MutableByteArray# s -> Int# -> DoubleX2# -> State# s -> State# s
- with has_side_effects = True
- can_fail = True
- llvm_only = True
-
-primop IndexOffAddrOp_DoubleAsDoubleX2 "indexDoubleOffAddrAsDoubleX2#" GenPrimOp
- Addr# -> Int# -> DoubleX2#
- with can_fail = True
- llvm_only = True
-
-primop ReadOffAddrOp_DoubleAsDoubleX2 "readDoubleOffAddrAsDoubleX2#" GenPrimOp
- Addr# -> Int# -> State# s -> (# State# s, DoubleX2# #)
- with has_side_effects = True
- can_fail = True
- llvm_only = True
-
-primop WriteOffAddrOp_DoubleAsDoubleX2 "writeDoubleOffAddrAsDoubleX2#" GenPrimOp
- Addr# -> Int# -> DoubleX2# -> State# s -> State# s
- with has_side_effects = True
- can_fail = True
- llvm_only = True
-
-------------------------------------------------------------------------
-section "Int32 SIMD Vectors"
- {Operations on SIMD vectors of 4 32-bit signed integers.}
-------------------------------------------------------------------------
-
-primtype Int32X4#
- with llvm_only = True
-
-primop Int32ToInt32X4Op "int32ToInt32X4#" GenPrimOp
- INT32 -> Int32X4#
- with llvm_only = True
-
-primop Int32X4InsertOp "insertInt32X4#" GenPrimOp
- Int32X4# -> INT32 -> Int# -> Int32X4#
+primop VecDivOp "divide#" Dyadic
+ VECTOR -> VECTOR -> VECTOR
+ { Divide two vectors element-wise. }
with can_fail = True
llvm_only = True
+ vector = FLOAT_VECTOR_TYPES
-primop Int32X4PackOp "packInt32X4#" GenPrimOp
- INT32 -> INT32 -> INT32 -> INT32 -> Int32X4#
- with llvm_only = True
-
-primop Int32X4UnpackOp "unpackInt32X4#" GenPrimOp
- Int32X4# -> (# INT32, INT32, INT32, INT32 #)
- with llvm_only = True
-
-primop Int32X4AddOp "plusInt32X4#" Dyadic
- Int32X4# -> Int32X4# -> Int32X4#
- with commutable = True
- llvm_only = True
-
-primop Int32X4SubOp "minusInt32X4#" Dyadic
- Int32X4# -> Int32X4# -> Int32X4#
- with llvm_only = True
-
-primop Int32X4MulOp "timesInt32X4#" Dyadic
- Int32X4# -> Int32X4# -> Int32X4#
- with commutable = True
- llvm_only = True
-
-primop Int32X4QuotOp "quotInt32X4#" Dyadic
- Int32X4# -> Int32X4# -> Int32X4#
+primop VecQuotOp "quot#" Dyadic
+ VECTOR -> VECTOR -> VECTOR
+ { Rounds towards zero element-wise. }
with can_fail = True
llvm_only = True
+ vector = INT_VECTOR_TYPES
-primop Int32X4RemOp "remInt32X4#" Dyadic
- Int32X4# -> Int32X4# -> Int32X4#
+primop VecRemOp "rem#" Dyadic
+ VECTOR -> VECTOR -> VECTOR
+ { Satisfies \texttt{(quot\# x y) times\# y plus\# (rem\# x y) == x}. }
with can_fail = True
llvm_only = True
+ vector = INT_VECTOR_TYPES
-primop Int32X4NegOp "negateInt32X4#" Monadic
- Int32X4# -> Int32X4#
+primop VecNegOp "negate#" Monadic
+ VECTOR -> VECTOR
+ { Negate element-wise. }
with llvm_only = True
+ vector = SIGNED_VECTOR_TYPES
-primop IndexByteArrayOp_Int32X4 "indexInt32X4Array#" GenPrimOp
- ByteArray# -> Int# -> Int32X4#
- with can_fail = True
- llvm_only = True
-
-primop ReadByteArrayOp_Int32X4 "readInt32X4Array#" GenPrimOp
- MutableByteArray# s -> Int# -> State# s -> (# State# s, Int32X4# #)
- with has_side_effects = True
- can_fail = True
- llvm_only = True
-
-primop WriteByteArrayOp_Int32X4 "writeInt32X4Array#" GenPrimOp
- MutableByteArray# s -> Int# -> Int32X4# -> State# s -> State# s
- with has_side_effects = True
- can_fail = True
- llvm_only = True
-
-primop IndexOffAddrOp_Int32X4 "indexInt32X4OffAddr#" GenPrimOp
- Addr# -> Int# -> Int32X4#
- with can_fail = True
- llvm_only = True
-
-primop ReadOffAddrOp_Int32X4 "readInt32X4OffAddr#" GenPrimOp
- Addr# -> Int# -> State# s -> (# State# s, Int32X4# #)
- with has_side_effects = True
- can_fail = True
- llvm_only = True
-
-primop WriteOffAddrOp_Int32X4 "writeInt32X4OffAddr#" GenPrimOp
- Addr# -> Int# -> Int32X4# -> State# s -> State# s
- with has_side_effects = True
- can_fail = True
- llvm_only = True
-
-primop IndexByteArrayOp_Int32AsInt32X4 "indexInt32ArrayAsInt32X4#" GenPrimOp
- ByteArray# -> Int# -> Int32X4#
- with can_fail = True
- llvm_only = True
-
-primop ReadByteArrayOp_Int32AsInt32X4 "readInt32ArrayAsInt32X4#" GenPrimOp
- MutableByteArray# s -> Int# -> State# s -> (# State# s, Int32X4# #)
- with has_side_effects = True
- can_fail = True
- llvm_only = True
-
-primop WriteByteArrayOp_Int32AsInt32X4 "writeInt32ArrayAsInt32X4#" GenPrimOp
- MutableByteArray# s -> Int# -> Int32X4# -> State# s -> State# s
- with has_side_effects = True
- can_fail = True
- llvm_only = True
-
-primop IndexOffAddrOp_Int32AsInt32X4 "indexInt32OffAddrAsInt32X4#" GenPrimOp
- Addr# -> Int# -> Int32X4#
+primop VecIndexByteArrayOp "indexArray#" GenPrimOp
+ ByteArray# -> Int# -> VECTOR
+ { Read a vector from specified index of immutable array. }
with can_fail = True
llvm_only = True
+ vector = ALL_VECTOR_TYPES
-primop ReadOffAddrOp_Int32AsInt32X4 "readInt32OffAddrAsInt32X4#" GenPrimOp
- Addr# -> Int# -> State# s -> (# State# s, Int32X4# #)
+primop VecReadByteArrayOp "readArray#" GenPrimOp
+ MutableByteArray# s -> Int# -> State# s -> (# State# s, VECTOR #)
+ { Read a vector from specified index of mutable array. }
with has_side_effects = True
can_fail = True
llvm_only = True
+ vector = ALL_VECTOR_TYPES
-primop WriteOffAddrOp_Int32AsInt32X4 "writeInt32OffAddrAsInt32X4#" GenPrimOp
- Addr# -> Int# -> Int32X4# -> State# s -> State# s
+primop VecWriteByteArrayOp "writeArray#" GenPrimOp
+ MutableByteArray# s -> Int# -> VECTOR -> State# s -> State# s
+ { Write a vector to specified index of mutable array. }
with has_side_effects = True
can_fail = True
llvm_only = True
+ vector = ALL_VECTOR_TYPES
-------------------------------------------------------------------------
-section "Int64 SIMD Vectors"
- {Operations on SIMD vectors of 2 64-bit signed integers.}
-------------------------------------------------------------------------
-
-primtype Int64X2#
- with llvm_only = True
-
-primop Int64ToInt64X2Op "int64ToInt64X2#" GenPrimOp
- INT64 -> Int64X2#
- with llvm_only = True
-
-primop Int64X2InsertOp "insertInt64X2#" GenPrimOp
- Int64X2# -> INT64 -> Int# -> Int64X2#
- with can_fail = True
- llvm_only = True
-
-primop Int64X2PackOp "packInt64X2#" GenPrimOp
- INT64 -> INT64 -> Int64X2#
- with llvm_only = True
-
-primop Int64X2UnpackOp "unpackInt64X2#" GenPrimOp
- Int64X2# -> (# INT64, INT64 #)
- with llvm_only = True
-
-primop Int64X2AddOp "plusInt64X2#" Dyadic
- Int64X2# -> Int64X2# -> Int64X2#
- with commutable = True
- llvm_only = True
-
-primop Int64X2SubOp "minusInt64X2#" Dyadic
- Int64X2# -> Int64X2# -> Int64X2#
- with llvm_only = True
-
-primop Int64X2MulOp "timesInt64X2#" Dyadic
- Int64X2# -> Int64X2# -> Int64X2#
- with commutable = True
- llvm_only = True
-
-primop Int64X2QuotOp "quotInt64X2#" Dyadic
- Int64X2# -> Int64X2# -> Int64X2#
- with can_fail = True
- llvm_only = True
-
-primop Int64X2RemOp "remInt64X2#" Dyadic
- Int64X2# -> Int64X2# -> Int64X2#
- with can_fail = True
- llvm_only = True
-
-primop Int64X2NegOp "negateInt64X2#" Monadic
- Int64X2# -> Int64X2#
- with llvm_only = True
-
-primop IndexByteArrayOp_Int64X2 "indexInt64X2Array#" GenPrimOp
- ByteArray# -> Int# -> Int64X2#
+primop VecIndexOffAddrOp "indexOffAddr#" GenPrimOp
+ Addr# -> Int# -> VECTOR
+ { Reads vector; offset in bytes. }
with can_fail = True
llvm_only = True
+ vector = ALL_VECTOR_TYPES
-primop ReadByteArrayOp_Int64X2 "readInt64X2Array#" GenPrimOp
- MutableByteArray# s -> Int# -> State# s -> (# State# s, Int64X2# #)
+primop VecReadOffAddrOp "readOffAddr#" GenPrimOp
+ Addr# -> Int# -> State# s -> (# State# s, VECTOR #)
+ { Reads vector; offset in bytes. }
with has_side_effects = True
can_fail = True
llvm_only = True
+ vector = ALL_VECTOR_TYPES
-primop WriteByteArrayOp_Int64X2 "writeInt64X2Array#" GenPrimOp
- MutableByteArray# s -> Int# -> Int64X2# -> State# s -> State# s
+primop VecWriteOffAddrOp "writeOffAddr#" GenPrimOp
+ Addr# -> Int# -> VECTOR -> State# s -> State# s
+ { Write vector; offset in bytes. }
with has_side_effects = True
can_fail = True
llvm_only = True
+ vector = ALL_VECTOR_TYPES
-primop IndexOffAddrOp_Int64X2 "indexInt64X2OffAddr#" GenPrimOp
- Addr# -> Int# -> Int64X2#
- with can_fail = True
- llvm_only = True
-
-primop ReadOffAddrOp_Int64X2 "readInt64X2OffAddr#" GenPrimOp
- Addr# -> Int# -> State# s -> (# State# s, Int64X2# #)
- with has_side_effects = True
- llvm_only = True
-
-primop WriteOffAddrOp_Int64X2 "writeInt64X2OffAddr#" GenPrimOp
- Addr# -> Int# -> Int64X2# -> State# s -> State# s
- with has_side_effects = True
- can_fail = True
- llvm_only = True
-primop IndexByteArrayOp_Int64AsInt64X2 "indexInt64ArrayAsInt64X2#" GenPrimOp
- ByteArray# -> Int# -> Int64X2#
+primop VecIndexScalarByteArrayOp "indexArrayAs#" GenPrimOp
+ ByteArray# -> Int# -> VECTOR
+ { Read a vector from specified index of immutable array of scalars; offset is in scalar elements. }
with can_fail = True
llvm_only = True
+ vector = ALL_VECTOR_TYPES
-primop ReadByteArrayOp_Int64AsInt64X2 "readInt64ArrayAsInt64X2#" GenPrimOp
- MutableByteArray# s -> Int# -> State# s -> (# State# s, Int64X2# #)
+primop VecReadScalarByteArrayOp "readArrayAs#" GenPrimOp
+ MutableByteArray# s -> Int# -> State# s -> (# State# s, VECTOR #)
+ { Read a vector from specified index of mutable array of scalars; offset is in scalar elements. }
with has_side_effects = True
can_fail = True
llvm_only = True
+ vector = ALL_VECTOR_TYPES
-primop WriteByteArrayOp_Int64AsInt64X2 "writeInt64ArrayAsInt64X2#" GenPrimOp
- MutableByteArray# s -> Int# -> Int64X2# -> State# s -> State# s
+primop VecWriteScalarByteArrayOp "writeArrayAs#" GenPrimOp
+ MutableByteArray# s -> Int# -> VECTOR -> State# s -> State# s
+ { Write a vector to specified index of mutable array of scalars; offset is in scalar elements. }
with has_side_effects = True
can_fail = True
llvm_only = True
+ vector = ALL_VECTOR_TYPES
-primop IndexOffAddrOp_Int64AsInt64X2 "indexInt64OffAddrAsInt64X2#" GenPrimOp
- Addr# -> Int# -> Int64X2#
+primop VecIndexScalarOffAddrOp "indexOffAddrAs#" GenPrimOp
+ Addr# -> Int# -> VECTOR
+ { Reads vector; offset in scalar elements. }
with can_fail = True
llvm_only = True
+ vector = ALL_VECTOR_TYPES
-primop ReadOffAddrOp_Int64AsInt64X2 "readInt64OffAddrAsInt64X2#" GenPrimOp
- Addr# -> Int# -> State# s -> (# State# s, Int64X2# #)
+primop VecReadScalarOffAddrOp "readOffAddrAs#" GenPrimOp
+ Addr# -> Int# -> State# s -> (# State# s, VECTOR #)
+ { Reads vector; offset in scalar elements. }
with has_side_effects = True
can_fail = True
llvm_only = True
+ vector = ALL_VECTOR_TYPES
-primop WriteOffAddrOp_Int64AsInt64X2 "writeInt64OffAddrAsInt64X2#" GenPrimOp
- Addr# -> Int# -> Int64X2# -> State# s -> State# s
+primop VecWriteScalarOffAddrOp "writeOffAddrAs#" GenPrimOp
+ Addr# -> Int# -> VECTOR -> State# s -> State# s
+ { Write vector; offset in scalar elements. }
with has_side_effects = True
can_fail = True
llvm_only = True
+ vector = ALL_VECTOR_TYPES
------------------------------------------------------------------------
section "Prefetch"