diff options
Diffstat (limited to 'compiler')
-rw-r--r-- | compiler/cmm/CmmMachOp.hs | 10 | ||||
-rw-r--r-- | compiler/cmm/PprC.hs | 9 | ||||
-rw-r--r-- | compiler/codeGen/StgCmmPrim.hs | 288 | ||||
-rw-r--r-- | compiler/ghc.mk | 16 | ||||
-rw-r--r-- | compiler/llvmGen/LlvmCodeGen/CodeGen.hs | 6 | ||||
-rw-r--r-- | compiler/nativeGen/X86/CodeGen.hs | 2 | ||||
-rw-r--r-- | compiler/prelude/PrelNames.lhs | 15 | ||||
-rw-r--r-- | compiler/prelude/PrimOp.lhs | 11 | ||||
-rw-r--r-- | compiler/prelude/TysPrim.lhs | 40 | ||||
-rw-r--r-- | compiler/prelude/primops.txt.pp | 518 |
10 files changed, 351 insertions, 564 deletions
diff --git a/compiler/cmm/CmmMachOp.hs b/compiler/cmm/CmmMachOp.hs index 8d42bbd2cb..c009d15e25 100644 --- a/compiler/cmm/CmmMachOp.hs +++ b/compiler/cmm/CmmMachOp.hs @@ -118,6 +118,10 @@ data MachOp | MO_VS_Rem Length Width | MO_VS_Neg Length Width + -- Unsigned vector multiply/divide + | MO_VU_Quot Length Width + | MO_VU_Rem Length Width + -- Floting point vector element insertion and extraction operations | MO_VF_Insert Length Width -- Insert scalar into vector | MO_VF_Extract Length Width -- Extract scalar from vector @@ -375,6 +379,9 @@ machOpResultType dflags mop tys = MO_VS_Rem l w -> cmmVec l (cmmBits w) MO_VS_Neg l w -> cmmVec l (cmmBits w) + MO_VU_Quot l w -> cmmVec l (cmmBits w) + MO_VU_Rem l w -> cmmVec l (cmmBits w) + MO_VF_Insert l w -> cmmVec l (cmmFloat w) MO_VF_Extract _ w -> cmmFloat w @@ -461,6 +468,9 @@ machOpArgReps dflags op = MO_VS_Rem _ r -> [r,r] MO_VS_Neg _ r -> [r] + MO_VU_Quot _ r -> [r,r] + MO_VU_Rem _ r -> [r,r] + MO_VF_Insert l r -> [typeWidth (vec l (cmmFloat r)),r,wordWidth dflags] MO_VF_Extract l r -> [typeWidth (vec l (cmmFloat r)),wordWidth dflags] diff --git a/compiler/cmm/PprC.hs b/compiler/cmm/PprC.hs index d45b103954..c468161c73 100644 --- a/compiler/cmm/PprC.hs +++ b/compiler/cmm/PprC.hs @@ -651,6 +651,15 @@ pprMachOp_for_C mop = case mop of (panic $ "PprC.pprMachOp_for_C: MO_VS_Neg" ++ " should have been handled earlier!") + MO_VU_Quot {} -> pprTrace "offending mop:" + (ptext $ sLit "MO_VU_Quot") + (panic $ "PprC.pprMachOp_for_C: MO_VU_Quot" + ++ " should have been handled earlier!") + MO_VU_Rem {} -> pprTrace "offending mop:" + (ptext $ sLit "MO_VU_Rem") + (panic $ "PprC.pprMachOp_for_C: MO_VU_Rem" + ++ " should have been handled earlier!") + MO_VF_Insert {} -> pprTrace "offending mop:" (ptext $ sLit "MO_VF_Insert") (panic $ "PprC.pprMachOp_for_C: MO_VF_Insert" diff --git a/compiler/codeGen/StgCmmPrim.hs b/compiler/codeGen/StgCmmPrim.hs index 8560f7cf1c..5250c9378e 100644 --- a/compiler/codeGen/StgCmmPrim.hs +++ b/compiler/codeGen/StgCmmPrim.hs @@ -40,7 +40,7 @@ import FastString import Outputable import Util -import Control.Monad (liftM) +import Control.Monad (liftM, when) import Data.Bits ------------------------------------------------------------------------ @@ -380,14 +380,6 @@ emitPrimOp dflags res IndexOffAddrOp_Word8 args = doIndexOffAddrOp emitPrimOp dflags res IndexOffAddrOp_Word16 args = doIndexOffAddrOp (Just (mo_u_16ToWord dflags)) b16 res args emitPrimOp dflags res IndexOffAddrOp_Word32 args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args emitPrimOp _ res IndexOffAddrOp_Word64 args = doIndexOffAddrOp Nothing b64 res args -emitPrimOp _ res IndexOffAddrOp_FloatX4 args = doIndexOffAddrOp Nothing vec4f32 res args -emitPrimOp _ res IndexOffAddrOp_FloatAsFloatX4 args = doIndexOffAddrOpAs Nothing vec4f32 f32 res args -emitPrimOp _ res IndexOffAddrOp_DoubleX2 args = doIndexOffAddrOp Nothing vec2f64 res args -emitPrimOp _ res IndexOffAddrOp_DoubleAsDoubleX2 args = doIndexOffAddrOpAs Nothing vec2f64 f64 res args -emitPrimOp _ res IndexOffAddrOp_Int32X4 args = doIndexOffAddrOp Nothing vec4b32 res args -emitPrimOp _ res IndexOffAddrOp_Int32AsInt32X4 args = doIndexOffAddrOpAs Nothing vec4b32 b32 res args -emitPrimOp _ res IndexOffAddrOp_Int64X2 args = doIndexOffAddrOp Nothing vec2b64 res args -emitPrimOp _ res IndexOffAddrOp_Int64AsInt64X2 args = doIndexOffAddrOpAs Nothing vec2b64 b64 res args -- ReadXXXoffAddr, which are identical, for our purposes, to IndexXXXoffAddr. @@ -407,14 +399,6 @@ emitPrimOp dflags res ReadOffAddrOp_Word8 args = doIndexOffAddrOp ( emitPrimOp dflags res ReadOffAddrOp_Word16 args = doIndexOffAddrOp (Just (mo_u_16ToWord dflags)) b16 res args emitPrimOp dflags res ReadOffAddrOp_Word32 args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args emitPrimOp _ res ReadOffAddrOp_Word64 args = doIndexOffAddrOp Nothing b64 res args -emitPrimOp _ res ReadOffAddrOp_FloatX4 args = doIndexOffAddrOp Nothing vec4f32 res args -emitPrimOp _ res ReadOffAddrOp_FloatAsFloatX4 args = doIndexOffAddrOpAs Nothing vec4f32 b32 res args -emitPrimOp _ res ReadOffAddrOp_DoubleX2 args = doIndexOffAddrOp Nothing vec2f64 res args -emitPrimOp _ res ReadOffAddrOp_DoubleAsDoubleX2 args = doIndexOffAddrOpAs Nothing vec2f64 b64 res args -emitPrimOp _ res ReadOffAddrOp_Int32X4 args = doIndexOffAddrOp Nothing vec4b32 res args -emitPrimOp _ res ReadOffAddrOp_Int32AsInt32X4 args = doIndexOffAddrOpAs Nothing vec4b32 b32 res args -emitPrimOp _ res ReadOffAddrOp_Int64X2 args = doIndexOffAddrOp Nothing vec2b64 res args -emitPrimOp _ res ReadOffAddrOp_Int64AsInt64X2 args = doIndexOffAddrOpAs Nothing vec2b64 b64 res args -- IndexXXXArray @@ -434,14 +418,6 @@ emitPrimOp dflags res IndexByteArrayOp_Word8 args = doIndexByteArrayO emitPrimOp dflags res IndexByteArrayOp_Word16 args = doIndexByteArrayOp (Just (mo_u_16ToWord dflags)) b16 res args emitPrimOp dflags res IndexByteArrayOp_Word32 args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args emitPrimOp _ res IndexByteArrayOp_Word64 args = doIndexByteArrayOp Nothing b64 res args -emitPrimOp _ res IndexByteArrayOp_FloatX4 args = doIndexByteArrayOp Nothing vec4f32 res args -emitPrimOp _ res IndexByteArrayOp_FloatAsFloatX4 args = doIndexByteArrayOpAs Nothing vec4f32 f32 res args -emitPrimOp _ res IndexByteArrayOp_DoubleX2 args = doIndexByteArrayOp Nothing vec2f64 res args -emitPrimOp _ res IndexByteArrayOp_DoubleAsDoubleX2 args = doIndexByteArrayOpAs Nothing vec2f64 f64 res args -emitPrimOp _ res IndexByteArrayOp_Int32X4 args = doIndexByteArrayOp Nothing vec4b32 res args -emitPrimOp _ res IndexByteArrayOp_Int32AsInt32X4 args = doIndexByteArrayOpAs Nothing vec4b32 b32 res args -emitPrimOp _ res IndexByteArrayOp_Int64X2 args = doIndexByteArrayOp Nothing vec2b64 res args -emitPrimOp _ res IndexByteArrayOp_Int64AsInt64X2 args = doIndexByteArrayOpAs Nothing vec2b64 b64 res args -- ReadXXXArray, identical to IndexXXXArray. @@ -461,14 +437,6 @@ emitPrimOp dflags res ReadByteArrayOp_Word8 args = doIndexByteArrayOp emitPrimOp dflags res ReadByteArrayOp_Word16 args = doIndexByteArrayOp (Just (mo_u_16ToWord dflags)) b16 res args emitPrimOp dflags res ReadByteArrayOp_Word32 args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args emitPrimOp _ res ReadByteArrayOp_Word64 args = doIndexByteArrayOp Nothing b64 res args -emitPrimOp _ res ReadByteArrayOp_FloatX4 args = doIndexByteArrayOp Nothing vec4f32 res args -emitPrimOp _ res ReadByteArrayOp_FloatAsFloatX4 args = doIndexByteArrayOpAs Nothing vec4f32 f32 res args -emitPrimOp _ res ReadByteArrayOp_DoubleX2 args = doIndexByteArrayOp Nothing vec2f64 res args -emitPrimOp _ res ReadByteArrayOp_DoubleAsDoubleX2 args = doIndexByteArrayOpAs Nothing vec2f64 f64 res args -emitPrimOp _ res ReadByteArrayOp_Int32X4 args = doIndexByteArrayOp Nothing vec4b32 res args -emitPrimOp _ res ReadByteArrayOp_Int32AsInt32X4 args = doIndexByteArrayOpAs Nothing vec4b32 b32 res args -emitPrimOp _ res ReadByteArrayOp_Int64X2 args = doIndexByteArrayOp Nothing vec2b64 res args -emitPrimOp _ res ReadByteArrayOp_Int64AsInt64X2 args = doIndexByteArrayOpAs Nothing vec2b64 b64 res args -- WriteXXXoffAddr @@ -488,14 +456,6 @@ emitPrimOp dflags res WriteOffAddrOp_Word8 args = doWriteOffAddrOp (J emitPrimOp dflags res WriteOffAddrOp_Word16 args = doWriteOffAddrOp (Just (mo_WordTo16 dflags)) b16 res args emitPrimOp dflags res WriteOffAddrOp_Word32 args = doWriteOffAddrOp (Just (mo_WordTo32 dflags)) b32 res args emitPrimOp _ res WriteOffAddrOp_Word64 args = doWriteOffAddrOp Nothing b64 res args -emitPrimOp _ res WriteOffAddrOp_FloatX4 args = doWriteOffAddrOp Nothing vec4f32 res args -emitPrimOp _ res WriteOffAddrOp_FloatAsFloatX4 args = doWriteOffAddrOp Nothing f32 res args -emitPrimOp _ res WriteOffAddrOp_DoubleX2 args = doWriteOffAddrOp Nothing vec2f64 res args -emitPrimOp _ res WriteOffAddrOp_DoubleAsDoubleX2 args = doWriteOffAddrOp Nothing f64 res args -emitPrimOp _ res WriteOffAddrOp_Int32X4 args = doWriteOffAddrOp Nothing vec4b32 res args -emitPrimOp _ res WriteOffAddrOp_Int32AsInt32X4 args = doWriteOffAddrOp Nothing b32 res args -emitPrimOp _ res WriteOffAddrOp_Int64X2 args = doWriteOffAddrOp Nothing vec2b64 res args -emitPrimOp _ res WriteOffAddrOp_Int64AsInt64X2 args = doWriteOffAddrOp Nothing b64 res args -- WriteXXXArray @@ -515,14 +475,6 @@ emitPrimOp dflags res WriteByteArrayOp_Word8 args = doWriteByteArrayO emitPrimOp dflags res WriteByteArrayOp_Word16 args = doWriteByteArrayOp (Just (mo_WordTo16 dflags)) b16 res args emitPrimOp dflags res WriteByteArrayOp_Word32 args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b32 res args emitPrimOp _ res WriteByteArrayOp_Word64 args = doWriteByteArrayOp Nothing b64 res args -emitPrimOp _ res WriteByteArrayOp_FloatX4 args = doWriteByteArrayOp Nothing vec4f32 res args -emitPrimOp _ res WriteByteArrayOp_FloatAsFloatX4 args = doWriteByteArrayOp Nothing f32 res args -emitPrimOp _ res WriteByteArrayOp_DoubleX2 args = doWriteByteArrayOp Nothing vec2f64 res args -emitPrimOp _ res WriteByteArrayOp_DoubleAsDoubleX2 args = doWriteByteArrayOp Nothing f64 res args -emitPrimOp _ res WriteByteArrayOp_Int32X4 args = doWriteByteArrayOp Nothing vec4b32 res args -emitPrimOp _ res WriteByteArrayOp_Int32AsInt32X4 args = doWriteByteArrayOp Nothing b32 res args -emitPrimOp _ res WriteByteArrayOp_Int64X2 args = doWriteByteArrayOp Nothing vec2b64 res args -emitPrimOp _ res WriteByteArrayOp_Int64AsInt64X2 args = doWriteByteArrayOp Nothing b64 res args -- Copying and setting byte arrays emitPrimOp _ [] CopyByteArrayOp [src,src_off,dst,dst_off,n] = @@ -556,78 +508,136 @@ emitPrimOp _ [res] Word2FloatOp [w] = emitPrimCall [res] emitPrimOp _ [res] Word2DoubleOp [w] = emitPrimCall [res] (MO_UF_Conv W64) [w] --- SIMD vector packing and unpacking -emitPrimOp _ [res] FloatToFloatX4Op [e] = - doVecPackOp Nothing vec4f32 zero [e,e,e,e] res +-- SIMD primops +emitPrimOp dflags [res] (VecBroadcastOp vcat n w) [e] = + doVecPackOp (vecElemInjectCast dflags vcat w) ty zeros (replicate n e) res where - zero :: CmmExpr - zero = CmmLit $ CmmVec (replicate 4 (CmmFloat 0 W32)) + zeros :: CmmExpr + zeros = CmmLit $ CmmVec (replicate n zero) + + zero :: CmmLit + zero = case vcat of + IntVec -> CmmInt 0 w + WordVec -> CmmInt 0 w + FloatVec -> CmmFloat 0 w + + ty :: CmmType + ty = vecVmmType vcat n w + +emitPrimOp dflags [res] (VecPackOp vcat n w) es = do + when (length es /= n) $ + panic "emitPrimOp: VecPackOp has wrong number of arguments" + doVecPackOp (vecElemInjectCast dflags vcat w) ty zeros es res + where + zeros :: CmmExpr + zeros = CmmLit $ CmmVec (replicate n zero) + + zero :: CmmLit + zero = case vcat of + IntVec -> CmmInt 0 w + WordVec -> CmmInt 0 w + FloatVec -> CmmFloat 0 w + + ty :: CmmType + ty = vecVmmType vcat n w + +emitPrimOp dflags res (VecUnpackOp vcat n w) [arg] = do + when (length res /= n) $ + panic "emitPrimOp: VecUnpackOp has wrong number of results" + doVecUnpackOp (vecElemProjectCast dflags vcat w) ty arg res + where + ty :: CmmType + ty = vecVmmType vcat n w -emitPrimOp _ [res] FloatX4PackOp es@[_,_,_,_] = - doVecPackOp Nothing vec4f32 zero es res +emitPrimOp dflags [res] (VecInsertOp vcat n w) [v,e,i] = + doVecInsertOp (vecElemInjectCast dflags vcat w) ty v e i res where - zero :: CmmExpr - zero = CmmLit $ CmmVec (replicate 4 (CmmFloat 0 W32)) + ty :: CmmType + ty = vecVmmType vcat n w -emitPrimOp _ res@[_,_,_,_] FloatX4UnpackOp [arg] = - doVecUnpackOp Nothing vec4f32 arg res +emitPrimOp _ res (VecIndexByteArrayOp vcat n w) args = + doIndexByteArrayOp Nothing ty res args + where + ty :: CmmType + ty = vecVmmType vcat n w -emitPrimOp _ [res] FloatX4InsertOp [v,e,i] = - doVecInsertOp Nothing vec4f32 v e i res +emitPrimOp _ res (VecReadByteArrayOp vcat n w) args = + doIndexByteArrayOp Nothing ty res args + where + ty :: CmmType + ty = vecVmmType vcat n w -emitPrimOp _ [res] DoubleToDoubleX2Op [e] = - doVecPackOp Nothing vec2f64 zero [e,e] res +emitPrimOp _ res (VecWriteByteArrayOp vcat n w) args = + doWriteByteArrayOp Nothing ty res args where - zero :: CmmExpr - zero = CmmLit $ CmmVec (replicate 2 (CmmFloat 0 W64)) + ty :: CmmType + ty = vecVmmType vcat n w -emitPrimOp _ [res] DoubleX2PackOp es@[_,_] = - doVecPackOp Nothing vec2f64 zero es res +emitPrimOp _ res (VecIndexOffAddrOp vcat n w) args = + doIndexOffAddrOp Nothing ty res args where - zero :: CmmExpr - zero = CmmLit $ CmmVec (replicate 2 (CmmFloat 0 W64)) + ty :: CmmType + ty = vecVmmType vcat n w -emitPrimOp _ res@[_,_] DoubleX2UnpackOp [arg] = - doVecUnpackOp Nothing vec2f64 arg res +emitPrimOp _ res (VecReadOffAddrOp vcat n w) args = + doIndexOffAddrOp Nothing ty res args + where + ty :: CmmType + ty = vecVmmType vcat n w -emitPrimOp _ [res] DoubleX2InsertOp [v,e,i] = - doVecInsertOp Nothing vec2f64 v e i res +emitPrimOp _ res (VecWriteOffAddrOp vcat n w) args = + doWriteOffAddrOp Nothing ty res args + where + ty :: CmmType + ty = vecVmmType vcat n w -emitPrimOp dflags [res] Int32ToInt32X4Op [e] = - doVecPackOp (Just (mo_WordTo32 dflags)) vec4b32 zero [e,e,e,e] res +emitPrimOp _ res (VecIndexScalarByteArrayOp vcat n w) args = + doIndexByteArrayOpAs Nothing vecty ty res args where - zero :: CmmExpr - zero = CmmLit $ CmmVec (replicate 4 (CmmInt 0 W32)) + vecty :: CmmType + vecty = vecVmmType vcat n w -emitPrimOp dflags [res] Int32X4PackOp es@[_,_,_,_] = - doVecPackOp (Just (mo_WordTo32 dflags)) vec4b32 zero es res + ty :: CmmType + ty = vecCmmCat vcat w + +emitPrimOp _ res (VecReadScalarByteArrayOp vcat n w) args = + doIndexByteArrayOpAs Nothing vecty ty res args where - zero :: CmmExpr - zero = CmmLit $ CmmVec (replicate 4 (CmmInt 0 W32)) + vecty :: CmmType + vecty = vecVmmType vcat n w -emitPrimOp dflags res@[_,_,_,_] Int32X4UnpackOp [arg] = - doVecUnpackOp (Just (mo_s_32ToWord dflags)) vec4b32 arg res + ty :: CmmType + ty = vecCmmCat vcat w -emitPrimOp dflags [res] Int32X4InsertOp [v,e,i] = - doVecInsertOp (Just (mo_WordTo32 dflags)) vec4b32 v e i res +emitPrimOp _ res (VecWriteScalarByteArrayOp vcat _ w) args = + doWriteByteArrayOp Nothing ty res args + where + ty :: CmmType + ty = vecCmmCat vcat w -emitPrimOp _ [res] Int64ToInt64X2Op [e] = - doVecPackOp Nothing vec2b64 zero [e,e] res +emitPrimOp _ res (VecIndexScalarOffAddrOp vcat n w) args = + doIndexOffAddrOpAs Nothing vecty ty res args where - zero :: CmmExpr - zero = CmmLit $ CmmVec (replicate 2 (CmmInt 0 W64)) + vecty :: CmmType + vecty = vecVmmType vcat n w -emitPrimOp _ [res] Int64X2PackOp es@[_,_] = - doVecPackOp Nothing vec2b64 zero es res + ty :: CmmType + ty = vecCmmCat vcat w + +emitPrimOp _ res (VecReadScalarOffAddrOp vcat n w) args = + doIndexOffAddrOpAs Nothing vecty ty res args where - zero :: CmmExpr - zero = CmmLit $ CmmVec (replicate 2 (CmmInt 0 W64)) + vecty :: CmmType + vecty = vecVmmType vcat n w -emitPrimOp _ res@[_,_] Int64X2UnpackOp [arg] = - doVecUnpackOp Nothing vec2b64 arg res + ty :: CmmType + ty = vecCmmCat vcat w -emitPrimOp _ [res] Int64X2InsertOp [v,e,i] = - doVecInsertOp Nothing vec2b64 v e i res +emitPrimOp _ res (VecWriteScalarOffAddrOp vcat _ w) args = + doWriteOffAddrOp Nothing ty res args + where + ty :: CmmType + ty = vecCmmCat vcat w -- Prefetch emitPrimOp _ res PrefetchByteArrayOp args = doPrefetchByteArrayOp res args @@ -944,33 +954,26 @@ translateOp _ FloatMulOp = Just (MO_F_Mul W32) translateOp _ FloatDivOp = Just (MO_F_Quot W32) translateOp _ FloatNegOp = Just (MO_F_Neg W32) --- Floating point vector ops - -translateOp _ FloatX4AddOp = Just (MO_VF_Add 4 W32) -translateOp _ FloatX4SubOp = Just (MO_VF_Sub 4 W32) -translateOp _ FloatX4MulOp = Just (MO_VF_Mul 4 W32) -translateOp _ FloatX4DivOp = Just (MO_VF_Quot 4 W32) -translateOp _ FloatX4NegOp = Just (MO_VF_Neg 4 W32) - -translateOp _ DoubleX2AddOp = Just (MO_VF_Add 2 W64) -translateOp _ DoubleX2SubOp = Just (MO_VF_Sub 2 W64) -translateOp _ DoubleX2MulOp = Just (MO_VF_Mul 2 W64) -translateOp _ DoubleX2DivOp = Just (MO_VF_Quot 2 W64) -translateOp _ DoubleX2NegOp = Just (MO_VF_Neg 2 W64) - -translateOp _ Int32X4AddOp = Just (MO_V_Add 4 W32) -translateOp _ Int32X4SubOp = Just (MO_V_Sub 4 W32) -translateOp _ Int32X4MulOp = Just (MO_V_Mul 4 W32) -translateOp _ Int32X4QuotOp = Just (MO_VS_Quot 4 W32) -translateOp _ Int32X4RemOp = Just (MO_VS_Rem 4 W32) -translateOp _ Int32X4NegOp = Just (MO_VS_Neg 4 W32) - -translateOp _ Int64X2AddOp = Just (MO_V_Add 2 W64) -translateOp _ Int64X2SubOp = Just (MO_V_Sub 2 W64) -translateOp _ Int64X2MulOp = Just (MO_V_Mul 2 W64) -translateOp _ Int64X2QuotOp = Just (MO_VS_Quot 2 W64) -translateOp _ Int64X2RemOp = Just (MO_VS_Rem 2 W64) -translateOp _ Int64X2NegOp = Just (MO_VS_Neg 2 W64) +-- Vector ops + +translateOp _ (VecAddOp FloatVec n w) = Just (MO_VF_Add n w) +translateOp _ (VecSubOp FloatVec n w) = Just (MO_VF_Sub n w) +translateOp _ (VecMulOp FloatVec n w) = Just (MO_VF_Mul n w) +translateOp _ (VecDivOp FloatVec n w) = Just (MO_VF_Quot n w) +translateOp _ (VecNegOp FloatVec n w) = Just (MO_VF_Neg n w) + +translateOp _ (VecAddOp IntVec n w) = Just (MO_V_Add n w) +translateOp _ (VecSubOp IntVec n w) = Just (MO_V_Sub n w) +translateOp _ (VecMulOp IntVec n w) = Just (MO_V_Mul n w) +translateOp _ (VecQuotOp IntVec n w) = Just (MO_VS_Quot n w) +translateOp _ (VecRemOp IntVec n w) = Just (MO_VS_Rem n w) +translateOp _ (VecNegOp IntVec n w) = Just (MO_VS_Neg n w) + +translateOp _ (VecAddOp WordVec n w) = Just (MO_V_Add n w) +translateOp _ (VecSubOp WordVec n w) = Just (MO_V_Sub n w) +translateOp _ (VecMulOp WordVec n w) = Just (MO_V_Mul n w) +translateOp _ (VecQuotOp WordVec n w) = Just (MO_VU_Quot n w) +translateOp _ (VecRemOp WordVec n w) = Just (MO_VU_Rem n w) -- Conversions @@ -1183,6 +1186,41 @@ setInfo :: CmmExpr -> CmmExpr -> CmmAGraph setInfo closure_ptr info_ptr = mkStore closure_ptr info_ptr ------------------------------------------------------------------------------ +-- Helpers for translating vector primops. + +vecVmmType :: PrimOpVecCat -> Length -> Width -> CmmType +vecVmmType pocat n w = vec n (vecCmmCat pocat w) + +vecCmmCat :: PrimOpVecCat -> Width -> CmmType +vecCmmCat IntVec = cmmBits +vecCmmCat WordVec = cmmBits +vecCmmCat FloatVec = cmmFloat + +vecElemInjectCast :: DynFlags -> PrimOpVecCat -> Width -> Maybe MachOp +vecElemInjectCast _ FloatVec _ = Nothing +vecElemInjectCast dflags IntVec W8 = Just (mo_WordTo8 dflags) +vecElemInjectCast dflags IntVec W16 = Just (mo_WordTo16 dflags) +vecElemInjectCast dflags IntVec W32 = Just (mo_WordTo32 dflags) +vecElemInjectCast _ IntVec W64 = Nothing +vecElemInjectCast dflags WordVec W8 = Just (mo_WordTo8 dflags) +vecElemInjectCast dflags WordVec W16 = Just (mo_WordTo16 dflags) +vecElemInjectCast dflags WordVec W32 = Just (mo_WordTo32 dflags) +vecElemInjectCast _ WordVec W64 = Nothing +vecElemInjectCast _ _ _ = Nothing + +vecElemProjectCast :: DynFlags -> PrimOpVecCat -> Width -> Maybe MachOp +vecElemProjectCast _ FloatVec _ = Nothing +vecElemProjectCast dflags IntVec W8 = Just (mo_s_8ToWord dflags) +vecElemProjectCast dflags IntVec W16 = Just (mo_s_16ToWord dflags) +vecElemProjectCast dflags IntVec W32 = Just (mo_s_32ToWord dflags) +vecElemProjectCast _ IntVec W64 = Nothing +vecElemProjectCast dflags WordVec W8 = Just (mo_u_8ToWord dflags) +vecElemProjectCast dflags WordVec W16 = Just (mo_u_16ToWord dflags) +vecElemProjectCast dflags WordVec W32 = Just (mo_u_32ToWord dflags) +vecElemProjectCast _ WordVec W64 = Nothing +vecElemProjectCast _ _ _ = Nothing + +------------------------------------------------------------------------------ -- Helpers for translating vector packing and unpacking. doVecPackOp :: Maybe MachOp -- Cast from element to vector component diff --git a/compiler/ghc.mk b/compiler/ghc.mk index 4fdadd7c30..5b9610103b 100644 --- a/compiler/ghc.mk +++ b/compiler/ghc.mk @@ -250,8 +250,12 @@ PRIMOP_BITS_NAMES = primop-data-decl.hs-incl \ primop-code-size.hs-incl \ primop-can-fail.hs-incl \ primop-strictness.hs-incl \ - primop-fixity.hs-incl \ - primop-primop-info.hs-incl + primop-fixity.hs-incl \ + primop-primop-info.hs-incl \ + primop-vector-uniques.hs-incl \ + primop-vector-tys.hs-incl \ + primop-vector-tys-exports.hs-incl \ + primop-vector-tycons.hs-incl PRIMOP_BITS_STAGE1 = $(addprefix compiler/stage1/build/,$(PRIMOP_BITS_NAMES)) PRIMOP_BITS_STAGE2 = $(addprefix compiler/stage2/build/,$(PRIMOP_BITS_NAMES)) @@ -290,6 +294,14 @@ compiler/stage$1/build/primop-fixity.hs-incl: compiler/stage$1/build/primops.txt "$$(genprimopcode_INPLACE)" --fixity < $$< > $$@ compiler/stage$1/build/primop-primop-info.hs-incl: compiler/stage$1/build/primops.txt $$$$(genprimopcode_INPLACE) "$$(genprimopcode_INPLACE)" --primop-primop-info < $$< > $$@ +compiler/stage$1/build/primop-vector-uniques.hs-incl: compiler/stage$1/build/primops.txt $$$$(genprimopcode_INPLACE) + "$$(genprimopcode_INPLACE)" --primop-vector-uniques < $$< > $$@ +compiler/stage$1/build/primop-vector-tys.hs-incl: compiler/stage$1/build/primops.txt $$$$(genprimopcode_INPLACE) + "$$(genprimopcode_INPLACE)" --primop-vector-tys < $$< > $$@ +compiler/stage$1/build/primop-vector-tys-exports.hs-incl: compiler/stage$1/build/primops.txt $$$$(genprimopcode_INPLACE) + "$$(genprimopcode_INPLACE)" --primop-vector-tys-exports < $$< > $$@ +compiler/stage$1/build/primop-vector-tycons.hs-incl: compiler/stage$1/build/primops.txt $$$$(genprimopcode_INPLACE) + "$$(genprimopcode_INPLACE)" --primop-vector-tycons < $$< > $$@ # Usages aren't used any more; but the generator # can still generate them if we want them back diff --git a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs index 33107c0b68..c52640b17f 100644 --- a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs +++ b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs @@ -967,6 +967,9 @@ genMachOp _ op [x] = case op of MO_VS_Quot _ _ -> panicOp MO_VS_Rem _ _ -> panicOp + + MO_VU_Quot _ _ -> panicOp + MO_VU_Rem _ _ -> panicOp MO_VF_Insert _ _ -> panicOp MO_VF_Extract _ _ -> panicOp @@ -1140,6 +1143,9 @@ genMachOp_slow opt op [x, y] = case op of MO_VS_Quot l w -> genCastBinMach (LMVector l (widthToLlvmInt w)) LM_MO_SDiv MO_VS_Rem l w -> genCastBinMach (LMVector l (widthToLlvmInt w)) LM_MO_SRem + + MO_VU_Quot l w -> genCastBinMach (LMVector l (widthToLlvmInt w)) LM_MO_UDiv + MO_VU_Rem l w -> genCastBinMach (LMVector l (widthToLlvmInt w)) LM_MO_URem MO_VF_Add l w -> genCastBinMach (LMVector l (widthToLlvmFloat w)) LM_MO_FAdd MO_VF_Sub l w -> genCastBinMach (LMVector l (widthToLlvmFloat w)) LM_MO_FSub diff --git a/compiler/nativeGen/X86/CodeGen.hs b/compiler/nativeGen/X86/CodeGen.hs index f6143d3fb9..e18da25347 100644 --- a/compiler/nativeGen/X86/CodeGen.hs +++ b/compiler/nativeGen/X86/CodeGen.hs @@ -610,6 +610,8 @@ getRegister' dflags is32Bit (CmmMachOp mop [x]) = do -- unary MachOps MO_VS_Quot {} -> needLlvm MO_VS_Rem {} -> needLlvm MO_VS_Neg {} -> needLlvm + MO_VU_Quot {} -> needLlvm + MO_VU_Rem {} -> needLlvm MO_VF_Insert {} -> needLlvm MO_VF_Extract {} -> needLlvm MO_VF_Add {} -> needLlvm diff --git a/compiler/prelude/PrelNames.lhs b/compiler/prelude/PrelNames.lhs index 81fb9be52a..07730e653d 100644 --- a/compiler/prelude/PrelNames.lhs +++ b/compiler/prelude/PrelNames.lhs @@ -1474,15 +1474,6 @@ typeNatMulTyFamNameKey = mkPreludeTyConUnique 163 typeNatExpTyFamNameKey = mkPreludeTyConUnique 164 typeNatLeqTyFamNameKey = mkPreludeTyConUnique 165 --- SIMD vector types (Unique keys) -floatX4PrimTyConKey, doubleX2PrimTyConKey, int32X4PrimTyConKey, - int64X2PrimTyConKey :: Unique - -floatX4PrimTyConKey = mkPreludeTyConUnique 170 -doubleX2PrimTyConKey = mkPreludeTyConUnique 171 -int32X4PrimTyConKey = mkPreludeTyConUnique 172 -int64X2PrimTyConKey = mkPreludeTyConUnique 173 - ntTyConKey:: Unique ntTyConKey = mkPreludeTyConUnique 174 coercibleTyConKey :: Unique @@ -1492,6 +1483,12 @@ coercibleTyConKey = mkPreludeTyConUnique 175 -- USES TyConUniques 200-299 ----------------------------------------------------- +----------------------- SIMD ------------------------ +-- USES TyConUniques 300-399 +----------------------------------------------------- + +#include "primop-vector-uniques.hs-incl" + unitTyConKey :: Unique unitTyConKey = mkTupleTyConUnique BoxedTuple 0 \end{code} diff --git a/compiler/prelude/PrimOp.lhs b/compiler/prelude/PrimOp.lhs index 8b1970c37f..22753ee3ea 100644 --- a/compiler/prelude/PrimOp.lhs +++ b/compiler/prelude/PrimOp.lhs @@ -5,7 +5,7 @@ \begin{code} module PrimOp ( - PrimOp(..), allThePrimOps, + PrimOp(..), PrimOpVecCat(..), allThePrimOps, primOpType, primOpSig, primOpTag, maxPrimOpTag, primOpOcc, @@ -25,6 +25,7 @@ module PrimOp ( import TysPrim import TysWiredIn +import CmmType import Demand import Var ( TyVar ) import OccName ( OccName, pprOccName, mkVarOccFS ) @@ -64,6 +65,7 @@ primOpTag op = iBox (tagOf_PrimOp op) -- supplies -- tagOf_PrimOp :: PrimOp -> FastInt #include "primop-tag.hs-incl" +tagOf_PrimOp _ = error "tagOf_PrimOp: unknown primop" instance Eq PrimOp where @@ -82,6 +84,12 @@ instance Outputable PrimOp where ppr op = pprPrimOp op \end{code} +\begin{code} +data PrimOpVecCat = IntVec + | WordVec + | FloatVec +\end{code} + An @Enum@-derived list would be better; meanwhile... (ToDo) \begin{code} @@ -173,6 +181,7 @@ else, notably a type, can be constructed) for each @PrimOp@. \begin{code} primOpInfo :: PrimOp -> PrimOpInfo #include "primop-primop-info.hs-incl" +primOpInfo _ = error "primOpInfo: unknown primop" \end{code} Here are a load of comments from the old primOp info: diff --git a/compiler/prelude/TysPrim.lhs b/compiler/prelude/TysPrim.lhs index f166065b22..b17f1a6f9a 100644 --- a/compiler/prelude/TysPrim.lhs +++ b/compiler/prelude/TysPrim.lhs @@ -76,11 +76,8 @@ module TysPrim( -- * Any anyTy, anyTyCon, anyTypeOfKind, - -- * SIMD - floatX4PrimTyCon, floatX4PrimTy, - doubleX2PrimTyCon, doubleX2PrimTy, - int32X4PrimTyCon, int32X4PrimTy, - int64X2PrimTyCon, int64X2PrimTy + -- * SIMD +#include "primop-vector-tys-exports.hs-incl" ) where #include "HsVersions.h" @@ -144,10 +141,7 @@ primTyCons , superKindTyCon , anyKindTyCon - , floatX4PrimTyCon - , doubleX2PrimTyCon - , int32X4PrimTyCon - , int64X2PrimTyCon +#include "primop-vector-tycons.hs-incl" ] mkPrimTc :: FastString -> Unique -> TyCon -> Name @@ -157,7 +151,7 @@ mkPrimTc fs unique tycon (ATyCon tycon) -- Relevant TyCon UserSyntax -- None are built-in syntax -charPrimTyConName, intPrimTyConName, int32PrimTyConName, int64PrimTyConName, wordPrimTyConName, word32PrimTyConName, word64PrimTyConName, addrPrimTyConName, floatPrimTyConName, doublePrimTyConName, statePrimTyConName, realWorldTyConName, arrayPrimTyConName, arrayArrayPrimTyConName, byteArrayPrimTyConName, mutableArrayPrimTyConName, mutableByteArrayPrimTyConName, mutableArrayArrayPrimTyConName, mutVarPrimTyConName, mVarPrimTyConName, tVarPrimTyConName, stablePtrPrimTyConName, stableNamePrimTyConName, bcoPrimTyConName, weakPrimTyConName, threadIdPrimTyConName, eqPrimTyConName, eqReprPrimTyConName, floatX4PrimTyConName, doubleX2PrimTyConName, int32X4PrimTyConName, int64X2PrimTyConName :: Name +charPrimTyConName, intPrimTyConName, int32PrimTyConName, int64PrimTyConName, wordPrimTyConName, word32PrimTyConName, word64PrimTyConName, addrPrimTyConName, floatPrimTyConName, doublePrimTyConName, statePrimTyConName, realWorldTyConName, arrayPrimTyConName, arrayArrayPrimTyConName, byteArrayPrimTyConName, mutableArrayPrimTyConName, mutableByteArrayPrimTyConName, mutableArrayArrayPrimTyConName, mutVarPrimTyConName, mVarPrimTyConName, tVarPrimTyConName, stablePtrPrimTyConName, stableNamePrimTyConName, bcoPrimTyConName, weakPrimTyConName, threadIdPrimTyConName, eqPrimTyConName, eqReprPrimTyConName :: Name charPrimTyConName = mkPrimTc (fsLit "Char#") charPrimTyConKey charPrimTyCon intPrimTyConName = mkPrimTc (fsLit "Int#") intPrimTyConKey intPrimTyCon int32PrimTyConName = mkPrimTc (fsLit "Int32#") int32PrimTyConKey int32PrimTyCon @@ -186,10 +180,6 @@ stableNamePrimTyConName = mkPrimTc (fsLit "StableName#") stableNamePrimTyC bcoPrimTyConName = mkPrimTc (fsLit "BCO#") bcoPrimTyConKey bcoPrimTyCon weakPrimTyConName = mkPrimTc (fsLit "Weak#") weakPrimTyConKey weakPrimTyCon threadIdPrimTyConName = mkPrimTc (fsLit "ThreadId#") threadIdPrimTyConKey threadIdPrimTyCon -floatX4PrimTyConName = mkPrimTc (fsLit "FloatX4#") floatX4PrimTyConKey floatX4PrimTyCon -doubleX2PrimTyConName = mkPrimTc (fsLit "DoubleX2#") doubleX2PrimTyConKey doubleX2PrimTyCon -int32X4PrimTyConName = mkPrimTc (fsLit "Int32X4#") int32X4PrimTyConKey int32X4PrimTyCon -int64X2PrimTyConName = mkPrimTc (fsLit "Int64X2#") int64X2PrimTyConKey int64X2PrimTyCon \end{code} %************************************************************************ @@ -766,28 +756,10 @@ anyTypeOfKind kind = TyConApp anyTyCon [kind] %************************************************************************ %* * -\subsection{SIMD vector type} +\subsection{SIMD vector types} %* * %************************************************************************ \begin{code} -floatX4PrimTy :: Type -floatX4PrimTy = mkTyConTy floatX4PrimTyCon -floatX4PrimTyCon :: TyCon -floatX4PrimTyCon = pcPrimTyCon0 floatX4PrimTyConName (VecRep 4 FloatElemRep) - -doubleX2PrimTy :: Type -doubleX2PrimTy = mkTyConTy doubleX2PrimTyCon -doubleX2PrimTyCon :: TyCon -doubleX2PrimTyCon = pcPrimTyCon0 doubleX2PrimTyConName (VecRep 2 DoubleElemRep) - -int32X4PrimTy :: Type -int32X4PrimTy = mkTyConTy int32X4PrimTyCon -int32X4PrimTyCon :: TyCon -int32X4PrimTyCon = pcPrimTyCon0 int32X4PrimTyConName (VecRep 4 Int32ElemRep) - -int64X2PrimTy :: Type -int64X2PrimTy = mkTyConTy int64X2PrimTyCon -int64X2PrimTyCon :: TyCon -int64X2PrimTyCon = pcPrimTyCon0 int64X2PrimTyConName (VecRep 2 Int64ElemRep) +#include "primop-vector-tys.hs-incl" \end{code} diff --git a/compiler/prelude/primops.txt.pp b/compiler/prelude/primops.txt.pp index cfd6afa4c6..f4b7b6c5d0 100644 --- a/compiler/prelude/primops.txt.pp +++ b/compiler/prelude/primops.txt.pp @@ -39,6 +39,22 @@ -- (eg, out_of_line), whilst avoiding parsing complex expressions -- needed for strictness info. +-- The vector attribute is rather special. It takes a list of 3-tuples, each of +-- which is of the form <ELEM_TYPE,SCALAR_TYPE,LENGTH>. ELEM_TYPE is the type of +-- the elements in the vector; LENGTH is the length of the vector; and +-- SCALAR_TYPE is the scalar type used to inject to/project from vector +-- element. Note that ELEM_TYPE and SCALAR_TYPE are not the same; for example, +-- to broadcast a scalar value to a vector whose elements are of type Int8, we +-- use an Int#. + +-- When a primtype or primop has a vector attribute, it is instantiated at each +-- 3-tuple in the list of 3-tuples. That is, the vector attribute allows us to +-- define a family of types or primops. Vector support also adds three new +-- keywords: VECTOR, SCALAR, and VECTUPLE. These keywords are expanded to types +-- derived from the 3-tuple. For the 3-tuple <Int64,INT64,2>, VECTOR expands to +-- Int64X2#, SCALAR expands to INT64, and VECTUPLE expands to (# INT64, INT64 +-- #). + defaults has_side_effects = False out_of_line = False -- See Note Note [PrimOp can_fail and has_side_effects] in PrimOp @@ -48,6 +64,7 @@ defaults strictness = { \ arity -> mkStrictSig (mkTopDmdType (replicate arity topDmd) topRes) } fixity = Nothing llvm_only = False + vector = [] -- Currently, documentation is produced using latex, so contents of -- description fields should be legal latex. Descriptions can contain @@ -2373,479 +2390,194 @@ primclass Coercible a b } ------------------------------------------------------------------------ -section "Float SIMD Vectors" - {Operations on SIMD vectors of 4 single-precision (32-bit) - floating-point numbers.} +section "SIMD Vectors" + {Operations on SIMD vectors.} ------------------------------------------------------------------------ -primtype FloatX4# - with llvm_only = True +#define ALL_VECTOR_TYPES \ + [<Int32,INT32,4>,<Int64,INT64,2> \ + ,<Word32,WORD32,4>,<Word64,WORD64,2> \ + ,<Float,Float#,4>,<Double,Double#,2>] -primop FloatToFloatX4Op "floatToFloatX4#" GenPrimOp - Float# -> FloatX4# - with llvm_only = True +#define SIGNED_VECTOR_TYPES \ + [<Int32,INT32,4>,<Int64,INT64,2> \ + ,<Float,Float#,4>,<Double,Double#,2>] -primop FloatX4PackOp "packFloatX4#" GenPrimOp - Float# -> Float# -> Float# -> Float# -> FloatX4# - with llvm_only = True +#define FLOAT_VECTOR_TYPES \ + [<Float,Float#,4>,<Double,Double#,2>] -primop FloatX4UnpackOp "unpackFloatX4#" GenPrimOp - FloatX4# -> (# Float#, Float#, Float#, Float# #) - with llvm_only = True +#define INT_VECTOR_TYPES \ + [<Int32,INT32,4>,<Int64,INT64,2> \ + ,<Word32,WORD32,4>,<Word64,WORD64,2>] -primop FloatX4InsertOp "insertFloatX4#" GenPrimOp - FloatX4# -> Float# -> Int# -> FloatX4# - with can_fail = True - llvm_only = True - -primop FloatX4AddOp "plusFloatX4#" Dyadic - FloatX4# -> FloatX4# -> FloatX4# - with commutable = True - llvm_only = True - -primop FloatX4SubOp "minusFloatX4#" Dyadic - FloatX4# -> FloatX4# -> FloatX4# +primtype VECTOR with llvm_only = True + vector = ALL_VECTOR_TYPES -primop FloatX4MulOp "timesFloatX4#" Dyadic - FloatX4# -> FloatX4# -> FloatX4# - with commutable = True - llvm_only = True - -primop FloatX4DivOp "divideFloatX4#" Dyadic - FloatX4# -> FloatX4# -> FloatX4# - with can_fail = True - llvm_only = True - -primop FloatX4NegOp "negateFloatX4#" Monadic - FloatX4# -> FloatX4# +primop VecBroadcastOp "broadcast#" GenPrimOp + SCALAR -> VECTOR + { Broadcast a scalar to all elements of a vector. } with llvm_only = True + vector = ALL_VECTOR_TYPES -primop IndexByteArrayOp_FloatX4 "indexFloatX4Array#" GenPrimOp - ByteArray# -> Int# -> FloatX4# - with can_fail = True - llvm_only = True - -primop ReadByteArrayOp_FloatX4 "readFloatX4Array#" GenPrimOp - MutableByteArray# s -> Int# -> State# s -> (# State# s, FloatX4# #) - with has_side_effects = True - can_fail = True - llvm_only = True - -primop WriteByteArrayOp_FloatX4 "writeFloatX4Array#" GenPrimOp - MutableByteArray# s -> Int# -> FloatX4# -> State# s -> State# s - with has_side_effects = True - can_fail = True - llvm_only = True - -primop IndexOffAddrOp_FloatX4 "indexFloatX4OffAddr#" GenPrimOp - Addr# -> Int# -> FloatX4# - with can_fail = True - llvm_only = True - -primop ReadOffAddrOp_FloatX4 "readFloatX4OffAddr#" GenPrimOp - Addr# -> Int# -> State# s -> (# State# s, FloatX4# #) - with has_side_effects = True - can_fail = True - llvm_only = True - -primop WriteOffAddrOp_FloatX4 "writeFloatX4OffAddr#" GenPrimOp - Addr# -> Int# -> FloatX4# -> State# s -> State# s - with has_side_effects = True - can_fail = True - llvm_only = True - -primop IndexByteArrayOp_FloatAsFloatX4 "indexFloatArrayAsFloatX4#" GenPrimOp - ByteArray# -> Int# -> FloatX4# - with can_fail = True - llvm_only = True - -primop ReadByteArrayOp_FloatAsFloatX4 "readFloatArrayAsFloatX4#" GenPrimOp - MutableByteArray# s -> Int# -> State# s -> (# State# s, FloatX4# #) - with has_side_effects = True - can_fail = True - llvm_only = True - -primop WriteByteArrayOp_FloatAsFloatX4 "writeFloatArrayAsFloatX4#" GenPrimOp - MutableByteArray# s -> Int# -> FloatX4# -> State# s -> State# s - with has_side_effects = True - can_fail = True - llvm_only = True - -primop IndexOffAddrOp_FloatAsFloatX4 "indexFloatOffAddrAsFloatX4#" GenPrimOp - Addr# -> Int# -> FloatX4# - with can_fail = True - llvm_only = True - -primop ReadOffAddrOp_FloatAsFloatX4 "readFloatOffAddrAsFloatX4#" GenPrimOp - Addr# -> Int# -> State# s -> (# State# s, FloatX4# #) - with has_side_effects = True - can_fail = True - llvm_only = True - -primop WriteOffAddrOp_FloatAsFloatX4 "writeFloatOffAddrAsFloatX4#" GenPrimOp - Addr# -> Int# -> FloatX4# -> State# s -> State# s - with has_side_effects = True - can_fail = True - llvm_only = True - ------------------------------------------------------------------------- -section "Double SIMD Vectors" - {Operations on SIMD vectors of 2 double-precision (64-bit) - floating-point numbers.} ------------------------------------------------------------------------- - -primtype DoubleX2# +primop VecPackOp "pack#" GenPrimOp + VECTUPLE -> VECTOR + { Pack the elements of an unboxed tuple into a vector. } with llvm_only = True + vector = ALL_VECTOR_TYPES -primop DoubleToDoubleX2Op "doubleToDoubleX2#" GenPrimOp - Double# -> DoubleX2# +primop VecUnpackOp "unpack#" GenPrimOp + VECTOR -> VECTUPLE + { Unpack the elements of a vector into an unboxed tuple. #} with llvm_only = True + vector = ALL_VECTOR_TYPES -primop DoubleX2InsertOp "insertDoubleX2#" GenPrimOp - DoubleX2# -> Double# -> Int# -> DoubleX2# +primop VecInsertOp "insert#" GenPrimOp + VECTOR -> SCALAR -> Int# -> VECTOR + { Insert a scalar at the given position in a vector. } with can_fail = True llvm_only = True + vector = ALL_VECTOR_TYPES -primop DoubleX2PackOp "packDoubleX2#" GenPrimOp - Double# -> Double# -> DoubleX2# - with llvm_only = True - -primop DoubleX2UnpackOp "unpackDoubleX2#" GenPrimOp - DoubleX2# -> (# Double#, Double# #) - with llvm_only = True - -primop DoubleX2AddOp "plusDoubleX2#" Dyadic - DoubleX2# -> DoubleX2# -> DoubleX2# +primop VecAddOp "plus#" Dyadic + VECTOR -> VECTOR -> VECTOR + { Add two vectors element-wise. } with commutable = True llvm_only = True + vector = ALL_VECTOR_TYPES -primop DoubleX2SubOp "minusDoubleX2#" Dyadic - DoubleX2# -> DoubleX2# -> DoubleX2# +primop VecSubOp "minus#" Dyadic + VECTOR -> VECTOR -> VECTOR + { Subtract two vectors element-wise. } with llvm_only = True + vector = ALL_VECTOR_TYPES -primop DoubleX2MulOp "timesDoubleX2#" Dyadic - DoubleX2# -> DoubleX2# -> DoubleX2# +primop VecMulOp "times#" Dyadic + VECTOR -> VECTOR -> VECTOR + { Multiply two vectors element-wise. } with commutable = True llvm_only = True + vector = ALL_VECTOR_TYPES -primop DoubleX2DivOp "divideDoubleX2#" Dyadic - DoubleX2# -> DoubleX2# -> DoubleX2# - with can_fail = True - llvm_only = True - -primop DoubleX2NegOp "negateDoubleX2#" Monadic - DoubleX2# -> DoubleX2# - with llvm_only = True - -primop IndexByteArrayOp_DoubleX2 "indexDoubleX2Array#" GenPrimOp - ByteArray# -> Int# -> DoubleX2# - with can_fail = True - llvm_only = True - -primop ReadByteArrayOp_DoubleX2 "readDoubleX2Array#" GenPrimOp - MutableByteArray# s -> Int# -> State# s -> (# State# s, DoubleX2# #) - with has_side_effects = True - can_fail = True - llvm_only = True - -primop WriteByteArrayOp_DoubleX2 "writeDoubleX2Array#" GenPrimOp - MutableByteArray# s -> Int# -> DoubleX2# -> State# s -> State# s - with has_side_effects = True - can_fail = True - llvm_only = True - -primop IndexOffAddrOp_DoubleX2 "indexDoubleX2OffAddr#" GenPrimOp - Addr# -> Int# -> DoubleX2# - with can_fail = True - llvm_only = True - -primop ReadOffAddrOp_DoubleX2 "readDoubleX2OffAddr#" GenPrimOp - Addr# -> Int# -> State# s -> (# State# s, DoubleX2# #) - with has_side_effects = True - can_fail = True - llvm_only = True - -primop WriteOffAddrOp_DoubleX2 "writeDoubleX2OffAddr#" GenPrimOp - Addr# -> Int# -> DoubleX2# -> State# s -> State# s - with has_side_effects = True - can_fail = True - llvm_only = True - -primop IndexByteArrayOp_DoubleAsDoubleX2 "indexDoubleArrayAsDoubleX2#" GenPrimOp - ByteArray# -> Int# -> DoubleX2# - with can_fail = True - llvm_only = True - -primop ReadByteArrayOp_DoubleAsDoubleX2 "readDoubleArrayAsDoubleX2#" GenPrimOp - MutableByteArray# s -> Int# -> State# s -> (# State# s, DoubleX2# #) - with has_side_effects = True - can_fail = True - llvm_only = True - -primop WriteByteArrayOp_DoubleAsDoubleX2 "writeDoubleArrayAsDoubleX2#" GenPrimOp - MutableByteArray# s -> Int# -> DoubleX2# -> State# s -> State# s - with has_side_effects = True - can_fail = True - llvm_only = True - -primop IndexOffAddrOp_DoubleAsDoubleX2 "indexDoubleOffAddrAsDoubleX2#" GenPrimOp - Addr# -> Int# -> DoubleX2# - with can_fail = True - llvm_only = True - -primop ReadOffAddrOp_DoubleAsDoubleX2 "readDoubleOffAddrAsDoubleX2#" GenPrimOp - Addr# -> Int# -> State# s -> (# State# s, DoubleX2# #) - with has_side_effects = True - can_fail = True - llvm_only = True - -primop WriteOffAddrOp_DoubleAsDoubleX2 "writeDoubleOffAddrAsDoubleX2#" GenPrimOp - Addr# -> Int# -> DoubleX2# -> State# s -> State# s - with has_side_effects = True - can_fail = True - llvm_only = True - ------------------------------------------------------------------------- -section "Int32 SIMD Vectors" - {Operations on SIMD vectors of 4 32-bit signed integers.} ------------------------------------------------------------------------- - -primtype Int32X4# - with llvm_only = True - -primop Int32ToInt32X4Op "int32ToInt32X4#" GenPrimOp - INT32 -> Int32X4# - with llvm_only = True - -primop Int32X4InsertOp "insertInt32X4#" GenPrimOp - Int32X4# -> INT32 -> Int# -> Int32X4# +primop VecDivOp "divide#" Dyadic + VECTOR -> VECTOR -> VECTOR + { Divide two vectors element-wise. } with can_fail = True llvm_only = True + vector = FLOAT_VECTOR_TYPES -primop Int32X4PackOp "packInt32X4#" GenPrimOp - INT32 -> INT32 -> INT32 -> INT32 -> Int32X4# - with llvm_only = True - -primop Int32X4UnpackOp "unpackInt32X4#" GenPrimOp - Int32X4# -> (# INT32, INT32, INT32, INT32 #) - with llvm_only = True - -primop Int32X4AddOp "plusInt32X4#" Dyadic - Int32X4# -> Int32X4# -> Int32X4# - with commutable = True - llvm_only = True - -primop Int32X4SubOp "minusInt32X4#" Dyadic - Int32X4# -> Int32X4# -> Int32X4# - with llvm_only = True - -primop Int32X4MulOp "timesInt32X4#" Dyadic - Int32X4# -> Int32X4# -> Int32X4# - with commutable = True - llvm_only = True - -primop Int32X4QuotOp "quotInt32X4#" Dyadic - Int32X4# -> Int32X4# -> Int32X4# +primop VecQuotOp "quot#" Dyadic + VECTOR -> VECTOR -> VECTOR + { Rounds towards zero element-wise. } with can_fail = True llvm_only = True + vector = INT_VECTOR_TYPES -primop Int32X4RemOp "remInt32X4#" Dyadic - Int32X4# -> Int32X4# -> Int32X4# +primop VecRemOp "rem#" Dyadic + VECTOR -> VECTOR -> VECTOR + { Satisfies \texttt{(quot\# x y) times\# y plus\# (rem\# x y) == x}. } with can_fail = True llvm_only = True + vector = INT_VECTOR_TYPES -primop Int32X4NegOp "negateInt32X4#" Monadic - Int32X4# -> Int32X4# +primop VecNegOp "negate#" Monadic + VECTOR -> VECTOR + { Negate element-wise. } with llvm_only = True + vector = SIGNED_VECTOR_TYPES -primop IndexByteArrayOp_Int32X4 "indexInt32X4Array#" GenPrimOp - ByteArray# -> Int# -> Int32X4# - with can_fail = True - llvm_only = True - -primop ReadByteArrayOp_Int32X4 "readInt32X4Array#" GenPrimOp - MutableByteArray# s -> Int# -> State# s -> (# State# s, Int32X4# #) - with has_side_effects = True - can_fail = True - llvm_only = True - -primop WriteByteArrayOp_Int32X4 "writeInt32X4Array#" GenPrimOp - MutableByteArray# s -> Int# -> Int32X4# -> State# s -> State# s - with has_side_effects = True - can_fail = True - llvm_only = True - -primop IndexOffAddrOp_Int32X4 "indexInt32X4OffAddr#" GenPrimOp - Addr# -> Int# -> Int32X4# - with can_fail = True - llvm_only = True - -primop ReadOffAddrOp_Int32X4 "readInt32X4OffAddr#" GenPrimOp - Addr# -> Int# -> State# s -> (# State# s, Int32X4# #) - with has_side_effects = True - can_fail = True - llvm_only = True - -primop WriteOffAddrOp_Int32X4 "writeInt32X4OffAddr#" GenPrimOp - Addr# -> Int# -> Int32X4# -> State# s -> State# s - with has_side_effects = True - can_fail = True - llvm_only = True - -primop IndexByteArrayOp_Int32AsInt32X4 "indexInt32ArrayAsInt32X4#" GenPrimOp - ByteArray# -> Int# -> Int32X4# - with can_fail = True - llvm_only = True - -primop ReadByteArrayOp_Int32AsInt32X4 "readInt32ArrayAsInt32X4#" GenPrimOp - MutableByteArray# s -> Int# -> State# s -> (# State# s, Int32X4# #) - with has_side_effects = True - can_fail = True - llvm_only = True - -primop WriteByteArrayOp_Int32AsInt32X4 "writeInt32ArrayAsInt32X4#" GenPrimOp - MutableByteArray# s -> Int# -> Int32X4# -> State# s -> State# s - with has_side_effects = True - can_fail = True - llvm_only = True - -primop IndexOffAddrOp_Int32AsInt32X4 "indexInt32OffAddrAsInt32X4#" GenPrimOp - Addr# -> Int# -> Int32X4# +primop VecIndexByteArrayOp "indexArray#" GenPrimOp + ByteArray# -> Int# -> VECTOR + { Read a vector from specified index of immutable array. } with can_fail = True llvm_only = True + vector = ALL_VECTOR_TYPES -primop ReadOffAddrOp_Int32AsInt32X4 "readInt32OffAddrAsInt32X4#" GenPrimOp - Addr# -> Int# -> State# s -> (# State# s, Int32X4# #) +primop VecReadByteArrayOp "readArray#" GenPrimOp + MutableByteArray# s -> Int# -> State# s -> (# State# s, VECTOR #) + { Read a vector from specified index of mutable array. } with has_side_effects = True can_fail = True llvm_only = True + vector = ALL_VECTOR_TYPES -primop WriteOffAddrOp_Int32AsInt32X4 "writeInt32OffAddrAsInt32X4#" GenPrimOp - Addr# -> Int# -> Int32X4# -> State# s -> State# s +primop VecWriteByteArrayOp "writeArray#" GenPrimOp + MutableByteArray# s -> Int# -> VECTOR -> State# s -> State# s + { Write a vector to specified index of mutable array. } with has_side_effects = True can_fail = True llvm_only = True + vector = ALL_VECTOR_TYPES ------------------------------------------------------------------------- -section "Int64 SIMD Vectors" - {Operations on SIMD vectors of 2 64-bit signed integers.} ------------------------------------------------------------------------- - -primtype Int64X2# - with llvm_only = True - -primop Int64ToInt64X2Op "int64ToInt64X2#" GenPrimOp - INT64 -> Int64X2# - with llvm_only = True - -primop Int64X2InsertOp "insertInt64X2#" GenPrimOp - Int64X2# -> INT64 -> Int# -> Int64X2# - with can_fail = True - llvm_only = True - -primop Int64X2PackOp "packInt64X2#" GenPrimOp - INT64 -> INT64 -> Int64X2# - with llvm_only = True - -primop Int64X2UnpackOp "unpackInt64X2#" GenPrimOp - Int64X2# -> (# INT64, INT64 #) - with llvm_only = True - -primop Int64X2AddOp "plusInt64X2#" Dyadic - Int64X2# -> Int64X2# -> Int64X2# - with commutable = True - llvm_only = True - -primop Int64X2SubOp "minusInt64X2#" Dyadic - Int64X2# -> Int64X2# -> Int64X2# - with llvm_only = True - -primop Int64X2MulOp "timesInt64X2#" Dyadic - Int64X2# -> Int64X2# -> Int64X2# - with commutable = True - llvm_only = True - -primop Int64X2QuotOp "quotInt64X2#" Dyadic - Int64X2# -> Int64X2# -> Int64X2# - with can_fail = True - llvm_only = True - -primop Int64X2RemOp "remInt64X2#" Dyadic - Int64X2# -> Int64X2# -> Int64X2# - with can_fail = True - llvm_only = True - -primop Int64X2NegOp "negateInt64X2#" Monadic - Int64X2# -> Int64X2# - with llvm_only = True - -primop IndexByteArrayOp_Int64X2 "indexInt64X2Array#" GenPrimOp - ByteArray# -> Int# -> Int64X2# +primop VecIndexOffAddrOp "indexOffAddr#" GenPrimOp + Addr# -> Int# -> VECTOR + { Reads vector; offset in bytes. } with can_fail = True llvm_only = True + vector = ALL_VECTOR_TYPES -primop ReadByteArrayOp_Int64X2 "readInt64X2Array#" GenPrimOp - MutableByteArray# s -> Int# -> State# s -> (# State# s, Int64X2# #) +primop VecReadOffAddrOp "readOffAddr#" GenPrimOp + Addr# -> Int# -> State# s -> (# State# s, VECTOR #) + { Reads vector; offset in bytes. } with has_side_effects = True can_fail = True llvm_only = True + vector = ALL_VECTOR_TYPES -primop WriteByteArrayOp_Int64X2 "writeInt64X2Array#" GenPrimOp - MutableByteArray# s -> Int# -> Int64X2# -> State# s -> State# s +primop VecWriteOffAddrOp "writeOffAddr#" GenPrimOp + Addr# -> Int# -> VECTOR -> State# s -> State# s + { Write vector; offset in bytes. } with has_side_effects = True can_fail = True llvm_only = True + vector = ALL_VECTOR_TYPES -primop IndexOffAddrOp_Int64X2 "indexInt64X2OffAddr#" GenPrimOp - Addr# -> Int# -> Int64X2# - with can_fail = True - llvm_only = True - -primop ReadOffAddrOp_Int64X2 "readInt64X2OffAddr#" GenPrimOp - Addr# -> Int# -> State# s -> (# State# s, Int64X2# #) - with has_side_effects = True - llvm_only = True - -primop WriteOffAddrOp_Int64X2 "writeInt64X2OffAddr#" GenPrimOp - Addr# -> Int# -> Int64X2# -> State# s -> State# s - with has_side_effects = True - can_fail = True - llvm_only = True -primop IndexByteArrayOp_Int64AsInt64X2 "indexInt64ArrayAsInt64X2#" GenPrimOp - ByteArray# -> Int# -> Int64X2# +primop VecIndexScalarByteArrayOp "indexArrayAs#" GenPrimOp + ByteArray# -> Int# -> VECTOR + { Read a vector from specified index of immutable array of scalars; offset is in scalar elements. } with can_fail = True llvm_only = True + vector = ALL_VECTOR_TYPES -primop ReadByteArrayOp_Int64AsInt64X2 "readInt64ArrayAsInt64X2#" GenPrimOp - MutableByteArray# s -> Int# -> State# s -> (# State# s, Int64X2# #) +primop VecReadScalarByteArrayOp "readArrayAs#" GenPrimOp + MutableByteArray# s -> Int# -> State# s -> (# State# s, VECTOR #) + { Read a vector from specified index of mutable array of scalars; offset is in scalar elements. } with has_side_effects = True can_fail = True llvm_only = True + vector = ALL_VECTOR_TYPES -primop WriteByteArrayOp_Int64AsInt64X2 "writeInt64ArrayAsInt64X2#" GenPrimOp - MutableByteArray# s -> Int# -> Int64X2# -> State# s -> State# s +primop VecWriteScalarByteArrayOp "writeArrayAs#" GenPrimOp + MutableByteArray# s -> Int# -> VECTOR -> State# s -> State# s + { Write a vector to specified index of mutable array of scalars; offset is in scalar elements. } with has_side_effects = True can_fail = True llvm_only = True + vector = ALL_VECTOR_TYPES -primop IndexOffAddrOp_Int64AsInt64X2 "indexInt64OffAddrAsInt64X2#" GenPrimOp - Addr# -> Int# -> Int64X2# +primop VecIndexScalarOffAddrOp "indexOffAddrAs#" GenPrimOp + Addr# -> Int# -> VECTOR + { Reads vector; offset in scalar elements. } with can_fail = True llvm_only = True + vector = ALL_VECTOR_TYPES -primop ReadOffAddrOp_Int64AsInt64X2 "readInt64OffAddrAsInt64X2#" GenPrimOp - Addr# -> Int# -> State# s -> (# State# s, Int64X2# #) +primop VecReadScalarOffAddrOp "readOffAddrAs#" GenPrimOp + Addr# -> Int# -> State# s -> (# State# s, VECTOR #) + { Reads vector; offset in scalar elements. } with has_side_effects = True can_fail = True llvm_only = True + vector = ALL_VECTOR_TYPES -primop WriteOffAddrOp_Int64AsInt64X2 "writeInt64OffAddrAsInt64X2#" GenPrimOp - Addr# -> Int# -> Int64X2# -> State# s -> State# s +primop VecWriteScalarOffAddrOp "writeOffAddrAs#" GenPrimOp + Addr# -> Int# -> VECTOR -> State# s -> State# s + { Write vector; offset in scalar elements. } with has_side_effects = True can_fail = True llvm_only = True + vector = ALL_VECTOR_TYPES ------------------------------------------------------------------------ section "Prefetch" |