summary | refs | log | tree | commit | diff
path: root/compiler
diff options
context:
space:
mode:
author Paul Monday <paul.monday@parsci.com> 2011-11-03 15:35:37 -0600
committer Geoffrey Mainland <gmainlan@microsoft.com> 2013-02-01 22:00:24 +0000
commit 3984b912854a2261f5bf35e25bf929c15490e71a (patch)
tree 318403ba7b6550e878b9f09574741dea759f1f58 /compiler
parent 4af62075bbe9e96a3678fc90288496e0c4c7c17d (diff)
download haskell-3984b912854a2261f5bf35e25bf929c15490e71a.tar.gz
Add the Int32X4# primitive type and associated primops.
Diffstat (limited to 'compiler')
-rw-r--r-- compiler/cmm/CmmMachOp.hs 26
-rw-r--r-- compiler/cmm/PprC.hs 26
-rw-r--r-- compiler/codeGen/StgCmmPrim.hs 37
-rw-r--r-- compiler/llvmGen/LlvmCodeGen/CodeGen.hs 23
-rw-r--r-- compiler/nativeGen/X86/CodeGen.hs 18
-rw-r--r-- compiler/prelude/PrelNames.lhs 3
-rw-r--r-- compiler/prelude/TysPrim.lhs 12
-rw-r--r-- compiler/prelude/primops.txt.pp 98
8 files changed, 240 insertions, 3 deletions
diff --git a/compiler/cmm/CmmMachOp.hs b/compiler/cmm/CmmMachOp.hs
index 304dfb0938..4e38cd42b9 100644
--- a/compiler/cmm/CmmMachOp.hs
+++ b/compiler/cmm/CmmMachOp.hs
@@ -107,6 +107,16 @@ data MachOp
-- Vector element insertion and extraction operations
| MO_V_Insert Length Width -- Insert scalar into vector
| MO_V_Extract Length Width -- Extract scalar from vector
+
+ -- Integer vector operations
+ | MO_V_Add Length Width
+ | MO_V_Sub Length Width
+ | MO_V_Mul Length Width
+
+ -- Signed vector quotient/remainder/negation
+ | MO_VS_Quot Length Width
+ | MO_VS_Rem Length Width
+ | MO_VS_Neg Length Width
-- Floating point vector operations
| MO_VF_Add Length Width
@@ -352,6 +362,14 @@ machOpResultType dflags mop tys =
MO_V_Insert {} -> ty1
MO_V_Extract {} -> vecElemType ty1
+
+ MO_V_Add {} -> ty1
+ MO_V_Sub {} -> ty1
+ MO_V_Mul {} -> ty1
+
+ MO_VS_Quot {} -> ty1
+ MO_VS_Rem {} -> ty1
+ MO_VS_Neg {} -> ty1
MO_VF_Add {} -> ty1
MO_VF_Sub {} -> ty1
@@ -428,6 +446,14 @@ machOpArgReps dflags op =
MO_V_Insert l r -> [typeWidth (vec l (cmmFloat r)),r,wordWidth dflags]
MO_V_Extract l r -> [typeWidth (vec l (cmmFloat r)),wordWidth dflags]
+ MO_V_Add _ r -> [r,r]
+ MO_V_Sub _ r -> [r,r]
+ MO_V_Mul _ r -> [r,r]
+
+ MO_VS_Quot _ r -> [r,r]
+ MO_VS_Rem _ r -> [r,r]
+ MO_VS_Neg _ r -> [r]
+
MO_VF_Add _ r -> [r,r]
MO_VF_Sub _ r -> [r,r]
MO_VF_Mul _ r -> [r,r]
diff --git a/compiler/cmm/PprC.hs b/compiler/cmm/PprC.hs
index b714e834b6..8712d5fb5c 100644
--- a/compiler/cmm/PprC.hs
+++ b/compiler/cmm/PprC.hs
@@ -635,6 +635,32 @@ pprMachOp_for_C mop = case mop of
(panic $ "PprC.pprMachOp_for_C: MO_V_Extract"
++ " should have been handled earlier!")
+ MO_V_Add {} -> pprTrace "offending mop:"
+ (ptext $ sLit "MO_V_Add")
+ (panic $ "PprC.pprMachOp_for_C: MO_V_Add"
+ ++ " should have been handled earlier!")
+ MO_V_Sub {} -> pprTrace "offending mop:"
+ (ptext $ sLit "MO_V_Sub")
+ (panic $ "PprC.pprMachOp_for_C: MO_V_Sub"
+ ++ " should have been handled earlier!")
+ MO_V_Mul {} -> pprTrace "offending mop:"
+ (ptext $ sLit "MO_V_Mul")
+ (panic $ "PprC.pprMachOp_for_C: MO_V_Mul"
+ ++ " should have been handled earlier!")
+
+ MO_VS_Quot {} -> pprTrace "offending mop:"
+ (ptext $ sLit "MO_VS_Quot")
+ (panic $ "PprC.pprMachOp_for_C: MO_VS_Quot"
+ ++ " should have been handled earlier!")
+ MO_VS_Rem {} -> pprTrace "offending mop:"
+ (ptext $ sLit "MO_VS_Rem")
+ (panic $ "PprC.pprMachOp_for_C: MO_VS_Rem"
+ ++ " should have been handled earlier!")
+ MO_VS_Neg {} -> pprTrace "offending mop:"
+ (ptext $ sLit "MO_VS_Neg")
+ (panic $ "PprC.pprMachOp_for_C: MO_VS_Neg"
+ ++ " should have been handled earlier!")
+
MO_VF_Add {} -> pprTrace "offending mop:"
(ptext $ sLit "MO_VF_Add")
(panic $ "PprC.pprMachOp_for_C: MO_VF_Add"
diff --git a/compiler/codeGen/StgCmmPrim.hs b/compiler/codeGen/StgCmmPrim.hs
index 9a583b8354..0637cd90de 100644
--- a/compiler/codeGen/StgCmmPrim.hs
+++ b/compiler/codeGen/StgCmmPrim.hs
@@ -383,6 +383,8 @@ emitPrimOp dflags res IndexOffAddrOp_Word32 args = doIndexOffAddrOp
emitPrimOp _ res IndexOffAddrOp_Word64 args = doIndexOffAddrOp Nothing b64 res args
emitPrimOp _ res IndexOffAddrOp_FloatX4 args = doIndexOffAddrOp Nothing vec4f32 res args
emitPrimOp _ res IndexOffAddrOp_FloatAsFloatX4 args = doIndexOffAddrOpAs Nothing vec4f32 f32 res args
+emitPrimOp _ res IndexOffAddrOp_Int32X4 args = doIndexOffAddrOp Nothing vec4b32 res args
+emitPrimOp _ res IndexOffAddrOp_Int32AsInt32X4 args = doIndexOffAddrOpAs Nothing vec4b32 b32 res args
-- ReadXXXoffAddr, which are identical, for our purposes, to IndexXXXoffAddr.
@@ -404,6 +406,8 @@ emitPrimOp dflags res ReadOffAddrOp_Word32 args = doIndexOffAddrOp (
emitPrimOp _ res ReadOffAddrOp_Word64 args = doIndexOffAddrOp Nothing b64 res args
emitPrimOp _ res ReadOffAddrOp_FloatX4 args = doIndexOffAddrOp Nothing vec4f32 res args
emitPrimOp _ res ReadOffAddrOp_FloatAsFloatX4 args = doIndexOffAddrOpAs Nothing vec4f32 b32 res args
+emitPrimOp _ res ReadOffAddrOp_Int32X4 args = doIndexOffAddrOp Nothing vec4b32 res args
+emitPrimOp _ res ReadOffAddrOp_Int32AsInt32X4 args = doIndexOffAddrOpAs Nothing vec4b32 b32 res args
-- IndexXXXArray
@@ -425,6 +429,8 @@ emitPrimOp dflags res IndexByteArrayOp_Word32 args = doIndexByteArrayO
emitPrimOp _ res IndexByteArrayOp_Word64 args = doIndexByteArrayOp Nothing b64 res args
emitPrimOp _ res IndexByteArrayOp_FloatX4 args = doIndexByteArrayOp Nothing vec4f32 res args
emitPrimOp _ res IndexByteArrayOp_FloatAsFloatX4 args = doIndexByteArrayOpAs Nothing vec4f32 f32 res args
+emitPrimOp _ res IndexByteArrayOp_Int32X4 args = doIndexByteArrayOp Nothing vec4b32 res args
+emitPrimOp _ res IndexByteArrayOp_Int32AsInt32X4 args = doIndexByteArrayOpAs Nothing vec4b32 b32 res args
-- ReadXXXArray, identical to IndexXXXArray.
@@ -446,6 +452,8 @@ emitPrimOp dflags res ReadByteArrayOp_Word32 args = doIndexByteArrayOp
emitPrimOp _ res ReadByteArrayOp_Word64 args = doIndexByteArrayOp Nothing b64 res args
emitPrimOp _ res ReadByteArrayOp_FloatX4 args = doIndexByteArrayOp Nothing vec4f32 res args
emitPrimOp _ res ReadByteArrayOp_FloatAsFloatX4 args = doIndexByteArrayOpAs Nothing vec4f32 f32 res args
+emitPrimOp _ res ReadByteArrayOp_Int32X4 args = doIndexByteArrayOp Nothing vec4b32 res args
+emitPrimOp _ res ReadByteArrayOp_Int32AsInt32X4 args = doIndexByteArrayOpAs Nothing vec4b32 b32 res args
-- WriteXXXoffAddr
@@ -467,6 +475,8 @@ emitPrimOp dflags res WriteOffAddrOp_Word32 args = doWriteOffAddrOp (J
emitPrimOp _ res WriteOffAddrOp_Word64 args = doWriteOffAddrOp Nothing b64 res args
emitPrimOp _ res WriteOffAddrOp_FloatX4 args = doWriteOffAddrOp Nothing vec4f32 res args
emitPrimOp _ res WriteOffAddrOp_FloatAsFloatX4 args = doWriteOffAddrOp Nothing f32 res args
+emitPrimOp _ res WriteOffAddrOp_Int32X4 args = doWriteOffAddrOp Nothing vec4b32 res args
+emitPrimOp _ res WriteOffAddrOp_Int32AsInt32X4 args = doWriteOffAddrOp Nothing b32 res args
-- WriteXXXArray
@@ -488,6 +498,8 @@ emitPrimOp dflags res WriteByteArrayOp_Word32 args = doWriteByteArrayO
emitPrimOp _ res WriteByteArrayOp_Word64 args = doWriteByteArrayOp Nothing b64 res args
emitPrimOp _ res WriteByteArrayOp_FloatX4 args = doWriteByteArrayOp Nothing vec4f32 res args
emitPrimOp _ res WriteByteArrayOp_FloatAsFloatX4 args = doWriteByteArrayOp Nothing f32 res args
+emitPrimOp _ res WriteByteArrayOp_Int32X4 args = doWriteByteArrayOp Nothing vec4b32 res args
+emitPrimOp _ res WriteByteArrayOp_Int32AsInt32X4 args = doWriteByteArrayOp Nothing b32 res args
-- Copying and setting byte arrays
emitPrimOp _ [] CopyByteArrayOp [src,src_off,dst,dst_off,n] =
@@ -529,6 +541,24 @@ emitPrimOp _ res@[_,_,_,_] FloatX4UnpackOp [arg] =
emitPrimOp _ [res] FloatX4InsertOp [v,e,i] =
doVecInsertOp Nothing vec4f32 v e i res
+emitPrimOp dflags [res] Int32ToInt32X4Op [e] =
+ doVecPackOp (Just (mo_WordTo32 dflags)) vec4b32 zero [e,e,e,e] res
+ where
+ zero :: CmmExpr
+ zero = CmmLit $ CmmVec (replicate 4 (CmmInt 0 W32))
+
+emitPrimOp dflags [res] Int32X4PackOp es@[_,_,_,_] =
+ doVecPackOp (Just (mo_WordTo32 dflags)) vec4b32 zero es res
+ where
+ zero :: CmmExpr
+ zero = CmmLit $ CmmVec (replicate 4 (CmmInt 0 W32))
+
+emitPrimOp dflags res@[_,_,_,_] Int32X4UnpackOp [arg] =
+ doVecUnpackOp (Just (mo_s_32ToWord dflags)) vec4b32 arg res
+
+emitPrimOp dflags [res] Int32X4InsertOp [v,e,i] =
+ doVecInsertOp (Just (mo_WordTo32 dflags)) vec4b32 v e i res
+
-- The rest just translate straightforwardly
emitPrimOp dflags [res] op [arg]
| nopOp op
@@ -843,6 +873,13 @@ translateOp _ FloatX4MulOp = Just (MO_VF_Mul 4 W32)
translateOp _ FloatX4DivOp = Just (MO_VF_Quot 4 W32)
translateOp _ FloatX4NegOp = Just (MO_VF_Neg 4 W32)
+translateOp _ Int32X4AddOp = Just (MO_V_Add 4 W32)
+translateOp _ Int32X4SubOp = Just (MO_V_Sub 4 W32)
+translateOp _ Int32X4MulOp = Just (MO_V_Mul 4 W32)
+translateOp _ Int32X4QuotOp = Just (MO_VS_Quot 4 W32)
+translateOp _ Int32X4RemOp = Just (MO_VS_Rem 4 W32)
+translateOp _ Int32X4NegOp = Just (MO_VS_Neg 4 W32)
+
-- Conversions
translateOp dflags Int2DoubleOp = Just (MO_SF_Conv (wordWidth dflags) W64)
diff --git a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs
index cd864ca1a2..efa7e9a706 100644
--- a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs
+++ b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs
@@ -879,6 +879,13 @@ genMachOp env _ op [x] = case op of
MO_FF_Conv from to
-> sameConv from (widthToLlvmFloat to) LM_Fptrunc LM_Fpext
+ MO_VS_Neg len w ->
+ let ty = widthToLlvmInt w
+ vecty = LMVector len ty
+ all0 = LMIntLit (-0) ty
+ all0s = LMLitVar $ LMVectorLit (replicate len all0)
+ in negate vecty all0s LM_MO_Sub
+
MO_VF_Neg len w ->
let ty = widthToLlvmFloat w
vecty = LMVector len ty
@@ -929,6 +936,13 @@ genMachOp env _ op [x] = case op of
MO_V_Insert _ _ -> panicOp
MO_V_Extract _ _ -> panicOp
+
+ MO_V_Add _ _ -> panicOp
+ MO_V_Sub _ _ -> panicOp
+ MO_V_Mul _ _ -> panicOp
+
+ MO_VS_Quot _ _ -> panicOp
+ MO_VS_Rem _ _ -> panicOp
MO_VF_Add _ _ -> panicOp
MO_VF_Sub _ _ -> panicOp
@@ -1065,6 +1079,13 @@ genMachOp_slow env opt op [x, y] = case op of
MO_Shl _ -> genBinMach LM_MO_Shl
MO_U_Shr _ -> genBinMach LM_MO_LShr
MO_S_Shr _ -> genBinMach LM_MO_AShr
+
+ MO_V_Add _ _ -> genBinMach LM_MO_Add
+ MO_V_Sub _ _ -> genBinMach LM_MO_Sub
+ MO_V_Mul _ _ -> genBinMach LM_MO_Mul
+
+ MO_VS_Quot _ _ -> genBinMach LM_MO_SDiv
+ MO_VS_Rem _ _ -> genBinMach LM_MO_SRem
MO_VF_Add _ _ -> genBinMach LM_MO_FAdd
MO_VF_Sub _ _ -> genBinMach LM_MO_FSub
@@ -1084,6 +1105,8 @@ genMachOp_slow env opt op [x, y] = case op of
MO_V_Insert {} -> panicOp
MO_V_Extract {} -> panicOp
+ MO_VS_Neg {} -> panicOp
+
MO_VF_Neg {} -> panicOp
where
diff --git a/compiler/nativeGen/X86/CodeGen.hs b/compiler/nativeGen/X86/CodeGen.hs
index 46e8e9b81f..0df95a2f73 100644
--- a/compiler/nativeGen/X86/CodeGen.hs
+++ b/compiler/nativeGen/X86/CodeGen.hs
@@ -604,6 +604,12 @@ getRegister' dflags is32Bit (CmmMachOp mop [x]) = do -- unary MachOps
MO_V_Insert {} -> needLlvm
MO_V_Extract {} -> needLlvm
+ MO_V_Add {} -> needLlvm
+ MO_V_Sub {} -> needLlvm
+ MO_V_Mul {} -> needLlvm
+ MO_VS_Quot {} -> needLlvm
+ MO_VS_Rem {} -> needLlvm
+ MO_VS_Neg {} -> needLlvm
MO_VF_Add {} -> needLlvm
MO_VF_Sub {} -> needLlvm
MO_VF_Mul {} -> needLlvm
@@ -704,6 +710,12 @@ getRegister' _ is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps
MO_V_Insert {} -> needLlvm
MO_V_Extract {} -> needLlvm
+ MO_V_Add {} -> needLlvm
+ MO_V_Sub {} -> needLlvm
+ MO_V_Mul {} -> needLlvm
+ MO_VS_Quot {} -> needLlvm
+ MO_VS_Rem {} -> needLlvm
+ MO_VS_Neg {} -> needLlvm
MO_VF_Add {} -> needLlvm
MO_VF_Sub {} -> needLlvm
MO_VF_Mul {} -> needLlvm
@@ -2712,6 +2724,12 @@ sse2NegCode w x = do
isVecExpr :: CmmExpr -> Bool
isVecExpr (CmmMachOp (MO_V_Insert {}) _) = True
isVecExpr (CmmMachOp (MO_V_Extract {}) _) = True
+isVecExpr (CmmMachOp (MO_V_Add {}) _) = True
+isVecExpr (CmmMachOp (MO_V_Sub {}) _) = True
+isVecExpr (CmmMachOp (MO_V_Mul {}) _) = True
+isVecExpr (CmmMachOp (MO_VS_Quot {}) _) = True
+isVecExpr (CmmMachOp (MO_VS_Rem {}) _) = True
+isVecExpr (CmmMachOp (MO_VS_Neg {}) _) = True
isVecExpr (CmmMachOp (MO_VF_Add {}) _) = True
isVecExpr (CmmMachOp (MO_VF_Sub {}) _) = True
isVecExpr (CmmMachOp (MO_VF_Mul {}) _) = True
diff --git a/compiler/prelude/PrelNames.lhs b/compiler/prelude/PrelNames.lhs
index 961a823436..5b5d0ce9c9 100644
--- a/compiler/prelude/PrelNames.lhs
+++ b/compiler/prelude/PrelNames.lhs
@@ -1421,9 +1421,10 @@ typeNatMulTyFamNameKey = mkPreludeTyConUnique 163
typeNatExpTyFamNameKey = mkPreludeTyConUnique 164
-- SIMD vector types (Unique keys)
-floatX4PrimTyConKey :: Unique
+floatX4PrimTyConKey, int32X4PrimTyConKey :: Unique
floatX4PrimTyConKey = mkPreludeTyConUnique 170
+int32X4PrimTyConKey = mkPreludeTyConUnique 171
---------------- Template Haskell -------------------
-- USES TyConUniques 200-299
diff --git a/compiler/prelude/TysPrim.lhs b/compiler/prelude/TysPrim.lhs
index 960a27b276..acecf4ce95 100644
--- a/compiler/prelude/TysPrim.lhs
+++ b/compiler/prelude/TysPrim.lhs
@@ -76,7 +76,8 @@ module TysPrim(
anyTy, anyTyCon, anyTypeOfKind,
-- * SIMD
- floatX4PrimTyCon, floatX4PrimTy
+ floatX4PrimTyCon, floatX4PrimTy,
+ int32X4PrimTyCon, int32X4PrimTy
) where
#include "HsVersions.h"
@@ -140,6 +141,7 @@ primTyCons
, anyKindTyCon
, floatX4PrimTyCon
+ , int32X4PrimTyCon
]
mkPrimTc :: FastString -> Unique -> TyCon -> Name
@@ -149,7 +151,7 @@ mkPrimTc fs unique tycon
(ATyCon tycon) -- Relevant TyCon
UserSyntax -- None are built-in syntax
-charPrimTyConName, intPrimTyConName, int32PrimTyConName, int64PrimTyConName, wordPrimTyConName, word32PrimTyConName, word64PrimTyConName, addrPrimTyConName, floatPrimTyConName, doublePrimTyConName, statePrimTyConName, realWorldTyConName, arrayPrimTyConName, arrayArrayPrimTyConName, byteArrayPrimTyConName, mutableArrayPrimTyConName, mutableByteArrayPrimTyConName, mutableArrayArrayPrimTyConName, mutVarPrimTyConName, mVarPrimTyConName, tVarPrimTyConName, stablePtrPrimTyConName, stableNamePrimTyConName, bcoPrimTyConName, weakPrimTyConName, threadIdPrimTyConName, eqPrimTyConName, floatX4PrimTyConName :: Name
+charPrimTyConName, intPrimTyConName, int32PrimTyConName, int64PrimTyConName, wordPrimTyConName, word32PrimTyConName, word64PrimTyConName, addrPrimTyConName, floatPrimTyConName, doublePrimTyConName, statePrimTyConName, realWorldTyConName, arrayPrimTyConName, arrayArrayPrimTyConName, byteArrayPrimTyConName, mutableArrayPrimTyConName, mutableByteArrayPrimTyConName, mutableArrayArrayPrimTyConName, mutVarPrimTyConName, mVarPrimTyConName, tVarPrimTyConName, stablePtrPrimTyConName, stableNamePrimTyConName, bcoPrimTyConName, weakPrimTyConName, threadIdPrimTyConName, eqPrimTyConName, floatX4PrimTyConName, int32X4PrimTyConName :: Name
charPrimTyConName = mkPrimTc (fsLit "Char#") charPrimTyConKey charPrimTyCon
intPrimTyConName = mkPrimTc (fsLit "Int#") intPrimTyConKey intPrimTyCon
int32PrimTyConName = mkPrimTc (fsLit "Int32#") int32PrimTyConKey int32PrimTyCon
@@ -178,6 +180,7 @@ bcoPrimTyConName = mkPrimTc (fsLit "BCO#") bcoPrimTyConKey bcoPrimTyCon
weakPrimTyConName = mkPrimTc (fsLit "Weak#") weakPrimTyConKey weakPrimTyCon
threadIdPrimTyConName = mkPrimTc (fsLit "ThreadId#") threadIdPrimTyConKey threadIdPrimTyCon
floatX4PrimTyConName = mkPrimTc (fsLit "FloatX4#") floatX4PrimTyConKey floatX4PrimTyCon
+int32X4PrimTyConName = mkPrimTc (fsLit "Int32X4#") int32X4PrimTyConKey int32X4PrimTyCon
\end{code}
%************************************************************************
@@ -747,4 +750,9 @@ floatX4PrimTy :: Type
floatX4PrimTy = mkTyConTy floatX4PrimTyCon
floatX4PrimTyCon :: TyCon
floatX4PrimTyCon = pcPrimTyCon0 floatX4PrimTyConName (VecRep 4 FloatElemRep)
+
+int32X4PrimTy :: Type
+int32X4PrimTy = mkTyConTy int32X4PrimTyCon
+int32X4PrimTyCon :: TyCon
+int32X4PrimTyCon = pcPrimTyCon0 int32X4PrimTyConName (VecRep 4 Int32ElemRep)
\end{code}
diff --git a/compiler/prelude/primops.txt.pp b/compiler/prelude/primops.txt.pp
index 9cdda0ec8f..a9ef0659ef 100644
--- a/compiler/prelude/primops.txt.pp
+++ b/compiler/prelude/primops.txt.pp
@@ -2297,6 +2297,104 @@ primop WriteOffAddrOp_FloatAsFloatX4 "writeFloatOffAddrAsFloatX4#" GenPrimOp
can_fail = True
------------------------------------------------------------------------
+section "Int32 SIMD Vectors"
+ {Operations on SIMD vectors of 4 32-bit signed integers.}
+------------------------------------------------------------------------
+
+primtype Int32X4#
+
+primop Int32ToInt32X4Op "int32ToInt32X4#" GenPrimOp
+ INT32 -> Int32X4#
+
+primop Int32X4InsertOp "insertInt32X4#" GenPrimOp
+ Int32X4# -> INT32 -> Int# -> Int32X4#
+ with can_fail = True
+
+primop Int32X4PackOp "packInt32X4#" GenPrimOp
+ INT32 -> INT32 -> INT32 -> INT32 -> Int32X4#
+
+primop Int32X4UnpackOp "unpackInt32X4#" GenPrimOp
+ Int32X4# -> (# INT32, INT32, INT32, INT32 #)
+
+primop Int32X4AddOp "plusInt32X4#" Dyadic
+ Int32X4# -> Int32X4# -> Int32X4#
+ with commutable = True
+
+primop Int32X4SubOp "minusInt32X4#" Dyadic
+ Int32X4# -> Int32X4# -> Int32X4#
+
+primop Int32X4MulOp "timesInt32X4#" Dyadic
+ Int32X4# -> Int32X4# -> Int32X4#
+ with commutable = True
+
+primop Int32X4QuotOp "quotInt32X4#" Dyadic
+ Int32X4# -> Int32X4# -> Int32X4#
+ with can_fail = True
+
+primop Int32X4RemOp "remInt32X4#" Dyadic
+ Int32X4# -> Int32X4# -> Int32X4#
+ with can_fail = True
+
+primop Int32X4NegOp "negateInt32X4#" Monadic
+ Int32X4# -> Int32X4#
+
+primop IndexByteArrayOp_Int32X4 "indexInt32X4Array#" GenPrimOp
+ ByteArray# -> Int# -> Int32X4#
+ with can_fail = True
+
+primop ReadByteArrayOp_Int32X4 "readInt32X4Array#" GenPrimOp
+ MutableByteArray# s -> Int# -> State# s -> (# State# s, Int32X4# #)
+ with has_side_effects = True
+ can_fail = True
+
+primop WriteByteArrayOp_Int32X4 "writeInt32X4Array#" GenPrimOp
+ MutableByteArray# s -> Int# -> Int32X4# -> State# s -> State# s
+ with has_side_effects = True
+ can_fail = True
+
+primop IndexOffAddrOp_Int32X4 "indexInt32X4OffAddr#" GenPrimOp
+ Addr# -> Int# -> Int32X4#
+ with can_fail = True
+
+primop ReadOffAddrOp_Int32X4 "readInt32X4OffAddr#" GenPrimOp
+ Addr# -> Int# -> State# s -> (# State# s, Int32X4# #)
+ with has_side_effects = True
+ can_fail = True
+
+primop WriteOffAddrOp_Int32X4 "writeInt32X4OffAddr#" GenPrimOp
+ Addr# -> Int# -> Int32X4# -> State# s -> State# s
+ with has_side_effects = True
+ can_fail = True
+
+primop IndexByteArrayOp_Int32AsInt32X4 "indexInt32ArrayAsInt32X4#" GenPrimOp
+ ByteArray# -> Int# -> Int32X4#
+ with can_fail = True
+
+primop ReadByteArrayOp_Int32AsInt32X4 "readInt32ArrayAsInt32X4#" GenPrimOp
+ MutableByteArray# s -> Int# -> State# s -> (# State# s, Int32X4# #)
+ with has_side_effects = True
+ can_fail = True
+
+primop WriteByteArrayOp_Int32AsInt32X4 "writeInt32ArrayAsInt32X4#" GenPrimOp
+ MutableByteArray# s -> Int# -> Int32X4# -> State# s -> State# s
+ with has_side_effects = True
+ can_fail = True
+
+primop IndexOffAddrOp_Int32AsInt32X4 "indexInt32OffAddrAsInt32X4#" GenPrimOp
+ Addr# -> Int# -> Int32X4#
+ with can_fail = True
+
+primop ReadOffAddrOp_Int32AsInt32X4 "readInt32OffAddrAsInt32X4#" GenPrimOp
+ Addr# -> Int# -> State# s -> (# State# s, Int32X4# #)
+ with has_side_effects = True
+ can_fail = True
+
+primop WriteOffAddrOp_Int32AsInt32X4 "writeInt32OffAddrAsInt32X4#" GenPrimOp
+ Addr# -> Int# -> Int32X4# -> State# s -> State# s
+ with has_side_effects = True
+ can_fail = True
+
+------------------------------------------------------------------------
--- ---
------------------------------------------------------------------------