diff options
author | sheaf <sam.derbyshire@gmail.com> | 2022-10-14 14:31:15 +0200 |
---|---|---|
committer | Marge Bot <ben+marge-bot@smart-cactus.org> | 2022-10-19 10:45:45 -0400 |
commit | 6d7d91817795d7ee7f45557411368a1738daa488 (patch) | |
tree | 41c6e50ae120420658ca8076a469202f7021ffd1 | |
parent | 5b3a992f5d166007c3c5a22f120ed08e0a27f01a (diff) | |
download | haskell-6d7d91817795d7ee7f45557411368a1738daa488.tar.gz |
Remove SIMD conversions
This patch makes packing/unpacking SIMD vectors always
use correctly sized element types, e.g.
unpacking a Word16X8# will give a tuple of Word16#s.
As a result, we can get rid of the conversion instructions
that were previously required.
Fixes #22296
-rw-r--r-- | compiler/GHC/Builtin/primops.txt.pp | 12 | ||||
-rw-r--r-- | compiler/GHC/Cmm/MachOp.hs | 10 | ||||
-rw-r--r-- | compiler/GHC/StgToCmm/Prim.hs | 78 | ||||
-rw-r--r-- | testsuite/tests/codeGen/should_run/T22296.hs | 41 | ||||
-rw-r--r-- | testsuite/tests/codeGen/should_run/T22296.stdout | 3 | ||||
-rw-r--r-- | testsuite/tests/codeGen/should_run/all.T | 1 |
6 files changed, 74 insertions, 71 deletions
diff --git a/compiler/GHC/Builtin/primops.txt.pp b/compiler/GHC/Builtin/primops.txt.pp index 9e58d5b7eb..2deb2f48ef 100644 --- a/compiler/GHC/Builtin/primops.txt.pp +++ b/compiler/GHC/Builtin/primops.txt.pp @@ -3832,9 +3832,9 @@ section "SIMD Vectors" [<Int8,Int8#,16>,<Int16,Int16#,8>,<Int32,Int32#,4>,<Int64,Int64#,2> \ ,<Int8,Int8#,32>,<Int16,Int16#,16>,<Int32,Int32#,8>,<Int64,Int64#,4> \ ,<Int8,Int8#,64>,<Int16,Int16#,32>,<Int32,Int32#,16>,<Int64,Int64#,8> \ - ,<Word8,Word#,16>,<Word16,Word#,8>,<Word32,Word32#,4>,<Word64,Word64#,2> \ - ,<Word8,Word#,32>,<Word16,Word#,16>,<Word32,Word32#,8>,<Word64,Word64#,4> \ - ,<Word8,Word#,64>,<Word16,Word#,32>,<Word32,Word32#,16>,<Word64,Word64#,8> \ + ,<Word8,Word8#,16>,<Word16,Word16#,8>,<Word32,Word32#,4>,<Word64,Word64#,2> \ + ,<Word8,Word8#,32>,<Word16,Word16#,16>,<Word32,Word32#,8>,<Word64,Word64#,4> \ + ,<Word8,Word8#,64>,<Word16,Word16#,32>,<Word32,Word32#,16>,<Word64,Word64#,8> \ ,<Float,Float#,4>,<Double,Double#,2> \ ,<Float,Float#,8>,<Double,Double#,4> \ ,<Float,Float#,16>,<Double,Double#,8>] @@ -3856,9 +3856,9 @@ section "SIMD Vectors" [<Int8,Int8#,16>,<Int16,Int16#,8>,<Int32,Int32#,4>,<Int64,Int64#,2> \ ,<Int8,Int8#,32>,<Int16,Int16#,16>,<Int32,Int32#,8>,<Int64,Int64#,4> \ ,<Int8,Int8#,64>,<Int16,Int16#,32>,<Int32,Int32#,16>,<Int64,Int64#,8> \ - ,<Word8,Word#,16>,<Word16,Word#,8>,<Word32,Word32#,4>,<Word64,Word64#,2> \ - ,<Word8,Word#,32>,<Word16,Word#,16>,<Word32,Word32#,8>,<Word64,Word64#,4> \ - ,<Word8,Word#,64>,<Word16,Word#,32>,<Word32,Word32#,16>,<Word64,Word64#,8>] + ,<Word8,Word8#,16>,<Word16,Word16#,8>,<Word32,Word32#,4>,<Word64,Word64#,2> \ + ,<Word8,Word8#,32>,<Word16,Word16#,16>,<Word32,Word32#,8>,<Word64,Word64#,4> \ + ,<Word8,Word8#,64>,<Word16,Word16#,32>,<Word32,Word32#,16>,<Word64,Word64#,8>] primtype VECTOR with llvm_only = True diff --git a/compiler/GHC/Cmm/MachOp.hs b/compiler/GHC/Cmm/MachOp.hs index 0bd3ac1111..632165b6b2 100644 --- a/compiler/GHC/Cmm/MachOp.hs +++ 
b/compiler/GHC/Cmm/MachOp.hs @@ -514,8 +514,11 @@ machOpArgReps platform op = MO_FS_Conv from _ -> [from] MO_FF_Conv from _ -> [from] - MO_V_Insert l r -> [typeWidth (vec l (cmmBits r)),r,wordWidth platform] - MO_V_Extract l r -> [typeWidth (vec l (cmmBits r)),wordWidth platform] + MO_V_Insert l r -> [typeWidth (vec l (cmmBits r)),r, W32] + MO_V_Extract l r -> [typeWidth (vec l (cmmBits r)), W32] + MO_VF_Insert l r -> [typeWidth (vec l (cmmFloat r)),r,W32] + MO_VF_Extract l r -> [typeWidth (vec l (cmmFloat r)),W32] + -- SIMD vector indices are always 32 bit MO_V_Add _ r -> [r,r] MO_V_Sub _ r -> [r,r] @@ -528,9 +531,6 @@ machOpArgReps platform op = MO_VU_Quot _ r -> [r,r] MO_VU_Rem _ r -> [r,r] - MO_VF_Insert l r -> [typeWidth (vec l (cmmFloat r)),r,wordWidth platform] - MO_VF_Extract l r -> [typeWidth (vec l (cmmFloat r)),wordWidth platform] - MO_VF_Add _ r -> [r,r] MO_VF_Sub _ r -> [r,r] MO_VF_Mul _ r -> [r,r] diff --git a/compiler/GHC/StgToCmm/Prim.hs b/compiler/GHC/StgToCmm/Prim.hs index d4c336e802..518080797f 100644 --- a/compiler/GHC/StgToCmm/Prim.hs +++ b/compiler/GHC/StgToCmm/Prim.hs @@ -853,7 +853,7 @@ emitPrimOp cfg primop = -- SIMD primops (VecBroadcastOp vcat n w) -> \[e] -> opIntoRegs $ \[res] -> do checkVecCompatibility cfg vcat n w - doVecPackOp (vecElemInjectCast platform vcat w) ty zeros (replicate n e) res + doVecPackOp ty zeros (replicate n e) res where zeros :: CmmExpr zeros = CmmLit $ CmmVec (replicate n zero) @@ -871,7 +871,7 @@ emitPrimOp cfg primop = checkVecCompatibility cfg vcat n w when (es `lengthIsNot` n) $ panic "emitPrimOp: VecPackOp has wrong number of arguments" - doVecPackOp (vecElemInjectCast platform vcat w) ty zeros es res + doVecPackOp ty zeros es res where zeros :: CmmExpr zeros = CmmLit $ CmmVec (replicate n zero) @@ -889,14 +889,14 @@ emitPrimOp cfg primop = checkVecCompatibility cfg vcat n w when (res `lengthIsNot` n) $ panic "emitPrimOp: VecUnpackOp has wrong number of results" - doVecUnpackOp (vecElemProjectCast platform 
vcat w) ty arg res + doVecUnpackOp ty arg res where ty :: CmmType ty = vecVmmType vcat n w (VecInsertOp vcat n w) -> \[v,e,i] -> opIntoRegs $ \[res] -> do checkVecCompatibility cfg vcat n w - doVecInsertOp (vecElemInjectCast platform vcat w) ty v e i res + doVecInsertOp ty v e i res where ty :: CmmType ty = vecVmmType vcat n w @@ -2215,31 +2215,6 @@ vecCmmCat IntVec = cmmBits vecCmmCat WordVec = cmmBits vecCmmCat FloatVec = cmmFloat -vecElemInjectCast :: Platform -> PrimOpVecCat -> Width -> Maybe MachOp -vecElemInjectCast _ FloatVec _ = Nothing -vecElemInjectCast platform IntVec W8 = Just (mo_WordTo8 platform) -vecElemInjectCast platform IntVec W16 = Just (mo_WordTo16 platform) -vecElemInjectCast platform IntVec W32 = Just (mo_WordTo32 platform) -vecElemInjectCast _ IntVec W64 = Nothing -vecElemInjectCast platform WordVec W8 = Just (mo_WordTo8 platform) -vecElemInjectCast platform WordVec W16 = Just (mo_WordTo16 platform) -vecElemInjectCast platform WordVec W32 = Just (mo_WordTo32 platform) -vecElemInjectCast _ WordVec W64 = Nothing -vecElemInjectCast _ _ _ = Nothing - -vecElemProjectCast :: Platform -> PrimOpVecCat -> Width -> Maybe MachOp -vecElemProjectCast _ FloatVec _ = Nothing -vecElemProjectCast platform IntVec W8 = Just (mo_s_8ToWord platform) -vecElemProjectCast platform IntVec W16 = Just (mo_s_16ToWord platform) -vecElemProjectCast platform IntVec W32 = Just (mo_s_32ToWord platform) -vecElemProjectCast _ IntVec W64 = Nothing -vecElemProjectCast platform WordVec W8 = Just (mo_u_8ToWord platform) -vecElemProjectCast platform WordVec W16 = Just (mo_u_16ToWord platform) -vecElemProjectCast platform WordVec W32 = Just (mo_u_32ToWord platform) -vecElemProjectCast _ WordVec W64 = Nothing -vecElemProjectCast _ _ _ = Nothing - - -- Note [SIMD Design for the future] -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -- Check to make sure that we can generate code for the specified vector type @@ -2302,13 +2277,12 @@ checkVecCompatibility cfg vcat l w = 
------------------------------------------------------------------------------ -- Helpers for translating vector packing and unpacking. -doVecPackOp :: Maybe MachOp -- Cast from element to vector component - -> CmmType -- Type of vector +doVecPackOp :: CmmType -- Type of vector -> CmmExpr -- Initial vector -> [CmmExpr] -- Elements -> CmmFormal -- Destination for result -> FCode () -doVecPackOp maybe_pre_write_cast ty z es res = do +doVecPackOp ty z es res = do dst <- newTemp ty emitAssign (CmmLocal dst) z vecPack dst es 0 @@ -2321,31 +2295,25 @@ doVecPackOp maybe_pre_write_cast ty z es res = do dst <- newTemp ty if isFloatType (vecElemType ty) then emitAssign (CmmLocal dst) (CmmMachOp (MO_VF_Insert len wid) - [CmmReg (CmmLocal src), cast e, iLit]) + [CmmReg (CmmLocal src), e, iLit]) else emitAssign (CmmLocal dst) (CmmMachOp (MO_V_Insert len wid) - [CmmReg (CmmLocal src), cast e, iLit]) + [CmmReg (CmmLocal src), e, iLit]) vecPack dst es (i + 1) where -- vector indices are always 32-bits iLit = CmmLit (CmmInt (toInteger i) W32) - cast :: CmmExpr -> CmmExpr - cast val = case maybe_pre_write_cast of - Nothing -> val - Just cast -> CmmMachOp cast [val] - len :: Length len = vecLength ty wid :: Width wid = typeWidth (vecElemType ty) -doVecUnpackOp :: Maybe MachOp -- Cast from vector component to element result - -> CmmType -- Type of vector +doVecUnpackOp :: CmmType -- Type of vector -> CmmExpr -- Vector -> [CmmFormal] -- Element results -> FCode () -doVecUnpackOp maybe_post_read_cast ty e res = +doVecUnpackOp ty e res = vecUnpack res 0 where vecUnpack :: [CmmFormal] -> Int -> FCode () @@ -2354,46 +2322,36 @@ doVecUnpackOp maybe_post_read_cast ty e res = vecUnpack (r : rs) i = do if isFloatType (vecElemType ty) - then emitAssign (CmmLocal r) (cast (CmmMachOp (MO_VF_Extract len wid) - [e, iLit])) - else emitAssign (CmmLocal r) (cast (CmmMachOp (MO_V_Extract len wid) - [e, iLit])) + then emitAssign (CmmLocal r) (CmmMachOp (MO_VF_Extract len wid) + [e, iLit]) + else 
emitAssign (CmmLocal r) (CmmMachOp (MO_V_Extract len wid) + [e, iLit]) vecUnpack rs (i + 1) where -- vector indices are always 32-bits iLit = CmmLit (CmmInt (toInteger i) W32) - cast :: CmmExpr -> CmmExpr - cast val = case maybe_post_read_cast of - Nothing -> val - Just cast -> CmmMachOp cast [val] - len :: Length len = vecLength ty wid :: Width wid = typeWidth (vecElemType ty) -doVecInsertOp :: Maybe MachOp -- Cast from element to vector component - -> CmmType -- Vector type +doVecInsertOp :: CmmType -- Vector type -> CmmExpr -- Source vector -> CmmExpr -- Element -> CmmExpr -- Index at which to insert element -> CmmFormal -- Destination for result -> FCode () -doVecInsertOp maybe_pre_write_cast ty src e idx res = do +doVecInsertOp ty src e idx res = do platform <- getPlatform -- vector indices are always 32-bits let idx' :: CmmExpr idx' = CmmMachOp (MO_SS_Conv (wordWidth platform) W32) [idx] if isFloatType (vecElemType ty) - then emitAssign (CmmLocal res) (CmmMachOp (MO_VF_Insert len wid) [src, cast e, idx']) - else emitAssign (CmmLocal res) (CmmMachOp (MO_V_Insert len wid) [src, cast e, idx']) + then emitAssign (CmmLocal res) (CmmMachOp (MO_VF_Insert len wid) [src, e, idx']) + else emitAssign (CmmLocal res) (CmmMachOp (MO_V_Insert len wid) [src, e, idx']) where - cast :: CmmExpr -> CmmExpr - cast val = case maybe_pre_write_cast of - Nothing -> val - Just cast -> CmmMachOp cast [val] len :: Length len = vecLength ty diff --git a/testsuite/tests/codeGen/should_run/T22296.hs b/testsuite/tests/codeGen/should_run/T22296.hs new file mode 100644 index 0000000000..d5ea23afda --- /dev/null +++ b/testsuite/tests/codeGen/should_run/T22296.hs @@ -0,0 +1,41 @@ +{-# language MagicHash, UnboxedTuples, UnboxedSums #-} + +module Main ( main ) where + +import GHC.Exts +import GHC.Int +import GHC.Word + +foo :: Word16X8# -> Integer +foo w16x8 = + case unpackWord16X8# w16x8 of + (# w1, w2, w3, w4, w5, w6, w7, w8 #) -> + let + s = sum $ map fromIntegral + [ W16# w1, W16# w2, W16# 
w3, W16# w4 + , W16# w5, W16# w6, W16# w7, W16# w8 ] + in s + +bar :: Int32X4# -> Integer +bar i32x4 = + case unpackInt32X4# i32x4 of + (# i1, i2, i3, i4 #) -> + let + s = sum $ map fromIntegral + [ I32# i1, I32# i2, I32# i3, I32# i4 ] + in s + +baz :: FloatX4# -> Float +baz fx4 = + case unpackFloatX4# fx4 of + (# f1, f2, f3, f4 #) -> + let + s = sum + [ F# f1, F# f2, F# f3, F# f4 ] + in s + +main :: IO () +main = do + print ( foo ( broadcastWord16X8# ( wordToWord16# 1## ) ) ) + print ( bar ( broadcastInt32X4# ( intToInt32# 1# ) ) ) + print ( baz ( broadcastFloatX4# ( 1.0# ) ) ) diff --git a/testsuite/tests/codeGen/should_run/T22296.stdout b/testsuite/tests/codeGen/should_run/T22296.stdout new file mode 100644 index 0000000000..52b2242af3 --- /dev/null +++ b/testsuite/tests/codeGen/should_run/T22296.stdout @@ -0,0 +1,3 @@ +8 +4 +4.0 diff --git a/testsuite/tests/codeGen/should_run/all.T b/testsuite/tests/codeGen/should_run/all.T index 85acd521f6..e8f20b5837 100644 --- a/testsuite/tests/codeGen/should_run/all.T +++ b/testsuite/tests/codeGen/should_run/all.T @@ -218,3 +218,4 @@ test('T21141', normal, compile_and_run, ['']) test('T21186', normal, compile_and_run, ['']) test('T20640a', normal, compile_and_run, ['']) test('T20640b', normal, compile_and_run, ['']) +test('T22296',[only_ways(llvm_ways)],compile_and_run,['']) |