author     Ben Gamari <bgamari.foss@gmail.com>   2015-06-16 20:16:08 +0200
committer  Ben Gamari <ben@smart-cactus.org>     2015-06-16 20:16:08 +0200
commit     681973c31c614185229bdae4f6b7ab4f6e64753d (patch)
tree       9ef8257217c05f4a05828a04e24199f42e0e2fe0
parent     d20031d4c88b256cdae264cb05d9d850e973d956 (diff)
download   haskell-681973c31c614185229bdae4f6b7ab4f6e64753d.tar.gz
Encode alignment in MO_Memcpy and friends
Summary:
Alignment needs to be a compile-time constant. Previously the code
generators had to jump through hoops to ensure this was the case as the
alignment was passed as a CmmExpr in the arguments list. Now we take care
of this up front. This fixes #8131.

Authored-by: Reid Barton <rwbarton@gmail.com>
Dusted-off-by: Ben Gamari <ben@smart-cactus.org>

Tests for T8131

Test Plan: Validate
Reviewers: rwbarton, austin
Reviewed By: rwbarton, austin
Subscribers: bgamari, carter, thomie
Differential Revision: https://phabricator.haskell.org/D624
GHC Trac Issues: #8131
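In short, the alignment moves from a run-time CmmExpr argument into the operation itself. A minimal Haskell sketch of the new shape, condensed from the compiler/cmm/CmmMachOp.hs hunk below (the real type has many more constructors, elided here):

    data CallishMachOp
      = MO_Memcpy  Int    -- known alignment of the dst and src pointers, in bytes
      | MO_Memset  Int
      | MO_Memmove Int
      -- ... remaining constructors elided

    -- Call sites change accordingly, e.g. (simplified from StgCmmPrim):
    --   before:  emitPrimCall [] MO_Memcpy     [dst, src, n, align]
    --   after:   emitPrimCall [] (MO_Memcpy 1) [dst, src, n]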
-rw-r--r--  compiler/cmm/CmmMachOp.hs                                           29
-rw-r--r--  compiler/cmm/CmmParse.y                                             37
-rw-r--r--  compiler/cmm/PprC.hs                                                12
-rw-r--r--  compiler/codeGen/StgCmmPrim.hs                                      49
-rw-r--r--  compiler/llvmGen/LlvmCodeGen/CodeGen.hs                             43
-rw-r--r--  compiler/nativeGen/PPC/CodeGen.hs                                   20
-rw-r--r--  compiler/nativeGen/SPARC/CodeGen.hs                                 21
-rw-r--r--  compiler/nativeGen/X86/CodeGen.hs                                   25
-rw-r--r--  includes/Cmm.h                                                       6
-rw-r--r--  rts/PrimOps.cmm                                                      8
-rw-r--r--  testsuite/tests/codeGen/should_fail/Makefile                         3
-rw-r--r--  testsuite/tests/codeGen/should_fail/T8131.cmm                        7
-rw-r--r--  testsuite/tests/codeGen/should_fail/all.T                            3
-rw-r--r--  testsuite/tests/codeGen/should_gen_asm/memcpy-unroll-conprop.cmm     2
-rw-r--r--  testsuite/tests/codeGen/should_run/cgrun069_cmm.cmm                 20
-rw-r--r--  testsuite/tests/llvm/should_compile/T8131b.hs                        9
-rw-r--r--  testsuite/tests/llvm/should_compile/all.T                            2
17 files changed, 155 insertions, 141 deletions
diff --git a/compiler/cmm/CmmMachOp.hs b/compiler/cmm/CmmMachOp.hs
index e9215d5021..f3f9e74a0b 100644
--- a/compiler/cmm/CmmMachOp.hs
+++ b/compiler/cmm/CmmMachOp.hs
@@ -21,6 +21,7 @@ module CmmMachOp
-- CallishMachOp
, CallishMachOp(..), callishMachOpHints
, pprCallishMachOp
+ , machOpMemcpyishAlign
-- Atomic read-modify-write
, AtomicMachOp(..)
@@ -565,12 +566,12 @@ data CallishMachOp
-- would the majority of use cases in ghc anyways
- -- Note that these three MachOps all take 1 extra parameter than the
- -- standard C lib versions. The extra (last) parameter contains
- -- alignment of the pointers. Used for optimisation in backends.
- | MO_Memcpy
- | MO_Memset
- | MO_Memmove
+ -- These three MachOps are parameterised by the known alignment
+ -- of the destination and source (for memcpy/memmove) pointers.
+ -- This information may be used for optimisation in backends.
+ | MO_Memcpy Int
+ | MO_Memset Int
+ | MO_Memmove Int
| MO_PopCnt Width
| MO_Clz Width
@@ -600,8 +601,16 @@ pprCallishMachOp mo = text (show mo)
callishMachOpHints :: CallishMachOp -> ([ForeignHint], [ForeignHint])
callishMachOpHints op = case op of
- MO_Memcpy -> ([], [AddrHint,AddrHint,NoHint,NoHint])
- MO_Memset -> ([], [AddrHint,NoHint,NoHint,NoHint])
- MO_Memmove -> ([], [AddrHint,AddrHint,NoHint,NoHint])
- _ -> ([],[])
+ MO_Memcpy _ -> ([], [AddrHint,AddrHint,NoHint])
+ MO_Memset _ -> ([], [AddrHint,NoHint,NoHint])
+ MO_Memmove _ -> ([], [AddrHint,AddrHint,NoHint])
+ _ -> ([],[])
-- empty lists indicate NoHint
+
+-- | The alignment of a 'memcpy'-ish operation.
+machOpMemcpyishAlign :: CallishMachOp -> Maybe Int
+machOpMemcpyishAlign op = case op of
+ MO_Memcpy align -> Just align
+ MO_Memset align -> Just align
+ MO_Memmove align -> Just align
+ _ -> Nothing
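The new machOpMemcpyishAlign helper gives every backend a uniform way to recover that alignment. A hypothetical use (not part of this patch, shown only to illustrate the intended pattern):

    -- Hypothetical: may a memcpy-ish op be unrolled into word-sized moves?
    canUnrollWordwise :: Int -> CallishMachOp -> Bool
    canUnrollWordwise wordBytes op =
      case machOpMemcpyishAlign op of
        Just align -> align `mod` wordBytes == 0
        Nothing    -> False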
diff --git a/compiler/cmm/CmmParse.y b/compiler/cmm/CmmParse.y
index fca231e988..694d79ead9 100644
--- a/compiler/cmm/CmmParse.y
+++ b/compiler/cmm/CmmParse.y
@@ -975,22 +975,38 @@ machOps = listToUFM $
( "i2f64", flip MO_SF_Conv W64 )
]
+callishMachOps :: UniqFM ([CmmExpr] -> (CallishMachOp, [CmmExpr]))
callishMachOps = listToUFM $
map (\(x, y) -> (mkFastString x, y)) [
- ( "write_barrier", MO_WriteBarrier ),
- ( "memcpy", MO_Memcpy ),
- ( "memset", MO_Memset ),
- ( "memmove", MO_Memmove ),
+ ( "write_barrier", (,) MO_WriteBarrier ),
+ ( "memcpy", memcpyLikeTweakArgs MO_Memcpy ),
+ ( "memset", memcpyLikeTweakArgs MO_Memset ),
+ ( "memmove", memcpyLikeTweakArgs MO_Memmove ),
- ("prefetch0",MO_Prefetch_Data 0),
- ("prefetch1",MO_Prefetch_Data 1),
- ("prefetch2",MO_Prefetch_Data 2),
- ("prefetch3",MO_Prefetch_Data 3)
+ ("prefetch0", (,) $ MO_Prefetch_Data 0),
+ ("prefetch1", (,) $ MO_Prefetch_Data 1),
+ ("prefetch2", (,) $ MO_Prefetch_Data 2),
+ ("prefetch3", (,) $ MO_Prefetch_Data 3)
-- ToDo: the rest, maybe
-- edit: which rest?
-- also: how do we tell CMM Lint how to type check callish macops?
]
+ where
+ memcpyLikeTweakArgs :: (Int -> CallishMachOp) -> [CmmExpr] -> (CallishMachOp, [CmmExpr])
+ memcpyLikeTweakArgs op [] = pgmError "memcpy-like function requires at least one argument"
+ memcpyLikeTweakArgs op args@(_:_) =
+ -- Force alignment with result to ensure pprPgmError fires
+ align `seq` (op align, args')
+ where
+ args' = init args
+ align = case last args of
+ CmmLit (CmmInt alignInteger _) -> fromInteger alignInteger
+ e -> pprPgmError "Non-constant alignment in memcpy-like function:" (ppr e)
+ -- The alignment of memcpy-ish operations must be a
+ -- compile-time constant. We verify this here, passing it around
+ -- in the MO_* constructor. In order to do this, however, we
+ -- must intercept the arguments in primCall.
parseSafety :: String -> P Safety
parseSafety "safe" = return PlaySafe
@@ -1207,10 +1223,11 @@ primCall
primCall results_code name args_code
= case lookupUFM callishMachOps name of
Nothing -> fail ("unknown primitive " ++ unpackFS name)
- Just p -> return $ do
+ Just f -> return $ do
results <- sequence results_code
args <- sequence args_code
- code (emitPrimCall (map fst results) p args)
+ let (p, args') = f args
+ code (emitPrimCall (map fst results) p args')
doStore :: CmmType -> CmmParse CmmExpr -> CmmParse CmmExpr -> CmmParse ()
doStore rep addr_code val_code
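Net effect in the parser: the trailing alignment argument of %memcpy, %memset and %memmove must be an integer literal; it is peeled off the argument list and stored in the MachOp before emitPrimCall runs, while a non-literal alignment is rejected with pprPgmError. A toy model of that split, using plain Integers in place of CmmExpr purely for brevity:

    -- Toy model of memcpyLikeTweakArgs: the last argument is the alignment.
    splitAlign :: [Integer] -> (Int, [Integer])
    splitAlign args = (fromInteger (last args), init args)

    -- e.g. splitAlign [100, 200, 1024, 8] == (8, [100, 200, 1024])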
diff --git a/compiler/cmm/PprC.hs b/compiler/cmm/PprC.hs
index 92c818242d..3703f0ae1f 100644
--- a/compiler/cmm/PprC.hs
+++ b/compiler/cmm/PprC.hs
@@ -238,13 +238,13 @@ pprStmt stmt =
hargs = zip args arg_hints
fn_call
- -- The mem primops carry an extra alignment arg, must drop it.
+ -- The mem primops carry an extra alignment arg.
-- We could maybe emit an alignment directive using this info.
-- We also need to cast mem primops to prevent conflicts with GCC
-- builtins (see bug #5967).
- | op `elem` [MO_Memcpy, MO_Memset, MO_Memmove]
+ | Just _align <- machOpMemcpyishAlign op
= (ptext (sLit ";EF_(") <> fn <> char ')' <> semi) $$
- pprForeignCall fn cconv hresults (init hargs)
+ pprForeignCall fn cconv hresults hargs
| otherwise
= pprCall fn cconv hresults hargs
@@ -745,9 +745,9 @@ pprCallishMachOp_for_C mop
MO_F32_Exp -> ptext (sLit "expf")
MO_F32_Sqrt -> ptext (sLit "sqrtf")
MO_WriteBarrier -> ptext (sLit "write_barrier")
- MO_Memcpy -> ptext (sLit "memcpy")
- MO_Memset -> ptext (sLit "memset")
- MO_Memmove -> ptext (sLit "memmove")
+ MO_Memcpy _ -> ptext (sLit "memcpy")
+ MO_Memset _ -> ptext (sLit "memset")
+ MO_Memmove _ -> ptext (sLit "memmove")
(MO_BSwap w) -> ptext (sLit $ bSwapLabel w)
(MO_PopCnt w) -> ptext (sLit $ popCntLabel w)
(MO_Clz w) -> ptext (sLit $ clzLabel w)
diff --git a/compiler/codeGen/StgCmmPrim.hs b/compiler/codeGen/StgCmmPrim.hs
index e208318e17..d812905594 100644
--- a/compiler/codeGen/StgCmmPrim.hs
+++ b/compiler/codeGen/StgCmmPrim.hs
@@ -1644,8 +1644,7 @@ doCopyByteArrayOp = emitCopyByteArray copy
-- Copy data (we assume the arrays aren't overlapping since
-- they're of different types)
copy _src _dst dst_p src_p bytes =
- do dflags <- getDynFlags
- emitMemcpyCall dst_p src_p bytes (mkIntExpr dflags 1)
+ emitMemcpyCall dst_p src_p bytes 1
-- | Takes a source 'MutableByteArray#', an offset in the source
-- array, a destination 'MutableByteArray#', an offset into the
@@ -1662,8 +1661,8 @@ doCopyMutableByteArrayOp = emitCopyByteArray copy
copy src dst dst_p src_p bytes = do
dflags <- getDynFlags
[moveCall, cpyCall] <- forkAlts [
- getCode $ emitMemmoveCall dst_p src_p bytes (mkIntExpr dflags 1),
- getCode $ emitMemcpyCall dst_p src_p bytes (mkIntExpr dflags 1)
+ getCode $ emitMemmoveCall dst_p src_p bytes 1,
+ getCode $ emitMemcpyCall dst_p src_p bytes 1
]
emit =<< mkCmmIfThenElse (cmmEqWord dflags src dst) moveCall cpyCall
@@ -1685,7 +1684,7 @@ doCopyByteArrayToAddrOp src src_off dst_p bytes = do
-- Use memcpy (we are allowed to assume the arrays aren't overlapping)
dflags <- getDynFlags
src_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags src (arrWordsHdrSize dflags)) src_off
- emitMemcpyCall dst_p src_p bytes (mkIntExpr dflags 1)
+ emitMemcpyCall dst_p src_p bytes 1
-- | Takes a source 'MutableByteArray#', an offset in the source array, a
-- destination 'Addr#', and the number of bytes to copy. Copies the given
@@ -1702,7 +1701,7 @@ doCopyAddrToByteArrayOp src_p dst dst_off bytes = do
-- Use memcpy (we are allowed to assume the arrays aren't overlapping)
dflags <- getDynFlags
dst_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags dst (arrWordsHdrSize dflags)) dst_off
- emitMemcpyCall dst_p src_p bytes (mkIntExpr dflags 1)
+ emitMemcpyCall dst_p src_p bytes 1
-- ----------------------------------------------------------------------------
@@ -1716,7 +1715,7 @@ doSetByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
doSetByteArrayOp ba off len c
= do dflags <- getDynFlags
p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags ba (arrWordsHdrSize dflags)) off
- emitMemsetCall p c len (mkIntExpr dflags 1)
+ emitMemsetCall p c len 1
-- ----------------------------------------------------------------------------
-- Allocating arrays
@@ -1789,7 +1788,7 @@ doCopyArrayOp = emitCopyArray copy
copy _src _dst dst_p src_p bytes =
do dflags <- getDynFlags
emitMemcpyCall dst_p src_p (mkIntExpr dflags bytes)
- (mkIntExpr dflags (wORD_SIZE dflags))
+ (wORD_SIZE dflags)
-- | Takes a source 'MutableArray#', an offset in the source array, a
@@ -1807,9 +1806,9 @@ doCopyMutableArrayOp = emitCopyArray copy
dflags <- getDynFlags
[moveCall, cpyCall] <- forkAlts [
getCode $ emitMemmoveCall dst_p src_p (mkIntExpr dflags bytes)
- (mkIntExpr dflags (wORD_SIZE dflags)),
+ (wORD_SIZE dflags),
getCode $ emitMemcpyCall dst_p src_p (mkIntExpr dflags bytes)
- (mkIntExpr dflags (wORD_SIZE dflags))
+ (wORD_SIZE dflags)
]
emit =<< mkCmmIfThenElse (cmmEqWord dflags src dst) moveCall cpyCall
@@ -1856,7 +1855,7 @@ doCopySmallArrayOp = emitCopySmallArray copy
copy _src _dst dst_p src_p bytes =
do dflags <- getDynFlags
emitMemcpyCall dst_p src_p (mkIntExpr dflags bytes)
- (mkIntExpr dflags (wORD_SIZE dflags))
+ (wORD_SIZE dflags)
doCopySmallMutableArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> WordOff
@@ -1870,9 +1869,9 @@ doCopySmallMutableArrayOp = emitCopySmallArray copy
dflags <- getDynFlags
[moveCall, cpyCall] <- forkAlts
[ getCode $ emitMemmoveCall dst_p src_p (mkIntExpr dflags bytes)
- (mkIntExpr dflags (wORD_SIZE dflags))
+ (wORD_SIZE dflags)
, getCode $ emitMemcpyCall dst_p src_p (mkIntExpr dflags bytes)
- (mkIntExpr dflags (wORD_SIZE dflags))
+ (wORD_SIZE dflags)
]
emit =<< mkCmmIfThenElse (cmmEqWord dflags src dst) moveCall cpyCall
@@ -1937,7 +1936,7 @@ emitCloneArray info_p res_r src src_off n = do
(mkIntExpr dflags (arrPtrsHdrSizeW dflags)) src_off)
emitMemcpyCall dst_p src_p (mkIntExpr dflags (wordsToBytes dflags n))
- (mkIntExpr dflags (wORD_SIZE dflags))
+ (wORD_SIZE dflags)
emit $ mkAssign (CmmLocal res_r) (CmmReg arr)
@@ -1974,7 +1973,7 @@ emitCloneSmallArray info_p res_r src src_off n = do
(mkIntExpr dflags (smallArrPtrsHdrSizeW dflags)) src_off)
emitMemcpyCall dst_p src_p (mkIntExpr dflags (wordsToBytes dflags n))
- (mkIntExpr dflags (wORD_SIZE dflags))
+ (wORD_SIZE dflags)
emit $ mkAssign (CmmLocal res_r) (CmmReg arr)
@@ -1993,7 +1992,7 @@ emitSetCards dst_start dst_cards_start n = do
emitMemsetCall (cmmAddWord dflags dst_cards_start start_card)
(mkIntExpr dflags 1)
(cmmAddWord dflags (cmmSubWord dflags end_card start_card) (mkIntExpr dflags 1))
- (mkIntExpr dflags 1) -- no alignment (1 byte)
+ 1 -- no alignment (1 byte)
-- Convert an element index to a card index
cardCmm :: DynFlags -> CmmExpr -> CmmExpr
@@ -2101,29 +2100,29 @@ doCasByteArray res mba idx idx_ty old new = do
-- Helpers for emitting function calls
-- | Emit a call to @memcpy@.
-emitMemcpyCall :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
+emitMemcpyCall :: CmmExpr -> CmmExpr -> CmmExpr -> Int -> FCode ()
emitMemcpyCall dst src n align = do
emitPrimCall
[ {-no results-} ]
- MO_Memcpy
- [ dst, src, n, align ]
+ (MO_Memcpy align)
+ [ dst, src, n ]
-- | Emit a call to @memmove@.
-emitMemmoveCall :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
+emitMemmoveCall :: CmmExpr -> CmmExpr -> CmmExpr -> Int -> FCode ()
emitMemmoveCall dst src n align = do
emitPrimCall
[ {- no results -} ]
- MO_Memmove
- [ dst, src, n, align ]
+ (MO_Memmove align)
+ [ dst, src, n ]
-- | Emit a call to @memset@. The second argument must fit inside an
-- unsigned char.
-emitMemsetCall :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
+emitMemsetCall :: CmmExpr -> CmmExpr -> CmmExpr -> Int -> FCode ()
emitMemsetCall dst c n align = do
emitPrimCall
[ {- no results -} ]
- MO_Memset
- [ dst, c, n, align ]
+ (MO_Memset align)
+ [ dst, c, n ]
emitBSwapCall :: LocalReg -> CmmExpr -> Width -> FCode ()
emitBSwapCall res x width = do
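Because the alignment is now an ordinary Int in emitMemcpyCall's type, call sites pass a Haskell constant instead of building a CmmExpr. An illustrative wrapper in the style of the array-copy call sites above (the name copyWordAligned is invented for this example):

    -- Illustrative only: emit a memcpy whose pointers are known to be
    -- word-aligned, as in the array-copy primops in this file.
    copyWordAligned :: CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
    copyWordAligned dst_p src_p bytes = do
        dflags <- getDynFlags
        emitMemcpyCall dst_p src_p bytes (wORD_SIZE dflags)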
diff --git a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs
index 2c48c28a28..ffe9d619f6 100644
--- a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs
+++ b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs
@@ -24,7 +24,8 @@ import Hoopl
import DynFlags
import FastString
import ForeignCall
-import Outputable
+import Outputable hiding (panic, pprPanic)
+import qualified Outputable
import Platform
import OrdList
import UniqSupply
@@ -230,16 +231,13 @@ genCall (PrimTarget (MO_AtomicRead _)) [dst] [addr] = do
-- Handle memcpy function specifically since llvm's intrinsic version takes
-- some extra parameters.
-genCall t@(PrimTarget op) [] args'
- | op == MO_Memcpy ||
- op == MO_Memset ||
- op == MO_Memmove = do
+genCall t@(PrimTarget op) [] args
+ | Just align <- machOpMemcpyishAlign op = do
dflags <- getDynFlags
- let (args, alignVal) = splitAlignVal args'
- isVolTy = [i1]
+ let isVolTy = [i1]
isVolVal = [mkIntLit i1 0]
- argTy | op == MO_Memset = [i8Ptr, i8, llvmWord dflags, i32] ++ isVolTy
- | otherwise = [i8Ptr, i8Ptr, llvmWord dflags, i32] ++ isVolTy
+ argTy | MO_Memset _ <- op = [i8Ptr, i8, llvmWord dflags, i32] ++ isVolTy
+ | otherwise = [i8Ptr, i8Ptr, llvmWord dflags, i32] ++ isVolTy
funTy = \name -> LMFunction $ LlvmFunctionDecl name ExternallyVisible
CC_Ccc LMVoid FixedArgs (tysToParams argTy) Nothing
@@ -250,21 +248,12 @@ genCall t@(PrimTarget op) [] args'
(argVars', stmts3) <- castVars $ zip argVars argTy
stmts4 <- getTrashStmts
- let arguments = argVars' ++ (alignVal:isVolVal)
+ let alignVal = mkIntLit i32 align
+ arguments = argVars' ++ (alignVal:isVolVal)
call = Expr $ Call StdCall fptr arguments []
stmts = stmts1 `appOL` stmts2 `appOL` stmts3
`appOL` stmts4 `snocOL` call
return (stmts, top1 ++ top2)
- where
- splitAlignVal xs = (init xs, extractLit $ last xs)
-
- -- Fix for trac #6158. Since LLVM 3.1, opt fails when given anything other
- -- than a direct constant (i.e. 'i32 8') as the alignment argument for the
- -- memcpy & co llvm intrinsic functions. So we handle this directly now.
- extractLit (CmmLit (CmmInt i _)) = mkIntLit i32 i
- extractLit _other = trace ("WARNING: Non constant alignment value given" ++
- " for memcpy! Please report to GHC developers")
- mkIntLit i32 0
-- Handle all other foreign calls and prim ops.
genCall target res args = do
@@ -534,9 +523,9 @@ cmmPrimOpFunctions mop = do
MO_F64_Cosh -> fsLit "cosh"
MO_F64_Tanh -> fsLit "tanh"
- MO_Memcpy -> fsLit $ "llvm.memcpy." ++ intrinTy1
- MO_Memmove -> fsLit $ "llvm.memmove." ++ intrinTy1
- MO_Memset -> fsLit $ "llvm.memset." ++ intrinTy2
+ MO_Memcpy _ -> fsLit $ "llvm.memcpy." ++ intrinTy1
+ MO_Memmove _ -> fsLit $ "llvm.memmove." ++ intrinTy1
+ MO_Memset _ -> fsLit $ "llvm.memset." ++ intrinTy2
(MO_PopCnt w) -> fsLit $ "llvm.ctpop." ++ showSDoc dflags (ppr $ widthToLlvmInt w)
(MO_BSwap w) -> fsLit $ "llvm.bswap." ++ showSDoc dflags (ppr $ widthToLlvmInt w)
@@ -1646,6 +1635,14 @@ toIWord :: Integral a => DynFlags -> a -> LlvmVar
toIWord dflags = mkIntLit (llvmWord dflags)
+-- | Error functions
+panic :: String -> a
+panic s = Outputable.panic $ "LlvmCodeGen.CodeGen." ++ s
+
+pprPanic :: String -> SDoc -> a
+pprPanic s d = Outputable.pprPanic ("LlvmCodeGen.CodeGen." ++ s) d
+
+
-- | Returns TBAA meta data by unique
getTBAAMeta :: Unique -> LlvmM [MetaAnnot]
getTBAAMeta u = do
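For the LLVM backend the payoff is that the alignment operand of the llvm.memcpy/memset/memmove intrinsics is always a constant i32, so the old extractLit workaround for #6158 can go. A sketch of the operand list genCall now assembles, where argVars' are the cast dst/src/len operands from the hunk above (assumed in scope):

    -- Sketch: append the constant alignment and an `isvolatile = false`
    -- flag, as expected by the memcpy-ish LLVM intrinsics.
    intrinsicOperands :: [LlvmVar] -> Int -> [LlvmVar]
    intrinsicOperands argVars' align =
        argVars' ++ [mkIntLit i32 align, mkIntLit i1 0]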
diff --git a/compiler/nativeGen/PPC/CodeGen.hs b/compiler/nativeGen/PPC/CodeGen.hs
index a115980183..299d6b702b 100644
--- a/compiler/nativeGen/PPC/CodeGen.hs
+++ b/compiler/nativeGen/PPC/CodeGen.hs
@@ -923,7 +923,7 @@ genCCall' _ _ (PrimTarget MO_Touch) _ _
genCCall' _ _ (PrimTarget (MO_Prefetch_Data _)) _ _
= return $ nilOL
-genCCall' dflags gcp target dest_regs args0
+genCCall' dflags gcp target dest_regs args
= ASSERT(not $ any (`elem` [II16]) $ map cmmTypeSize argReps)
-- we rely on argument promotion in the codeGen
do
@@ -978,17 +978,7 @@ genCCall' dflags gcp target dest_regs args0
map (widthInBytes . typeWidth) argReps
GCPLinux -> roundTo 16 finalStack
- -- need to remove alignment information
- args | PrimTarget mop <- target,
- (mop == MO_Memcpy ||
- mop == MO_Memset ||
- mop == MO_Memmove)
- = init args0
-
- | otherwise
- = args0
-
- argReps = map (cmmExprType dflags) args0
+ argReps = map (cmmExprType dflags) args
roundTo a x | x `mod` a == 0 = x
| otherwise = x + a - (x `mod` a)
@@ -1173,9 +1163,9 @@ genCCall' dflags gcp target dest_regs args0
MO_UF_Conv w -> (fsLit $ word2FloatLabel w, False)
- MO_Memcpy -> (fsLit "memcpy", False)
- MO_Memset -> (fsLit "memset", False)
- MO_Memmove -> (fsLit "memmove", False)
+ MO_Memcpy _ -> (fsLit "memcpy", False)
+ MO_Memset _ -> (fsLit "memset", False)
+ MO_Memmove _ -> (fsLit "memmove", False)
MO_BSwap w -> (fsLit $ bSwapLabel w, False)
MO_PopCnt w -> (fsLit $ popCntLabel w, False)
diff --git a/compiler/nativeGen/SPARC/CodeGen.hs b/compiler/nativeGen/SPARC/CodeGen.hs
index a9d861946e..4792933366 100644
--- a/compiler/nativeGen/SPARC/CodeGen.hs
+++ b/compiler/nativeGen/SPARC/CodeGen.hs
@@ -404,19 +404,8 @@ genCCall (PrimTarget MO_WriteBarrier) _ _
genCCall (PrimTarget (MO_Prefetch_Data _)) _ _
= return $ nilOL
-genCCall target dest_regs args0
- = do
- -- need to remove alignment information
- let args | PrimTarget mop <- target,
- (mop == MO_Memcpy ||
- mop == MO_Memset ||
- mop == MO_Memmove)
- = init args0
-
- | otherwise
- = args0
-
- -- work out the arguments, and assign them to integer regs
+genCCall target dest_regs args
+ = do -- work out the arguments, and assign them to integer regs
argcode_and_vregs <- mapM arg_to_int_vregs args
let (argcodes, vregss) = unzip argcode_and_vregs
let vregs = concat vregss
@@ -653,9 +642,9 @@ outOfLineMachOp_table mop
MO_UF_Conv w -> fsLit $ word2FloatLabel w
- MO_Memcpy -> fsLit "memcpy"
- MO_Memset -> fsLit "memset"
- MO_Memmove -> fsLit "memmove"
+ MO_Memcpy _ -> fsLit "memcpy"
+ MO_Memset _ -> fsLit "memset"
+ MO_Memmove _ -> fsLit "memmove"
MO_BSwap w -> fsLit $ bSwapLabel w
MO_PopCnt w -> fsLit $ popCntLabel w
diff --git a/compiler/nativeGen/X86/CodeGen.hs b/compiler/nativeGen/X86/CodeGen.hs
index 7b7cc54bbe..a052fdacdf 100644
--- a/compiler/nativeGen/X86/CodeGen.hs
+++ b/compiler/nativeGen/X86/CodeGen.hs
@@ -1645,10 +1645,8 @@ genCCall
-- Unroll memcpy calls if the source and destination pointers are at
-- least DWORD aligned and the number of bytes to copy isn't too
-- large. Otherwise, call C's memcpy.
-genCCall dflags is32Bit (PrimTarget MO_Memcpy) _
- [dst, src,
- (CmmLit (CmmInt n _)),
- (CmmLit (CmmInt align _))]
+genCCall dflags is32Bit (PrimTarget (MO_Memcpy align)) _
+ [dst, src, CmmLit (CmmInt n _)]
| fromInteger insns <= maxInlineMemcpyInsns dflags && align .&. 3 == 0 = do
code_dst <- getAnyReg dst
dst_r <- getNewRegNat size
@@ -1694,11 +1692,10 @@ genCCall dflags is32Bit (PrimTarget MO_Memcpy) _
dst_addr = AddrBaseIndex (EABaseReg dst) EAIndexNone
(ImmInteger (n - i))
-genCCall dflags _ (PrimTarget MO_Memset) _
+genCCall dflags _ (PrimTarget (MO_Memset align)) _
[dst,
CmmLit (CmmInt c _),
- CmmLit (CmmInt n _),
- CmmLit (CmmInt align _)]
+ CmmLit (CmmInt n _)]
| fromInteger insns <= maxInlineMemsetInsns dflags && align .&. 3 == 0 = do
code_dst <- getAnyReg dst
dst_r <- getNewRegNat size
@@ -2507,19 +2504,13 @@ outOfLineCmmOp mop res args
let target = ForeignTarget targetExpr
(ForeignConvention CCallConv [] [] CmmMayReturn)
- stmtToInstrs (CmmUnsafeForeignCall target (catMaybes [res]) args')
+ stmtToInstrs (CmmUnsafeForeignCall target (catMaybes [res]) args)
where
-- Assume we can call these functions directly, and that they're not in a dynamic library.
-- TODO: Why is this ok? Under linux this code will be in libm.so
-- Is is because they're really implemented as a primitive instruction by the assembler?? -- BL 2009/12/31
lbl = mkForeignLabel fn Nothing ForeignLabelInThisPackage IsFunction
- args' = case mop of
- MO_Memcpy -> init args
- MO_Memset -> init args
- MO_Memmove -> init args
- _ -> args
-
fn = case mop of
MO_F32_Sqrt -> fsLit "sqrtf"
MO_F32_Sin -> fsLit "sinf"
@@ -2553,9 +2544,9 @@ outOfLineCmmOp mop res args
MO_F64_Tanh -> fsLit "tanh"
MO_F64_Pwr -> fsLit "pow"
- MO_Memcpy -> fsLit "memcpy"
- MO_Memset -> fsLit "memset"
- MO_Memmove -> fsLit "memmove"
+ MO_Memcpy _ -> fsLit "memcpy"
+ MO_Memset _ -> fsLit "memset"
+ MO_Memmove _ -> fsLit "memmove"
MO_PopCnt _ -> fsLit "popcnt"
MO_BSwap _ -> fsLit "bswap"
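On x86 the unrolling guard now reads the alignment straight off the constructor; align .&. 3 == 0 asks whether the pointers are at least DWORD (4-byte) aligned. The predicate in isolation, with sample values (illustrative only):

    import Data.Bits ((.&.))

    -- The alignment guard used by the x86 memcpy/memset unrolling above.
    dwordAligned :: Int -> Bool
    dwordAligned align = align .&. 3 == 0

    -- dwordAligned 1 == False; dwordAligned 4 == True; dwordAligned 8 == True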
diff --git a/includes/Cmm.h b/includes/Cmm.h
index 802ab51a5a..908a3763ca 100644
--- a/includes/Cmm.h
+++ b/includes/Cmm.h
@@ -855,7 +855,7 @@
src_p = (src) + SIZEOF_StgMutArrPtrs + WDS(src_off); \
bytes = WDS(n); \
\
- prim %memcpy(dst_p, src_p, bytes, WDS(1)); \
+ prim %memcpy(dst_p, src_p, bytes, SIZEOF_W); \
\
dst_cards_p = dst_elems_p + WDS(StgMutArrPtrs_ptrs(dst)); \
setCards(dst_cards_p, dst_off, n); \
@@ -875,9 +875,9 @@
bytes = WDS(n); \
\
if ((src) == (dst)) { \
- prim %memmove(dst_p, src_p, bytes, WDS(1)); \
+ prim %memmove(dst_p, src_p, bytes, SIZEOF_W); \
} else { \
- prim %memcpy(dst_p, src_p, bytes, WDS(1)); \
+ prim %memcpy(dst_p, src_p, bytes, SIZEOF_W); \
} \
\
dst_cards_p = dst_elems_p + WDS(StgMutArrPtrs_ptrs(dst)); \
diff --git a/rts/PrimOps.cmm b/rts/PrimOps.cmm
index 2e6ca46ede..26a67167bc 100644
--- a/rts/PrimOps.cmm
+++ b/rts/PrimOps.cmm
@@ -186,7 +186,7 @@ stg_resizzeMutableByteArrayzh ( gcptr mba, W_ new_size )
// copy over old content
prim %memcpy(BYTE_ARR_CTS(new_mba), BYTE_ARR_CTS(mba),
- StgArrWords_bytes(mba), WDS(1));
+ StgArrWords_bytes(mba), SIZEOF_W);
return (new_mba);
}
@@ -438,7 +438,7 @@ stg_copySmallArrayzh ( gcptr src, W_ src_off, gcptr dst, W_ dst_off, W_ n)
dst_p = dst + SIZEOF_StgSmallMutArrPtrs + WDS(dst_off);
src_p = src + SIZEOF_StgSmallMutArrPtrs + WDS(src_off);
bytes = WDS(n);
- prim %memcpy(dst_p, src_p, bytes, WDS(1));
+ prim %memcpy(dst_p, src_p, bytes, SIZEOF_W);
return ();
}
@@ -453,9 +453,9 @@ stg_copySmallMutableArrayzh ( gcptr src, W_ src_off, gcptr dst, W_ dst_off, W_ n
src_p = src + SIZEOF_StgSmallMutArrPtrs + WDS(src_off);
bytes = WDS(n);
if (src == dst) {
- prim %memmove(dst_p, src_p, bytes, WDS(1));
+ prim %memmove(dst_p, src_p, bytes, SIZEOF_W);
} else {
- prim %memcpy(dst_p, src_p, bytes, WDS(1));
+ prim %memcpy(dst_p, src_p, bytes, SIZEOF_W);
}
return ();
diff --git a/testsuite/tests/codeGen/should_fail/Makefile b/testsuite/tests/codeGen/should_fail/Makefile
new file mode 100644
index 0000000000..9101fbd40a
--- /dev/null
+++ b/testsuite/tests/codeGen/should_fail/Makefile
@@ -0,0 +1,3 @@
+TOP=../../..
+include $(TOP)/mk/boilerplate.mk
+include $(TOP)/mk/test.mk
diff --git a/testsuite/tests/codeGen/should_fail/T8131.cmm b/testsuite/tests/codeGen/should_fail/T8131.cmm
new file mode 100644
index 0000000000..153fb02b24
--- /dev/null
+++ b/testsuite/tests/codeGen/should_fail/T8131.cmm
@@ -0,0 +1,7 @@
+#include "Cmm.h"
+
+testMemcpy (W_ dst, W_ src, W_ l, W_ sz)
+{
+ prim %memcpy(dst, src, l, sz);
+ return ();
+}
diff --git a/testsuite/tests/codeGen/should_fail/all.T b/testsuite/tests/codeGen/should_fail/all.T
new file mode 100644
index 0000000000..39faebb619
--- /dev/null
+++ b/testsuite/tests/codeGen/should_fail/all.T
@@ -0,0 +1,3 @@
+# Tests for code generator and CMM parser
+
+test('T8131', cmm_src, compile_fail, [''])
diff --git a/testsuite/tests/codeGen/should_gen_asm/memcpy-unroll-conprop.cmm b/testsuite/tests/codeGen/should_gen_asm/memcpy-unroll-conprop.cmm
index be4883d34f..bdb5c3cc9e 100644
--- a/testsuite/tests/codeGen/should_gen_asm/memcpy-unroll-conprop.cmm
+++ b/testsuite/tests/codeGen/should_gen_asm/memcpy-unroll-conprop.cmm
@@ -6,7 +6,7 @@ callMemcpy (W_ dst, W_ src)
W_ size;
W_ alig;
size = 16;
- alig = 4;
+#define alig 4
if (dst != 0) {
prim %memcpy(dst, src, size, alig);
}
diff --git a/testsuite/tests/codeGen/should_run/cgrun069_cmm.cmm b/testsuite/tests/codeGen/should_run/cgrun069_cmm.cmm
index 61cc5d8bfd..13a26aa012 100644
--- a/testsuite/tests/codeGen/should_run/cgrun069_cmm.cmm
+++ b/testsuite/tests/codeGen/should_run/cgrun069_cmm.cmm
@@ -11,23 +11,24 @@ section "rodata" { memmoveErr : bits8[] "Memmove Error Occured\n"; }
memintrinTest (W_ dummy)
{
- W_ size, src, dst, off, alignV, set;
+ W_ size, src, dst, off, set;
bits8 set8;
- // Need two versions as memset takes a word for historical reasons
+ // Need two versions as memset takes a word for historical reasons
// but really its a bits8. We check that setting has ben done correctly
// at the bits8 level, so need bits8 version for checking.
set = 4;
set8 = 4::bits8;
size = 1024;
- alignV = 4;
+// Alignment must be constant expression
+#define alignV 4
("ptr" src) = foreign "C" malloc(size);
("ptr" dst) = foreign "C" malloc(size);
// Test memset
- prim %memset(src, set, size, alignV);
+ prim %memset(src, set, size, alignV);
// Check memset worked
off = 0;
@@ -100,6 +101,7 @@ while3_end:
return (0);
}
+#undef alignV
// ---------------------------------------------------------------------
// Tests for unrolling
@@ -113,15 +115,14 @@ while3_end:
// has ben done correctly at the bits8 level, so need bits8 version
// for checking.
#define TEST_MEMSET(ALIGN,SIZE) \
- W_ size, src, dst, off, alignV, set; \
+ W_ size, src, dst, off, set; \
bits8 set8; \
set = 4; \
set8 = 4::bits8; \
size = SIZE; \
- alignV = ALIGN; \
("ptr" src) = foreign "C" malloc(size); \
("ptr" dst) = foreign "C" malloc(size); \
- prim %memset(src, set, size, alignV); \
+ prim %memset(src, set, size, ALIGN); \
off = 0; \
loop: \
if (off == size) { \
@@ -164,9 +165,8 @@ testMemset4_7 (W_ dummy) { TEST_MEMSET(4,7); }
testMemset4_8 (W_ dummy) { TEST_MEMSET(4,8); }
#define TEST_MEMCPY(ALIGN,SIZE) \
- W_ size, src, dst, off, alignV; \
+ W_ size, src, dst, off; \
size = SIZE; \
- alignV = ALIGN; \
("ptr" src) = foreign "C" malloc(size); \
("ptr" dst) = foreign "C" malloc(size); \
off = 0; \
@@ -178,7 +178,7 @@ init: \
off = off + 1; \
goto init; \
init_end: \
- prim %memcpy(dst, src, size, alignV); \
+ prim %memcpy(dst, src, size, ALIGN); \
off = 0; \
loop: \
if (off == size) { \
diff --git a/testsuite/tests/llvm/should_compile/T8131b.hs b/testsuite/tests/llvm/should_compile/T8131b.hs
new file mode 100644
index 0000000000..b9bc1f6184
--- /dev/null
+++ b/testsuite/tests/llvm/should_compile/T8131b.hs
@@ -0,0 +1,9 @@
+{-# LANGUAGE MagicHash, UnboxedTuples #-}
+import GHC.Prim
+import GHC.IO
+
+main = IO $ \s ->
+ let (# s1, p0 #) = newByteArray# 10# s
+ (# s2, p #) = unsafeFreezeByteArray# p0 s1
+ (# s3, q #) = newByteArray# 10# s2
+ in (# copyByteArray# p 0# q 0# 10# s, () #)
diff --git a/testsuite/tests/llvm/should_compile/all.T b/testsuite/tests/llvm/should_compile/all.T
index b630645f1e..0082635445 100644
--- a/testsuite/tests/llvm/should_compile/all.T
+++ b/testsuite/tests/llvm/should_compile/all.T
@@ -12,4 +12,4 @@ test('T5681', normal, compile, [''])
test('T6158', [reqlib('vector'), reqlib('primitive')], compile, ['-package vector -package primitive'])
test('T7571', cmm_src, compile, [''])
test('T7575', unless(wordsize(32), skip), compile, [''])
-test('T8131', [cmm_src, expect_broken(8131)], compile, [''])
+test('T8131b', [cmm_src], compile, [''])