diff options
author | Austin Seipp <aseipp@pobox.com> | 2013-07-17 03:41:01 -0500 |
---|---|---|
committer | Austin Seipp <aseipp@pobox.com> | 2013-07-17 04:18:52 -0500 |
commit | 18087a119b47368b15231c43402c81888c75957d (patch) | |
tree | 9bc069d9d0540fd40dc75745f4ab80218c1bf6d3 /compiler | |
parent | 2f99cdb9f9e561f29d726fea90a5a98de7499a2d (diff) | |
download | haskell-18087a119b47368b15231c43402c81888c75957d.tar.gz |
Add support for byte endian swapping for Word 16/32/64.
* Exposes bSwap{,16,32,64}# primops
* Add a new machop: MO_BSwap
* Use a Stg implementation (hs_bswap{16,32,64}) for other implementation
in NCG.
* Generate bswap in X86 NCG for 32 and 64 bits, and for 16 bits, bswap+shr
instead of using xchg.
* Generate llvm.bswap intrinsics in llvm codegen.
Authored-by: Vincent Hanquez <tab@snarc.org>
Signed-off-by: Austin Seipp <aseipp@pobox.com>
Diffstat (limited to 'compiler')
-rw-r--r-- | compiler/cmm/CmmMachOp.hs | 1 | ||||
-rw-r--r-- | compiler/cmm/PprC.hs | 1 | ||||
-rw-r--r-- | compiler/codeGen/StgCmmPrim.hs | 12 | ||||
-rw-r--r-- | compiler/llvmGen/LlvmCodeGen/CodeGen.hs | 61 | ||||
-rw-r--r-- | compiler/nativeGen/CPrim.hs | 9 | ||||
-rw-r--r-- | compiler/nativeGen/PPC/CodeGen.hs | 1 | ||||
-rw-r--r-- | compiler/nativeGen/SPARC/CodeGen.hs | 1 | ||||
-rw-r--r-- | compiler/nativeGen/X86/CodeGen.hs | 24 | ||||
-rw-r--r-- | compiler/nativeGen/X86/Instr.hs | 3 | ||||
-rw-r--r-- | compiler/nativeGen/X86/Ppr.hs | 1 | ||||
-rw-r--r-- | compiler/prelude/primops.txt.pp | 9 |
11 files changed, 97 insertions, 26 deletions
diff --git a/compiler/cmm/CmmMachOp.hs b/compiler/cmm/CmmMachOp.hs index fae84e5d53..8d42bbd2cb 100644 --- a/compiler/cmm/CmmMachOp.hs +++ b/compiler/cmm/CmmMachOp.hs @@ -529,6 +529,7 @@ data CallishMachOp | MO_Memmove | MO_PopCnt Width + | MO_BSwap Width deriving (Eq, Show) pprCallishMachOp :: CallishMachOp -> SDoc diff --git a/compiler/cmm/PprC.hs b/compiler/cmm/PprC.hs index 00ba7acb06..b0c9bd3f2f 100644 --- a/compiler/cmm/PprC.hs +++ b/compiler/cmm/PprC.hs @@ -738,6 +738,7 @@ pprCallishMachOp_for_C mop MO_Memcpy -> ptext (sLit "memcpy") MO_Memset -> ptext (sLit "memset") MO_Memmove -> ptext (sLit "memmove") + (MO_BSwap w) -> ptext (sLit $ bSwapLabel w) (MO_PopCnt w) -> ptext (sLit $ popCntLabel w) (MO_UF_Conv w) -> ptext (sLit $ word2FloatLabel w) diff --git a/compiler/codeGen/StgCmmPrim.hs b/compiler/codeGen/StgCmmPrim.hs index 54002e8171..7ce329a707 100644 --- a/compiler/codeGen/StgCmmPrim.hs +++ b/compiler/codeGen/StgCmmPrim.hs @@ -541,6 +541,11 @@ emitPrimOp _ [] CopyMutableByteArrayOp [src,src_off,dst,dst_off,n] = emitPrimOp _ [] SetByteArrayOp [ba,off,len,c] = doSetByteArrayOp ba off len c +emitPrimOp _ [res] BSwap16Op [w] = emitBSwapCall res w W16 +emitPrimOp _ [res] BSwap32Op [w] = emitBSwapCall res w W32 +emitPrimOp _ [res] BSwap64Op [w] = emitBSwapCall res w W64 +emitPrimOp dflags [res] BSwapOp [w] = emitBSwapCall res w (wordWidth dflags) + -- Population count emitPrimOp _ [res] PopCnt8Op [w] = emitPopCntCall res w W8 emitPrimOp _ [res] PopCnt16Op [w] = emitPopCntCall res w W16 @@ -1568,6 +1573,13 @@ emitAllocateCall res cap n = do allocate = CmmLit (CmmLabel (mkForeignLabel (fsLit "allocate") Nothing ForeignLabelInExternalPackage IsFunction)) +emitBSwapCall :: LocalReg -> CmmExpr -> Width -> FCode () +emitBSwapCall res x width = do + emitPrimCall + [ res ] + (MO_BSwap width) + [ x ] + emitPopCntCall :: LocalReg -> CmmExpr -> Width -> FCode () emitPopCntCall res x width = do emitPrimCall diff --git a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs index d223a5c9cd..6f898fa56c 100644 --- a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs +++ b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs @@ -221,30 +221,11 @@ genCall t@(PrimTarget MO_Prefetch_Data) [] args = do `appOL` trash `snocOL` call return (stmts, top1 ++ top2) --- Handle popcnt function specifically since GHC only really has i32 and i64 --- types and things like Word8 are backed by an i32 and just present a logical --- i8 range. So we must handle conversions from i32 to i8 explicitly as LLVM --- is strict about types. -genCall t@(PrimTarget op@(MO_PopCnt w)) [dst] args = do - let width = widthToLlvmInt w - dstTy = cmmToLlvmType $ localRegType dst - - fname <- cmmPrimOpFunctions op - (fptr, _, top3) <- getInstrinct fname width [width] - - dstV <- getCmmReg (CmmLocal dst) - - let (_, arg_hints) = foreignTargetHints t - let args_hints = zip args arg_hints - (argsV, stmts2, top2) <- arg_vars args_hints ([], nilOL, []) - (argsV', stmts4) <- castVars $ zip argsV [width] - (retV, s1) <- doExpr width $ Call StdCall fptr argsV' [] - ([retV'], stmts5) <- castVars [(retV,dstTy)] - let s2 = Store retV' dstV - - let stmts = stmts2 `appOL` stmts4 `snocOL` - s1 `appOL` stmts5 `snocOL` s2 - return (stmts, top2 ++ top3) +-- Handle PopCnt and BSwap that need to only convert arg and return types +genCall t@(PrimTarget (MO_PopCnt w)) dsts args = + genCallSimpleCast w t dsts args +genCall t@(PrimTarget (MO_BSwap w)) dsts args = + genCallSimpleCast w t dsts args -- Handle memcpy function specifically since llvm's intrinsic version takes -- some extra parameters. @@ -390,9 +371,36 @@ genCall target res args = do return (allStmts `snocOL` s2 `snocOL` s3 `appOL` retStmt, top1 ++ top2) +-- Handle simple function call that only need simple type casting, of the form: +-- truncate arg >>= \a -> call(a) >>= zext +-- +-- since GHC only really has i32 and i64 types and things like Word8 are backed +-- by an i32 and just present a logical i8 range. So we must handle conversions +-- from i32 to i8 explicitly as LLVM is strict about types. +genCallSimpleCast :: Width -> ForeignTarget -> [CmmFormal] -> [CmmActual] + -> LlvmM StmtData +genCallSimpleCast w t@(PrimTarget op) [dst] args = do + let width = widthToLlvmInt w + dstTy = cmmToLlvmType $ localRegType dst + + fname <- cmmPrimOpFunctions op + (fptr, _, top3) <- getInstrinct fname width [width] + + dstV <- getCmmReg (CmmLocal dst) --- genCallSimpleCast _ _ _ dsts _ = --- panic ("genCallSimpleCast: " ++ show (length dsts) ++ " dsts") + let (_, arg_hints) = foreignTargetHints t + let args_hints = zip args arg_hints + (argsV, stmts2, top2) <- arg_vars args_hints ([], nilOL, []) + (argsV', stmts4) <- castVars $ zip argsV [width] + (retV, s1) <- doExpr width $ Call StdCall fptr argsV' [] + ([retV'], stmts5) <- castVars [(retV,dstTy)] + let s2 = Store retV' dstV + + let stmts = stmts2 `appOL` stmts4 `snocOL` + s1 `appOL` stmts5 `snocOL` s2 + return (stmts, top2 ++ top3) +genCallSimpleCast _ _ dsts _ = + panic ("genCallSimpleCast: " ++ show (length dsts) ++ " dsts") -- | Create a function pointer from a target. getFunPtr :: (LMString -> LlvmType) -> ForeignTarget @@ -534,6 +542,7 @@ cmmPrimOpFunctions mop = do MO_Memset -> fsLit $ "llvm.memset." ++ intrinTy2 (MO_PopCnt w) -> fsLit $ "llvm.ctpop." ++ showSDoc dflags (ppr $ widthToLlvmInt w) + (MO_BSwap w) -> fsLit $ "llvm.bswap." ++ showSDoc dflags (ppr $ widthToLlvmInt w) MO_Prefetch_Data -> fsLit "llvm.prefetch" diff --git a/compiler/nativeGen/CPrim.hs b/compiler/nativeGen/CPrim.hs index dd9d38f434..a6f4cab7bd 100644 --- a/compiler/nativeGen/CPrim.hs +++ b/compiler/nativeGen/CPrim.hs @@ -1,6 +1,7 @@ -- | Generating C symbol names emitted by the compiler. module CPrim ( popCntLabel + , bSwapLabel , word2FloatLabel ) where @@ -16,6 +17,14 @@ popCntLabel w = "hs_popcnt" ++ pprWidth w pprWidth W64 = "64" pprWidth w = pprPanic "popCntLabel: Unsupported word width " (ppr w) +bSwapLabel :: Width -> String +bSwapLabel w = "hs_bswap" ++ pprWidth w + where + pprWidth W16 = "16" + pprWidth W32 = "32" + pprWidth W64 = "64" + pprWidth w = pprPanic "bSwapLabel: Unsupported word width " (ppr w) + word2FloatLabel :: Width -> String word2FloatLabel w = "hs_word2float" ++ pprWidth w where diff --git a/compiler/nativeGen/PPC/CodeGen.hs b/compiler/nativeGen/PPC/CodeGen.hs index 39b64002dc..65533d8f9a 100644 --- a/compiler/nativeGen/PPC/CodeGen.hs +++ b/compiler/nativeGen/PPC/CodeGen.hs @@ -1155,6 +1155,7 @@ genCCall' dflags gcp target dest_regs args0 MO_Memset -> (fsLit "memset", False) MO_Memmove -> (fsLit "memmove", False) + MO_BSwap w -> (fsLit $ bSwapLabel w, False) MO_PopCnt w -> (fsLit $ popCntLabel w, False) MO_S_QuotRem {} -> unsupported diff --git a/compiler/nativeGen/SPARC/CodeGen.hs b/compiler/nativeGen/SPARC/CodeGen.hs index 9c84a38f6a..5d2b9a9d6d 100644 --- a/compiler/nativeGen/SPARC/CodeGen.hs +++ b/compiler/nativeGen/SPARC/CodeGen.hs @@ -647,6 +647,7 @@ outOfLineMachOp_table mop MO_Memset -> fsLit "memset" MO_Memmove -> fsLit "memmove" + MO_BSwap w -> fsLit $ bSwapLabel w MO_PopCnt w -> fsLit $ popCntLabel w MO_S_QuotRem {} -> unsupported diff --git a/compiler/nativeGen/X86/CodeGen.hs b/compiler/nativeGen/X86/CodeGen.hs index 848a804cf1..f6143d3fb9 100644 --- a/compiler/nativeGen/X86/CodeGen.hs +++ b/compiler/nativeGen/X86/CodeGen.hs @@ -1658,6 +1658,29 @@ genCCall _ (PrimTarget MO_Touch) _ _ = return nilOL genCCall _ (PrimTarget MO_Prefetch_Data) _ _ = return nilOL +genCCall is32Bit (PrimTarget (MO_BSwap width)) [dst] [src] = do + dflags <- getDynFlags + let platform = targetPlatform dflags + let dst_r = getRegisterReg platform False (CmmLocal dst) + case width of + W64 | is32Bit -> do + ChildCode64 vcode rlo <- iselExpr64 src + let dst_rhi = getHiVRegFromLo dst_r + rhi = getHiVRegFromLo rlo + return $ vcode `appOL` + toOL [ MOV II32 (OpReg rlo) (OpReg dst_rhi), + MOV II32 (OpReg rhi) (OpReg dst_r), + BSWAP II32 dst_rhi, + BSWAP II32 dst_r ] + W16 -> do code_src <- getAnyReg src + return $ code_src dst_r `appOL` + unitOL (BSWAP II32 dst_r) `appOL` + unitOL (SHR II32 (OpImm $ ImmInt 16) (OpReg dst_r)) + _ -> do code_src <- getAnyReg src + return $ code_src dst_r `appOL` unitOL (BSWAP size dst_r) + where + size = intSize width + genCCall is32Bit (PrimTarget (MO_PopCnt width)) dest_regs@[dst] args@[src] = do sse4_2 <- sse4_2Enabled @@ -2325,6 +2348,7 @@ outOfLineCmmOp mop res args MO_Memmove -> fsLit "memmove" MO_PopCnt _ -> fsLit "popcnt" + MO_BSwap _ -> fsLit "bswap" MO_UF_Conv _ -> unsupported diff --git a/compiler/nativeGen/X86/Instr.hs b/compiler/nativeGen/X86/Instr.hs index 76f0e8bd91..266a4ea58a 100644 --- a/compiler/nativeGen/X86/Instr.hs +++ b/compiler/nativeGen/X86/Instr.hs @@ -208,6 +208,7 @@ data Instr | XOR Size Operand Operand | NOT Size Operand | NEGI Size Operand -- NEG instruction (name clash with Cond) + | BSWAP Size Reg -- Shifts (amount may be immediate or %cl only) | SHL Size Operand{-amount-} Operand @@ -351,6 +352,7 @@ x86_regUsageOfInstr platform instr XOR _ src dst -> usageRM src dst NOT _ op -> usageM op + BSWAP _ reg -> mkRU [reg] [reg] NEGI _ op -> usageM op SHL _ imm dst -> usageRM imm dst SAR _ imm dst -> usageRM imm dst @@ -489,6 +491,7 @@ x86_patchRegsOfInstr instr env OR sz src dst -> patch2 (OR sz) src dst XOR sz src dst -> patch2 (XOR sz) src dst NOT sz op -> patch1 (NOT sz) op + BSWAP sz reg -> BSWAP sz (env reg) NEGI sz op -> patch1 (NEGI sz) op SHL sz imm dst -> patch1 (SHL sz imm) dst SAR sz imm dst -> patch1 (SAR sz imm) dst diff --git a/compiler/nativeGen/X86/Ppr.hs b/compiler/nativeGen/X86/Ppr.hs index 75d18a1ff4..7f9c6901da 100644 --- a/compiler/nativeGen/X86/Ppr.hs +++ b/compiler/nativeGen/X86/Ppr.hs @@ -578,6 +578,7 @@ pprInstr (XOR size src dst) = pprSizeOpOp (sLit "xor") size src dst pprInstr (POPCNT size src dst) = pprOpOp (sLit "popcnt") size src (OpReg dst) pprInstr (NOT size op) = pprSizeOp (sLit "not") size op +pprInstr (BSWAP size op) = pprSizeOp (sLit "bswap") size (OpReg op) pprInstr (NEGI size op) = pprSizeOp (sLit "neg") size op pprInstr (SHL size src dst) = pprShift (sLit "shl") size src dst diff --git a/compiler/prelude/primops.txt.pp b/compiler/prelude/primops.txt.pp index f31a237189..3d3518b095 100644 --- a/compiler/prelude/primops.txt.pp +++ b/compiler/prelude/primops.txt.pp @@ -363,6 +363,15 @@ primop PopCnt64Op "popCnt64#" GenPrimOp WORD64 -> Word# primop PopCntOp "popCnt#" Monadic Word# -> Word# {Count the number of set bits in a word.} +primop BSwap16Op "byteSwap16#" Monadic Word# -> Word# + {Swap bytes in the lower 16 bits of a word. The higher bytes are undefined. } +primop BSwap32Op "byteSwap32#" Monadic Word# -> Word# + {Swap bytes in the lower 32 bits of a word. The higher bytes are undefined. } +primop BSwap64Op "byteSwap64#" Monadic WORD64 -> WORD64 + {Swap bytes in a 64 bits of a word.} +primop BSwapOp "byteSwap#" Monadic Word# -> Word# + {Swap bytes in a word.} + ------------------------------------------------------------------------ section "Narrowings" {Explicit narrowing of native-sized ints or words.} |