diff options
| author | Herbert Valerio Riedel <hvr@gnu.org> | 2014-08-11 18:56:57 +0200 |
|---|---|---|
| committer | Herbert Valerio Riedel <hvr@gnu.org> | 2014-08-14 11:34:23 +0200 |
| commit | e0c1767d0ea8d12e0a4badf43682a08784e379c6 (patch) | |
| tree | 6662fe33cd7e803253458f91307b1b5826e30b0f /compiler/nativeGen | |
| parent | 6b5ea617dcd162e682886d5843df51a2866218d3 (diff) | |
| download | haskell-e0c1767d0ea8d12e0a4badf43682a08784e379c6.tar.gz | |
Implement new CLZ and CTZ primops (re #9340)
This implements the new primops
clz#, clz32#, clz64#,
ctz#, ctz32#, ctz64#
which provide efficient implementations of the popular
count-leading-zero and count-trailing-zero operations, respectively
(see testcase for a pure Haskell reference implementation).
On x86, NCG as well as LLVM generates code based on the BSF/BSR
instructions (which need extra logic to make the 0-case well-defined).
Test Plan: validate and successful tests on i686 and amd64
Reviewers: rwbarton, simonmar, ezyang, austin
Subscribers: simonmar, relrod, ezyang, carter
Differential Revision: https://phabricator.haskell.org/D144
GHC Trac Issues: #9340
Diffstat (limited to 'compiler/nativeGen')
| -rw-r--r-- | compiler/nativeGen/CPrim.hs | 20 | ||||
| -rw-r--r-- | compiler/nativeGen/PPC/CodeGen.hs | 2 | ||||
| -rw-r--r-- | compiler/nativeGen/SPARC/CodeGen.hs | 2 | ||||
| -rw-r--r-- | compiler/nativeGen/X86/CodeGen.hs | 65 |
4 files changed, 89 insertions, 0 deletions
diff --git a/compiler/nativeGen/CPrim.hs b/compiler/nativeGen/CPrim.hs index 34782dfc1c..c52fe10b13 100644 --- a/compiler/nativeGen/CPrim.hs +++ b/compiler/nativeGen/CPrim.hs @@ -6,6 +6,8 @@ module CPrim , cmpxchgLabel , popCntLabel , bSwapLabel + , clzLabel + , ctzLabel , word2FloatLabel ) where @@ -30,6 +32,24 @@ bSwapLabel w = "hs_bswap" ++ pprWidth w pprWidth W64 = "64" pprWidth w = pprPanic "bSwapLabel: Unsupported word width " (ppr w) +clzLabel :: Width -> String +clzLabel w = "hs_clz" ++ pprWidth w + where + pprWidth W8 = "8" + pprWidth W16 = "16" + pprWidth W32 = "32" + pprWidth W64 = "64" + pprWidth w = pprPanic "clzLabel: Unsupported word width " (ppr w) + +ctzLabel :: Width -> String +ctzLabel w = "hs_ctz" ++ pprWidth w + where + pprWidth W8 = "8" + pprWidth W16 = "16" + pprWidth W32 = "32" + pprWidth W64 = "64" + pprWidth w = pprPanic "ctzLabel: Unsupported word width " (ppr w) + word2FloatLabel :: Width -> String word2FloatLabel w = "hs_word2float" ++ pprWidth w where diff --git a/compiler/nativeGen/PPC/CodeGen.hs b/compiler/nativeGen/PPC/CodeGen.hs index 014117dd4c..3d3dff2e73 100644 --- a/compiler/nativeGen/PPC/CodeGen.hs +++ b/compiler/nativeGen/PPC/CodeGen.hs @@ -1151,6 +1151,8 @@ genCCall' dflags gcp target dest_regs args0 MO_BSwap w -> (fsLit $ bSwapLabel w, False) MO_PopCnt w -> (fsLit $ popCntLabel w, False) + MO_Clz w -> (fsLit $ clzLabel w, False) + MO_Ctz w -> (fsLit $ ctzLabel w, False) MO_AtomicRMW w amop -> (fsLit $ atomicRMWLabel w amop, False) MO_Cmpxchg w -> (fsLit $ cmpxchgLabel w, False) MO_AtomicRead w -> (fsLit $ atomicReadLabel w, False) diff --git a/compiler/nativeGen/SPARC/CodeGen.hs b/compiler/nativeGen/SPARC/CodeGen.hs index 51f89d629f..c192b8bda6 100644 --- a/compiler/nativeGen/SPARC/CodeGen.hs +++ b/compiler/nativeGen/SPARC/CodeGen.hs @@ -654,6 +654,8 @@ outOfLineMachOp_table mop MO_BSwap w -> fsLit $ bSwapLabel w MO_PopCnt w -> fsLit $ popCntLabel w + MO_Clz w -> fsLit $ clzLabel w + MO_Ctz w -> fsLit $ ctzLabel w 
MO_AtomicRMW w amop -> fsLit $ atomicRMWLabel w amop MO_Cmpxchg w -> fsLit $ cmpxchgLabel w MO_AtomicRead w -> fsLit $ atomicReadLabel w diff --git a/compiler/nativeGen/X86/CodeGen.hs b/compiler/nativeGen/X86/CodeGen.hs index ce7120e24b..bc79e5e264 100644 --- a/compiler/nativeGen/X86/CodeGen.hs +++ b/compiler/nativeGen/X86/CodeGen.hs @@ -1767,6 +1767,69 @@ genCCall dflags is32Bit (PrimTarget (MO_PopCnt width)) dest_regs@[dst] size = intSize width lbl = mkCmmCodeLabel primPackageKey (fsLit (popCntLabel width)) +genCCall dflags is32Bit (PrimTarget (MO_Clz width)) dest_regs@[dst] args@[src] + | is32Bit && width == W64 = do + -- Fallback to `hs_clz64` on i386 + targetExpr <- cmmMakeDynamicReference dflags CallReference lbl + let target = ForeignTarget targetExpr (ForeignConvention CCallConv + [NoHint] [NoHint] + CmmMayReturn) + genCCall dflags is32Bit target dest_regs args + + | otherwise = do + code_src <- getAnyReg src + src_r <- getNewRegNat size + tmp_r <- getNewRegNat size + let dst_r = getRegisterReg platform False (CmmLocal dst) + + -- The following insn sequence makes sure 'clz 0' has a defined value. + -- starting with Haswell, one could use the LZCNT insn instead. 
+ return $ code_src src_r `appOL` toOL + ([ MOVZxL II8 (OpReg src_r) (OpReg src_r) | width == W8 ] ++ + [ BSR size (OpReg src_r) tmp_r + , MOV II32 (OpImm (ImmInt (2*bw-1))) (OpReg dst_r) + , CMOV NE size (OpReg tmp_r) dst_r + , XOR size (OpImm (ImmInt (bw-1))) (OpReg dst_r) + ]) -- NB: We don't need to zero-extend the result for the + -- W8/W16 cases because the 'MOV' insn already + -- took care of implicitly clearing the upper bits + where + bw = widthInBits width + platform = targetPlatform dflags + size = if width == W8 then II16 else intSize width + lbl = mkCmmCodeLabel primPackageKey (fsLit (clzLabel width)) + +genCCall dflags is32Bit (PrimTarget (MO_Ctz width)) dest_regs@[dst] args@[src] + | is32Bit, width == W64 = do + -- Fallback to `hs_ctz64` on i386 + targetExpr <- cmmMakeDynamicReference dflags CallReference lbl + let target = ForeignTarget targetExpr (ForeignConvention CCallConv + [NoHint] [NoHint] + CmmMayReturn) + genCCall dflags is32Bit target dest_regs args + + | otherwise = do + code_src <- getAnyReg src + src_r <- getNewRegNat size + tmp_r <- getNewRegNat size + let dst_r = getRegisterReg platform False (CmmLocal dst) + + -- The following insn sequence makes sure 'ctz 0' has a defined value. + -- starting with Haswell, one could use the TZCNT insn instead. 
+ return $ code_src src_r `appOL` toOL + ([ MOVZxL II8 (OpReg src_r) (OpReg src_r) | width == W8 ] ++ + [ BSF size (OpReg src_r) tmp_r + , MOV II32 (OpImm (ImmInt bw)) (OpReg dst_r) + , CMOV NE size (OpReg tmp_r) dst_r + ]) -- NB: We don't need to zero-extend the result for the + -- W8/W16 cases because the 'MOV' insn already + -- took care of implicitly clearing the upper bits + where + bw = widthInBits width + platform = targetPlatform dflags + size = if width == W8 then II16 else intSize width + lbl = mkCmmCodeLabel primPackageKey (fsLit (ctzLabel width)) + genCCall dflags is32Bit (PrimTarget (MO_UF_Conv width)) dest_regs args = do targetExpr <- cmmMakeDynamicReference dflags CallReference lbl @@ -2403,6 +2466,8 @@ outOfLineCmmOp mop res args MO_PopCnt _ -> fsLit "popcnt" MO_BSwap _ -> fsLit "bswap" + MO_Clz w -> fsLit $ clzLabel w + MO_Ctz w -> fsLit $ ctzLabel w MO_AtomicRMW _ _ -> fsLit "atomicrmw" MO_AtomicRead _ -> fsLit "atomicread" |
