summaryrefslogtreecommitdiff
path: root/compiler/codeGen/StgCmmPrim.hs
diff options
context:
space:
mode:
authorHerbert Valerio Riedel <hvr@gnu.org>2014-08-11 18:56:57 +0200
committerHerbert Valerio Riedel <hvr@gnu.org>2014-08-14 11:34:23 +0200
commite0c1767d0ea8d12e0a4badf43682a08784e379c6 (patch)
tree6662fe33cd7e803253458f91307b1b5826e30b0f /compiler/codeGen/StgCmmPrim.hs
parent6b5ea617dcd162e682886d5843df51a2866218d3 (diff)
downloadhaskell-e0c1767d0ea8d12e0a4badf43682a08784e379c6.tar.gz
Implement new CLZ and CTZ primops (re #9340)
This implements the new primops clz#, clz32#, clz64#, ctz#, ctz32#, ctz64# which provide efficient implementations of the popular count-leading-zero and count-trailing-zero respectively (see testcase for a pure Haskell reference implementation). On x86, NCG as well as LLVM generates code based on the BSF/BSR instructions (which need extra logic to make the 0-case well-defined). Test Plan: validate and succesful tests on i686 and amd64 Reviewers: rwbarton, simonmar, ezyang, austin Subscribers: simonmar, relrod, ezyang, carter Differential Revision: https://phabricator.haskell.org/D144 GHC Trac Issues: #9340
Diffstat (limited to 'compiler/codeGen/StgCmmPrim.hs')
-rw-r--r--compiler/codeGen/StgCmmPrim.hs28
1 files changed, 28 insertions, 0 deletions
diff --git a/compiler/codeGen/StgCmmPrim.hs b/compiler/codeGen/StgCmmPrim.hs
index 77739fe0fb..9e12427355 100644
--- a/compiler/codeGen/StgCmmPrim.hs
+++ b/compiler/codeGen/StgCmmPrim.hs
@@ -563,6 +563,20 @@ emitPrimOp _ [res] PopCnt32Op [w] = emitPopCntCall res w W32
emitPrimOp _ [res] PopCnt64Op [w] = emitPopCntCall res w W64
emitPrimOp dflags [res] PopCntOp [w] = emitPopCntCall res w (wordWidth dflags)
+-- count leading zeros
+emitPrimOp _ [res] Clz8Op [w] = emitClzCall res w W8
+emitPrimOp _ [res] Clz16Op [w] = emitClzCall res w W16
+emitPrimOp _ [res] Clz32Op [w] = emitClzCall res w W32
+emitPrimOp _ [res] Clz64Op [w] = emitClzCall res w W64
+emitPrimOp dflags [res] ClzOp [w] = emitClzCall res w (wordWidth dflags)
+
+-- count trailing zeros
+emitPrimOp _ [res] Ctz8Op [w] = emitCtzCall res w W8
+emitPrimOp _ [res] Ctz16Op [w] = emitCtzCall res w W16
+emitPrimOp _ [res] Ctz32Op [w] = emitCtzCall res w W32
+emitPrimOp _ [res] Ctz64Op [w] = emitCtzCall res w W64
+emitPrimOp dflags [res] CtzOp [w] = emitCtzCall res w (wordWidth dflags)
+
-- Unsigned int to floating point conversions
emitPrimOp _ [res] Word2FloatOp [w] = emitPrimCall [res]
(MO_UF_Conv W32) [w]
@@ -2096,3 +2110,17 @@ emitPopCntCall res x width = do
[ res ]
(MO_PopCnt width)
[ x ]
+
+emitClzCall :: LocalReg -> CmmExpr -> Width -> FCode ()
+emitClzCall res x width = do
+ emitPrimCall
+ [ res ]
+ (MO_Clz width)
+ [ x ]
+
+emitCtzCall :: LocalReg -> CmmExpr -> Width -> FCode ()
+emitCtzCall res x width = do
+ emitPrimCall
+ [ res ]
+ (MO_Ctz width)
+ [ x ]