nativeGen/AArch64: Fix bitmask immediate predicate

Previously the predicate for determining whether a logical instruction operand could be encoded as a bitmask immediate was far too conservative. This meant that, e.g., pointer untagged required five instructions whereas it should only require one. Fixes #23030. (cherry picked from commit b8d783d24b9a617ad1e3038abeb75d322703ef65)
author: Ben Gamari <ben@smart-cactus.org> 2023-03-08 18:33:59 -0500
committer: Ben Gamari <ben@smart-cactus.org> 2023-05-16 07:56:09 -0400
commit: ab6779014e171ddfb2bcc1032cd1a1885dddd6ab (patch)
tree: 02e5719b88e3b888d1e9e287da6500cb4923cfe6 /compiler/GHC/CmmToAsm
parent: 373ec872ee5557b0dbecd6761837523a9840ffc1 (diff)
download: haskell-ab6779014e171ddfb2bcc1032cd1a1885dddd6ab.tar.gz
1 files changed, 35 insertions, 16 deletions
diff --git a/compiler/GHC/CmmToAsm/AArch64/CodeGen.hs b/compiler/GHC/CmmToAsm/AArch64/CodeGen.hs
index 12ef1a25fd..79240662fc 100644
--- a/compiler/GHC/CmmToAsm/AArch64/CodeGen.hs
+++ b/compiler/GHC/CmmToAsm/AArch64/CodeGen.hs
@@ -3,7 +3,6 @@
 {-# LANGUAGE BangPatterns #-}
 {-# LANGUAGE BinaryLiterals #-}
 {-# LANGUAGE OverloadedStrings #-}
-{-# LANGUAGE NumericUnderscores #-}
 module GHC.CmmToAsm.AArch64.CodeGen (
       cmmTopCodeGen
     , generateJumpTableForInstr
@@ -773,12 +772,12 @@ getRegister' config plat expr
       return $ Any (intFormat w) (\dst -> code_x `snocOL` annExpr expr (LSR (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n))))
 
     -- 3. Logic &&, ||
-    CmmMachOp (MO_And w) [(CmmReg reg), CmmLit (CmmInt n _)] | isBitMaskImmediate (fromIntegral n) ->
+    CmmMachOp (MO_And w) [(CmmReg reg), CmmLit (CmmInt n _)] | isAArch64Bitmask (fromIntegral n) ->
       return $ Any (intFormat w) (\d -> unitOL $ annExpr expr (AND (OpReg w d) (OpReg w' r') (OpImm (ImmInteger n))))
       where w' = formatToWidth (cmmTypeFormat (cmmRegType plat reg))
             r' = getRegisterReg plat reg
 
-    CmmMachOp (MO_Or w) [(CmmReg reg), CmmLit (CmmInt n _)] | isBitMaskImmediate (fromIntegral n) ->
+    CmmMachOp (MO_Or w) [(CmmReg reg), CmmLit (CmmInt n _)] | isAArch64Bitmask (fromIntegral n) ->
       return $ Any (intFormat w) (\d -> unitOL $ annExpr expr (ORR (OpReg w d) (OpReg w' r') (OpImm (ImmInteger n))))
       where w' = formatToWidth (cmmTypeFormat (cmmRegType plat reg))
             r' = getRegisterReg plat reg
@@ -963,19 +962,6 @@ getRegister' config plat expr
   where
     isNbitEncodeable :: Int -> Integer -> Bool
     isNbitEncodeable n i = let shift = n - 1 in (-1 `shiftL` shift) <= i && i < (1 `shiftL` shift)
-    -- This needs to check if n can be encoded as a bitmask immediate:
-    --
-    -- See https://stackoverflow.com/questions/30904718/range-of-immediate-values-in-armv8-a64-assembly
-    --
-    isBitMaskImmediate :: Integer -> Bool
-    isBitMaskImmediate i = i `elem` [0b0000_0001, 0b0000_0010, 0b0000_0100, 0b0000_1000, 0b0001_0000, 0b0010_0000, 0b0100_0000, 0b1000_0000
-                                    ,0b0000_0011, 0b0000_0110, 0b0000_1100, 0b0001_1000, 0b0011_0000, 0b0110_0000, 0b1100_0000
-                                    ,0b0000_0111, 0b0000_1110, 0b0001_1100, 0b0011_1000, 0b0111_0000, 0b1110_0000
-                                    ,0b0000_1111, 0b0001_1110, 0b0011_1100, 0b0111_1000, 0b1111_0000
-                                    ,0b0001_1111, 0b0011_1110, 0b0111_1100, 0b1111_1000
-                                    ,0b0011_1111, 0b0111_1110, 0b1111_1100
-                                    ,0b0111_1111, 0b1111_1110
-                                    ,0b1111_1111]
 
     -- N.B. MUL does not set the overflow flag.
     do_mul_may_oflo :: Width -> CmmExpr -> CmmExpr -> NatM Register
@@ -1018,6 +1004,39 @@ getRegister' config plat expr
             CMP (OpReg tmp_w tmp) (OpRegExt tmp_w tmp ext_mode 0) `snocOL`
             CSET (OpReg w dst) NE)
 
+-- | Is a given number encodable as a bitmask immediate?
+--
+-- https://stackoverflow.com/questions/30904718/range-of-immediate-values-in-armv8-a64-assembly
+isAArch64Bitmask :: Integer -> Bool
+-- N.B. zero and ~0 are not encodable as bitmask immediates
+isAArch64Bitmask 0  = False
+isAArch64Bitmask n
+  | n == bit 64 - 1 = False
+isAArch64Bitmask n  =
+    check 64 || check 32 || check 16 || check 8
+  where
+    -- Check whether @n@ can be represented as a subpattern of the given
+    -- width.
+    check width
+      | hasOneRun subpat =
+          let n' = fromIntegral (mkPat width subpat)
+          in n == n'
+      | otherwise = False
+      where
+        subpat :: Word64
+        subpat = fromIntegral (n .&. (bit width - 1))
+
+    -- Construct a bit-pattern from a repeated subpatterns the given width.
+    mkPat :: Int -> Word64 -> Word64
+    mkPat width subpat =
+        foldl' (.|.) 0 [ subpat `shiftL` p | p <- [0, width..63] ]
+
+    -- Does the given number's bit representation match the regular expression
+    -- @0*1*0*@?
+    hasOneRun :: Word64 -> Bool
+    hasOneRun m =
+        64 == popCount m + countLeadingZeros m + countTrailingZeros m
+
 -- | Instructions to sign-extend the value in the given register from width @w@
 -- up to width @w'@.
 signExtendReg :: Width -> Width -> Reg -> NatM (Reg, OrdList Instr)
author	Ben Gamari <ben@smart-cactus.org>	2023-03-08 18:33:59 -0500
committer	Ben Gamari <ben@smart-cactus.org>	2023-05-16 07:56:09 -0400
commit	ab6779014e171ddfb2bcc1032cd1a1885dddd6ab (patch)
tree	02e5719b88e3b888d1e9e287da6500cb4923cfe6 /compiler/GHC/CmmToAsm
parent	373ec872ee5557b0dbecd6761837523a9840ffc1 (diff)
download	haskell-ab6779014e171ddfb2bcc1032cd1a1885dddd6ab.tar.gz