Modules: CmmToAsm (#13009)

author: Sylvain Henry <sylvain@haskus.fr> 2020-02-22 15:05:20 +0100
committer: Marge Bot <ben+marge-bot@smart-cactus.org> 2020-02-24 20:55:25 -0500
commit: 1b1067d14b656bbbfa7c47f156ec2700c9751549 (patch)
tree: 32346e3c4c3f89117190b36364144d85dc260e05 /compiler/nativeGen/X86/Instr.hs
parent: 354e2787be08fb6d973de1a39e58080ff8e107f8 (diff)
download: haskell-1b1067d14b656bbbfa7c47f156ec2700c9751549.tar.gz
1 files changed, 0 insertions, 1054 deletions
diff --git a/compiler/nativeGen/X86/Instr.hs b/compiler/nativeGen/X86/Instr.hs
deleted file mode 100644
index 099437265c..0000000000
--- a/compiler/nativeGen/X86/Instr.hs
+++ /dev/null
@@ -1,1054 +0,0 @@
-{-# LANGUAGE CPP, TypeFamilies #-}
-
------------------------------------------------------------------------------
---
--- Machine-dependent assembly language
---
--- (c) The University of Glasgow 1993-2004
---
------------------------------------------------------------------------------
-
-module X86.Instr (Instr(..), Operand(..), PrefetchVariant(..), JumpDest(..),
-                  getJumpDestBlockId, canShortcut, shortcutStatics,
-                  shortcutJump, allocMoreStack,
-                  maxSpillSlots, archWordFormat )
-where
-
-#include "HsVersions.h"
-
-import GhcPrelude
-
-import X86.Cond
-import X86.Regs
-import Instruction
-import Format
-import RegClass
-import Reg
-import TargetReg
-
-import GHC.Cmm.BlockId
-import GHC.Cmm.Dataflow.Collections
-import GHC.Cmm.Dataflow.Label
-import GHC.Platform.Regs
-import GHC.Cmm
-import FastString
-import Outputable
-import GHC.Platform
-
-import BasicTypes       (Alignment)
-import GHC.Cmm.CLabel
-import GHC.Driver.Session
-import UniqSet
-import Unique
-import UniqSupply
-import GHC.Cmm.DebugBlock (UnwindTable)
-
-import Control.Monad
-import Data.Maybe       (fromMaybe)
-
--- Format of an x86/x86_64 memory address, in bytes.
---
-archWordFormat :: Bool -> Format
-archWordFormat is32Bit
- | is32Bit   = II32
- | otherwise = II64
-
--- | Instruction instance for x86 instruction set.
-instance Instruction Instr where
-        regUsageOfInstr         = x86_regUsageOfInstr
-        patchRegsOfInstr        = x86_patchRegsOfInstr
-        isJumpishInstr          = x86_isJumpishInstr
-        jumpDestsOfInstr        = x86_jumpDestsOfInstr
-        patchJumpInstr          = x86_patchJumpInstr
-        mkSpillInstr            = x86_mkSpillInstr
-        mkLoadInstr             = x86_mkLoadInstr
-        takeDeltaInstr          = x86_takeDeltaInstr
-        isMetaInstr             = x86_isMetaInstr
-        mkRegRegMoveInstr       = x86_mkRegRegMoveInstr
-        takeRegRegMoveInstr     = x86_takeRegRegMoveInstr
-        mkJumpInstr             = x86_mkJumpInstr
-        mkStackAllocInstr       = x86_mkStackAllocInstr
-        mkStackDeallocInstr     = x86_mkStackDeallocInstr
-
-
--- -----------------------------------------------------------------------------
--- Intel x86 instructions
-
-{-
-Intel, in their infinite wisdom, selected a stack model for floating
-point registers on x86.  That might have made sense back in 1979 --
-nowadays we can see it for the nonsense it really is.  A stack model
-fits poorly with the existing nativeGen infrastructure, which assumes
-flat integer and FP register sets.  Prior to this commit, nativeGen
-could not generate correct x86 FP code -- to do so would have meant
-somehow working the register-stack paradigm into the register
-allocator and spiller, which sounds very difficult.
-
-We have decided to cheat, and go for a simple fix which requires no
-infrastructure modifications, at the expense of generating ropey but
-correct FP code.  All notions of the x86 FP stack and its insns have
-been removed.  Instead, we pretend (to the instruction selector and
-register allocator) that x86 has six floating point registers, %fake0
-.. %fake5, which can be used in the usual flat manner.  We further
-claim that x86 has floating point instructions very similar to SPARC
-and Alpha, that is, a simple 3-operand register-register arrangement.
-Code generation and register allocation proceed on this basis.
-
-When we come to print out the final assembly, our convenient fiction
-is converted to dismal reality.  Each fake instruction is
-independently converted to a series of real x86 instructions.
-%fake0 .. %fake5 are mapped to %st(0) .. %st(5).  To do reg-reg
-arithmetic operations, the two operands are pushed onto the top of the
-FP stack, the operation done, and the result copied back into the
-relevant register.  There are only six %fake registers because 2 are
-needed for the translation, and x86 has 8 in total.
-
-The translation is inefficient but is simple and it works.  A cleverer
-translation would handle a sequence of insns, simulating the FP stack
-contents, would not impose a fixed mapping from %fake to %st regs, and
-hopefully could avoid most of the redundant reg-reg moves of the
-current translation.
-
-We might as well make use of whatever unique FP facilities Intel have
-chosen to bless us with (let's not be churlish, after all).
-Hence GLDZ and GLD1.  Bwahahahahahahaha!
--}
-
-{-
-Note [x86 Floating point precision]
-
-Intel's internal floating point registers are by default 80 bit
-extended precision.  This means that all operations done on values in
-registers are done at 80 bits, and unless the intermediate values are
-truncated to the appropriate size (32 or 64 bits) by storing in
-memory, calculations in registers will give different results from
-calculations which pass intermediate values in memory (eg. via
-function calls).
-
-One solution is to set the FPU into 64 bit precision mode.  Some OSs
-do this (eg. FreeBSD) and some don't (eg. Linux).  The problem here is
-that this will only affect 64-bit precision arithmetic; 32-bit
-calculations will still be done at 64-bit precision in registers.  So
-it doesn't solve the whole problem.
-
-There's also the issue of what the C library is expecting in terms of
-precision.  It seems to be the case that glibc on Linux expects the
-FPU to be set to 80 bit precision, so setting it to 64 bit could have
-unexpected effects.  Changing the default could have undesirable
-effects on other 3rd-party library code too, so the right thing would
-be to save/restore the FPU control word across Haskell code if we were
-to do this.
-
-gcc's -ffloat-store gives consistent results by always storing the
-results of floating-point calculations in memory, which works for both
-32 and 64-bit precision.  However, it only affects the values of
-user-declared floating point variables in C, not intermediate results.
-GHC in -fvia-C mode uses -ffloat-store (see the -fexcess-precision
-flag).
-
-Another problem is how to spill floating point registers in the
-register allocator.  Should we spill the whole 80 bits, or just 64?
-On an OS which is set to 64 bit precision, spilling 64 is fine.  On
-Linux, spilling 64 bits will round the results of some operations.
-This is what gcc does.  Spilling at 80 bits requires taking up a full
-128 bit slot (so we get alignment).  We spill at 80-bits and ignore
-the alignment problems.
-
-In the future [edit: now available in GHC 7.0.1, with the -msse2
-flag], we'll use the SSE registers for floating point.  This requires
-a CPU that supports SSE2 (ordinary SSE only supports 32 bit precision
-float ops), which means P4 or Xeon and above.  Using SSE will solve
-all these problems, because the SSE registers use fixed 32 bit or 64
-bit precision.
-
---SDM 1/2003
--}
-
-data Instr
-        -- comment pseudo-op
-        = COMMENT FastString
-
-        -- location pseudo-op (file, line, col, name)
-        | LOCATION Int Int Int String
-
-        -- some static data spat out during code
-        -- generation.  Will be extracted before
-        -- pretty-printing.
-        | LDATA   Section (Alignment, RawCmmStatics)
-
-        -- start a new basic block.  Useful during
-        -- codegen, removed later.  Preceding
-        -- instruction should be a jump, as per the
-        -- invariants for a BasicBlock (see Cmm).
-        | NEWBLOCK BlockId
-
-        -- unwinding information
-        -- See Note [Unwinding information in the NCG].
-        | UNWIND CLabel UnwindTable
-
-        -- specify current stack offset for benefit of subsequent passes.
-        -- This carries a BlockId so it can be used in unwinding information.
-        | DELTA  Int
-
-        -- Moves.
-        | MOV         Format Operand Operand
-        | CMOV   Cond Format Operand Reg
-        | MOVZxL      Format Operand Operand -- format is the size of operand 1
-        | MOVSxL      Format Operand Operand -- format is the size of operand 1
-        -- x86_64 note: plain mov into a 32-bit register always zero-extends
-        -- into the 64-bit reg, in contrast to the 8 and 16-bit movs which
-        -- don't affect the high bits of the register.
-
-        -- Load effective address (also a very useful three-operand add instruction :-)
-        | LEA         Format Operand Operand
-
-        -- Int Arithmetic.
-        | ADD         Format Operand Operand
-        | ADC         Format Operand Operand
-        | SUB         Format Operand Operand
-        | SBB         Format Operand Operand
-
-        | MUL         Format Operand Operand
-        | MUL2        Format Operand         -- %edx:%eax = operand * %rax
-        | IMUL        Format Operand Operand -- signed int mul
-        | IMUL2       Format Operand         -- %edx:%eax = operand * %eax
-
-        | DIV         Format Operand         -- eax := eax:edx/op, edx := eax:edx%op
-        | IDIV        Format Operand         -- ditto, but signed
-
-        -- Int Arithmetic, where the effects on the condition register
-        -- are important. Used in specialized sequences such as MO_Add2.
-        -- Do not rewrite these instructions to "equivalent" ones that
-        -- have different effect on the condition register! (See #9013.)
-        | ADD_CC      Format Operand Operand
-        | SUB_CC      Format Operand Operand
-
-        -- Simple bit-twiddling.
-        | AND         Format Operand Operand
-        | OR          Format Operand Operand
-        | XOR         Format Operand Operand
-        | NOT         Format Operand
-        | NEGI        Format Operand         -- NEG instruction (name clash with Cond)
-        | BSWAP       Format Reg
-
-        -- Shifts (amount may be immediate or %cl only)
-        | SHL         Format Operand{-amount-} Operand
-        | SAR         Format Operand{-amount-} Operand
-        | SHR         Format Operand{-amount-} Operand
-
-        | BT          Format Imm Operand
-        | NOP
-
-
-        -- We need to support the FSTP (x87 store and pop) instruction
-        -- so that we can correctly read off the return value of an
-        -- x86 CDECL C function call when its floating point.
-        -- so we dont include a register argument, and just use st(0)
-        -- this instruction is used ONLY for return values of C ffi calls
-        -- in x86_32 abi
-        | X87Store         Format  AddrMode -- st(0), dst
-
-
-        -- SSE2 floating point: we use a restricted set of the available SSE2
-        -- instructions for floating-point.
-        -- use MOV for moving (either movss or movsd (movlpd better?))
-        | CVTSS2SD      Reg Reg            -- F32 to F64
-        | CVTSD2SS      Reg Reg            -- F64 to F32
-        | CVTTSS2SIQ    Format Operand Reg -- F32 to I32/I64 (with truncation)
-        | CVTTSD2SIQ    Format Operand Reg -- F64 to I32/I64 (with truncation)
-        | CVTSI2SS      Format Operand Reg -- I32/I64 to F32
-        | CVTSI2SD      Format Operand Reg -- I32/I64 to F64
-
-        -- use ADD, SUB, and SQRT for arithmetic.  In both cases, operands
-        -- are  Operand Reg.
-
-        -- SSE2 floating-point division:
-        | FDIV          Format Operand Operand   -- divisor, dividend(dst)
-
-        -- use CMP for comparisons.  ucomiss and ucomisd instructions
-        -- compare single/double prec floating point respectively.
-
-        | SQRT          Format Operand Reg      -- src, dst
-
-
-        -- Comparison
-        | TEST          Format Operand Operand
-        | CMP           Format Operand Operand
-        | SETCC         Cond Operand
-
-        -- Stack Operations.
-        | PUSH          Format Operand
-        | POP           Format Operand
-        -- both unused (SDM):
-        --  | PUSHA
-        --  | POPA
-
-        -- Jumping around.
-        | JMP         Operand [Reg] -- including live Regs at the call
-        | JXX         Cond BlockId  -- includes unconditional branches
-        | JXX_GBL     Cond Imm      -- non-local version of JXX
-        -- Table jump
-        | JMP_TBL     Operand   -- Address to jump to
-                      [Maybe JumpDest] -- Targets of the jump table
-                      Section   -- Data section jump table should be put in
-                      CLabel    -- Label of jump table
-        -- | X86 call instruction
-        | CALL        (Either Imm Reg) -- ^ Jump target
-                      [Reg]            -- ^ Arguments (required for register allocation)
-
-        -- Other things.
-        | CLTD Format            -- sign extend %eax into %edx:%eax
-
-        | FETCHGOT    Reg        -- pseudo-insn for ELF position-independent code
-                                 -- pretty-prints as
-                                 --       call 1f
-                                 -- 1:    popl %reg
-                                 --       addl __GLOBAL_OFFSET_TABLE__+.-1b, %reg
-        | FETCHPC     Reg        -- pseudo-insn for Darwin position-independent code
-                                 -- pretty-prints as
-                                 --       call 1f
-                                 -- 1:    popl %reg
-
-    -- bit counting instructions
-        | POPCNT      Format Operand Reg -- [SSE4.2] count number of bits set to 1
-        | LZCNT       Format Operand Reg -- [BMI2] count number of leading zeros
-        | TZCNT       Format Operand Reg -- [BMI2] count number of trailing zeros
-        | BSF         Format Operand Reg -- bit scan forward
-        | BSR         Format Operand Reg -- bit scan reverse
-
-    -- bit manipulation instructions
-        | PDEP        Format Operand Operand Reg -- [BMI2] deposit bits to   the specified mask
-        | PEXT        Format Operand Operand Reg -- [BMI2] extract bits from the specified mask
-
-    -- prefetch
-        | PREFETCH  PrefetchVariant Format Operand -- prefetch Variant, addr size, address to prefetch
-                                        -- variant can be NTA, Lvl0, Lvl1, or Lvl2
-
-        | LOCK        Instr -- lock prefix
-        | XADD        Format Operand Operand -- src (r), dst (r/m)
-        | CMPXCHG     Format Operand Operand -- src (r), dst (r/m), eax implicit
-        | MFENCE
-
-data PrefetchVariant = NTA | Lvl0 | Lvl1 | Lvl2
-
-
-data Operand
-        = OpReg  Reg            -- register
-        | OpImm  Imm            -- immediate value
-        | OpAddr AddrMode       -- memory reference
-
-
-
--- | Returns which registers are read and written as a (read, written)
--- pair.
-x86_regUsageOfInstr :: Platform -> Instr -> RegUsage
-x86_regUsageOfInstr platform instr
- = case instr of
-    MOV    _ src dst    -> usageRW src dst
-    CMOV _ _ src dst    -> mkRU (use_R src [dst]) [dst]
-    MOVZxL _ src dst    -> usageRW src dst
-    MOVSxL _ src dst    -> usageRW src dst
-    LEA    _ src dst    -> usageRW src dst
-    ADD    _ src dst    -> usageRM src dst
-    ADC    _ src dst    -> usageRM src dst
-    SUB    _ src dst    -> usageRM src dst
-    SBB    _ src dst    -> usageRM src dst
-    IMUL   _ src dst    -> usageRM src dst
-
-    -- Result of IMULB will be in just in %ax
-    IMUL2  II8 src       -> mkRU (eax:use_R src []) [eax]
-    -- Result of IMUL for wider values, will be split between %dx/%edx/%rdx and
-    -- %ax/%eax/%rax.
-    IMUL2  _ src        -> mkRU (eax:use_R src []) [eax,edx]
-
-    MUL    _ src dst    -> usageRM src dst
-    MUL2   _ src        -> mkRU (eax:use_R src []) [eax,edx]
-    DIV    _ op -> mkRU (eax:edx:use_R op []) [eax,edx]
-    IDIV   _ op -> mkRU (eax:edx:use_R op []) [eax,edx]
-    ADD_CC _ src dst    -> usageRM src dst
-    SUB_CC _ src dst    -> usageRM src dst
-    AND    _ src dst    -> usageRM src dst
-    OR     _ src dst    -> usageRM src dst
-
-    XOR    _ (OpReg src) (OpReg dst)
-        | src == dst    -> mkRU [] [dst]
-
-    XOR    _ src dst    -> usageRM src dst
-    NOT    _ op         -> usageM op
-    BSWAP  _ reg        -> mkRU [reg] [reg]
-    NEGI   _ op         -> usageM op
-    SHL    _ imm dst    -> usageRM imm dst
-    SAR    _ imm dst    -> usageRM imm dst
-    SHR    _ imm dst    -> usageRM imm dst
-    BT     _ _   src    -> mkRUR (use_R src [])
-
-    PUSH   _ op         -> mkRUR (use_R op [])
-    POP    _ op         -> mkRU [] (def_W op)
-    TEST   _ src dst    -> mkRUR (use_R src $! use_R dst [])
-    CMP    _ src dst    -> mkRUR (use_R src $! use_R dst [])
-    SETCC  _ op         -> mkRU [] (def_W op)
-    JXX    _ _          -> mkRU [] []
-    JXX_GBL _ _         -> mkRU [] []
-    JMP     op regs     -> mkRUR (use_R op regs)
-    JMP_TBL op _ _ _    -> mkRUR (use_R op [])
-    CALL (Left _)  params   -> mkRU params (callClobberedRegs platform)
-    CALL (Right reg) params -> mkRU (reg:params) (callClobberedRegs platform)
-    CLTD   _            -> mkRU [eax] [edx]
-    NOP                 -> mkRU [] []
-
-    X87Store    _  dst    -> mkRUR ( use_EA dst [])
-
-    CVTSS2SD   src dst  -> mkRU [src] [dst]
-    CVTSD2SS   src dst  -> mkRU [src] [dst]
-    CVTTSS2SIQ _ src dst -> mkRU (use_R src []) [dst]
-    CVTTSD2SIQ _ src dst -> mkRU (use_R src []) [dst]
-    CVTSI2SS   _ src dst -> mkRU (use_R src []) [dst]
-    CVTSI2SD   _ src dst -> mkRU (use_R src []) [dst]
-    FDIV _     src dst  -> usageRM src dst
-    SQRT _ src dst      -> mkRU (use_R src []) [dst]
-
-    FETCHGOT reg        -> mkRU [] [reg]
-    FETCHPC  reg        -> mkRU [] [reg]
-
-    COMMENT _           -> noUsage
-    LOCATION{}          -> noUsage
-    UNWIND{}            -> noUsage
-    DELTA   _           -> noUsage
-
-    POPCNT _ src dst -> mkRU (use_R src []) [dst]
-    LZCNT  _ src dst -> mkRU (use_R src []) [dst]
-    TZCNT  _ src dst -> mkRU (use_R src []) [dst]
-    BSF    _ src dst -> mkRU (use_R src []) [dst]
-    BSR    _ src dst -> mkRU (use_R src []) [dst]
-
-    PDEP   _ src mask dst -> mkRU (use_R src $ use_R mask []) [dst]
-    PEXT   _ src mask dst -> mkRU (use_R src $ use_R mask []) [dst]
-
-    -- note: might be a better way to do this
-    PREFETCH _  _ src -> mkRU (use_R src []) []
-    LOCK i              -> x86_regUsageOfInstr platform i
-    XADD _ src dst      -> usageMM src dst
-    CMPXCHG _ src dst   -> usageRMM src dst (OpReg eax)
-    MFENCE -> noUsage
-
-    _other              -> panic "regUsage: unrecognised instr"
- where
-    -- # Definitions
-    --
-    -- Written: If the operand is a register, it's written. If it's an
-    -- address, registers mentioned in the address are read.
-    --
-    -- Modified: If the operand is a register, it's both read and
-    -- written. If it's an address, registers mentioned in the address
-    -- are read.
-
-    -- 2 operand form; first operand Read; second Written
-    usageRW :: Operand -> Operand -> RegUsage
-    usageRW op (OpReg reg)      = mkRU (use_R op []) [reg]
-    usageRW op (OpAddr ea)      = mkRUR (use_R op $! use_EA ea [])
-    usageRW _ _                 = panic "X86.RegInfo.usageRW: no match"
-
-    -- 2 operand form; first operand Read; second Modified
-    usageRM :: Operand -> Operand -> RegUsage
-    usageRM op (OpReg reg)      = mkRU (use_R op [reg]) [reg]
-    usageRM op (OpAddr ea)      = mkRUR (use_R op $! use_EA ea [])
-    usageRM _ _                 = panic "X86.RegInfo.usageRM: no match"
-
-    -- 2 operand form; first operand Modified; second Modified
-    usageMM :: Operand -> Operand -> RegUsage
-    usageMM (OpReg src) (OpReg dst) = mkRU [src, dst] [src, dst]
-    usageMM (OpReg src) (OpAddr ea) = mkRU (use_EA ea [src]) [src]
-    usageMM _ _                     = panic "X86.RegInfo.usageMM: no match"
-
-    -- 3 operand form; first operand Read; second Modified; third Modified
-    usageRMM :: Operand -> Operand -> Operand -> RegUsage
-    usageRMM (OpReg src) (OpReg dst) (OpReg reg) = mkRU [src, dst, reg] [dst, reg]
-    usageRMM (OpReg src) (OpAddr ea) (OpReg reg) = mkRU (use_EA ea [src, reg]) [reg]
-    usageRMM _ _ _                               = panic "X86.RegInfo.usageRMM: no match"
-
-    -- 1 operand form; operand Modified
-    usageM :: Operand -> RegUsage
-    usageM (OpReg reg)          = mkRU [reg] [reg]
-    usageM (OpAddr ea)          = mkRUR (use_EA ea [])
-    usageM _                    = panic "X86.RegInfo.usageM: no match"
-
-    -- Registers defd when an operand is written.
-    def_W (OpReg reg)           = [reg]
-    def_W (OpAddr _ )           = []
-    def_W _                     = panic "X86.RegInfo.def_W: no match"
-
-    -- Registers used when an operand is read.
-    use_R (OpReg reg)  tl = reg : tl
-    use_R (OpImm _)    tl = tl
-    use_R (OpAddr ea)  tl = use_EA ea tl
-
-    -- Registers used to compute an effective address.
-    use_EA (ImmAddr _ _) tl = tl
-    use_EA (AddrBaseIndex base index _) tl =
-        use_base base $! use_index index tl
-        where use_base (EABaseReg r)  tl = r : tl
-              use_base _              tl = tl
-              use_index EAIndexNone   tl = tl
-              use_index (EAIndex i _) tl = i : tl
-
-    mkRUR src = src' `seq` RU src' []
-        where src' = filter (interesting platform) src
-
-    mkRU src dst = src' `seq` dst' `seq` RU src' dst'
-        where src' = filter (interesting platform) src
-              dst' = filter (interesting platform) dst
-
--- | Is this register interesting for the register allocator?
-interesting :: Platform -> Reg -> Bool
-interesting _        (RegVirtual _)              = True
-interesting platform (RegReal (RealRegSingle i)) = freeReg platform i
-interesting _        (RegReal (RealRegPair{}))   = panic "X86.interesting: no reg pairs on this arch"
-
-
-
--- | Applies the supplied function to all registers in instructions.
--- Typically used to change virtual registers to real registers.
-x86_patchRegsOfInstr :: Instr -> (Reg -> Reg) -> Instr
-x86_patchRegsOfInstr instr env
- = case instr of
-    MOV  fmt src dst     -> patch2 (MOV  fmt) src dst
-    CMOV cc fmt src dst  -> CMOV cc fmt (patchOp src) (env dst)
-    MOVZxL fmt src dst   -> patch2 (MOVZxL fmt) src dst
-    MOVSxL fmt src dst   -> patch2 (MOVSxL fmt) src dst
-    LEA  fmt src dst     -> patch2 (LEA  fmt) src dst
-    ADD  fmt src dst     -> patch2 (ADD  fmt) src dst
-    ADC  fmt src dst     -> patch2 (ADC  fmt) src dst
-    SUB  fmt src dst     -> patch2 (SUB  fmt) src dst
-    SBB  fmt src dst     -> patch2 (SBB  fmt) src dst
-    IMUL fmt src dst     -> patch2 (IMUL fmt) src dst
-    IMUL2 fmt src        -> patch1 (IMUL2 fmt) src
-    MUL fmt src dst      -> patch2 (MUL fmt) src dst
-    MUL2 fmt src         -> patch1 (MUL2 fmt) src
-    IDIV fmt op          -> patch1 (IDIV fmt) op
-    DIV fmt op           -> patch1 (DIV fmt) op
-    ADD_CC fmt src dst   -> patch2 (ADD_CC fmt) src dst
-    SUB_CC fmt src dst   -> patch2 (SUB_CC fmt) src dst
-    AND  fmt src dst     -> patch2 (AND  fmt) src dst
-    OR   fmt src dst     -> patch2 (OR   fmt) src dst
-    XOR  fmt src dst     -> patch2 (XOR  fmt) src dst
-    NOT  fmt op          -> patch1 (NOT  fmt) op
-    BSWAP fmt reg        -> BSWAP fmt (env reg)
-    NEGI fmt op          -> patch1 (NEGI fmt) op
-    SHL  fmt imm dst     -> patch1 (SHL fmt imm) dst
-    SAR  fmt imm dst     -> patch1 (SAR fmt imm) dst
-    SHR  fmt imm dst     -> patch1 (SHR fmt imm) dst
-    BT   fmt imm src     -> patch1 (BT  fmt imm) src
-    TEST fmt src dst     -> patch2 (TEST fmt) src dst
-    CMP  fmt src dst     -> patch2 (CMP  fmt) src dst
-    PUSH fmt op          -> patch1 (PUSH fmt) op
-    POP  fmt op          -> patch1 (POP  fmt) op
-    SETCC cond op        -> patch1 (SETCC cond) op
-    JMP op regs          -> JMP (patchOp op) regs
-    JMP_TBL op ids s lbl -> JMP_TBL (patchOp op) ids s lbl
-
-    -- literally only support storing the top x87 stack value st(0)
-    X87Store  fmt  dst     -> X87Store fmt  (lookupAddr dst)
-
-    CVTSS2SD src dst    -> CVTSS2SD (env src) (env dst)
-    CVTSD2SS src dst    -> CVTSD2SS (env src) (env dst)
-    CVTTSS2SIQ fmt src dst -> CVTTSS2SIQ fmt (patchOp src) (env dst)
-    CVTTSD2SIQ fmt src dst -> CVTTSD2SIQ fmt (patchOp src) (env dst)
-    CVTSI2SS fmt src dst -> CVTSI2SS fmt (patchOp src) (env dst)
-    CVTSI2SD fmt src dst -> CVTSI2SD fmt (patchOp src) (env dst)
-    FDIV fmt src dst     -> FDIV fmt (patchOp src) (patchOp dst)
-    SQRT fmt src dst    -> SQRT fmt (patchOp src) (env dst)
-
-    CALL (Left _)  _    -> instr
-    CALL (Right reg) p  -> CALL (Right (env reg)) p
-
-    FETCHGOT reg        -> FETCHGOT (env reg)
-    FETCHPC  reg        -> FETCHPC  (env reg)
-
-    NOP                 -> instr
-    COMMENT _           -> instr
-    LOCATION {}         -> instr
-    UNWIND {}           -> instr
-    DELTA _             -> instr
-
-    JXX _ _             -> instr
-    JXX_GBL _ _         -> instr
-    CLTD _              -> instr
-
-    POPCNT fmt src dst -> POPCNT fmt (patchOp src) (env dst)
-    LZCNT  fmt src dst -> LZCNT  fmt (patchOp src) (env dst)
-    TZCNT  fmt src dst -> TZCNT  fmt (patchOp src) (env dst)
-    PDEP   fmt src mask dst -> PDEP   fmt (patchOp src) (patchOp mask) (env dst)
-    PEXT   fmt src mask dst -> PEXT   fmt (patchOp src) (patchOp mask) (env dst)
-    BSF    fmt src dst -> BSF    fmt (patchOp src) (env dst)
-    BSR    fmt src dst -> BSR    fmt (patchOp src) (env dst)
-
-    PREFETCH lvl format src -> PREFETCH lvl format (patchOp src)
-
-    LOCK i               -> LOCK (x86_patchRegsOfInstr i env)
-    XADD fmt src dst     -> patch2 (XADD fmt) src dst
-    CMPXCHG fmt src dst  -> patch2 (CMPXCHG fmt) src dst
-    MFENCE               -> instr
-
-    _other              -> panic "patchRegs: unrecognised instr"
-
-  where
-    patch1 :: (Operand -> a) -> Operand -> a
-    patch1 insn op      = insn $! patchOp op
-    patch2 :: (Operand -> Operand -> a) -> Operand -> Operand -> a
-    patch2 insn src dst = (insn $! patchOp src) $! patchOp dst
-
-    patchOp (OpReg  reg) = OpReg $! env reg
-    patchOp (OpImm  imm) = OpImm imm
-    patchOp (OpAddr ea)  = OpAddr $! lookupAddr ea
-
-    lookupAddr (ImmAddr imm off) = ImmAddr imm off
-    lookupAddr (AddrBaseIndex base index disp)
-      = ((AddrBaseIndex $! lookupBase base) $! lookupIndex index) disp
-      where
-        lookupBase EABaseNone       = EABaseNone
-        lookupBase EABaseRip        = EABaseRip
-        lookupBase (EABaseReg r)    = EABaseReg $! env r
-
-        lookupIndex EAIndexNone     = EAIndexNone
-        lookupIndex (EAIndex r i)   = (EAIndex $! env r) i
-
-
---------------------------------------------------------------------------------
-x86_isJumpishInstr
-        :: Instr -> Bool
-
-x86_isJumpishInstr instr
- = case instr of
-        JMP{}           -> True
-        JXX{}           -> True
-        JXX_GBL{}       -> True
-        JMP_TBL{}       -> True
-        CALL{}          -> True
-        _               -> False
-
-
-x86_jumpDestsOfInstr
-        :: Instr
-        -> [BlockId]
-
-x86_jumpDestsOfInstr insn
-  = case insn of
-        JXX _ id        -> [id]
-        JMP_TBL _ ids _ _ -> [id | Just (DestBlockId id) <- ids]
-        _               -> []
-
-
-x86_patchJumpInstr
-        :: Instr -> (BlockId -> BlockId) -> Instr
-
-x86_patchJumpInstr insn patchF
-  = case insn of
-        JXX cc id       -> JXX cc (patchF id)
-        JMP_TBL op ids section lbl
-          -> JMP_TBL op (map (fmap (patchJumpDest patchF)) ids) section lbl
-        _               -> insn
-    where
-        patchJumpDest f (DestBlockId id) = DestBlockId (f id)
-        patchJumpDest _ dest             = dest
-
-
-
-
-
--- -----------------------------------------------------------------------------
--- | Make a spill instruction.
-x86_mkSpillInstr
-    :: DynFlags
-    -> Reg      -- register to spill
-    -> Int      -- current stack delta
-    -> Int      -- spill slot to use
-    -> Instr
-
-x86_mkSpillInstr dflags reg delta slot
-  = let off     = spillSlotToOffset platform slot - delta
-    in
-    case targetClassOfReg platform reg of
-           RcInteger   -> MOV (archWordFormat is32Bit)
-                              (OpReg reg) (OpAddr (spRel dflags off))
-           RcDouble    -> MOV FF64 (OpReg reg) (OpAddr (spRel dflags off))
-           _         -> panic "X86.mkSpillInstr: no match"
-    where platform = targetPlatform dflags
-          is32Bit = target32Bit platform
-
--- | Make a spill reload instruction.
-x86_mkLoadInstr
-    :: DynFlags
-    -> Reg      -- register to load
-    -> Int      -- current stack delta
-    -> Int      -- spill slot to use
-    -> Instr
-
-x86_mkLoadInstr dflags reg delta slot
-  = let off     = spillSlotToOffset platform slot - delta
-    in
-        case targetClassOfReg platform reg of
-              RcInteger -> MOV (archWordFormat is32Bit)
-                               (OpAddr (spRel dflags off)) (OpReg reg)
-              RcDouble  -> MOV FF64 (OpAddr (spRel dflags off)) (OpReg reg)
-              _           -> panic "X86.x86_mkLoadInstr"
-    where platform = targetPlatform dflags
-          is32Bit = target32Bit platform
-
-spillSlotSize :: Platform -> Int
-spillSlotSize dflags = if is32Bit then 12 else 8
-    where is32Bit = target32Bit dflags
-
-maxSpillSlots :: DynFlags -> Int
-maxSpillSlots dflags
-    = ((rESERVED_C_STACK_BYTES dflags - 64) `div` spillSlotSize (targetPlatform dflags)) - 1
---     = 0 -- useful for testing allocMoreStack
-
--- number of bytes that the stack pointer should be aligned to
-stackAlign :: Int
-stackAlign = 16
-
--- convert a spill slot number to a *byte* offset, with no sign:
--- decide on a per arch basis whether you are spilling above or below
--- the C stack pointer.
-spillSlotToOffset :: Platform -> Int -> Int
-spillSlotToOffset platform slot
-   = 64 + spillSlotSize platform * slot
-
---------------------------------------------------------------------------------
-
--- | See if this instruction is telling us the current C stack delta
-x86_takeDeltaInstr
-        :: Instr
-        -> Maybe Int
-
-x86_takeDeltaInstr instr
- = case instr of
-        DELTA i         -> Just i
-        _               -> Nothing
-
-
-x86_isMetaInstr
-        :: Instr
-        -> Bool
-
-x86_isMetaInstr instr
- = case instr of
-        COMMENT{}       -> True
-        LOCATION{}      -> True
-        LDATA{}         -> True
-        NEWBLOCK{}      -> True
-        UNWIND{}        -> True
-        DELTA{}         -> True
-        _               -> False
-
-
-
----  TODO: why is there
--- | Make a reg-reg move instruction.
---      On SPARC v8 there are no instructions to move directly between
---      floating point and integer regs. If we need to do that then we
---      have to go via memory.
---
-x86_mkRegRegMoveInstr
-    :: Platform
-    -> Reg
-    -> Reg
-    -> Instr
-
-x86_mkRegRegMoveInstr platform src dst
- = case targetClassOfReg platform src of
-        RcInteger -> case platformArch platform of
-                     ArchX86    -> MOV II32 (OpReg src) (OpReg dst)
-                     ArchX86_64 -> MOV II64 (OpReg src) (OpReg dst)
-                     _          -> panic "x86_mkRegRegMoveInstr: Bad arch"
-        RcDouble    ->  MOV FF64 (OpReg src) (OpReg dst)
-        -- this code is the lie we tell ourselves because both float and double
-        -- use the same register class.on x86_64 and x86 32bit with SSE2,
-        -- more plainly, both use the XMM registers
-        _     -> panic "X86.RegInfo.mkRegRegMoveInstr: no match"
-
--- | Check whether an instruction represents a reg-reg move.
---      The register allocator attempts to eliminate reg->reg moves whenever it can,
---      by assigning the src and dest temporaries to the same real register.
---
-x86_takeRegRegMoveInstr
-        :: Instr
-        -> Maybe (Reg,Reg)
-
-x86_takeRegRegMoveInstr (MOV _ (OpReg r1) (OpReg r2))
-        = Just (r1,r2)
-
-x86_takeRegRegMoveInstr _  = Nothing
-
-
--- | Make an unconditional branch instruction.
-x86_mkJumpInstr
-        :: BlockId
-        -> [Instr]
-
-x86_mkJumpInstr id
-        = [JXX ALWAYS id]
-
--- Note [Windows stack layout]
--- | On most OSes the kernel will place a guard page after the current stack
---   page.  If you allocate larger than a page worth you may jump over this
---   guard page.  Not only is this a security issue, but on certain OSes such
---   as Windows a new page won't be allocated if you don't hit the guard.  This
---   will cause a segfault or access fault.
---
---   This function defines if the current allocation amount requires a probe.
---   On Windows (for now) we emit a call to _chkstk for this.  For other OSes
---   this is not yet implemented.
---   See https://docs.microsoft.com/en-us/windows/desktop/DevNotes/-win32-chkstk
---   The Windows stack looks like this:
---
---                         +-------------------+
---                         |        SP         |
---                         +-------------------+
---                         |                   |
---                         |    GUARD PAGE     |
---                         |                   |
---                         +-------------------+
---                         |                   |
---                         |                   |
---                         |     UNMAPPED      |
---                         |                   |
---                         |                   |
---                         +-------------------+
---
---   In essence each allocation larger than a page size needs to be chunked and
---   a probe emitted after each page allocation.  You have to hit the guard
---   page so the kernel can map in the next page, otherwise you'll segfault.
---
-needs_probe_call :: Platform -> Int -> Bool
-needs_probe_call platform amount
-  = case platformOS platform of
-     OSMinGW32 -> case platformArch platform of
-                    ArchX86    -> amount > (4 * 1024)
-                    ArchX86_64 -> amount > (8 * 1024)
-                    _          -> False
-     _         -> False
-
-x86_mkStackAllocInstr
-        :: Platform
-        -> Int
-        -> [Instr]
-x86_mkStackAllocInstr platform amount
-  = case platformOS platform of
-      OSMinGW32 ->
-        -- These will clobber AX but this should be ok because
-        --
-        -- 1. It is the first thing we do when entering the closure and AX is
-        --    a caller saved registers on Windows both on x86_64 and x86.
-        --
-        -- 2. The closures are only entered via a call or longjmp in which case
-        --    there are no expectations for volatile registers.
-        --
-        -- 3. When the target is a local branch point it is re-targeted
-        --    after the dealloc, preserving #2.  See note [extra spill slots].
-        --
-        -- We emit a call because the stack probes are quite involved and
-        -- would bloat code size a lot.  GHC doesn't really have an -Os.
-        -- __chkstk is guaranteed to leave all nonvolatile registers and AX
-        -- untouched.  It's part of the standard prologue code for any Windows
-        -- function dropping the stack more than a page.
-        -- See Note [Windows stack layout]
-        case platformArch platform of
-            ArchX86    | needs_probe_call platform amount ->
-                           [ MOV II32 (OpImm (ImmInt amount)) (OpReg eax)
-                           , CALL (Left $ strImmLit "___chkstk_ms") [eax]
-                           , SUB II32 (OpReg eax) (OpReg esp)
-                           ]
-                       | otherwise ->
-                           [ SUB II32 (OpImm (ImmInt amount)) (OpReg esp)
-                           , TEST II32 (OpReg esp) (OpReg esp)
-                           ]
-            ArchX86_64 | needs_probe_call platform amount ->
-                           [ MOV II64 (OpImm (ImmInt amount)) (OpReg rax)
-                           , CALL (Left $ strImmLit "___chkstk_ms") [rax]
-                           , SUB II64 (OpReg rax) (OpReg rsp)
-                           ]
-                       | otherwise ->
-                           [ SUB II64 (OpImm (ImmInt amount)) (OpReg rsp)
-                           , TEST II64 (OpReg rsp) (OpReg rsp)
-                           ]
-            _ -> panic "x86_mkStackAllocInstr"
-      _       ->
-        case platformArch platform of
-          ArchX86    -> [ SUB II32 (OpImm (ImmInt amount)) (OpReg esp) ]
-          ArchX86_64 -> [ SUB II64 (OpImm (ImmInt amount)) (OpReg rsp) ]
-          _ -> panic "x86_mkStackAllocInstr"
-
-x86_mkStackDeallocInstr
-        :: Platform
-        -> Int
-        -> [Instr]
-x86_mkStackDeallocInstr platform amount
-  = case platformArch platform of
-      ArchX86    -> [ADD II32 (OpImm (ImmInt amount)) (OpReg esp)]
-      ArchX86_64 -> [ADD II64 (OpImm (ImmInt amount)) (OpReg rsp)]
-      _ -> panic "x86_mkStackDeallocInstr"
-
-
---
--- Note [extra spill slots]
---
--- If the register allocator used more spill slots than we have
--- pre-allocated (rESERVED_C_STACK_BYTES), then we must allocate more
--- C stack space on entry and exit from this proc.  Therefore we
--- insert a "sub $N, %rsp" at every entry point, and an "add $N, %rsp"
--- before every non-local jump.
---
--- This became necessary when the new codegen started bundling entire
--- functions together into one proc, because the register allocator
--- assigns a different stack slot to each virtual reg within a proc.
--- To avoid using so many slots we could also:
---
---   - split up the proc into connected components before code generator
---
---   - rename the virtual regs, so that we re-use vreg names and hence
---     stack slots for non-overlapping vregs.
---
--- Note that when a block is both a non-local entry point (with an
--- info table) and a local branch target, we have to split it into
--- two, like so:
---
---    <info table>
---    L:
---       <code>
---
--- becomes
---
---    <info table>
---    L:
---       subl $rsp, N
---       jmp Lnew
---    Lnew:
---       <code>
---
--- and all branches pointing to L are retargetted to point to Lnew.
--- Otherwise, we would repeat the $rsp adjustment for each branch to
--- L.
---
--- Returns a list of (L,Lnew) pairs.
---
-allocMoreStack
-  :: Platform
-  -> Int
-  -> NatCmmDecl statics X86.Instr.Instr
-  -> UniqSM (NatCmmDecl statics X86.Instr.Instr, [(BlockId,BlockId)])
-
-allocMoreStack _ _ top@(CmmData _ _) = return (top,[])
-allocMoreStack platform slots proc@(CmmProc info lbl live (ListGraph code)) = do
-    let entries = entryBlocks proc
-
-    uniqs <- replicateM (length entries) getUniqueM
-
-    let
-      delta = ((x + stackAlign - 1) `quot` stackAlign) * stackAlign -- round up
-        where x = slots * spillSlotSize platform -- sp delta
-
-      alloc   = mkStackAllocInstr   platform delta
-      dealloc = mkStackDeallocInstr platform delta
-
-      retargetList = (zip entries (map mkBlockId uniqs))
-
-      new_blockmap :: LabelMap BlockId
-      new_blockmap = mapFromList retargetList
-
-      insert_stack_insns (BasicBlock id insns)
-         | Just new_blockid <- mapLookup id new_blockmap
-         = [ BasicBlock id $ alloc ++ [JXX ALWAYS new_blockid]
-           , BasicBlock new_blockid block' ]
-         | otherwise
-         = [ BasicBlock id block' ]
-         where
-           block' = foldr insert_dealloc [] insns
-
-      insert_dealloc insn r = case insn of
-         JMP _ _     -> dealloc ++ (insn : r)
-         JXX_GBL _ _ -> panic "insert_dealloc: cannot handle JXX_GBL"
-         _other      -> x86_patchJumpInstr insn retarget : r
-           where retarget b = fromMaybe b (mapLookup b new_blockmap)
-
-      new_code = concatMap insert_stack_insns code
-    -- in
-    return (CmmProc info lbl live (ListGraph new_code), retargetList)
-
-data JumpDest = DestBlockId BlockId | DestImm Imm
-
--- Debug Instance
-instance Outputable JumpDest where
-  ppr (DestBlockId bid) = text "jd<blk>:" <> ppr bid
-  ppr (DestImm _imm)    = text "jd<imm>:noShow"
-
-
-getJumpDestBlockId :: JumpDest -> Maybe BlockId
-getJumpDestBlockId (DestBlockId bid) = Just bid
-getJumpDestBlockId _                 = Nothing
-
-canShortcut :: Instr -> Maybe JumpDest
-canShortcut (JXX ALWAYS id)      = Just (DestBlockId id)
-canShortcut (JMP (OpImm imm) _)  = Just (DestImm imm)
-canShortcut _                    = Nothing
-
-
--- This helper shortcuts a sequence of branches.
--- The blockset helps avoid following cycles.
-shortcutJump :: (BlockId -> Maybe JumpDest) -> Instr -> Instr
-shortcutJump fn insn = shortcutJump' fn (setEmpty :: LabelSet) insn
-  where
-    shortcutJump' :: (BlockId -> Maybe JumpDest) -> LabelSet -> Instr -> Instr
-    shortcutJump' fn seen insn@(JXX cc id) =
-        if setMember id seen then insn
-        else case fn id of
-            Nothing                -> insn
-            Just (DestBlockId id') -> shortcutJump' fn seen' (JXX cc id')
-            Just (DestImm imm)     -> shortcutJump' fn seen' (JXX_GBL cc imm)
-        where seen' = setInsert id seen
-    shortcutJump' fn _ (JMP_TBL addr blocks section tblId) =
-        let updateBlock (Just (DestBlockId bid))  =
-                case fn bid of
-                    Nothing   -> Just (DestBlockId bid )
-                    Just dest -> Just dest
-            updateBlock dest = dest
-            blocks' = map updateBlock blocks
-        in  JMP_TBL addr blocks' section tblId
-    shortcutJump' _ _ other = other
-
--- Here because it knows about JumpDest
-shortcutStatics :: (BlockId -> Maybe JumpDest) -> (Alignment, RawCmmStatics) -> (Alignment, RawCmmStatics)
-shortcutStatics fn (align, RawCmmStatics lbl statics)
-  = (align, RawCmmStatics lbl $ map (shortcutStatic fn) statics)
-  -- we need to get the jump tables, so apply the mapping to the entries
-  -- of a CmmData too.
-
-shortcutLabel :: (BlockId -> Maybe JumpDest) -> CLabel -> CLabel
-shortcutLabel fn lab
-  | Just blkId <- maybeLocalBlockLabel lab = shortBlockId fn emptyUniqSet blkId
-  | otherwise                              = lab
-
-shortcutStatic :: (BlockId -> Maybe JumpDest) -> CmmStatic -> CmmStatic
-shortcutStatic fn (CmmStaticLit (CmmLabel lab))
-  = CmmStaticLit (CmmLabel (shortcutLabel fn lab))
-shortcutStatic fn (CmmStaticLit (CmmLabelDiffOff lbl1 lbl2 off w))
-  = CmmStaticLit (CmmLabelDiffOff (shortcutLabel fn lbl1) lbl2 off w)
-        -- slightly dodgy, we're ignoring the second label, but this
-        -- works with the way we use CmmLabelDiffOff for jump tables now.
-shortcutStatic _ other_static
-        = other_static
-
-shortBlockId
-        :: (BlockId -> Maybe JumpDest)
-        -> UniqSet Unique
-        -> BlockId
-        -> CLabel
-
-shortBlockId fn seen blockid =
-  case (elementOfUniqSet uq seen, fn blockid) of
-    (True, _)    -> blockLbl blockid
-    (_, Nothing) -> blockLbl blockid
-    (_, Just (DestBlockId blockid'))  -> shortBlockId fn (addOneToUniqSet seen uq) blockid'
-    (_, Just (DestImm (ImmCLbl lbl))) -> lbl
-    (_, _other) -> panic "shortBlockId"
-  where uq = getUnique blockid
author	Sylvain Henry <sylvain@haskus.fr>	2020-02-22 15:05:20 +0100
committer	Marge Bot <ben+marge-bot@smart-cactus.org>	2020-02-24 20:55:25 -0500
commit	1b1067d14b656bbbfa7c47f156ec2700c9751549 (patch)
tree	32346e3c4c3f89117190b36364144d85dc260e05 /compiler/nativeGen/X86/Instr.hs
parent	354e2787be08fb6d973de1a39e58080ff8e107f8 (diff)
download	haskell-1b1067d14b656bbbfa7c47f156ec2700c9751549.tar.gz