summaryrefslogtreecommitdiff
path: root/compiler/nativeGen/X86/Instr.hs
diff options
context:
space:
mode:
authorSylvain Henry <sylvain@haskus.fr>2020-02-22 15:05:20 +0100
committerMarge Bot <ben+marge-bot@smart-cactus.org>2020-02-24 20:55:25 -0500
commit1b1067d14b656bbbfa7c47f156ec2700c9751549 (patch)
tree32346e3c4c3f89117190b36364144d85dc260e05 /compiler/nativeGen/X86/Instr.hs
parent354e2787be08fb6d973de1a39e58080ff8e107f8 (diff)
downloadhaskell-1b1067d14b656bbbfa7c47f156ec2700c9751549.tar.gz
Modules: CmmToAsm (#13009)
Diffstat (limited to 'compiler/nativeGen/X86/Instr.hs')
-rw-r--r--compiler/nativeGen/X86/Instr.hs1054
1 files changed, 0 insertions, 1054 deletions
diff --git a/compiler/nativeGen/X86/Instr.hs b/compiler/nativeGen/X86/Instr.hs
deleted file mode 100644
index 099437265c..0000000000
--- a/compiler/nativeGen/X86/Instr.hs
+++ /dev/null
@@ -1,1054 +0,0 @@
-{-# LANGUAGE CPP, TypeFamilies #-}
-
------------------------------------------------------------------------------
---
--- Machine-dependent assembly language
---
--- (c) The University of Glasgow 1993-2004
---
------------------------------------------------------------------------------
-
-module X86.Instr (Instr(..), Operand(..), PrefetchVariant(..), JumpDest(..),
- getJumpDestBlockId, canShortcut, shortcutStatics,
- shortcutJump, allocMoreStack,
- maxSpillSlots, archWordFormat )
-where
-
-#include "HsVersions.h"
-
-import GhcPrelude
-
-import X86.Cond
-import X86.Regs
-import Instruction
-import Format
-import RegClass
-import Reg
-import TargetReg
-
-import GHC.Cmm.BlockId
-import GHC.Cmm.Dataflow.Collections
-import GHC.Cmm.Dataflow.Label
-import GHC.Platform.Regs
-import GHC.Cmm
-import FastString
-import Outputable
-import GHC.Platform
-
-import BasicTypes (Alignment)
-import GHC.Cmm.CLabel
-import GHC.Driver.Session
-import UniqSet
-import Unique
-import UniqSupply
-import GHC.Cmm.DebugBlock (UnwindTable)
-
-import Control.Monad
-import Data.Maybe (fromMaybe)
-
--- Format of an x86/x86_64 memory address, in bytes.
---
-archWordFormat :: Bool -> Format
-archWordFormat is32Bit
- | is32Bit = II32
- | otherwise = II64
-
--- | Instruction instance for x86 instruction set.
-instance Instruction Instr where
- regUsageOfInstr = x86_regUsageOfInstr
- patchRegsOfInstr = x86_patchRegsOfInstr
- isJumpishInstr = x86_isJumpishInstr
- jumpDestsOfInstr = x86_jumpDestsOfInstr
- patchJumpInstr = x86_patchJumpInstr
- mkSpillInstr = x86_mkSpillInstr
- mkLoadInstr = x86_mkLoadInstr
- takeDeltaInstr = x86_takeDeltaInstr
- isMetaInstr = x86_isMetaInstr
- mkRegRegMoveInstr = x86_mkRegRegMoveInstr
- takeRegRegMoveInstr = x86_takeRegRegMoveInstr
- mkJumpInstr = x86_mkJumpInstr
- mkStackAllocInstr = x86_mkStackAllocInstr
- mkStackDeallocInstr = x86_mkStackDeallocInstr
-
-
--- -----------------------------------------------------------------------------
--- Intel x86 instructions
-
-{-
-Intel, in their infinite wisdom, selected a stack model for floating
-point registers on x86. That might have made sense back in 1979 --
-nowadays we can see it for the nonsense it really is. A stack model
-fits poorly with the existing nativeGen infrastructure, which assumes
-flat integer and FP register sets. Prior to this commit, nativeGen
-could not generate correct x86 FP code -- to do so would have meant
-somehow working the register-stack paradigm into the register
-allocator and spiller, which sounds very difficult.
-
-We have decided to cheat, and go for a simple fix which requires no
-infrastructure modifications, at the expense of generating ropey but
-correct FP code. All notions of the x86 FP stack and its insns have
-been removed. Instead, we pretend (to the instruction selector and
-register allocator) that x86 has six floating point registers, %fake0
-.. %fake5, which can be used in the usual flat manner. We further
-claim that x86 has floating point instructions very similar to SPARC
-and Alpha, that is, a simple 3-operand register-register arrangement.
-Code generation and register allocation proceed on this basis.
-
-When we come to print out the final assembly, our convenient fiction
-is converted to dismal reality. Each fake instruction is
-independently converted to a series of real x86 instructions.
-%fake0 .. %fake5 are mapped to %st(0) .. %st(5). To do reg-reg
-arithmetic operations, the two operands are pushed onto the top of the
-FP stack, the operation done, and the result copied back into the
-relevant register. There are only six %fake registers because 2 are
-needed for the translation, and x86 has 8 in total.
-
-The translation is inefficient but is simple and it works. A cleverer
-translation would handle a sequence of insns, simulating the FP stack
-contents, would not impose a fixed mapping from %fake to %st regs, and
-hopefully could avoid most of the redundant reg-reg moves of the
-current translation.
-
-We might as well make use of whatever unique FP facilities Intel have
-chosen to bless us with (let's not be churlish, after all).
-Hence GLDZ and GLD1. Bwahahahahahahaha!
--}
-
-{-
-Note [x86 Floating point precision]
-
-Intel's internal floating point registers are by default 80 bit
-extended precision. This means that all operations done on values in
-registers are done at 80 bits, and unless the intermediate values are
-truncated to the appropriate size (32 or 64 bits) by storing in
-memory, calculations in registers will give different results from
-calculations which pass intermediate values in memory (eg. via
-function calls).
-
-One solution is to set the FPU into 64 bit precision mode. Some OSs
-do this (eg. FreeBSD) and some don't (eg. Linux). The problem here is
-that this will only affect 64-bit precision arithmetic; 32-bit
-calculations will still be done at 64-bit precision in registers. So
-it doesn't solve the whole problem.
-
-There's also the issue of what the C library is expecting in terms of
-precision. It seems to be the case that glibc on Linux expects the
-FPU to be set to 80 bit precision, so setting it to 64 bit could have
-unexpected effects. Changing the default could have undesirable
-effects on other 3rd-party library code too, so the right thing would
-be to save/restore the FPU control word across Haskell code if we were
-to do this.
-
-gcc's -ffloat-store gives consistent results by always storing the
-results of floating-point calculations in memory, which works for both
-32 and 64-bit precision. However, it only affects the values of
-user-declared floating point variables in C, not intermediate results.
-GHC in -fvia-C mode uses -ffloat-store (see the -fexcess-precision
-flag).
-
-Another problem is how to spill floating point registers in the
-register allocator. Should we spill the whole 80 bits, or just 64?
-On an OS which is set to 64 bit precision, spilling 64 is fine. On
-Linux, spilling 64 bits will round the results of some operations.
-This is what gcc does. Spilling at 80 bits requires taking up a full
-128 bit slot (so we get alignment). We spill at 80-bits and ignore
-the alignment problems.
-
-In the future [edit: now available in GHC 7.0.1, with the -msse2
-flag], we'll use the SSE registers for floating point. This requires
-a CPU that supports SSE2 (ordinary SSE only supports 32 bit precision
-float ops), which means P4 or Xeon and above. Using SSE will solve
-all these problems, because the SSE registers use fixed 32 bit or 64
-bit precision.
-
---SDM 1/2003
--}
-
-data Instr
- -- comment pseudo-op
- = COMMENT FastString
-
- -- location pseudo-op (file, line, col, name)
- | LOCATION Int Int Int String
-
- -- some static data spat out during code
- -- generation. Will be extracted before
- -- pretty-printing.
- | LDATA Section (Alignment, RawCmmStatics)
-
- -- start a new basic block. Useful during
- -- codegen, removed later. Preceding
- -- instruction should be a jump, as per the
- -- invariants for a BasicBlock (see Cmm).
- | NEWBLOCK BlockId
-
- -- unwinding information
- -- See Note [Unwinding information in the NCG].
- | UNWIND CLabel UnwindTable
-
- -- specify current stack offset for benefit of subsequent passes.
- -- This carries a BlockId so it can be used in unwinding information.
- | DELTA Int
-
- -- Moves.
- | MOV Format Operand Operand
- | CMOV Cond Format Operand Reg
- | MOVZxL Format Operand Operand -- format is the size of operand 1
- | MOVSxL Format Operand Operand -- format is the size of operand 1
- -- x86_64 note: plain mov into a 32-bit register always zero-extends
- -- into the 64-bit reg, in contrast to the 8 and 16-bit movs which
- -- don't affect the high bits of the register.
-
- -- Load effective address (also a very useful three-operand add instruction :-)
- | LEA Format Operand Operand
-
- -- Int Arithmetic.
- | ADD Format Operand Operand
- | ADC Format Operand Operand
- | SUB Format Operand Operand
- | SBB Format Operand Operand
-
- | MUL Format Operand Operand
- | MUL2 Format Operand -- %edx:%eax = operand * %rax
- | IMUL Format Operand Operand -- signed int mul
- | IMUL2 Format Operand -- %edx:%eax = operand * %eax
-
- | DIV Format Operand -- eax := eax:edx/op, edx := eax:edx%op
- | IDIV Format Operand -- ditto, but signed
-
- -- Int Arithmetic, where the effects on the condition register
- -- are important. Used in specialized sequences such as MO_Add2.
- -- Do not rewrite these instructions to "equivalent" ones that
- -- have different effect on the condition register! (See #9013.)
- | ADD_CC Format Operand Operand
- | SUB_CC Format Operand Operand
-
- -- Simple bit-twiddling.
- | AND Format Operand Operand
- | OR Format Operand Operand
- | XOR Format Operand Operand
- | NOT Format Operand
- | NEGI Format Operand -- NEG instruction (name clash with Cond)
- | BSWAP Format Reg
-
- -- Shifts (amount may be immediate or %cl only)
- | SHL Format Operand{-amount-} Operand
- | SAR Format Operand{-amount-} Operand
- | SHR Format Operand{-amount-} Operand
-
- | BT Format Imm Operand
- | NOP
-
-
- -- We need to support the FSTP (x87 store and pop) instruction
- -- so that we can correctly read off the return value of an
- -- x86 CDECL C function call when its floating point.
- -- so we dont include a register argument, and just use st(0)
- -- this instruction is used ONLY for return values of C ffi calls
- -- in x86_32 abi
- | X87Store Format AddrMode -- st(0), dst
-
-
- -- SSE2 floating point: we use a restricted set of the available SSE2
- -- instructions for floating-point.
- -- use MOV for moving (either movss or movsd (movlpd better?))
- | CVTSS2SD Reg Reg -- F32 to F64
- | CVTSD2SS Reg Reg -- F64 to F32
- | CVTTSS2SIQ Format Operand Reg -- F32 to I32/I64 (with truncation)
- | CVTTSD2SIQ Format Operand Reg -- F64 to I32/I64 (with truncation)
- | CVTSI2SS Format Operand Reg -- I32/I64 to F32
- | CVTSI2SD Format Operand Reg -- I32/I64 to F64
-
- -- use ADD, SUB, and SQRT for arithmetic. In both cases, operands
- -- are Operand Reg.
-
- -- SSE2 floating-point division:
- | FDIV Format Operand Operand -- divisor, dividend(dst)
-
- -- use CMP for comparisons. ucomiss and ucomisd instructions
- -- compare single/double prec floating point respectively.
-
- | SQRT Format Operand Reg -- src, dst
-
-
- -- Comparison
- | TEST Format Operand Operand
- | CMP Format Operand Operand
- | SETCC Cond Operand
-
- -- Stack Operations.
- | PUSH Format Operand
- | POP Format Operand
- -- both unused (SDM):
- -- | PUSHA
- -- | POPA
-
- -- Jumping around.
- | JMP Operand [Reg] -- including live Regs at the call
- | JXX Cond BlockId -- includes unconditional branches
- | JXX_GBL Cond Imm -- non-local version of JXX
- -- Table jump
- | JMP_TBL Operand -- Address to jump to
- [Maybe JumpDest] -- Targets of the jump table
- Section -- Data section jump table should be put in
- CLabel -- Label of jump table
- -- | X86 call instruction
- | CALL (Either Imm Reg) -- ^ Jump target
- [Reg] -- ^ Arguments (required for register allocation)
-
- -- Other things.
- | CLTD Format -- sign extend %eax into %edx:%eax
-
- | FETCHGOT Reg -- pseudo-insn for ELF position-independent code
- -- pretty-prints as
- -- call 1f
- -- 1: popl %reg
- -- addl __GLOBAL_OFFSET_TABLE__+.-1b, %reg
- | FETCHPC Reg -- pseudo-insn for Darwin position-independent code
- -- pretty-prints as
- -- call 1f
- -- 1: popl %reg
-
- -- bit counting instructions
- | POPCNT Format Operand Reg -- [SSE4.2] count number of bits set to 1
- | LZCNT Format Operand Reg -- [BMI2] count number of leading zeros
- | TZCNT Format Operand Reg -- [BMI2] count number of trailing zeros
- | BSF Format Operand Reg -- bit scan forward
- | BSR Format Operand Reg -- bit scan reverse
-
- -- bit manipulation instructions
- | PDEP Format Operand Operand Reg -- [BMI2] deposit bits to the specified mask
- | PEXT Format Operand Operand Reg -- [BMI2] extract bits from the specified mask
-
- -- prefetch
- | PREFETCH PrefetchVariant Format Operand -- prefetch Variant, addr size, address to prefetch
- -- variant can be NTA, Lvl0, Lvl1, or Lvl2
-
- | LOCK Instr -- lock prefix
- | XADD Format Operand Operand -- src (r), dst (r/m)
- | CMPXCHG Format Operand Operand -- src (r), dst (r/m), eax implicit
- | MFENCE
-
-data PrefetchVariant = NTA | Lvl0 | Lvl1 | Lvl2
-
-
-data Operand
- = OpReg Reg -- register
- | OpImm Imm -- immediate value
- | OpAddr AddrMode -- memory reference
-
-
-
--- | Returns which registers are read and written as a (read, written)
--- pair.
-x86_regUsageOfInstr :: Platform -> Instr -> RegUsage
-x86_regUsageOfInstr platform instr
- = case instr of
- MOV _ src dst -> usageRW src dst
- CMOV _ _ src dst -> mkRU (use_R src [dst]) [dst]
- MOVZxL _ src dst -> usageRW src dst
- MOVSxL _ src dst -> usageRW src dst
- LEA _ src dst -> usageRW src dst
- ADD _ src dst -> usageRM src dst
- ADC _ src dst -> usageRM src dst
- SUB _ src dst -> usageRM src dst
- SBB _ src dst -> usageRM src dst
- IMUL _ src dst -> usageRM src dst
-
- -- Result of IMULB will be in just in %ax
- IMUL2 II8 src -> mkRU (eax:use_R src []) [eax]
- -- Result of IMUL for wider values, will be split between %dx/%edx/%rdx and
- -- %ax/%eax/%rax.
- IMUL2 _ src -> mkRU (eax:use_R src []) [eax,edx]
-
- MUL _ src dst -> usageRM src dst
- MUL2 _ src -> mkRU (eax:use_R src []) [eax,edx]
- DIV _ op -> mkRU (eax:edx:use_R op []) [eax,edx]
- IDIV _ op -> mkRU (eax:edx:use_R op []) [eax,edx]
- ADD_CC _ src dst -> usageRM src dst
- SUB_CC _ src dst -> usageRM src dst
- AND _ src dst -> usageRM src dst
- OR _ src dst -> usageRM src dst
-
- XOR _ (OpReg src) (OpReg dst)
- | src == dst -> mkRU [] [dst]
-
- XOR _ src dst -> usageRM src dst
- NOT _ op -> usageM op
- BSWAP _ reg -> mkRU [reg] [reg]
- NEGI _ op -> usageM op
- SHL _ imm dst -> usageRM imm dst
- SAR _ imm dst -> usageRM imm dst
- SHR _ imm dst -> usageRM imm dst
- BT _ _ src -> mkRUR (use_R src [])
-
- PUSH _ op -> mkRUR (use_R op [])
- POP _ op -> mkRU [] (def_W op)
- TEST _ src dst -> mkRUR (use_R src $! use_R dst [])
- CMP _ src dst -> mkRUR (use_R src $! use_R dst [])
- SETCC _ op -> mkRU [] (def_W op)
- JXX _ _ -> mkRU [] []
- JXX_GBL _ _ -> mkRU [] []
- JMP op regs -> mkRUR (use_R op regs)
- JMP_TBL op _ _ _ -> mkRUR (use_R op [])
- CALL (Left _) params -> mkRU params (callClobberedRegs platform)
- CALL (Right reg) params -> mkRU (reg:params) (callClobberedRegs platform)
- CLTD _ -> mkRU [eax] [edx]
- NOP -> mkRU [] []
-
- X87Store _ dst -> mkRUR ( use_EA dst [])
-
- CVTSS2SD src dst -> mkRU [src] [dst]
- CVTSD2SS src dst -> mkRU [src] [dst]
- CVTTSS2SIQ _ src dst -> mkRU (use_R src []) [dst]
- CVTTSD2SIQ _ src dst -> mkRU (use_R src []) [dst]
- CVTSI2SS _ src dst -> mkRU (use_R src []) [dst]
- CVTSI2SD _ src dst -> mkRU (use_R src []) [dst]
- FDIV _ src dst -> usageRM src dst
- SQRT _ src dst -> mkRU (use_R src []) [dst]
-
- FETCHGOT reg -> mkRU [] [reg]
- FETCHPC reg -> mkRU [] [reg]
-
- COMMENT _ -> noUsage
- LOCATION{} -> noUsage
- UNWIND{} -> noUsage
- DELTA _ -> noUsage
-
- POPCNT _ src dst -> mkRU (use_R src []) [dst]
- LZCNT _ src dst -> mkRU (use_R src []) [dst]
- TZCNT _ src dst -> mkRU (use_R src []) [dst]
- BSF _ src dst -> mkRU (use_R src []) [dst]
- BSR _ src dst -> mkRU (use_R src []) [dst]
-
- PDEP _ src mask dst -> mkRU (use_R src $ use_R mask []) [dst]
- PEXT _ src mask dst -> mkRU (use_R src $ use_R mask []) [dst]
-
- -- note: might be a better way to do this
- PREFETCH _ _ src -> mkRU (use_R src []) []
- LOCK i -> x86_regUsageOfInstr platform i
- XADD _ src dst -> usageMM src dst
- CMPXCHG _ src dst -> usageRMM src dst (OpReg eax)
- MFENCE -> noUsage
-
- _other -> panic "regUsage: unrecognised instr"
- where
- -- # Definitions
- --
- -- Written: If the operand is a register, it's written. If it's an
- -- address, registers mentioned in the address are read.
- --
- -- Modified: If the operand is a register, it's both read and
- -- written. If it's an address, registers mentioned in the address
- -- are read.
-
- -- 2 operand form; first operand Read; second Written
- usageRW :: Operand -> Operand -> RegUsage
- usageRW op (OpReg reg) = mkRU (use_R op []) [reg]
- usageRW op (OpAddr ea) = mkRUR (use_R op $! use_EA ea [])
- usageRW _ _ = panic "X86.RegInfo.usageRW: no match"
-
- -- 2 operand form; first operand Read; second Modified
- usageRM :: Operand -> Operand -> RegUsage
- usageRM op (OpReg reg) = mkRU (use_R op [reg]) [reg]
- usageRM op (OpAddr ea) = mkRUR (use_R op $! use_EA ea [])
- usageRM _ _ = panic "X86.RegInfo.usageRM: no match"
-
- -- 2 operand form; first operand Modified; second Modified
- usageMM :: Operand -> Operand -> RegUsage
- usageMM (OpReg src) (OpReg dst) = mkRU [src, dst] [src, dst]
- usageMM (OpReg src) (OpAddr ea) = mkRU (use_EA ea [src]) [src]
- usageMM _ _ = panic "X86.RegInfo.usageMM: no match"
-
- -- 3 operand form; first operand Read; second Modified; third Modified
- usageRMM :: Operand -> Operand -> Operand -> RegUsage
- usageRMM (OpReg src) (OpReg dst) (OpReg reg) = mkRU [src, dst, reg] [dst, reg]
- usageRMM (OpReg src) (OpAddr ea) (OpReg reg) = mkRU (use_EA ea [src, reg]) [reg]
- usageRMM _ _ _ = panic "X86.RegInfo.usageRMM: no match"
-
- -- 1 operand form; operand Modified
- usageM :: Operand -> RegUsage
- usageM (OpReg reg) = mkRU [reg] [reg]
- usageM (OpAddr ea) = mkRUR (use_EA ea [])
- usageM _ = panic "X86.RegInfo.usageM: no match"
-
- -- Registers defd when an operand is written.
- def_W (OpReg reg) = [reg]
- def_W (OpAddr _ ) = []
- def_W _ = panic "X86.RegInfo.def_W: no match"
-
- -- Registers used when an operand is read.
- use_R (OpReg reg) tl = reg : tl
- use_R (OpImm _) tl = tl
- use_R (OpAddr ea) tl = use_EA ea tl
-
- -- Registers used to compute an effective address.
- use_EA (ImmAddr _ _) tl = tl
- use_EA (AddrBaseIndex base index _) tl =
- use_base base $! use_index index tl
- where use_base (EABaseReg r) tl = r : tl
- use_base _ tl = tl
- use_index EAIndexNone tl = tl
- use_index (EAIndex i _) tl = i : tl
-
- mkRUR src = src' `seq` RU src' []
- where src' = filter (interesting platform) src
-
- mkRU src dst = src' `seq` dst' `seq` RU src' dst'
- where src' = filter (interesting platform) src
- dst' = filter (interesting platform) dst
-
--- | Is this register interesting for the register allocator?
-interesting :: Platform -> Reg -> Bool
-interesting _ (RegVirtual _) = True
-interesting platform (RegReal (RealRegSingle i)) = freeReg platform i
-interesting _ (RegReal (RealRegPair{})) = panic "X86.interesting: no reg pairs on this arch"
-
-
-
--- | Applies the supplied function to all registers in instructions.
--- Typically used to change virtual registers to real registers.
-x86_patchRegsOfInstr :: Instr -> (Reg -> Reg) -> Instr
-x86_patchRegsOfInstr instr env
- = case instr of
- MOV fmt src dst -> patch2 (MOV fmt) src dst
- CMOV cc fmt src dst -> CMOV cc fmt (patchOp src) (env dst)
- MOVZxL fmt src dst -> patch2 (MOVZxL fmt) src dst
- MOVSxL fmt src dst -> patch2 (MOVSxL fmt) src dst
- LEA fmt src dst -> patch2 (LEA fmt) src dst
- ADD fmt src dst -> patch2 (ADD fmt) src dst
- ADC fmt src dst -> patch2 (ADC fmt) src dst
- SUB fmt src dst -> patch2 (SUB fmt) src dst
- SBB fmt src dst -> patch2 (SBB fmt) src dst
- IMUL fmt src dst -> patch2 (IMUL fmt) src dst
- IMUL2 fmt src -> patch1 (IMUL2 fmt) src
- MUL fmt src dst -> patch2 (MUL fmt) src dst
- MUL2 fmt src -> patch1 (MUL2 fmt) src
- IDIV fmt op -> patch1 (IDIV fmt) op
- DIV fmt op -> patch1 (DIV fmt) op
- ADD_CC fmt src dst -> patch2 (ADD_CC fmt) src dst
- SUB_CC fmt src dst -> patch2 (SUB_CC fmt) src dst
- AND fmt src dst -> patch2 (AND fmt) src dst
- OR fmt src dst -> patch2 (OR fmt) src dst
- XOR fmt src dst -> patch2 (XOR fmt) src dst
- NOT fmt op -> patch1 (NOT fmt) op
- BSWAP fmt reg -> BSWAP fmt (env reg)
- NEGI fmt op -> patch1 (NEGI fmt) op
- SHL fmt imm dst -> patch1 (SHL fmt imm) dst
- SAR fmt imm dst -> patch1 (SAR fmt imm) dst
- SHR fmt imm dst -> patch1 (SHR fmt imm) dst
- BT fmt imm src -> patch1 (BT fmt imm) src
- TEST fmt src dst -> patch2 (TEST fmt) src dst
- CMP fmt src dst -> patch2 (CMP fmt) src dst
- PUSH fmt op -> patch1 (PUSH fmt) op
- POP fmt op -> patch1 (POP fmt) op
- SETCC cond op -> patch1 (SETCC cond) op
- JMP op regs -> JMP (patchOp op) regs
- JMP_TBL op ids s lbl -> JMP_TBL (patchOp op) ids s lbl
-
- -- literally only support storing the top x87 stack value st(0)
- X87Store fmt dst -> X87Store fmt (lookupAddr dst)
-
- CVTSS2SD src dst -> CVTSS2SD (env src) (env dst)
- CVTSD2SS src dst -> CVTSD2SS (env src) (env dst)
- CVTTSS2SIQ fmt src dst -> CVTTSS2SIQ fmt (patchOp src) (env dst)
- CVTTSD2SIQ fmt src dst -> CVTTSD2SIQ fmt (patchOp src) (env dst)
- CVTSI2SS fmt src dst -> CVTSI2SS fmt (patchOp src) (env dst)
- CVTSI2SD fmt src dst -> CVTSI2SD fmt (patchOp src) (env dst)
- FDIV fmt src dst -> FDIV fmt (patchOp src) (patchOp dst)
- SQRT fmt src dst -> SQRT fmt (patchOp src) (env dst)
-
- CALL (Left _) _ -> instr
- CALL (Right reg) p -> CALL (Right (env reg)) p
-
- FETCHGOT reg -> FETCHGOT (env reg)
- FETCHPC reg -> FETCHPC (env reg)
-
- NOP -> instr
- COMMENT _ -> instr
- LOCATION {} -> instr
- UNWIND {} -> instr
- DELTA _ -> instr
-
- JXX _ _ -> instr
- JXX_GBL _ _ -> instr
- CLTD _ -> instr
-
- POPCNT fmt src dst -> POPCNT fmt (patchOp src) (env dst)
- LZCNT fmt src dst -> LZCNT fmt (patchOp src) (env dst)
- TZCNT fmt src dst -> TZCNT fmt (patchOp src) (env dst)
- PDEP fmt src mask dst -> PDEP fmt (patchOp src) (patchOp mask) (env dst)
- PEXT fmt src mask dst -> PEXT fmt (patchOp src) (patchOp mask) (env dst)
- BSF fmt src dst -> BSF fmt (patchOp src) (env dst)
- BSR fmt src dst -> BSR fmt (patchOp src) (env dst)
-
- PREFETCH lvl format src -> PREFETCH lvl format (patchOp src)
-
- LOCK i -> LOCK (x86_patchRegsOfInstr i env)
- XADD fmt src dst -> patch2 (XADD fmt) src dst
- CMPXCHG fmt src dst -> patch2 (CMPXCHG fmt) src dst
- MFENCE -> instr
-
- _other -> panic "patchRegs: unrecognised instr"
-
- where
- patch1 :: (Operand -> a) -> Operand -> a
- patch1 insn op = insn $! patchOp op
- patch2 :: (Operand -> Operand -> a) -> Operand -> Operand -> a
- patch2 insn src dst = (insn $! patchOp src) $! patchOp dst
-
- patchOp (OpReg reg) = OpReg $! env reg
- patchOp (OpImm imm) = OpImm imm
- patchOp (OpAddr ea) = OpAddr $! lookupAddr ea
-
- lookupAddr (ImmAddr imm off) = ImmAddr imm off
- lookupAddr (AddrBaseIndex base index disp)
- = ((AddrBaseIndex $! lookupBase base) $! lookupIndex index) disp
- where
- lookupBase EABaseNone = EABaseNone
- lookupBase EABaseRip = EABaseRip
- lookupBase (EABaseReg r) = EABaseReg $! env r
-
- lookupIndex EAIndexNone = EAIndexNone
- lookupIndex (EAIndex r i) = (EAIndex $! env r) i
-
-
---------------------------------------------------------------------------------
-x86_isJumpishInstr
- :: Instr -> Bool
-
-x86_isJumpishInstr instr
- = case instr of
- JMP{} -> True
- JXX{} -> True
- JXX_GBL{} -> True
- JMP_TBL{} -> True
- CALL{} -> True
- _ -> False
-
-
-x86_jumpDestsOfInstr
- :: Instr
- -> [BlockId]
-
-x86_jumpDestsOfInstr insn
- = case insn of
- JXX _ id -> [id]
- JMP_TBL _ ids _ _ -> [id | Just (DestBlockId id) <- ids]
- _ -> []
-
-
-x86_patchJumpInstr
- :: Instr -> (BlockId -> BlockId) -> Instr
-
-x86_patchJumpInstr insn patchF
- = case insn of
- JXX cc id -> JXX cc (patchF id)
- JMP_TBL op ids section lbl
- -> JMP_TBL op (map (fmap (patchJumpDest patchF)) ids) section lbl
- _ -> insn
- where
- patchJumpDest f (DestBlockId id) = DestBlockId (f id)
- patchJumpDest _ dest = dest
-
-
-
-
-
--- -----------------------------------------------------------------------------
--- | Make a spill instruction.
-x86_mkSpillInstr
- :: DynFlags
- -> Reg -- register to spill
- -> Int -- current stack delta
- -> Int -- spill slot to use
- -> Instr
-
-x86_mkSpillInstr dflags reg delta slot
- = let off = spillSlotToOffset platform slot - delta
- in
- case targetClassOfReg platform reg of
- RcInteger -> MOV (archWordFormat is32Bit)
- (OpReg reg) (OpAddr (spRel dflags off))
- RcDouble -> MOV FF64 (OpReg reg) (OpAddr (spRel dflags off))
- _ -> panic "X86.mkSpillInstr: no match"
- where platform = targetPlatform dflags
- is32Bit = target32Bit platform
-
--- | Make a spill reload instruction.
-x86_mkLoadInstr
- :: DynFlags
- -> Reg -- register to load
- -> Int -- current stack delta
- -> Int -- spill slot to use
- -> Instr
-
-x86_mkLoadInstr dflags reg delta slot
- = let off = spillSlotToOffset platform slot - delta
- in
- case targetClassOfReg platform reg of
- RcInteger -> MOV (archWordFormat is32Bit)
- (OpAddr (spRel dflags off)) (OpReg reg)
- RcDouble -> MOV FF64 (OpAddr (spRel dflags off)) (OpReg reg)
- _ -> panic "X86.x86_mkLoadInstr"
- where platform = targetPlatform dflags
- is32Bit = target32Bit platform
-
-spillSlotSize :: Platform -> Int
-spillSlotSize dflags = if is32Bit then 12 else 8
- where is32Bit = target32Bit dflags
-
-maxSpillSlots :: DynFlags -> Int
-maxSpillSlots dflags
- = ((rESERVED_C_STACK_BYTES dflags - 64) `div` spillSlotSize (targetPlatform dflags)) - 1
--- = 0 -- useful for testing allocMoreStack
-
--- number of bytes that the stack pointer should be aligned to
-stackAlign :: Int
-stackAlign = 16
-
--- convert a spill slot number to a *byte* offset, with no sign:
--- decide on a per arch basis whether you are spilling above or below
--- the C stack pointer.
-spillSlotToOffset :: Platform -> Int -> Int
-spillSlotToOffset platform slot
- = 64 + spillSlotSize platform * slot
-
---------------------------------------------------------------------------------
-
--- | See if this instruction is telling us the current C stack delta
-x86_takeDeltaInstr
- :: Instr
- -> Maybe Int
-
-x86_takeDeltaInstr instr
- = case instr of
- DELTA i -> Just i
- _ -> Nothing
-
-
-x86_isMetaInstr
- :: Instr
- -> Bool
-
-x86_isMetaInstr instr
- = case instr of
- COMMENT{} -> True
- LOCATION{} -> True
- LDATA{} -> True
- NEWBLOCK{} -> True
- UNWIND{} -> True
- DELTA{} -> True
- _ -> False
-
-
-
---- TODO: why is there
--- | Make a reg-reg move instruction.
--- On SPARC v8 there are no instructions to move directly between
--- floating point and integer regs. If we need to do that then we
--- have to go via memory.
---
-x86_mkRegRegMoveInstr
- :: Platform
- -> Reg
- -> Reg
- -> Instr
-
-x86_mkRegRegMoveInstr platform src dst
- = case targetClassOfReg platform src of
- RcInteger -> case platformArch platform of
- ArchX86 -> MOV II32 (OpReg src) (OpReg dst)
- ArchX86_64 -> MOV II64 (OpReg src) (OpReg dst)
- _ -> panic "x86_mkRegRegMoveInstr: Bad arch"
- RcDouble -> MOV FF64 (OpReg src) (OpReg dst)
- -- this code is the lie we tell ourselves because both float and double
- -- use the same register class.on x86_64 and x86 32bit with SSE2,
- -- more plainly, both use the XMM registers
- _ -> panic "X86.RegInfo.mkRegRegMoveInstr: no match"
-
--- | Check whether an instruction represents a reg-reg move.
--- The register allocator attempts to eliminate reg->reg moves whenever it can,
--- by assigning the src and dest temporaries to the same real register.
---
-x86_takeRegRegMoveInstr
- :: Instr
- -> Maybe (Reg,Reg)
-
-x86_takeRegRegMoveInstr (MOV _ (OpReg r1) (OpReg r2))
- = Just (r1,r2)
-
-x86_takeRegRegMoveInstr _ = Nothing
-
-
--- | Make an unconditional branch instruction.
-x86_mkJumpInstr
- :: BlockId
- -> [Instr]
-
-x86_mkJumpInstr id
- = [JXX ALWAYS id]
-
--- Note [Windows stack layout]
--- | On most OSes the kernel will place a guard page after the current stack
--- page. If you allocate larger than a page worth you may jump over this
--- guard page. Not only is this a security issue, but on certain OSes such
--- as Windows a new page won't be allocated if you don't hit the guard. This
--- will cause a segfault or access fault.
---
--- This function defines if the current allocation amount requires a probe.
--- On Windows (for now) we emit a call to _chkstk for this. For other OSes
--- this is not yet implemented.
--- See https://docs.microsoft.com/en-us/windows/desktop/DevNotes/-win32-chkstk
--- The Windows stack looks like this:
---
--- +-------------------+
--- | SP |
--- +-------------------+
--- | |
--- | GUARD PAGE |
--- | |
--- +-------------------+
--- | |
--- | |
--- | UNMAPPED |
--- | |
--- | |
--- +-------------------+
---
--- In essence each allocation larger than a page size needs to be chunked and
--- a probe emitted after each page allocation. You have to hit the guard
--- page so the kernel can map in the next page, otherwise you'll segfault.
---
-needs_probe_call :: Platform -> Int -> Bool
-needs_probe_call platform amount
- = case platformOS platform of
- OSMinGW32 -> case platformArch platform of
- ArchX86 -> amount > (4 * 1024)
- ArchX86_64 -> amount > (8 * 1024)
- _ -> False
- _ -> False
-
-x86_mkStackAllocInstr
- :: Platform
- -> Int
- -> [Instr]
-x86_mkStackAllocInstr platform amount
- = case platformOS platform of
- OSMinGW32 ->
- -- These will clobber AX but this should be ok because
- --
- -- 1. It is the first thing we do when entering the closure and AX is
- -- a caller saved registers on Windows both on x86_64 and x86.
- --
- -- 2. The closures are only entered via a call or longjmp in which case
- -- there are no expectations for volatile registers.
- --
- -- 3. When the target is a local branch point it is re-targeted
- -- after the dealloc, preserving #2. See note [extra spill slots].
- --
- -- We emit a call because the stack probes are quite involved and
- -- would bloat code size a lot. GHC doesn't really have an -Os.
- -- __chkstk is guaranteed to leave all nonvolatile registers and AX
- -- untouched. It's part of the standard prologue code for any Windows
- -- function dropping the stack more than a page.
- -- See Note [Windows stack layout]
- case platformArch platform of
- ArchX86 | needs_probe_call platform amount ->
- [ MOV II32 (OpImm (ImmInt amount)) (OpReg eax)
- , CALL (Left $ strImmLit "___chkstk_ms") [eax]
- , SUB II32 (OpReg eax) (OpReg esp)
- ]
- | otherwise ->
- [ SUB II32 (OpImm (ImmInt amount)) (OpReg esp)
- , TEST II32 (OpReg esp) (OpReg esp)
- ]
- ArchX86_64 | needs_probe_call platform amount ->
- [ MOV II64 (OpImm (ImmInt amount)) (OpReg rax)
- , CALL (Left $ strImmLit "___chkstk_ms") [rax]
- , SUB II64 (OpReg rax) (OpReg rsp)
- ]
- | otherwise ->
- [ SUB II64 (OpImm (ImmInt amount)) (OpReg rsp)
- , TEST II64 (OpReg rsp) (OpReg rsp)
- ]
- _ -> panic "x86_mkStackAllocInstr"
- _ ->
- case platformArch platform of
- ArchX86 -> [ SUB II32 (OpImm (ImmInt amount)) (OpReg esp) ]
- ArchX86_64 -> [ SUB II64 (OpImm (ImmInt amount)) (OpReg rsp) ]
- _ -> panic "x86_mkStackAllocInstr"
-
-x86_mkStackDeallocInstr
- :: Platform
- -> Int
- -> [Instr]
-x86_mkStackDeallocInstr platform amount
- = case platformArch platform of
- ArchX86 -> [ADD II32 (OpImm (ImmInt amount)) (OpReg esp)]
- ArchX86_64 -> [ADD II64 (OpImm (ImmInt amount)) (OpReg rsp)]
- _ -> panic "x86_mkStackDeallocInstr"
-
-
---
--- Note [extra spill slots]
---
--- If the register allocator used more spill slots than we have
--- pre-allocated (rESERVED_C_STACK_BYTES), then we must allocate more
--- C stack space on entry and exit from this proc. Therefore we
--- insert a "sub $N, %rsp" at every entry point, and an "add $N, %rsp"
--- before every non-local jump.
---
--- This became necessary when the new codegen started bundling entire
--- functions together into one proc, because the register allocator
--- assigns a different stack slot to each virtual reg within a proc.
--- To avoid using so many slots we could also:
---
--- - split up the proc into connected components before code generator
---
--- - rename the virtual regs, so that we re-use vreg names and hence
--- stack slots for non-overlapping vregs.
---
--- Note that when a block is both a non-local entry point (with an
--- info table) and a local branch target, we have to split it into
--- two, like so:
---
--- <info table>
--- L:
--- <code>
---
--- becomes
---
--- <info table>
--- L:
--- subl $rsp, N
--- jmp Lnew
--- Lnew:
--- <code>
---
--- and all branches pointing to L are retargetted to point to Lnew.
--- Otherwise, we would repeat the $rsp adjustment for each branch to
--- L.
---
--- Returns a list of (L,Lnew) pairs.
---
-allocMoreStack
- :: Platform
- -> Int
- -> NatCmmDecl statics X86.Instr.Instr
- -> UniqSM (NatCmmDecl statics X86.Instr.Instr, [(BlockId,BlockId)])
-
-allocMoreStack _ _ top@(CmmData _ _) = return (top,[])
-allocMoreStack platform slots proc@(CmmProc info lbl live (ListGraph code)) = do
- let entries = entryBlocks proc
-
- uniqs <- replicateM (length entries) getUniqueM
-
- let
- delta = ((x + stackAlign - 1) `quot` stackAlign) * stackAlign -- round up
- where x = slots * spillSlotSize platform -- sp delta
-
- alloc = mkStackAllocInstr platform delta
- dealloc = mkStackDeallocInstr platform delta
-
- retargetList = (zip entries (map mkBlockId uniqs))
-
- new_blockmap :: LabelMap BlockId
- new_blockmap = mapFromList retargetList
-
- insert_stack_insns (BasicBlock id insns)
- | Just new_blockid <- mapLookup id new_blockmap
- = [ BasicBlock id $ alloc ++ [JXX ALWAYS new_blockid]
- , BasicBlock new_blockid block' ]
- | otherwise
- = [ BasicBlock id block' ]
- where
- block' = foldr insert_dealloc [] insns
-
- insert_dealloc insn r = case insn of
- JMP _ _ -> dealloc ++ (insn : r)
- JXX_GBL _ _ -> panic "insert_dealloc: cannot handle JXX_GBL"
- _other -> x86_patchJumpInstr insn retarget : r
- where retarget b = fromMaybe b (mapLookup b new_blockmap)
-
- new_code = concatMap insert_stack_insns code
- -- in
- return (CmmProc info lbl live (ListGraph new_code), retargetList)
-
-data JumpDest = DestBlockId BlockId | DestImm Imm
-
--- Debug Instance
-instance Outputable JumpDest where
- ppr (DestBlockId bid) = text "jd<blk>:" <> ppr bid
- ppr (DestImm _imm) = text "jd<imm>:noShow"
-
-
-getJumpDestBlockId :: JumpDest -> Maybe BlockId
-getJumpDestBlockId (DestBlockId bid) = Just bid
-getJumpDestBlockId _ = Nothing
-
-canShortcut :: Instr -> Maybe JumpDest
-canShortcut (JXX ALWAYS id) = Just (DestBlockId id)
-canShortcut (JMP (OpImm imm) _) = Just (DestImm imm)
-canShortcut _ = Nothing
-
-
--- This helper shortcuts a sequence of branches.
--- The blockset helps avoid following cycles.
-shortcutJump :: (BlockId -> Maybe JumpDest) -> Instr -> Instr
-shortcutJump fn insn = shortcutJump' fn (setEmpty :: LabelSet) insn
- where
- shortcutJump' :: (BlockId -> Maybe JumpDest) -> LabelSet -> Instr -> Instr
- shortcutJump' fn seen insn@(JXX cc id) =
- if setMember id seen then insn
- else case fn id of
- Nothing -> insn
- Just (DestBlockId id') -> shortcutJump' fn seen' (JXX cc id')
- Just (DestImm imm) -> shortcutJump' fn seen' (JXX_GBL cc imm)
- where seen' = setInsert id seen
- shortcutJump' fn _ (JMP_TBL addr blocks section tblId) =
- let updateBlock (Just (DestBlockId bid)) =
- case fn bid of
- Nothing -> Just (DestBlockId bid )
- Just dest -> Just dest
- updateBlock dest = dest
- blocks' = map updateBlock blocks
- in JMP_TBL addr blocks' section tblId
- shortcutJump' _ _ other = other
-
--- Here because it knows about JumpDest
-shortcutStatics :: (BlockId -> Maybe JumpDest) -> (Alignment, RawCmmStatics) -> (Alignment, RawCmmStatics)
-shortcutStatics fn (align, RawCmmStatics lbl statics)
- = (align, RawCmmStatics lbl $ map (shortcutStatic fn) statics)
- -- we need to get the jump tables, so apply the mapping to the entries
- -- of a CmmData too.
-
-shortcutLabel :: (BlockId -> Maybe JumpDest) -> CLabel -> CLabel
-shortcutLabel fn lab
- | Just blkId <- maybeLocalBlockLabel lab = shortBlockId fn emptyUniqSet blkId
- | otherwise = lab
-
-shortcutStatic :: (BlockId -> Maybe JumpDest) -> CmmStatic -> CmmStatic
-shortcutStatic fn (CmmStaticLit (CmmLabel lab))
- = CmmStaticLit (CmmLabel (shortcutLabel fn lab))
-shortcutStatic fn (CmmStaticLit (CmmLabelDiffOff lbl1 lbl2 off w))
- = CmmStaticLit (CmmLabelDiffOff (shortcutLabel fn lbl1) lbl2 off w)
- -- slightly dodgy, we're ignoring the second label, but this
- -- works with the way we use CmmLabelDiffOff for jump tables now.
-shortcutStatic _ other_static
- = other_static
-
-shortBlockId
- :: (BlockId -> Maybe JumpDest)
- -> UniqSet Unique
- -> BlockId
- -> CLabel
-
-shortBlockId fn seen blockid =
- case (elementOfUniqSet uq seen, fn blockid) of
- (True, _) -> blockLbl blockid
- (_, Nothing) -> blockLbl blockid
- (_, Just (DestBlockId blockid')) -> shortBlockId fn (addOneToUniqSet seen uq) blockid'
- (_, Just (DestImm (ImmCLbl lbl))) -> lbl
- (_, _other) -> panic "shortBlockId"
- where uq = getUnique blockid