diff options
27 files changed, 343 insertions, 985 deletions
diff --git a/compiler/cmm/CmmCallConv.hs b/compiler/cmm/CmmCallConv.hs index f338415adc..078390638d 100644 --- a/compiler/cmm/CmmCallConv.hs +++ b/compiler/cmm/CmmCallConv.hs @@ -81,7 +81,6 @@ assignArgumentsPos dflags off conv arg_ty reps = (stk_off, assignments) | passFloatInXmm -> k (RegisterParam (DoubleReg s), (vs, fs, ds, ls, ss)) (W64, (vs, fs, d:ds, ls, ss)) | not passFloatInXmm -> k (RegisterParam d, (vs, fs, ds, ls, ss)) - (W80, _) -> panic "F80 unsupported register type" _ -> (assts, (r:rs)) int = case (w, regs) of (W128, _) -> panic "W128 unsupported register type" @@ -100,6 +99,7 @@ assignArgumentsPos dflags off conv arg_ty reps = (stk_off, assignments) passFloatArgsInXmm :: DynFlags -> Bool passFloatArgsInXmm dflags = case platformArch (targetPlatform dflags) of ArchX86_64 -> True + ArchX86 -> False _ -> False -- We used to spill vector registers to the stack since the LLVM backend didn't diff --git a/compiler/cmm/CmmExpr.hs b/compiler/cmm/CmmExpr.hs index 601b1d9b85..dd4e777436 100644 --- a/compiler/cmm/CmmExpr.hs +++ b/compiler/cmm/CmmExpr.hs @@ -474,6 +474,9 @@ instance Eq GlobalReg where FloatReg i == FloatReg j = i==j DoubleReg i == DoubleReg j = i==j LongReg i == LongReg j = i==j + -- NOTE: XMM, YMM, ZMM registers actually are the same registers + -- at least with respect to store at YMM i and then read from XMM i + -- and similarly for ZMM etc. XmmReg i == XmmReg j = i==j YmmReg i == YmmReg j = i==j ZmmReg i == ZmmReg j = i==j @@ -584,6 +587,9 @@ globalRegType dflags (VanillaReg _ VNonGcPtr) = bWord dflags globalRegType _ (FloatReg _) = cmmFloat W32 globalRegType _ (DoubleReg _) = cmmFloat W64 globalRegType _ (LongReg _) = cmmBits W64 +-- TODO: improve the internal model of SIMD/vectorized registers +-- the right design SHOULd improve handling of float and double code too. +-- see remarks in "NOTE [SIMD Design for the future]"" in StgCmmPrim globalRegType _ (XmmReg _) = cmmVec 4 (cmmBits W32) globalRegType _ (YmmReg _) = cmmVec 8 (cmmBits W32) globalRegType _ (ZmmReg _) = cmmVec 16 (cmmBits W32) diff --git a/compiler/cmm/CmmType.hs b/compiler/cmm/CmmType.hs index 0d6e770904..276fbff534 100644 --- a/compiler/cmm/CmmType.hs +++ b/compiler/cmm/CmmType.hs @@ -166,9 +166,6 @@ isFloat64 _other = False ----------------------------------------------------------------------------- data Width = W8 | W16 | W32 | W64 - | W80 -- Extended double-precision float, - -- used in x86 native codegen only. - -- (we use Ord, so it'd better be in this order) | W128 | W256 | W512 @@ -185,7 +182,7 @@ mrStr W64 = sLit("W64") mrStr W128 = sLit("W128") mrStr W256 = sLit("W256") mrStr W512 = sLit("W512") -mrStr W80 = sLit("W80") + -------- Common Widths ------------ @@ -222,7 +219,7 @@ widthInBits W64 = 64 widthInBits W128 = 128 widthInBits W256 = 256 widthInBits W512 = 512 -widthInBits W80 = 80 + widthInBytes :: Width -> Int widthInBytes W8 = 1 @@ -232,7 +229,7 @@ widthInBytes W64 = 8 widthInBytes W128 = 16 widthInBytes W256 = 32 widthInBytes W512 = 64 -widthInBytes W80 = 10 + widthFromBytes :: Int -> Width widthFromBytes 1 = W8 @@ -242,7 +239,7 @@ widthFromBytes 8 = W64 widthFromBytes 16 = W128 widthFromBytes 32 = W256 widthFromBytes 64 = W512 -widthFromBytes 10 = W80 + widthFromBytes n = pprPanic "no width for given number of bytes" (ppr n) -- log_2 of the width in bytes, useful for generating shifts. @@ -254,7 +251,7 @@ widthInLog W64 = 3 widthInLog W128 = 4 widthInLog W256 = 5 widthInLog W512 = 6 -widthInLog W80 = panic "widthInLog: F80" + -- widening / narrowing diff --git a/compiler/codeGen/StgCmmPrim.hs b/compiler/codeGen/StgCmmPrim.hs index 63d8f7bc0e..0adb2272ee 100644 --- a/compiler/codeGen/StgCmmPrim.hs +++ b/compiler/codeGen/StgCmmPrim.hs @@ -1727,8 +1727,38 @@ vecElemProjectCast dflags WordVec W32 = Just (mo_u_32ToWord dflags) vecElemProjectCast _ WordVec W64 = Nothing vecElemProjectCast _ _ _ = Nothing + +-- NOTE [SIMD Design for the future] -- Check to make sure that we can generate code for the specified vector type -- given the current set of dynamic flags. +-- Currently these checks are specific to x86 and x86_64 architecture. +-- This should be fixed! +-- In particular, +-- 1) Add better support for other architectures! (this may require a redesign) +-- 2) Decouple design choices from LLVM's pseudo SIMD model! +-- The high level LLVM naive rep makes per CPU family SIMD generation is own +-- optimization problem, and hides important differences in eg ARM vs x86_64 simd +-- 3) Depending on the architecture, the SIMD registers may also support general +-- computations on Float/Double/Word/Int scalars, but currently on +-- for example x86_64, we always put Word/Int (or sized) in GPR +-- (general purpose) registers. Would relaxing that allow for +-- useful optimization opportunities? +-- Phrased differently, it is worth experimenting with supporting +-- different register mapping strategies than we currently have, especially if +-- someday we want SIMD to be a first class denizen in GHC along with scalar +-- values! +-- The current design with respect to register mapping of scalars could +-- very well be the best,but exploring the design space and doing careful +-- measurments is the only only way to validate that. +-- In some next generation CPU ISAs, notably RISC V, the SIMD extension +-- includes support for a sort of run time CPU dependent vectorization parameter, +-- where a loop may act upon a single scalar each iteration OR some 2,4,8 ... +-- element chunk! Time will tell if that direction sees wide adoption, +-- but it is from that context that unifying our handling of simd and scalars +-- may benefit. It is not likely to benefit current architectures, though +-- it may very well be a design perspective that helps guide improving the NCG. + + checkVecCompatibility :: DynFlags -> PrimOpVecCat -> Length -> Width -> FCode () checkVecCompatibility dflags vcat l w = do when (hscTarget dflags /= HscLlvm) $ do diff --git a/compiler/llvmGen/LlvmCodeGen/Base.hs b/compiler/llvmGen/LlvmCodeGen/Base.hs index e978d6feaf..5ebb7b3830 100644 --- a/compiler/llvmGen/LlvmCodeGen/Base.hs +++ b/compiler/llvmGen/LlvmCodeGen/Base.hs @@ -97,7 +97,6 @@ cmmToLlvmType ty | isVecType ty = LMVector (vecLength ty) (cmmToLlvmType (vecE widthToLlvmFloat :: Width -> LlvmType widthToLlvmFloat W32 = LMFloat widthToLlvmFloat W64 = LMDouble -widthToLlvmFloat W80 = LMFloat80 widthToLlvmFloat W128 = LMFloat128 widthToLlvmFloat w = panic $ "widthToLlvmFloat: Bad float size: " ++ show w diff --git a/compiler/main/DynFlags.hs b/compiler/main/DynFlags.hs index 6f2dffff3c..ced0be5506 100644 --- a/compiler/main/DynFlags.hs +++ b/compiler/main/DynFlags.hs @@ -5833,20 +5833,24 @@ data SseVersion = SSE1 isSseEnabled :: DynFlags -> Bool isSseEnabled dflags = case platformArch (targetPlatform dflags) of ArchX86_64 -> True - ArchX86 -> sseVersion dflags >= Just SSE1 + ArchX86 -> True _ -> False isSse2Enabled :: DynFlags -> Bool isSse2Enabled dflags = case platformArch (targetPlatform dflags) of - ArchX86_64 -> -- SSE2 is fixed on for x86_64. It would be - -- possible to make it optional, but we'd need to - -- fix at least the foreign call code where the - -- calling convention specifies the use of xmm regs, - -- and possibly other places. - True - ArchX86 -> sseVersion dflags >= Just SSE2 + -- We Assume SSE1 and SSE2 operations are available on both + -- x86 and x86_64. Historically we didn't default to SSE2 and + -- SSE1 on x86, which results in defacto nondeterminism for how + -- rounding behaves in the associated x87 floating point instructions + -- because variations in the spill/fpu stack placement of arguments for + -- operations would change the precision and final result of what + -- would otherwise be the same expressions with respect to single or + -- double precision IEEE floating point computations. + ArchX86_64 -> True + ArchX86 -> True _ -> False + isSse4_2Enabled :: DynFlags -> Bool isSse4_2Enabled dflags = sseVersion dflags >= Just SSE42 diff --git a/compiler/nativeGen/AsmCodeGen.hs b/compiler/nativeGen/AsmCodeGen.hs index 84c6a84845..c78ea5fcb1 100644 --- a/compiler/nativeGen/AsmCodeGen.hs +++ b/compiler/nativeGen/AsmCodeGen.hs @@ -179,7 +179,7 @@ nativeCodeGen dflags this_mod modLoc h us cmms x86NcgImpl :: DynFlags -> NcgImpl (Alignment, CmmStatics) X86.Instr.Instr X86.Instr.JumpDest x86NcgImpl dflags - = (x86_64NcgImpl dflags) { ncg_x86fp_kludge = map x86fp_kludge } + = (x86_64NcgImpl dflags) x86_64NcgImpl :: DynFlags -> NcgImpl (Alignment, CmmStatics) X86.Instr.Instr X86.Instr.JumpDest @@ -194,7 +194,6 @@ x86_64NcgImpl dflags ,pprNatCmmDecl = X86.Ppr.pprNatCmmDecl ,maxSpillSlots = X86.Instr.maxSpillSlots dflags ,allocatableRegs = X86.Regs.allocatableRegs platform - ,ncg_x86fp_kludge = id ,ncgAllocMoreStack = X86.Instr.allocMoreStack platform ,ncgExpandTop = id ,ncgMakeFarBranches = const id @@ -215,7 +214,6 @@ ppcNcgImpl dflags ,pprNatCmmDecl = PPC.Ppr.pprNatCmmDecl ,maxSpillSlots = PPC.Instr.maxSpillSlots dflags ,allocatableRegs = PPC.Regs.allocatableRegs platform - ,ncg_x86fp_kludge = id ,ncgAllocMoreStack = PPC.Instr.allocMoreStack platform ,ncgExpandTop = id ,ncgMakeFarBranches = PPC.Instr.makeFarBranches @@ -236,7 +234,6 @@ sparcNcgImpl dflags ,pprNatCmmDecl = SPARC.Ppr.pprNatCmmDecl ,maxSpillSlots = SPARC.Instr.maxSpillSlots dflags ,allocatableRegs = SPARC.Regs.allocatableRegs - ,ncg_x86fp_kludge = id ,ncgAllocMoreStack = noAllocMoreStack ,ncgExpandTop = map SPARC.CodeGen.Expand.expandTop ,ncgMakeFarBranches = const id @@ -680,19 +677,10 @@ cmmNativeGen dflags this_mod modLoc ncgImpl us fileIds dbgMap cmm count foldl' (\m (from,to) -> addImmediateSuccessor from to m ) cfgWithFixupBlks stack_updt_blks - ---- x86fp_kludge. This pass inserts ffree instructions to clear - ---- the FPU stack on x86. The x86 ABI requires that the FPU stack - ---- is clear, and library functions can return odd results if it - ---- isn't. - ---- - ---- NB. must happen before shortcutBranches, because that - ---- generates JXX_GBLs which we can't fix up in x86fp_kludge. - let kludged = {-# SCC "x86fp_kludge" #-} ncg_x86fp_kludge ncgImpl alloced - ---- generate jump tables let tabled = {-# SCC "generateJumpTables" #-} - generateJumpTables ncgImpl kludged + generateJumpTables ncgImpl alloced dumpIfSet_dyn dflags Opt_D_dump_cfg_weights "CFG Update information" @@ -787,12 +775,6 @@ checkLayout procsUnsequenced procsSequenced = getBlockIds (CmmProc _ _ _ (ListGraph blocks)) = setFromList $ map blockId blocks - -x86fp_kludge :: NatCmmDecl (Alignment, CmmStatics) X86.Instr.Instr -> NatCmmDecl (Alignment, CmmStatics) X86.Instr.Instr -x86fp_kludge top@(CmmData _ _) = top -x86fp_kludge (CmmProc info lbl live (ListGraph code)) = - CmmProc info lbl live (ListGraph $ X86.Instr.i386_insert_ffrees code) - -- | Compute unwinding tables for the blocks of a procedure computeUnwinding :: Instruction instr => DynFlags -> NcgImpl statics instr jumpDest diff --git a/compiler/nativeGen/Format.hs b/compiler/nativeGen/Format.hs index 82ecbecc14..31472893e7 100644 --- a/compiler/nativeGen/Format.hs +++ b/compiler/nativeGen/Format.hs @@ -47,7 +47,6 @@ data Format | II64 | FF32 | FF64 - | FF80 deriving (Show, Eq) @@ -70,7 +69,7 @@ floatFormat width = case width of W32 -> FF32 W64 -> FF64 - W80 -> FF80 + other -> pprPanic "Format.floatFormat" (ppr other) @@ -80,7 +79,6 @@ isFloatFormat format = case format of FF32 -> True FF64 -> True - FF80 -> True _ -> False @@ -101,7 +99,7 @@ formatToWidth format II64 -> W64 FF32 -> W32 FF64 -> W64 - FF80 -> W80 + formatInBytes :: Format -> Int formatInBytes = widthInBytes . formatToWidth diff --git a/compiler/nativeGen/NCGMonad.hs b/compiler/nativeGen/NCGMonad.hs index c22a656d2a..0f53ef6690 100644 --- a/compiler/nativeGen/NCGMonad.hs +++ b/compiler/nativeGen/NCGMonad.hs @@ -76,7 +76,6 @@ data NcgImpl statics instr jumpDest = NcgImpl { pprNatCmmDecl :: NatCmmDecl statics instr -> SDoc, maxSpillSlots :: Int, allocatableRegs :: [RealReg], - ncg_x86fp_kludge :: [NatCmmDecl statics instr] -> [NatCmmDecl statics instr], ncgExpandTop :: [NatCmmDecl statics instr] -> [NatCmmDecl statics instr], ncgAllocMoreStack :: Int -> NatCmmDecl statics instr -> UniqSM (NatCmmDecl statics instr, [(BlockId,BlockId)]), diff --git a/compiler/nativeGen/PPC/CodeGen.hs b/compiler/nativeGen/PPC/CodeGen.hs index 86525f4736..a9bddfd5f7 100644 --- a/compiler/nativeGen/PPC/CodeGen.hs +++ b/compiler/nativeGen/PPC/CodeGen.hs @@ -1593,7 +1593,7 @@ genCCall' -> [CmmActual] -- arguments (of mixed type) -> NatM InstrBlock -{- +{- PowerPC Linux uses the System V Release 4 Calling Convention for PowerPC. It is described in the "System V Application Binary Interface PowerPC Processor Supplement". @@ -1906,7 +1906,7 @@ genCCall' dflags gcp target dest_regs args FF32 -> (1, 1, 4, fprs) FF64 -> (2, 1, 8, fprs) II64 -> panic "genCCall' passArguments II64" - FF80 -> panic "genCCall' passArguments FF80" + GCP32ELF -> case cmmTypeFormat rep of II8 -> (1, 0, 4, gprs) @@ -1916,7 +1916,6 @@ genCCall' dflags gcp target dest_regs args FF32 -> (0, 1, 4, fprs) FF64 -> (0, 1, 8, fprs) II64 -> panic "genCCall' passArguments II64" - FF80 -> panic "genCCall' passArguments FF80" GCP64ELF _ -> case cmmTypeFormat rep of II8 -> (1, 0, 8, gprs) @@ -1928,7 +1927,6 @@ genCCall' dflags gcp target dest_regs args -- the FPRs. FF32 -> (1, 1, 8, fprs) FF64 -> (1, 1, 8, fprs) - FF80 -> panic "genCCall' passArguments FF80" moveResult reduceToFF32 = case dest_regs of diff --git a/compiler/nativeGen/PPC/Ppr.hs b/compiler/nativeGen/PPC/Ppr.hs index 23525ffb5a..8e9ff95b9f 100644 --- a/compiler/nativeGen/PPC/Ppr.hs +++ b/compiler/nativeGen/PPC/Ppr.hs @@ -161,7 +161,7 @@ pprReg r RegVirtual (VirtualRegHi u) -> text "%vHi_" <> pprUniqueAlways u RegVirtual (VirtualRegF u) -> text "%vF_" <> pprUniqueAlways u RegVirtual (VirtualRegD u) -> text "%vD_" <> pprUniqueAlways u - RegVirtual (VirtualRegSSE u) -> text "%vSSE_" <> pprUniqueAlways u + where ppr_reg_no :: Int -> SDoc ppr_reg_no i @@ -179,8 +179,7 @@ pprFormat x II32 -> sLit "w" II64 -> sLit "d" FF32 -> sLit "fs" - FF64 -> sLit "fd" - _ -> panic "PPC.Ppr.pprFormat: no match") + FF64 -> sLit "fd") pprCond :: Cond -> SDoc @@ -365,7 +364,6 @@ pprInstr (LD fmt reg addr) = hcat [ II64 -> sLit "d" FF32 -> sLit "fs" FF64 -> sLit "fd" - _ -> panic "PPC.Ppr.pprInstr: no match" ), case addr of AddrRegImm _ _ -> empty AddrRegReg _ _ -> char 'x', @@ -405,7 +403,6 @@ pprInstr (LA fmt reg addr) = hcat [ II64 -> sLit "d" FF32 -> sLit "fs" FF64 -> sLit "fd" - _ -> panic "PPC.Ppr.pprInstr: no match" ), case addr of AddrRegImm _ _ -> empty AddrRegReg _ _ -> char 'x', diff --git a/compiler/nativeGen/PPC/Regs.hs b/compiler/nativeGen/PPC/Regs.hs index 7b16f217d9..8ba2ead926 100644 --- a/compiler/nativeGen/PPC/Regs.hs +++ b/compiler/nativeGen/PPC/Regs.hs @@ -131,7 +131,7 @@ regDotColor reg RcInteger -> text "blue" RcFloat -> text "red" RcDouble -> text "green" - RcDoubleSSE -> text "yellow" + -- immediates ------------------------------------------------------------------ diff --git a/compiler/nativeGen/Reg.hs b/compiler/nativeGen/Reg.hs index d9d56d47c4..7f69ea01a4 100644 --- a/compiler/nativeGen/Reg.hs +++ b/compiler/nativeGen/Reg.hs @@ -56,7 +56,7 @@ data VirtualReg | VirtualRegHi {-# UNPACK #-} !Unique -- High part of 2-word register | VirtualRegF {-# UNPACK #-} !Unique | VirtualRegD {-# UNPACK #-} !Unique - | VirtualRegSSE {-# UNPACK #-} !Unique + deriving (Eq, Show) -- This is laborious, but necessary. We can't derive Ord because @@ -69,15 +69,14 @@ instance Ord VirtualReg where compare (VirtualRegHi a) (VirtualRegHi b) = nonDetCmpUnique a b compare (VirtualRegF a) (VirtualRegF b) = nonDetCmpUnique a b compare (VirtualRegD a) (VirtualRegD b) = nonDetCmpUnique a b - compare (VirtualRegSSE a) (VirtualRegSSE b) = nonDetCmpUnique a b + compare VirtualRegI{} _ = LT compare _ VirtualRegI{} = GT compare VirtualRegHi{} _ = LT compare _ VirtualRegHi{} = GT compare VirtualRegF{} _ = LT compare _ VirtualRegF{} = GT - compare VirtualRegD{} _ = LT - compare _ VirtualRegD{} = GT + instance Uniquable VirtualReg where @@ -87,16 +86,18 @@ instance Uniquable VirtualReg where VirtualRegHi u -> u VirtualRegF u -> u VirtualRegD u -> u - VirtualRegSSE u -> u instance Outputable VirtualReg where ppr reg = case reg of VirtualRegI u -> text "%vI_" <> pprUniqueAlways u VirtualRegHi u -> text "%vHi_" <> pprUniqueAlways u - VirtualRegF u -> text "%vF_" <> pprUniqueAlways u - VirtualRegD u -> text "%vD_" <> pprUniqueAlways u - VirtualRegSSE u -> text "%vSSE_" <> pprUniqueAlways u + -- this code is kinda wrong on x86 + -- because float and double occupy the same register set + -- namely SSE2 register xmm0 .. xmm15 + VirtualRegF u -> text "%vFloat_" <> pprUniqueAlways u + VirtualRegD u -> text "%vDouble_" <> pprUniqueAlways u + renameVirtualReg :: Unique -> VirtualReg -> VirtualReg @@ -106,7 +107,6 @@ renameVirtualReg u r VirtualRegHi _ -> VirtualRegHi u VirtualRegF _ -> VirtualRegF u VirtualRegD _ -> VirtualRegD u - VirtualRegSSE _ -> VirtualRegSSE u classOfVirtualReg :: VirtualReg -> RegClass @@ -116,7 +116,7 @@ classOfVirtualReg vr VirtualRegHi{} -> RcInteger VirtualRegF{} -> RcFloat VirtualRegD{} -> RcDouble - VirtualRegSSE{} -> RcDoubleSSE + -- Determine the upper-half vreg for a 64-bit quantity on a 32-bit platform diff --git a/compiler/nativeGen/RegAlloc/Graph/TrivColorable.hs b/compiler/nativeGen/RegAlloc/Graph/TrivColorable.hs index 7774985dce..8db80ef064 100644 --- a/compiler/nativeGen/RegAlloc/Graph/TrivColorable.hs +++ b/compiler/nativeGen/RegAlloc/Graph/TrivColorable.hs @@ -134,6 +134,10 @@ trivColorable platform virtualRegSqueeze realRegSqueeze RcInteger conflicts excl trivColorable platform virtualRegSqueeze realRegSqueeze RcFloat conflicts exclusions | let cALLOCATABLE_REGS_FLOAT = (case platformArch platform of + -- On x86_64 and x86, Float and RcDouble + -- use the same registers, + -- so we only use RcDouble to represent the + -- register allocation problem on those types. ArchX86 -> 0 ArchX86_64 -> 0 ArchPPC -> 0 @@ -160,8 +164,14 @@ trivColorable platform virtualRegSqueeze realRegSqueeze RcFloat conflicts exclus trivColorable platform virtualRegSqueeze realRegSqueeze RcDouble conflicts exclusions | let cALLOCATABLE_REGS_DOUBLE = (case platformArch platform of - ArchX86 -> 6 - ArchX86_64 -> 0 + ArchX86 -> 8 + -- in x86 32bit mode sse2 there are only + -- 8 XMM registers xmm0 ... xmm7 + ArchX86_64 -> 10 + -- in x86_64 there are 16 XMM registers + -- xmm0 .. xmm15, here 10 is a + -- "dont need to solve conflicts" count that + -- was chosen at some point in the past. ArchPPC -> 26 ArchSPARC -> 11 ArchSPARC64 -> panic "trivColorable ArchSPARC64" @@ -183,31 +193,7 @@ trivColorable platform virtualRegSqueeze realRegSqueeze RcDouble conflicts exclu = count3 < cALLOCATABLE_REGS_DOUBLE -trivColorable platform virtualRegSqueeze realRegSqueeze RcDoubleSSE conflicts exclusions - | let cALLOCATABLE_REGS_SSE - = (case platformArch platform of - ArchX86 -> 8 - ArchX86_64 -> 10 - ArchPPC -> 0 - ArchSPARC -> 0 - ArchSPARC64 -> panic "trivColorable ArchSPARC64" - ArchPPC_64 _ -> 0 - ArchARM _ _ _ -> panic "trivColorable ArchARM" - ArchARM64 -> panic "trivColorable ArchARM64" - ArchAlpha -> panic "trivColorable ArchAlpha" - ArchMipseb -> panic "trivColorable ArchMipseb" - ArchMipsel -> panic "trivColorable ArchMipsel" - ArchJavaScript-> panic "trivColorable ArchJavaScript" - ArchUnknown -> panic "trivColorable ArchUnknown") - , count2 <- accSqueeze 0 cALLOCATABLE_REGS_SSE - (virtualRegSqueeze RcDoubleSSE) - conflicts - - , count3 <- accSqueeze count2 cALLOCATABLE_REGS_SSE - (realRegSqueeze RcDoubleSSE) - exclusions - = count3 < cALLOCATABLE_REGS_SSE -- Specification Code ---------------------------------------------------------- diff --git a/compiler/nativeGen/RegClass.hs b/compiler/nativeGen/RegClass.hs index cd008bbbb1..fbbb786817 100644 --- a/compiler/nativeGen/RegClass.hs +++ b/compiler/nativeGen/RegClass.hs @@ -18,7 +18,6 @@ data RegClass = RcInteger | RcFloat | RcDouble - | RcDoubleSSE -- x86 only: the SSE regs are a separate class deriving Eq @@ -26,10 +25,8 @@ instance Uniquable RegClass where getUnique RcInteger = mkRegClassUnique 0 getUnique RcFloat = mkRegClassUnique 1 getUnique RcDouble = mkRegClassUnique 2 - getUnique RcDoubleSSE = mkRegClassUnique 3 instance Outputable RegClass where ppr RcInteger = Outputable.text "I" ppr RcFloat = Outputable.text "F" ppr RcDouble = Outputable.text "D" - ppr RcDoubleSSE = Outputable.text "S" diff --git a/compiler/nativeGen/SPARC/Instr.hs b/compiler/nativeGen/SPARC/Instr.hs index 54fb513478..3f78ac51d7 100644 --- a/compiler/nativeGen/SPARC/Instr.hs +++ b/compiler/nativeGen/SPARC/Instr.hs @@ -384,7 +384,6 @@ sparc_mkSpillInstr dflags reg _ slot RcInteger -> II32 RcFloat -> FF32 RcDouble -> FF64 - _ -> panic "sparc_mkSpillInstr" in ST fmt reg (fpRel (negate off_w)) @@ -405,7 +404,6 @@ sparc_mkLoadInstr dflags reg _ slot RcInteger -> II32 RcFloat -> FF32 RcDouble -> FF64 - _ -> panic "sparc_mkLoadInstr" in LD fmt (fpRel (- off_w)) reg @@ -454,7 +452,6 @@ sparc_mkRegRegMoveInstr platform src dst RcInteger -> ADD False False src (RIReg g0) dst RcDouble -> FMOV FF64 src dst RcFloat -> FMOV FF32 src dst - _ -> panic "sparc_mkRegRegMoveInstr" | otherwise = panic "SPARC.Instr.mkRegRegMoveInstr: classes of src and dest not the same" diff --git a/compiler/nativeGen/SPARC/Ppr.hs b/compiler/nativeGen/SPARC/Ppr.hs index d367b0726a..6b441819a6 100644 --- a/compiler/nativeGen/SPARC/Ppr.hs +++ b/compiler/nativeGen/SPARC/Ppr.hs @@ -143,7 +143,7 @@ pprReg reg VirtualRegHi u -> text "%vHi_" <> pprUniqueAlways u VirtualRegF u -> text "%vF_" <> pprUniqueAlways u VirtualRegD u -> text "%vD_" <> pprUniqueAlways u - VirtualRegSSE u -> text "%vSSE_" <> pprUniqueAlways u + RegReal rr -> case rr of @@ -211,8 +211,7 @@ pprFormat x II32 -> sLit "" II64 -> sLit "d" FF32 -> sLit "" - FF64 -> sLit "d" - _ -> panic "SPARC.Ppr.pprFormat: no match") + FF64 -> sLit "d") -- | Pretty print a format for an instruction suffix. @@ -226,8 +225,8 @@ pprStFormat x II32 -> sLit "" II64 -> sLit "x" FF32 -> sLit "" - FF64 -> sLit "d" - _ -> panic "SPARC.Ppr.pprFormat: no match") + FF64 -> sLit "d") + -- | Pretty print a condition code. diff --git a/compiler/nativeGen/SPARC/Regs.hs b/compiler/nativeGen/SPARC/Regs.hs index d6aadbae94..0d7edc346a 100644 --- a/compiler/nativeGen/SPARC/Regs.hs +++ b/compiler/nativeGen/SPARC/Regs.hs @@ -104,7 +104,6 @@ virtualRegSqueeze cls vr VirtualRegD{} -> 1 _other -> 0 - _other -> 0 {-# INLINE realRegSqueeze #-} realRegSqueeze :: RegClass -> RealReg -> Int @@ -135,7 +134,6 @@ realRegSqueeze cls rr RealRegPair{} -> 1 - _other -> 0 -- | All the allocatable registers in the machine, -- including register pairs. diff --git a/compiler/nativeGen/X86/CodeGen.hs b/compiler/nativeGen/X86/CodeGen.hs index 61686186f1..70df468857 100644 --- a/compiler/nativeGen/X86/CodeGen.hs +++ b/compiler/nativeGen/X86/CodeGen.hs @@ -98,17 +98,25 @@ is32BitPlatform = do sse2Enabled :: NatM Bool sse2Enabled = do dflags <- getDynFlags - return (isSse2Enabled dflags) + case platformArch (targetPlatform dflags) of + -- We Assume SSE1 and SSE2 operations are available on both + -- x86 and x86_64. Historically we didn't default to SSE2 and + -- SSE1 on x86, which results in defacto nondeterminism for how + -- rounding behaves in the associated x87 floating point instructions + -- because variations in the spill/fpu stack placement of arguments for + -- operations would change the precision and final result of what + -- would otherwise be the same expressions with respect to single or + -- double precision IEEE floating point computations. + ArchX86_64 -> return True + ArchX86 -> return True + _ -> panic "trying to generate x86/x86_64 on the wrong platform" + sse4_2Enabled :: NatM Bool sse4_2Enabled = do dflags <- getDynFlags return (isSse4_2Enabled dflags) -if_sse2 :: NatM a -> NatM a -> NatM a -if_sse2 sse2 x87 = do - b <- sse2Enabled - if b then sse2 else x87 cmmTopCodeGen :: RawCmmDecl @@ -284,15 +292,14 @@ swizzleRegisterRep (Any _ codefn) format = Any format codefn -- | Grab the Reg for a CmmReg -getRegisterReg :: Platform -> Bool -> CmmReg -> Reg +getRegisterReg :: Platform -> CmmReg -> Reg -getRegisterReg _ use_sse2 (CmmLocal (LocalReg u pk)) - = let fmt = cmmTypeFormat pk in - if isFloatFormat fmt && not use_sse2 - then RegVirtual (mkVirtualReg u FF80) - else RegVirtual (mkVirtualReg u fmt) +getRegisterReg _ (CmmLocal (LocalReg u pk)) + = -- by Assuming SSE2, Int,Word,Float,Double all can be register allocated + let fmt = cmmTypeFormat pk in + RegVirtual (mkVirtualReg u fmt) -getRegisterReg platform _ (CmmGlobal mid) +getRegisterReg platform (CmmGlobal mid) = case globalRegMaybe platform mid of Just reg -> RegReal $ reg Nothing -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid) @@ -513,15 +520,14 @@ getRegister' dflags is32Bit (CmmReg reg) do reg' <- getPicBaseNat (archWordFormat is32Bit) return (Fixed (archWordFormat is32Bit) reg' nilOL) _ -> - do use_sse2 <- sse2Enabled + do let fmt = cmmTypeFormat (cmmRegType dflags reg) - format | not use_sse2 && isFloatFormat fmt = FF80 - | otherwise = fmt + format = fmt -- let platform = targetPlatform dflags return (Fixed format - (getRegisterReg platform use_sse2 reg) + (getRegisterReg platform reg) nilOL) @@ -557,8 +563,7 @@ getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W64 W32) [x]) return $ Fixed II32 rlo code getRegister' _ _ (CmmLit lit@(CmmFloat f w)) = - if_sse2 float_const_sse2 float_const_x87 - where + float_const_sse2 where float_const_sse2 | f == 0.0 = do let @@ -570,21 +575,7 @@ getRegister' _ _ (CmmLit lit@(CmmFloat f w)) = | otherwise = do Amode addr code <- memConstant (mkAlignment $ widthInBytes w) lit - loadFloatAmode True w addr code - - float_const_x87 = case w of - W64 - | f == 0.0 -> - let code dst = unitOL (GLDZ dst) - in return (Any FF80 code) - - | f == 1.0 -> - let code dst = unitOL (GLD1 dst) - in return (Any FF80 code) - - _otherwise -> do - Amode addr code <- memConstant (mkAlignment $ widthInBytes w) lit - loadFloatAmode False w addr code + loadFloatAmode w addr code -- catch simple cases of zero- or sign-extended load getRegister' _ _ (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad addr _]) = do @@ -641,11 +632,9 @@ getRegister' _ is32Bit (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg), LEA II64 (OpAddr (ripRel (litToImm displacement))) (OpReg dst)) getRegister' dflags is32Bit (CmmMachOp mop [x]) = do -- unary MachOps - sse2 <- sse2Enabled case mop of - MO_F_Neg w - | sse2 -> sse2NegCode w x - | otherwise -> trivialUFCode FF80 (GNEG FF80) x + MO_F_Neg w -> sse2NegCode w x + MO_S_Neg w -> triv_ucode NEGI (intFormat w) MO_Not w -> triv_ucode NOT (intFormat w) @@ -711,9 +700,8 @@ getRegister' dflags is32Bit (CmmMachOp mop [x]) = do -- unary MachOps MO_XX_Conv W16 W64 | not is32Bit -> integerExtend W16 W64 MOV x MO_XX_Conv W32 W64 | not is32Bit -> integerExtend W32 W64 MOV x - MO_FF_Conv W32 W64 - | sse2 -> coerceFP2FP W64 x - | otherwise -> conversionNop FF80 x + MO_FF_Conv W32 W64 -> coerceFP2FP W64 x + MO_FF_Conv W64 W32 -> coerceFP2FP W32 x @@ -776,7 +764,6 @@ getRegister' dflags is32Bit (CmmMachOp mop [x]) = do -- unary MachOps getRegister' _ is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps - sse2 <- sse2Enabled case mop of MO_F_Eq _ -> condFltReg is32Bit EQQ x y MO_F_Ne _ -> condFltReg is32Bit NE x y @@ -800,14 +787,14 @@ getRegister' _ is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps MO_U_Lt _ -> condIntReg LU x y MO_U_Le _ -> condIntReg LEU x y - MO_F_Add w | sse2 -> trivialFCode_sse2 w ADD x y - | otherwise -> trivialFCode_x87 GADD x y - MO_F_Sub w | sse2 -> trivialFCode_sse2 w SUB x y - | otherwise -> trivialFCode_x87 GSUB x y - MO_F_Quot w | sse2 -> trivialFCode_sse2 w FDIV x y - | otherwise -> trivialFCode_x87 GDIV x y - MO_F_Mul w | sse2 -> trivialFCode_sse2 w MUL x y - | otherwise -> trivialFCode_x87 GMUL x y + MO_F_Add w -> trivialFCode_sse2 w ADD x y + + MO_F_Sub w -> trivialFCode_sse2 w SUB x y + + MO_F_Quot w -> trivialFCode_sse2 w FDIV x y + + MO_F_Mul w -> trivialFCode_sse2 w MUL x y + MO_Add rep -> add_code rep x y MO_Sub rep -> sub_code rep x y @@ -1001,8 +988,7 @@ getRegister' _ _ (CmmLoad mem pk) | isFloatType pk = do Amode addr mem_code <- getAmode mem - use_sse2 <- sse2Enabled - loadFloatAmode use_sse2 (typeWidth pk) addr mem_code + loadFloatAmode (typeWidth pk) addr mem_code getRegister' _ is32Bit (CmmLoad mem pk) | is32Bit && not (isWord64 pk) @@ -1132,9 +1118,7 @@ getNonClobberedReg expr = do return (reg, code) reg2reg :: Format -> Reg -> Reg -> Instr -reg2reg format src dst - | format == FF80 = GMOV src dst - | otherwise = MOV format (OpReg src) (OpReg dst) +reg2reg format src dst = MOV format (OpReg src) (OpReg dst) -------------------------------------------------------------------------------- @@ -1243,8 +1227,7 @@ x86_complex_amode base index shift offset getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock) getNonClobberedOperand (CmmLit lit) = do - use_sse2 <- sse2Enabled - if use_sse2 && isSuitableFloatingPointLit lit + if isSuitableFloatingPointLit lit then do let CmmFloat _ w = lit Amode addr code <- memConstant (mkAlignment $ widthInBytes w) lit @@ -1259,9 +1242,12 @@ getNonClobberedOperand (CmmLit lit) = do getNonClobberedOperand (CmmLoad mem pk) = do is32Bit <- is32BitPlatform - use_sse2 <- sse2Enabled - if (not (isFloatType pk) || use_sse2) - && (if is32Bit then not (isWord64 pk) else True) + -- this logic could be simplified + -- TODO FIXME + if (if is32Bit then not (isWord64 pk) else True) + -- if 32bit and pk is at float/double/simd value + -- or if 64bit + -- this could use some eyeballs or i'll need to stare at it more later then do dflags <- getDynFlags let platform = targetPlatform dflags @@ -1278,6 +1264,7 @@ getNonClobberedOperand (CmmLoad mem pk) = do return (src, nilOL) return (OpAddr src', mem_code `appOL` save_code) else do + -- if its a word or gcptr on 32bit? getNonClobberedOperand_generic (CmmLoad mem pk) getNonClobberedOperand e = getNonClobberedOperand_generic e @@ -1370,14 +1357,13 @@ memConstant align lit = do return (Amode addr code) -loadFloatAmode :: Bool -> Width -> AddrMode -> InstrBlock -> NatM Register -loadFloatAmode use_sse2 w addr addr_code = do +loadFloatAmode :: Width -> AddrMode -> InstrBlock -> NatM Register +loadFloatAmode w addr addr_code = do let format = floatFormat w code dst = addr_code `snocOL` - if use_sse2 - then MOV format (OpAddr addr) (OpReg dst) - else GLD format addr dst - return (Any (if use_sse2 then format else FF80) code) + MOV format (OpAddr addr) (OpReg dst) + + return (Any format code) -- if we want a floating-point literal as an operand, we can @@ -1538,19 +1524,9 @@ condIntCode' _ cond x y = do condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode condFltCode cond x y - = if_sse2 condFltCode_sse2 condFltCode_x87 + = condFltCode_sse2 where - condFltCode_x87 - = ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do - (x_reg, x_code) <- getNonClobberedReg x - (y_reg, y_code) <- getSomeReg y - let - code = x_code `appOL` y_code `snocOL` - GCMP cond x_reg y_reg - -- The GCMP insn does the test and sets the zero flag if comparable - -- and true. Hence we always supply EQQ as the condition to test. - return (CondCode True EQQ code) -- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be -- an operand, but the right must be a reg. We can probably do better @@ -1634,35 +1610,33 @@ assignReg_IntCode pk reg (CmmLoad src _) = do load_code <- intLoadCode (MOV pk) src dflags <- getDynFlags let platform = targetPlatform dflags - return (load_code (getRegisterReg platform False{-no sse2-} reg)) + return (load_code (getRegisterReg platform reg)) -- dst is a reg, but src could be anything assignReg_IntCode _ reg src = do dflags <- getDynFlags let platform = targetPlatform dflags code <- getAnyReg src - return (code (getRegisterReg platform False{-no sse2-} reg)) + return (code (getRegisterReg platform reg)) -- Floating point assignment to memory assignMem_FltCode pk addr src = do (src_reg, src_code) <- getNonClobberedReg src Amode addr addr_code <- getAmode addr - use_sse2 <- sse2Enabled let code = src_code `appOL` addr_code `snocOL` - if use_sse2 then MOV pk (OpReg src_reg) (OpAddr addr) - else GST pk src_reg addr + MOV pk (OpReg src_reg) (OpAddr addr) + return code -- Floating point assignment to a register/temporary assignReg_FltCode _ reg src = do - use_sse2 <- sse2Enabled src_code <- getAnyReg src dflags <- getDynFlags let platform = targetPlatform dflags - return (src_code (getRegisterReg platform use_sse2 reg)) + return (src_code (getRegisterReg platform reg)) genJump :: CmmExpr{-the branch target-} -> [Reg] -> NatM InstrBlock @@ -1945,7 +1919,7 @@ genCCall _ is32bit (PrimTarget (MO_Prefetch_Data n )) _ [src] _ = genCCall dflags is32Bit (PrimTarget (MO_BSwap width)) [dst] [src] _ = do let platform = targetPlatform dflags - let dst_r = getRegisterReg platform False (CmmLocal dst) + let dst_r = getRegisterReg platform (CmmLocal dst) case width of W64 | is32Bit -> do ChildCode64 vcode rlo <- iselExpr64 src @@ -1972,7 +1946,7 @@ genCCall dflags is32Bit (PrimTarget (MO_PopCnt width)) dest_regs@[dst] if sse4_2 then do code_src <- getAnyReg src src_r <- getNewRegNat format - let dst_r = getRegisterReg platform False (CmmLocal dst) + let dst_r = getRegisterReg platform (CmmLocal dst) return $ code_src src_r `appOL` (if width == W8 then -- The POPCNT instruction doesn't take a r/m8 @@ -2004,7 +1978,7 @@ genCCall dflags is32Bit (PrimTarget (MO_Pdep width)) dest_regs@[dst] code_mask <- getAnyReg mask src_r <- getNewRegNat format mask_r <- getNewRegNat format - let dst_r = getRegisterReg platform False (CmmLocal dst) + let dst_r = getRegisterReg platform (CmmLocal dst) return $ code_src src_r `appOL` code_mask mask_r `appOL` (if width == W8 then -- The PDEP instruction doesn't take a r/m8 @@ -2037,7 +2011,7 @@ genCCall dflags is32Bit (PrimTarget (MO_Pext width)) dest_regs@[dst] code_mask <- getAnyReg mask src_r <- getNewRegNat format mask_r <- getNewRegNat format - let dst_r = getRegisterReg platform False (CmmLocal dst) + let dst_r = getRegisterReg platform (CmmLocal dst) return $ code_src src_r `appOL` code_mask mask_r `appOL` (if width == W8 then -- The PEXT instruction doesn't take a r/m8 @@ -2073,7 +2047,7 @@ genCCall dflags is32Bit (PrimTarget (MO_Clz width)) dest_regs@[dst] args@[src] b | otherwise = do code_src <- getAnyReg src - let dst_r = getRegisterReg platform False (CmmLocal dst) + let dst_r = getRegisterReg platform (CmmLocal dst) if isBmi2Enabled dflags then do src_r <- getNewRegNat (intFormat width) @@ -2110,7 +2084,7 @@ genCCall dflags is32Bit (PrimTarget (MO_Ctz width)) [dst] [src] bid | is32Bit, width == W64 = do ChildCode64 vcode rlo <- iselExpr64 src let rhi = getHiVRegFromLo rlo - dst_r = getRegisterReg platform False (CmmLocal dst) + dst_r = getRegisterReg platform (CmmLocal dst) lbl1 <- getBlockIdNat lbl2 <- getBlockIdNat let format = if width == W8 then II16 else intFormat width @@ -2150,7 +2124,7 @@ genCCall dflags is32Bit (PrimTarget (MO_Ctz width)) [dst] [src] bid | otherwise = do code_src <- getAnyReg src - let dst_r = getRegisterReg platform False (CmmLocal dst) + let dst_r = getRegisterReg platform (CmmLocal dst) if isBmi2Enabled dflags then do @@ -2201,9 +2175,8 @@ genCCall dflags is32Bit (PrimTarget (MO_AtomicRMW width amop)) else getSimpleAmode dflags is32Bit addr -- See genCCall for MO_Cmpxchg arg <- getNewRegNat format arg_code <- getAnyReg n - use_sse2 <- sse2Enabled let platform = targetPlatform dflags - dst_r = getRegisterReg platform use_sse2 (CmmLocal dst) + dst_r = getRegisterReg platform (CmmLocal dst) code <- op_code dst_r arg amode return $ addr_code `appOL` arg_code arg `appOL` code where @@ -2260,8 +2233,8 @@ genCCall dflags is32Bit (PrimTarget (MO_AtomicRMW width amop)) genCCall dflags _ (PrimTarget (MO_AtomicRead width)) [dst] [addr] _ = do load_code <- intLoadCode (MOV (intFormat width)) addr let platform = targetPlatform dflags - use_sse2 <- sse2Enabled - return (load_code (getRegisterReg platform use_sse2 (CmmLocal dst))) + + return (load_code (getRegisterReg platform (CmmLocal dst))) genCCall _ _ (PrimTarget (MO_AtomicWrite width)) [] [addr, val] _ = do code <- assignMem_IntCode (intFormat width) addr val @@ -2276,9 +2249,8 @@ genCCall dflags is32Bit (PrimTarget (MO_Cmpxchg width)) [dst] [addr, old, new] _ newval_code <- getAnyReg new oldval <- getNewRegNat format oldval_code <- getAnyReg old - use_sse2 <- sse2Enabled let platform = targetPlatform dflags - dst_r = getRegisterReg platform use_sse2 (CmmLocal dst) + dst_r = getRegisterReg platform (CmmLocal dst) code = toOL [ MOV format (OpReg oldval) (OpReg eax) , LOCK (CMPXCHG format (OpReg newval) (OpAddr amode)) @@ -2292,14 +2264,12 @@ genCCall dflags is32Bit (PrimTarget (MO_Cmpxchg width)) [dst] [addr, old, new] _ genCCall _ is32Bit target dest_regs args bid = do dflags <- getDynFlags let platform = targetPlatform dflags - sse2 = isSse2Enabled dflags case (target, dest_regs) of -- void return type prim op (PrimTarget op, []) -> outOfLineCmmOp bid op Nothing args -- we only cope with a single result for foreign calls - (PrimTarget op, [r]) - | sse2 -> case op of + (PrimTarget op, [r]) -> case op of MO_F32_Fabs -> case args of [x] -> sse2FabsCode W32 x _ -> panic "genCCall: Wrong number of arguments for fabs" @@ -2310,36 +2280,16 @@ genCCall _ is32Bit target dest_regs args bid = do MO_F32_Sqrt -> actuallyInlineSSE2Op (\fmt r -> SQRT fmt (OpReg r)) FF32 args MO_F64_Sqrt -> actuallyInlineSSE2Op (\fmt r -> SQRT fmt (OpReg r)) FF64 args _other_op -> outOfLineCmmOp bid op (Just r) args - | otherwise -> do - l1 <- getNewLabelNat - l2 <- getNewLabelNat - if sse2 - then outOfLineCmmOp bid op (Just r) args - else case op of - MO_F32_Sqrt -> actuallyInlineFloatOp GSQRT FF32 args - MO_F64_Sqrt -> actuallyInlineFloatOp GSQRT FF64 args - - MO_F32_Sin -> actuallyInlineFloatOp (\s -> GSIN s l1 l2) FF32 args - MO_F64_Sin -> actuallyInlineFloatOp (\s -> GSIN s l1 l2) FF64 args - - MO_F32_Cos -> actuallyInlineFloatOp (\s -> GCOS s l1 l2) FF32 args - MO_F64_Cos -> actuallyInlineFloatOp (\s -> GCOS s l1 l2) FF64 args - - MO_F32_Tan -> actuallyInlineFloatOp (\s -> GTAN s l1 l2) FF32 args - MO_F64_Tan -> actuallyInlineFloatOp (\s -> GTAN s l1 l2) FF64 args - - _other_op -> outOfLineCmmOp bid op (Just r) args where - actuallyInlineFloatOp = actuallyInlineFloatOp' False - actuallyInlineSSE2Op = actuallyInlineFloatOp' True + actuallyInlineSSE2Op = actuallyInlineFloatOp' - actuallyInlineFloatOp' usesSSE instr format [x] + actuallyInlineFloatOp' instr format [x] = do res <- trivialUFCode format (instr format) x any <- anyReg res - return (any (getRegisterReg platform usesSSE (CmmLocal r))) + return (any (getRegisterReg platform (CmmLocal r))) - actuallyInlineFloatOp' _ _ _ args + actuallyInlineFloatOp' _ _ args = panic $ "genCCall.actuallyInlineFloatOp': bad number of arguments! (" ++ show (length args) ++ ")" @@ -2358,7 +2308,7 @@ genCCall _ is32Bit target dest_regs args bid = do AND fmt (OpReg tmp) (OpReg dst) ] - return $ code (getRegisterReg platform True (CmmLocal r)) + return $ code (getRegisterReg platform (CmmLocal r)) (PrimTarget (MO_S_QuotRem width), _) -> divOp1 platform True width dest_regs args (PrimTarget (MO_U_QuotRem width), _) -> divOp1 platform False width dest_regs args @@ -2370,8 +2320,8 @@ genCCall _ is32Bit target dest_regs args bid = do let format = intFormat width lCode <- anyReg =<< trivialCode width (ADD_CC format) (Just (ADD_CC format)) arg_x arg_y - let reg_l = getRegisterReg platform True (CmmLocal res_l) - reg_h = getRegisterReg platform True (CmmLocal res_h) + let reg_l = getRegisterReg platform (CmmLocal res_l) + reg_h = getRegisterReg platform (CmmLocal res_h) code = hCode reg_h `appOL` lCode reg_l `snocOL` ADC format (OpImm (ImmInteger 0)) (OpReg reg_h) @@ -2391,8 +2341,8 @@ genCCall _ is32Bit target dest_regs args bid = do do (y_reg, y_code) <- getRegOrMem arg_y x_code <- getAnyReg arg_x let format = intFormat width - reg_h = getRegisterReg platform True (CmmLocal res_h) - reg_l = getRegisterReg platform True (CmmLocal res_l) + reg_h = getRegisterReg platform (CmmLocal res_h) + reg_l = getRegisterReg platform (CmmLocal res_l) code = y_code `appOL` x_code rax `appOL` toOL [MUL2 format y_reg, @@ -2428,8 +2378,8 @@ genCCall _ is32Bit target dest_regs args bid = do divOp platform signed width [res_q, res_r] m_arg_x_high arg_x_low arg_y = do let format = intFormat width - reg_q = getRegisterReg platform True (CmmLocal res_q) - reg_r = getRegisterReg platform True (CmmLocal res_r) + reg_q = getRegisterReg platform (CmmLocal res_q) + reg_r = getRegisterReg platform (CmmLocal res_r) widen | signed = CLTD format | otherwise = XOR format (OpReg rdx) (OpReg rdx) instr | signed = IDIV @@ -2456,8 +2406,8 @@ genCCall _ is32Bit target dest_regs args bid = do rCode <- anyReg =<< trivialCode width (instr format) (mrevinstr format) arg_x arg_y reg_tmp <- getNewRegNat II8 - let reg_c = getRegisterReg platform True (CmmLocal res_c) - reg_r = getRegisterReg platform True (CmmLocal res_r) + let reg_c = getRegisterReg platform (CmmLocal res_c) + reg_r = getRegisterReg platform (CmmLocal res_r) code = rCode reg_r `snocOL` SETCC cond (OpReg reg_tmp) `snocOL` MOVZxL II8 (OpReg reg_tmp) (OpReg reg_c) @@ -2501,8 +2451,7 @@ genCCall32' dflags target dest_regs args = do delta0 <- getDeltaNat setDeltaNat (delta0 - arg_pad_size) - use_sse2 <- sse2Enabled - push_codes <- mapM (push_arg use_sse2) (reverse prom_args) + push_codes <- mapM push_arg (reverse prom_args) delta <- getDeltaNat MASSERT(delta == delta0 - tot_arg_size) @@ -2555,18 +2504,21 @@ genCCall32' dflags target dest_regs args = do assign_code [] = nilOL assign_code [dest] | isFloatType ty = - if use_sse2 - then let tmp_amode = AddrBaseIndex (EABaseReg esp) + -- we assume SSE2 + let tmp_amode = AddrBaseIndex (EABaseReg esp) EAIndexNone (ImmInt 0) - fmt = floatFormat w + fmt = floatFormat w in toOL [ SUB II32 (OpImm (ImmInt b)) (OpReg esp), DELTA (delta0 - b), - GST fmt fake0 tmp_amode, + X87Store fmt tmp_amode, + -- X87Store only supported for the CDECL ABI + -- NB: This code will need to be + -- revisted once GHC does more work around + -- SIGFPE f MOV fmt (OpAddr tmp_amode) (OpReg r_dest), ADD II32 (OpImm (ImmInt b)) (OpReg esp), DELTA delta0] - else unitOL (GMOV fake0 r_dest) | isWord64 ty = toOL [MOV II32 (OpReg eax) (OpReg r_dest), MOV II32 (OpReg edx) (OpReg r_dest_hi)] | otherwise = unitOL (MOV (intFormat w) @@ -2577,7 +2529,7 @@ genCCall32' dflags target dest_regs args = do w = typeWidth ty b = widthInBytes w r_dest_hi = getHiVRegFromLo r_dest - r_dest = getRegisterReg platform use_sse2 (CmmLocal dest) + r_dest = getRegisterReg platform (CmmLocal dest) assign_code many = pprPanic "genCCall.assign_code - too many return values:" (ppr many) return (push_code `appOL` @@ -2592,10 +2544,10 @@ genCCall32' dflags target dest_regs args = do roundTo a x | x `mod` a == 0 = x | otherwise = x + a - (x `mod` a) - push_arg :: Bool -> CmmActual {-current argument-} + push_arg :: CmmActual {-current argument-} -> NatM InstrBlock -- code - push_arg use_sse2 arg -- we don't need the hints on x86 + push_arg arg -- we don't need the hints on x86 | isWord64 arg_ty = do ChildCode64 code r_lo <- iselExpr64 arg delta <- getDeltaNat @@ -2619,9 +2571,10 @@ genCCall32' dflags target dest_regs args = do (ImmInt 0) format = floatFormat (typeWidth arg_ty) in - if use_sse2 - then MOV format (OpReg reg) (OpAddr addr) - else GST format reg addr + + -- assume SSE2 + MOV format (OpReg reg) (OpAddr addr) + ] ) @@ -2749,7 +2702,7 @@ genCCall64' dflags target dest_regs args = do _ -> unitOL (MOV (cmmTypeFormat rep) (OpReg rax) (OpReg r_dest)) where rep = localRegType dest - r_dest = getRegisterReg platform True (CmmLocal dest) + r_dest = getRegisterReg platform (CmmLocal dest) assign_code _many = panic "genCCall.assign_code many" return (adjust_rsp `appOL` @@ -3162,17 +3115,9 @@ condIntReg cond x y = do -- and plays better with the uOP cache. condFltReg :: Bool -> Cond -> CmmExpr -> CmmExpr -> NatM Register -condFltReg is32Bit cond x y = if_sse2 condFltReg_sse2 condFltReg_x87 +condFltReg is32Bit cond x y = condFltReg_sse2 where - condFltReg_x87 = do - CondCode _ cond cond_code <- condFltCode cond x y - tmp <- getNewRegNat II8 - let - code dst = cond_code `appOL` toOL [ - SETCC cond (OpReg tmp), - MOVZxL II8 (OpReg tmp) (OpReg dst) - ] - return (Any II32 code) + condFltReg_sse2 = do CondCode _ cond cond_code <- condFltCode cond x y @@ -3336,18 +3281,6 @@ trivialUCode rep instr x = do ----------- -trivialFCode_x87 :: (Format -> Reg -> Reg -> Reg -> Instr) - -> CmmExpr -> CmmExpr -> NatM Register -trivialFCode_x87 instr x y = do - (x_reg, x_code) <- getNonClobberedReg x -- these work for float regs too - (y_reg, y_code) <- getSomeReg y - let - format = FF80 -- always, on x87 - code dst = - x_code `appOL` - y_code `snocOL` - instr format x_reg y_reg dst - return (Any format code) trivialFCode_sse2 :: Width -> (Format -> Operand -> Operand -> Instr) -> CmmExpr -> CmmExpr -> NatM Register @@ -3368,17 +3301,8 @@ trivialUFCode format instr x = do -------------------------------------------------------------------------------- coerceInt2FP :: Width -> Width -> CmmExpr -> NatM Register -coerceInt2FP from to x = if_sse2 coerce_sse2 coerce_x87 +coerceInt2FP from to x = coerce_sse2 where - coerce_x87 = do - (x_reg, x_code) <- getSomeReg x - let - opc = case to of W32 -> GITOF; W64 -> GITOD; - n -> panic $ "coerceInt2FP.x87: unhandled width (" - ++ show n ++ ")" - code dst = x_code `snocOL` opc x_reg dst - -- ToDo: works for non-II32 reps? - return (Any FF80 code) coerce_sse2 = do (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand @@ -3392,18 +3316,8 @@ coerceInt2FP from to x = if_sse2 coerce_sse2 coerce_x87 -------------------------------------------------------------------------------- coerceFP2Int :: Width -> Width -> CmmExpr -> NatM Register -coerceFP2Int from to x = if_sse2 coerceFP2Int_sse2 coerceFP2Int_x87 +coerceFP2Int from to x = coerceFP2Int_sse2 where - coerceFP2Int_x87 = do - (x_reg, x_code) <- getSomeReg x - let - opc = case from of W32 -> GFTOI; W64 -> GDTOI - n -> panic $ "coerceFP2Int.x87: unhandled width (" - ++ show n ++ ")" - code dst = x_code `snocOL` opc x_reg dst - -- ToDo: works for non-II32 reps? - return (Any (intFormat to) code) - coerceFP2Int_sse2 = do (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand let @@ -3418,15 +3332,13 @@ coerceFP2Int from to x = if_sse2 coerceFP2Int_sse2 coerceFP2Int_x87 -------------------------------------------------------------------------------- coerceFP2FP :: Width -> CmmExpr -> NatM Register coerceFP2FP to x = do - use_sse2 <- sse2Enabled (x_reg, x_code) <- getSomeReg x let - opc | use_sse2 = case to of W32 -> CVTSD2SS; W64 -> CVTSS2SD; + opc = case to of W32 -> CVTSD2SS; W64 -> CVTSS2SD; n -> panic $ "coerceFP2FP: unhandled width (" ++ show n ++ ")" - | otherwise = GDTOF code dst = x_code `snocOL` opc x_reg dst - return (Any (if use_sse2 then floatFormat to else FF80) code) + return (Any ( floatFormat to) code) -------------------------------------------------------------------------------- @@ -3443,7 +3355,7 @@ sse2NegCode w x = do x@II16 -> wrongFmt x x@II32 -> wrongFmt x x@II64 -> wrongFmt x - x@FF80 -> wrongFmt x + where wrongFmt x = panic $ "sse2NegCode: " ++ show x Amode amode amode_code <- memConstant (mkAlignment $ widthInBytes w) const diff --git a/compiler/nativeGen/X86/Instr.hs b/compiler/nativeGen/X86/Instr.hs index beb9a97097..0e69d421a3 100644 --- a/compiler/nativeGen/X86/Instr.hs +++ b/compiler/nativeGen/X86/Instr.hs @@ -10,7 +10,7 @@ module X86.Instr (Instr(..), Operand(..), PrefetchVariant(..), JumpDest(..), getJumpDestBlockId, canShortcut, shortcutStatics, - shortcutJump, i386_insert_ffrees, allocMoreStack, + shortcutJump, allocMoreStack, maxSpillSlots, archWordFormat ) where @@ -240,46 +240,14 @@ data Instr | BT Format Imm Operand | NOP - -- x86 Float Arithmetic. - -- Note that we cheat by treating G{ABS,MOV,NEG} of doubles - -- as single instructions right up until we spit them out. - -- all the 3-operand fake fp insns are src1 src2 dst - -- and furthermore are constrained to be fp regs only. - -- IMPORTANT: keep is_G_insn up to date with any changes here - | GMOV Reg Reg -- src(fpreg), dst(fpreg) - | GLD Format AddrMode Reg -- src, dst(fpreg) - | GST Format Reg AddrMode -- src(fpreg), dst - | GLDZ Reg -- dst(fpreg) - | GLD1 Reg -- dst(fpreg) - - | GFTOI Reg Reg -- src(fpreg), dst(intreg) - | GDTOI Reg Reg -- src(fpreg), dst(intreg) - - | GITOF Reg Reg -- src(intreg), dst(fpreg) - | GITOD Reg Reg -- src(intreg), dst(fpreg) - - | GDTOF Reg Reg -- src(fpreg), dst(fpreg) - - | GADD Format Reg Reg Reg -- src1, src2, dst - | GDIV Format Reg Reg Reg -- src1, src2, dst - | GSUB Format Reg Reg Reg -- src1, src2, dst - | GMUL Format Reg Reg Reg -- src1, src2, dst - - -- FP compare. Cond must be `elem` [EQQ, NE, LE, LTT, GE, GTT] - -- Compare src1 with src2; set the Zero flag iff the numbers are - -- comparable and the comparison is True. Subsequent code must - -- test the %eflags zero flag regardless of the supplied Cond. - | GCMP Cond Reg Reg -- src1, src2 - - | GABS Format Reg Reg -- src, dst - | GNEG Format Reg Reg -- src, dst - | GSQRT Format Reg Reg -- src, dst - | GSIN Format CLabel CLabel Reg Reg -- src, dst - | GCOS Format CLabel CLabel Reg Reg -- src, dst - | GTAN Format CLabel CLabel Reg Reg -- src, dst - - | GFREE -- do ffree on all x86 regs; an ugly hack + -- We need to support the FSTP (x87 store and pop) instruction + -- so that we can correctly read off the return value of an + -- x86 CDECL C function call when its floating point. + -- so we dont include a register argument, and just use st(0) + -- this instruction is used ONLY for return values of C ffi calls + -- in x86_32 abi + | X87Store Format AddrMode -- st(0), dst -- SSE2 floating point: we use a restricted set of the available SSE2 @@ -427,33 +395,7 @@ x86_regUsageOfInstr platform instr CLTD _ -> mkRU [eax] [edx] NOP -> mkRU [] [] - GMOV src dst -> mkRU [src] [dst] - GLD _ src dst -> mkRU (use_EA src []) [dst] - GST _ src dst -> mkRUR (src : use_EA dst []) - - GLDZ dst -> mkRU [] [dst] - GLD1 dst -> mkRU [] [dst] - - GFTOI src dst -> mkRU [src] [dst] - GDTOI src dst -> mkRU [src] [dst] - - GITOF src dst -> mkRU [src] [dst] - GITOD src dst -> mkRU [src] [dst] - - GDTOF src dst -> mkRU [src] [dst] - - GADD _ s1 s2 dst -> mkRU [s1,s2] [dst] - GSUB _ s1 s2 dst -> mkRU [s1,s2] [dst] - GMUL _ s1 s2 dst -> mkRU [s1,s2] [dst] - GDIV _ s1 s2 dst -> mkRU [s1,s2] [dst] - - GCMP _ src1 src2 -> mkRUR [src1,src2] - GABS _ src dst -> mkRU [src] [dst] - GNEG _ src dst -> mkRU [src] [dst] - GSQRT _ src dst -> mkRU [src] [dst] - GSIN _ _ _ src dst -> mkRU [src] [dst] - GCOS _ _ _ src dst -> mkRU [src] [dst] - GTAN _ _ _ src dst -> mkRU [src] [dst] + X87Store _ dst -> mkRUR ( use_EA dst []) CVTSS2SD src dst -> mkRU [src] [dst] CVTSD2SS src dst -> mkRU [src] [dst] @@ -603,33 +545,8 @@ x86_patchRegsOfInstr instr env JMP op regs -> JMP (patchOp op) regs JMP_TBL op ids s lbl -> JMP_TBL (patchOp op) ids s lbl - GMOV src dst -> GMOV (env src) (env dst) - GLD fmt src dst -> GLD fmt (lookupAddr src) (env dst) - GST fmt src dst -> GST fmt (env src) (lookupAddr dst) - - GLDZ dst -> GLDZ (env dst) - GLD1 dst -> GLD1 (env dst) - - GFTOI src dst -> GFTOI (env src) (env dst) - GDTOI src dst -> GDTOI (env src) (env dst) - - GITOF src dst -> GITOF (env src) (env dst) - GITOD src dst -> GITOD (env src) (env dst) - - GDTOF src dst -> GDTOF (env src) (env dst) - - GADD fmt s1 s2 dst -> GADD fmt (env s1) (env s2) (env dst) - GSUB fmt s1 s2 dst -> GSUB fmt (env s1) (env s2) (env dst) - GMUL fmt s1 s2 dst -> GMUL fmt (env s1) (env s2) (env dst) - GDIV fmt s1 s2 dst -> GDIV fmt (env s1) (env s2) (env dst) - - GCMP fmt src1 src2 -> GCMP fmt (env src1) (env src2) - GABS fmt src dst -> GABS fmt (env src) (env dst) - GNEG fmt src dst -> GNEG fmt (env src) (env dst) - GSQRT fmt src dst -> GSQRT fmt (env src) (env dst) - GSIN fmt l1 l2 src dst -> GSIN fmt l1 l2 (env src) (env dst) - GCOS fmt l1 l2 src dst -> GCOS fmt l1 l2 (env src) (env dst) - GTAN fmt l1 l2 src dst -> GTAN fmt l1 l2 (env src) (env dst) + -- literally only support storing the top x87 stack value st(0) + X87Store fmt dst -> X87Store fmt (lookupAddr dst) CVTSS2SD src dst -> CVTSS2SD (env src) (env dst) CVTSD2SS src dst -> CVTSD2SS (env src) (env dst) @@ -752,8 +669,7 @@ x86_mkSpillInstr dflags reg delta slot case targetClassOfReg platform reg of RcInteger -> MOV (archWordFormat is32Bit) (OpReg reg) (OpAddr (spRel dflags off)) - RcDouble -> GST FF80 reg (spRel dflags off) {- RcFloat/RcDouble -} - RcDoubleSSE -> MOV FF64 (OpReg reg) (OpAddr (spRel dflags off)) + RcDouble -> MOV FF64 (OpReg reg) (OpAddr (spRel dflags off)) _ -> panic "X86.mkSpillInstr: no match" where platform = targetPlatform dflags is32Bit = target32Bit platform @@ -772,8 +688,7 @@ x86_mkLoadInstr dflags reg delta slot case targetClassOfReg platform reg of RcInteger -> MOV (archWordFormat is32Bit) (OpAddr (spRel dflags off)) (OpReg reg) - RcDouble -> GLD FF80 (spRel dflags off) reg {- RcFloat/RcDouble -} - RcDoubleSSE -> MOV FF64 (OpAddr (spRel dflags off)) (OpReg reg) + RcDouble -> MOV FF64 (OpAddr (spRel dflags off)) (OpReg reg) _ -> panic "X86.x86_mkLoadInstr" where platform = targetPlatform dflags is32Bit = target32Bit platform @@ -827,6 +742,7 @@ x86_isMetaInstr instr +--- TODO: why is there -- | Make a reg-reg move instruction. -- On SPARC v8 there are no instructions to move directly between -- floating point and integer regs. If we need to do that then we @@ -844,8 +760,10 @@ x86_mkRegRegMoveInstr platform src dst ArchX86 -> MOV II32 (OpReg src) (OpReg dst) ArchX86_64 -> MOV II64 (OpReg src) (OpReg dst) _ -> panic "x86_mkRegRegMoveInstr: Bad arch" - RcDouble -> GMOV src dst - RcDoubleSSE -> MOV FF64 (OpReg src) (OpReg dst) + RcDouble -> MOV FF64 (OpReg src) (OpReg dst) + -- this code is the lie we tell ourselves because both float and double + -- use the same register class.on x86_64 and x86 32bit with SSE2, + -- more plainly, both use the XMM registers _ -> panic "X86.RegInfo.mkRegRegMoveInstr: no match" -- | Check whether an instruction represents a reg-reg move. @@ -970,58 +888,6 @@ x86_mkStackDeallocInstr platform amount ArchX86_64 -> [ADD II64 (OpImm (ImmInt amount)) (OpReg rsp)] _ -> panic "x86_mkStackDeallocInstr" -i386_insert_ffrees - :: [GenBasicBlock Instr] - -> [GenBasicBlock Instr] - -i386_insert_ffrees blocks - | any (any is_G_instr) [ instrs | BasicBlock _ instrs <- blocks ] - = map insertGFREEs blocks - | otherwise - = blocks - where - insertGFREEs (BasicBlock id insns) - = BasicBlock id (insertBeforeNonlocalTransfers GFREE insns) - -insertBeforeNonlocalTransfers :: Instr -> [Instr] -> [Instr] -insertBeforeNonlocalTransfers insert insns - = foldr p [] insns - where p insn r = case insn of - CALL _ _ -> insert : insn : r - JMP _ _ -> insert : insn : r - JXX_GBL _ _ -> panic "insertBeforeNonlocalTransfers: cannot handle JXX_GBL" - _ -> insn : r - - --- if you ever add a new FP insn to the fake x86 FP insn set, --- you must update this too -is_G_instr :: Instr -> Bool -is_G_instr instr - = case instr of - GMOV{} -> True - GLD{} -> True - GST{} -> True - GLDZ{} -> True - GLD1{} -> True - GFTOI{} -> True - GDTOI{} -> True - GITOF{} -> True - GITOD{} -> True - GDTOF{} -> True - GADD{} -> True - GDIV{} -> True - GSUB{} -> True - GMUL{} -> True - GCMP{} -> True - GABS{} -> True - GNEG{} -> True - GSQRT{} -> True - GSIN{} -> True - GCOS{} -> True - GTAN{} -> True - GFREE -> panic "is_G_instr: GFREE (!)" - _ -> False - -- -- Note [extra spill slots] diff --git a/compiler/nativeGen/X86/Ppr.hs b/compiler/nativeGen/X86/Ppr.hs index 83d53be553..ccfaff4753 100644 --- a/compiler/nativeGen/X86/Ppr.hs +++ b/compiler/nativeGen/X86/Ppr.hs @@ -272,7 +272,7 @@ pprReg f r RegVirtual (VirtualRegHi u) -> text "%vHi_" <> pprUniqueAlways u RegVirtual (VirtualRegF u) -> text "%vF_" <> pprUniqueAlways u RegVirtual (VirtualRegD u) -> text "%vD_" <> pprUniqueAlways u - RegVirtual (VirtualRegSSE u) -> text "%vSSE_" <> pprUniqueAlways u + where ppr32_reg_no :: Format -> Int -> SDoc ppr32_reg_no II8 = ppr32_reg_byte @@ -364,17 +364,14 @@ pprReg f r ppr_reg_float :: Int -> PtrString ppr_reg_float i = case i of - 16 -> sLit "%fake0"; 17 -> sLit "%fake1" - 18 -> sLit "%fake2"; 19 -> sLit "%fake3" - 20 -> sLit "%fake4"; 21 -> sLit "%fake5" - 24 -> sLit "%xmm0"; 25 -> sLit "%xmm1" - 26 -> sLit "%xmm2"; 27 -> sLit "%xmm3" - 28 -> sLit "%xmm4"; 29 -> sLit "%xmm5" - 30 -> sLit "%xmm6"; 31 -> sLit "%xmm7" - 32 -> sLit "%xmm8"; 33 -> sLit "%xmm9" - 34 -> sLit "%xmm10"; 35 -> sLit "%xmm11" - 36 -> sLit "%xmm12"; 37 -> sLit "%xmm13" - 38 -> sLit "%xmm14"; 39 -> sLit "%xmm15" + 16 -> sLit "%xmm0" ; 17 -> sLit "%xmm1" + 18 -> sLit "%xmm2" ; 19 -> sLit "%xmm3" + 20 -> sLit "%xmm4" ; 21 -> sLit "%xmm5" + 22 -> sLit "%xmm6" ; 23 -> sLit "%xmm7" + 24 -> sLit "%xmm8" ; 25 -> sLit "%xmm9" + 26 -> sLit "%xmm10"; 27 -> sLit "%xmm11" + 28 -> sLit "%xmm12"; 29 -> sLit "%xmm13" + 30 -> sLit "%xmm14"; 31 -> sLit "%xmm15" _ -> sLit "very naughty x86 register" pprFormat :: Format -> SDoc @@ -386,7 +383,6 @@ pprFormat x II64 -> sLit "q" FF32 -> sLit "ss" -- "scalar single-precision float" (SSE2) FF64 -> sLit "sd" -- "scalar double-precision float" (SSE2) - FF80 -> sLit "t" ) pprFormat_x87 :: Format -> SDoc @@ -394,9 +390,9 @@ pprFormat_x87 x = ptext $ case x of FF32 -> sLit "s" FF64 -> sLit "l" - FF80 -> sLit "t" _ -> panic "X86.Ppr.pprFormat_x87" + pprCond :: Cond -> SDoc pprCond c = ptext (case c of { @@ -807,224 +803,12 @@ pprInstr (FETCHPC reg) ] --- ----------------------------------------------------------------------------- --- i386 floating-point - --- Simulating a flat register set on the x86 FP stack is tricky. --- you have to free %st(7) before pushing anything on the FP reg stack --- so as to preclude the possibility of a FP stack overflow exception. -pprInstr g@(GMOV src dst) - | src == dst - = empty - | otherwise - = pprG g (hcat [gtab, gpush src 0, gsemi, gpop dst 1]) - --- GLD fmt addr dst ==> FLDsz addr ; FSTP (dst+1) -pprInstr g@(GLD fmt addr dst) - = pprG g (hcat [gtab, text "fld", pprFormat_x87 fmt, gsp, - pprAddr addr, gsemi, gpop dst 1]) - +-- the -- GST fmt src addr ==> FLD dst ; FSTPsz addr -pprInstr g@(GST fmt src addr) - | src == fake0 && fmt /= FF80 -- fstt instruction doesn't exist - = pprG g (hcat [gtab, - text "fst", pprFormat_x87 fmt, gsp, pprAddr addr]) - | otherwise - = pprG g (hcat [gtab, gpush src 0, gsemi, +pprInstr g@(X87Store fmt addr) + = pprX87 g (hcat [gtab, text "fstp", pprFormat_x87 fmt, gsp, pprAddr addr]) -pprInstr g@(GLDZ dst) - = pprG g (hcat [gtab, text "fldz ; ", gpop dst 1]) -pprInstr g@(GLD1 dst) - = pprG g (hcat [gtab, text "fld1 ; ", gpop dst 1]) - -pprInstr (GFTOI src dst) - = pprInstr (GDTOI src dst) - -pprInstr g@(GDTOI src dst) - = pprG g (vcat [ - hcat [gtab, text "subl $8, %esp ; fnstcw 4(%esp)"], - hcat [gtab, gpush src 0], - hcat [gtab, text "movzwl 4(%esp), ", reg, - text " ; orl $0xC00, ", reg], - hcat [gtab, text "movl ", reg, text ", 0(%esp) ; fldcw 0(%esp)"], - hcat [gtab, text "fistpl 0(%esp)"], - hcat [gtab, text "fldcw 4(%esp) ; movl 0(%esp), ", reg], - hcat [gtab, text "addl $8, %esp"] - ]) - where - reg = pprReg II32 dst - -pprInstr (GITOF src dst) - = pprInstr (GITOD src dst) - -pprInstr g@(GITOD src dst) - = pprG g (hcat [gtab, text "pushl ", pprReg II32 src, - text " ; fildl (%esp) ; ", - gpop dst 1, text " ; addl $4,%esp"]) - -pprInstr g@(GDTOF src dst) - = pprG g (vcat [gtab <> gpush src 0, - gtab <> text "subl $4,%esp ; fstps (%esp) ; flds (%esp) ; addl $4,%esp ;", - gtab <> gpop dst 1]) - -{- Gruesome swamp follows. If you're unfortunate enough to have ventured - this far into the jungle AND you give a Rat's Ass (tm) what's going - on, here's the deal. Generate code to do a floating point comparison - of src1 and src2, of kind cond, and set the Zero flag if true. - - The complications are to do with handling NaNs correctly. We want the - property that if either argument is NaN, then the result of the - comparison is False ... except if we're comparing for inequality, - in which case the answer is True. - - Here's how the general (non-inequality) case works. As an - example, consider generating the an equality test: - - pushl %eax -- we need to mess with this - <get src1 to top of FPU stack> - fcomp <src2 location in FPU stack> and pop pushed src1 - -- Result of comparison is in FPU Status Register bits - -- C3 C2 and C0 - fstsw %ax -- Move FPU Status Reg to %ax - sahf -- move C3 C2 C0 from %ax to integer flag reg - -- now the serious magic begins - setpo %ah -- %ah = if comparable(neither arg was NaN) then 1 else 0 - sete %al -- %al = if arg1 == arg2 then 1 else 0 - andb %ah,%al -- %al &= %ah - -- so %al == 1 iff (comparable && same); else it holds 0 - decb %al -- %al == 0, ZeroFlag=1 iff (comparable && same); - else %al == 0xFF, ZeroFlag=0 - -- the zero flag is now set as we desire. - popl %eax - - The special case of inequality differs thusly: - - setpe %ah -- %ah = if incomparable(either arg was NaN) then 1 else 0 - setne %al -- %al = if arg1 /= arg2 then 1 else 0 - orb %ah,%al -- %al = if (incomparable || different) then 1 else 0 - decb %al -- if (incomparable || different) then (%al == 0, ZF=1) - else (%al == 0xFF, ZF=0) --} -pprInstr g@(GCMP cond src1 src2) - | case cond of { NE -> True; _ -> False } - = pprG g (vcat [ - hcat [gtab, text "pushl %eax ; ",gpush src1 0], - hcat [gtab, text "fcomp ", greg src2 1, - text "; fstsw %ax ; sahf ; setpe %ah"], - hcat [gtab, text "setne %al ; ", - text "orb %ah,%al ; decb %al ; popl %eax"] - ]) - | otherwise - = pprG g (vcat [ - hcat [gtab, text "pushl %eax ; ",gpush src1 0], - hcat [gtab, text "fcomp ", greg src2 1, - text "; fstsw %ax ; sahf ; setpo %ah"], - hcat [gtab, text "set", pprCond (fix_FP_cond cond), text " %al ; ", - text "andb %ah,%al ; decb %al ; popl %eax"] - ]) - where - {- On the 486, the flags set by FP compare are the unsigned ones! - (This looks like a HACK to me. WDP 96/03) - -} - fix_FP_cond :: Cond -> Cond - fix_FP_cond GE = GEU - fix_FP_cond GTT = GU - fix_FP_cond LTT = LU - fix_FP_cond LE = LEU - fix_FP_cond EQQ = EQQ - fix_FP_cond NE = NE - fix_FP_cond _ = panic "X86.Ppr.fix_FP_cond: no match" - -- there should be no others - - -pprInstr g@(GABS _ src dst) - = pprG g (hcat [gtab, gpush src 0, text " ; fabs ; ", gpop dst 1]) - -pprInstr g@(GNEG _ src dst) - = pprG g (hcat [gtab, gpush src 0, text " ; fchs ; ", gpop dst 1]) - -pprInstr g@(GSQRT fmt src dst) - = pprG g (hcat [gtab, gpush src 0, text " ; fsqrt"] $$ - hcat [gtab, gcoerceto fmt, gpop dst 1]) - -pprInstr g@(GSIN fmt l1 l2 src dst) - = pprG g (pprTrigOp "fsin" False l1 l2 src dst fmt) - -pprInstr g@(GCOS fmt l1 l2 src dst) - = pprG g (pprTrigOp "fcos" False l1 l2 src dst fmt) - -pprInstr g@(GTAN fmt l1 l2 src dst) - = pprG g (pprTrigOp "fptan" True l1 l2 src dst fmt) - --- In the translations for GADD, GMUL, GSUB and GDIV, --- the first two cases are mere optimisations. The otherwise clause --- generates correct code under all circumstances. - -pprInstr g@(GADD _ src1 src2 dst) - | src1 == dst - = pprG g (text "\t#GADD-xxxcase1" $$ - hcat [gtab, gpush src2 0, - text " ; faddp %st(0),", greg src1 1]) - | src2 == dst - = pprG g (text "\t#GADD-xxxcase2" $$ - hcat [gtab, gpush src1 0, - text " ; faddp %st(0),", greg src2 1]) - | otherwise - = pprG g (hcat [gtab, gpush src1 0, - text " ; fadd ", greg src2 1, text ",%st(0)", - gsemi, gpop dst 1]) - - -pprInstr g@(GMUL _ src1 src2 dst) - | src1 == dst - = pprG g (text "\t#GMUL-xxxcase1" $$ - hcat [gtab, gpush src2 0, - text " ; fmulp %st(0),", greg src1 1]) - | src2 == dst - = pprG g (text "\t#GMUL-xxxcase2" $$ - hcat [gtab, gpush src1 0, - text " ; fmulp %st(0),", greg src2 1]) - | otherwise - = pprG g (hcat [gtab, gpush src1 0, - text " ; fmul ", greg src2 1, text ",%st(0)", - gsemi, gpop dst 1]) - - -pprInstr g@(GSUB _ src1 src2 dst) - | src1 == dst - = pprG g (text "\t#GSUB-xxxcase1" $$ - hcat [gtab, gpush src2 0, - text " ; fsubrp %st(0),", greg src1 1]) - | src2 == dst - = pprG g (text "\t#GSUB-xxxcase2" $$ - hcat [gtab, gpush src1 0, - text " ; fsubp %st(0),", greg src2 1]) - | otherwise - = pprG g (hcat [gtab, gpush src1 0, - text " ; fsub ", greg src2 1, text ",%st(0)", - gsemi, gpop dst 1]) - - -pprInstr g@(GDIV _ src1 src2 dst) - | src1 == dst - = pprG g (text "\t#GDIV-xxxcase1" $$ - hcat [gtab, gpush src2 0, - text " ; fdivrp %st(0),", greg src1 1]) - | src2 == dst - = pprG g (text "\t#GDIV-xxxcase2" $$ - hcat [gtab, gpush src1 0, - text " ; fdivp %st(0),", greg src2 1]) - | otherwise - = pprG g (hcat [gtab, gpush src1 0, - text " ; fdiv ", greg src2 1, text ",%st(0)", - gsemi, gpop dst 1]) - - -pprInstr GFREE - = vcat [ text "\tffree %st(0) ;ffree %st(1) ;ffree %st(2) ;ffree %st(3)", - text "\tffree %st(4) ;ffree %st(5)" - ] -- Atomics @@ -1038,70 +822,11 @@ pprInstr (CMPXCHG format src dst) = pprFormatOpOp (sLit "cmpxchg") format src dst -pprTrigOp :: String -> Bool -> CLabel -> CLabel - -> Reg -> Reg -> Format -> SDoc -pprTrigOp op -- fsin, fcos or fptan - isTan -- we need a couple of extra steps if we're doing tan - l1 l2 -- internal labels for us to use - src dst fmt - = -- We'll be needing %eax later on - hcat [gtab, text "pushl %eax;"] $$ - -- tan is going to use an extra space on the FP stack - (if isTan then hcat [gtab, text "ffree %st(6)"] else empty) $$ - -- First put the value in %st(0) and try to apply the op to it - hcat [gpush src 0, text ("; " ++ op)] $$ - -- Now look to see if C2 was set (overflow, |value| >= 2^63) - hcat [gtab, text "fnstsw %ax"] $$ - hcat [gtab, text "test $0x400,%eax"] $$ - -- If we were in bounds then jump to the end - hcat [gtab, text "je " <> ppr l1] $$ - -- Otherwise we need to shrink the value. Start by - -- loading pi, doubleing it (by adding it to itself), - -- and then swapping pi with the value, so the value we - -- want to apply op to is in %st(0) again - hcat [gtab, text "ffree %st(7); fldpi"] $$ - hcat [gtab, text "fadd %st(0),%st"] $$ - hcat [gtab, text "fxch %st(1)"] $$ - -- Now we have a loop in which we make the value smaller, - -- see if it's small enough, and loop if not - (ppr l2 <> char ':') $$ - hcat [gtab, text "fprem1"] $$ - -- My Debian libc uses fstsw here for the tan code, but I can't - -- see any reason why it should need to be different for tan. - hcat [gtab, text "fnstsw %ax"] $$ - hcat [gtab, text "test $0x400,%eax"] $$ - hcat [gtab, text "jne " <> ppr l2] $$ - hcat [gtab, text "fstp %st(1)"] $$ - hcat [gtab, text op] $$ - (ppr l1 <> char ':') $$ - -- Pop the 1.0 tan gave us - (if isTan then hcat [gtab, text "fstp %st(0)"] else empty) $$ - -- Restore %eax - hcat [gtab, text "popl %eax;"] $$ - -- And finally make the result the right size - hcat [gtab, gcoerceto fmt, gpop dst 1] -------------------------- +-- some left over --- coerce %st(0) to the specified size -gcoerceto :: Format -> SDoc -gcoerceto FF64 = empty -gcoerceto FF32 = empty --text "subl $4,%esp ; fstps (%esp) ; flds (%esp) ; addl $4,%esp ; " -gcoerceto _ = panic "X86.Ppr.gcoerceto: no match" -gpush :: Reg -> RegNo -> SDoc -gpush reg offset - = hcat [text "fld ", greg reg offset] - -gpop :: Reg -> RegNo -> SDoc -gpop reg offset - = hcat [text "fstp ", greg reg offset] - -greg :: Reg -> RegNo -> SDoc -greg reg offset = text "%st(" <> int (gregno reg - firstfake+offset) <> char ')' - -gsemi :: SDoc -gsemi = text " ; " gtab :: SDoc gtab = char '\t' @@ -1109,45 +834,15 @@ gtab = char '\t' gsp :: SDoc gsp = char ' ' -gregno :: Reg -> RegNo -gregno (RegReal (RealRegSingle i)) = i -gregno _ = --pprPanic "gregno" (ppr other) - 999 -- bogus; only needed for debug printing - -pprG :: Instr -> SDoc -> SDoc -pprG fake actual - = (char '#' <> pprGInstr fake) $$ actual - - -pprGInstr :: Instr -> SDoc -pprGInstr (GMOV src dst) = pprFormatRegReg (sLit "gmov") FF64 src dst -pprGInstr (GLD fmt src dst) = pprFormatAddrReg (sLit "gld") fmt src dst -pprGInstr (GST fmt src dst) = pprFormatRegAddr (sLit "gst") fmt src dst - -pprGInstr (GLDZ dst) = pprFormatReg (sLit "gldz") FF64 dst -pprGInstr (GLD1 dst) = pprFormatReg (sLit "gld1") FF64 dst -pprGInstr (GFTOI src dst) = pprFormatFormatRegReg (sLit "gftoi") FF32 II32 src dst -pprGInstr (GDTOI src dst) = pprFormatFormatRegReg (sLit "gdtoi") FF64 II32 src dst -pprGInstr (GITOF src dst) = pprFormatFormatRegReg (sLit "gitof") II32 FF32 src dst -pprGInstr (GITOD src dst) = pprFormatFormatRegReg (sLit "gitod") II32 FF64 src dst -pprGInstr (GDTOF src dst) = pprFormatFormatRegReg (sLit "gdtof") FF64 FF32 src dst +pprX87 :: Instr -> SDoc -> SDoc +pprX87 fake actual + = (char '#' <> pprX87Instr fake) $$ actual -pprGInstr (GCMP co src dst) = pprCondRegReg (sLit "gcmp_") FF64 co src dst -pprGInstr (GABS fmt src dst) = pprFormatRegReg (sLit "gabs") fmt src dst -pprGInstr (GNEG fmt src dst) = pprFormatRegReg (sLit "gneg") fmt src dst -pprGInstr (GSQRT fmt src dst) = pprFormatRegReg (sLit "gsqrt") fmt src dst -pprGInstr (GSIN fmt _ _ src dst) = pprFormatRegReg (sLit "gsin") fmt src dst -pprGInstr (GCOS fmt _ _ src dst) = pprFormatRegReg (sLit "gcos") fmt src dst -pprGInstr (GTAN fmt _ _ src dst) = pprFormatRegReg (sLit "gtan") fmt src dst - -pprGInstr (GADD fmt src1 src2 dst) = pprFormatRegRegReg (sLit "gadd") fmt src1 src2 dst -pprGInstr (GSUB fmt src1 src2 dst) = pprFormatRegRegReg (sLit "gsub") fmt src1 src2 dst -pprGInstr (GMUL fmt src1 src2 dst) = pprFormatRegRegReg (sLit "gmul") fmt src1 src2 dst -pprGInstr (GDIV fmt src1 src2 dst) = pprFormatRegRegReg (sLit "gdiv") fmt src1 src2 dst - -pprGInstr _ = panic "X86.Ppr.pprGInstr: no match" +pprX87Instr :: Instr -> SDoc +pprX87Instr (X87Store fmt dst) = pprFormatAddr (sLit "gst") fmt dst +pprX87Instr _ = panic "X86.Ppr.pprX87Instr: no match" pprDollImm :: Imm -> SDoc pprDollImm i = text "$" <> pprImm i @@ -1215,23 +910,6 @@ pprOpOp name format op1 op2 ] -pprFormatReg :: PtrString -> Format -> Reg -> SDoc -pprFormatReg name format reg1 - = hcat [ - pprMnemonic name format, - pprReg format reg1 - ] - - -pprFormatRegReg :: PtrString -> Format -> Reg -> Reg -> SDoc -pprFormatRegReg name format reg1 reg2 - = hcat [ - pprMnemonic name format, - pprReg format reg1, - comma, - pprReg format reg2 - ] - pprRegReg :: PtrString -> Reg -> Reg -> SDoc pprRegReg name reg1 reg2 @@ -1266,31 +944,6 @@ pprCondOpReg name format cond op1 reg2 pprReg format reg2 ] -pprCondRegReg :: PtrString -> Format -> Cond -> Reg -> Reg -> SDoc -pprCondRegReg name format cond reg1 reg2 - = hcat [ - char '\t', - ptext name, - pprCond cond, - space, - pprReg format reg1, - comma, - pprReg format reg2 - ] - -pprFormatFormatRegReg :: PtrString -> Format -> Format -> Reg -> Reg -> SDoc -pprFormatFormatRegReg name format1 format2 reg1 reg2 - = hcat [ - char '\t', - ptext name, - pprFormat format1, - pprFormat format2, - space, - pprReg format1 reg1, - comma, - pprReg format2 reg2 - ] - pprFormatFormatOpReg :: PtrString -> Format -> Format -> Operand -> Reg -> SDoc pprFormatFormatOpReg name format1 format2 op1 reg2 = hcat [ @@ -1300,17 +953,6 @@ pprFormatFormatOpReg name format1 format2 op1 reg2 pprReg format2 reg2 ] -pprFormatRegRegReg :: PtrString -> Format -> Reg -> Reg -> Reg -> SDoc -pprFormatRegRegReg name format reg1 reg2 reg3 - = hcat [ - pprMnemonic name format, - pprReg format reg1, - comma, - pprReg format reg2, - comma, - pprReg format reg3 - ] - pprFormatOpOpReg :: PtrString -> Format -> Operand -> Operand -> Reg -> SDoc pprFormatOpOpReg name format op1 op2 reg3 = hcat [ @@ -1322,26 +964,16 @@ pprFormatOpOpReg name format op1 op2 reg3 pprReg format reg3 ] -pprFormatAddrReg :: PtrString -> Format -> AddrMode -> Reg -> SDoc -pprFormatAddrReg name format op dst - = hcat [ - pprMnemonic name format, - pprAddr op, - comma, - pprReg format dst - ] -pprFormatRegAddr :: PtrString -> Format -> Reg -> AddrMode -> SDoc -pprFormatRegAddr name format src op +pprFormatAddr :: PtrString -> Format -> AddrMode -> SDoc +pprFormatAddr name format op = hcat [ pprMnemonic name format, - pprReg format src, comma, pprAddr op ] - pprShift :: PtrString -> Format -> Operand -> Operand -> SDoc pprShift name format src dest = hcat [ diff --git a/compiler/nativeGen/X86/RegInfo.hs b/compiler/nativeGen/X86/RegInfo.hs index 339fdd7a8c..24823e3985 100644 --- a/compiler/nativeGen/X86/RegInfo.hs +++ b/compiler/nativeGen/X86/RegInfo.hs @@ -25,10 +25,13 @@ import X86.Regs mkVirtualReg :: Unique -> Format -> VirtualReg mkVirtualReg u format = case format of - FF32 -> VirtualRegSSE u - FF64 -> VirtualRegSSE u - FF80 -> VirtualRegD u - _other -> VirtualRegI u + FF32 -> VirtualRegD u + -- for scalar F32, we use the same xmm as F64! + -- this is a hack that needs some improvement. + -- For now we map both to being allocated as "Double" Registers + -- on X86/X86_64 + FF64 -> VirtualRegD u + _other -> VirtualRegI u regDotColor :: Platform -> RealReg -> SDoc regDotColor platform reg @@ -37,11 +40,12 @@ regDotColor platform reg _ -> panic "Register not assigned a color" regColors :: Platform -> UniqFM [Char] -regColors platform = listToUFM (normalRegColors platform ++ fpRegColors platform) +regColors platform = listToUFM (normalRegColors platform) normalRegColors :: Platform -> [(Reg,String)] normalRegColors platform = zip (map regSingle [0..lastint platform]) colors + ++ zip (map regSingle [firstxmm..lastxmm platform]) greys where -- 16 colors - enough for amd64 gp regs colors = ["#800000","#ff0000","#808000","#ffff00","#008000" @@ -49,17 +53,6 @@ normalRegColors platform = ,"#800080","#ff00ff","#87005f","#875f00","#87af00" ,"#ff00af"] -fpRegColors :: Platform -> [(Reg,String)] -fpRegColors platform = - [ (fake0, "red") - , (fake1, "red") - , (fake2, "red") - , (fake3, "red") - , (fake4, "red") - , (fake5, "red") ] - - ++ zip (map regSingle [firstxmm..lastxmm platform]) greys - where -- 16 shades of grey, enough for the currently supported -- SSE extensions. greys = ["#0e0e0e","#1c1c1c","#2a2a2a","#383838","#464646" diff --git a/compiler/nativeGen/X86/Regs.hs b/compiler/nativeGen/X86/Regs.hs index b7181189a4..37e99c5a71 100644 --- a/compiler/nativeGen/X86/Regs.hs +++ b/compiler/nativeGen/X86/Regs.hs @@ -29,7 +29,7 @@ module X86.Regs ( EABase(..), EAIndex(..), addrModeRegs, eax, ebx, ecx, edx, esi, edi, ebp, esp, - fake0, fake1, fake2, fake3, fake4, fake5, firstfake, + rax, rbx, rcx, rdx, rsi, rdi, rbp, rsp, r8, r9, r10, r11, r12, r13, r14, r15, @@ -86,10 +86,6 @@ virtualRegSqueeze cls vr VirtualRegF{} -> 0 _other -> 0 - RcDoubleSSE - -> case vr of - VirtualRegSSE{} -> 1 - _other -> 0 _other -> 0 @@ -100,7 +96,7 @@ realRegSqueeze cls rr RcInteger -> case rr of RealRegSingle regNo - | regNo < firstfake -> 1 + | regNo < firstxmm -> 1 | otherwise -> 0 RealRegPair{} -> 0 @@ -108,15 +104,11 @@ realRegSqueeze cls rr RcDouble -> case rr of RealRegSingle regNo - | regNo >= firstfake && regNo <= lastfake -> 1 + | regNo >= firstxmm -> 1 | otherwise -> 0 RealRegPair{} -> 0 - RcDoubleSSE - -> case rr of - RealRegSingle regNo | regNo >= firstxmm -> 1 - _otherwise -> 0 _other -> 0 @@ -210,17 +202,16 @@ spRel dflags n -- use a Word32 to represent the set of free registers in the register -- allocator. -firstfake, lastfake :: RegNo -firstfake = 16 -lastfake = 21 + firstxmm :: RegNo -firstxmm = 24 +firstxmm = 16 +-- on 32bit platformOSs, only the first 8 XMM/YMM/ZMM registers are available lastxmm :: Platform -> RegNo lastxmm platform - | target32Bit platform = 31 - | otherwise = 39 + | target32Bit platform = firstxmm + 7 -- xmm0 - xmmm7 + | otherwise = firstxmm + 15 -- xmm0 -xmm15 lastint :: Platform -> RegNo lastint platform @@ -230,14 +221,13 @@ lastint platform intregnos :: Platform -> [RegNo] intregnos platform = [0 .. lastint platform] -fakeregnos :: [RegNo] -fakeregnos = [firstfake .. lastfake] + xmmregnos :: Platform -> [RegNo] xmmregnos platform = [firstxmm .. lastxmm platform] floatregnos :: Platform -> [RegNo] -floatregnos platform = fakeregnos ++ xmmregnos platform +floatregnos platform = xmmregnos platform -- argRegs is the set of regs which are read for an n-argument call to C. -- For archs which pass all args on the stack (x86), is empty. @@ -257,20 +247,19 @@ classOfRealReg :: Platform -> RealReg -> RegClass -- However, we can get away without this at the moment because the -- only allocatable integer regs are also 8-bit compatible (1, 3, 4). classOfRealReg platform reg - = case reg of + = case reg of RealRegSingle i - | i <= lastint platform -> RcInteger - | i <= lastfake -> RcDouble - | otherwise -> RcDoubleSSE - - RealRegPair{} -> panic "X86.Regs.classOfRealReg: RegPairs on this arch" + | i <= lastint platform -> RcInteger + | i <= lastxmm platform -> RcDouble + | otherwise -> panic "X86.Reg.classOfRealReg registerSingle too high" + _ -> panic "X86.Regs.classOfRealReg: RegPairs on this arch" -- | Get the name of the register with this number. +-- NOTE: fixme, we dont track which "way" the XMM registers are used showReg :: Platform -> RegNo -> String showReg platform n - | n >= firstxmm = "%xmm" ++ show (n-firstxmm) - | n >= firstfake = "%fake" ++ show (n-firstfake) - | n >= 8 = "%r" ++ show n + | n >= firstxmm && n <= lastxmm platform = "%xmm" ++ show (n-firstxmm) + | n >= 8 && n < firstxmm = "%r" ++ show n | otherwise = regNames platform A.! n regNames :: Platform -> A.Array Int String @@ -290,17 +279,16 @@ Intel x86 architecture: - Only ebx, esi, edi and esp are available across a C call (they are callee-saves). - Registers 0-7 have 16-bit counterparts (ax, bx etc.) - Registers 0-3 have 8 bit counterparts (ah, bh etc.) -- Registers fake0..fake5 are fakes; we pretend x86 has 6 conventionally-addressable - fp registers, and 3-operand insns for them, and we translate this into - real stack-based x86 fp code after register allocation. The fp registers are all Double registers; we don't have any RcFloat class regs. @regClass@ barfs if you give it a VirtualRegF, and mkVReg above should never generate them. + +TODO: cleanup modelling float vs double registers and how they are the same class. -} -fake0, fake1, fake2, fake3, fake4, fake5, - eax, ebx, ecx, edx, esp, ebp, esi, edi :: Reg + +eax, ebx, ecx, edx, esp, ebp, esi, edi :: Reg eax = regSingle 0 ebx = regSingle 1 @@ -310,12 +298,7 @@ esi = regSingle 4 edi = regSingle 5 ebp = regSingle 6 esp = regSingle 7 -fake0 = regSingle 16 -fake1 = regSingle 17 -fake2 = regSingle 18 -fake3 = regSingle 19 -fake4 = regSingle 20 -fake5 = regSingle 21 + @@ -362,22 +345,22 @@ r12 = regSingle 12 r13 = regSingle 13 r14 = regSingle 14 r15 = regSingle 15 -xmm0 = regSingle 24 -xmm1 = regSingle 25 -xmm2 = regSingle 26 -xmm3 = regSingle 27 -xmm4 = regSingle 28 -xmm5 = regSingle 29 -xmm6 = regSingle 30 -xmm7 = regSingle 31 -xmm8 = regSingle 32 -xmm9 = regSingle 33 -xmm10 = regSingle 34 -xmm11 = regSingle 35 -xmm12 = regSingle 36 -xmm13 = regSingle 37 -xmm14 = regSingle 38 -xmm15 = regSingle 39 +xmm0 = regSingle 16 +xmm1 = regSingle 17 +xmm2 = regSingle 18 +xmm3 = regSingle 19 +xmm4 = regSingle 20 +xmm5 = regSingle 21 +xmm6 = regSingle 22 +xmm7 = regSingle 23 +xmm8 = regSingle 24 +xmm9 = regSingle 25 +xmm10 = regSingle 26 +xmm11 = regSingle 27 +xmm12 = regSingle 28 +xmm13 = regSingle 29 +xmm14 = regSingle 30 +xmm15 = regSingle 31 ripRel :: Displacement -> AddrMode ripRel imm = AddrBaseIndex EABaseRip EAIndexNone imm @@ -411,7 +394,7 @@ callClobberedRegs platform -- Only xmm0-5 are caller-saves registers on 64bit windows. -- ( https://docs.microsoft.com/en-us/cpp/build/register-usage ) -- For details check the Win64 ABI. - ++ map regSingle fakeregnos ++ map xmm [0 .. 5] + ++ map xmm [0 .. 5] | otherwise -- all xmm regs are caller-saves -- caller-saves registers @@ -430,11 +413,15 @@ allIntArgRegs platform = panic "X86.Regs.allIntArgRegs: not defined for this platform" | otherwise = [rdi,rsi,rdx,rcx,r8,r9] + +-- | on 64bit platforms we pass the first 8 float/double arguments +-- in the xmm registers. allFPArgRegs :: Platform -> [Reg] allFPArgRegs platform | platformOS platform == OSMinGW32 = panic "X86.Regs.allFPArgRegs: not defined for this platform" - | otherwise = map regSingle [firstxmm .. firstxmm+7] + | otherwise = map regSingle [firstxmm .. firstxmm + 7 ] + -- Machine registers which might be clobbered by instructions that -- generate results into fixed registers, or need arguments in a fixed diff --git a/compiler/types/TyCon.hs b/compiler/types/TyCon.hs index a943284612..e55cf8b2a3 100644 --- a/compiler/types/TyCon.hs +++ b/compiler/types/TyCon.hs @@ -1328,12 +1328,12 @@ Roughly in order of "includes more information": number of bits. It may represent a signed or unsigned integer, a floating-point value, or an address. - data Width = W8 | W16 | W32 | W64 | W80 | W128 + data Width = W8 | W16 | W32 | W64 | W128 - Size, which is used in the native code generator, is Width + floating point information. - data Size = II8 | II16 | II32 | II64 | FF32 | FF64 | FF80 + data Size = II8 | II16 | II32 | II64 | FF32 | FF64 it is necessary because e.g. the instruction to move a 64-bit float on x86 (movsd) is different from the instruction to move a 64-bit diff --git a/includes/CodeGen.Platform.hs b/includes/CodeGen.Platform.hs index 2a373283d4..867dd14fb0 100644 --- a/includes/CodeGen.Platform.hs +++ b/includes/CodeGen.Platform.hs @@ -41,65 +41,59 @@ import Reg # define r15 15 # endif -# define fake0 16 -# define fake1 17 -# define fake2 18 -# define fake3 19 -# define fake4 20 -# define fake5 21 -- N.B. XMM, YMM, and ZMM are all aliased to the same hardware registers hence -- being assigned the same RegNos. -# define xmm0 24 -# define xmm1 25 -# define xmm2 26 -# define xmm3 27 -# define xmm4 28 -# define xmm5 29 -# define xmm6 30 -# define xmm7 31 -# define xmm8 32 -# define xmm9 33 -# define xmm10 34 -# define xmm11 35 -# define xmm12 36 -# define xmm13 37 -# define xmm14 38 -# define xmm15 39 +# define xmm0 16 +# define xmm1 17 +# define xmm2 18 +# define xmm3 19 +# define xmm4 20 +# define xmm5 21 +# define xmm6 22 +# define xmm7 23 +# define xmm8 24 +# define xmm9 25 +# define xmm10 26 +# define xmm11 27 +# define xmm12 28 +# define xmm13 29 +# define xmm14 30 +# define xmm15 31 -# define ymm0 24 -# define ymm1 25 -# define ymm2 26 -# define ymm3 27 -# define ymm4 28 -# define ymm5 29 -# define ymm6 30 -# define ymm7 31 -# define ymm8 32 -# define ymm9 33 -# define ymm10 34 -# define ymm11 35 -# define ymm12 36 -# define ymm13 37 -# define ymm14 38 -# define ymm15 39 +# define ymm0 16 +# define ymm1 17 +# define ymm2 18 +# define ymm3 19 +# define ymm4 20 +# define ymm5 21 +# define ymm6 22 +# define ymm7 23 +# define ymm8 24 +# define ymm9 25 +# define ymm10 26 +# define ymm11 27 +# define ymm12 28 +# define ymm13 29 +# define ymm14 30 +# define ymm15 31 -# define zmm0 24 -# define zmm1 25 -# define zmm2 26 -# define zmm3 27 -# define zmm4 28 -# define zmm5 29 -# define zmm6 30 -# define zmm7 31 -# define zmm8 32 -# define zmm9 33 -# define zmm10 34 -# define zmm11 35 -# define zmm12 36 -# define zmm13 37 -# define zmm14 38 -# define zmm15 39 +# define zmm0 16 +# define zmm1 17 +# define zmm2 18 +# define zmm3 19 +# define zmm4 20 +# define zmm5 21 +# define zmm6 22 +# define zmm7 23 +# define zmm8 24 +# define zmm9 25 +# define zmm10 26 +# define zmm11 27 +# define zmm12 28 +# define zmm13 29 +# define zmm14 30 +# define zmm15 31 -- Note: these are only needed for ARM/ARM64 because globalRegMaybe is now used in CmmSink.hs. -- Since it's only used to check 'isJust', the actual values don't matter, thus diff --git a/libraries/base/tests/Numeric/all.T b/libraries/base/tests/Numeric/all.T index c4c9bb4646..9969a5552a 100644 --- a/libraries/base/tests/Numeric/all.T +++ b/libraries/base/tests/Numeric/all.T @@ -7,21 +7,10 @@ test('num006', normal, compile_and_run, ['']) test('num007', normal, compile_and_run, ['']) test('num008', normal, compile_and_run, ['']) -# On i386, we need -msse2 to get reliable floating point results -if config.arch == 'i386': - opts = '-msse2' -else: - opts = '' + test('num009', [ when(fast(), skip) - , when(wordsize(32), expect_broken(15062)) - , when(platform('i386-apple-darwin'), expect_broken(2370)) - , when(platform('powerpc64le-unknown-linux'), expect_broken(13634)) - , when(opsys('mingw32'), omit_ways(['ghci'])) ], - # We get different results at 1e20 on x86/Windows, so there is - # a special output file for that. I (SDM) don't think these are - # serious, since the results for lower numbers are all fine. - # We also get another set of results for 1e02 with GHCi, so - # I'm skipping that way altogether. + # , when(wordsize(32), expect_broken(15062)) + , when(platform('powerpc64le-unknown-linux'), expect_broken(13634))], compile_and_run, [opts]) test('num010', when(platform('i386-apple-darwin'), expect_broken_for(7043, 'ghci')), diff --git a/libraries/base/tests/Numeric/num009.hs b/libraries/base/tests/Numeric/num009.hs index e405ddf050..5133d43920 100644 --- a/libraries/base/tests/Numeric/num009.hs +++ b/libraries/base/tests/Numeric/num009.hs @@ -1,8 +1,6 @@ -- trac #2059 -- --- Note that this test fails miserably when compiled to use X87 floating point. --- For instance, in the case of (sin 1e20) the X86 FSIN instruction doesn't even --- get the sign right on my machine. + module Main(main) where |
