diff options
Diffstat (limited to 'compiler/nativeGen/X86/CodeGen.hs')
-rw-r--r-- | compiler/nativeGen/X86/CodeGen.hs | 105 |
1 files changed, 96 insertions, 9 deletions
diff --git a/compiler/nativeGen/X86/CodeGen.hs b/compiler/nativeGen/X86/CodeGen.hs index a2e26bd68b..38dc76090d 100644 --- a/compiler/nativeGen/X86/CodeGen.hs +++ b/compiler/nativeGen/X86/CodeGen.hs @@ -644,20 +644,27 @@ getRegister' dflags is32Bit (CmmMachOp mop [x]) = do -- unary MachOps -- Nop conversions MO_UU_Conv W32 W8 -> toI8Reg W32 x MO_SS_Conv W32 W8 -> toI8Reg W32 x + MO_XX_Conv W32 W8 -> toI8Reg W32 x MO_UU_Conv W16 W8 -> toI8Reg W16 x MO_SS_Conv W16 W8 -> toI8Reg W16 x + MO_XX_Conv W16 W8 -> toI8Reg W16 x MO_UU_Conv W32 W16 -> toI16Reg W32 x MO_SS_Conv W32 W16 -> toI16Reg W32 x + MO_XX_Conv W32 W16 -> toI16Reg W32 x MO_UU_Conv W64 W32 | not is32Bit -> conversionNop II64 x MO_SS_Conv W64 W32 | not is32Bit -> conversionNop II64 x + MO_XX_Conv W64 W32 | not is32Bit -> conversionNop II64 x MO_UU_Conv W64 W16 | not is32Bit -> toI16Reg W64 x MO_SS_Conv W64 W16 | not is32Bit -> toI16Reg W64 x + MO_XX_Conv W64 W16 | not is32Bit -> toI16Reg W64 x MO_UU_Conv W64 W8 | not is32Bit -> toI8Reg W64 x MO_SS_Conv W64 W8 | not is32Bit -> toI8Reg W64 x + MO_XX_Conv W64 W8 | not is32Bit -> toI8Reg W64 x MO_UU_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intFormat rep1) x MO_SS_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intFormat rep1) x + MO_XX_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intFormat rep1) x -- widenings MO_UU_Conv W8 W32 -> integerExtend W8 W32 MOVZxL x @@ -668,16 +675,32 @@ getRegister' dflags is32Bit (CmmMachOp mop [x]) = do -- unary MachOps MO_SS_Conv W16 W32 -> integerExtend W16 W32 MOVSxL x MO_SS_Conv W8 W16 -> integerExtend W8 W16 MOVSxL x + -- We don't care about the upper bits for MO_XX_Conv, so MOV is enough. However, on 32-bit we + -- have 8-bit registers only for a few registers (as opposed to x86-64 where every register + -- has 8-bit version). So for 32-bit code, we'll just zero-extend. + MO_XX_Conv W8 W32 + | is32Bit -> integerExtend W8 W32 MOVZxL x + | otherwise -> integerExtend W8 W32 MOV x + MO_XX_Conv W8 W16 + | is32Bit -> integerExtend W8 W16 MOVZxL x + | otherwise -> integerExtend W8 W16 MOV x + MO_XX_Conv W16 W32 -> integerExtend W16 W32 MOV x + MO_UU_Conv W8 W64 | not is32Bit -> integerExtend W8 W64 MOVZxL x MO_UU_Conv W16 W64 | not is32Bit -> integerExtend W16 W64 MOVZxL x MO_UU_Conv W32 W64 | not is32Bit -> integerExtend W32 W64 MOVZxL x MO_SS_Conv W8 W64 | not is32Bit -> integerExtend W8 W64 MOVSxL x MO_SS_Conv W16 W64 | not is32Bit -> integerExtend W16 W64 MOVSxL x MO_SS_Conv W32 W64 | not is32Bit -> integerExtend W32 W64 MOVSxL x - -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl. - -- However, we don't want the register allocator to throw it - -- away as an unnecessary reg-to-reg move, so we keep it in - -- the form of a movzl and print it as a movl later. + -- For 32-to-64 bit zero extension, amd64 uses an ordinary movl. + -- However, we don't want the register allocator to throw it + -- away as an unnecessary reg-to-reg move, so we keep it in + -- the form of a movzl and print it as a movl later. + -- This doesn't apply to MO_XX_Conv since in this case we don't care about + -- the upper bits. So we can just use MOV. + MO_XX_Conv W8 W64 | not is32Bit -> integerExtend W8 W64 MOV x + MO_XX_Conv W16 W64 | not is32Bit -> integerExtend W16 W64 MOV x + MO_XX_Conv W32 W64 | not is32Bit -> integerExtend W32 W64 MOV x MO_FF_Conv W32 W64 | sse2 -> coerceFP2FP W64 x @@ -787,6 +810,7 @@ getRegister' _ is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps MO_S_MulMayOflo rep -> imulMayOflo rep x y + MO_Mul W8 -> imulW8 x y MO_Mul rep -> triv_op rep IMUL MO_And rep -> triv_op rep AND MO_Or rep -> triv_op rep OR @@ -822,6 +846,21 @@ getRegister' _ is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps triv_op width instr = trivialCode width op (Just op) x y where op = instr (intFormat width) + -- Special case for IMUL for bytes, since the result of IMULB will be in + -- %ax, the split to %dx/%edx/%rdx and %ax/%eax/%rax happens only for wider + -- values. + imulW8 :: CmmExpr -> CmmExpr -> NatM Register + imulW8 arg_a arg_b = do + (a_reg, a_code) <- getNonClobberedReg arg_a + b_code <- getAnyReg arg_b + + let code = a_code `appOL` b_code eax `appOL` + toOL [ IMUL2 format (OpReg a_reg) ] + format = intFormat W8 + + return (Fixed format eax code) + + imulMayOflo :: Width -> CmmExpr -> CmmExpr -> NatM Register imulMayOflo rep a b = do (a_reg, a_code) <- getNonClobberedReg a @@ -916,6 +955,18 @@ getRegister' _ is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps return (Any format code) ---------------------- + + -- See Note [DIV/IDIV for bytes] + div_code W8 signed quotient x y = do + let widen | signed = MO_SS_Conv W8 W16 + | otherwise = MO_UU_Conv W8 W16 + div_code + W16 + signed + quotient + (CmmMachOp widen [x]) + (CmmMachOp widen [y]) + div_code width signed quotient x y = do (y_op, y_code) <- getRegOrMem y -- cannot be clobbered x_code <- getAnyReg x @@ -2277,6 +2328,18 @@ genCCall _ is32Bit target dest_regs args = do = divOp platform signed width results (Just arg_x_high) arg_x_low arg_y divOp2 _ _ _ _ _ = panic "genCCall: Wrong number of arguments for divOp2" + + -- See Note [DIV/IDIV for bytes] + divOp platform signed W8 [res_q, res_r] m_arg_x_high arg_x_low arg_y = + let widen | signed = MO_SS_Conv W8 W16 + | otherwise = MO_UU_Conv W8 W16 + arg_x_low_16 = CmmMachOp widen [arg_x_low] + arg_y_16 = CmmMachOp widen [arg_y] + m_arg_x_high_16 = (\p -> CmmMachOp widen [p]) <$> m_arg_x_high + in divOp + platform signed W16 [res_q, res_r] + m_arg_x_high_16 arg_x_low_16 arg_y_16 + divOp platform signed width [res_q, res_r] m_arg_x_high arg_x_low arg_y = do let format = intFormat width @@ -2318,6 +2381,22 @@ genCCall _ is32Bit target dest_regs args = do addSubIntC _ _ _ _ _ _ _ _ = panic "genCCall: Wrong number of arguments/results for addSubIntC" +-- Note [DIV/IDIV for bytes] +-- +-- IDIV reminder: +-- Size Dividend Divisor Quotient Remainder +-- byte %ax r/m8 %al %ah +-- word %dx:%ax r/m16 %ax %dx +-- dword %edx:%eax r/m32 %eax %edx +-- qword %rdx:%rax r/m64 %rax %rdx +-- +-- We do a special case for the byte division because the current +-- codegen doesn't deal well with accessing %ah register (also, +-- accessing %ah in 64-bit mode is complicated because it cannot be an +-- operand of many instructions). So we just widen operands to 16 bits +-- and get the results from %al, %dl. This is not optimal, but a few +-- register moves are probably not a huge deal when doing division. + genCCall32' :: DynFlags -> ForeignTarget -- function to call -> [CmmFormal] -- where to put the result @@ -2330,7 +2409,7 @@ genCCall32' dflags target dest_regs args = do -- Align stack to 16n for calls, assuming a starting stack -- alignment of 16n - word_size on procedure entry. Which we -- maintiain. See Note [rts/StgCRun.c : Stack Alignment on X86] - sizes = map (arg_size . cmmExprType dflags) (reverse args) + sizes = map (arg_size_bytes . cmmExprType dflags) (reverse args) raw_arg_size = sum sizes + wORD_SIZE dflags arg_pad_size = (roundTo 16 $ raw_arg_size) - raw_arg_size tot_arg_size = raw_arg_size + arg_pad_size - wORD_SIZE dflags @@ -2421,8 +2500,9 @@ genCCall32' dflags target dest_regs args = do assign_code dest_regs) where - arg_size :: CmmType -> Int -- Width in bytes - arg_size ty = widthInBytes (typeWidth ty) + -- If the size is smaller than the word, we widen things (see maybePromoteCArg) + arg_size_bytes :: CmmType -> Int + arg_size_bytes ty = max (widthInBytes (typeWidth ty)) (widthInBytes (wordWidth dflags)) roundTo a x | x `mod` a == 0 = x | otherwise = x + a - (x `mod` a) @@ -2461,6 +2541,10 @@ genCCall32' dflags target dest_regs args = do ) | otherwise = do + -- Arguments can be smaller than 32-bit, but we still use @PUSH + -- II32@ - the usual calling conventions expect integers to be + -- 4-byte aligned. + ASSERT((typeWidth arg_ty) <= W32) return () (operand, code) <- getOperand arg delta <- getDeltaNat setDeltaNat (delta-size) @@ -2470,7 +2554,7 @@ genCCall32' dflags target dest_regs args = do where arg_ty = cmmExprType dflags arg - size = arg_size arg_ty -- Byte size + size = arg_size_bytes arg_ty -- Byte size genCCall64' :: DynFlags -> ForeignTarget -- function to call @@ -2700,7 +2784,10 @@ genCCall64' dflags target dest_regs args = do push_args rest code' | otherwise = do - ASSERT(width == W64) return () + -- Arguments can be smaller than 64-bit, but we still use @PUSH + -- II64@ - the usual calling conventions expect integers to be + -- 8-byte aligned. + ASSERT(width <= W64) return () (arg_op, arg_code) <- getOperand arg delta <- getDeltaNat setDeltaNat (delta-arg_size) |