summaryrefslogtreecommitdiff
path: root/asmcomp/amd64/emit.mlp
diff options
context:
space:
mode:
Diffstat (limited to 'asmcomp/amd64/emit.mlp')
-rw-r--r--asmcomp/amd64/emit.mlp111
1 files changed, 72 insertions, 39 deletions
diff --git a/asmcomp/amd64/emit.mlp b/asmcomp/amd64/emit.mlp
index a9ea549037..8dad2206aa 100644
--- a/asmcomp/amd64/emit.mlp
+++ b/asmcomp/amd64/emit.mlp
@@ -10,8 +10,6 @@
(* *)
(***********************************************************************)
-(* $Id: emit.mlp 12959 2012-09-27 13:12:51Z maranget $ *)
-
(* Emission of x86-64 (AMD 64) assembly code *)
open Cmm
@@ -25,6 +23,8 @@ open Emitaux
let macosx = (Config.system = "macosx")
let mingw64 = (Config.system = "mingw64")
+let fp = Config.with_frame_pointers
+
(* Tradeoff between code size and code speed *)
let fastcode_flag = ref true
@@ -34,12 +34,13 @@ let stack_offset = ref 0
(* Layout of the stack frame *)
let frame_required () =
- !contains_calls || num_stack_slots.(0) > 0 || num_stack_slots.(1) > 0
+ fp || !contains_calls || num_stack_slots.(0) > 0 || num_stack_slots.(1) > 0
let frame_size () = (* includes return address *)
if frame_required() then begin
let sz =
- (!stack_offset + 8 * (num_stack_slots.(0) + num_stack_slots.(1)) + 8)
+ (!stack_offset + 8 * (num_stack_slots.(0) + num_stack_slots.(1)) + 8
+ + (if fp then 8 else 0) )
in Misc.align sz 16
end else
!stack_offset + 8
@@ -109,13 +110,13 @@ let emit_reg = function
let reg_low_8_name =
[| "%al"; "%bl"; "%dil"; "%sil"; "%dl"; "%cl"; "%r8b"; "%r9b";
- "%r10b"; "%r11b"; "%bpl"; "%r12b"; "%r13b" |]
+ "%r12b"; "%r13b"; "%r10b"; "%r11b"; "%bpl" |]
let reg_low_16_name =
[| "%ax"; "%bx"; "%di"; "%si"; "%dx"; "%cx"; "%r8w"; "%r9w";
- "%r10w"; "%r11w"; "%bp"; "%r12w"; "%r13w" |]
+ "%r12w"; "%r13w"; "%r10w"; "%r11w"; "%bp" |]
let reg_low_32_name =
[| "%eax"; "%ebx"; "%edi"; "%esi"; "%edx"; "%ecx"; "%r8d"; "%r9d";
- "%r10d"; "%r11d"; "%ebp"; "%r12d"; "%r13d" |]
+ "%r12d"; "%r13d"; "%r10d"; "%r11d"; "%ebp" |]
let emit_subreg tbl r =
match r.loc with
@@ -290,25 +291,25 @@ let emit_float_test cmp neg arg lbl =
` jp {emit_label lbl}\n`; (* branch taken if unordered *)
` jne {emit_label lbl}\n` (* branch taken if x<y or x>y *)
| (Clt, _) ->
- ` comisd {emit_reg arg.(0)}, {emit_reg arg.(1)}\n`; (* swap compare *)
+ ` comisd {emit_reg arg.(0)}, {emit_reg arg.(1)}\n`; (* swap compare *)
if not neg then
` ja {emit_label lbl}\n` (* branch taken if y>x i.e. x<y *)
else
` jbe {emit_label lbl}\n` (* taken if unordered or y<=x i.e. !(x<y) *)
| (Cle, _) ->
- ` comisd {emit_reg arg.(0)}, {emit_reg arg.(1)}\n`; (* swap compare *)
+ ` comisd {emit_reg arg.(0)}, {emit_reg arg.(1)}\n`; (* swap compare *)
if not neg then
` jae {emit_label lbl}\n` (* branch taken if y>=x i.e. x<=y *)
else
` jb {emit_label lbl}\n` (* taken if unordered or y<x i.e. !(x<=y) *)
| (Cgt, _) ->
- ` comisd {emit_reg arg.(1)}, {emit_reg arg.(0)}\n`;
+ ` comisd {emit_reg arg.(1)}, {emit_reg arg.(0)}\n`;
if not neg then
` ja {emit_label lbl}\n` (* branch taken if x>y *)
else
` jbe {emit_label lbl}\n` (* taken if unordered or x<=y i.e. !(x>y) *)
| (Cge, _) ->
- ` comisd {emit_reg arg.(1)}, {emit_reg arg.(0)}\n`; (* swap compare *)
+ ` comisd {emit_reg arg.(1)}, {emit_reg arg.(0)}\n`; (* swap compare *)
if not neg then
` jae {emit_label lbl}\n` (* branch taken if x>=y *)
else
@@ -318,9 +319,12 @@ let emit_float_test cmp neg arg lbl =
let output_epilogue f =
if frame_required() then begin
- let n = frame_size() - 8 in
+ let n = frame_size() - 8 - (if fp then 8 else 0) in
` addq ${emit_int n}, %rsp\n`;
cfi_adjust_cfa_offset (-n);
+ if fp then begin
+ ` popq %rbp\n`
+ end;
f ();
(* reset CFA back cause function body may continue *)
cfi_adjust_cfa_offset n
@@ -328,6 +332,23 @@ let output_epilogue f =
else
f ()
+(* Floating-point constants *)
+
+let float_constants = ref ([] : (string * int) list)
+
+let add_float_constant cst =
+ try
+ List.assoc cst !float_constants
+ with
+ Not_found ->
+ let lbl = new_label() in
+ float_constants := (cst, lbl) :: !float_constants;
+ lbl
+
+let emit_float_constant (cst, lbl) =
+ `{emit_label lbl}:`;
+ emit_float64_directive ".quad" cst
+
(* Output the assembly code for an instruction *)
(* Name of current function *)
@@ -335,8 +356,6 @@ let function_name = ref ""
(* Entry point for tail recursive calls *)
let tailrec_entry_point = ref 0
-let float_constants = ref ([] : (int * string) list)
-
(* Emit an instruction *)
let emit_instr fallthrough i =
emit_debug_info i.dbg;
@@ -367,8 +386,7 @@ let emit_instr fallthrough i =
| 0x0000_0000_0000_0000L -> (* +0.0 *)
` xorpd {emit_reg i.res.(0)}, {emit_reg i.res.(0)}\n`
| _ ->
- let lbl = new_label() in
- float_constants := (lbl, s) :: !float_constants;
+ let lbl = add_float_constant s in
` movsd {emit_label lbl}(%rip), {emit_reg i.res.(0)}\n`
end
| Lop(Iconst_symbol s) ->
@@ -545,6 +563,22 @@ let emit_instr fallthrough i =
` addq ${emit_int n}, {emit_addressing addr i.arg 0}\n`
| Lop(Ispecific(Ifloatarithmem(op, addr))) ->
` {emit_string(instr_for_floatarithmem op)} {emit_addressing addr i.arg 1}, {emit_reg i.res.(0)}\n`
+ | Lop(Ispecific(Ibswap size)) ->
+ begin match size with
+ | 16 ->
+ ` xchg %ah, %al\n`;
+ ` movzwq {emit_reg16 i.res.(0)}, {emit_reg i.res.(0)}\n`
+ | 32 ->
+ ` bswap {emit_reg32 i.res.(0)}\n`;
+ ` movslq {emit_reg32 i.res.(0)}, {emit_reg i.res.(0)}\n`
+ | 64 ->
+ ` bswap {emit_reg i.res.(0)}\n`
+ | _ -> assert false
+ end
+ | Lop(Ispecific Isqrtf) ->
+ ` sqrtsd {emit_reg i.arg.(0)}, {emit_reg i.res.(0)}\n`
+ | Lop(Ispecific(Ifloatsqrtf addr)) ->
+ ` sqrtsd {emit_addressing addr i.arg 0}, {emit_reg i.res.(0)}\n`
| Lreloadretaddr ->
()
| Lreturn ->
@@ -657,26 +691,20 @@ let rec emit_all fallthrough i =
emit_instr fallthrough i;
emit_all (Linearize.has_fallthrough i.desc) i.next
-(* Emission of the floating-point constants *)
-
-let emit_float_constant (lbl, cst) =
- `{emit_label lbl}:`;
- emit_float64_directive ".quad" cst
-
(* Emission of the profiling prelude *)
let emit_profile () =
match Config.system with
| "linux" | "gnu" ->
(* mcount preserves rax, rcx, rdx, rsi, rdi, r8, r9 explicitly
- and rbx, rbp, r12-r15 like all C functions.
- We need to preserve r10 and r11 ourselves, since OCaml can
- use them for argument passing. *)
+ and rbx, rbp, r12-r15 like all C functions. This includes
+ all the registers used for argument passing, so we don't
+ need to preserve other regs. We do need to initialize rbp
+ like mcount expects it, though. *)
` pushq %r10\n`;
- ` movq %rsp, %rbp\n`;
- ` pushq %r11\n`;
+ if not fp then
+ ` movq %rsp, %rbp\n`;
` {emit_call "mcount"}\n`;
- ` popq %r11\n`;
` popq %r10\n`
| _ ->
() (*unsupported yet*)
@@ -688,7 +716,6 @@ let fundecl fundecl =
fastcode_flag := fundecl.fun_fast;
tailrec_entry_point := new_label();
stack_offset := 0;
- float_constants := [];
call_gc_sites := [];
bound_error_sites := [];
bound_error_call := 0;
@@ -704,9 +731,14 @@ let fundecl fundecl =
`{emit_symbol fundecl.fun_name}:\n`;
emit_debug_info fundecl.fun_dbg;
cfi_startproc ();
+ if fp then begin
+ ` pushq %rbp\n`;
+ cfi_adjust_cfa_offset 8;
+ ` movq %rsp, %rbp\n`;
+ end;
if !Clflags.gprofile then emit_profile();
if frame_required() then begin
- let n = frame_size() - 8 in
+ let n = frame_size() - 8 - (if fp then 8 else 0) in
` subq ${emit_int n}, %rsp\n`;
cfi_adjust_cfa_offset n;
end;
@@ -720,15 +752,6 @@ let fundecl fundecl =
` .type {emit_symbol fundecl.fun_name},@function\n`;
` .size {emit_symbol fundecl.fun_name},.-{emit_symbol fundecl.fun_name}\n`
| _ -> ()
- end;
- if !float_constants <> [] then begin
- if macosx then
- ` .literal8\n`
- else if mingw64 then
- ` .section .rdata,\"dr\"\n`
- else
- ` .section .rodata.cst8,\"a\",@progbits\n`;
- List.iter emit_float_constant !float_constants
end
(* Emission of data *)
@@ -771,6 +794,7 @@ let data l =
let begin_assembly() =
reset_debug_info(); (* PR#5603 *)
+ float_constants := [];
if !Clflags.dlcode then begin
(* from amd64.S; could emit these constants on demand *)
if macosx then
@@ -795,6 +819,15 @@ let begin_assembly() =
if macosx then ` nop\n` (* PR#4690 *)
let end_assembly() =
+ if !float_constants <> [] then begin
+ if macosx then
+ ` .literal8\n`
+ else if mingw64 then
+ ` .section .rdata,\"dr\"\n`
+ else
+ ` .section .rodata.cst8,\"a\",@progbits\n`;
+ List.iter emit_float_constant !float_constants
+ end;
let lbl_end = Compilenv.make_symbol (Some "code_end") in
` .text\n`;
if macosx then ` nop\n`; (* suppress "ld warning: atom sorting error" *)