6 files changed, 93 insertions, 56 deletions
diff --git a/asmcomp/amd64/CSE.ml b/asmcomp/amd64/CSE.ml
new file mode 100644
index 0000000000..63ef088531
--- /dev/null
+++ b/asmcomp/amd64/CSE.ml
@@ -0,0 +1,36 @@
+(***********************************************************************)
+(*                                                                     *)
+(*                                OCaml                                *)
+(*                                                                     *)
+(*            Xavier Leroy, projet Gallium, INRIA Rocquencourt         *)
+(*                                                                     *)
+(*  Copyright 2014 Institut National de Recherche en Informatique et   *)
+(*  en Automatique.  All rights reserved.  This file is distributed    *)
+(*  under the terms of the Q Public License version 1.0.               *)
+(*                                                                     *)
+(***********************************************************************)
+
+(* CSE for the AMD64 *)
+
+open Arch
+open Mach
+open CSEgen
+
+class cse = object (self)
+
+inherit cse_generic as super
+
+method! class_of_operation op =
+  match op with
+  | Ispecific(Ilea _) -> Op_pure
+  | Ispecific(Istore_int(_, _, is_asg)) -> Op_store is_asg
+  | Ispecific(Istore_symbol(_, _, is_asg)) -> Op_store is_asg
+  | Ispecific(Ioffset_loc(_, _)) -> Op_store true
+  | Ispecific(Ifloatarithmem _) -> Op_load
+  | _ -> super#class_of_operation op
+
+end
+
+let fundecl f =
+  (new cse)#fundecl f
+
diff --git a/asmcomp/amd64/arch.ml b/asmcomp/amd64/arch.ml
index b0a5ffb8b7..3741dd74bc 100644
--- a/asmcomp/amd64/arch.ml
+++ b/asmcomp/amd64/arch.ml
@@ -33,8 +33,8 @@ type addressing_mode =
 
 type specific_operation =
     Ilea of addressing_mode             (* "lea" gives scaled adds *)
-  | Istore_int of nativeint * addressing_mode (* Store an integer constant *)
-  | Istore_symbol of string * addressing_mode (* Store a symbol *)
+  | Istore_int of nativeint * addressing_mode * bool (* Store an integer constant *)
+  | Istore_symbol of string * addressing_mode * bool (* Store a symbol *)
   | Ioffset_loc of int * addressing_mode (* Add a constant to a location *)
   | Ifloatarithmem of float_operation * addressing_mode
                                        (* Float arith operation with memory *)
@@ -101,10 +101,14 @@ let print_addressing printreg addr ppf arg =
 let print_specific_operation printreg op ppf arg =
   match op with
   | Ilea addr -> print_addressing printreg addr ppf arg
-  | Istore_int(n, addr) ->
-      fprintf ppf "[%a] := %nd" (print_addressing printreg addr) arg n
-  | Istore_symbol(lbl, addr) ->
-      fprintf ppf "[%a] := \"%s\"" (print_addressing printreg addr) arg lbl
+  | Istore_int(n, addr, is_assign) ->
+      fprintf ppf "[%a] := %nd %s"
+         (print_addressing printreg addr) arg n
+         (if is_assign then "(assign)" else "(init)")
+  | Istore_symbol(lbl, addr, is_assign) ->
+      fprintf ppf "[%a] := \"%s\" %s"
+         (print_addressing printreg addr) arg lbl
+         (if is_assign then "(assign)" else "(init)")
   | Ioffset_loc(n, addr) ->
       fprintf ppf "[%a] +:= %i" (print_addressing printreg addr) arg n
   | Isqrtf ->
diff --git a/asmcomp/amd64/emit.mlp b/asmcomp/amd64/emit.mlp
index bdcc3a18d3..b576ece983 100644
--- a/asmcomp/amd64/emit.mlp
+++ b/asmcomp/amd64/emit.mlp
@@ -335,15 +335,16 @@ let output_epilogue f =
 
 (* Floating-point constants *)
 
-let float_constants = ref ([] : (string * int) list)
+let float_constants = ref ([] : (int64 * int) list)
 
 let add_float_constant cst =
+  let repr = Int64.bits_of_float cst in
   try
-    List.assoc cst !float_constants
+    List.assoc repr !float_constants
   with
     Not_found ->
       let lbl = new_label() in
-      float_constants := (cst, lbl) :: !float_constants;
+      float_constants := (repr, lbl) :: !float_constants;
       lbl
 
 let emit_float_constant (cst, lbl) =
@@ -382,12 +383,12 @@ let emit_instr fallthrough i =
           `	movq	${emit_nativeint n}, {emit_reg i.res.(0)}\n`
         else
           `	movabsq	${emit_nativeint n}, {emit_reg i.res.(0)}\n`
-    | Lop(Iconst_float s) ->
-        begin match Int64.bits_of_float (float_of_string s) with
+    | Lop(Iconst_float f) ->
+        begin match Int64.bits_of_float f with
         | 0x0000_0000_0000_0000L ->       (* +0.0 *)
           `	xorpd	{emit_reg i.res.(0)}, {emit_reg i.res.(0)}\n`
         | _ ->
-          let lbl = add_float_constant s in
+          let lbl = add_float_constant f in
           `	movsd	{emit_label lbl}(%rip), {emit_reg i.res.(0)}\n`
         end
     | Lop(Iconst_symbol s) ->
@@ -448,7 +449,7 @@ let emit_instr fallthrough i =
           | Double | Double_u ->
             `	movsd	{emit_addressing addr i.arg 0}, {emit_reg dest}\n`
         end
-    | Lop(Istore(chunk, addr)) ->
+    | Lop(Istore(chunk, addr, _)) ->
         begin match chunk with
           | Word ->
             `	movq	{emit_reg i.arg.(0)}, {emit_addressing addr i.arg 1}\n`
@@ -541,9 +542,9 @@ let emit_instr fallthrough i =
         `	cvttsd2siq	{emit_reg i.arg.(0)}, {emit_reg i.res.(0)}\n`
     | Lop(Ispecific(Ilea addr)) ->
         `	leaq	{emit_addressing addr i.arg 0}, {emit_reg i.res.(0)}\n`
-    | Lop(Ispecific(Istore_int(n, addr))) ->
+    | Lop(Ispecific(Istore_int(n, addr, _))) ->
         `	movq	${emit_nativeint n}, {emit_addressing addr i.arg 0}\n`
-    | Lop(Ispecific(Istore_symbol(s, addr))) ->
+    | Lop(Ispecific(Istore_symbol(s, addr, _))) ->
         assert (not !pic_code && not !Clflags.dlcode);
         `	movq	${emit_symbol s}, {emit_addressing addr i.arg 0}\n`
     | Lop(Ispecific(Ioffset_loc(n, addr))) ->
@@ -764,9 +765,9 @@ let emit_item = function
   | Cint n ->
       `	.quad	{emit_nativeint n}\n`
   | Csingle f ->
-      emit_float32_directive ".long" f
+      emit_float32_directive ".long" (Int32.bits_of_float f)
   | Cdouble f ->
-      emit_float64_directive ".quad" f
+      emit_float64_directive ".quad" (Int64.bits_of_float f)
   | Csymbol_address s ->
       `	.quad	{emit_symbol s}\n`
   | Clabel_address lbl ->
diff --git a/asmcomp/amd64/emit_nt.mlp b/asmcomp/amd64/emit_nt.mlp
index 77156b8f01..a66f0c93b8 100644
--- a/asmcomp/amd64/emit_nt.mlp
+++ b/asmcomp/amd64/emit_nt.mlp
@@ -53,9 +53,10 @@ let slot_offset loc cl =
       else !stack_offset + (num_stack_slots.(0) + n) * 8
   | Outgoing n -> n
 
-(* Output a 32 bit integer in hex *)
+(* Output a 32 or 64 bit integer in hex *)
 
 let emit_int32 n = emit_printf "0%lxh" n
+let emit_int64 n = emit_printf "0%Lxh" n
 
 (* Symbols *)
 
@@ -321,36 +322,20 @@ let output_epilogue () =
 
 (* Floating-point constants *)
 
-let float_constants = ref ([] : (string * int) list)
+let float_constants = ref ([] : (int64 * int) list)
 
 let add_float_constant cst =
+  let repr = Int64.bits_of_float cst in
   try
-    List.assoc cst !float_constants
+    List.assoc repr !float_constants
   with
     Not_found ->
       let lbl = new_label() in
-      float_constants := (cst, lbl) :: !float_constants;
+      float_constants := (repr, lbl) :: !float_constants;
       lbl
 
-let emit_float s =
-  (* MASM doesn't like floating-point constants such as 2e9.
-     Turn them into 2.0e9. *)
-  let pos_e = ref (-1) and pos_dot = ref (-1) in
-  for i = 0 to String.length s - 1 do
-    match s.[i] with
-      'e'|'E' -> pos_e := i
-    | '.'     -> pos_dot := i
-    | _       -> ()
-  done;
-  if !pos_dot < 0 && !pos_e >= 0 then begin
-    emit_string (String.sub s 0 !pos_e);
-    emit_string ".0";
-    emit_string (String.sub s !pos_e (String.length s - !pos_e))
-  end else
-    emit_string s
-
 let emit_float_constant (cst, lbl) =
-  `{emit_label lbl}     REAL8   {emit_float cst}\n`
+  `{emit_label lbl}:	QWORD	{emit_int64 cst}\n`
 
 let emit_movabs reg n =
   (* force ml64 to use mov reg, imm64 instruction *)
@@ -389,12 +374,12 @@ let emit_instr fallthrough i =
           `	mov	{emit_reg32 i.res.(0)}, {emit_nativeint n}\n`
         else
           emit_movabs i.res.(0) n
-    | Lop(Iconst_float s) ->
-        begin match Int64.bits_of_float (float_of_string s) with
+    | Lop(Iconst_float f) ->
+        begin match Int64.bits_of_float f with
         | 0x0000_0000_0000_0000L ->       (* +0.0 *)
           `	xorpd	{emit_reg i.res.(0)}, {emit_reg i.res.(0)}\n`
         | _ ->
-          let lbl = add_float_constant s in
+          let lbl = add_float_constant f in
           `	movsd	{emit_reg i.res.(0)}, {emit_label lbl}\n`
         end
     | Lop(Iconst_symbol s) ->
@@ -458,7 +443,7 @@ let emit_instr fallthrough i =
           | Double | Double_u ->
             `	movsd	{emit_reg dest}, REAL8 PTR {emit_addressing addr i.arg 0}\n`
         end
-    | Lop(Istore(chunk, addr)) ->
+    | Lop(Istore(chunk, addr, _)) ->
         begin match chunk with
           | Word ->
             `	mov	QWORD PTR {emit_addressing addr i.arg 1}, {emit_reg i.arg.(0)}\n`
@@ -547,9 +532,9 @@ let emit_instr fallthrough i =
         `	cvttsd2si	{emit_reg i.res.(0)}, {emit_reg i.arg.(0)}\n`
     | Lop(Ispecific(Ilea addr)) ->
         `	lea	{emit_reg i.res.(0)}, {emit_addressing addr i.arg 0}\n`
-    | Lop(Ispecific(Istore_int(n, addr))) ->
+    | Lop(Ispecific(Istore_int(n, addr, _))) ->
         `	mov	QWORD PTR {emit_addressing addr i.arg 0}, {emit_nativeint n}\n`
-    | Lop(Ispecific(Istore_symbol(s, addr))) ->
+    | Lop(Ispecific(Istore_symbol(s, addr, _))) ->
         assert (not !pic_code);
 	add_used_symbol s;
         `	mov	QWORD PTR {emit_addressing addr i.arg 0}, OFFSET {emit_symbol s}\n`
@@ -721,9 +706,9 @@ let emit_item = function
   | Cint n ->
       `	QWORD	{emit_nativeint n}\n`
   | Csingle f ->
-      `	REAL4	{emit_float f}\n`
+      `	DWORD	{emit_int32 (Int32.bits_of_float f)}\n`
   | Cdouble f ->
-      `	REAL8	{emit_float f}\n`
+      `	QWORD	{emit_int64 (Int64.bits_of_float f)}\n`
   | Csymbol_address s ->
       add_used_symbol s;
       `	QWORD	{emit_symbol s}\n`
diff --git a/asmcomp/amd64/proc.ml b/asmcomp/amd64/proc.ml
index b6e0fa94ab..cd06559e1e 100644
--- a/asmcomp/amd64/proc.ml
+++ b/asmcomp/amd64/proc.ml
@@ -259,7 +259,7 @@ let destroyed_at_oper = function
   | Iop(Iextcall(_, false)) -> destroyed_at_c_call
   | Iop(Iintop(Idiv | Imod)) | Iop(Iintop_imm((Idiv | Imod), _))
         -> [| rax; rdx |]
-  | Iop(Istore(Single, _)) -> [| rxmm15 |]
+  | Iop(Istore(Single, _, _)) -> [| rxmm15 |]
   | Iop(Ialloc _ | Iintop(Imulh | Icomp _) | Iintop_imm((Icomp _), _))
         -> [| rax |]
   | Iswitch(_, _) -> [| rax; rdx |]
@@ -290,10 +290,21 @@ let max_register_pressure = function
     if fp then [| 10; 16 |] else [| 11; 16 |]
   | Ialloc _ | Iintop(Icomp _) | Iintop_imm((Icomp _), _) ->
     if fp then [| 11; 16 |] else [| 12; 16 |]
-  | Istore(Single, _) ->
+  | Istore(Single, _, _) ->
     if fp then [| 12; 15 |] else [| 13; 15 |]
   | _ -> if fp then [| 12; 16 |] else [| 13; 16 |]
 
+(* Pure operations (without any side effect besides updating their result
+   registers). *)
+
+let op_is_pure = function
+  | Icall_ind | Icall_imm _ | Itailcall_ind | Itailcall_imm _
+  | Iextcall _ | Istackoffset _ | Istore _ | Ialloc _
+  | Iintop(Icheckbound) | Iintop_imm(Icheckbound, _) -> false
+  | Ispecific(Ilea _) -> true
+  | Ispecific _ -> false
+  | _ -> true
+
 (* Layout of the stack frame *)
 
 let num_stack_slots = [| 0; 0 |]
diff --git a/asmcomp/amd64/selection.ml b/asmcomp/amd64/selection.ml
index 5e6afbcabf..fa7fe66c05 100644
--- a/asmcomp/amd64/selection.ml
+++ b/asmcomp/amd64/selection.ml
@@ -152,20 +152,20 @@ method select_addressing chunk exp =
     | Ascaledadd(e1, e2, scale) ->
         (Iindexed2scaled(scale, d), Ctuple[e1; e2])
 
-method! select_store addr exp =
+method! select_store is_assign addr exp =
   match exp with
     Cconst_int n when self#is_immediate n ->
-      (Ispecific(Istore_int(Nativeint.of_int n, addr)), Ctuple [])
+      (Ispecific(Istore_int(Nativeint.of_int n, addr, is_assign)), Ctuple [])
   | (Cconst_natint n | Cconst_blockheader n) when self#is_immediate_natint n ->
-      (Ispecific(Istore_int(n, addr)), Ctuple [])
+      (Ispecific(Istore_int(n, addr, is_assign)), Ctuple [])
   | Cconst_pointer n when self#is_immediate n ->
-      (Ispecific(Istore_int(Nativeint.of_int n, addr)), Ctuple [])
+      (Ispecific(Istore_int(Nativeint.of_int n, addr, is_assign)), Ctuple [])
   | Cconst_natpointer n when self#is_immediate_natint n ->
-      (Ispecific(Istore_int(n, addr)), Ctuple [])
+      (Ispecific(Istore_int(n, addr, is_assign)), Ctuple [])
   | Cconst_symbol s when not (!pic_code || !Clflags.dlcode) ->
-      (Ispecific(Istore_symbol(s, addr)), Ctuple [])
+      (Ispecific(Istore_symbol(s, addr, is_assign)), Ctuple [])
   | _ ->
-      super#select_store addr exp
+      super#select_store is_assign addr exp
 
 method! select_operation op args =
   match op with