diff options
author | Damien Doligez <damien.doligez-inria.fr> | 2014-04-28 11:49:52 +0000 |
---|---|---|
committer | Damien Doligez <damien.doligez-inria.fr> | 2014-04-28 11:49:52 +0000 |
commit | cc25e53ad310eb32d4854a1505ac3a9a917c8368 (patch) | |
tree | 101a8f24490f8ef63c75820cfd945cc4d7f669fc /asmcomp | |
parent | e94190206fe983154d5606a448e434aec03783d0 (diff) | |
parent | f1f362698f931494a305d48667936ffee2012b64 (diff) | |
download | ocaml-safe-string.tar.gz |
merge trunk up to commit 14699safe-string
git-svn-id: http://caml.inria.fr/svn/ocaml/branches/safe-string@14700 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02
Diffstat (limited to 'asmcomp')
51 files changed, 1169 insertions, 336 deletions
diff --git a/asmcomp/.ignore b/asmcomp/.ignore index 31d00178a0..8c24e74ad1 100644 --- a/asmcomp/.ignore +++ b/asmcomp/.ignore @@ -4,3 +4,4 @@ proc.ml selection.ml reload.ml scheduling.ml +CSE.ml diff --git a/asmcomp/CSEgen.ml b/asmcomp/CSEgen.ml new file mode 100644 index 0000000000..1cbef266b8 --- /dev/null +++ b/asmcomp/CSEgen.ml @@ -0,0 +1,258 @@ +(***********************************************************************) +(* *) +(* OCaml *) +(* *) +(* Xavier Leroy, projet Gallium, INRIA Rocquencourt *) +(* *) +(* Copyright 2014 Institut National de Recherche en Informatique et *) +(* en Automatique. All rights reserved. This file is distributed *) +(* under the terms of the Q Public License version 1.0. *) +(* *) +(***********************************************************************) + +(* Common subexpression elimination by value numbering over extended + basic blocks. *) + +open Mach + +type valnum = int + +(* We maintain sets of equations of the form + valnums = operation(valnums) + plus a mapping from registers to value numbers. *) + +type rhs = operation * valnum array + +module Equations = + Map.Make(struct type t = rhs let compare = Pervasives.compare end) + +type numbering = + { num_next: int; (* next fresh value number *) + num_eqs: valnum array Equations.t; (* mapping rhs -> valnums *) + num_reg: valnum Reg.Map.t } (* mapping register -> valnum *) + +let empty_numbering = + { num_next = 0; num_eqs = Equations.empty; num_reg = Reg.Map.empty } + +(** [valnum_reg n r] returns the value number for the contents of + register [r]. If none exists, a fresh value number is returned + and associated with register [r]. The possibly updated numbering + is also returned. [valnum_regs] is similar, but for an array of + registers. *) + +let valnum_reg n r = + try + (n, Reg.Map.find r n.num_reg) + with Not_found -> + let v = n.num_next in + ({n with num_next = v + 1; num_reg = Reg.Map.add r v n.num_reg}, v) + +let valnum_regs n rs = + let l = Array.length rs in + let vs = Array.make l 0 in + let n = ref n in + for i = 0 to l-1 do + let (ni, vi) = valnum_reg !n rs.(i) in + vs.(i) <- vi; + n := ni + done; + (!n, vs) + +(* Look up the set of equations for an equation with the given rhs. + Return [Some res] if there is one, where [res] is the lhs. *) + +let find_equation n rhs = + try + Some(Equations.find rhs n.num_eqs) + with Not_found -> + None + +(* Find a set of registers containing the given value numbers. *) + +let find_regs_containing n vs = + match Array.length vs with + | 0 -> Some [||] + | 1 -> let v = vs.(0) in + Reg.Map.fold (fun r v' res -> if v' = v then Some [|r|] else res) + n.num_reg None + | _ -> assert false + +(* Associate the given value numbers to the given result registers, + without adding new equations. *) + +let set_known_regs n rs vs = + match Array.length rs with + | 0 -> n + | 1 -> { n with num_reg = Reg.Map.add rs.(0) vs.(0) n.num_reg } + | _ -> assert false + +(* Record the effect of a move: no new equations, but the result reg + maps to the same value number as the argument reg. *) + +let set_move n src dst = + let (n1, v) = valnum_reg n src in + { n1 with num_reg = Reg.Map.add dst v n1.num_reg } + +(* Record the equation [fresh valnums = rhs] and associate the given + result registers [rs] to [fresh valnums]. *) + +let set_fresh_regs n rs rhs = + match Array.length rs with + | 0 -> { n with num_eqs = Equations.add rhs [||] n.num_eqs } + | 1 -> let v = n.num_next in + { num_next = v + 1; + num_eqs = Equations.add rhs [|v|] n.num_eqs; + num_reg = Reg.Map.add rs.(0) v n.num_reg } + | _ -> assert false + +(* Forget everything we know about the given result registers, + which are receiving unpredictable values at run-time. *) + +let set_unknown_regs n rs = + { n with num_reg = Array.fold_right Reg.Map.remove rs n.num_reg } + +(* Keep only the equations satisfying the given predicate. *) + +let filter_equations pred n = + { n with num_eqs = Equations.filter (fun (op,_) res -> pred op) n.num_eqs } + +(* Prepend a reg-reg move *) + +let insert_move srcs dsts i = + match Array.length srcs with + | 0 -> i + | 1 -> instr_cons (Iop Imove) srcs dsts i + | _ -> assert false + +(* Classification of operations *) + +type op_class = + | Op_pure (* pure, produce one result *) + | Op_checkbound (* checkbound-style: no result, can raise an exn *) + | Op_load (* memory load *) + | Op_store of bool (* memory store, false = init, true = assign *) + | Op_other (* anything else that does not store in memory *) + +class cse_generic = object (self) + +(* Default classification of operations. Can be overriden in + processor-specific files to classify specific operations better. *) + +method class_of_operation op = + match op with + | Imove | Ispill | Ireload -> assert false (* treated specially *) + | Iconst_int _ | Iconst_float _ | Iconst_symbol _ + | Iconst_blockheader _ -> Op_pure + | Icall_ind | Icall_imm _ | Itailcall_ind | Itailcall_imm _ + | Iextcall _ -> assert false (* treated specially *) + | Istackoffset _ -> Op_other + | Iload(_,_) -> Op_load + | Istore(_,_,asg) -> Op_store asg + | Ialloc _ -> Op_other + | Iintop(Icheckbound) -> Op_checkbound + | Iintop _ -> Op_pure + | Iintop_imm(Icheckbound, _) -> Op_checkbound + | Iintop_imm(_, _) -> Op_pure + | Inegf | Iabsf | Iaddf | Isubf | Imulf | Idivf + | Ifloatofint | Iintoffloat -> Op_pure + | Ispecific _ -> Op_other + +(* Operations that are so cheap that it isn't worth factoring them. *) + +method is_cheap_operation op = + match op with + | Iconst_int _ | Iconst_blockheader _ -> true + | _ -> false + +(* Forget all equations involving memory loads. Performed after a + non-initializing store *) + +method private kill_loads n = + filter_equations (fun o -> self#class_of_operation o <> Op_load) n + +(* Keep only equations involving checkbounds, and forget register values. + Performed across a call. *) + +method private keep_checkbounds n = + filter_equations (fun o -> self#class_of_operation o = Op_checkbound) + {n with num_reg = Reg.Map.empty } + +(* Perform CSE on the given instruction [i] and its successors. + [n] is the value numbering current at the beginning of [i]. *) + +method private cse n i = + match i.desc with + | Iend | Ireturn | Iop(Itailcall_ind) | Iop(Itailcall_imm _) + | Iexit _ | Iraise _ -> + i + | Iop (Imove | Ispill | Ireload) -> + (* For moves, we associate the same value number to the result reg + as to the argument reg. *) + let n1 = set_move n i.arg.(0) i.res.(0) in + {i with next = self#cse n1 i.next} + | Iop (Icall_ind | Icall_imm _ | Iextcall _) -> + (* We don't perform CSE across function calls, as it increases + register pressure too much. We do remember the checkbound + instructions already performed, though, since their reuse + cannot increase register pressure. *) + let n1 = self#keep_checkbounds n in + {i with next = self#cse n1 i.next} + | Iop op -> + begin match self#class_of_operation op with + | Op_pure | Op_checkbound | Op_load -> + assert (Array.length i.res <= 1); + let (n1, varg) = valnum_regs n i.arg in + begin match find_equation n1 (op, varg) with + | Some vres -> + (* This operation was computed earlier. *) + let n2 = set_known_regs n1 i.res vres in + begin match find_regs_containing n1 vres with + | Some res when not (self#is_cheap_operation op) -> + (* We can replace res <- op args with r <- move res. + If the operation is very cheap to compute, e.g. + an integer constant, don't bother. *) + insert_move res i.res (self#cse n2 i.next) + | _ -> + {i with next = self#cse n2 i.next} + end + | None -> + (* This operation produces a result we haven't seen earlier. *) + let n2 = set_fresh_regs n1 i.res (op, varg) in + {i with next = self#cse n2 i.next} + end + | Op_store false | Op_other -> + (* An initializing store or an "other" operation do not invalidate + any equations, but we do not know anything about the results. *) + let n1 = set_unknown_regs n i.res in + {i with next = self#cse n1 i.next} + | Op_store true -> + (* A non-initializing store: it can invalidate + anything we know about prior loads. *) + let n1 = set_unknown_regs (self#kill_loads n) i.res in + {i with next = self#cse n1 i.next} + end + (* For control structures, we set the numbering to empty at every + join point, but propagate the current numbering across fork points. *) + | Iifthenelse(test, ifso, ifnot) -> + {i with desc = Iifthenelse(test, self#cse n ifso, self#cse n ifnot); + next = self#cse empty_numbering i.next} + | Iswitch(index, cases) -> + {i with desc = Iswitch(index, Array.map (self#cse n) cases); + next = self#cse empty_numbering i.next} + | Iloop(body) -> + {i with desc = Iloop(self#cse empty_numbering body); + next = self#cse empty_numbering i.next} + | Icatch(nfail, body, handler) -> + {i with desc = Icatch(nfail, self#cse n body, self#cse empty_numbering handler); + next = self#cse empty_numbering i.next} + | Itrywith(body, handler) -> + {i with desc = Itrywith(self#cse n body, self#cse empty_numbering handler); + next = self#cse empty_numbering i.next} + +method fundecl f = + {f with fun_body = self#cse empty_numbering f.fun_body} + +end + + + diff --git a/asmcomp/CSEgen.mli b/asmcomp/CSEgen.mli new file mode 100644 index 0000000000..c19855eca5 --- /dev/null +++ b/asmcomp/CSEgen.mli @@ -0,0 +1,38 @@ +(***********************************************************************) +(* *) +(* OCaml *) +(* *) +(* Xavier Leroy, projet Gallium, INRIA Rocquencourt *) +(* *) +(* Copyright 2014 Institut National de Recherche en Informatique et *) +(* en Automatique. All rights reserved. This file is distributed *) +(* under the terms of the Q Public License version 1.0. *) +(* *) +(***********************************************************************) + +(* Common subexpression elimination by value numbering over extended + basic blocks. *) + +type op_class = + | Op_pure (* pure, produce one result *) + | Op_checkbound (* checkbound-style: no result, can raise an exn *) + | Op_load (* memory load *) + | Op_store of bool (* memory store, false = init, true = assign *) + | Op_other (* anything else that does not store in memory *) + +class cse_generic : object + (* The following methods can be overriden to handle processor-specific + operations. *) + + method class_of_operation: Mach.operation -> op_class + + method is_cheap_operation: Mach.operation -> bool + (* Operations that are so cheap that it isn't worth factoring them. *) + + (* The following method is the entry point and should not be overridden *) + method fundecl: Mach.fundecl -> Mach.fundecl + +end + + + diff --git a/asmcomp/amd64/CSE.ml b/asmcomp/amd64/CSE.ml new file mode 100644 index 0000000000..63ef088531 --- /dev/null +++ b/asmcomp/amd64/CSE.ml @@ -0,0 +1,36 @@ +(***********************************************************************) +(* *) +(* OCaml *) +(* *) +(* Xavier Leroy, projet Gallium, INRIA Rocquencourt *) +(* *) +(* Copyright 2014 Institut National de Recherche en Informatique et *) +(* en Automatique. All rights reserved. This file is distributed *) +(* under the terms of the Q Public License version 1.0. *) +(* *) +(***********************************************************************) + +(* CSE for the AMD64 *) + +open Arch +open Mach +open CSEgen + +class cse = object (self) + +inherit cse_generic as super + +method! class_of_operation op = + match op with + | Ispecific(Ilea _) -> Op_pure + | Ispecific(Istore_int(_, _, is_asg)) -> Op_store is_asg + | Ispecific(Istore_symbol(_, _, is_asg)) -> Op_store is_asg + | Ispecific(Ioffset_loc(_, _)) -> Op_store true + | Ispecific(Ifloatarithmem _) -> Op_load + | _ -> super#class_of_operation op + +end + +let fundecl f = + (new cse)#fundecl f + diff --git a/asmcomp/amd64/arch.ml b/asmcomp/amd64/arch.ml index b0a5ffb8b7..3741dd74bc 100644 --- a/asmcomp/amd64/arch.ml +++ b/asmcomp/amd64/arch.ml @@ -33,8 +33,8 @@ type addressing_mode = type specific_operation = Ilea of addressing_mode (* "lea" gives scaled adds *) - | Istore_int of nativeint * addressing_mode (* Store an integer constant *) - | Istore_symbol of string * addressing_mode (* Store a symbol *) + | Istore_int of nativeint * addressing_mode * bool (* Store an integer constant *) + | Istore_symbol of string * addressing_mode * bool (* Store a symbol *) | Ioffset_loc of int * addressing_mode (* Add a constant to a location *) | Ifloatarithmem of float_operation * addressing_mode (* Float arith operation with memory *) @@ -101,10 +101,14 @@ let print_addressing printreg addr ppf arg = let print_specific_operation printreg op ppf arg = match op with | Ilea addr -> print_addressing printreg addr ppf arg - | Istore_int(n, addr) -> - fprintf ppf "[%a] := %nd" (print_addressing printreg addr) arg n - | Istore_symbol(lbl, addr) -> - fprintf ppf "[%a] := \"%s\"" (print_addressing printreg addr) arg lbl + | Istore_int(n, addr, is_assign) -> + fprintf ppf "[%a] := %nd %s" + (print_addressing printreg addr) arg n + (if is_assign then "(assign)" else "(init)") + | Istore_symbol(lbl, addr, is_assign) -> + fprintf ppf "[%a] := \"%s\" %s" + (print_addressing printreg addr) arg lbl + (if is_assign then "(assign)" else "(init)") | Ioffset_loc(n, addr) -> fprintf ppf "[%a] +:= %i" (print_addressing printreg addr) arg n | Isqrtf -> diff --git a/asmcomp/amd64/emit.mlp b/asmcomp/amd64/emit.mlp index bdcc3a18d3..b576ece983 100644 --- a/asmcomp/amd64/emit.mlp +++ b/asmcomp/amd64/emit.mlp @@ -335,15 +335,16 @@ let output_epilogue f = (* Floating-point constants *) -let float_constants = ref ([] : (string * int) list) +let float_constants = ref ([] : (int64 * int) list) let add_float_constant cst = + let repr = Int64.bits_of_float cst in try - List.assoc cst !float_constants + List.assoc repr !float_constants with Not_found -> let lbl = new_label() in - float_constants := (cst, lbl) :: !float_constants; + float_constants := (repr, lbl) :: !float_constants; lbl let emit_float_constant (cst, lbl) = @@ -382,12 +383,12 @@ let emit_instr fallthrough i = ` movq ${emit_nativeint n}, {emit_reg i.res.(0)}\n` else ` movabsq ${emit_nativeint n}, {emit_reg i.res.(0)}\n` - | Lop(Iconst_float s) -> - begin match Int64.bits_of_float (float_of_string s) with + | Lop(Iconst_float f) -> + begin match Int64.bits_of_float f with | 0x0000_0000_0000_0000L -> (* +0.0 *) ` xorpd {emit_reg i.res.(0)}, {emit_reg i.res.(0)}\n` | _ -> - let lbl = add_float_constant s in + let lbl = add_float_constant f in ` movsd {emit_label lbl}(%rip), {emit_reg i.res.(0)}\n` end | Lop(Iconst_symbol s) -> @@ -448,7 +449,7 @@ let emit_instr fallthrough i = | Double | Double_u -> ` movsd {emit_addressing addr i.arg 0}, {emit_reg dest}\n` end - | Lop(Istore(chunk, addr)) -> + | Lop(Istore(chunk, addr, _)) -> begin match chunk with | Word -> ` movq {emit_reg i.arg.(0)}, {emit_addressing addr i.arg 1}\n` @@ -541,9 +542,9 @@ let emit_instr fallthrough i = ` cvttsd2siq {emit_reg i.arg.(0)}, {emit_reg i.res.(0)}\n` | Lop(Ispecific(Ilea addr)) -> ` leaq {emit_addressing addr i.arg 0}, {emit_reg i.res.(0)}\n` - | Lop(Ispecific(Istore_int(n, addr))) -> + | Lop(Ispecific(Istore_int(n, addr, _))) -> ` movq ${emit_nativeint n}, {emit_addressing addr i.arg 0}\n` - | Lop(Ispecific(Istore_symbol(s, addr))) -> + | Lop(Ispecific(Istore_symbol(s, addr, _))) -> assert (not !pic_code && not !Clflags.dlcode); ` movq ${emit_symbol s}, {emit_addressing addr i.arg 0}\n` | Lop(Ispecific(Ioffset_loc(n, addr))) -> @@ -764,9 +765,9 @@ let emit_item = function | Cint n -> ` .quad {emit_nativeint n}\n` | Csingle f -> - emit_float32_directive ".long" f + emit_float32_directive ".long" (Int32.bits_of_float f) | Cdouble f -> - emit_float64_directive ".quad" f + emit_float64_directive ".quad" (Int64.bits_of_float f) | Csymbol_address s -> ` .quad {emit_symbol s}\n` | Clabel_address lbl -> diff --git a/asmcomp/amd64/emit_nt.mlp b/asmcomp/amd64/emit_nt.mlp index 77156b8f01..a66f0c93b8 100644 --- a/asmcomp/amd64/emit_nt.mlp +++ b/asmcomp/amd64/emit_nt.mlp @@ -53,9 +53,10 @@ let slot_offset loc cl = else !stack_offset + (num_stack_slots.(0) + n) * 8 | Outgoing n -> n -(* Output a 32 bit integer in hex *) +(* Output a 32 or 64 bit integer in hex *) let emit_int32 n = emit_printf "0%lxh" n +let emit_int64 n = emit_printf "0%Lxh" n (* Symbols *) @@ -321,36 +322,20 @@ let output_epilogue () = (* Floating-point constants *) -let float_constants = ref ([] : (string * int) list) +let float_constants = ref ([] : (int64 * int) list) let add_float_constant cst = + let repr = Int64.bits_of_float cst in try - List.assoc cst !float_constants + List.assoc repr !float_constants with Not_found -> let lbl = new_label() in - float_constants := (cst, lbl) :: !float_constants; + float_constants := (repr, lbl) :: !float_constants; lbl -let emit_float s = - (* MASM doesn't like floating-point constants such as 2e9. - Turn them into 2.0e9. *) - let pos_e = ref (-1) and pos_dot = ref (-1) in - for i = 0 to String.length s - 1 do - match s.[i] with - 'e'|'E' -> pos_e := i - | '.' -> pos_dot := i - | _ -> () - done; - if !pos_dot < 0 && !pos_e >= 0 then begin - emit_string (String.sub s 0 !pos_e); - emit_string ".0"; - emit_string (String.sub s !pos_e (String.length s - !pos_e)) - end else - emit_string s - let emit_float_constant (cst, lbl) = - `{emit_label lbl} REAL8 {emit_float cst}\n` + `{emit_label lbl}: QWORD {emit_int64 cst}\n` let emit_movabs reg n = (* force ml64 to use mov reg, imm64 instruction *) @@ -389,12 +374,12 @@ let emit_instr fallthrough i = ` mov {emit_reg32 i.res.(0)}, {emit_nativeint n}\n` else emit_movabs i.res.(0) n - | Lop(Iconst_float s) -> - begin match Int64.bits_of_float (float_of_string s) with + | Lop(Iconst_float f) -> + begin match Int64.bits_of_float f with | 0x0000_0000_0000_0000L -> (* +0.0 *) ` xorpd {emit_reg i.res.(0)}, {emit_reg i.res.(0)}\n` | _ -> - let lbl = add_float_constant s in + let lbl = add_float_constant f in ` movsd {emit_reg i.res.(0)}, {emit_label lbl}\n` end | Lop(Iconst_symbol s) -> @@ -458,7 +443,7 @@ let emit_instr fallthrough i = | Double | Double_u -> ` movsd {emit_reg dest}, REAL8 PTR {emit_addressing addr i.arg 0}\n` end - | Lop(Istore(chunk, addr)) -> + | Lop(Istore(chunk, addr, _)) -> begin match chunk with | Word -> ` mov QWORD PTR {emit_addressing addr i.arg 1}, {emit_reg i.arg.(0)}\n` @@ -547,9 +532,9 @@ let emit_instr fallthrough i = ` cvttsd2si {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}\n` | Lop(Ispecific(Ilea addr)) -> ` lea {emit_reg i.res.(0)}, {emit_addressing addr i.arg 0}\n` - | Lop(Ispecific(Istore_int(n, addr))) -> + | Lop(Ispecific(Istore_int(n, addr, _))) -> ` mov QWORD PTR {emit_addressing addr i.arg 0}, {emit_nativeint n}\n` - | Lop(Ispecific(Istore_symbol(s, addr))) -> + | Lop(Ispecific(Istore_symbol(s, addr, _))) -> assert (not !pic_code); add_used_symbol s; ` mov QWORD PTR {emit_addressing addr i.arg 0}, OFFSET {emit_symbol s}\n` @@ -721,9 +706,9 @@ let emit_item = function | Cint n -> ` QWORD {emit_nativeint n}\n` | Csingle f -> - ` REAL4 {emit_float f}\n` + ` DWORD {emit_int32 (Int32.bits_of_float f)}\n` | Cdouble f -> - ` REAL8 {emit_float f}\n` + ` QWORD {emit_int64 (Int64.bits_of_float f)}\n` | Csymbol_address s -> add_used_symbol s; ` QWORD {emit_symbol s}\n` diff --git a/asmcomp/amd64/proc.ml b/asmcomp/amd64/proc.ml index b6e0fa94ab..cd06559e1e 100644 --- a/asmcomp/amd64/proc.ml +++ b/asmcomp/amd64/proc.ml @@ -259,7 +259,7 @@ let destroyed_at_oper = function | Iop(Iextcall(_, false)) -> destroyed_at_c_call | Iop(Iintop(Idiv | Imod)) | Iop(Iintop_imm((Idiv | Imod), _)) -> [| rax; rdx |] - | Iop(Istore(Single, _)) -> [| rxmm15 |] + | Iop(Istore(Single, _, _)) -> [| rxmm15 |] | Iop(Ialloc _ | Iintop(Imulh | Icomp _) | Iintop_imm((Icomp _), _)) -> [| rax |] | Iswitch(_, _) -> [| rax; rdx |] @@ -290,10 +290,21 @@ let max_register_pressure = function if fp then [| 10; 16 |] else [| 11; 16 |] | Ialloc _ | Iintop(Icomp _) | Iintop_imm((Icomp _), _) -> if fp then [| 11; 16 |] else [| 12; 16 |] - | Istore(Single, _) -> + | Istore(Single, _, _) -> if fp then [| 12; 15 |] else [| 13; 15 |] | _ -> if fp then [| 12; 16 |] else [| 13; 16 |] +(* Pure operations (without any side effect besides updating their result + registers). *) + +let op_is_pure = function + | Icall_ind | Icall_imm _ | Itailcall_ind | Itailcall_imm _ + | Iextcall _ | Istackoffset _ | Istore _ | Ialloc _ + | Iintop(Icheckbound) | Iintop_imm(Icheckbound, _) -> false + | Ispecific(Ilea _) -> true + | Ispecific _ -> false + | _ -> true + (* Layout of the stack frame *) let num_stack_slots = [| 0; 0 |] diff --git a/asmcomp/amd64/selection.ml b/asmcomp/amd64/selection.ml index 5e6afbcabf..fa7fe66c05 100644 --- a/asmcomp/amd64/selection.ml +++ b/asmcomp/amd64/selection.ml @@ -152,20 +152,20 @@ method select_addressing chunk exp = | Ascaledadd(e1, e2, scale) -> (Iindexed2scaled(scale, d), Ctuple[e1; e2]) -method! select_store addr exp = +method! select_store is_assign addr exp = match exp with Cconst_int n when self#is_immediate n -> - (Ispecific(Istore_int(Nativeint.of_int n, addr)), Ctuple []) + (Ispecific(Istore_int(Nativeint.of_int n, addr, is_assign)), Ctuple []) | (Cconst_natint n | Cconst_blockheader n) when self#is_immediate_natint n -> - (Ispecific(Istore_int(n, addr)), Ctuple []) + (Ispecific(Istore_int(n, addr, is_assign)), Ctuple []) | Cconst_pointer n when self#is_immediate n -> - (Ispecific(Istore_int(Nativeint.of_int n, addr)), Ctuple []) + (Ispecific(Istore_int(Nativeint.of_int n, addr, is_assign)), Ctuple []) | Cconst_natpointer n when self#is_immediate_natint n -> - (Ispecific(Istore_int(n, addr)), Ctuple []) + (Ispecific(Istore_int(n, addr, is_assign)), Ctuple []) | Cconst_symbol s when not (!pic_code || !Clflags.dlcode) -> - (Ispecific(Istore_symbol(s, addr)), Ctuple []) + (Ispecific(Istore_symbol(s, addr, is_assign)), Ctuple []) | _ -> - super#select_store addr exp + super#select_store is_assign addr exp method! select_operation op args = match op with diff --git a/asmcomp/arm/CSE.ml b/asmcomp/arm/CSE.ml new file mode 100644 index 0000000000..00282f1f55 --- /dev/null +++ b/asmcomp/arm/CSE.ml @@ -0,0 +1,38 @@ +(***********************************************************************) +(* *) +(* OCaml *) +(* *) +(* Xavier Leroy, projet Gallium, INRIA Rocquencourt *) +(* *) +(* Copyright 2014 Institut National de Recherche en Informatique et *) +(* en Automatique. All rights reserved. This file is distributed *) +(* under the terms of the Q Public License version 1.0. *) +(* *) +(***********************************************************************) + +(* CSE for ARM *) + +open Arch +open Mach +open CSEgen + +class cse = object (self) + +inherit cse_generic as super + +method! class_of_operation op = + match op with + | Ispecific(Ishiftcheckbound _) -> Op_checkbound + | Ispecific _ -> Op_pure + | _ -> super#class_of_operation op + +method! is_cheap_operation op = + match op with + | Iconst_int n | Iconst_blockheader n -> n <= 255n && n >= 0n + | _ -> false + +end + +let fundecl f = + (new cse)#fundecl f + diff --git a/asmcomp/arm/emit.mlp b/asmcomp/arm/emit.mlp index 2f20ecf61a..61035b85fd 100644 --- a/asmcomp/arm/emit.mlp +++ b/asmcomp/arm/emit.mlp @@ -273,7 +273,7 @@ let function_name = ref "" (* Entry point for tail recursive calls *) let tailrec_entry_point = ref 0 (* Pending floating-point literals *) -let float_literals = ref ([] : (string * label) list) +let float_literals = ref ([] : (int64 * label) list) (* Pending relative references to the global offset table *) let gotrel_literals = ref ([] : (label * label) list) (* Pending symbol literals *) @@ -283,12 +283,13 @@ let num_literals = ref 0 (* Label a floating-point literal *) let float_literal f = + let repr = Int64.bits_of_float f in try - List.assoc f !float_literals + List.assoc repr !float_literals with Not_found -> let lbl = new_label() in num_literals := !num_literals + 2; - float_literals := (f, lbl) :: !float_literals; + float_literals := (repr, lbl) :: !float_literals; lbl (* Label a GOTREL literal *) @@ -314,7 +315,7 @@ let emit_literals() = ` .align 3\n`; List.iter (fun (f, lbl) -> - `{emit_label lbl}: .double {emit_string f}\n`) + `{emit_label lbl}:`; emit_float64_split_directive ".long" f) !float_literals; float_literals := [] end; @@ -390,8 +391,7 @@ let emit_instr i = | Lop(Iconst_int n | Iconst_blockheader n) -> emit_intconst i.res.(0) (Nativeint.to_int32 n) | Lop(Iconst_float f) when !fpu = Soft -> - ` @ {emit_string f}\n`; - let bits = Int64.bits_of_float (float_of_string f) in + let bits = Int64.bits_of_float f in let high_bits = Int64.to_int32 (Int64.shift_right_logical bits 32) and low_bits = Int64.to_int32 bits in if is_immediate low_bits || is_immediate high_bits then begin @@ -406,7 +406,7 @@ let emit_instr i = end | Lop(Iconst_float f) when !fpu = VFPv2 -> let lbl = float_literal f in - ` fldd {emit_reg i.res.(0)}, {emit_label lbl} @ {emit_string f}\n`; + ` fldd {emit_reg i.res.(0)}, {emit_label lbl}\n`; 1 | Lop(Iconst_float f) -> let encode imm = @@ -425,12 +425,12 @@ let emit_instr i = let ex = ((ex + 3) land 0x07) lxor 0x04 in Some((sg lsl 7) lor (ex lsl 4) lor mn) end in - begin match encode (Int64.bits_of_float (float_of_string f)) with + begin match encode (Int64.bits_of_float f) with None -> let lbl = float_literal f in - ` fldd {emit_reg i.res.(0)}, {emit_label lbl} @ {emit_string f}\n` + ` fldd {emit_reg i.res.(0)}, {emit_label lbl}\n` | Some imm8 -> - ` fconstd {emit_reg i.res.(0)}, #{emit_int imm8} @ {emit_string f}\n` + ` fconstd {emit_reg i.res.(0)}, #{emit_int imm8}\n` end; 1 | Lop(Iconst_symbol s) -> emit_load_symbol_addr i.res.(0) s @@ -508,10 +508,10 @@ let emit_instr i = | Double_u -> "fldd" | _ (* 32-bit quantities *) -> "ldr" in ` {emit_string instr} {emit_reg r}, {emit_addressing addr i.arg 0}\n`; 1 - | Lop(Istore(Single, addr)) when !fpu >= VFPv2 -> + | Lop(Istore(Single, addr, _)) when !fpu >= VFPv2 -> ` fcvtsd s14, {emit_reg i.arg.(0)}\n`; ` fsts s14, {emit_addressing addr i.arg 1}\n`; 2 - | Lop(Istore((Double | Double_u), addr)) when !fpu = Soft -> + | Lop(Istore((Double | Double_u), addr, _)) when !fpu = Soft -> (* Use STM or STRD if possible *) begin match i.arg.(0), i.arg.(1), addr with {loc = Reg rt}, {loc = Reg rt2}, Iindexed 0 @@ -525,7 +525,7 @@ let emit_instr i = ` str {emit_reg i.arg.(0)}, {emit_addressing addr i.arg 2}\n`; ` str {emit_reg i.arg.(1)}, {emit_addressing addr' i.arg 2}\n`; 2 end - | Lop(Istore(size, addr)) -> + | Lop(Istore(size, addr, _)) -> let r = i.arg.(0) in let instr = match size with @@ -874,8 +874,8 @@ let emit_item = function | Cint16 n -> ` .short {emit_int n}\n` | Cint32 n -> ` .long {emit_int32 (Nativeint.to_int32 n)}\n` | Cint n -> ` .long {emit_int32 (Nativeint.to_int32 n)}\n` - | Csingle f -> ` .single {emit_string f}\n` - | Cdouble f -> ` .double {emit_string f}\n` + | Csingle f -> emit_float32_directive ".long" (Int32.bits_of_float f) + | Cdouble f -> emit_float64_split_directive ".long" (Int64.bits_of_float f) | Csymbol_address s -> ` .word {emit_symbol s}\n` | Clabel_address lbl -> ` .word {emit_data_label lbl}\n` | Cstring s -> emit_string_directive " .ascii " s diff --git a/asmcomp/arm/proc.ml b/asmcomp/arm/proc.ml index a16c35a226..a5bf3d5c8c 100644 --- a/asmcomp/arm/proc.ml +++ b/asmcomp/arm/proc.ml @@ -203,7 +203,7 @@ let destroyed_at_oper = function [| phys_reg 3; phys_reg 8 |] (* r3 and r12 destroyed *) | Iop(Iintop Imulh) when !arch < ARMv6 -> [| phys_reg 8 |] (* r12 destroyed *) - | Iop(Iintoffloat | Ifloatofint | Iload(Single, _) | Istore(Single, _)) -> + | Iop(Iintoffloat | Ifloatofint | Iload(Single, _) | Istore(Single, _, _)) -> [| phys_reg 107 |] (* d7 (s14-s15) destroyed *) | _ -> [||] @@ -222,9 +222,19 @@ let max_register_pressure = function | Ialloc _ -> if abi = EABI then [| 7; 0; 0 |] else [| 7; 8; 8 |] | Iconst_symbol _ when !pic_code -> [| 7; 16; 32 |] | Iintoffloat | Ifloatofint - | Iload(Single, _) | Istore(Single, _) -> [| 9; 15; 31 |] + | Iload(Single, _) | Istore(Single, _, _) -> [| 9; 15; 31 |] | _ -> [| 9; 16; 32 |] +(* Pure operations (without any side effect besides updating their result + registers). *) + +let op_is_pure = function + | Icall_ind | Icall_imm _ | Itailcall_ind | Itailcall_imm _ + | Iextcall _ | Istackoffset _ | Istore _ | Ialloc _ + | Iintop(Icheckbound) | Iintop_imm(Icheckbound, _) + | Ispecific(Ishiftcheckbound _) -> false + | _ -> true + (* Layout of the stack *) let num_stack_slots = [| 0; 0; 0 |] diff --git a/asmcomp/arm64/CSE.ml b/asmcomp/arm64/CSE.ml new file mode 100644 index 0000000000..359e57eb55 --- /dev/null +++ b/asmcomp/arm64/CSE.ml @@ -0,0 +1,38 @@ +(***********************************************************************) +(* *) +(* OCaml *) +(* *) +(* Xavier Leroy, projet Gallium, INRIA Rocquencourt *) +(* *) +(* Copyright 2014 Institut National de Recherche en Informatique et *) +(* en Automatique. All rights reserved. This file is distributed *) +(* under the terms of the Q Public License version 1.0. *) +(* *) +(***********************************************************************) + +(* CSE for ARM64 *) + +open Arch +open Mach +open CSEgen + +class cse = object (self) + +inherit cse_generic as super + +method! class_of_operation op = + match op with + | Ispecific(Ishiftcheckbound _) -> Op_checkbound + | Ispecific _ -> Op_pure + | _ -> super#class_of_operation op + +method! is_cheap_operation op = + match op with + | Iconst_int n | Iconst_blockheader n -> n <= 65535n && n >= 0n + | _ -> false + +end + +let fundecl f = + (new cse)#fundecl f + diff --git a/asmcomp/arm64/emit.mlp b/asmcomp/arm64/emit.mlp index 4a3e3cd7b4..2c2454fde6 100644 --- a/asmcomp/arm64/emit.mlp +++ b/asmcomp/arm64/emit.mlp @@ -286,7 +286,7 @@ let emit_literals() = ` .align 3\n`; List.iter (fun (f, lbl) -> - `{emit_label lbl}: .quad `; emit_printf "0x%Lx\n" f) + `{emit_label lbl}:`; emit_float64_directive ".quad" f) !float_literals; float_literals := [] end @@ -326,15 +326,15 @@ let emit_instr i = | Lop(Iconst_int n | Iconst_blockheader n) -> emit_intconst i.res.(0) n | Lop(Iconst_float f) -> - let b = Int64.bits_of_float(float_of_string f) in + let b = Int64.bits_of_float f in if b = 0L then - ` fmov {emit_reg i.res.(0)}, xzr /* {emit_string f} */\n` + ` fmov {emit_reg i.res.(0)}, xzr\n` else if is_immediate_float b then - ` fmov {emit_reg i.res.(0)}, #{emit_printf "0x%Lx" b} /* {emit_string f} */\n` + ` fmov {emit_reg i.res.(0)}, #{emit_printf "0x%Lx" b}\n` else begin let lbl = float_literal b in ` adrp {emit_reg reg_tmp1}, {emit_label lbl}\n`; - ` ldr {emit_reg i.res.(0)}, [{emit_reg reg_tmp1}, #:lo12:{emit_label lbl}] /* {emit_string f} */\n` + ` ldr {emit_reg i.res.(0)}, [{emit_reg reg_tmp1}, #:lo12:{emit_label lbl}]\n` end | Lop(Iconst_symbol s) -> emit_load_symbol_addr i.res.(0) s @@ -388,7 +388,7 @@ let emit_instr i = | Word | Double | Double_u -> ` ldr {emit_reg dst}, {emit_addressing addr base}\n` end - | Lop(Istore(size, addr)) -> + | Lop(Istore(size, addr, _)) -> let src = i.arg.(0) in let base = match addr with @@ -675,8 +675,8 @@ let emit_item = function | Cint16 n -> ` .short {emit_int n}\n` | Cint32 n -> ` .long {emit_nativeint n}\n` | Cint n -> ` .quad {emit_nativeint n}\n` - | Csingle f -> emit_float32_directive ".long" f - | Cdouble f -> emit_float64_directive ".quad" f + | Csingle f -> emit_float32_directive ".long" (Int32.bits_of_float f) + | Cdouble f -> emit_float64_directive ".quad" (Int64.bits_of_float f) | Csymbol_address s -> ` .quad {emit_symbol s}\n` | Clabel_address lbl -> ` .quad {emit_data_label lbl}\n` | Cstring s -> emit_string_directive " .ascii " s diff --git a/asmcomp/arm64/proc.ml b/asmcomp/arm64/proc.ml index b52c2fd8ae..d2cda5c235 100644 --- a/asmcomp/arm64/proc.ml +++ b/asmcomp/arm64/proc.ml @@ -177,7 +177,7 @@ let destroyed_at_oper = function destroyed_at_c_call | Iop(Ialloc _) -> [| reg_x15 |] - | Iop(Iintoffloat | Ifloatofint | Iload(Single, _) | Istore(Single, _)) -> + | Iop(Iintoffloat | Ifloatofint | Iload(Single, _) | Istore(Single, _, _)) -> [| reg_d7 |] (* d7 / s7 destroyed *) | _ -> [||] @@ -194,9 +194,19 @@ let max_register_pressure = function | Iextcall(_, _) -> [| 10; 8 |] | Ialloc _ -> [| 25; 32 |] | Iintoffloat | Ifloatofint - | Iload(Single, _) | Istore(Single, _) -> [| 26; 31 |] + | Iload(Single, _) | Istore(Single, _, _) -> [| 26; 31 |] | _ -> [| 26; 32 |] +(* Pure operations (without any side effect besides updating their result + registers). *) + +let op_is_pure = function + | Icall_ind | Icall_imm _ | Itailcall_ind | Itailcall_imm _ + | Iextcall _ | Istackoffset _ | Istore _ | Ialloc _ + | Iintop(Icheckbound) | Iintop_imm(Icheckbound, _) + | Ispecific(Ishiftcheckbound _) -> false + | _ -> true + (* Layout of the stack *) let num_stack_slots = [| 0; 0 |] diff --git a/asmcomp/asmgen.ml b/asmcomp/asmgen.ml index 34283875cb..311bb029b2 100644 --- a/asmcomp/asmgen.ml +++ b/asmcomp/asmgen.ml @@ -64,7 +64,10 @@ let compile_fundecl (ppf : formatter) fd_cmm = ++ pass_dump_if ppf dump_selection "After instruction selection" ++ Comballoc.fundecl ++ pass_dump_if ppf dump_combine "After allocation combining" + ++ CSE.fundecl + ++ pass_dump_if ppf dump_cse "After CSE" ++ liveness ppf + ++ Deadcode.fundecl ++ pass_dump_if ppf dump_live "Liveness analysis" ++ Spill.fundecl ++ liveness ppf diff --git a/asmcomp/clambda.ml b/asmcomp/clambda.ml index 3586296e4f..4088265337 100644 --- a/asmcomp/clambda.ml +++ b/asmcomp/clambda.ml @@ -19,12 +19,12 @@ open Lambda type function_label = string type ustructured_constant = - | Uconst_float of string + | Uconst_float of float | Uconst_int32 of int32 | Uconst_int64 of int64 | Uconst_nativeint of nativeint | Uconst_block of int * uconstant list - | Uconst_float_array of string list + | Uconst_float_array of float list | Uconst_string of string and uconstant = @@ -74,7 +74,9 @@ type function_description = { fun_label: function_label; (* Label of direct entry point *) fun_arity: int; (* Number of arguments *) mutable fun_closed: bool; (* True if environment not used *) - mutable fun_inline: (Ident.t list * ulambda) option } + mutable fun_inline: (Ident.t list * ulambda) option; + mutable fun_float_const_prop: bool (* Can propagate FP consts *) + } (* Approximation of values *) diff --git a/asmcomp/clambda.mli b/asmcomp/clambda.mli index e751326fe4..abb0e9c626 100644 --- a/asmcomp/clambda.mli +++ b/asmcomp/clambda.mli @@ -19,12 +19,12 @@ open Lambda type function_label = string type ustructured_constant = - | Uconst_float of string + | Uconst_float of float | Uconst_int32 of int32 | Uconst_int64 of int64 | Uconst_nativeint of nativeint | Uconst_block of int * uconstant list - | Uconst_float_array of string list + | Uconst_float_array of float list | Uconst_string of string and uconstant = @@ -74,7 +74,9 @@ type function_description = { fun_label: function_label; (* Label of direct entry point *) fun_arity: int; (* Number of arguments *) mutable fun_closed: bool; (* True if environment not used *) - mutable fun_inline: (Ident.t list * ulambda) option } + mutable fun_inline: (Ident.t list * ulambda) option; + mutable fun_float_const_prop: bool (* Can propagate FP consts *) + } (* Approximation of values *) diff --git a/asmcomp/closure.ml b/asmcomp/closure.ml index eff35ce4f2..2f37e0fcc7 100644 --- a/asmcomp/closure.ml +++ b/asmcomp/closure.ml @@ -245,14 +245,15 @@ let rec is_pure_clambda = function | Uprim(p, args, _) -> List.for_all is_pure_clambda args | _ -> false -(* Simplify primitive operations on integers *) +(* Simplify primitive operations on known arguments *) let make_const c = (Uconst c, Value_const c) - +let make_const_ref c = + make_const(Uconst_ref(Compilenv.new_structured_constant ~shared:true c, c)) let make_const_int n = make_const (Uconst_int n) let make_const_ptr n = make_const (Uconst_ptr n) let make_const_bool b = make_const_ptr(if b then 1 else 0) -let make_comparison cmp (x: int) (y: int) = +let make_comparison cmp x y = make_const_bool (match cmp with Ceq -> x = y @@ -261,71 +262,187 @@ let make_comparison cmp (x: int) (y: int) = | Cgt -> x > y | Cle -> x <= y | Cge -> x >= y) +let make_const_float n = make_const_ref (Uconst_float n) +let make_const_natint n = make_const_ref (Uconst_nativeint n) +let make_const_int32 n = make_const_ref (Uconst_int32 n) +let make_const_int64 n = make_const_ref (Uconst_int64 n) + +(* The [fpc] parameter is true if constant propagation of + floating-point computations is allowed *) -let simplif_int_prim_pure p (args, approxs) dbg = +let simplif_arith_prim_pure fpc p (args, approxs) dbg = + let default = (Uprim(p, args, dbg), Value_unknown) in match approxs with - [Value_const (Uconst_int x)] -> + (* int (or enumerated type) *) + | [ Value_const(Uconst_int n1 | Uconst_ptr n1) ] -> begin match p with - Pidentity -> make_const_int x - | Pnegint -> make_const_int (-x) - | Pbswap16 -> - make_const_int (((x land 0xff) lsl 8) lor - ((x land 0xff00) lsr 8)) - | Poffsetint y -> make_const_int (x + y) - | _ -> (Uprim(p, args, dbg), Value_unknown) + | Pnot -> make_const_bool (n1 = 0) + | Pnegint -> make_const_int (- n1) + | Poffsetint n -> make_const_int (n + n1) + | Pfloatofint when fpc -> make_const_float (float_of_int n1) + | Pbintofint Pnativeint -> make_const_natint (Nativeint.of_int n1) + | Pbintofint Pint32 -> make_const_int32 (Int32.of_int n1) + | Pbintofint Pint64 -> make_const_int64 (Int64.of_int n1) + | Pbswap16 -> make_const_int (((n1 land 0xff) lsl 8) + lor ((n1 land 0xff00) lsr 8)) + | _ -> default end - | [Value_const (Uconst_int x); Value_const (Uconst_int y)] -> + (* int (or enumerated type), int (or enumerated type) *) + | [ Value_const(Uconst_int n1 | Uconst_ptr n1); + Value_const(Uconst_int n2 | Uconst_ptr n2) ] -> begin match p with - Paddint -> make_const_int(x + y) - | Psubint -> make_const_int(x - y) - | Pmulint -> make_const_int(x * y) - | Pdivint when y <> 0 -> make_const_int(x / y) - | Pmodint when y <> 0 -> make_const_int(x mod y) - | Pandint -> make_const_int(x land y) - | Porint -> make_const_int(x lor y) - | Pxorint -> make_const_int(x lxor y) - | Plslint -> make_const_int(x lsl y) - | Plsrint -> make_const_int(x lsr y) - | Pasrint -> make_const_int(x asr y) - | Pintcomp cmp -> make_comparison cmp x y - | _ -> (Uprim(p, args, dbg), Value_unknown) + | Psequand -> make_const_bool (n1 <> 0 && n2 <> 0) + | Psequor -> make_const_bool (n1 <> 0 || n2 <> 0) + | Paddint -> make_const_int (n1 + n2) + | Psubint -> make_const_int (n1 - n2) + | Pmulint -> make_const_int (n1 * n2) + | Pdivint when n2 <> 0 -> make_const_int (n1 / n2) + | Pmodint when n2 <> 0 -> make_const_int (n1 mod n2) + | Pandint -> make_const_int (n1 land n2) + | Porint -> make_const_int (n1 lor n2) + | Pxorint -> make_const_int (n1 lxor n2) + | Plslint when 0 <= n2 && n2 < 8 * Arch.size_int -> + make_const_int (n1 lsl n2) + | Plsrint when 0 <= n2 && n2 < 8 * Arch.size_int -> + make_const_int (n1 lsr n2) + | Pasrint when 0 <= n2 && n2 < 8 * Arch.size_int -> + make_const_int (n1 asr n2) + | Pintcomp c -> make_comparison c n1 n2 + | _ -> default end - | [Value_const (Uconst_ptr x)] -> + (* float *) + | [Value_const(Uconst_ref(_, Uconst_float n1))] when fpc -> begin match p with - Pidentity -> make_const_ptr x - | Pnot -> make_const_bool(x = 0) - | Pisint -> make_const_bool true - | Pctconst c -> - begin - match c with - | Big_endian -> make_const_bool Arch.big_endian - | Word_size -> make_const_int (8*Arch.size_int) - | Ostype_unix -> make_const_bool (Sys.os_type = "Unix") - | Ostype_win32 -> make_const_bool (Sys.os_type = "Win32") - | Ostype_cygwin -> make_const_bool (Sys.os_type = "Cygwin") - end - | _ -> (Uprim(p, args, dbg), Value_unknown) + | Pintoffloat -> make_const_int (int_of_float n1) + | Pnegfloat -> make_const_float (-. n1) + | Pabsfloat -> make_const_float (abs_float n1) + | _ -> default end - | [Value_const (Uconst_ptr x); Value_const (Uconst_ptr y)] -> + (* float, float *) + | [Value_const(Uconst_ref(_, Uconst_float n1)); + Value_const(Uconst_ref(_, Uconst_float n2))] when fpc -> begin match p with - Psequand -> make_const_bool(x <> 0 && y <> 0) - | Psequor -> make_const_bool(x <> 0 || y <> 0) - | Pintcomp cmp -> make_comparison cmp x y - | _ -> (Uprim(p, args, dbg), Value_unknown) + | Paddfloat -> make_const_float (n1 +. n2) + | Psubfloat -> make_const_float (n1 -. n2) + | Pmulfloat -> make_const_float (n1 *. n2) + | Pdivfloat -> make_const_float (n1 /. n2) + | Pfloatcomp c -> make_comparison c n1 n2 + | _ -> default end - | [Value_const (Uconst_ptr x); Value_const (Uconst_int y)] -> + (* nativeint *) + | [Value_const(Uconst_ref(_, Uconst_nativeint n))] -> begin match p with - | Pintcomp cmp -> make_comparison cmp x y - | _ -> (Uprim(p, args, dbg), Value_unknown) + | Pintofbint Pnativeint -> make_const_int (Nativeint.to_int n) + | Pcvtbint(Pnativeint, Pint32) -> make_const_int32 (Nativeint.to_int32 n) + | Pcvtbint(Pnativeint, Pint64) -> make_const_int64 (Int64.of_nativeint n) + | Pnegbint Pnativeint -> make_const_natint (Nativeint.neg n) + | _ -> default end - | [Value_const (Uconst_int x); Value_const (Uconst_ptr y)] -> + (* nativeint, nativeint *) + | [Value_const(Uconst_ref(_, Uconst_nativeint n1)); + Value_const(Uconst_ref(_, Uconst_nativeint n2))] -> begin match p with - | Pintcomp cmp -> make_comparison cmp x y - | _ -> (Uprim(p, args, dbg), Value_unknown) + | Paddbint Pnativeint -> make_const_natint (Nativeint.add n1 n2) + | Psubbint Pnativeint -> make_const_natint (Nativeint.sub n1 n2) + | Pmulbint Pnativeint -> make_const_natint (Nativeint.mul n1 n2) + | Pdivbint Pnativeint when n2 <> 0n -> + make_const_natint (Nativeint.div n1 n2) + | Pmodbint Pnativeint when n2 <> 0n -> + make_const_natint (Nativeint.rem n1 n2) + | Pandbint Pnativeint -> make_const_natint (Nativeint.logand n1 n2) + | Porbint Pnativeint -> make_const_natint (Nativeint.logor n1 n2) + | Pxorbint Pnativeint -> make_const_natint (Nativeint.logxor n1 n2) + | Pbintcomp(Pnativeint, c) -> make_comparison c n1 n2 + | _ -> default + end + (* nativeint, int *) + | [Value_const(Uconst_ref(_, Uconst_nativeint n1)); + Value_const(Uconst_int n2)] -> + begin match p with + | Plslbint Pnativeint when 0 <= n2 && n2 < 8 * Arch.size_int -> + make_const_natint (Nativeint.shift_left n1 n2) + | Plsrbint Pnativeint when 0 <= n2 && n2 < 8 * Arch.size_int -> + make_const_natint (Nativeint.shift_right_logical n1 n2) + | Pasrbint Pnativeint when 0 <= n2 && n2 < 8 * Arch.size_int -> + make_const_natint (Nativeint.shift_right n1 n2) + | _ -> default + end + (* int32 *) + | [Value_const(Uconst_ref(_, Uconst_int32 n))] -> + begin match p with + | Pintofbint Pint32 -> make_const_int (Int32.to_int n) + | Pcvtbint(Pint32, Pnativeint) -> make_const_natint (Nativeint.of_int32 n) + | Pcvtbint(Pint32, Pint64) -> make_const_int64 (Int64.of_int32 n) + | Pnegbint Pint32 -> make_const_int32 (Int32.neg n) + | _ -> default + end + (* int32, int32 *) + | [Value_const(Uconst_ref(_, Uconst_int32 n1)); + Value_const(Uconst_ref(_, Uconst_int32 n2))] -> + begin match p with + | Paddbint Pint32 -> make_const_int32 (Int32.add n1 n2) + | Psubbint Pint32 -> make_const_int32 (Int32.sub n1 n2) + | Pmulbint Pint32 -> make_const_int32 (Int32.mul n1 n2) + | Pdivbint Pint32 when n2 <> 0l -> make_const_int32 (Int32.div n1 n2) + | Pmodbint Pint32 when n2 <> 0l -> make_const_int32 (Int32.rem n1 n2) + | Pandbint Pint32 -> make_const_int32 (Int32.logand n1 n2) + | Porbint Pint32 -> make_const_int32 (Int32.logor n1 n2) + | Pxorbint Pint32 -> make_const_int32 (Int32.logxor n1 n2) + | Pbintcomp(Pint32, c) -> make_comparison c n1 n2 + | _ -> default + end + (* int32, int *) + | [Value_const(Uconst_ref(_, Uconst_int32 n1)); + Value_const(Uconst_int n2)] -> + begin match p with + | Plslbint Pint32 when 0 <= n2 && n2 < 32 -> + make_const_int32 (Int32.shift_left n1 n2) + | Plsrbint Pint32 when 0 <= n2 && n2 < 32 -> + make_const_int32 (Int32.shift_right_logical n1 n2) + | Pasrbint Pint32 when 0 <= n2 && n2 < 32 -> + make_const_int32 (Int32.shift_right n1 n2) + | _ -> default + end + (* int64 *) + | [Value_const(Uconst_ref(_, Uconst_int64 n))] -> + begin match p with + | Pintofbint Pint64 -> make_const_int (Int64.to_int n) + | Pcvtbint(Pint64, Pint32) -> make_const_int32 (Int64.to_int32 n) + | Pcvtbint(Pint64, Pnativeint) -> make_const_natint (Int64.to_nativeint n) + | Pnegbint Pint64 -> make_const_int64 (Int64.neg n) + | _ -> default + end + (* int64, int64 *) + | [Value_const(Uconst_ref(_, Uconst_int64 n1)); + Value_const(Uconst_ref(_, Uconst_int64 n2))] -> + begin match p with + | Paddbint Pint64 -> make_const_int64 (Int64.add n1 n2) + | Psubbint Pint64 -> make_const_int64 (Int64.sub n1 n2) + | Pmulbint Pint64 -> make_const_int64 (Int64.mul n1 n2) + | Pdivbint Pint64 when n2 <> 0L -> make_const_int64 (Int64.div n1 n2) + | Pmodbint Pint64 when n2 <> 0L -> make_const_int64 (Int64.rem n1 n2) + | Pandbint Pint64 -> make_const_int64 (Int64.logand n1 n2) + | Porbint Pint64 -> make_const_int64 (Int64.logor n1 n2) + | Pxorbint Pint64 -> make_const_int64 (Int64.logxor n1 n2) + | Pbintcomp(Pint64, c) -> make_comparison c n1 n2 + | _ -> default + end + (* int64, int *) + | [Value_const(Uconst_ref(_, Uconst_int64 n1)); + Value_const(Uconst_int n2)] -> + begin match p with + | Plslbint Pint64 when 0 <= n2 && n2 < 64 -> + make_const_int64 (Int64.shift_left n1 n2) + | Plsrbint Pint64 when 0 <= n2 && n2 < 64 -> + make_const_int64 (Int64.shift_right_logical n1 n2) + | Pasrbint Pint64 when 0 <= n2 && n2 < 64 -> + make_const_int64 (Int64.shift_right n1 n2) + | _ -> default end + (* TODO: Pbbswap *) + (* Catch-all *) | _ -> - (Uprim(p, args, dbg), Value_unknown) - + default let field_approx n = function | Value_tuple a when n < Array.length a -> a.(n) @@ -333,8 +450,9 @@ let field_approx n = function Value_const (List.nth l n) | _ -> Value_unknown -let simplif_prim_pure p (args, approxs) dbg = +let simplif_prim_pure fpc p (args, approxs) dbg = match p, args, approxs with + (* Block construction *) | Pmakeblock(tag, Immutable), _, _ -> let field = function | Value_const c -> c @@ -349,24 +467,43 @@ let simplif_prim_pure p (args, approxs) dbg = with Exit -> (Uprim(p, args, dbg), Value_tuple (Array.of_list approxs)) end + (* Field access *) | Pfield n, _, [ Value_const(Uconst_ref(_, Uconst_block(_, l))) ] when n < List.length l -> make_const (List.nth l n) - - | Pfield n, [ Uprim(Pmakeblock _, ul, _) ], [approx] -> - assert(n < List.length ul); - List.nth ul n, field_approx n approx - - | Pstringlength, _, [ Value_const(Uconst_ref(_, Uconst_string s)) ] - -> + | Pfield n, [ Uprim(Pmakeblock _, ul, _) ], [approx] + when n < List.length ul -> + (List.nth ul n, field_approx n approx) + (* Strings *) + | Pstringlength, _, [ Value_const(Uconst_ref(_, Uconst_string s)) ] -> make_const_int (String.length s) - + (* Identity *) + | Pidentity, [arg1], [app1] -> + (arg1, app1) + (* Kind test *) + | Pisint, _, [a1] -> + begin match a1 with + | Value_const(Uconst_int _ | Uconst_ptr _) -> make_const_bool true + | Value_const(Uconst_ref _) -> make_const_bool false + | Value_closure _ | Value_tuple _ -> make_const_bool false + | _ -> (Uprim(p, args, dbg), Value_unknown) + end + (* Compile-time constants *) + | Pctconst c, _, _ -> + begin match c with + | Big_endian -> make_const_bool Arch.big_endian + | Word_size -> make_const_int (8*Arch.size_int) + | Ostype_unix -> make_const_bool (Sys.os_type = "Unix") + | Ostype_win32 -> make_const_bool (Sys.os_type = "Win32") + | Ostype_cygwin -> make_const_bool (Sys.os_type = "Cygwin") + end + (* Catch-all *) | _ -> - simplif_int_prim_pure p (args, approxs) dbg + simplif_arith_prim_pure fpc p (args, approxs) dbg -let simplif_prim p (args, approxs as args_approxs) dbg = +let simplif_prim fpc p (args, approxs as args_approxs) dbg = if List.for_all is_pure_clambda args - then simplif_prim_pure p args_approxs dbg + then simplif_prim_pure fpc p args_approxs dbg else (* XXX : always return the same approxs as simplif_prim_pure? *) let approx = @@ -391,15 +528,16 @@ let approx_ulam = function Uconst c -> Value_const c | _ -> Value_unknown -let rec substitute sb ulam = +let rec substitute fpc sb ulam = match ulam with Uvar v -> begin try Tbl.find v sb with Not_found -> ulam end | Uconst _ -> ulam | Udirect_apply(lbl, args, dbg) -> - Udirect_apply(lbl, List.map (substitute sb) args, dbg) + Udirect_apply(lbl, List.map (substitute fpc sb) args, dbg) | Ugeneric_apply(fn, args, dbg) -> - Ugeneric_apply(substitute sb fn, List.map (substitute sb) args, dbg) + Ugeneric_apply(substitute fpc sb fn, + List.map (substitute fpc sb) args, dbg) | Uclosure(defs, env) -> (* Question: should we rename function labels as well? Otherwise, there is a risk that function labels are not globally unique. @@ -409,11 +547,12 @@ let rec substitute sb ulam = - When we substitute offsets for idents bound by let rec in [close], case [Lletrec], we discard the original let rec body and use only the substituted term. *) - Uclosure(defs, List.map (substitute sb) env) - | Uoffset(u, ofs) -> Uoffset(substitute sb u, ofs) + Uclosure(defs, List.map (substitute fpc sb) env) + | Uoffset(u, ofs) -> Uoffset(substitute fpc sb u, ofs) | Ulet(id, u1, u2) -> let id' = Ident.rename id in - Ulet(id', substitute sb u1, substitute (Tbl.add id (Uvar id') sb) u2) + Ulet(id', substitute fpc sb u1, + substitute fpc (Tbl.add id (Uvar id') sb) u2) | Uletrec(bindings, body) -> let bindings1 = List.map (fun (id, rhs) -> (id, Ident.rename id, rhs)) bindings in @@ -422,57 +561,64 @@ let rec substitute sb ulam = (fun (id, id', _) s -> Tbl.add id (Uvar id') s) bindings1 sb in Uletrec( - List.map (fun (id, id', rhs) -> (id', substitute sb' rhs)) bindings1, - substitute sb' body) + List.map + (fun (id, id', rhs) -> (id', substitute fpc sb' rhs)) + bindings1, + substitute fpc sb' body) | Uprim(p, args, dbg) -> - let sargs = List.map (substitute sb) args in - let (res, _) = simplif_prim p (sargs, List.map approx_ulam sargs) dbg in + let sargs = + List.map (substitute fpc sb) args in + let (res, _) = + simplif_prim fpc p (sargs, List.map approx_ulam sargs) dbg in res | Uswitch(arg, sw) -> - Uswitch(substitute sb arg, + Uswitch(substitute fpc sb arg, { sw with us_actions_consts = - Array.map (substitute sb) sw.us_actions_consts; + Array.map (substitute fpc sb) sw.us_actions_consts; us_actions_blocks = - Array.map (substitute sb) sw.us_actions_blocks; + Array.map (substitute fpc sb) sw.us_actions_blocks; }) | Ustringswitch(arg,sw,d) -> Ustringswitch - (substitute sb arg, - List.map (fun (s,act) -> s,substitute sb act) sw, - Misc.may_map (substitute sb) d) + (substitute fpc sb arg, + List.map (fun (s,act) -> s,substitute fpc sb act) sw, + Misc.may_map (substitute fpc sb) d) | Ustaticfail (nfail, args) -> - Ustaticfail (nfail, List.map (substitute sb) args) + Ustaticfail (nfail, List.map (substitute fpc sb) args) | Ucatch(nfail, ids, u1, u2) -> - Ucatch(nfail, ids, substitute sb u1, substitute sb u2) + Ucatch(nfail, ids, substitute fpc sb u1, substitute fpc sb u2) | Utrywith(u1, id, u2) -> let id' = Ident.rename id in - Utrywith(substitute sb u1, id', substitute (Tbl.add id (Uvar id') sb) u2) + Utrywith(substitute fpc sb u1, id', + substitute fpc (Tbl.add id (Uvar id') sb) u2) | Uifthenelse(u1, u2, u3) -> - begin match substitute sb u1 with + begin match substitute fpc sb u1 with Uconst (Uconst_ptr n) -> - if n <> 0 then substitute sb u2 else substitute sb u3 + if n <> 0 then substitute fpc sb u2 else substitute fpc sb u3 | Uprim(Pmakeblock _, _, _) -> - substitute sb u2 + substitute fpc sb u2 | su1 -> - Uifthenelse(su1, substitute sb u2, substitute sb u3) + Uifthenelse(su1, substitute fpc sb u2, substitute fpc sb u3) end - | Usequence(u1, u2) -> Usequence(substitute sb u1, substitute sb u2) - | Uwhile(u1, u2) -> Uwhile(substitute sb u1, substitute sb u2) + | Usequence(u1, u2) -> + Usequence(substitute fpc sb u1, substitute fpc sb u2) + | Uwhile(u1, u2) -> + Uwhile(substitute fpc sb u1, substitute fpc sb u2) | Ufor(id, u1, u2, dir, u3) -> let id' = Ident.rename id in - Ufor(id', substitute sb u1, substitute sb u2, dir, - substitute (Tbl.add id (Uvar id') sb) u3) + Ufor(id', substitute fpc sb u1, substitute fpc sb u2, dir, + substitute fpc (Tbl.add id (Uvar id') sb) u3) | Uassign(id, u) -> let id' = try match Tbl.find id sb with Uvar i -> i | _ -> assert false with Not_found -> id in - Uassign(id', substitute sb u) + Uassign(id', substitute fpc sb u) | Usend(k, u1, u2, ul, dbg) -> - Usend(k, substitute sb u1, substitute sb u2, List.map (substitute sb) ul, - dbg) + Usend(k, substitute fpc sb u1, substitute fpc sb u2, + List.map (substitute fpc sb) ul, dbg) (* Perform an inline expansion *) @@ -484,12 +630,12 @@ let no_effects = function | Uclosure _ -> true | u -> is_simple_argument u -let rec bind_params_rec subst params args body = +let rec bind_params_rec fpc subst params args body = match (params, args) with - ([], []) -> substitute subst body + ([], []) -> substitute fpc subst body | (p1 :: pl, a1 :: al) -> if is_simple_argument a1 then - bind_params_rec (Tbl.add p1 a1 subst) pl al body + bind_params_rec fpc (Tbl.add p1 a1 subst) pl al body else begin let p1' = Ident.rename p1 in let u1, u2 = @@ -500,17 +646,17 @@ let rec bind_params_rec subst params args body = a1, Uvar p1' in let body' = - bind_params_rec (Tbl.add p1 u2 subst) pl al body in + bind_params_rec fpc (Tbl.add p1 u2 subst) pl al body in if occurs_var p1 body then Ulet(p1', u1, body') else if no_effects a1 then body' else Usequence(a1, body') end | (_, _) -> assert false -let bind_params params args body = +let bind_params fpc params args body = (* Reverse parameters and arguments to preserve right-to-left evaluation order (PR#2910). *) - bind_params_rec Tbl.empty (List.rev params) (List.rev args) body + bind_params_rec fpc Tbl.empty (List.rev params) (List.rev args) body (* Check if a lambda term is ``pure'', that is without side-effects *and* not containing function definitions *) @@ -532,8 +678,10 @@ let direct_apply fundesc funct ufunct uargs = if fundesc.fun_closed then uargs else uargs @ [ufunct] in let app = match fundesc.fun_inline with - None -> Udirect_apply(fundesc.fun_label, app_args, Debuginfo.none) - | Some(params, body) -> bind_params params app_args body in + | None -> + Udirect_apply(fundesc.fun_label, app_args, Debuginfo.none) + | Some(params, body) -> + bind_params fundesc.fun_float_const_prop params app_args body in (* If ufunct can contain side-effects or function definitions, we must make sure that it is evaluated exactly once. If the function is not closed, we evaluate ufunct as part of the @@ -648,14 +796,14 @@ let rec close fenv cenv = function str (Uconst_block (tag, List.map transl fields)) | Const_float_array sl -> (* constant float arrays are really immutable *) - str (Uconst_float_array sl) + str (Uconst_float_array (List.map float_of_string sl)) | Const_immstring s -> str (Uconst_string s) | Const_base (Const_string (s, _)) -> (* strings (even literal ones) are mutable! *) (* of course, the empty string is really immutable *) str ~shared:false(*(String.length s = 0)*) (Uconst_string s) - | Const_base(Const_float x) -> str (Uconst_float x) + | Const_base(Const_float x) -> str (Uconst_float (float_of_string x)) | Const_base(Const_int32 x) -> str (Uconst_int32 x) | Const_base(Const_int64 x) -> str (Uconst_int64 x) | Const_base(Const_nativeint x) -> str (Uconst_nativeint x) @@ -749,7 +897,7 @@ let rec close fenv cenv = function (fun (id, pos, approx) sb -> Tbl.add id (Uoffset(Uvar clos_ident, pos)) sb) infos Tbl.empty in - (Ulet(clos_ident, clos, substitute sb ubody), + (Ulet(clos_ident, clos, substitute !Clflags.float_const_prop sb ubody), approx) end else begin (* General case: recursive definition of values *) @@ -785,7 +933,8 @@ let rec close fenv cenv = function (Uprim(Praise k, [ulam], Debuginfo.from_raise ev), Value_unknown) | Lprim(p, args) -> - simplif_prim p (close_list_approx fenv cenv args) Debuginfo.none + simplif_prim !Clflags.float_const_prop + p (close_list_approx fenv cenv args) Debuginfo.none | Lswitch(arg, sw) -> let fn fail = let (uarg, _) = close fenv cenv arg in @@ -925,7 +1074,8 @@ and close_functions fenv cenv fun_defs = {fun_label = label; fun_arity = (if kind = Tupled then -arity else arity); fun_closed = initially_closed; - fun_inline = None } in + fun_inline = None; + fun_float_const_prop = !Clflags.float_const_prop } in (id, params, body, fundesc) | (_, _) -> fatal_error "Closure.close_functions") fun_defs in diff --git a/asmcomp/cmm.ml b/asmcomp/cmm.ml index 9a5f3ec6b8..67ee3445fd 100644 --- a/asmcomp/cmm.ml +++ b/asmcomp/cmm.ml @@ -85,7 +85,7 @@ type operation = type expression = Cconst_int of int | Cconst_natint of nativeint - | Cconst_float of string + | Cconst_float of float | Cconst_symbol of string | Cconst_pointer of int | Cconst_natpointer of nativeint @@ -118,8 +118,8 @@ type data_item = | Cint16 of int | Cint32 of nativeint | Cint of nativeint - | Csingle of string - | Cdouble of string + | Csingle of float + | Cdouble of float | Csymbol_address of string | Clabel_address of int | Cstring of string diff --git a/asmcomp/cmm.mli b/asmcomp/cmm.mli index be2bd41457..97b8d40971 100644 --- a/asmcomp/cmm.mli +++ b/asmcomp/cmm.mli @@ -71,7 +71,7 @@ type operation = type expression = Cconst_int of int | Cconst_natint of nativeint - | Cconst_float of string + | Cconst_float of float | Cconst_symbol of string | Cconst_pointer of int | Cconst_natpointer of nativeint @@ -104,8 +104,8 @@ type data_item = | Cint16 of int | Cint32 of nativeint | Cint of nativeint - | Csingle of string - | Cdouble of string + | Csingle of float + | Cdouble of float | Csymbol_address of string | Clabel_address of int | Cstring of string diff --git a/asmcomp/deadcode.ml b/asmcomp/deadcode.ml new file mode 100644 index 0000000000..d3d0fcb906 --- /dev/null +++ b/asmcomp/deadcode.ml @@ -0,0 +1,64 @@ +(***********************************************************************) +(* *) +(* OCaml *) +(* *) +(* Xavier Leroy, projet Gallium, INRIA Rocquencourt *) +(* *) +(* Copyright 2014 Institut National de Recherche en Informatique et *) +(* en Automatique. All rights reserved. This file is distributed *) +(* under the terms of the Q Public License version 1.0. *) +(* *) +(***********************************************************************) + +(* Dead code elimination: remove pure instructions whose results are + not used. *) + +open Mach + +(* [deadcode i] returns a pair of an optimized instruction [i'] + and a set of registers live "before" instruction [i]. *) + +let rec deadcode i = + match i.desc with + | Iend | Ireturn | Iop(Itailcall_ind) | Iop(Itailcall_imm _) | Iraise _ -> + (i, Reg.add_set_array i.live i.arg) + | Iop op -> + let (s, before) = deadcode i.next in + if Proc.op_is_pure op + && Reg.disjoint_set_array before i.res then begin + assert (Array.length i.res > 0); (* sanity check *) + (s, before) + end else begin + ({i with next = s}, Reg.add_set_array i.live i.arg) + end + | Iifthenelse(test, ifso, ifnot) -> + let (ifso', _) = deadcode ifso in + let (ifnot', _) = deadcode ifnot in + let (s, _) = deadcode i.next in + ({i with desc = Iifthenelse(test, ifso', ifnot'); next = s}, + Reg.add_set_array i.live i.arg) + | Iswitch(index, cases) -> + let cases' = Array.map (fun c -> fst (deadcode c)) cases in + let (s, _) = deadcode i.next in + ({i with desc = Iswitch(index, cases'); next = s}, + Reg.add_set_array i.live i.arg) + | Iloop(body) -> + let (body', _) = deadcode body in + let (s, _) = deadcode i.next in + ({i with desc = Iloop body'; next = s}, i.live) + | Icatch(nfail, body, handler) -> + let (body', _) = deadcode body in + let (handler', _) = deadcode handler in + let (s, _) = deadcode i.next in + ({i with desc = Icatch(nfail, body', handler'); next = s}, i.live) + | Iexit nfail -> + (i, i.live) + | Itrywith(body, handler) -> + let (body', _) = deadcode body in + let (handler', _) = deadcode handler in + let (s, _) = deadcode i.next in + ({i with desc = Itrywith(body', handler'); next = s}, i.live) + +let fundecl f = + let (new_body, _) = deadcode f.fun_body in + {f with fun_body = new_body} diff --git a/asmcomp/deadcode.mli b/asmcomp/deadcode.mli new file mode 100644 index 0000000000..6aafae0540 --- /dev/null +++ b/asmcomp/deadcode.mli @@ -0,0 +1,16 @@ +(***********************************************************************) +(* *) +(* OCaml *) +(* *) +(* Xavier Leroy, projet Gallium, INRIA Rocquencourt *) +(* *) +(* Copyright 2014 Institut National de Recherche en Informatique et *) +(* en Automatique. All rights reserved. This file is distributed *) +(* under the terms of the Q Public License version 1.0. *) +(* *) +(***********************************************************************) + +(* Dead code elimination: remove pure instructions whose results are + not used. *) + +val fundecl: Mach.fundecl -> Mach.fundecl diff --git a/asmcomp/emitaux.ml b/asmcomp/emitaux.ml index ccfa977ffa..11212140a2 100644 --- a/asmcomp/emitaux.ml +++ b/asmcomp/emitaux.ml @@ -88,16 +88,10 @@ let emit_bytes_directive directive s = done; if !pos > 0 then emit_char '\n' -(* PR#4813: assemblers do strange things with float literals indeed, - so we convert to IEEE representation ourselves and emit float - literals as 32- or 64-bit integers. *) - -let emit_float64_directive directive f = - let x = Int64.bits_of_float (float_of_string f) in +let emit_float64_directive directive x = emit_printf "\t%s\t0x%Lx\n" directive x -let emit_float64_split_directive directive f = - let x = Int64.bits_of_float (float_of_string f) in +let emit_float64_split_directive directive x = let lo = Int64.logand x 0xFFFF_FFFFL and hi = Int64.shift_right_logical x 32 in emit_printf "\t%s\t0x%Lx, 0x%Lx\n" @@ -105,8 +99,7 @@ let emit_float64_split_directive directive f = (if Arch.big_endian then hi else lo) (if Arch.big_endian then lo else hi) -let emit_float32_directive directive f = - let x = Int32.bits_of_float (float_of_string f) in +let emit_float32_directive directive x = emit_printf "\t%s\t0x%lx\n" directive x (* Record live pointers at call points *) diff --git a/asmcomp/emitaux.mli b/asmcomp/emitaux.mli index cc479d8ccf..9b19e294c7 100644 --- a/asmcomp/emitaux.mli +++ b/asmcomp/emitaux.mli @@ -23,9 +23,9 @@ val emit_char: char -> unit val emit_string_literal: string -> unit val emit_string_directive: string -> string -> unit val emit_bytes_directive: string -> string -> unit -val emit_float64_directive: string -> string -> unit -val emit_float64_split_directive: string -> string -> unit -val emit_float32_directive: string -> string -> unit +val emit_float64_directive: string -> int64 -> unit +val emit_float64_split_directive: string -> int64 -> unit +val emit_float32_directive: string -> int32 -> unit val reset_debug_info: unit -> unit val emit_debug_info: Debuginfo.t -> unit diff --git a/asmcomp/i386/CSE.ml b/asmcomp/i386/CSE.ml new file mode 100644 index 0000000000..3ce4567024 --- /dev/null +++ b/asmcomp/i386/CSE.ml @@ -0,0 +1,48 @@ +(***********************************************************************) +(* *) +(* OCaml *) +(* *) +(* Xavier Leroy, projet Gallium, INRIA Rocquencourt *) +(* *) +(* Copyright 2014 Institut National de Recherche en Informatique et *) +(* en Automatique. All rights reserved. This file is distributed *) +(* under the terms of the Q Public License version 1.0. *) +(* *) +(***********************************************************************) + +(* CSE for the i386 *) + +open Cmm +open Arch +open Mach +open CSEgen + +class cse = object (self) + +inherit cse_generic as super + +method! class_of_operation op = + match op with + (* Operations that affect the floating-point stack cannot be factored *) + | Iconst_float _ | Inegf | Iabsf | Iaddf | Isubf | Imulf | Idivf + | Iintoffloat | Ifloatofint + | Iload((Single | Double | Double_u), _) -> Op_other + (* Specific ops *) + | Ispecific(Ilea _) -> Op_pure + | Ispecific(Istore_int(_, _, is_asg)) -> Op_store is_asg + | Ispecific(Istore_symbol(_, _, is_asg)) -> Op_store is_asg + | Ispecific(Ioffset_loc(_, _)) -> Op_store true + | Ispecific _ -> Op_other + | _ -> super#class_of_operation op + +method! is_cheap_operation op = + match op with + | Iconst_int _ | Iconst_blockheader _ -> true + | Iconst_symbol _ -> true + | _ -> false + +end + +let fundecl f = + (new cse)#fundecl f + diff --git a/asmcomp/i386/arch.ml b/asmcomp/i386/arch.ml index d2f9fd61a8..0d2130445e 100644 --- a/asmcomp/i386/arch.ml +++ b/asmcomp/i386/arch.ml @@ -31,8 +31,8 @@ type addressing_mode = type specific_operation = Ilea of addressing_mode (* Lea gives scaled adds *) - | Istore_int of nativeint * addressing_mode (* Store an integer constant *) - | Istore_symbol of string * addressing_mode (* Store a symbol *) + | Istore_int of nativeint * addressing_mode * bool (* Store an integer constant *) + | Istore_symbol of string * addressing_mode * bool (* Store a symbol *) | Ioffset_loc of int * addressing_mode (* Add a constant to a location *) | Ipush (* Push regs on stack *) | Ipush_int of nativeint (* Push an integer constant *) @@ -105,11 +105,14 @@ let print_addressing printreg addr ppf arg = let print_specific_operation printreg op ppf arg = match op with | Ilea addr -> print_addressing printreg addr ppf arg - | Istore_int(n, addr) -> - fprintf ppf "[%a] := %s" (print_addressing printreg addr) arg - (Nativeint.to_string n) - | Istore_symbol(lbl, addr) -> - fprintf ppf "[%a] := \"%s\"" (print_addressing printreg addr) arg lbl + | Istore_int(n, addr, is_assign) -> + fprintf ppf "[%a] := %nd %s" + (print_addressing printreg addr) arg n + (if is_assign then "(assign)" else "(init)") + | Istore_symbol(lbl, addr, is_assign) -> + fprintf ppf "[%a] := \"%s\" %s" + (print_addressing printreg addr) arg lbl + (if is_assign then "(assign)" else "(init)") | Ioffset_loc(n, addr) -> fprintf ppf "[%a] +:= %i" (print_addressing printreg addr) arg n | Ipush -> diff --git a/asmcomp/i386/emit.mlp b/asmcomp/i386/emit.mlp index 2b90d37f64..98df5f958b 100644 --- a/asmcomp/i386/emit.mlp +++ b/asmcomp/i386/emit.mlp @@ -412,15 +412,16 @@ let emit_floatspecial = function (* Floating-point constants *) -let float_constants = ref ([] : (string * int) list) +let float_constants = ref ([] : (int64 * int) list) let add_float_constant cst = + let repr = Int64.bits_of_float cst in try - List.assoc cst !float_constants + List.assoc repr !float_constants with Not_found -> let lbl = new_label() in - float_constants := (cst, lbl) :: !float_constants; + float_constants := (repr, lbl) :: !float_constants; lbl let emit_float_constant (cst, lbl) = @@ -465,8 +466,8 @@ let emit_instr fallthrough i = | _ -> ` movl $0, {emit_reg i.res.(0)}\n` end else ` movl ${emit_nativeint n}, {emit_reg i.res.(0)}\n` - | Lop(Iconst_float s) -> - begin match Int64.bits_of_float (float_of_string s) with + | Lop(Iconst_float f) -> + begin match Int64.bits_of_float f with | 0x0000_0000_0000_0000L -> (* +0.0 *) ` fldz\n` | 0x8000_0000_0000_0000L -> (* -0.0 *) @@ -476,7 +477,7 @@ let emit_instr fallthrough i = | 0xBFF0_0000_0000_0000L -> (* -1.0 *) ` fld1\n fchs\n` | _ -> - let lbl = add_float_constant s in + let lbl = add_float_constant f in ` fldl {emit_label lbl}\n` end | Lop(Iconst_symbol s) -> @@ -543,7 +544,7 @@ let emit_instr fallthrough i = | Double | Double_u -> ` fldl {emit_addressing addr i.arg 0}\n` end - | Lop(Istore(chunk, addr)) -> + | Lop(Istore(chunk, addr, _)) -> begin match chunk with | Word | Thirtytwo_signed | Thirtytwo_unsigned -> ` movl {emit_reg i.arg.(0)}, {emit_addressing addr i.arg 1}\n` @@ -683,9 +684,9 @@ let emit_instr fallthrough i = stack_offset := !stack_offset + 8 | Lop(Ispecific(Ilea addr)) -> ` lea {emit_addressing addr i.arg 0}, {emit_reg i.res.(0)}\n` - | Lop(Ispecific(Istore_int(n, addr))) -> + | Lop(Ispecific(Istore_int(n, addr, _))) -> ` movl ${emit_nativeint n}, {emit_addressing addr i.arg 0}\n` - | Lop(Ispecific(Istore_symbol(s, addr))) -> + | Lop(Ispecific(Istore_symbol(s, addr, _))) -> ` movl ${emit_symbol s}, {emit_addressing addr i.arg 0}\n` | Lop(Ispecific(Ioffset_loc(n, addr))) -> ` addl ${emit_int n}, {emit_addressing addr i.arg 0}\n` @@ -960,9 +961,9 @@ let emit_item = function | Cint n -> ` .long {emit_nativeint n}\n` | Csingle f -> - emit_float32_directive ".long" f + emit_float32_directive ".long" (Int32.bits_of_float f) | Cdouble f -> - emit_float64_split_directive ".long" f + emit_float64_split_directive ".long" (Int64.bits_of_float f) | Csymbol_address s -> ` .long {emit_symbol s}\n` | Clabel_address lbl -> diff --git a/asmcomp/i386/emit_nt.mlp b/asmcomp/i386/emit_nt.mlp index 495a29aecc..a9c9db3e4f 100644 --- a/asmcomp/i386/emit_nt.mlp +++ b/asmcomp/i386/emit_nt.mlp @@ -62,7 +62,10 @@ let add_used_symbol s = let emit_symbol s = emit_string "_"; Emitaux.emit_symbol '$' s +(* Output a 32 or 64 bit integer in hex *) + let emit_int32 n = emit_printf "0%lxh" n +let emit_int64 n = emit_printf "0%Lxh" n (* Output a label *) @@ -361,36 +364,20 @@ let emit_floatspecial = function (* Floating-point constants *) -let float_constants = ref ([] : (string * int) list) +let float_constants = ref ([] : (int64 * int) list) let add_float_constant cst = + let repr = Int64.bits_of_float cst in try - List.assoc cst !float_constants + List.assoc repr !float_constants with Not_found -> let lbl = new_label() in - float_constants := (cst, lbl) :: !float_constants; + float_constants := (repr, lbl) :: !float_constants; lbl -let emit_float s = - (* MASM doesn't like floating-point constants such as 2e9. - Turn them into 2.0e9. *) - let pos_e = ref (-1) and pos_dot = ref (-1) in - for i = 0 to String.length s - 1 do - match s.[i] with - 'e'|'E' -> pos_e := i - | '.' -> pos_dot := i - | _ -> () - done; - if !pos_dot < 0 && !pos_e >= 0 then begin - emit_string (String.sub s 0 !pos_e); - emit_string ".0"; - emit_string (String.sub s !pos_e (String.length s - !pos_e)) - end else - emit_string s - let emit_float_constant (cst, lbl) = - `{emit_label lbl} REAL8 {emit_float cst}\n` + `{emit_label lbl}: QWORD {emit_int64 cst}\n` (* Output the assembly code for an instruction *) @@ -426,8 +413,8 @@ let emit_instr i = | _ -> ` mov {emit_reg i.res.(0)}, 0\n` end else ` mov {emit_reg i.res.(0)}, {emit_nativeint n}\n` - | Lop(Iconst_float s) -> - begin match Int64.bits_of_float (float_of_string s) with + | Lop(Iconst_float f) -> + begin match Int64.bits_of_float f with | 0x0000_0000_0000_0000L -> (* +0.0 *) ` fldz\n` | 0x8000_0000_0000_0000L -> (* -0.0 *) @@ -437,7 +424,7 @@ let emit_instr i = | 0xBFF0_0000_0000_0000L -> (* -1.0 *) ` fld1\n fchs\n` | _ -> - let lbl = add_float_constant s in + let lbl = add_float_constant f in ` fld {emit_label lbl}\n` end | Lop(Iconst_symbol s) -> @@ -493,7 +480,7 @@ let emit_instr i = | Double | Double_u -> ` fld REAL8 PTR {emit_addressing addr i.arg 0}\n` end - | Lop(Istore(chunk, addr)) -> + | Lop(Istore(chunk, addr, _)) -> begin match chunk with | Word | Thirtytwo_signed | Thirtytwo_unsigned -> ` mov DWORD PTR {emit_addressing addr i.arg 1}, {emit_reg i.arg.(0)}\n` @@ -631,9 +618,9 @@ let emit_instr i = stack_offset := !stack_offset + 8 | Lop(Ispecific(Ilea addr)) -> ` lea {emit_reg i.res.(0)}, DWORD PTR {emit_addressing addr i.arg 0}\n` - | Lop(Ispecific(Istore_int(n, addr))) -> + | Lop(Ispecific(Istore_int(n, addr, _))) -> ` mov DWORD PTR {emit_addressing addr i.arg 0},{emit_nativeint n}\n` - | Lop(Ispecific(Istore_symbol(s, addr))) -> + | Lop(Ispecific(Istore_symbol(s, addr, _))) -> add_used_symbol s ; ` mov DWORD PTR {emit_addressing addr i.arg 0},OFFSET {emit_symbol s}\n` | Lop(Ispecific(Ioffset_loc(n, addr))) -> @@ -816,9 +803,9 @@ let emit_item = function | Cint32 n -> ` DWORD {emit_nativeint n}\n` | Csingle f -> - ` REAL4 {emit_float f}\n` + ` DWORD {emit_int32 (Int32.bits_of_float f)}\n` | Cdouble f -> - ` REAL8 {emit_float f}\n` + ` QWORD {emit_int64 (Int64.bits_of_float f)}\n` | Csymbol_address s -> add_used_symbol s ; ` DWORD {emit_symbol s}\n` diff --git a/asmcomp/i386/proc.ml b/asmcomp/i386/proc.ml index d80d182088..38bfdb29f9 100644 --- a/asmcomp/i386/proc.ml +++ b/asmcomp/i386/proc.ml @@ -182,6 +182,20 @@ let max_register_pressure = function Iintoffloat -> [| 6; max_int |] | _ -> [|7; max_int |] +(* Pure operations (without any side effect besides updating their result + registers). Note that floating-point operations are not pure + because they update the float stack. *) + +let op_is_pure = function + | Icall_ind | Icall_imm _ | Itailcall_ind | Itailcall_imm _ + | Iextcall _ | Istackoffset _ | Istore _ | Ialloc _ + | Iintop(Icheckbound) | Iintop_imm(Icheckbound, _) -> false + | Iconst_float _ | Inegf | Iabsf | Iaddf | Isubf | Imulf | Idivf + | Iintoffloat | Ifloatofint | Iload((Single | Double | Double_u), _) -> false + | Ispecific(Ilea _) -> true + | Ispecific _ -> false + | _ -> true + (* Layout of the stack frame *) let num_stack_slots = [| 0; 0 |] diff --git a/asmcomp/i386/selection.ml b/asmcomp/i386/selection.ml index d86f1b2823..10d2d40e37 100644 --- a/asmcomp/i386/selection.ml +++ b/asmcomp/i386/selection.ml @@ -135,7 +135,7 @@ let pseudoregs_for_operation op arg res = (* For storing a byte, the argument must be in eax...edx. (But for a short, any reg will do!) Keep it simple, just force the argument to be in edx. *) - | Istore((Byte_unsigned | Byte_signed), addr) -> + | Istore((Byte_unsigned | Byte_signed), addr, _) -> let newarg = Array.copy arg in newarg.(0) <- edx; (newarg, res, false) @@ -178,20 +178,20 @@ method select_addressing chunk exp = | (Ascaledadd(e1, e2, scale), d) -> (Iindexed2scaled(scale, d), Ctuple[e1; e2]) -method! select_store addr exp = +method! select_store is_assign addr exp = match exp with Cconst_int n -> - (Ispecific(Istore_int(Nativeint.of_int n, addr)), Ctuple []) + (Ispecific(Istore_int(Nativeint.of_int n, addr, is_assign)), Ctuple []) | (Cconst_natint n | Cconst_blockheader n) -> - (Ispecific(Istore_int(n, addr)), Ctuple []) + (Ispecific(Istore_int(n, addr, is_assign)), Ctuple []) | Cconst_pointer n -> - (Ispecific(Istore_int(Nativeint.of_int n, addr)), Ctuple []) + (Ispecific(Istore_int(Nativeint.of_int n, addr, is_assign)), Ctuple []) | Cconst_natpointer n -> - (Ispecific(Istore_int(n, addr)), Ctuple []) + (Ispecific(Istore_int(n, addr, is_assign)), Ctuple []) | Cconst_symbol s -> - (Ispecific(Istore_symbol(s, addr)), Ctuple []) + (Ispecific(Istore_symbol(s, addr, is_assign)), Ctuple []) | _ -> - super#select_store addr exp + super#select_store is_assign addr exp method! select_operation op args = match op with diff --git a/asmcomp/liveness.ml b/asmcomp/liveness.ml index 434d506558..7e3f1fe080 100644 --- a/asmcomp/liveness.ml +++ b/asmcomp/liveness.ml @@ -16,13 +16,13 @@ open Mach let live_at_exit = ref [] + let find_live_at_exit k = try List.assoc k !live_at_exit with - | Not_found -> Misc.fatal_error "Spill.find_live_at_exit" + | Not_found -> Misc.fatal_error "Liveness.find_live_at_exit" -let live_at_break = ref Reg.Set.empty let live_at_raise = ref Reg.Set.empty let rec live i finally = @@ -37,8 +37,30 @@ let rec live i finally = i.live <- finally; finally | Ireturn | Iop(Itailcall_ind) | Iop(Itailcall_imm _) -> - (* i.live remains empty since no regs are live across *) + i.live <- Reg.Set.empty; (* no regs are live across *) Reg.set_of_array i.arg + | Iop op -> + let after = live i.next finally in + if Proc.op_is_pure op && Reg.disjoint_set_array after i.res then begin + (* This operation is dead code. Ignore its arguments. *) + i.live <- after; + after + end else begin + let across_after = Reg.diff_set_array after i.res in + let across = + match op with + | Icall_ind | Icall_imm _ | Iextcall _ + | Iintop Icheckbound | Iintop_imm(Icheckbound, _) -> + (* The function call may raise an exception, branching to the + nearest enclosing try ... with. Similarly for bounds checks. + Hence, everything that must be live at the beginning of + the exception handler must also be live across this instr. *) + Reg.Set.union across_after !live_at_raise + | _ -> + across_after in + i.live <- across; + Reg.add_set_array across i.arg + end | Iifthenelse(test, ifso, ifnot) -> let at_join = live i.next finally in let at_fork = Reg.Set.union (live ifso at_join) (live ifnot at_join) in @@ -90,23 +112,8 @@ let rec live i finally = i.live <- before_body; before_body | Iraise _ -> - (* i.live remains empty since no regs are live across *) + i.live <- !live_at_raise; Reg.add_set_array !live_at_raise i.arg - | _ -> - let across_after = Reg.diff_set_array (live i.next finally) i.res in - let across = - match i.desc with - Iop Icall_ind | Iop(Icall_imm _) | Iop(Iextcall _) - | Iop(Iintop Icheckbound) | Iop(Iintop_imm(Icheckbound, _)) -> - (* The function call may raise an exception, branching to the - nearest enclosing try ... with. Similarly for bounds checks. - Hence, everything that must be live at the beginning of - the exception handler must also be live across this instr. *) - Reg.Set.union across_after !live_at_raise - | _ -> - across_after in - i.live <- across; - Reg.add_set_array across i.arg let fundecl ppf f = let initially_live = live f.fun_body Reg.Set.empty in diff --git a/asmcomp/mach.ml b/asmcomp/mach.ml index a11910ec73..3a7174763a 100644 --- a/asmcomp/mach.ml +++ b/asmcomp/mach.ml @@ -36,7 +36,7 @@ type operation = | Ispill | Ireload | Iconst_int of nativeint - | Iconst_float of string + | Iconst_float of float | Iconst_symbol of string | Iconst_blockheader of nativeint | Icall_ind @@ -46,7 +46,7 @@ type operation = | Iextcall of string * bool | Istackoffset of int | Iload of Cmm.memory_chunk * Arch.addressing_mode - | Istore of Cmm.memory_chunk * Arch.addressing_mode + | Istore of Cmm.memory_chunk * Arch.addressing_mode * bool | Ialloc of int | Iintop of integer_operation | Iintop_imm of integer_operation * int diff --git a/asmcomp/mach.mli b/asmcomp/mach.mli index 000c3cf9f1..618e5e4ce7 100644 --- a/asmcomp/mach.mli +++ b/asmcomp/mach.mli @@ -36,17 +36,18 @@ type operation = | Ispill | Ireload | Iconst_int of nativeint - | Iconst_float of string + | Iconst_float of float | Iconst_symbol of string | Iconst_blockheader of nativeint | Icall_ind | Icall_imm of string | Itailcall_ind | Itailcall_imm of string - | Iextcall of string * bool + | Iextcall of string * bool (* false = noalloc, true = alloc *) | Istackoffset of int | Iload of Cmm.memory_chunk * Arch.addressing_mode - | Istore of Cmm.memory_chunk * Arch.addressing_mode + | Istore of Cmm.memory_chunk * Arch.addressing_mode * bool + (* false = initialization, true = assignment *) | Ialloc of int | Iintop of integer_operation | Iintop_imm of integer_operation * int diff --git a/asmcomp/power/CSE.ml b/asmcomp/power/CSE.ml new file mode 100644 index 0000000000..50fefa5e35 --- /dev/null +++ b/asmcomp/power/CSE.ml @@ -0,0 +1,38 @@ +(***********************************************************************) +(* *) +(* OCaml *) +(* *) +(* Xavier Leroy, projet Gallium, INRIA Rocquencourt *) +(* *) +(* Copyright 2014 Institut National de Recherche en Informatique et *) +(* en Automatique. All rights reserved. This file is distributed *) +(* under the terms of the Q Public License version 1.0. *) +(* *) +(***********************************************************************) + +(* CSE for the PowerPC *) + +open Arch +open Mach +open CSEgen + +class cse = object (self) + +inherit cse_generic as super + +method! class_of_operation op = + match op with + | Ispecific(Imultaddf | Imultsubf) -> Op_pure + | Ispecific(Ialloc_far _) -> Op_other + | _ -> super#class_of_operation op + +method! is_cheap_operation op = + match op with + | Iconst_int n | Iconst_blockheader n -> n <= 32767n && n >= -32768n + | _ -> false + +end + +let fundecl f = + (new cse)#fundecl f + diff --git a/asmcomp/power/emit.mlp b/asmcomp/power/emit.mlp index f6ee1a2321..0a26ed1479 100644 --- a/asmcomp/power/emit.mlp +++ b/asmcomp/power/emit.mlp @@ -229,7 +229,7 @@ let record_frame live dbg = (* Record floating-point and large integer literals *) -let float_literals = ref ([] : (string * int) list) +let float_literals = ref ([] : (int64 * int) list) let int_literals = ref ([] : (nativeint * int) list) (* Record external C functions to be called in a position-independent way @@ -333,7 +333,7 @@ let instr_size = function if chunk = Byte_signed then load_store_size addr + 1 else load_store_size addr - | Lop(Istore(chunk, addr)) -> load_store_size addr + | Lop(Istore(chunk, addr, _)) -> load_store_size addr | Lop(Ialloc n) -> 4 | Lop(Ispecific(Ialloc_far n)) -> 5 | Lop(Iintop Imod) -> 3 @@ -466,9 +466,9 @@ let rec emit_instr i dslot = ` addis {emit_gpr 11}, 0, {emit_upper emit_label lbl}\n`; ` {emit_string lg} {emit_reg i.res.(0)}, {emit_lower emit_label lbl}({emit_gpr 11})\n` end - | Lop(Iconst_float s) -> + | Lop(Iconst_float f) -> let lbl = new_label() in - float_literals := (s, lbl) :: !float_literals; + float_literals := (Int64.bits_of_float f, lbl) :: !float_literals; ` addis {emit_gpr 11}, 0, {emit_upper emit_label lbl}\n`; ` lfd {emit_reg i.res.(0)}, {emit_lower emit_label lbl}({emit_gpr 11})\n` | Lop(Iconst_symbol s) -> @@ -548,7 +548,7 @@ let rec emit_instr i dslot = emit_load_store loadinstr addr i.arg 0 i.res.(0); if chunk = Byte_signed then ` extsb {emit_reg i.res.(0)}, {emit_reg i.res.(0)}\n` - | Lop(Istore(chunk, addr)) -> + | Lop(Istore(chunk, addr, _)) -> let storeinstr = match chunk with Byte_unsigned | Byte_signed -> "stb" @@ -628,8 +628,7 @@ let rec emit_instr i dslot = ` fcfid {emit_reg i.res.(0)}, {emit_reg i.res.(0)}\n` end else begin let lbl = new_label() in - float_literals := ("4.503601774854144e15", lbl) :: !float_literals; - (* That float above represents 0x4330000080000000 *) + float_literals := (0x4330000080000000L, lbl) :: !float_literals; ` addis {emit_gpr 11}, 0, {emit_upper emit_label lbl}\n`; ` lfd {emit_fpr 0}, {emit_lower emit_label lbl}({emit_gpr 11})\n`; ` lis {emit_gpr 0}, 0x4330\n`; @@ -899,11 +898,11 @@ let emit_item = function | Cint n -> ` {emit_string datag} {emit_nativeint n}\n` | Csingle f -> - emit_float32_directive ".long" f + emit_float32_directive ".long" (Int32.bits_of_float f) | Cdouble f -> if ppc64 - then emit_float64_directive ".quad" f - else emit_float64_split_directive ".long" f + then emit_float64_directive ".quad" (Int64.bits_of_float f) + else emit_float64_split_directive ".long" (Int64.bits_of_float f) | Csymbol_address s -> ` {emit_string datag} {emit_symbol s}\n` | Clabel_address lbl -> diff --git a/asmcomp/power/proc.ml b/asmcomp/power/proc.ml index 203e8a9ef4..77e37deda0 100644 --- a/asmcomp/power/proc.ml +++ b/asmcomp/power/proc.ml @@ -224,6 +224,17 @@ let max_register_pressure = function Iextcall(_, _) -> [| 15; 18 |] | _ -> [| 23; 30 |] +(* Pure operations (without any side effect besides updating their result + registers). *) + +let op_is_pure = function + | Icall_ind | Icall_imm _ | Itailcall_ind | Itailcall_imm _ + | Iextcall _ | Istackoffset _ | Istore _ | Ialloc _ + | Iintop(Icheckbound) | Iintop_imm(Icheckbound, _) -> false + | Ispecific(Imultaddf | Imultsubf) -> true + | Ispecific _ -> false + | _ -> true + (* Layout of the stack *) let num_stack_slots = [| 0; 0 |] diff --git a/asmcomp/power/scheduling.ml b/asmcomp/power/scheduling.ml index 6e594f0283..7adaa2eed3 100644 --- a/asmcomp/power/scheduling.ml +++ b/asmcomp/power/scheduling.ml @@ -44,7 +44,7 @@ method reload_retaddr_latency = 12 method oper_issue_cycles = function Iconst_float _ | Iconst_symbol _ -> 2 | Iload(_, Ibased(_, _)) -> 2 - | Istore(_, Ibased(_, _)) -> 2 + | Istore(_, Ibased(_, _), _) -> 2 | Ialloc _ -> 4 | Iintop(Imod) -> 40 (* assuming full stall *) | Iintop(Icomp _) -> 4 diff --git a/asmcomp/printclambda.ml b/asmcomp/printclambda.ml index f4e3b4d93f..b28d749e24 100644 --- a/asmcomp/printclambda.ml +++ b/asmcomp/printclambda.ml @@ -16,17 +16,20 @@ open Asttypes open Clambda let rec structured_constant ppf = function - | Uconst_float x -> fprintf ppf "%s" x - | Uconst_int32 x -> fprintf ppf "%ld" x - | Uconst_int64 x -> fprintf ppf "%Ld" x - | Uconst_nativeint x -> fprintf ppf "%nd" x + | Uconst_float x -> fprintf ppf "%F" x + | Uconst_int32 x -> fprintf ppf "%ldl" x + | Uconst_int64 x -> fprintf ppf "%LdL" x + | Uconst_nativeint x -> fprintf ppf "%ndn" x | Uconst_block (tag, l) -> fprintf ppf "block(%i" tag; List.iter (fun u -> fprintf ppf ",%a" uconstant u) l; fprintf ppf ")" - | Uconst_float_array sl -> - fprintf ppf "floatarray(%s)" - (String.concat "," sl) + | Uconst_float_array [] -> + fprintf ppf "floatarray()" + | Uconst_float_array (f1 :: fl) -> + fprintf ppf "floatarray(%F" f1; + List.iter (fun f -> fprintf ppf ",%F" f) fl; + fprintf ppf ")" | Uconst_string s -> fprintf ppf "%S" s and uconstant ppf = function diff --git a/asmcomp/printcmm.ml b/asmcomp/printcmm.ml index 008081fb47..89c8582aef 100644 --- a/asmcomp/printcmm.ml +++ b/asmcomp/printcmm.ml @@ -89,7 +89,7 @@ let rec expr ppf = function | Cconst_int n -> fprintf ppf "%i" n | Cconst_natint n | Cconst_blockheader n -> fprintf ppf "%s" (Nativeint.to_string n) - | Cconst_float s -> fprintf ppf "%s" s + | Cconst_float n -> fprintf ppf "%F" n | Cconst_symbol s -> fprintf ppf "\"%s\"" s | Cconst_pointer n -> fprintf ppf "%ia" n | Cconst_natpointer n -> fprintf ppf "%sa" (Nativeint.to_string n) @@ -188,8 +188,8 @@ let data_item ppf = function | Cint16 n -> fprintf ppf "int16 %i" n | Cint32 n -> fprintf ppf "int32 %s" (Nativeint.to_string n) | Cint n -> fprintf ppf "int %s" (Nativeint.to_string n) - | Csingle f -> fprintf ppf "single %s" f - | Cdouble f -> fprintf ppf "double %s" f + | Csingle f -> fprintf ppf "single %F" f + | Cdouble f -> fprintf ppf "double %F" f | Csymbol_address s -> fprintf ppf "addr \"%s\"" s | Clabel_address l -> fprintf ppf "addr L%i" l | Cstring s -> fprintf ppf "string \"%s\"" s diff --git a/asmcomp/printmach.ml b/asmcomp/printmach.ml index 824665cd9d..a39160d28c 100644 --- a/asmcomp/printmach.ml +++ b/asmcomp/printmach.ml @@ -105,7 +105,7 @@ let operation op arg ppf res = | Ireload -> fprintf ppf "%a (reload)" regs arg | Iconst_int n | Iconst_blockheader n -> fprintf ppf "%s" (Nativeint.to_string n) - | Iconst_float s -> fprintf ppf "%s" s + | Iconst_float f -> fprintf ppf "%F" f | Iconst_symbol s -> fprintf ppf "\"%s\"" s | Icall_ind -> fprintf ppf "call %a" regs arg | Icall_imm lbl -> fprintf ppf "call \"%s\" %a" lbl regs arg @@ -119,12 +119,13 @@ let operation op arg ppf res = | Iload(chunk, addr) -> fprintf ppf "%s[%a]" (Printcmm.chunk chunk) (Arch.print_addressing reg addr) arg - | Istore(chunk, addr) -> - fprintf ppf "%s[%a] := %a" + | Istore(chunk, addr, is_assign) -> + fprintf ppf "%s[%a] := %a %s" (Printcmm.chunk chunk) (Arch.print_addressing reg addr) (Array.sub arg 1 (Array.length arg - 1)) reg arg.(0) + (if is_assign then "(assign)" else "(init)") | Ialloc n -> fprintf ppf "alloc %i" n | Iintop(op) -> fprintf ppf "%a%s%a" reg arg.(0) (intop op) reg arg.(1) | Iintop_imm(op, n) -> fprintf ppf "%a%s%i" reg arg.(0) (intop op) n diff --git a/asmcomp/proc.mli b/asmcomp/proc.mli index 6cc6aedc90..cd3374ab9a 100644 --- a/asmcomp/proc.mli +++ b/asmcomp/proc.mli @@ -40,6 +40,9 @@ val max_register_pressure: Mach.operation -> int array val destroyed_at_oper: Mach.instruction_desc -> Reg.t array val destroyed_at_raise: Reg.t array +(* Pure operations *) +val op_is_pure: Mach.operation -> bool + (* Info for laying out the stack frame *) val num_stack_slots: int array val contains_calls: bool ref diff --git a/asmcomp/reg.ml b/asmcomp/reg.ml index a0fc7dfffa..ef6db5cb6e 100644 --- a/asmcomp/reg.ml +++ b/asmcomp/reg.ml @@ -178,6 +178,16 @@ let inter_set_array s v = else inter_all(i+1) in inter_all 0 +let disjoint_set_array s v = + match Array.length v with + 0 -> true + | 1 -> not (Set.mem v.(0) s) + | n -> let rec disjoint_all i = + if i >= n then true + else if Set.mem v.(i) s then false + else disjoint_all (i+1) + in disjoint_all 0 + let set_of_array v = match Array.length v with 0 -> Set.empty diff --git a/asmcomp/reg.mli b/asmcomp/reg.mli index 34e7498018..e3cb2d9520 100644 --- a/asmcomp/reg.mli +++ b/asmcomp/reg.mli @@ -58,6 +58,7 @@ module Map: Map.S with type key = t val add_set_array: Set.t -> t array -> Set.t val diff_set_array: Set.t -> t array -> Set.t val inter_set_array: Set.t -> t array -> Set.t +val disjoint_set_array: Set.t -> t array -> bool val set_of_array: t array -> Set.t val reset: unit -> unit diff --git a/asmcomp/schedgen.ml b/asmcomp/schedgen.ml index e04eacd375..eb91854a50 100644 --- a/asmcomp/schedgen.ml +++ b/asmcomp/schedgen.ml @@ -165,7 +165,7 @@ method private instr_in_basic_block instr = load or store instructions (e.g. on the I386). *) method is_store = function - Istore(_, _) -> true + Istore(_, _, _) -> true | _ -> false method is_load = function diff --git a/asmcomp/selectgen.ml b/asmcomp/selectgen.ml index a8f073e53a..e30d6fec39 100644 --- a/asmcomp/selectgen.ml +++ b/asmcomp/selectgen.ml @@ -209,8 +209,8 @@ method virtual select_addressing : (* Default instruction selection for stores (of words) *) -method select_store addr arg = - (Istore(Word, addr), arg) +method select_store is_assign addr arg = + (Istore(Word, addr, is_assign), arg) (* call marking methods, documented in selectgen.mli *) @@ -256,10 +256,10 @@ method select_operation op args = | (Cstore chunk, [arg1; arg2]) -> let (addr, eloc) = self#select_addressing chunk arg1 in if chunk = Word then begin - let (op, newarg2) = self#select_store addr arg2 in + let (op, newarg2) = self#select_store true addr arg2 in (op, [newarg2; eloc]) end else begin - (Istore(chunk, addr), [arg2; eloc]) + (Istore(chunk, addr, true), [arg2; eloc]) (* Inversion addr/datum in Istore *) end | (Calloc, _) -> (Ialloc 0, args) @@ -677,16 +677,16 @@ method emit_stores env data regs_addr = ref (Arch.offset_addressing Arch.identity_addressing (-Arch.size_int)) in List.iter (fun e -> - let (op, arg) = self#select_store !a e in + let (op, arg) = self#select_store false !a e in match self#emit_expr env arg with None -> assert false | Some regs -> match op with - Istore(_, _) -> + Istore(_, _, _) -> for i = 0 to Array.length regs - 1 do let r = regs.(i) in let kind = if r.typ = Float then Double_u else Word in - self#insert (Iop(Istore(kind, !a))) + self#insert (Iop(Istore(kind, !a, false))) (Array.append [|r|] regs_addr) [||]; a := Arch.offset_addressing !a (size_component r.typ) done diff --git a/asmcomp/selectgen.mli b/asmcomp/selectgen.mli index 7012c900cc..abc6db5ebf 100644 --- a/asmcomp/selectgen.mli +++ b/asmcomp/selectgen.mli @@ -35,7 +35,8 @@ class virtual selector_generic : object method select_condition : Cmm.expression -> Mach.test * Cmm.expression (* Can be overridden to deal with special test instructions *) method select_store : - Arch.addressing_mode -> Cmm.expression -> Mach.operation * Cmm.expression + bool -> Arch.addressing_mode -> Cmm.expression -> + Mach.operation * Cmm.expression (* Can be overridden to deal with special store constant instructions *) method regs_for : Cmm.machtype -> Reg.t array (* Return an array of fresh registers of the given type. diff --git a/asmcomp/sparc/CSE.ml b/asmcomp/sparc/CSE.ml new file mode 100644 index 0000000000..c38bab8fe1 --- /dev/null +++ b/asmcomp/sparc/CSE.ml @@ -0,0 +1,31 @@ +(***********************************************************************) +(* *) +(* OCaml *) +(* *) +(* Xavier Leroy, projet Gallium, INRIA Rocquencourt *) +(* *) +(* Copyright 2014 Institut National de Recherche en Informatique et *) +(* en Automatique. All rights reserved. This file is distributed *) +(* under the terms of the Q Public License version 1.0. *) +(* *) +(***********************************************************************) + +(* CSE for Sparc *) + +open Mach +open CSEgen + +class cse = object (self) + +inherit cse_generic (* as super *) + +method! is_cheap_operation op = + match op with + | Iconst_int n | Iconst_blockheader n -> n <= 4095n && n >= -4096n + | _ -> false + +end + +let fundecl f = + (new cse)#fundecl f + diff --git a/asmcomp/sparc/emit.mlp b/asmcomp/sparc/emit.mlp index 12d60ed327..877a3d52a0 100644 --- a/asmcomp/sparc/emit.mlp +++ b/asmcomp/sparc/emit.mlp @@ -190,7 +190,7 @@ let emit_frame fd = (* Record floating-point constants *) -let float_constants = ref ([] : (int * string) list) +let float_constants = ref ([] : (int * int64) list) let emit_float_constant (lbl, cst) = rodata (); @@ -309,11 +309,11 @@ let rec emit_instr i dslot = ` sethi %hi({emit_nativeint n}), %g1\n`; ` or %g1, %lo({emit_nativeint n}), {emit_reg i.res.(0)}\n` end - | Lop(Iconst_float s) -> + | Lop(Iconst_float f) -> (* On UltraSPARC, the fzero instruction could be used to set a floating point register pair to zero. *) let lbl = new_label() in - float_constants := (lbl, s) :: !float_constants; + float_constants := (lbl, Int64.bits_of_float f) :: !float_constants; ` sethi %hi({emit_label lbl}), %g1\n`; ` ldd [%g1 + %lo({emit_label lbl})], {emit_reg i.res.(0)}\n` | Lop(Iconst_symbol s) -> @@ -375,7 +375,7 @@ let rec emit_instr i dslot = | _ -> "ld" in emit_load loadinstr addr i.arg dest end - | Lop(Istore(chunk, addr)) -> + | Lop(Istore(chunk, addr, _)) -> let src = i.arg.(0) in begin match chunk with Double_u -> @@ -612,7 +612,7 @@ let is_one_instr i = | Iconst_int n | Iconst_blockheader n -> is_native_immediate n | Istackoffset _ -> true | Iload(_, Iindexed n) -> i.res.(0).typ <> Float && is_immediate n - | Istore(_, Iindexed n) -> i.arg.(0).typ <> Float && is_immediate n + | Istore(_, Iindexed n, _) -> i.arg.(0).typ <> Float && is_immediate n | Iintop(op) -> is_one_instr_op op | Iintop_imm(op, _) -> is_one_instr_op op | Iaddf | Isubf | Imulf | Idivf -> true @@ -706,9 +706,9 @@ let emit_item = function | Cint n -> ` .word {emit_nativeint n}\n` | Csingle f -> - emit_float32_directive ".word" f + emit_float32_directive ".word" (Int32.bits_of_float f) | Cdouble f -> - emit_float64_split_directive ".word" f + emit_float64_split_directive ".word" (Int64.bits_of_float f) | Csymbol_address s -> ` .word {emit_symbol s}\n` | Clabel_address lbl -> diff --git a/asmcomp/sparc/proc.ml b/asmcomp/sparc/proc.ml index ed107a82a7..a538df4345 100644 --- a/asmcomp/sparc/proc.ml +++ b/asmcomp/sparc/proc.ml @@ -196,6 +196,15 @@ let max_register_pressure = function Iextcall(_, _) -> [| 11; 0 |] | _ -> [| 19; 15 |] +(* Pure operations (without any side effect besides updating their result + registers). *) + +let op_is_pure = function + | Icall_ind | Icall_imm _ | Itailcall_ind | Itailcall_imm _ + | Iextcall _ | Istackoffset _ | Istore _ | Ialloc _ + | Iintop(Icheckbound) | Iintop_imm(Icheckbound, _) -> false + | _ -> true + (* Layout of the stack *) let num_stack_slots = [| 0; 0 |] diff --git a/asmcomp/spill.ml b/asmcomp/spill.ml index ca17fe5bf6..95c49de393 100644 --- a/asmcomp/spill.ml +++ b/asmcomp/spill.ml @@ -233,7 +233,12 @@ let rec reload i before = (i, Reg.Set.empty) | Itrywith(body, handler) -> let (new_body, after_body) = reload body before in - let (new_handler, after_handler) = reload handler handler.live in + (* All registers live at the beginning of the handler are destroyed, + except the exception bucket *) + let before_handler = + Reg.Set.remove Proc.loc_exn_bucket + (Reg.add_set_array handler.live handler.arg) in + let (new_handler, after_handler) = reload handler before_handler in let (new_next, finally) = reload i.next (Reg.Set.union after_body after_handler) in (instr_cons (Itrywith(new_body, new_handler)) i.arg i.res new_next, |