(**************************************************************************)
(*                                                                        *)
(*                                 OCaml                                  *)
(*                                                                        *)
(*                 Benedikt Meurer, University of Siegen                  *)
(*                                                                        *)
(*   Copyright 1998 Institut National de Recherche en Informatique et     *)
(*     en Automatique.                                                    *)
(*   Copyright 2012 Benedikt Meurer.                                      *)
(*                                                                        *)
(*   All rights reserved.  This file is distributed under the terms of    *)
(*   the GNU Lesser General Public License version 2.1, with the          *)
(*   special exception on linking described in the file LICENSE.          *)
(*                                                                        *)
(**************************************************************************)

(* Instruction selection for the ARM processor *)

open Arch
open Proc
open Cmm
open Mach

let is_offset chunk n =
  match chunk with
  (* VFPv{2,3} load/store have -1020 to 1020 *)
    Single | Double | Double_u
    when !fpu >= VFPv2 ->
      n >= -1020 && n <= 1020
  (* ARM load/store byte/word have -4095 to 4095 *)
  | Byte_unsigned | Byte_signed
  | Thirtytwo_unsigned | Thirtytwo_signed
  | Word_int | Word_val | Single
    when not !thumb ->
      n >= -4095 && n <= 4095
  (* Thumb-2 load/store have -255 to 4095 *)
  | _ when !arch > ARMv6 && !thumb ->
      n >= -255 && n <= 4095
  (* Everything else has -255 to 255 *)
  | _ ->
      n >= -255 && n <= 255

let select_shiftop = function
    Clsl -> Ishiftlogicalleft
  | Clsr -> Ishiftlogicalright
  | Casr -> Ishiftarithmeticright
  | __-> assert false

(* Special constraints on operand and result registers *)

exception Use_default

let r1 = phys_reg 1
let r6 = phys_reg 6
let r7 = phys_reg 7

let pseudoregs_for_operation op arg res =
  match op with
  (* For mul rd,rm,rs and mla rd,rm,rs,ra (pre-ARMv6) the registers rm
     and rd must be different. We deal with this by pretending that rm
     is also a result of the mul / mla operation. *)
    Iintop Imul | Ispecific Imuladd when !arch < ARMv6 ->
      (arg, [| res.(0); arg.(0) |])
  (* For smull rdlo,rdhi,rn,rm (pre-ARMv6) the registers rdlo, rdhi and rn
     must be different.  Also, rdlo (whose contents we discard) is always
     forced to be r12 in proc.ml, which means that neither rdhi and rn can
     be r12.  To keep things simple, we force both of those two to specific
     hard regs: rdhi in r6 and rn in r7. *)
  | Iintop Imulh when !arch < ARMv6 ->
      ([| r7; arg.(1) |], [| r6 |])
  (* Soft-float Iabsf and Inegf: arg.(0) and res.(0) must be the same *)
  | Iabsf | Inegf when !fpu = Soft ->
      ([|res.(0); arg.(1)|], res)
  (* VFPv{2,3} Imuladdf...Inegmulsubf: arg.(0) and res.(0) must be the same *)
  | Ispecific(Imuladdf | Inegmuladdf | Imulsubf | Inegmulsubf) ->
      let arg' = Array.copy arg in
      arg'.(0) <- res.(0);
      (arg', res)
  (* We use __aeabi_idivmod for Cmodi only, and hence we care only
     for the remainder in r1, so fix up the destination register. *)
  | Iextcall("__aeabi_idivmod", false) ->
      (arg, [|r1|])
  (* Other instructions are regular *)
  | _ -> raise Use_default

(* Instruction selection *)
class selector = object(self)

inherit Selectgen.selector_generic as super

method! regs_for tyv =
  Reg.createv (if !fpu = Soft then begin
                 (* Expand floats into pairs of integer registers *)
                 (* CR mshinwell: we need to check this in conjunction with
                    the unboxed external functionality *)
                 let rec expand = function
                   [] -> []
                 | Float :: tyl -> Int :: Int :: expand tyl
                 | ty :: tyl -> ty :: expand tyl in
                 Array.of_list (expand (Array.to_list tyv))
               end else begin
                 tyv
               end)

method is_immediate n =
  is_immediate (Int32.of_int n)

method! is_simple_expr = function
  (* inlined floating-point ops are simple if their arguments are *)
  | Cop(Cextcall("sqrt", _, _, _), args) when !fpu >= VFPv2 ->
      List.for_all self#is_simple_expr args
  (* inlined byte-swap ops are simple if their arguments are *)
  | Cop(Cextcall("caml_bswap16_direct", _, _, _), args) when !arch >= ARMv6T2 ->
      List.for_all self#is_simple_expr args
  | Cop(Cextcall("caml_int32_direct_bswap", _,_,_), args) when !arch >= ARMv6 ->
      List.for_all self#is_simple_expr args
  | e -> super#is_simple_expr e

method select_addressing chunk = function
  | Cop((Cadda | Caddv), [arg; Cconst_int n])
    when is_offset chunk n ->
      (Iindexed n, arg)
  | Cop((Cadda | Caddv as op), [arg1; Cop(Caddi, [arg2; Cconst_int n])])
    when is_offset chunk n ->
      (Iindexed n, Cop(op, [arg1; arg2]))
  | arg ->
      (Iindexed 0, arg)

method select_shift_arith op arithop arithrevop args =
  match args with
    [arg1; Cop(Clsl | Clsr | Casr as op, [arg2; Cconst_int n])]
    when n > 0 && n < 32 ->
      (Ispecific(Ishiftarith(arithop, select_shiftop op, n)), [arg1; arg2])
  | [Cop(Clsl | Clsr | Casr as op, [arg1; Cconst_int n]); arg2]
    when n > 0 && n < 32 ->
      (Ispecific(Ishiftarith(arithrevop, select_shiftop op, n)), [arg2; arg1])
  | args ->
      begin match super#select_operation op args with
      (* Recognize multiply high and add *)
        (Iintop Iadd, [Cop(Cmulhi, args); arg3])
      | (Iintop Iadd, [arg3; Cop(Cmulhi, args)]) as op_args
        when !arch >= ARMv6 ->
          begin match self#select_operation Cmulhi args with
            (Iintop Imulh, [arg1; arg2]) ->
              (Ispecific Imulhadd, [arg1; arg2; arg3])
          | _ -> op_args
          end
      (* Recognize multiply and add *)
      | (Iintop Iadd, [Cop(Cmuli, args); arg3])
      | (Iintop Iadd, [arg3; Cop(Cmuli, args)]) as op_args ->
          begin match self#select_operation Cmuli args with
            (Iintop Imul, [arg1; arg2]) ->
              (Ispecific Imuladd, [arg1; arg2; arg3])
          | _ -> op_args
          end
      (* Recognize multiply and subtract *)
      | (Iintop Isub, [arg3; Cop(Cmuli, args)]) as op_args
        when !arch > ARMv6 ->
          begin match self#select_operation Cmuli args with
            (Iintop Imul, [arg1; arg2]) ->
              (Ispecific Imulsub, [arg1; arg2; arg3])
          | _ -> op_args
          end
      | op_args -> op_args
      end

method! select_operation op args =
  match (op, args) with
  (* Recognize special shift arithmetic *)
    ((Caddv | Cadda | Caddi), [arg; Cconst_int n])
    when n < 0 && self#is_immediate (-n) ->
      (Iintop_imm(Isub, -n), [arg])
  | ((Caddv | Cadda | Caddi as op), args) ->
      self#select_shift_arith op Ishiftadd Ishiftadd args
  | (Csubi, [arg; Cconst_int n])
    when n < 0 && self#is_immediate (-n) ->
      (Iintop_imm(Iadd, -n), [arg])
  | (Csubi, [Cconst_int n; arg])
    when self#is_immediate n ->
      (Ispecific(Irevsubimm n), [arg])
  | (Csubi as op, args) ->
      self#select_shift_arith op Ishiftsub Ishiftsubrev args
  | (Cand as op, args) ->
      self#select_shift_arith op Ishiftand Ishiftand args
  | (Cor as op, args) ->
      self#select_shift_arith op Ishiftor Ishiftor args
  | (Cxor as op, args) ->
      self#select_shift_arith op Ishiftxor Ishiftxor args
  | (Ccheckbound _, [Cop(Clsl | Clsr | Casr as op, [arg1; Cconst_int n]); arg2])
    when n > 0 && n < 32 ->
      (Ispecific(Ishiftcheckbound(select_shiftop op, n)), [arg1; arg2])
  (* ARM does not support immediate operands for multiplication *)
  | (Cmuli, args) ->
      (Iintop Imul, args)
  | (Cmulhi, args) ->
      (Iintop Imulh, args)
  (* Turn integer division/modulus into runtime ABI calls *)
  | (Cdivi, args) ->
      (Iextcall("__aeabi_idiv", false), args)
  | (Cmodi, args) ->
      (* See above for fix up of return register *)
      (Iextcall("__aeabi_idivmod", false), args)
  (* Recognize 16-bit bswap instruction (ARMv6T2 because we need movt) *)
  | (Cextcall("caml_bswap16_direct", _, _, _), args) when !arch >= ARMv6T2 ->
      (Ispecific(Ibswap 16), args)
  (* Recognize 32-bit bswap instructions (ARMv6 and above) *)
  | (Cextcall("caml_int32_direct_bswap", _, _, _), args) when !arch >= ARMv6 ->
      (Ispecific(Ibswap 32), args)
  (* Turn floating-point operations into runtime ABI calls for softfp *)
  | (op, args) when !fpu = Soft -> self#select_operation_softfp op args
  (* Select operations for VFPv{2,3} *)
  | (op, args) -> self#select_operation_vfpv3 op args

method private select_operation_softfp op args =
  match (op, args) with
  (* Turn floating-point operations into runtime ABI calls *)
  | (Caddf, args) -> (Iextcall("__aeabi_dadd", false), args)
  | (Csubf, args) -> (Iextcall("__aeabi_dsub", false), args)
  | (Cmulf, args) -> (Iextcall("__aeabi_dmul", false), args)
  | (Cdivf, args) -> (Iextcall("__aeabi_ddiv", false), args)
  | (Cfloatofint, args) -> (Iextcall("__aeabi_i2d", false), args)
  | (Cintoffloat, args) -> (Iextcall("__aeabi_d2iz", false), args)
  | (Ccmpf comp, args) ->
      let func = (match comp with
                    Cne    (* there's no __aeabi_dcmpne *)
                  | Ceq -> "__aeabi_dcmpeq"
                  | Clt -> "__aeabi_dcmplt"
                  | Cle -> "__aeabi_dcmple"
                  | Cgt -> "__aeabi_dcmpgt"
                  | Cge -> "__aeabi_dcmpge") in
      let comp = (match comp with
                    Cne -> Ceq (* eq 0 => false *)
                  | _   -> Cne (* ne 0 => true *)) in
      (Iintop_imm(Icomp(Iunsigned comp), 0),
       [Cop(Cextcall(func, typ_int, false, Debuginfo.none), args)])
  (* Add coercions around loads and stores of 32-bit floats *)
  | (Cload Single, args) ->
      (Iextcall("__aeabi_f2d", false), [Cop(Cload Word_int, args)])
  | (Cstore (Single, init), [arg1; arg2]) ->
      let arg2' =
        Cop(Cextcall("__aeabi_d2f", typ_int, false, Debuginfo.none),
            [arg2]) in
      self#select_operation (Cstore (Word_int, init)) [arg1; arg2']
  (* Other operations are regular *)
  | (op, args) -> super#select_operation op args

method private select_operation_vfpv3 op args =
  match (op, args) with
  (* Recognize floating-point negate and multiply *)
    (Cnegf, [Cop(Cmulf, args)]) ->
      (Ispecific Inegmulf, args)
  (* Recognize floating-point multiply and add *)
  | (Caddf, [arg; Cop(Cmulf, args)])
  | (Caddf, [Cop(Cmulf, args); arg]) ->
      (Ispecific Imuladdf, arg :: args)
  (* Recognize floating-point negate, multiply and subtract *)
  | (Csubf, [Cop(Cnegf, [arg]); Cop(Cmulf, args)])
  | (Csubf, [Cop(Cnegf, [Cop(Cmulf, args)]); arg]) ->
      (Ispecific Inegmulsubf, arg :: args)
  (* Recognize floating-point negate, multiply and add *)
  | (Csubf, [arg; Cop(Cmulf, args)]) ->
      (Ispecific Inegmuladdf, arg :: args)
  (* Recognize multiply and subtract *)
  | (Csubf, [Cop(Cmulf, args); arg]) ->
      (Ispecific Imulsubf, arg :: args)
  (* Recognize floating-point square root *)
  | (Cextcall("sqrt", _, false, _), args) ->
      (Ispecific Isqrtf, args)
  (* Other operations are regular *)
  | (op, args) -> super#select_operation op args

method! select_condition = function
  (* Turn floating-point comparisons into runtime ABI calls *)
    Cop(Ccmpf _ as op, args) when !fpu = Soft ->
      begin match self#select_operation_softfp op args with
        (Iintop_imm(Icomp(Iunsigned Ceq), 0), [arg]) -> (Ifalsetest, arg)
      | (Iintop_imm(Icomp(Iunsigned Cne), 0), [arg]) -> (Itruetest, arg)
      | _ -> assert false
      end
  | expr ->
      super#select_condition expr

(* Deal with some register constraints *)

method! insert_op_debug op dbg rs rd =
  try
    let (rsrc, rdst) = pseudoregs_for_operation op rs rd in
    self#insert_moves rs rsrc;
    self#insert_debug (Iop op) dbg rsrc rdst;
    self#insert_moves rdst rd;
    rd
  with Use_default ->
    super#insert_op_debug op dbg rs rd

end

let fundecl f = (new selector)#emit_fundecl f