summary refs log tree commit diff
path: root/asmcomp/arm
diff options
context:
space:
mode:
Diffstat (limited to 'asmcomp/arm')
-rw-r--r-- asmcomp/arm/arch.ml 66
-rw-r--r-- asmcomp/arm/emit.mlp 24
-rw-r--r-- asmcomp/arm/proc.ml 36
-rw-r--r-- asmcomp/arm/reload.ml 2
-rw-r--r-- asmcomp/arm/scheduling.ml 6
-rw-r--r-- asmcomp/arm/selection.ml 29
6 files changed, 96 insertions, 67 deletions
diff --git a/asmcomp/arm/arch.ml b/asmcomp/arm/arch.ml
index c4aca8df0f..cac286aa48 100644
--- a/asmcomp/arm/arch.ml
+++ b/asmcomp/arm/arch.ml
@@ -11,21 +11,18 @@
(* *)
(***********************************************************************)
-(* $Id$ *)
-
(* Specific operations for the ARM processor *)
-open Misc
open Format
-type abi = EABI | EABI_VFP
+type abi = EABI | EABI_HF
type arch = ARMv4 | ARMv5 | ARMv5TE | ARMv6 | ARMv6T2 | ARMv7
-type fpu = Soft | VFPv3_D16 | VFPv3
+type fpu = Soft | VFPv2 | VFPv3_D16 | VFPv3
let abi =
match Config.system with
"linux_eabi" -> EABI
- | "linux_eabihf" -> EABI_VFP
+ | "linux_eabihf" -> EABI_HF
| _ -> assert false
let string_of_arch = function
@@ -38,6 +35,7 @@ let string_of_arch = function
let string_of_fpu = function
Soft -> "soft"
+ | VFPv2 -> "vfpv2"
| VFPv3_D16 -> "vfpv3-d16"
| VFPv3 -> "vfpv3"
@@ -47,13 +45,14 @@ let (arch, fpu, thumb) =
let (def_arch, def_fpu, def_thumb) =
begin match abi, Config.model with
(* Defaults for architecture, FPU and Thumb *)
- EABI, "armv5" -> ARMv5, Soft, false
- | EABI, "armv5te" -> ARMv5TE, Soft, false
- | EABI, "armv6" -> ARMv6, Soft, false
- | EABI, "armv6t2" -> ARMv6T2, Soft, false
- | EABI, "armv7" -> ARMv7, Soft, false
- | EABI, _ -> ARMv4, Soft, false
- | EABI_VFP, _ -> ARMv7, VFPv3_D16, true
+ EABI, "armv5" -> ARMv5, Soft, false
+ | EABI, "armv5te" -> ARMv5TE, Soft, false
+ | EABI, "armv6" -> ARMv6, Soft, false
+ | EABI, "armv6t2" -> ARMv6T2, Soft, false
+ | EABI, "armv7" -> ARMv7, Soft, false
+ | EABI, _ -> ARMv4, Soft, false
+ | EABI_HF, "armv6" -> ARMv6, VFPv2, false
+ | EABI_HF, _ -> ARMv7, VFPv3_D16, true
end in
(ref def_arch, ref def_fpu, ref def_thumb)
@@ -61,19 +60,20 @@ let pic_code = ref false
let farch spec =
arch := (match spec with
- "armv4" when abi <> EABI_VFP -> ARMv4
- | "armv5" when abi <> EABI_VFP -> ARMv5
- | "armv5te" when abi <> EABI_VFP -> ARMv5TE
- | "armv6" when abi <> EABI_VFP -> ARMv6
- | "armv6t2" when abi <> EABI_VFP -> ARMv6T2
- | "armv7" -> ARMv7
+ "armv4" when abi <> EABI_HF -> ARMv4
+ | "armv5" when abi <> EABI_HF -> ARMv5
+ | "armv5te" when abi <> EABI_HF -> ARMv5TE
+ | "armv6" -> ARMv6
+ | "armv6t2" -> ARMv6T2
+ | "armv7" -> ARMv7
| spec -> raise (Arg.Bad spec))
let ffpu spec =
fpu := (match spec with
- "soft" when abi <> EABI_VFP -> Soft
- | "vfpv3-d16" when abi = EABI_VFP -> VFPv3_D16
- | "vfpv3" when abi = EABI_VFP -> VFPv3
+ "soft" when abi <> EABI_HF -> Soft
+ | "vfpv2" when abi = EABI_HF -> VFPv2
+ | "vfpv3-d16" when abi = EABI_HF -> VFPv3_D16
+ | "vfpv3" when abi = EABI_HF -> VFPv3
| spec -> raise (Arg.Bad spec))
let command_line_options =
@@ -110,14 +110,15 @@ type specific_operation =
Ishiftarith of arith_operation * int
| Ishiftcheckbound of int
| Irevsubimm of int
- | Imuladd (* multiply and add *)
- | Imulsub (* multiply and subtract *)
- | Inegmulf (* floating-point negate and multiply *)
- | Imuladdf (* floating-point multiply and add *)
- | Inegmuladdf (* floating-point negate, multiply and add *)
- | Imulsubf (* floating-point multiply and subtract *)
- | Inegmulsubf (* floating-point negate, multiply and subtract *)
- | Isqrtf (* floating-point square root *)
+ | Imuladd (* multiply and add *)
+ | Imulsub (* multiply and subtract *)
+ | Inegmulf (* floating-point negate and multiply *)
+ | Imuladdf (* floating-point multiply and add *)
+ | Inegmuladdf (* floating-point negate, multiply and add *)
+ | Imulsubf (* floating-point multiply and subtract *)
+ | Inegmulsubf (* floating-point negate, multiply and subtract *)
+ | Isqrtf (* floating-point square root *)
+ | Ibswap of int (* endianness conversion *)
and arith_operation =
Ishiftadd
@@ -132,6 +133,8 @@ let size_addr = 4
let size_int = 4
let size_float = 8
+let allow_unaligned_access = false
+
(* Behavior of division *)
let division_crashes_on_overflow = false
@@ -206,6 +209,9 @@ let print_specific_operation printreg op ppf arg =
| Isqrtf ->
fprintf ppf "sqrtf %a"
printreg arg.(0)
+ | Ibswap n ->
+ fprintf ppf "bswap%i %a" n
+ printreg arg.(0)
(* Recognize immediate operands *)
diff --git a/asmcomp/arm/emit.mlp b/asmcomp/arm/emit.mlp
index b0baf86523..4a12615136 100644
--- a/asmcomp/arm/emit.mlp
+++ b/asmcomp/arm/emit.mlp
@@ -11,11 +11,8 @@
(* *)
(***********************************************************************)
-(* $Id$ *)
-
(* Emission of ARM assembly code *)
-open Location
open Misc
open Cmm
open Arch
@@ -402,6 +399,10 @@ let emit_instr i =
` ldr {emit_reg i.res.(1)}, {emit_label lbl} + 4\n`;
2
end
+ | Lop(Iconst_float f) when !fpu = VFPv2 ->
+ let lbl = float_literal f in
+ ` fldd {emit_reg i.res.(0)}, {emit_label lbl} @ {emit_string f}\n`;
+ 1
| Lop(Iconst_float f) ->
let encode imm =
let sg = Int64.to_int (Int64.shift_right_logical imm 63) in
@@ -468,7 +469,7 @@ let emit_instr i =
let ninstr = emit_stack_adjustment (-n) in
stack_offset := !stack_offset + n;
ninstr
- | Lop(Iload(Single, addr)) when !fpu >= VFPv3_D16 ->
+ | Lop(Iload(Single, addr)) when !fpu >= VFPv2 ->
` flds s14, {emit_addressing addr i.arg 0}\n`;
` fcvtds {emit_reg i.res.(0)}, s14\n`; 2
| Lop(Iload((Double | Double_u), addr)) when !fpu = Soft ->
@@ -502,7 +503,7 @@ let emit_instr i =
| Double_u -> "fldd"
| _ (* 32-bit quantities *) -> "ldr" in
` {emit_string instr} {emit_reg r}, {emit_addressing addr i.arg 0}\n`; 1
- | Lop(Istore(Single, addr)) when !fpu >= VFPv3_D16 ->
+ | Lop(Istore(Single, addr)) when !fpu >= VFPv2 ->
` fcvtsd s14, {emit_reg i.arg.(0)}\n`;
` fsts s14, {emit_addressing addr i.arg 1}\n`; 2
| Lop(Istore((Double | Double_u), addr)) when !fpu = Soft ->
@@ -681,6 +682,16 @@ let emit_instr i =
| Imulsub -> "mls"
| _ -> assert false) in
` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.arg.(2)}\n`; 1
+ | Lop(Ispecific(Ibswap size)) ->
+ begin match size with
+ 16 ->
+ ` rev16 {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}\n`;
+ ` movt {emit_reg i.res.(0)}, #0\n`; 2
+ | 32 ->
+ ` rev {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}\n`; 1
+ | _ ->
+ assert false
+ end
| Lreloadretaddr ->
let n = frame_size() in
` ldr lr, [sp, #{emit_int(n-4)}]\n`; 1
@@ -808,7 +819,7 @@ let rec emit_all ninstr i =
let n = emit_instr i in
let ninstr' = ninstr + n in
(* fldd can address up to +/-1KB, ldr can address up to +/-4KB *)
- let limit = (if !fpu >= VFPv3_D16 && !float_literals <> []
+ let limit = (if !fpu >= VFPv2 && !float_literals <> []
then 127
else 511) in
let limit = limit - !num_literals in
@@ -910,6 +921,7 @@ let begin_assembly() =
end;
begin match !fpu with
Soft -> ` .fpu softvfp\n`
+ | VFPv2 -> ` .fpu vfpv2\n`
| VFPv3_D16 -> ` .fpu vfpv3-d16\n`
| VFPv3 -> ` .fpu vfpv3\n`
end;
diff --git a/asmcomp/arm/proc.ml b/asmcomp/arm/proc.ml
index aed2b01a76..dbb13173a9 100644
--- a/asmcomp/arm/proc.ml
+++ b/asmcomp/arm/proc.ml
@@ -11,8 +11,6 @@
(* *)
(***********************************************************************)
-(* $Id$ *)
-
(* Description of the ARM processor *)
open Misc
@@ -38,7 +36,7 @@ let word_addressed = false
r13 stack pointer
r14 return address
r15 program counter
- Floatinng-point register map (VFPv3):
+ Floating-point register map (VFPv{2,3}):
d0 - d7 general purpose (not preserved)
d8 - d15 general purpose (preserved)
d16 - d31 general purpose (not preserved), VFPv3 only
@@ -55,9 +53,9 @@ let float_reg_name =
(* We have three register classes:
0 for integer registers
- 1 for VFPv3-D16
+ 1 for VFPv2 and VFPv3-D16
2 for VFPv3
- This way we can choose between VFPv3-D16 and VFPv3
+ This way we can choose between VFPv2/VFPv3-D16 and VFPv3
at (ocamlopt) runtime using command line switches.
*)
@@ -66,6 +64,7 @@ let num_register_classes = 3
let register_class r =
match (r.typ, !fpu) with
(Int | Addr), _ -> 0
+ | Float, VFPv2 -> 1
| Float, VFPv3_D16 -> 1
| Float, _ -> 2
@@ -124,8 +123,8 @@ let calling_conventions
ofs := !ofs + size_int
end
| Float ->
- assert (abi = EABI_VFP);
- assert (!fpu >= VFPv3_D16);
+ assert (abi = EABI_HF);
+ assert (!fpu >= VFPv2);
if !float <= last_float then begin
loc.(i) <- phys_reg !float;
incr float
@@ -186,24 +185,24 @@ let destroyed_at_c_call =
108;109;110;111;112;113;114;115;
116;116;118;119;120;121;122;123;
124;125;126;127;128;129;130;131]
- | EABI_VFP -> (* r4-r7, d8-d15 preserved *)
+ | EABI_HF -> (* r4-r7, d8-d15 preserved *)
[0;1;2;3;8;
100;101;102;103;104;105;106;107;
116;116;118;119;120;121;122;123;
124;125;126;127;128;129;130;131]))
let destroyed_at_oper = function
- Iop(Icall_ind | Icall_imm _ )
+ Iop(Icall_ind | Icall_imm _)
| Iop(Iextcall(_, true)) ->
all_phys_regs
| Iop(Iextcall(_, false)) ->
destroyed_at_c_call
- | Iop(Ialloc n) ->
+ | Iop(Ialloc _) ->
destroyed_at_alloc
| Iop(Iconst_symbol _) when !pic_code ->
- [|phys_reg 3; phys_reg 8|] (* r3 and r12 destroyed *)
+ [| phys_reg 3; phys_reg 8 |] (* r3 and r12 destroyed *)
| Iop(Iintoffloat | Ifloatofint | Iload(Single, _) | Istore(Single, _)) ->
- [|phys_reg 107|] (* d7 (s14-s15) destroyed *)
+ [| phys_reg 107 |] (* d7 (s14-s15) destroyed *)
| _ -> [||]
let destroyed_at_raise = all_phys_regs
@@ -211,11 +210,17 @@ let destroyed_at_raise = all_phys_regs
(* Maximal register pressure *)
let safe_register_pressure = function
- Iextcall(_, _) -> 5
+ Iextcall(_, _) -> if abi = EABI then 0 else 4
+ | Ialloc _ -> if abi = EABI then 0 else 7
+ | Iconst_symbol _ when !pic_code -> 7
| _ -> 9
let max_register_pressure = function
- Iextcall(_, _) -> [| 5; 9; 9 |]
+ Iextcall(_, _) -> if abi = EABI then [| 4; 0; 0 |] else [| 4; 8; 8 |]
+ | Ialloc _ -> if abi = EABI then [| 7; 0; 0 |] else [| 7; 8; 8 |]
+ | Iconst_symbol _ when !pic_code -> [| 7; 16; 32 |]
+ | Iintoffloat | Ifloatofint
+ | Iload(Single, _) | Istore(Single, _) -> [| 9; 15; 31 |]
| _ -> [| 9; 16; 32 |]
(* Layout of the stack *)
@@ -228,3 +233,6 @@ let contains_calls = ref false
let assemble_file infile outfile =
Ccomp.command (Config.asm ^ " -o " ^
Filename.quote outfile ^ " " ^ Filename.quote infile)
+
+
+let init () = ()
diff --git a/asmcomp/arm/reload.ml b/asmcomp/arm/reload.ml
index c5b137abcf..bd783acb82 100644
--- a/asmcomp/arm/reload.ml
+++ b/asmcomp/arm/reload.ml
@@ -10,8 +10,6 @@
(* *)
(***********************************************************************)
-(* $Id$ *)
-
(* Reloading for the ARM *)
let fundecl f =
diff --git a/asmcomp/arm/scheduling.ml b/asmcomp/arm/scheduling.ml
index 4b47733f1f..9e2d65bc69 100644
--- a/asmcomp/arm/scheduling.ml
+++ b/asmcomp/arm/scheduling.ml
@@ -11,8 +11,6 @@
(* *)
(***********************************************************************)
-(* $Id$ *)
-
open Arch
open Mach
@@ -42,7 +40,7 @@ method oper_latency = function
| Imulf | Ispecific Inegmulf
| Ispecific(Imuladdf | Inegmuladdf | Imulsubf | Inegmulsubf)
| Ispecific Isqrtf
- | Inegf | Iabsf when !fpu >= VFPv3_D16 -> 2
+ | Inegf | Iabsf when !fpu >= VFPv2 -> 2
(* Everything else *)
| _ -> 1
@@ -72,7 +70,7 @@ method oper_issue_cycles = function
| Ispecific(Imuladdf | Inegmuladdf | Imulsubf | Inegmulsubf) -> 17
| Idivf
| Ispecific Isqrtf -> 27
- | Inegf | Iabsf | Iconst_float _ when !fpu >= VFPv3_D16 -> 4
+ | Inegf | Iabsf | Iconst_float _ when !fpu >= VFPv2 -> 4
(* Everything else *)
| _ -> 1
diff --git a/asmcomp/arm/selection.ml b/asmcomp/arm/selection.ml
index 94d0367bef..97f615ec78 100644
--- a/asmcomp/arm/selection.ml
+++ b/asmcomp/arm/selection.ml
@@ -11,22 +11,18 @@
(* *)
(***********************************************************************)
-(* $Id$ *)
-
(* Instruction selection for the ARM processor *)
open Arch
+open Proc
open Cmm
open Mach
-open Misc
-open Proc
-open Reg
let is_offset chunk n =
match chunk with
- (* VFPv3 load/store have -1020 to 1020 *)
+ (* VFPv{2,3} load/store have -1020 to 1020 *)
Single | Double | Double_u
- when !fpu >= VFPv3_D16 ->
+ when !fpu >= VFPv2 ->
n >= -1020 && n <= 1020
(* ARM load/store byte/word have -4095 to 4095 *)
| Byte_unsigned | Byte_signed
@@ -61,7 +57,7 @@ let pseudoregs_for_operation op arg res =
(* Soft-float Iabsf and Inegf: arg.(0) and res.(0) must be the same *)
| Iabsf | Inegf when !fpu = Soft ->
([|res.(0); arg.(1)|], res)
- (* VFPv3 Imuladdf...Inegmulsubf: arg.(0) and res.(0) must be the same *)
+ (* VFPv{2,3} Imuladdf...Inegmulsubf: arg.(0) and res.(0) must be the same *)
| Ispecific(Imuladdf | Inegmuladdf | Imulsubf | Inegmulsubf) ->
let arg' = Array.copy arg in
arg'.(0) <- res.(0);
@@ -95,7 +91,12 @@ method is_immediate n =
method! is_simple_expr = function
(* inlined floating-point ops are simple if their arguments are *)
- | Cop(Cextcall("sqrt", _, _, _), args) when !fpu >= VFPv3_D16 ->
+ | Cop(Cextcall("sqrt", _, _, _), args) when !fpu >= VFPv2 ->
+ List.for_all self#is_simple_expr args
+ (* inlined byte-swap ops are simple if their arguments are *)
+ | Cop(Cextcall("caml_bswap16_direct", _, _, _), args) when !arch >= ARMv6T2 ->
+ List.for_all self#is_simple_expr args
+ | Cop(Cextcall("caml_int32_direct_bswap", _,_,_), args) when !arch >= ARMv6 ->
List.for_all self#is_simple_expr args
| e -> super#is_simple_expr e
@@ -173,14 +174,20 @@ method! select_operation op args =
| (Cdivi, args) ->
(Iextcall("__aeabi_idiv", false), args)
| (Cmodi, [arg; Cconst_int n])
- when n = 1 lsl Misc.log2 n ->
+ when n > 1 && n = 1 lsl Misc.log2 n ->
(Iintop_imm(Imod, n), [arg])
| (Cmodi, args) ->
(* See above for fix up of return register *)
(Iextcall("__aeabi_idivmod", false), args)
+ (* Recognize 16-bit bswap instruction (ARMv6T2 because we need movt) *)
+ | (Cextcall("caml_bswap16_direct", _, _, _), args) when !arch >= ARMv6T2 ->
+ (Ispecific(Ibswap 16), args)
+ (* Recognize 32-bit bswap instructions (ARMv6 and above) *)
+ | (Cextcall("caml_int32_direct_bswap", _, _, _), args) when !arch >= ARMv6 ->
+ (Ispecific(Ibswap 32), args)
(* Turn floating-point operations into runtime ABI calls for softfp *)
| (op, args) when !fpu = Soft -> self#select_operation_softfp op args
- (* Select operations for VFPv3 *)
+ (* Select operations for VFPv{2,3} *)
| (op, args) -> self#select_operation_vfpv3 op args
method private select_operation_softfp op args =