6 files changed, 96 insertions, 67 deletions
diff --git a/asmcomp/arm/arch.ml b/asmcomp/arm/arch.ml
index c4aca8df0f..cac286aa48 100644
--- a/asmcomp/arm/arch.ml
+++ b/asmcomp/arm/arch.ml
@@ -11,21 +11,18 @@
 (*                                                                     *)
 (***********************************************************************)
 
-(* $Id$ *)
-
 (* Specific operations for the ARM processor *)
 
-open Misc
 open Format
 
-type abi = EABI | EABI_VFP
+type abi = EABI | EABI_HF
 type arch = ARMv4 | ARMv5 | ARMv5TE | ARMv6 | ARMv6T2 | ARMv7
-type fpu = Soft | VFPv3_D16 | VFPv3
+type fpu = Soft | VFPv2 | VFPv3_D16 | VFPv3
 
 let abi =
   match Config.system with
     "linux_eabi"   -> EABI
-  | "linux_eabihf" -> EABI_VFP
+  | "linux_eabihf" -> EABI_HF
   | _ -> assert false
 
 let string_of_arch = function
@@ -38,6 +35,7 @@ let string_of_arch = function
 
 let string_of_fpu = function
     Soft      -> "soft"
+  | VFPv2     -> "vfpv2"
   | VFPv3_D16 -> "vfpv3-d16"
   | VFPv3     -> "vfpv3"
 
@@ -47,13 +45,14 @@ let (arch, fpu, thumb) =
   let (def_arch, def_fpu, def_thumb) =
     begin match abi, Config.model with
     (* Defaults for architecture, FPU and Thumb *)
-      EABI, "armv5"   -> ARMv5,   Soft,      false
-    | EABI, "armv5te" -> ARMv5TE, Soft,      false
-    | EABI, "armv6"   -> ARMv6,   Soft,      false
-    | EABI, "armv6t2" -> ARMv6T2, Soft,      false
-    | EABI, "armv7"   -> ARMv7,   Soft,      false
-    | EABI, _         -> ARMv4,   Soft,      false
-    | EABI_VFP, _     -> ARMv7,   VFPv3_D16, true
+      EABI, "armv5"    -> ARMv5,   Soft,      false
+    | EABI, "armv5te"  -> ARMv5TE, Soft,      false
+    | EABI, "armv6"    -> ARMv6,   Soft,      false
+    | EABI, "armv6t2"  -> ARMv6T2, Soft,      false
+    | EABI, "armv7"    -> ARMv7,   Soft,      false
+    | EABI, _          -> ARMv4,   Soft,      false
+    | EABI_HF, "armv6" -> ARMv6,   VFPv2,     false
+    | EABI_HF, _       -> ARMv7,   VFPv3_D16, true
     end in
   (ref def_arch, ref def_fpu, ref def_thumb)
 
@@ -61,19 +60,20 @@ let pic_code = ref false
 
 let farch spec =
   arch := (match spec with
-             "armv4" when abi <> EABI_VFP   -> ARMv4
-           | "armv5" when abi <> EABI_VFP   -> ARMv5
-           | "armv5te" when abi <> EABI_VFP -> ARMv5TE
-           | "armv6" when abi <> EABI_VFP   -> ARMv6
-           | "armv6t2" when abi <> EABI_VFP -> ARMv6T2
-           | "armv7"                        -> ARMv7
+             "armv4" when abi <> EABI_HF   -> ARMv4
+           | "armv5" when abi <> EABI_HF   -> ARMv5
+           | "armv5te" when abi <> EABI_HF -> ARMv5TE
+           | "armv6"                       -> ARMv6
+           | "armv6t2"                     -> ARMv6T2
+           | "armv7"                       -> ARMv7
            | spec -> raise (Arg.Bad spec))
 
 let ffpu spec =
   fpu := (match spec with
-            "soft" when abi <> EABI_VFP     -> Soft
-          | "vfpv3-d16" when abi = EABI_VFP -> VFPv3_D16
-          | "vfpv3" when abi = EABI_VFP     -> VFPv3
+            "soft" when abi <> EABI_HF     -> Soft
+          | "vfpv2" when abi = EABI_HF     -> VFPv2
+          | "vfpv3-d16" when abi = EABI_HF -> VFPv3_D16
+          | "vfpv3" when abi = EABI_HF     -> VFPv3
           | spec -> raise (Arg.Bad spec))
 
 let command_line_options =
@@ -110,14 +110,15 @@ type specific_operation =
     Ishiftarith of arith_operation * int
   | Ishiftcheckbound of int
   | Irevsubimm of int
-  | Imuladd     (* multiply and add *)
-  | Imulsub     (* multiply and subtract *)
-  | Inegmulf    (* floating-point negate and multiply *)
-  | Imuladdf    (* floating-point multiply and add *)
-  | Inegmuladdf (* floating-point negate, multiply and add *)
-  | Imulsubf    (* floating-point multiply and subtract *)
-  | Inegmulsubf (* floating-point negate, multiply and subtract *)
-  | Isqrtf      (* floating-point square root *)
+  | Imuladd       (* multiply and add *)
+  | Imulsub       (* multiply and subtract *)
+  | Inegmulf      (* floating-point negate and multiply *)
+  | Imuladdf      (* floating-point multiply and add *)
+  | Inegmuladdf   (* floating-point negate, multiply and add *)
+  | Imulsubf      (* floating-point multiply and subtract *)
+  | Inegmulsubf   (* floating-point negate, multiply and subtract *)
+  | Isqrtf        (* floating-point square root *)
+  | Ibswap of int (* endianness conversion *)
 
 and arith_operation =
     Ishiftadd
@@ -132,6 +133,8 @@ let size_addr = 4
 let size_int = 4
 let size_float = 8
 
+let allow_unaligned_access = false
+
 (* Behavior of division *)
 
 let division_crashes_on_overflow = false
@@ -206,6 +209,9 @@ let print_specific_operation printreg op ppf arg =
   | Isqrtf ->
       fprintf ppf "sqrtf %a"
         printreg arg.(0)
+  | Ibswap n ->
+      fprintf ppf "bswap%i %a" n
+        printreg arg.(0)
 
 (* Recognize immediate operands *)
 
diff --git a/asmcomp/arm/emit.mlp b/asmcomp/arm/emit.mlp
index b0baf86523..4a12615136 100644
--- a/asmcomp/arm/emit.mlp
+++ b/asmcomp/arm/emit.mlp
@@ -11,11 +11,8 @@
 (*                                                                     *)
 (***********************************************************************)
 
-(* $Id$ *)
-
 (* Emission of ARM assembly code *)
 
-open Location
 open Misc
 open Cmm
 open Arch
@@ -402,6 +399,10 @@ let emit_instr i =
           `	ldr	{emit_reg i.res.(1)}, {emit_label lbl} + 4\n`;
           2
         end
+    | Lop(Iconst_float f) when !fpu = VFPv2 ->
+        let lbl = float_literal f in
+        `	fldd	{emit_reg i.res.(0)}, {emit_label lbl} @ {emit_string f}\n`;
+        1
     | Lop(Iconst_float f) ->
         let encode imm =
           let sg = Int64.to_int (Int64.shift_right_logical imm 63) in
@@ -468,7 +469,7 @@ let emit_instr i =
         let ninstr = emit_stack_adjustment (-n) in
         stack_offset := !stack_offset + n;
         ninstr
-    | Lop(Iload(Single, addr)) when !fpu >= VFPv3_D16 ->
+    | Lop(Iload(Single, addr)) when !fpu >= VFPv2 ->
         `	flds	s14, {emit_addressing addr i.arg 0}\n`;
         `	fcvtds	{emit_reg i.res.(0)}, s14\n`; 2
     | Lop(Iload((Double | Double_u), addr)) when !fpu = Soft ->
@@ -502,7 +503,7 @@ let emit_instr i =
           | Double_u -> "fldd"
           | _ (* 32-bit quantities *) -> "ldr" in
         `	{emit_string instr}	{emit_reg r}, {emit_addressing addr i.arg 0}\n`; 1
-    | Lop(Istore(Single, addr)) when !fpu >= VFPv3_D16 ->
+    | Lop(Istore(Single, addr)) when !fpu >= VFPv2 ->
         `	fcvtsd	s14, {emit_reg i.arg.(0)}\n`;
         `	fsts	s14, {emit_addressing addr i.arg 1}\n`; 2
     | Lop(Istore((Double | Double_u), addr)) when !fpu = Soft ->
@@ -681,6 +682,16 @@ let emit_instr i =
                      | Imulsub -> "mls"
                      | _ -> assert false) in
         `	{emit_string instr}	{emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.arg.(2)}\n`; 1
+    | Lop(Ispecific(Ibswap size)) ->
+        begin match size with
+          16 ->
+            `	rev16	{emit_reg i.res.(0)}, {emit_reg i.arg.(0)}\n`;
+            `	movt	{emit_reg i.res.(0)}, #0\n`; 2
+        | 32 ->
+            `	rev	{emit_reg i.res.(0)}, {emit_reg i.arg.(0)}\n`; 1
+        | _ ->
+            assert false
+        end
     | Lreloadretaddr ->
         let n = frame_size() in
         `	ldr	lr, [sp, #{emit_int(n-4)}]\n`; 1
@@ -808,7 +819,7 @@ let rec emit_all ninstr i =
     let n = emit_instr i in
     let ninstr' = ninstr + n in
     (* fldd can address up to +/-1KB, ldr can address up to +/-4KB *)
-    let limit = (if !fpu >= VFPv3_D16 && !float_literals <> []
+    let limit = (if !fpu >= VFPv2 && !float_literals <> []
                  then 127
                  else 511) in
     let limit = limit - !num_literals in
@@ -910,6 +921,7 @@ let begin_assembly() =
   end;
   begin match !fpu with
     Soft      -> `	.fpu	softvfp\n`
+  | VFPv2     -> `	.fpu	vfpv2\n`
   | VFPv3_D16 -> `	.fpu	vfpv3-d16\n`
   | VFPv3     -> `	.fpu	vfpv3\n`
   end;
diff --git a/asmcomp/arm/proc.ml b/asmcomp/arm/proc.ml
index aed2b01a76..dbb13173a9 100644
--- a/asmcomp/arm/proc.ml
+++ b/asmcomp/arm/proc.ml
@@ -11,8 +11,6 @@
 (*                                                                     *)
 (***********************************************************************)
 
-(* $Id$ *)
-
 (* Description of the ARM processor *)
 
 open Misc
@@ -38,7 +36,7 @@ let word_addressed = false
     r13                   stack pointer
     r14                   return address
     r15                   program counter
-   Floatinng-point register map (VFPv3):
+   Floating-point register map (VFPv{2,3}):
     d0 - d7               general purpose (not preserved)
     d8 - d15              general purpose (preserved)
     d16 - d31             generat purpose (not preserved), VFPv3 only
@@ -55,9 +53,9 @@ let float_reg_name =
 
 (* We have three register classes:
     0 for integer registers
-    1 for VFPv3-D16
+    1 for VFPv2 and VFPv3-D16
     2 for VFPv3
-   This way we can choose between VFPv3-D16 and VFPv3
+   This way we can choose between VFPv2/VFPv3-D16 and VFPv3
    at (ocamlopt) runtime using command line switches.
 *)
 
@@ -66,6 +64,7 @@ let num_register_classes = 3
 let register_class r =
   match (r.typ, !fpu) with
     (Int | Addr), _  -> 0
+  | Float, VFPv2     -> 1
   | Float, VFPv3_D16 -> 1
   | Float, _         -> 2
 
@@ -124,8 +123,8 @@ let calling_conventions
           ofs := !ofs + size_int
         end
     | Float ->
-        assert (abi = EABI_VFP);
-        assert (!fpu >= VFPv3_D16);
+        assert (abi = EABI_HF);
+        assert (!fpu >= VFPv2);
         if !float <= last_float then begin
           loc.(i) <- phys_reg !float;
           incr float
@@ -186,24 +185,24 @@ let destroyed_at_c_call =
                          108;109;110;111;112;113;114;115;
                          116;116;118;119;120;121;122;123;
                          124;125;126;127;128;129;130;131]
-                    | EABI_VFP ->   (* r4-r7, d8-d15 preserved *)
+                    | EABI_HF ->    (* r4-r7, d8-d15 preserved *)
                         [0;1;2;3;8;
                          100;101;102;103;104;105;106;107;
                          116;116;118;119;120;121;122;123;
                          124;125;126;127;128;129;130;131]))
 
 let destroyed_at_oper = function
-    Iop(Icall_ind | Icall_imm _ )
+    Iop(Icall_ind | Icall_imm _)
   | Iop(Iextcall(_, true)) ->
       all_phys_regs
   | Iop(Iextcall(_, false)) ->
       destroyed_at_c_call
-  | Iop(Ialloc n) ->
+  | Iop(Ialloc _) ->
       destroyed_at_alloc
   | Iop(Iconst_symbol _) when !pic_code ->
-      [|phys_reg 3; phys_reg 8|]  (* r3 and r12 destroyed *)
+      [| phys_reg 3; phys_reg 8 |]  (* r3 and r12 destroyed *)
   | Iop(Iintoffloat | Ifloatofint | Iload(Single, _) | Istore(Single, _)) ->
-      [|phys_reg 107|]            (* d7 (s14-s15) destroyed *)
+      [| phys_reg 107 |]            (* d7 (s14-s15) destroyed *)
   | _ -> [||]
 
 let destroyed_at_raise = all_phys_regs
@@ -211,11 +210,17 @@ let destroyed_at_raise = all_phys_regs
 (* Maximal register pressure *)
 
 let safe_register_pressure = function
-    Iextcall(_, _) -> 5
+    Iextcall(_, _) -> if abi = EABI then 0 else 4
+  | Ialloc _ -> if abi = EABI then 0 else 7
+  | Iconst_symbol _ when !pic_code -> 7
   | _ -> 9
 
 let max_register_pressure = function
-    Iextcall(_, _) -> [| 5; 9; 9 |]
+    Iextcall(_, _) -> if abi = EABI then [| 4; 0; 0 |] else [| 4; 8; 8 |]
+  | Ialloc _ -> if abi = EABI then [| 7; 0; 0 |] else [| 7; 8; 8 |]
+  | Iconst_symbol _ when !pic_code -> [| 7; 16; 32 |]
+  | Iintoffloat | Ifloatofint
+  | Iload(Single, _) | Istore(Single, _) -> [| 9; 15; 31 |]
   | _ -> [| 9; 16; 32 |]
 
 (* Layout of the stack *)
@@ -228,3 +233,6 @@ let contains_calls = ref false
 let assemble_file infile outfile =
   Ccomp.command (Config.asm ^ " -o " ^
                  Filename.quote outfile ^ " " ^ Filename.quote infile)
+
+
+let init () = ()
diff --git a/asmcomp/arm/reload.ml b/asmcomp/arm/reload.ml
index c5b137abcf..bd783acb82 100644
--- a/asmcomp/arm/reload.ml
+++ b/asmcomp/arm/reload.ml
@@ -10,8 +10,6 @@
 (*                                                                     *)
 (***********************************************************************)
 
-(* $Id$ *)
-
 (* Reloading for the ARM *)
 
 let fundecl f =
diff --git a/asmcomp/arm/scheduling.ml b/asmcomp/arm/scheduling.ml
index 4b47733f1f..9e2d65bc69 100644
--- a/asmcomp/arm/scheduling.ml
+++ b/asmcomp/arm/scheduling.ml
@@ -11,8 +11,6 @@
 (*                                                                     *)
 (***********************************************************************)
 
-(* $Id$ *)
-
 open Arch
 open Mach
 
@@ -42,7 +40,7 @@ method oper_latency = function
   | Imulf | Ispecific Inegmulf
   | Ispecific(Imuladdf | Inegmuladdf | Imulsubf | Inegmulsubf)
   | Ispecific Isqrtf
-  | Inegf | Iabsf when !fpu >= VFPv3_D16 -> 2
+  | Inegf | Iabsf when !fpu >= VFPv2 -> 2
   (* Everything else *)
   | _ -> 1
 
@@ -72,7 +70,7 @@ method oper_issue_cycles = function
   | Ispecific(Imuladdf | Inegmuladdf | Imulsubf | Inegmulsubf) -> 17
   | Idivf
   | Ispecific Isqrtf -> 27
-  | Inegf | Iabsf | Iconst_float _ when !fpu >= VFPv3_D16 -> 4
+  | Inegf | Iabsf | Iconst_float _ when !fpu >= VFPv2 -> 4
   (* Everything else *)
   | _ -> 1
 
diff --git a/asmcomp/arm/selection.ml b/asmcomp/arm/selection.ml
index 94d0367bef..97f615ec78 100644
--- a/asmcomp/arm/selection.ml
+++ b/asmcomp/arm/selection.ml
@@ -11,22 +11,18 @@
 (*                                                                     *)
 (***********************************************************************)
 
-(* $Id$ *)
-
 (* Instruction selection for the ARM processor *)
 
 open Arch
+open Proc
 open Cmm
 open Mach
-open Misc
-open Proc
-open Reg
 
 let is_offset chunk n =
   match chunk with
-  (* VFPv3 load/store have -1020 to 1020 *)
+  (* VFPv{2,3} load/store have -1020 to 1020 *)
     Single | Double | Double_u
-    when !fpu >= VFPv3_D16 ->
+    when !fpu >= VFPv2 ->
       n >= -1020 && n <= 1020
   (* ARM load/store byte/word have -4095 to 4095 *)
   | Byte_unsigned | Byte_signed
@@ -61,7 +57,7 @@ let pseudoregs_for_operation op arg res =
   (* Soft-float Iabsf and Inegf: arg.(0) and res.(0) must be the same *)
   | Iabsf | Inegf when !fpu = Soft ->
       ([|res.(0); arg.(1)|], res)
-  (* VFPv3 Imuladdf...Inegmulsubf: arg.(0) and res.(0) must be the same *)
+  (* VFPv{2,3} Imuladdf...Inegmulsubf: arg.(0) and res.(0) must be the same *)
   | Ispecific(Imuladdf | Inegmuladdf | Imulsubf | Inegmulsubf) ->
       let arg' = Array.copy arg in
       arg'.(0) <- res.(0);
@@ -95,7 +91,12 @@ method is_immediate n =
 
 method! is_simple_expr = function
   (* inlined floating-point ops are simple if their arguments are *)
-  | Cop(Cextcall("sqrt", _, _, _), args) when !fpu >= VFPv3_D16 ->
+  | Cop(Cextcall("sqrt", _, _, _), args) when !fpu >= VFPv2 ->
+      List.for_all self#is_simple_expr args
+  (* inlined byte-swap ops are simple if their arguments are *)
+  | Cop(Cextcall("caml_bswap16_direct", _, _, _), args) when !arch >= ARMv6T2 ->
+      List.for_all self#is_simple_expr args
+  | Cop(Cextcall("caml_int32_direct_bswap", _,_,_), args) when !arch >= ARMv6 ->
       List.for_all self#is_simple_expr args
   | e -> super#is_simple_expr e
 
@@ -173,14 +174,20 @@ method! select_operation op args =
   | (Cdivi, args) ->
       (Iextcall("__aeabi_idiv", false), args)
   | (Cmodi, [arg; Cconst_int n])
-    when n = 1 lsl Misc.log2 n ->
+    when n > 1 && n = 1 lsl Misc.log2 n ->
       (Iintop_imm(Imod, n), [arg])
   | (Cmodi, args) ->
       (* See above for fix up of return register *)
       (Iextcall("__aeabi_idivmod", false), args)
+  (* Recognize 16-bit bswap instruction (ARMv6T2 because we need movt) *)
+  | (Cextcall("caml_bswap16_direct", _, _, _), args) when !arch >= ARMv6T2 ->
+      (Ispecific(Ibswap 16), args)
+  (* Recognize 32-bit bswap instructions (ARMv6 and above) *)
+  | (Cextcall("caml_int32_direct_bswap", _, _, _), args) when !arch >= ARMv6 ->
+      (Ispecific(Ibswap 32), args)
   (* Turn floating-point operations into runtime ABI calls for softfp *)
   | (op, args) when !fpu = Soft -> self#select_operation_softfp op args
-  (* Select operations for VFPv3 *)
+  (* Select operations for VFPv{2,3} *)
   | (op, args) -> self#select_operation_vfpv3 op args
 
 method private select_operation_softfp op args =