summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSaleem Abdulrasool <compnerd@compnerd.org>2017-01-11 16:19:25 +0000
committerSaleem Abdulrasool <compnerd@compnerd.org>2017-01-11 16:19:25 +0000
commit2d5a51da2c5ce01ab0f43556388de96efc70d3f1 (patch)
tree16c989d817cbf492a1586bae43b8876c3c64f594
parentd7d74b37574937c97965d3b2156d793b00b0d86f (diff)
downloadcompiler-rt-2d5a51da2c5ce01ab0f43556388de96efc70d3f1.tar.gz
builtins: support building ARM builtins for a HF target
The `-target` impacts the CC for the builtins. HF targets (with either floating point ABI) always use AAPCS VFP for the builtins unless they are AEABI builtins, in which case they use AAPCS. Non-HF targets (with either floating point ABI) always use AAPCS for the builtins and AAPCS for the AEABI builtins. This introduces the thunks necessary to switch CC for the floating point operations. This is not currently enabled, and should be dependent on the target being used to build compiler-rt. However, as a stop-gap, a define can be added for ASFLAGS to get the thunks. git-svn-id: https://llvm.org/svn/llvm-project/compiler-rt/trunk@291677 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/builtins/arm/adddf3vfp.S4
-rw-r--r--lib/builtins/arm/addsf3vfp.S4
-rw-r--r--lib/builtins/arm/comparesf2.S34
-rw-r--r--lib/builtins/arm/divdf3vfp.S6
-rw-r--r--lib/builtins/arm/divsf3vfp.S4
-rw-r--r--lib/builtins/arm/eqdf2vfp.S4
-rw-r--r--lib/builtins/arm/eqsf2vfp.S4
-rw-r--r--lib/builtins/arm/extendsfdf2vfp.S4
-rw-r--r--lib/builtins/arm/fixdfsivfp.S5
-rw-r--r--lib/builtins/arm/fixsfsivfp.S5
-rw-r--r--lib/builtins/arm/fixunsdfsivfp.S5
-rw-r--r--lib/builtins/arm/fixunssfsivfp.S5
-rw-r--r--lib/builtins/arm/floatsidfvfp.S5
-rw-r--r--lib/builtins/arm/floatsisfvfp.S5
-rw-r--r--lib/builtins/arm/floatunssidfvfp.S5
-rw-r--r--lib/builtins/arm/floatunssisfvfp.S5
-rw-r--r--lib/builtins/arm/gedf2vfp.S4
-rw-r--r--lib/builtins/arm/gesf2vfp.S4
-rw-r--r--lib/builtins/arm/gtdf2vfp.S4
-rw-r--r--lib/builtins/arm/gtsf2vfp.S4
-rw-r--r--lib/builtins/arm/ledf2vfp.S4
-rw-r--r--lib/builtins/arm/lesf2vfp.S4
-rw-r--r--lib/builtins/arm/ltdf2vfp.S4
-rw-r--r--lib/builtins/arm/ltsf2vfp.S4
-rw-r--r--lib/builtins/arm/muldf3vfp.S6
-rw-r--r--lib/builtins/arm/mulsf3vfp.S4
-rw-r--r--lib/builtins/arm/nedf2vfp.S4
-rw-r--r--lib/builtins/arm/negdf2vfp.S4
-rw-r--r--lib/builtins/arm/negsf2vfp.S4
-rw-r--r--lib/builtins/arm/nesf2vfp.S4
-rw-r--r--lib/builtins/arm/subdf3vfp.S4
-rw-r--r--lib/builtins/arm/subsf3vfp.S6
-rw-r--r--lib/builtins/arm/truncdfsf2vfp.S4
-rw-r--r--lib/builtins/arm/unorddf2vfp.S6
-rw-r--r--lib/builtins/arm/unordsf2vfp.S4
35 files changed, 179 insertions, 7 deletions
diff --git a/lib/builtins/arm/adddf3vfp.S b/lib/builtins/arm/adddf3vfp.S
index f4c00a03e..8e476cad1 100644
--- a/lib/builtins/arm/adddf3vfp.S
+++ b/lib/builtins/arm/adddf3vfp.S
@@ -18,10 +18,14 @@
.syntax unified
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__adddf3vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vadd.f64 d0, d0, d1
+#else
vmov d6, r0, r1 // move first param from r0/r1 pair into d6
vmov d7, r2, r3 // move second param from r2/r3 pair into d7
vadd.f64 d6, d6, d7
vmov r0, r1, d6 // move result back to r0/r1 pair
+#endif
bx lr
END_COMPILERRT_FUNCTION(__adddf3vfp)
diff --git a/lib/builtins/arm/addsf3vfp.S b/lib/builtins/arm/addsf3vfp.S
index af40c1cc9..8871efdcc 100644
--- a/lib/builtins/arm/addsf3vfp.S
+++ b/lib/builtins/arm/addsf3vfp.S
@@ -18,10 +18,14 @@
.syntax unified
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__addsf3vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vadd.f32 s0, s0, s1
+#else
vmov s14, r0 // move first param from r0 into float register
vmov s15, r1 // move second param from r1 into float register
vadd.f32 s14, s14, s15
vmov r0, s14 // move result back to r0
+#endif
bx lr
END_COMPILERRT_FUNCTION(__addsf3vfp)
diff --git a/lib/builtins/arm/comparesf2.S b/lib/builtins/arm/comparesf2.S
index f9c989a84..e8095650e 100644
--- a/lib/builtins/arm/comparesf2.S
+++ b/lib/builtins/arm/comparesf2.S
@@ -43,8 +43,14 @@
.thumb
#endif
-.p2align 2
+@ int __eqsf2(float a, float b)
+
+ .p2align 2
DEFINE_COMPILERRT_FUNCTION(__eqsf2)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vmov r0, s0
+ vmov r1, s1
+#endif
// Make copies of a and b with the sign bit shifted off the top. These will
// be used to detect zeros and NaNs.
#if __ARM_ARCH_ISA_THUMB == 1
@@ -166,16 +172,23 @@ LOCAL_LABEL(CHECK_NAN):
JMP(lr)
#endif
END_COMPILERRT_FUNCTION(__eqsf2)
+
DEFINE_COMPILERRT_FUNCTION_ALIAS(__lesf2, __eqsf2)
DEFINE_COMPILERRT_FUNCTION_ALIAS(__ltsf2, __eqsf2)
DEFINE_COMPILERRT_FUNCTION_ALIAS(__nesf2, __eqsf2)
-.p2align 2
+@ int __gtsf2(float a, float b)
+
+ .p2align 2
DEFINE_COMPILERRT_FUNCTION(__gtsf2)
// Identical to the preceding except in that we return -1 for NaN values.
// Given that the two paths share so much code, one might be tempted to
// unify them; however, the extra code needed to do so makes the code size
// to performance tradeoff very hard to justify for such small functions.
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vmov r0, s0
+ vmov r1, s1
+#endif
#if __ARM_ARCH_ISA_THUMB == 1
push {r6, lr}
lsls r2, r0, #1
@@ -235,10 +248,17 @@ LOCAL_LABEL(CHECK_NAN_2):
JMP(lr)
#endif
END_COMPILERRT_FUNCTION(__gtsf2)
+
DEFINE_COMPILERRT_FUNCTION_ALIAS(__gesf2, __gtsf2)
-.p2align 2
+@ int __unordsf2(float a, float b)
+
+ .p2align 2
DEFINE_COMPILERRT_FUNCTION(__unordsf2)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vmov r0, s0
+ vmov r1, s1
+#endif
// Return 1 for NaN values, 0 otherwise.
lsls r2, r0, #1
lsls r3, r1, #1
@@ -262,7 +282,15 @@ DEFINE_COMPILERRT_FUNCTION(__unordsf2)
JMP(lr)
END_COMPILERRT_FUNCTION(__unordsf2)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+DEFINE_COMPILERRT_FUNCTION(__aeabi_fcmpum):
+ vmov s0, r0
+ vmov s1, r1
+ b SYMBOL_NAME(__unordsf2)
+END_COMPILERRT_FUNCTION(__aeabi_fcmpum)
+#else
DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_fcmpun, __unordsf2)
+#endif
NO_EXEC_STACK_DIRECTIVE
diff --git a/lib/builtins/arm/divdf3vfp.S b/lib/builtins/arm/divdf3vfp.S
index 928f53809..776ba4f24 100644
--- a/lib/builtins/arm/divdf3vfp.S
+++ b/lib/builtins/arm/divdf3vfp.S
@@ -18,10 +18,14 @@
.syntax unified
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__divdf3vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vdiv.f64 d0, d0, d1
+#else
vmov d6, r0, r1 // move first param from r0/r1 pair into d6
vmov d7, r2, r3 // move second param from r2/r3 pair into d7
- vdiv.f64 d5, d6, d7
+ vdiv.f64 d5, d6, d7
vmov r0, r1, d5 // move result back to r0/r1 pair
+#endif
bx lr
END_COMPILERRT_FUNCTION(__divdf3vfp)
diff --git a/lib/builtins/arm/divsf3vfp.S b/lib/builtins/arm/divsf3vfp.S
index a2e297f70..130318f0c 100644
--- a/lib/builtins/arm/divsf3vfp.S
+++ b/lib/builtins/arm/divsf3vfp.S
@@ -18,10 +18,14 @@
.syntax unified
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__divsf3vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vdiv.f32 s0, s0, s1
+#else
vmov s14, r0 // move first param from r0 into float register
vmov s15, r1 // move second param from r1 into float register
vdiv.f32 s13, s14, s15
vmov r0, s13 // move result back to r0
+#endif
bx lr
END_COMPILERRT_FUNCTION(__divsf3vfp)
diff --git a/lib/builtins/arm/eqdf2vfp.S b/lib/builtins/arm/eqdf2vfp.S
index 95e6bb363..8fa0b2deb 100644
--- a/lib/builtins/arm/eqdf2vfp.S
+++ b/lib/builtins/arm/eqdf2vfp.S
@@ -19,9 +19,13 @@
.syntax unified
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__eqdf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vcmp.f64 d0, d1
+#else
vmov d6, r0, r1 // load r0/r1 pair in double register
vmov d7, r2, r3 // load r2/r3 pair in double register
vcmp.f64 d6, d7
+#endif
vmrs apsr_nzcv, fpscr
moveq r0, #1 // set result register to 1 if equal
movne r0, #0
diff --git a/lib/builtins/arm/eqsf2vfp.S b/lib/builtins/arm/eqsf2vfp.S
index fbac139c1..3776bf487 100644
--- a/lib/builtins/arm/eqsf2vfp.S
+++ b/lib/builtins/arm/eqsf2vfp.S
@@ -19,9 +19,13 @@
.syntax unified
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__eqsf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vcmp.f32 s0, s1
+#else
vmov s14, r0 // move from GPR 0 to float register
vmov s15, r1 // move from GPR 1 to float register
vcmp.f32 s14, s15
+#endif
vmrs apsr_nzcv, fpscr
moveq r0, #1 // set result register to 1 if equal
movne r0, #0
diff --git a/lib/builtins/arm/extendsfdf2vfp.S b/lib/builtins/arm/extendsfdf2vfp.S
index 563bf92af..1079f977b 100644
--- a/lib/builtins/arm/extendsfdf2vfp.S
+++ b/lib/builtins/arm/extendsfdf2vfp.S
@@ -19,9 +19,13 @@
.syntax unified
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__extendsfdf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vcvt.f64.f32 d0, s0
+#else
vmov s15, r0 // load float register from R0
vcvt.f64.f32 d7, s15 // convert single to double
vmov r0, r1, d7 // return result in r0/r1 pair
+#endif
bx lr
END_COMPILERRT_FUNCTION(__extendsfdf2vfp)
diff --git a/lib/builtins/arm/fixdfsivfp.S b/lib/builtins/arm/fixdfsivfp.S
index 8263ff942..5d7b0f856 100644
--- a/lib/builtins/arm/fixdfsivfp.S
+++ b/lib/builtins/arm/fixdfsivfp.S
@@ -19,9 +19,14 @@
.syntax unified
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__fixdfsivfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vcvt.s32.f64 s0, d0
+ vmov r0, s0
+#else
vmov d7, r0, r1 // load double register from R0/R1
vcvt.s32.f64 s15, d7 // convert double to 32-bit int into s15
vmov r0, s15 // move s15 to result register
+#endif
bx lr
END_COMPILERRT_FUNCTION(__fixdfsivfp)
diff --git a/lib/builtins/arm/fixsfsivfp.S b/lib/builtins/arm/fixsfsivfp.S
index c7c3b8117..805a277af 100644
--- a/lib/builtins/arm/fixsfsivfp.S
+++ b/lib/builtins/arm/fixsfsivfp.S
@@ -19,9 +19,14 @@
.syntax unified
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__fixsfsivfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vcvt.s32.f32 s0, s0
+ vmov r0, s0
+#else
vmov s15, r0 // load float register from R0
vcvt.s32.f32 s15, s15 // convert single to 32-bit int into s15
vmov r0, s15 // move s15 to result register
+#endif
bx lr
END_COMPILERRT_FUNCTION(__fixsfsivfp)
diff --git a/lib/builtins/arm/fixunsdfsivfp.S b/lib/builtins/arm/fixunsdfsivfp.S
index 9cc1e6286..4f1b2c8ce 100644
--- a/lib/builtins/arm/fixunsdfsivfp.S
+++ b/lib/builtins/arm/fixunsdfsivfp.S
@@ -20,9 +20,14 @@
.syntax unified
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__fixunsdfsivfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vcvt.u32.f64 s0, d0
+ vmov r0, s0
+#else
vmov d7, r0, r1 // load double register from R0/R1
vcvt.u32.f64 s15, d7 // convert double to 32-bit int into s15
vmov r0, s15 // move s15 to result register
+#endif
bx lr
END_COMPILERRT_FUNCTION(__fixunsdfsivfp)
diff --git a/lib/builtins/arm/fixunssfsivfp.S b/lib/builtins/arm/fixunssfsivfp.S
index 79d708229..e5d778236 100644
--- a/lib/builtins/arm/fixunssfsivfp.S
+++ b/lib/builtins/arm/fixunssfsivfp.S
@@ -20,9 +20,14 @@
.syntax unified
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__fixunssfsivfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vcvt.u32.f32 s0, s0
+ vmov r0, s0
+#else
vmov s15, r0 // load float register from R0
vcvt.u32.f32 s15, s15 // convert single to 32-bit unsigned into s15
vmov r0, s15 // move s15 to result register
+#endif
bx lr
END_COMPILERRT_FUNCTION(__fixunssfsivfp)
diff --git a/lib/builtins/arm/floatsidfvfp.S b/lib/builtins/arm/floatsidfvfp.S
index 7623f26c6..3297ad44d 100644
--- a/lib/builtins/arm/floatsidfvfp.S
+++ b/lib/builtins/arm/floatsidfvfp.S
@@ -19,9 +19,14 @@
.syntax unified
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__floatsidfvfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vmov s0, r0
+ vcvt.f64.s32 d0, s0
+#else
vmov s15, r0 // move int to float register s15
vcvt.f64.s32 d7, s15 // convert 32-bit int in s15 to double in d7
vmov r0, r1, d7 // move d7 to result register pair r0/r1
+#endif
bx lr
END_COMPILERRT_FUNCTION(__floatsidfvfp)
diff --git a/lib/builtins/arm/floatsisfvfp.S b/lib/builtins/arm/floatsisfvfp.S
index c73dfac13..65408b54b 100644
--- a/lib/builtins/arm/floatsisfvfp.S
+++ b/lib/builtins/arm/floatsisfvfp.S
@@ -19,9 +19,14 @@
.syntax unified
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__floatsisfvfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vmov s0, r0
+ vcvt.f32.s32 s0, s0
+#else
vmov s15, r0 // move int to float register s15
vcvt.f32.s32 s15, s15 // convert 32-bit int in s15 to float in s15
vmov r0, s15 // move s15 to result register
+#endif
bx lr
END_COMPILERRT_FUNCTION(__floatsisfvfp)
diff --git a/lib/builtins/arm/floatunssidfvfp.S b/lib/builtins/arm/floatunssidfvfp.S
index 2a59fdb83..d7a7024a2 100644
--- a/lib/builtins/arm/floatunssidfvfp.S
+++ b/lib/builtins/arm/floatunssidfvfp.S
@@ -19,9 +19,14 @@
.syntax unified
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__floatunssidfvfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vmov s0, r0
+ vcvt.f64.u32 d0, s0
+#else
vmov s15, r0 // move int to float register s15
vcvt.f64.u32 d7, s15 // convert 32-bit int in s15 to double in d7
vmov r0, r1, d7 // move d7 to result register pair r0/r1
+#endif
bx lr
END_COMPILERRT_FUNCTION(__floatunssidfvfp)
diff --git a/lib/builtins/arm/floatunssisfvfp.S b/lib/builtins/arm/floatunssisfvfp.S
index c096263c1..1ca856519 100644
--- a/lib/builtins/arm/floatunssisfvfp.S
+++ b/lib/builtins/arm/floatunssisfvfp.S
@@ -19,9 +19,14 @@
.syntax unified
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__floatunssisfvfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vmov s0, r0
+ vcvt.f32.u32 s0, s0
+#else
vmov s15, r0 // move int to float register s15
vcvt.f32.u32 s15, s15 // convert 32-bit int in s15 to float in s15
vmov r0, s15 // move s15 to result register
+#endif
bx lr
END_COMPILERRT_FUNCTION(__floatunssisfvfp)
diff --git a/lib/builtins/arm/gedf2vfp.S b/lib/builtins/arm/gedf2vfp.S
index 72f13ef4e..14899f00a 100644
--- a/lib/builtins/arm/gedf2vfp.S
+++ b/lib/builtins/arm/gedf2vfp.S
@@ -19,9 +19,13 @@
.syntax unified
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__gedf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vcmp.f64 d0, d1
+#else
vmov d6, r0, r1 // load r0/r1 pair in double register
vmov d7, r2, r3 // load r2/r3 pair in double register
vcmp.f64 d6, d7
+#endif
vmrs apsr_nzcv, fpscr
movge r0, #1 // set result register to 1 if greater than or equal
movlt r0, #0
diff --git a/lib/builtins/arm/gesf2vfp.S b/lib/builtins/arm/gesf2vfp.S
index c9ee52c9c..b49d04d1c 100644
--- a/lib/builtins/arm/gesf2vfp.S
+++ b/lib/builtins/arm/gesf2vfp.S
@@ -19,9 +19,13 @@
.syntax unified
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__gesf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vcmp.f32 s0, s1
+#else
vmov s14, r0 // move from GPR 0 to float register
vmov s15, r1 // move from GPR 1 to float register
vcmp.f32 s14, s15
+#endif
vmrs apsr_nzcv, fpscr
movge r0, #1 // set result register to 1 if greater than or equal
movlt r0, #0
diff --git a/lib/builtins/arm/gtdf2vfp.S b/lib/builtins/arm/gtdf2vfp.S
index c7f277552..8166305e3 100644
--- a/lib/builtins/arm/gtdf2vfp.S
+++ b/lib/builtins/arm/gtdf2vfp.S
@@ -19,9 +19,13 @@
.syntax unified
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__gtdf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vcmp.f64 d0, d1
+#else
vmov d6, r0, r1 // load r0/r1 pair in double register
vmov d7, r2, r3 // load r2/r3 pair in double register
vcmp.f64 d6, d7
+#endif
vmrs apsr_nzcv, fpscr
movgt r0, #1 // set result register to 1 if equal
movle r0, #0
diff --git a/lib/builtins/arm/gtsf2vfp.S b/lib/builtins/arm/gtsf2vfp.S
index 7d49e4564..d2d8a2380 100644
--- a/lib/builtins/arm/gtsf2vfp.S
+++ b/lib/builtins/arm/gtsf2vfp.S
@@ -19,9 +19,13 @@
.syntax unified
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__gtsf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vcmp.f32 s0, s1
+#else
vmov s14, r0 // move from GPR 0 to float register
vmov s15, r1 // move from GPR 1 to float register
vcmp.f32 s14, s15
+#endif
vmrs apsr_nzcv, fpscr
movgt r0, #1 // set result register to 1 if equal
movle r0, #0
diff --git a/lib/builtins/arm/ledf2vfp.S b/lib/builtins/arm/ledf2vfp.S
index ca5b553f1..a9dab77c1 100644
--- a/lib/builtins/arm/ledf2vfp.S
+++ b/lib/builtins/arm/ledf2vfp.S
@@ -19,9 +19,13 @@
.syntax unified
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__ledf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vcmp.f64 d0, d1
+#else
vmov d6, r0, r1 // load r0/r1 pair in double register
vmov d7, r2, r3 // load r2/r3 pair in double register
vcmp.f64 d6, d7
+#endif
vmrs apsr_nzcv, fpscr
movls r0, #1 // set result register to 1 if equal
movhi r0, #0
diff --git a/lib/builtins/arm/lesf2vfp.S b/lib/builtins/arm/lesf2vfp.S
index f25422ece..7e127f465 100644
--- a/lib/builtins/arm/lesf2vfp.S
+++ b/lib/builtins/arm/lesf2vfp.S
@@ -19,9 +19,13 @@
.syntax unified
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__lesf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vcmp.f32 s0, s1
+#else
vmov s14, r0 // move from GPR 0 to float register
vmov s15, r1 // move from GPR 1 to float register
vcmp.f32 s14, s15
+#endif
vmrs apsr_nzcv, fpscr
movls r0, #1 // set result register to 1 if equal
movhi r0, #0
diff --git a/lib/builtins/arm/ltdf2vfp.S b/lib/builtins/arm/ltdf2vfp.S
index 6e2c0997c..8b6f8e4cc 100644
--- a/lib/builtins/arm/ltdf2vfp.S
+++ b/lib/builtins/arm/ltdf2vfp.S
@@ -19,9 +19,13 @@
.syntax unified
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__ltdf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vcmp.f64 d0, d1
+#else
vmov d6, r0, r1 // load r0/r1 pair in double register
vmov d7, r2, r3 // load r2/r3 pair in double register
vcmp.f64 d6, d7
+#endif
vmrs apsr_nzcv, fpscr
movmi r0, #1 // set result register to 1 if equal
movpl r0, #0
diff --git a/lib/builtins/arm/ltsf2vfp.S b/lib/builtins/arm/ltsf2vfp.S
index 95febb606..c4ff812b4 100644
--- a/lib/builtins/arm/ltsf2vfp.S
+++ b/lib/builtins/arm/ltsf2vfp.S
@@ -19,9 +19,13 @@
.syntax unified
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__ltsf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vcmp.f32 s0, s1
+#else
vmov s14, r0 // move from GPR 0 to float register
vmov s15, r1 // move from GPR 1 to float register
vcmp.f32 s14, s15
+#endif
vmrs apsr_nzcv, fpscr
movmi r0, #1 // set result register to 1 if equal
movpl r0, #0
diff --git a/lib/builtins/arm/muldf3vfp.S b/lib/builtins/arm/muldf3vfp.S
index f638de1ad..aa7b23495 100644
--- a/lib/builtins/arm/muldf3vfp.S
+++ b/lib/builtins/arm/muldf3vfp.S
@@ -18,10 +18,14 @@
.syntax unified
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__muldf3vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vmul.f64 d0, d0, d1
+#else
vmov d6, r0, r1 // move first param from r0/r1 pair into d6
vmov d7, r2, r3 // move second param from r2/r3 pair into d7
- vmul.f64 d6, d6, d7
+ vmul.f64 d6, d6, d7
vmov r0, r1, d6 // move result back to r0/r1 pair
+#endif
bx lr
END_COMPILERRT_FUNCTION(__muldf3vfp)
diff --git a/lib/builtins/arm/mulsf3vfp.S b/lib/builtins/arm/mulsf3vfp.S
index bef58d3a0..a1da789dc 100644
--- a/lib/builtins/arm/mulsf3vfp.S
+++ b/lib/builtins/arm/mulsf3vfp.S
@@ -18,9 +18,13 @@
.syntax unified
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__mulsf3vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vmul.f32 s0, s0, s1
+#else
vmov s14, r0 // move first param from r0 into float register
vmov s15, r1 // move second param from r1 into float register
vmul.f32 s13, s14, s15
+#endif
vmov r0, s13 // move result back to r0
bx lr
END_COMPILERRT_FUNCTION(__mulsf3vfp)
diff --git a/lib/builtins/arm/nedf2vfp.S b/lib/builtins/arm/nedf2vfp.S
index 78cf529d6..7d884e072 100644
--- a/lib/builtins/arm/nedf2vfp.S
+++ b/lib/builtins/arm/nedf2vfp.S
@@ -19,9 +19,13 @@
.syntax unified
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__nedf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vcmp.f64 d0, d1
+#else
vmov d6, r0, r1 // load r0/r1 pair in double register
vmov d7, r2, r3 // load r2/r3 pair in double register
vcmp.f64 d6, d7
+#endif
vmrs apsr_nzcv, fpscr
movne r0, #1 // set result register to 0 if unequal
moveq r0, #0
diff --git a/lib/builtins/arm/negdf2vfp.S b/lib/builtins/arm/negdf2vfp.S
index 01c8ba6a1..81f0ab8ee 100644
--- a/lib/builtins/arm/negdf2vfp.S
+++ b/lib/builtins/arm/negdf2vfp.S
@@ -18,7 +18,11 @@
.syntax unified
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__negdf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vneg.f64 d0, d0
+#else
eor r1, r1, #-2147483648 // flip sign bit on double in r0/r1 pair
+#endif
bx lr
END_COMPILERRT_FUNCTION(__negdf2vfp)
diff --git a/lib/builtins/arm/negsf2vfp.S b/lib/builtins/arm/negsf2vfp.S
index 797abb32e..46ab4a9cf 100644
--- a/lib/builtins/arm/negsf2vfp.S
+++ b/lib/builtins/arm/negsf2vfp.S
@@ -18,7 +18,11 @@
.syntax unified
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__negsf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vneg.f32 s0, s0
+#else
eor r0, r0, #-2147483648 // flip sign bit on float in r0
+#endif
bx lr
END_COMPILERRT_FUNCTION(__negsf2vfp)
diff --git a/lib/builtins/arm/nesf2vfp.S b/lib/builtins/arm/nesf2vfp.S
index 554d3e467..97c764f63 100644
--- a/lib/builtins/arm/nesf2vfp.S
+++ b/lib/builtins/arm/nesf2vfp.S
@@ -19,9 +19,13 @@
.syntax unified
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__nesf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vcmp.f32 s0, s1
+#else
vmov s14, r0 // move from GPR 0 to float register
vmov s15, r1 // move from GPR 1 to float register
vcmp.f32 s14, s15
+#endif
vmrs apsr_nzcv, fpscr
movne r0, #1 // set result register to 1 if unequal
moveq r0, #0
diff --git a/lib/builtins/arm/subdf3vfp.S b/lib/builtins/arm/subdf3vfp.S
index 1fc7d18c3..2b6f2bdbf 100644
--- a/lib/builtins/arm/subdf3vfp.S
+++ b/lib/builtins/arm/subdf3vfp.S
@@ -18,10 +18,14 @@
.syntax unified
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__subdf3vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vsub.f64 d0, d0, d1
+#else
vmov d6, r0, r1 // move first param from r0/r1 pair into d6
vmov d7, r2, r3 // move second param from r2/r3 pair into d7
vsub.f64 d6, d6, d7
vmov r0, r1, d6 // move result back to r0/r1 pair
+#endif
bx lr
END_COMPILERRT_FUNCTION(__subdf3vfp)
diff --git a/lib/builtins/arm/subsf3vfp.S b/lib/builtins/arm/subsf3vfp.S
index 11fe386cd..a9f3ba942 100644
--- a/lib/builtins/arm/subsf3vfp.S
+++ b/lib/builtins/arm/subsf3vfp.S
@@ -12,17 +12,21 @@
//
// extern float __subsf3vfp(float a, float b);
//
-// Returns the difference between two single precision floating point numbers
+// Returns the difference between two single precision floating point numbers
// using the Darwin calling convention where single arguments are passsed
// like 32-bit ints.
//
.syntax unified
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__subsf3vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vsub.f32 s0, s0, s1
+#elsee
vmov s14, r0 // move first param from r0 into float register
vmov s15, r1 // move second param from r1 into float register
vsub.f32 s14, s14, s15
vmov r0, s14 // move result back to r0
+#endif
bx lr
END_COMPILERRT_FUNCTION(__subsf3vfp)
diff --git a/lib/builtins/arm/truncdfsf2vfp.S b/lib/builtins/arm/truncdfsf2vfp.S
index 04287ad27..682e54d3d 100644
--- a/lib/builtins/arm/truncdfsf2vfp.S
+++ b/lib/builtins/arm/truncdfsf2vfp.S
@@ -19,9 +19,13 @@
.syntax unified
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__truncdfsf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vcvt.f32.f64 s0, d0
+#else
vmov d7, r0, r1 // load double from r0/r1 pair
vcvt.f32.f64 s15, d7 // convert double to single (trucate precision)
vmov r0, s15 // return result in r0
+#endif
bx lr
END_COMPILERRT_FUNCTION(__truncdfsf2vfp)
diff --git a/lib/builtins/arm/unorddf2vfp.S b/lib/builtins/arm/unorddf2vfp.S
index 022dd7a97..855637547 100644
--- a/lib/builtins/arm/unorddf2vfp.S
+++ b/lib/builtins/arm/unorddf2vfp.S
@@ -19,9 +19,13 @@
.syntax unified
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__unorddf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vcmp.f64 d0, d1
+#else
vmov d6, r0, r1 // load r0/r1 pair in double register
vmov d7, r2, r3 // load r2/r3 pair in double register
- vcmp.f64 d6, d7
+ vcmp.f64 d6, d7
+#endif
vmrs apsr_nzcv, fpscr
movvs r0, #1 // set result register to 1 if "overflow" (any NaNs)
movvc r0, #0
diff --git a/lib/builtins/arm/unordsf2vfp.S b/lib/builtins/arm/unordsf2vfp.S
index 5ebdd3df5..2b16b4905 100644
--- a/lib/builtins/arm/unordsf2vfp.S
+++ b/lib/builtins/arm/unordsf2vfp.S
@@ -19,9 +19,13 @@
.syntax unified
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__unordsf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vcmp.f32 s0, s1
+#else
vmov s14, r0 // move from GPR 0 to float register
vmov s15, r1 // move from GPR 1 to float register
vcmp.f32 s14, s15
+#endif
vmrs apsr_nzcv, fpscr
movvs r0, #1 // set result register to 1 if "overflow" (any NaNs)
movvc r0, #0