diff options
Diffstat (limited to 'lib/builtins')
50 files changed, 986 insertions, 44 deletions
diff --git a/lib/builtins/CMakeLists.txt b/lib/builtins/CMakeLists.txt index b33786a85..ad9059c33 100644 --- a/lib/builtins/CMakeLists.txt +++ b/lib/builtins/CMakeLists.txt @@ -132,6 +132,7 @@ set(GENERIC_SOURCES negvdi2.c negvsi2.c negvti2.c + os_version_check.c paritydi2.c paritysi2.c parityti2.c @@ -302,6 +303,13 @@ set(arm_SOURCES arm/umodsi3.S ${GENERIC_SOURCES}) +set(thumb1_SOURCES + arm/divsi3.S + arm/udivsi3.S + arm/comparesf2.S + arm/addsf3.S + ${GENERIC_SOURCES}) + set(arm_EABI_SOURCES arm/aeabi_cdcmp.S arm/aeabi_cdcmpeq_check_nan.c @@ -320,6 +328,7 @@ set(arm_EABI_SOURCES arm/aeabi_memset.S arm/aeabi_uidivmod.S arm/aeabi_uldivmod.S) + set(arm_Thumb1_JT_SOURCES arm/switch16.S arm/switch32.S @@ -401,6 +410,10 @@ elseif(NOT WIN32) ${arm_SOURCES} ${arm_EABI_SOURCES} ${arm_Thumb1_SOURCES}) + + set(thumb1_SOURCES + ${thumb1_SOURCES} + ${arm_EABI_SOURCES}) endif() set(aarch64_SOURCES @@ -415,8 +428,10 @@ set(aarch64_SOURCES fixunstfti.c floatditf.c floatsitf.c + floattitf.c floatunditf.c floatunsitf.c + floatuntitf.c multc3.c trunctfdf2.c trunctfsf2.c @@ -429,7 +444,7 @@ set(armv7k_SOURCES ${arm_SOURCES}) set(arm64_SOURCES ${aarch64_SOURCES}) # macho_embedded archs -set(armv6m_SOURCES ${GENERIC_SOURCES}) +set(armv6m_SOURCES ${thumb1_SOURCES}) set(armv7m_SOURCES ${arm_SOURCES}) set(armv7em_SOURCES ${arm_SOURCES}) diff --git a/lib/builtins/arm/adddf3vfp.S b/lib/builtins/arm/adddf3vfp.S index f4c00a03e..8e476cad1 100644 --- a/lib/builtins/arm/adddf3vfp.S +++ b/lib/builtins/arm/adddf3vfp.S @@ -18,10 +18,14 @@ .syntax unified .p2align 2 DEFINE_COMPILERRT_FUNCTION(__adddf3vfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vadd.f64 d0, d0, d1 +#else vmov d6, r0, r1 // move first param from r0/r1 pair into d6 vmov d7, r2, r3 // move second param from r2/r3 pair into d7 vadd.f64 d6, d6, d7 vmov r0, r1, d6 // move result back to r0/r1 pair +#endif bx lr END_COMPILERRT_FUNCTION(__adddf3vfp) diff --git a/lib/builtins/arm/addsf3.S b/lib/builtins/arm/addsf3.S new file mode 100644 index 000000000..362b5c147 --- /dev/null +++ b/lib/builtins/arm/addsf3.S @@ -0,0 +1,277 @@ +/*===-- addsf3.S - Adds two single precision floating pointer numbers-----===// + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===// + * + * This file implements the __addsf3 (single precision floating pointer number + * addition with the IEEE-754 default rounding (to nearest, ties to even) + * function for the ARM Thumb1 ISA. + * + *===----------------------------------------------------------------------===*/ + +#include "../assembly.h" +#define significandBits 23 +#define typeWidth 32 + + .syntax unified + .text + .thumb + .p2align 2 + +DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_fadd, __addsf3) + +DEFINE_COMPILERRT_THUMB_FUNCTION(__addsf3) + push {r4, r5, r6, r7, lr} + // Get the absolute value of a and b. + lsls r2, r0, #1 + lsls r3, r1, #1 + lsrs r2, r2, #1 /* aAbs */ + beq LOCAL_LABEL(a_zero_nan_inf) + lsrs r3, r3, #1 /* bAbs */ + beq LOCAL_LABEL(zero_nan_inf) + + // Detect if a or b is infinity or Nan. + lsrs r6, r2, #(significandBits) + lsrs r7, r3, #(significandBits) + cmp r6, #0xFF + beq LOCAL_LABEL(zero_nan_inf) + cmp r7, #0xFF + beq LOCAL_LABEL(zero_nan_inf) + + // Swap Rep and Abs so that a and aAbs has the larger absolute value. + cmp r2, r3 + bhs LOCAL_LABEL(no_swap) + movs r4, r0 + movs r5, r2 + movs r0, r1 + movs r2, r3 + movs r1, r4 + movs r3, r5 +LOCAL_LABEL(no_swap): + + // Get the significands and shift them to give us round, guard and sticky. + lsls r4, r0, #(typeWidth - significandBits) + lsrs r4, r4, #(typeWidth - significandBits - 3) /* aSignificand << 3 */ + lsls r5, r1, #(typeWidth - significandBits) + lsrs r5, r5, #(typeWidth - significandBits - 3) /* bSignificand << 3 */ + + // Get the implicitBit. + movs r6, #1 + lsls r6, r6, #(significandBits + 3) + + // Get aExponent and set implicit bit if necessary. + lsrs r2, r2, #(significandBits) + beq LOCAL_LABEL(a_done_implicit_bit) + orrs r4, r6 +LOCAL_LABEL(a_done_implicit_bit): + + // Get bExponent and set implicit bit if necessary. + lsrs r3, r3, #(significandBits) + beq LOCAL_LABEL(b_done_implicit_bit) + orrs r5, r6 +LOCAL_LABEL(b_done_implicit_bit): + + // Get the difference in exponents. + subs r6, r2, r3 + beq LOCAL_LABEL(done_align) + + // If b is denormal, then a must be normal as align > 0, and we only need to + // right shift bSignificand by (align - 1) bits. + cmp r3, #0 + bne 1f + subs r6, r6, #1 +1: + + // No longer needs bExponent. r3 is dead here. + // Set sticky bits of b: sticky = bSignificand << (typeWidth - align). + movs r3, #(typeWidth) + subs r3, r3, r6 + movs r7, r5 + lsls r7, r3 + beq 1f + movs r7, #1 +1: + + // bSignificand = bSignificand >> align | sticky; + lsrs r5, r6 + orrs r5, r7 + bne LOCAL_LABEL(done_align) + movs r5, #1 // sticky; b is known to be non-zero. + +LOCAL_LABEL(done_align): + // isSubtraction = (aRep ^ bRep) >> 31; + movs r7, r0 + eors r7, r1 + lsrs r7, #31 + bne LOCAL_LABEL(do_substraction) + + // Same sign, do Addition. + + // aSignificand += bSignificand; + adds r4, r4, r5 + + // Check carry bit. + movs r6, #1 + lsls r6, r6, #(significandBits + 3 + 1) + movs r7, r4 + ands r7, r6 + beq LOCAL_LABEL(form_result) + // If the addition carried up, we need to right-shift the result and + // adjust the exponent. + movs r7, r4 + movs r6, #1 + ands r7, r6 // sticky = aSignificand & 1; + lsrs r4, #1 + orrs r4, r7 // result Significand + adds r2, #1 // result Exponent + // If we have overflowed the type, return +/- infinity. + cmp r2, 0xFF + beq LOCAL_LABEL(ret_inf) + +LOCAL_LABEL(form_result): + // Shift the sign, exponent and significand into place. + lsrs r0, #(typeWidth - 1) + lsls r0, #(typeWidth - 1) // Get Sign. + lsls r2, #(significandBits) + orrs r0, r2 + movs r1, r4 + lsls r4, #(typeWidth - significandBits - 3) + lsrs r4, #(typeWidth - significandBits) + orrs r0, r4 + + // Final rounding. The result may overflow to infinity, but that is the + // correct result in that case. + // roundGuardSticky = aSignificand & 0x7; + movs r2, #0x7 + ands r1, r2 + // if (roundGuardSticky > 0x4) result++; + + cmp r1, #0x4 + blt LOCAL_LABEL(done_round) + beq 1f + adds r0, #1 + pop {r4, r5, r6, r7, pc} +1: + + // if (roundGuardSticky == 0x4) result += result & 1; + movs r1, r0 + lsrs r1, #1 + bcc LOCAL_LABEL(done_round) + adds r0, r0, #1 +LOCAL_LABEL(done_round): + pop {r4, r5, r6, r7, pc} + +LOCAL_LABEL(do_substraction): + subs r4, r4, r5 // aSignificand -= bSignificand; + beq LOCAL_LABEL(ret_zero) + movs r6, r4 + cmp r2, 0 + beq LOCAL_LABEL(form_result) // if a's exp is 0, no need to normalize. + // If partial cancellation occured, we need to left-shift the result + // and adjust the exponent: + lsrs r6, r6, #(significandBits + 3) + bne LOCAL_LABEL(form_result) + + push {r0, r1, r2, r3} + movs r0, r4 + bl __clzsi2 + movs r5, r0 + pop {r0, r1, r2, r3} + // shift = rep_clz(aSignificand) - rep_clz(implicitBit << 3); + subs r5, r5, #(typeWidth - significandBits - 3 - 1) + // aSignificand <<= shift; aExponent -= shift; + lsls r4, r5 + subs r2, r2, r5 + bgt LOCAL_LABEL(form_result) + + // Do normalization if aExponent <= 0. + movs r6, #1 + subs r6, r6, r2 // 1 - aExponent; + movs r2, #0 // aExponent = 0; + movs r3, #(typeWidth) // bExponent is dead. + subs r3, r3, r6 + movs r7, r4 + lsls r7, r3 // stickyBit = (bool)(aSignificant << (typeWidth - align)) + beq 1f + movs r7, #1 +1: + lsrs r4, r6 /* aSignificand >> shift */ + orrs r4, r7 + b LOCAL_LABEL(form_result) + +LOCAL_LABEL(ret_zero): + movs r0, #0 + pop {r4, r5, r6, r7, pc} + + +LOCAL_LABEL(a_zero_nan_inf): + lsrs r3, r3, #1 + +LOCAL_LABEL(zero_nan_inf): + // Here r2 has aAbs, r3 has bAbs + movs r4, #0xFF + lsls r4, r4, #(significandBits) // Make +inf. + + cmp r2, r4 + bhi LOCAL_LABEL(a_is_nan) + cmp r3, r4 + bhi LOCAL_LABEL(b_is_nan) + + cmp r2, r4 + bne LOCAL_LABEL(a_is_rational) + // aAbs is INF. + eors r1, r0 // aRep ^ bRep. + movs r6, #1 + lsls r6, r6, #(typeWidth - 1) // get sign mask. + cmp r1, r6 // if they only differ on sign bit, it's -INF + INF + beq LOCAL_LABEL(a_is_nan) + pop {r4, r5, r6, r7, pc} + +LOCAL_LABEL(a_is_rational): + cmp r3, r4 + bne LOCAL_LABEL(b_is_rational) + movs r0, r1 + pop {r4, r5, r6, r7, pc} + +LOCAL_LABEL(b_is_rational): + // either a or b or both are zero. + adds r4, r2, r3 + beq LOCAL_LABEL(both_zero) + cmp r2, #0 // is absA 0 ? + beq LOCAL_LABEL(ret_b) + pop {r4, r5, r6, r7, pc} + +LOCAL_LABEL(both_zero): + ands r0, r1 // +0 + -0 = +0 + pop {r4, r5, r6, r7, pc} + +LOCAL_LABEL(ret_b): + movs r0, r1 + +LOCAL_LABEL(ret): + pop {r4, r5, r6, r7, pc} + +LOCAL_LABEL(b_is_nan): + movs r0, r1 +LOCAL_LABEL(a_is_nan): + movs r1, #1 + lsls r1, r1, #(significandBits -1) // r1 is quiet bit. + orrs r0, r1 + pop {r4, r5, r6, r7, pc} + +LOCAL_LABEL(ret_inf): + movs r4, #0xFF + lsls r4, r4, #(significandBits) + orrs r0, r4 + lsrs r0, r0, #(significandBits) + lsls r0, r0, #(significandBits) + pop {r4, r5, r6, r7, pc} + + +END_COMPILERRT_FUNCTION(__addsf3) + +NO_EXEC_STACK_DIRECTIVE diff --git a/lib/builtins/arm/addsf3vfp.S b/lib/builtins/arm/addsf3vfp.S index af40c1cc9..8871efdcc 100644 --- a/lib/builtins/arm/addsf3vfp.S +++ b/lib/builtins/arm/addsf3vfp.S @@ -18,10 +18,14 @@ .syntax unified .p2align 2 DEFINE_COMPILERRT_FUNCTION(__addsf3vfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vadd.f32 s0, s0, s1 +#else vmov s14, r0 // move first param from r0 into float register vmov s15, r1 // move second param from r1 into float register vadd.f32 s14, s14, s15 vmov r0, s14 // move result back to r0 +#endif bx lr END_COMPILERRT_FUNCTION(__addsf3vfp) diff --git a/lib/builtins/arm/aeabi_cdcmp.S b/lib/builtins/arm/aeabi_cdcmp.S index 8008f5fca..b67814d9f 100644 --- a/lib/builtins/arm/aeabi_cdcmp.S +++ b/lib/builtins/arm/aeabi_cdcmp.S @@ -30,6 +30,19 @@ DEFINE_COMPILERRT_FUNCTION(__aeabi_cdcmpeq) push {r0-r3, lr} bl __aeabi_cdcmpeq_check_nan cmp r0, #1 +#if __ARM_ARCH_ISA_THUMB == 1 + beq 1f + // NaN has been ruled out, so __aeabi_cdcmple can't trap + mov r0, sp + ldm r0, {r0-r3} + bl __aeabi_cdcmple + pop {r0-r3, pc} +1: + // Z = 0, C = 1 + movs r0, #0xF + lsls r0, r0, #31 + pop {r0-r3, pc} +#else pop {r0-r3, lr} // NaN has been ruled out, so __aeabi_cdcmple can't trap @@ -37,6 +50,7 @@ DEFINE_COMPILERRT_FUNCTION(__aeabi_cdcmpeq) msr CPSR_f, #APSR_C JMP(lr) +#endif END_COMPILERRT_FUNCTION(__aeabi_cdcmpeq) @@ -59,6 +73,28 @@ DEFINE_COMPILERRT_FUNCTION(__aeabi_cdcmple) bl __aeabi_dcmplt cmp r0, #1 +#if __ARM_ARCH_ISA_THUMB == 1 + bne 1f + // Z = 0, C = 0 + movs r0, #1 + lsls r0, r0, #1 + pop {r0-r3, pc} +1: + mov r0, sp + ldm r0, {r0-r3} + bl __aeabi_dcmpeq + cmp r0, #1 + bne 2f + // Z = 1, C = 1 + movs r0, #2 + lsls r0, r0, #31 + pop {r0-r3, pc} +2: + // Z = 0, C = 1 + movs r0, #0xF + lsls r0, r0, #31 + pop {r0-r3, pc} +#else moveq ip, #0 beq 1f @@ -72,6 +108,7 @@ DEFINE_COMPILERRT_FUNCTION(__aeabi_cdcmple) msr CPSR_f, ip pop {r0-r3} POP_PC() +#endif END_COMPILERRT_FUNCTION(__aeabi_cdcmple) // int __aeabi_cdrcmple(double a, double b) { diff --git a/lib/builtins/arm/aeabi_cfcmp.S b/lib/builtins/arm/aeabi_cfcmp.S index 274baf7ae..e37aa3d06 100644 --- a/lib/builtins/arm/aeabi_cfcmp.S +++ b/lib/builtins/arm/aeabi_cfcmp.S @@ -30,6 +30,19 @@ DEFINE_COMPILERRT_FUNCTION(__aeabi_cfcmpeq) push {r0-r3, lr} bl __aeabi_cfcmpeq_check_nan cmp r0, #1 +#if __ARM_ARCH_ISA_THUMB == 1 + beq 1f + // NaN has been ruled out, so __aeabi_cfcmple can't trap + mov r0, sp + ldm r0, {r0-r3} + bl __aeabi_cfcmple + pop {r0-r3, pc} +1: + // Z = 0, C = 1 + movs r0, #0xF + lsls r0, r0, #31 + pop {r0-r3, pc} +#else pop {r0-r3, lr} // NaN has been ruled out, so __aeabi_cfcmple can't trap @@ -37,6 +50,7 @@ DEFINE_COMPILERRT_FUNCTION(__aeabi_cfcmpeq) msr CPSR_f, #APSR_C JMP(lr) +#endif END_COMPILERRT_FUNCTION(__aeabi_cfcmpeq) @@ -59,6 +73,28 @@ DEFINE_COMPILERRT_FUNCTION(__aeabi_cfcmple) bl __aeabi_fcmplt cmp r0, #1 +#if __ARM_ARCH_ISA_THUMB == 1 + bne 1f + // Z = 0, C = 0 + movs r0, #1 + lsls r0, r0, #1 + pop {r0-r3, pc} +1: + mov r0, sp + ldm r0, {r0-r3} + bl __aeabi_fcmpeq + cmp r0, #1 + bne 2f + // Z = 1, C = 1 + movs r0, #2 + lsls r0, r0, #31 + pop {r0-r3, pc} +2: + // Z = 0, C = 1 + movs r0, #0xF + lsls r0, r0, #31 + pop {r0-r3, pc} +#else moveq ip, #0 beq 1f @@ -72,6 +108,7 @@ DEFINE_COMPILERRT_FUNCTION(__aeabi_cfcmple) msr CPSR_f, ip pop {r0-r3} POP_PC() +#endif END_COMPILERRT_FUNCTION(__aeabi_cfcmple) // int __aeabi_cfrcmple(float a, float b) { diff --git a/lib/builtins/arm/aeabi_dcmp.S b/lib/builtins/arm/aeabi_dcmp.S index 43e439268..51539c0ac 100644 --- a/lib/builtins/arm/aeabi_dcmp.S +++ b/lib/builtins/arm/aeabi_dcmp.S @@ -26,10 +26,10 @@ DEFINE_COMPILERRT_FUNCTION(__aeabi_dcmp ## cond) \ bl SYMBOL_NAME(__ ## cond ## df2) SEPARATOR \ cmp r0, #0 SEPARATOR \ b ## cond 1f SEPARATOR \ - mov r0, #0 SEPARATOR \ + movs r0, #0 SEPARATOR \ pop { r4, pc } SEPARATOR \ 1: SEPARATOR \ - mov r0, #1 SEPARATOR \ + movs r0, #1 SEPARATOR \ pop { r4, pc } SEPARATOR \ END_COMPILERRT_FUNCTION(__aeabi_dcmp ## cond) diff --git a/lib/builtins/arm/aeabi_fcmp.S b/lib/builtins/arm/aeabi_fcmp.S index 0a1d92a60..8e7774b58 100644 --- a/lib/builtins/arm/aeabi_fcmp.S +++ b/lib/builtins/arm/aeabi_fcmp.S @@ -26,10 +26,10 @@ DEFINE_COMPILERRT_FUNCTION(__aeabi_fcmp ## cond) \ bl SYMBOL_NAME(__ ## cond ## sf2) SEPARATOR \ cmp r0, #0 SEPARATOR \ b ## cond 1f SEPARATOR \ - mov r0, #0 SEPARATOR \ + movs r0, #0 SEPARATOR \ pop { r4, pc } SEPARATOR \ 1: SEPARATOR \ - mov r0, #1 SEPARATOR \ + movs r0, #1 SEPARATOR \ pop { r4, pc } SEPARATOR \ END_COMPILERRT_FUNCTION(__aeabi_fcmp ## cond) diff --git a/lib/builtins/arm/aeabi_idivmod.S b/lib/builtins/arm/aeabi_idivmod.S index b43ea6990..0164b15dc 100644 --- a/lib/builtins/arm/aeabi_idivmod.S +++ b/lib/builtins/arm/aeabi_idivmod.S @@ -26,7 +26,7 @@ DEFINE_COMPILERRT_FUNCTION(__aeabi_idivmod) push {r0, r1, lr} bl SYMBOL_NAME(__divsi3) pop {r1, r2, r3} // now r0 = quot, r1 = num, r2 = denom - muls r2, r2, r0 // r2 = quot * denom + muls r2, r0, r2 // r2 = quot * denom subs r1, r1, r2 JMP (r3) #else diff --git a/lib/builtins/arm/aeabi_ldivmod.S b/lib/builtins/arm/aeabi_ldivmod.S index 3dae14ef0..038ae5d72 100644 --- a/lib/builtins/arm/aeabi_ldivmod.S +++ b/lib/builtins/arm/aeabi_ldivmod.S @@ -23,23 +23,23 @@ .syntax unified .p2align 2 DEFINE_COMPILERRT_FUNCTION(__aeabi_ldivmod) - push {r11, lr} + push {r6, lr} sub sp, sp, #16 - add r12, sp, #8 - str r12, [sp] + add r6, sp, #8 + str r6, [sp] #if defined(__MINGW32__) - mov r12, r0 - mov r0, r2 - mov r2, r12 - mov r12, r1 - mov r1, r3 - mov r3, r12 + movs r6, r0 + movs r0, r2 + movs r2, r6 + movs r6, r1 + movs r1, r3 + movs r3, r6 #endif bl SYMBOL_NAME(__divmoddi4) ldr r2, [sp, #8] ldr r3, [sp, #12] add sp, sp, #16 - pop {r11, pc} + pop {r6, pc} END_COMPILERRT_FUNCTION(__aeabi_ldivmod) NO_EXEC_STACK_DIRECTIVE diff --git a/lib/builtins/arm/aeabi_memset.S b/lib/builtins/arm/aeabi_memset.S index 48edd8970..633f59227 100644 --- a/lib/builtins/arm/aeabi_memset.S +++ b/lib/builtins/arm/aeabi_memset.S @@ -26,7 +26,7 @@ DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memset8, __aeabi_memset) DEFINE_COMPILERRT_FUNCTION(__aeabi_memclr) mov r2, r1 - mov r1, #0 + movs r1, #0 b memset END_COMPILERRT_FUNCTION(__aeabi_memclr) diff --git a/lib/builtins/arm/aeabi_uidivmod.S b/lib/builtins/arm/aeabi_uidivmod.S index 7098bc6ff..a627fc740 100644 --- a/lib/builtins/arm/aeabi_uidivmod.S +++ b/lib/builtins/arm/aeabi_uidivmod.S @@ -29,7 +29,7 @@ DEFINE_COMPILERRT_FUNCTION(__aeabi_uidivmod) push {r0, r1, lr} bl SYMBOL_NAME(__aeabi_uidiv) pop {r1, r2, r3} - muls r2, r2, r0 // r2 = quot * denom + muls r2, r0, r2 // r2 = quot * denom subs r1, r1, r2 JMP (r3) LOCAL_LABEL(case_denom_larger): diff --git a/lib/builtins/arm/aeabi_uldivmod.S b/lib/builtins/arm/aeabi_uldivmod.S index bc26e5674..be343b6bc 100644 --- a/lib/builtins/arm/aeabi_uldivmod.S +++ b/lib/builtins/arm/aeabi_uldivmod.S @@ -23,23 +23,23 @@ .syntax unified .p2align 2 DEFINE_COMPILERRT_FUNCTION(__aeabi_uldivmod) - push {r11, lr} + push {r6, lr} sub sp, sp, #16 - add r12, sp, #8 - str r12, [sp] + add r6, sp, #8 + str r6, [sp] #if defined(__MINGW32__) - mov r12, r0 - mov r0, r2 - mov r2, r12 - mov r12, r1 - mov r1, r3 - mov r3, r12 + movs r6, r0 + movs r0, r2 + movs r2, r6 + movs r6, r1 + movs r1, r3 + movs r3, r6 #endif bl SYMBOL_NAME(__udivmoddi4) ldr r2, [sp, #8] ldr r3, [sp, #12] add sp, sp, #16 - pop {r11, pc} + pop {r6, pc} END_COMPILERRT_FUNCTION(__aeabi_uldivmod) NO_EXEC_STACK_DIRECTIVE diff --git a/lib/builtins/arm/comparesf2.S b/lib/builtins/arm/comparesf2.S index 6d7019545..ef7091bf3 100644 --- a/lib/builtins/arm/comparesf2.S +++ b/lib/builtins/arm/comparesf2.S @@ -43,31 +43,60 @@ .thumb #endif -.p2align 2 +@ int __eqsf2(float a, float b) + + .p2align 2 DEFINE_COMPILERRT_FUNCTION(__eqsf2) +#if defined(COMPILER_RT_ARMHF_TARGET) + vmov r0, s0 + vmov r1, s1 +#endif // Make copies of a and b with the sign bit shifted off the top. These will // be used to detect zeros and NaNs. +#if __ARM_ARCH_ISA_THUMB == 1 + push {r6, lr} + lsls r2, r0, #1 + lsls r3, r1, #1 +#else mov r2, r0, lsl #1 mov r3, r1, lsl #1 +#endif // We do the comparison in three stages (ignoring NaN values for the time // being). First, we orr the absolute values of a and b; this sets the Z // flag if both a and b are zero (of either sign). The shift of r3 doesn't // effect this at all, but it *does* make sure that the C flag is clear for // the subsequent operations. +#if __ARM_ARCH_ISA_THUMB == 1 + lsrs r6, r3, #1 + orrs r6, r2 +#else orrs r12, r2, r3, lsr #1 - +#endif // Next, we check if a and b have the same or different signs. If they have // opposite signs, this eor will set the N flag. +#if __ARM_ARCH_ISA_THUMB == 1 + beq 1f + movs r6, r0 + eors r6, r1 +1: +#else it ne eorsne r12, r0, r1 +#endif // If a and b are equal (either both zeros or bit identical; again, we're // ignoring NaNs for now), this subtract will zero out r0. If they have the // same sign, the flags are updated as they would be for a comparison of the // absolute values of a and b. +#if __ARM_ARCH_ISA_THUMB == 1 + bmi 1f + subs r0, r2, r3 +1: +#else it pl subspl r0, r2, r3 +#endif // If a is smaller in magnitude than b and both have the same sign, place // the negation of the sign of b in r0. Thus, if both are negative and @@ -79,41 +108,126 @@ DEFINE_COMPILERRT_FUNCTION(__eqsf2) // still clear from the shift argument in orrs; if a is positive and b // negative, this places 0 in r0; if a is negative and b positive, -1 is // placed in r0. +#if __ARM_ARCH_ISA_THUMB == 1 + bhs 1f + // Here if a and b have the same sign and absA < absB, the result is thus + // b < 0 ? 1 : -1. Same if a and b have the opposite sign (ignoring Nan). + movs r0, #1 + lsrs r1, #31 + bne LOCAL_LABEL(CHECK_NAN) + negs r0, r0 + b LOCAL_LABEL(CHECK_NAN) +1: +#else it lo mvnlo r0, r1, asr #31 +#endif // If a is greater in magnitude than b and both have the same sign, place // the sign of b in r0. Thus, if both are negative and a < b, -1 is placed // in r0, which is the desired result. Conversely, if both are positive // and a > b, zero is placed in r0. +#if __ARM_ARCH_ISA_THUMB == 1 + bls 1f + // Here both have the same sign and absA > absB. + movs r0, #1 + lsrs r1, #31 + beq LOCAL_LABEL(CHECK_NAN) + negs r0, r0 +1: +#else it hi movhi r0, r1, asr #31 +#endif // If you've been keeping track, at this point r0 contains -1 if a < b and // 0 if a >= b. All that remains to be done is to set it to 1 if a > b. // If a == b, then the Z flag is set, so we can get the correct final value // into r0 by simply or'ing with 1 if Z is clear. + // For Thumb-1, r0 contains -1 if a < b, 0 if a > b and 0 if a == b. +#if __ARM_ARCH_ISA_THUMB != 1 it ne orrne r0, r0, #1 +#endif // Finally, we need to deal with NaNs. If either argument is NaN, replace // the value in r0 with 1. +#if __ARM_ARCH_ISA_THUMB == 1 +LOCAL_LABEL(CHECK_NAN): + movs r6, #0xff + lsls r6, #24 + cmp r2, r6 + bhi 1f + cmp r3, r6 +1: + bls 2f + movs r0, #1 +2: + pop {r6, pc} +#else cmp r2, #0xff000000 ite ls cmpls r3, #0xff000000 movhi r0, #1 JMP(lr) +#endif END_COMPILERRT_FUNCTION(__eqsf2) + DEFINE_COMPILERRT_FUNCTION_ALIAS(__lesf2, __eqsf2) DEFINE_COMPILERRT_FUNCTION_ALIAS(__ltsf2, __eqsf2) DEFINE_COMPILERRT_FUNCTION_ALIAS(__nesf2, __eqsf2) -.p2align 2 +@ int __gtsf2(float a, float b) + + .p2align 2 DEFINE_COMPILERRT_FUNCTION(__gtsf2) // Identical to the preceding except in that we return -1 for NaN values. - // Given that the two paths share so much code, one might be tempted to + // Given that the two paths share so much code, one might be tempted to // unify them; however, the extra code needed to do so makes the code size // to performance tradeoff very hard to justify for such small functions. +#if defined(COMPILER_RT_ARMHF_TARGET) + vmov r0, s0 + vmov r1, s1 +#endif +#if __ARM_ARCH_ISA_THUMB == 1 + push {r6, lr} + lsls r2, r0, #1 + lsls r3, r1, #1 + lsrs r6, r3, #1 + orrs r6, r2 + beq 1f + movs r6, r0 + eors r6, r1 +1: + bmi 2f + subs r0, r2, r3 +2: + bhs 3f + movs r0, #1 + lsrs r1, #31 + bne LOCAL_LABEL(CHECK_NAN_2) + negs r0, r0 + b LOCAL_LABEL(CHECK_NAN_2) +3: + bls 4f + movs r0, #1 + lsrs r1, #31 + beq LOCAL_LABEL(CHECK_NAN_2) + negs r0, r0 +4: +LOCAL_LABEL(CHECK_NAN_2): + movs r6, #0xff + lsls r6, #24 + cmp r2, r6 + bhi 5f + cmp r3, r6 +5: + bls 6f + movs r0, #1 + negs r0, r0 +6: + pop {r6, pc} +#else mov r2, r0, lsl #1 mov r3, r1, lsl #1 orrs r12, r2, r3, lsr #1 @@ -132,23 +246,51 @@ DEFINE_COMPILERRT_FUNCTION(__gtsf2) cmpls r3, #0xff000000 movhi r0, #-1 JMP(lr) +#endif END_COMPILERRT_FUNCTION(__gtsf2) + DEFINE_COMPILERRT_FUNCTION_ALIAS(__gesf2, __gtsf2) -.p2align 2 +@ int __unordsf2(float a, float b) + + .p2align 2 DEFINE_COMPILERRT_FUNCTION(__unordsf2) +#if defined(COMPILER_RT_ARMHF_TARGET) + vmov r0, s0 + vmov r1, s1 +#endif // Return 1 for NaN values, 0 otherwise. - mov r2, r0, lsl #1 - mov r3, r1, lsl #1 - mov r0, #0 + lsls r2, r0, #1 + lsls r3, r1, #1 + movs r0, #0 +#if __ARM_ARCH_ISA_THUMB == 1 + movs r1, #0xff + lsls r1, #24 + cmp r2, r1 + bhi 1f + cmp r3, r1 +1: + bls 2f + movs r0, #1 +2: +#else cmp r2, #0xff000000 ite ls cmpls r3, #0xff000000 movhi r0, #1 +#endif JMP(lr) END_COMPILERRT_FUNCTION(__unordsf2) +#if defined(COMPILER_RT_ARMHF_TARGET) +DEFINE_COMPILERRT_FUNCTION(__aeabi_fcmpum) + vmov s0, r0 + vmov s1, r1 + b SYMBOL_NAME(__unordsf2) +END_COMPILERRT_FUNCTION(__aeabi_fcmpum) +#else DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_fcmpun, __unordsf2) +#endif NO_EXEC_STACK_DIRECTIVE diff --git a/lib/builtins/arm/divdf3vfp.S b/lib/builtins/arm/divdf3vfp.S index 928f53809..776ba4f24 100644 --- a/lib/builtins/arm/divdf3vfp.S +++ b/lib/builtins/arm/divdf3vfp.S @@ -18,10 +18,14 @@ .syntax unified .p2align 2 DEFINE_COMPILERRT_FUNCTION(__divdf3vfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vdiv.f64 d0, d0, d1 +#else vmov d6, r0, r1 // move first param from r0/r1 pair into d6 vmov d7, r2, r3 // move second param from r2/r3 pair into d7 - vdiv.f64 d5, d6, d7 + vdiv.f64 d5, d6, d7 vmov r0, r1, d5 // move result back to r0/r1 pair +#endif bx lr END_COMPILERRT_FUNCTION(__divdf3vfp) diff --git a/lib/builtins/arm/divsf3vfp.S b/lib/builtins/arm/divsf3vfp.S index a2e297f70..130318f0c 100644 --- a/lib/builtins/arm/divsf3vfp.S +++ b/lib/builtins/arm/divsf3vfp.S @@ -18,10 +18,14 @@ .syntax unified .p2align 2 DEFINE_COMPILERRT_FUNCTION(__divsf3vfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vdiv.f32 s0, s0, s1 +#else vmov s14, r0 // move first param from r0 into float register vmov s15, r1 // move second param from r1 into float register vdiv.f32 s13, s14, s15 vmov r0, s13 // move result back to r0 +#endif bx lr END_COMPILERRT_FUNCTION(__divsf3vfp) diff --git a/lib/builtins/arm/eqdf2vfp.S b/lib/builtins/arm/eqdf2vfp.S index 95e6bb363..8fa0b2deb 100644 --- a/lib/builtins/arm/eqdf2vfp.S +++ b/lib/builtins/arm/eqdf2vfp.S @@ -19,9 +19,13 @@ .syntax unified .p2align 2 DEFINE_COMPILERRT_FUNCTION(__eqdf2vfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vcmp.f64 d0, d1 +#else vmov d6, r0, r1 // load r0/r1 pair in double register vmov d7, r2, r3 // load r2/r3 pair in double register vcmp.f64 d6, d7 +#endif vmrs apsr_nzcv, fpscr moveq r0, #1 // set result register to 1 if equal movne r0, #0 diff --git a/lib/builtins/arm/eqsf2vfp.S b/lib/builtins/arm/eqsf2vfp.S index fbac139c1..3776bf487 100644 --- a/lib/builtins/arm/eqsf2vfp.S +++ b/lib/builtins/arm/eqsf2vfp.S @@ -19,9 +19,13 @@ .syntax unified .p2align 2 DEFINE_COMPILERRT_FUNCTION(__eqsf2vfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vcmp.f32 s0, s1 +#else vmov s14, r0 // move from GPR 0 to float register vmov s15, r1 // move from GPR 1 to float register vcmp.f32 s14, s15 +#endif vmrs apsr_nzcv, fpscr moveq r0, #1 // set result register to 1 if equal movne r0, #0 diff --git a/lib/builtins/arm/extendsfdf2vfp.S b/lib/builtins/arm/extendsfdf2vfp.S index 563bf92af..1079f977b 100644 --- a/lib/builtins/arm/extendsfdf2vfp.S +++ b/lib/builtins/arm/extendsfdf2vfp.S @@ -19,9 +19,13 @@ .syntax unified .p2align 2 DEFINE_COMPILERRT_FUNCTION(__extendsfdf2vfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vcvt.f64.f32 d0, s0 +#else vmov s15, r0 // load float register from R0 vcvt.f64.f32 d7, s15 // convert single to double vmov r0, r1, d7 // return result in r0/r1 pair +#endif bx lr END_COMPILERRT_FUNCTION(__extendsfdf2vfp) diff --git a/lib/builtins/arm/fixdfsivfp.S b/lib/builtins/arm/fixdfsivfp.S index 8263ff942..5d7b0f856 100644 --- a/lib/builtins/arm/fixdfsivfp.S +++ b/lib/builtins/arm/fixdfsivfp.S @@ -19,9 +19,14 @@ .syntax unified .p2align 2 DEFINE_COMPILERRT_FUNCTION(__fixdfsivfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vcvt.s32.f64 s0, d0 + vmov r0, s0 +#else vmov d7, r0, r1 // load double register from R0/R1 vcvt.s32.f64 s15, d7 // convert double to 32-bit int into s15 vmov r0, s15 // move s15 to result register +#endif bx lr END_COMPILERRT_FUNCTION(__fixdfsivfp) diff --git a/lib/builtins/arm/fixsfsivfp.S b/lib/builtins/arm/fixsfsivfp.S index c7c3b8117..805a277af 100644 --- a/lib/builtins/arm/fixsfsivfp.S +++ b/lib/builtins/arm/fixsfsivfp.S @@ -19,9 +19,14 @@ .syntax unified .p2align 2 DEFINE_COMPILERRT_FUNCTION(__fixsfsivfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vcvt.s32.f32 s0, s0 + vmov r0, s0 +#else vmov s15, r0 // load float register from R0 vcvt.s32.f32 s15, s15 // convert single to 32-bit int into s15 vmov r0, s15 // move s15 to result register +#endif bx lr END_COMPILERRT_FUNCTION(__fixsfsivfp) diff --git a/lib/builtins/arm/fixunsdfsivfp.S b/lib/builtins/arm/fixunsdfsivfp.S index 9cc1e6286..4f1b2c8ce 100644 --- a/lib/builtins/arm/fixunsdfsivfp.S +++ b/lib/builtins/arm/fixunsdfsivfp.S @@ -20,9 +20,14 @@ .syntax unified .p2align 2 DEFINE_COMPILERRT_FUNCTION(__fixunsdfsivfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vcvt.u32.f64 s0, d0 + vmov r0, s0 +#else vmov d7, r0, r1 // load double register from R0/R1 vcvt.u32.f64 s15, d7 // convert double to 32-bit int into s15 vmov r0, s15 // move s15 to result register +#endif bx lr END_COMPILERRT_FUNCTION(__fixunsdfsivfp) diff --git a/lib/builtins/arm/fixunssfsivfp.S b/lib/builtins/arm/fixunssfsivfp.S index 79d708229..e5d778236 100644 --- a/lib/builtins/arm/fixunssfsivfp.S +++ b/lib/builtins/arm/fixunssfsivfp.S @@ -20,9 +20,14 @@ .syntax unified .p2align 2 DEFINE_COMPILERRT_FUNCTION(__fixunssfsivfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vcvt.u32.f32 s0, s0 + vmov r0, s0 +#else vmov s15, r0 // load float register from R0 vcvt.u32.f32 s15, s15 // convert single to 32-bit unsigned into s15 vmov r0, s15 // move s15 to result register +#endif bx lr END_COMPILERRT_FUNCTION(__fixunssfsivfp) diff --git a/lib/builtins/arm/floatsidfvfp.S b/lib/builtins/arm/floatsidfvfp.S index 7623f26c6..3297ad44d 100644 --- a/lib/builtins/arm/floatsidfvfp.S +++ b/lib/builtins/arm/floatsidfvfp.S @@ -19,9 +19,14 @@ .syntax unified .p2align 2 DEFINE_COMPILERRT_FUNCTION(__floatsidfvfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vmov s0, r0 + vcvt.f64.s32 d0, s0 +#else vmov s15, r0 // move int to float register s15 vcvt.f64.s32 d7, s15 // convert 32-bit int in s15 to double in d7 vmov r0, r1, d7 // move d7 to result register pair r0/r1 +#endif bx lr END_COMPILERRT_FUNCTION(__floatsidfvfp) diff --git a/lib/builtins/arm/floatsisfvfp.S b/lib/builtins/arm/floatsisfvfp.S index c73dfac13..65408b54b 100644 --- a/lib/builtins/arm/floatsisfvfp.S +++ b/lib/builtins/arm/floatsisfvfp.S @@ -19,9 +19,14 @@ .syntax unified .p2align 2 DEFINE_COMPILERRT_FUNCTION(__floatsisfvfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vmov s0, r0 + vcvt.f32.s32 s0, s0 +#else vmov s15, r0 // move int to float register s15 vcvt.f32.s32 s15, s15 // convert 32-bit int in s15 to float in s15 vmov r0, s15 // move s15 to result register +#endif bx lr END_COMPILERRT_FUNCTION(__floatsisfvfp) diff --git a/lib/builtins/arm/floatunssidfvfp.S b/lib/builtins/arm/floatunssidfvfp.S index 2a59fdb83..d7a7024a2 100644 --- a/lib/builtins/arm/floatunssidfvfp.S +++ b/lib/builtins/arm/floatunssidfvfp.S @@ -19,9 +19,14 @@ .syntax unified .p2align 2 DEFINE_COMPILERRT_FUNCTION(__floatunssidfvfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vmov s0, r0 + vcvt.f64.u32 d0, s0 +#else vmov s15, r0 // move int to float register s15 vcvt.f64.u32 d7, s15 // convert 32-bit int in s15 to double in d7 vmov r0, r1, d7 // move d7 to result register pair r0/r1 +#endif bx lr END_COMPILERRT_FUNCTION(__floatunssidfvfp) diff --git a/lib/builtins/arm/floatunssisfvfp.S b/lib/builtins/arm/floatunssisfvfp.S index c096263c1..1ca856519 100644 --- a/lib/builtins/arm/floatunssisfvfp.S +++ b/lib/builtins/arm/floatunssisfvfp.S @@ -19,9 +19,14 @@ .syntax unified .p2align 2 DEFINE_COMPILERRT_FUNCTION(__floatunssisfvfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vmov s0, r0 + vcvt.f32.u32 s0, s0 +#else vmov s15, r0 // move int to float register s15 vcvt.f32.u32 s15, s15 // convert 32-bit int in s15 to float in s15 vmov r0, s15 // move s15 to result register +#endif bx lr END_COMPILERRT_FUNCTION(__floatunssisfvfp) diff --git a/lib/builtins/arm/gedf2vfp.S b/lib/builtins/arm/gedf2vfp.S index 72f13ef4e..14899f00a 100644 --- a/lib/builtins/arm/gedf2vfp.S +++ b/lib/builtins/arm/gedf2vfp.S @@ -19,9 +19,13 @@ .syntax unified .p2align 2 DEFINE_COMPILERRT_FUNCTION(__gedf2vfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vcmp.f64 d0, d1 +#else vmov d6, r0, r1 // load r0/r1 pair in double register vmov d7, r2, r3 // load r2/r3 pair in double register vcmp.f64 d6, d7 +#endif vmrs apsr_nzcv, fpscr movge r0, #1 // set result register to 1 if greater than or equal movlt r0, #0 diff --git a/lib/builtins/arm/gesf2vfp.S b/lib/builtins/arm/gesf2vfp.S index c9ee52c9c..b49d04d1c 100644 --- a/lib/builtins/arm/gesf2vfp.S +++ b/lib/builtins/arm/gesf2vfp.S @@ -19,9 +19,13 @@ .syntax unified .p2align 2 DEFINE_COMPILERRT_FUNCTION(__gesf2vfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vcmp.f32 s0, s1 +#else vmov s14, r0 // move from GPR 0 to float register vmov s15, r1 // move from GPR 1 to float register vcmp.f32 s14, s15 +#endif vmrs apsr_nzcv, fpscr movge r0, #1 // set result register to 1 if greater than or equal movlt r0, #0 diff --git a/lib/builtins/arm/gtdf2vfp.S b/lib/builtins/arm/gtdf2vfp.S index c7f277552..8166305e3 100644 --- a/lib/builtins/arm/gtdf2vfp.S +++ b/lib/builtins/arm/gtdf2vfp.S @@ -19,9 +19,13 @@ .syntax unified .p2align 2 DEFINE_COMPILERRT_FUNCTION(__gtdf2vfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vcmp.f64 d0, d1 +#else vmov d6, r0, r1 // load r0/r1 pair in double register vmov d7, r2, r3 // load r2/r3 pair in double register vcmp.f64 d6, d7 +#endif vmrs apsr_nzcv, fpscr movgt r0, #1 // set result register to 1 if equal movle r0, #0 diff --git a/lib/builtins/arm/gtsf2vfp.S b/lib/builtins/arm/gtsf2vfp.S index 7d49e4564..d2d8a2380 100644 --- a/lib/builtins/arm/gtsf2vfp.S +++ b/lib/builtins/arm/gtsf2vfp.S @@ -19,9 +19,13 @@ .syntax unified .p2align 2 DEFINE_COMPILERRT_FUNCTION(__gtsf2vfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vcmp.f32 s0, s1 +#else vmov s14, r0 // move from GPR 0 to float register vmov s15, r1 // move from GPR 1 to float register vcmp.f32 s14, s15 +#endif vmrs apsr_nzcv, fpscr movgt r0, #1 // set result register to 1 if equal movle r0, #0 diff --git a/lib/builtins/arm/ledf2vfp.S b/lib/builtins/arm/ledf2vfp.S index ca5b553f1..a9dab77c1 100644 --- a/lib/builtins/arm/ledf2vfp.S +++ b/lib/builtins/arm/ledf2vfp.S @@ -19,9 +19,13 @@ .syntax unified .p2align 2 DEFINE_COMPILERRT_FUNCTION(__ledf2vfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vcmp.f64 d0, d1 +#else vmov d6, r0, r1 // load r0/r1 pair in double register vmov d7, r2, r3 // load r2/r3 pair in double register vcmp.f64 d6, d7 +#endif vmrs apsr_nzcv, fpscr movls r0, #1 // set result register to 1 if equal movhi r0, #0 diff --git a/lib/builtins/arm/lesf2vfp.S b/lib/builtins/arm/lesf2vfp.S index f25422ece..7e127f465 100644 --- a/lib/builtins/arm/lesf2vfp.S +++ b/lib/builtins/arm/lesf2vfp.S @@ -19,9 +19,13 @@ .syntax unified .p2align 2 DEFINE_COMPILERRT_FUNCTION(__lesf2vfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vcmp.f32 s0, s1 +#else vmov s14, r0 // move from GPR 0 to float register vmov s15, r1 // move from GPR 1 to float register vcmp.f32 s14, s15 +#endif vmrs apsr_nzcv, fpscr movls r0, #1 // set result register to 1 if equal movhi r0, #0 diff --git a/lib/builtins/arm/ltdf2vfp.S b/lib/builtins/arm/ltdf2vfp.S index 6e2c0997c..8b6f8e4cc 100644 --- a/lib/builtins/arm/ltdf2vfp.S +++ b/lib/builtins/arm/ltdf2vfp.S @@ -19,9 +19,13 @@ .syntax unified .p2align 2 DEFINE_COMPILERRT_FUNCTION(__ltdf2vfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vcmp.f64 d0, d1 +#else vmov d6, r0, r1 // load r0/r1 pair in double register vmov d7, r2, r3 // load r2/r3 pair in double register vcmp.f64 d6, d7 +#endif vmrs apsr_nzcv, fpscr movmi r0, #1 // set result register to 1 if equal movpl r0, #0 diff --git a/lib/builtins/arm/ltsf2vfp.S b/lib/builtins/arm/ltsf2vfp.S index 95febb606..c4ff812b4 100644 --- a/lib/builtins/arm/ltsf2vfp.S +++ b/lib/builtins/arm/ltsf2vfp.S @@ -19,9 +19,13 @@ .syntax unified .p2align 2 DEFINE_COMPILERRT_FUNCTION(__ltsf2vfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vcmp.f32 s0, s1 +#else vmov s14, r0 // move from GPR 0 to float register vmov s15, r1 // move from GPR 1 to float register vcmp.f32 s14, s15 +#endif vmrs apsr_nzcv, fpscr movmi r0, #1 // set result register to 1 if equal movpl r0, #0 diff --git a/lib/builtins/arm/muldf3vfp.S b/lib/builtins/arm/muldf3vfp.S index f638de1ad..aa7b23495 100644 --- a/lib/builtins/arm/muldf3vfp.S +++ b/lib/builtins/arm/muldf3vfp.S @@ -18,10 +18,14 @@ .syntax unified .p2align 2 DEFINE_COMPILERRT_FUNCTION(__muldf3vfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vmul.f64 d0, d0, d1 +#else vmov d6, r0, r1 // move first param from r0/r1 pair into d6 vmov d7, r2, r3 // move second param from r2/r3 pair into d7 - vmul.f64 d6, d6, d7 + vmul.f64 d6, d6, d7 vmov r0, r1, d6 // move result back to r0/r1 pair +#endif bx lr END_COMPILERRT_FUNCTION(__muldf3vfp) diff --git a/lib/builtins/arm/mulsf3vfp.S b/lib/builtins/arm/mulsf3vfp.S index bef58d3a0..a1da789dc 100644 --- a/lib/builtins/arm/mulsf3vfp.S +++ b/lib/builtins/arm/mulsf3vfp.S @@ -18,9 +18,13 @@ .syntax unified .p2align 2 DEFINE_COMPILERRT_FUNCTION(__mulsf3vfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vmul.f32 s0, s0, s1 +#else vmov s14, r0 // move first param from r0 into float register vmov s15, r1 // move second param from r1 into float register vmul.f32 s13, s14, s15 +#endif vmov r0, s13 // move result back to r0 bx lr END_COMPILERRT_FUNCTION(__mulsf3vfp) diff --git a/lib/builtins/arm/nedf2vfp.S b/lib/builtins/arm/nedf2vfp.S index 78cf529d6..7d884e072 100644 --- a/lib/builtins/arm/nedf2vfp.S +++ b/lib/builtins/arm/nedf2vfp.S @@ -19,9 +19,13 @@ .syntax unified .p2align 2 DEFINE_COMPILERRT_FUNCTION(__nedf2vfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vcmp.f64 d0, d1 +#else vmov d6, r0, r1 // load r0/r1 pair in double register vmov d7, r2, r3 // load r2/r3 pair in double register vcmp.f64 d6, d7 +#endif vmrs apsr_nzcv, fpscr movne r0, #1 // set result register to 0 if unequal moveq r0, #0 diff --git a/lib/builtins/arm/negdf2vfp.S b/lib/builtins/arm/negdf2vfp.S index 01c8ba6a1..81f0ab8ee 100644 --- a/lib/builtins/arm/negdf2vfp.S +++ b/lib/builtins/arm/negdf2vfp.S @@ -18,7 +18,11 @@ .syntax unified .p2align 2 DEFINE_COMPILERRT_FUNCTION(__negdf2vfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vneg.f64 d0, d0 +#else eor r1, r1, #-2147483648 // flip sign bit on double in r0/r1 pair +#endif bx lr END_COMPILERRT_FUNCTION(__negdf2vfp) diff --git a/lib/builtins/arm/negsf2vfp.S b/lib/builtins/arm/negsf2vfp.S index 797abb32e..46ab4a9cf 100644 --- a/lib/builtins/arm/negsf2vfp.S +++ b/lib/builtins/arm/negsf2vfp.S @@ -18,7 +18,11 @@ .syntax unified .p2align 2 DEFINE_COMPILERRT_FUNCTION(__negsf2vfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vneg.f32 s0, s0 +#else eor r0, r0, #-2147483648 // flip sign bit on float in r0 +#endif bx lr END_COMPILERRT_FUNCTION(__negsf2vfp) diff --git a/lib/builtins/arm/nesf2vfp.S b/lib/builtins/arm/nesf2vfp.S index 554d3e467..97c764f63 100644 --- a/lib/builtins/arm/nesf2vfp.S +++ b/lib/builtins/arm/nesf2vfp.S @@ -19,9 +19,13 @@ .syntax unified .p2align 2 DEFINE_COMPILERRT_FUNCTION(__nesf2vfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vcmp.f32 s0, s1 +#else vmov s14, r0 // move from GPR 0 to float register vmov s15, r1 // move from GPR 1 to float register vcmp.f32 s14, s15 +#endif vmrs apsr_nzcv, fpscr movne r0, #1 // set result register to 1 if unequal moveq r0, #0 diff --git a/lib/builtins/arm/subdf3vfp.S b/lib/builtins/arm/subdf3vfp.S index 1fc7d18c3..2b6f2bdbf 100644 --- a/lib/builtins/arm/subdf3vfp.S +++ b/lib/builtins/arm/subdf3vfp.S @@ -18,10 +18,14 @@ .syntax unified .p2align 2 DEFINE_COMPILERRT_FUNCTION(__subdf3vfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vsub.f64 d0, d0, d1 +#else vmov d6, r0, r1 // move first param from r0/r1 pair into d6 vmov d7, r2, r3 // move second param from r2/r3 pair into d7 vsub.f64 d6, d6, d7 vmov r0, r1, d6 // move result back to r0/r1 pair +#endif bx lr END_COMPILERRT_FUNCTION(__subdf3vfp) diff --git a/lib/builtins/arm/subsf3vfp.S b/lib/builtins/arm/subsf3vfp.S index 11fe386cd..3e83ea265 100644 --- a/lib/builtins/arm/subsf3vfp.S +++ b/lib/builtins/arm/subsf3vfp.S @@ -12,17 +12,21 @@ // // extern float __subsf3vfp(float a, float b); // -// Returns the difference between two single precision floating point numbers +// Returns the difference between two single precision floating point numbers // using the Darwin calling convention where single arguments are passsed // like 32-bit ints. // .syntax unified .p2align 2 DEFINE_COMPILERRT_FUNCTION(__subsf3vfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vsub.f32 s0, s0, s1 +#else vmov s14, r0 // move first param from r0 into float register vmov s15, r1 // move second param from r1 into float register vsub.f32 s14, s14, s15 vmov r0, s14 // move result back to r0 +#endif bx lr END_COMPILERRT_FUNCTION(__subsf3vfp) diff --git a/lib/builtins/arm/truncdfsf2vfp.S b/lib/builtins/arm/truncdfsf2vfp.S index 04287ad27..682e54d3d 100644 --- a/lib/builtins/arm/truncdfsf2vfp.S +++ b/lib/builtins/arm/truncdfsf2vfp.S @@ -19,9 +19,13 @@ .syntax unified .p2align 2 DEFINE_COMPILERRT_FUNCTION(__truncdfsf2vfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vcvt.f32.f64 s0, d0 +#else vmov d7, r0, r1 // load double from r0/r1 pair vcvt.f32.f64 s15, d7 // convert double to single (trucate precision) vmov r0, s15 // return result in r0 +#endif bx lr END_COMPILERRT_FUNCTION(__truncdfsf2vfp) diff --git a/lib/builtins/arm/unorddf2vfp.S b/lib/builtins/arm/unorddf2vfp.S index 022dd7a97..855637547 100644 --- a/lib/builtins/arm/unorddf2vfp.S +++ b/lib/builtins/arm/unorddf2vfp.S @@ -19,9 +19,13 @@ .syntax unified .p2align 2 DEFINE_COMPILERRT_FUNCTION(__unorddf2vfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vcmp.f64 d0, d1 +#else vmov d6, r0, r1 // load r0/r1 pair in double register vmov d7, r2, r3 // load r2/r3 pair in double register - vcmp.f64 d6, d7 + vcmp.f64 d6, d7 +#endif vmrs apsr_nzcv, fpscr movvs r0, #1 // set result register to 1 if "overflow" (any NaNs) movvc r0, #0 diff --git a/lib/builtins/arm/unordsf2vfp.S b/lib/builtins/arm/unordsf2vfp.S index 5ebdd3df5..2b16b4905 100644 --- a/lib/builtins/arm/unordsf2vfp.S +++ b/lib/builtins/arm/unordsf2vfp.S @@ -19,9 +19,13 @@ .syntax unified .p2align 2 DEFINE_COMPILERRT_FUNCTION(__unordsf2vfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vcmp.f32 s0, s1 +#else vmov s14, r0 // move from GPR 0 to float register vmov s15, r1 // move from GPR 1 to float register vcmp.f32 s14, s15 +#endif vmrs apsr_nzcv, fpscr movvs r0, #1 // set result register to 1 if "overflow" (any NaNs) movvc r0, #0 diff --git a/lib/builtins/clear_cache.c b/lib/builtins/clear_cache.c index 4c2ac3b1e..a68f9fcfb 100644 --- a/lib/builtins/clear_cache.c +++ b/lib/builtins/clear_cache.c @@ -82,10 +82,6 @@ uintptr_t GetCurrentProcess(void); #endif #endif -#if defined(__linux__) && defined(__arm__) - #include <asm/unistd.h> -#endif - /* * The compiler generates calls to __clear_cache() when creating * trampoline functions on the stack for use with nested functions. @@ -108,6 +104,15 @@ void __clear_cache(void *start, void *end) { sysarch(ARM_SYNC_ICACHE, &arg); #elif defined(__linux__) + /* + * We used to include asm/unistd.h for the __ARM_NR_cacheflush define, but + * it also brought many other unused defines, as well as a dependency on + * kernel headers to be installed. + * + * This value is stable at least since Linux 3.13 and should remain so for + * compatibility reasons, warranting it's re-definition here. + */ + #define __ARM_NR_cacheflush 0x0f0002 register int start_reg __asm("r0") = (int) (intptr_t) start; const register int end_reg __asm("r1") = (int) (intptr_t) end; const register int flags __asm("r2") = 0; diff --git a/lib/builtins/floattitf.c b/lib/builtins/floattitf.c new file mode 100644 index 000000000..994fded39 --- /dev/null +++ b/lib/builtins/floattitf.c @@ -0,0 +1,82 @@ +//===-- lib/floattitf.c - int128 -> quad-precision conversion -----*- C -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements ti_int to quad-precision conversion for the +// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even +// mode. +// +//===----------------------------------------------------------------------===// + +#define QUAD_PRECISION +#include "fp_lib.h" +#include "int_lib.h" + +/* Returns: convert a ti_int to a fp_t, rounding toward even. */ + +/* Assumption: fp_t is a IEEE 128 bit floating point type + * ti_int is a 128 bit integral type + */ + +/* seee eeee eeee eeee mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | + * mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm + */ + +#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT) +COMPILER_RT_ABI fp_t +__floattitf(ti_int a) { + if (a == 0) + return 0.0; + const unsigned N = sizeof(ti_int) * CHAR_BIT; + const ti_int s = a >> (N-1); + a = (a ^ s) - s; + int sd = N - __clzti2(a); /* number of significant digits */ + int e = sd - 1; /* exponent */ + if (sd > LDBL_MANT_DIG) { + /* start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx + * finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR + * 12345678901234567890123456 + * 1 = msb 1 bit + * P = bit LDBL_MANT_DIG-1 bits to the right of 1 + * Q = bit LDBL_MANT_DIG bits to the right of 1 + * R = "or" of all bits to the right of Q + */ + switch (sd) { + case LDBL_MANT_DIG + 1: + a <<= 1; + break; + case LDBL_MANT_DIG + 2: + break; + default: + a = ((tu_int)a >> (sd - (LDBL_MANT_DIG+2))) | + ((a & ((tu_int)(-1) >> ((N + LDBL_MANT_DIG+2) - sd))) != 0); + }; + /* finish: */ + a |= (a & 4) != 0; /* Or P into R */ + ++a; /* round - this step may add a significant bit */ + a >>= 2; /* dump Q and R */ + /* a is now rounded to LDBL_MANT_DIG or LDBL_MANT_DIG+1 bits */ + if (a & ((tu_int)1 << LDBL_MANT_DIG)) { + a >>= 1; + ++e; + } + /* a is now rounded to LDBL_MANT_DIG bits */ + } else { + a <<= (LDBL_MANT_DIG - sd); + /* a is now rounded to LDBL_MANT_DIG bits */ + } + + long_double_bits fb; + fb.u.high.all = (s & 0x8000000000000000LL) /* sign */ + | (du_int)(e + 16383) << 48 /* exponent */ + | ((a >> 64) & 0x0000ffffffffffffLL); /* significand */ + fb.u.low.all = (du_int)(a); + return fb.f; +} + +#endif diff --git a/lib/builtins/floatuntitf.c b/lib/builtins/floatuntitf.c new file mode 100644 index 000000000..e2518c93f --- /dev/null +++ b/lib/builtins/floatuntitf.c @@ -0,0 +1,79 @@ +//===-- lib/floatuntitf.c - uint128 -> quad-precision conversion --*- C -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements tu_int to quad-precision conversion for the +// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even +// mode. +// +//===----------------------------------------------------------------------===// + +#define QUAD_PRECISION +#include "fp_lib.h" +#include "int_lib.h" + +/* Returns: convert a tu_int to a fp_t, rounding toward even. */ + +/* Assumption: fp_t is a IEEE 128 bit floating point type + * tu_int is a 128 bit integral type + */ + +/* seee eeee eeee eeee mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | + * mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm + */ + +#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT) +COMPILER_RT_ABI fp_t +__floatuntitf(tu_int a) { + if (a == 0) + return 0.0; + const unsigned N = sizeof(tu_int) * CHAR_BIT; + int sd = N - __clzti2(a); /* number of significant digits */ + int e = sd - 1; /* exponent */ + if (sd > LDBL_MANT_DIG) { + /* start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx + * finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR + * 12345678901234567890123456 + * 1 = msb 1 bit + * P = bit LDBL_MANT_DIG-1 bits to the right of 1 + * Q = bit LDBL_MANT_DIG bits to the right of 1 + * R = "or" of all bits to the right of Q + */ + switch (sd) { + case LDBL_MANT_DIG + 1: + a <<= 1; + break; + case LDBL_MANT_DIG + 2: + break; + default: + a = (a >> (sd - (LDBL_MANT_DIG+2))) | + ((a & ((tu_int)(-1) >> ((N + LDBL_MANT_DIG+2) - sd))) != 0); + }; + /* finish: */ + a |= (a & 4) != 0; /* Or P into R */ + ++a; /* round - this step may add a significant bit */ + a >>= 2; /* dump Q and R */ + /* a is now rounded to LDBL_MANT_DIG or LDBL_MANT_DIG+1 bits */ + if (a & ((tu_int)1 << LDBL_MANT_DIG)) { + a >>= 1; + ++e; + } + /* a is now rounded to LDBL_MANT_DIG bits */ + } else { + a <<= (LDBL_MANT_DIG - sd); + /* a is now rounded to LDBL_MANT_DIG bits */ + } + + long_double_bits fb; + fb.u.high.all = (du_int)(e + 16383) << 48 /* exponent */ + | ((a >> 64) & 0x0000ffffffffffffLL); /* significand */ + fb.u.low.all = (du_int)(a); + return fb.f; +} + +#endif diff --git a/lib/builtins/os_version_check.c b/lib/builtins/os_version_check.c new file mode 100644 index 000000000..b36ae546e --- /dev/null +++ b/lib/builtins/os_version_check.c @@ -0,0 +1,124 @@ +/* ===-- os_version_check.c - OS version checking -------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements the function __isOSVersionAtLeast, used by + * Objective-C's @available + * + * ===----------------------------------------------------------------------=== + */ + +#ifdef __APPLE__ + +#include <CoreFoundation/CoreFoundation.h> +#include <dispatch/dispatch.h> +#include <TargetConditionals.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +/* These three variables hold the host's OS version. */ +static int32_t GlobalMajor, GlobalMinor, GlobalSubminor; +static dispatch_once_t DispatchOnceCounter; + +/* Find and parse the SystemVersion.plist file. */ +static void parseSystemVersionPList(void *Unused) { + (void)Unused; + + char *PListPath = "/System/Library/CoreServices/SystemVersion.plist"; + +#if TARGET_OS_SIMULATOR + char *PListPathPrefix = getenv("IPHONE_SIMULATOR_ROOT"); + if (!PListPathPrefix) + return; + char FullPath[strlen(PListPathPrefix) + strlen(PListPath) + 1]; + strcpy(FullPath, PListPathPrefix); + strcat(FullPath, PListPath); + PListPath = FullPath; +#endif + FILE *PropertyList = fopen(PListPath, "r"); + if (!PropertyList) + return; + + /* Dynamically allocated stuff. */ + CFDictionaryRef PListRef = NULL; + CFDataRef FileContentsRef = NULL; + UInt8 *PListBuf = NULL; + + fseek(PropertyList, 0, SEEK_END); + long PListFileSize = ftell(PropertyList); + if (PListFileSize < 0) + goto Fail; + rewind(PropertyList); + + PListBuf = malloc((size_t)PListFileSize); + if (!PListBuf) + goto Fail; + + size_t NumRead = fread(PListBuf, 1, (size_t)PListFileSize, PropertyList); + if (NumRead != (size_t)PListFileSize) + goto Fail; + + /* Get the file buffer into CF's format. We pass in a null allocator here * + * because we free PListBuf ourselves */ + FileContentsRef = CFDataCreateWithBytesNoCopy( + NULL, PListBuf, (CFIndex)NumRead, kCFAllocatorNull); + if (!FileContentsRef) + goto Fail; + + if (&CFPropertyListCreateWithData) + PListRef = CFPropertyListCreateWithData( + NULL, FileContentsRef, kCFPropertyListImmutable, NULL, NULL); + else { +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wdeprecated-declarations" + PListRef = CFPropertyListCreateFromXMLData(NULL, FileContentsRef, + kCFPropertyListImmutable, NULL); +#pragma clang diagnostic pop + } + if (!PListRef) + goto Fail; + + CFTypeRef OpaqueValue = + CFDictionaryGetValue(PListRef, CFSTR("ProductVersion")); + if (!OpaqueValue || CFGetTypeID(OpaqueValue) != CFStringGetTypeID()) + goto Fail; + + char VersionStr[32]; + if (!CFStringGetCString((CFStringRef)OpaqueValue, VersionStr, + sizeof(VersionStr), kCFStringEncodingUTF8)) + goto Fail; + sscanf(VersionStr, "%d.%d.%d", &GlobalMajor, &GlobalMinor, &GlobalSubminor); + +Fail: + if (PListRef) + CFRelease(PListRef); + if (FileContentsRef) + CFRelease(FileContentsRef); + free(PListBuf); + fclose(PropertyList); +} + +int32_t __isOSVersionAtLeast(int32_t Major, int32_t Minor, int32_t Subminor) { + /* Populate the global version variables, if they haven't already. */ + dispatch_once_f(&DispatchOnceCounter, NULL, parseSystemVersionPList); + + if (Major < GlobalMajor) return 1; + if (Major > GlobalMajor) return 0; + if (Minor < GlobalMinor) return 1; + if (Minor > GlobalMinor) return 0; + return Subminor <= GlobalSubminor; +} + +#else + +/* Silence an empty translation unit warning. */ +typedef int unused; + +#endif |