diff options
Diffstat (limited to 'lib/builtins/hexagon')
30 files changed, 252 insertions, 298 deletions
diff --git a/lib/builtins/hexagon/common_entry_exit_abi1.S b/lib/builtins/hexagon/common_entry_exit_abi1.S index d5479d2a5..23fed01c6 100644 --- a/lib/builtins/hexagon/common_entry_exit_abi1.S +++ b/lib/builtins/hexagon/common_entry_exit_abi1.S @@ -1,14 +1,13 @@ //===----------------------Hexagon builtin routine ------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -/* Functions that implement common sequences in function prologues and epilogues - used to save code size */ +// Functions that implement common sequences in function prologues and epilogues +// used to save code size .macro FUNCTION_BEGIN name .text @@ -33,16 +32,16 @@ -/* Save r25:24 at fp+#-8 and r27:26 at fp+#-16. */ +// Save r25:24 at fp+#-8 and r27:26 at fp+#-16. -/* The compiler knows that the __save_* functions clobber LR. No other - registers should be used without informing the compiler. */ +// The compiler knows that the __save_* functions clobber LR. No other +// registers should be used without informing the compiler. -/* Since we can only issue one store per packet, we don't hurt performance by - simply jumping to the right point in this sequence of stores. */ +// Since we can only issue one store per packet, we don't hurt performance by +// simply jumping to the right point in this sequence of stores. FUNCTION_BEGIN __save_r24_through_r27 memd(fp+#-16) = r27:26 @@ -56,10 +55,10 @@ FUNCTION_END __save_r24_through_r25 -/* For each of the *_before_tailcall functions, jumpr lr is executed in parallel - with deallocframe. 
That way, the return gets the old value of lr, which is - where these functions need to return, and at the same time, lr gets the value - it needs going into the tail call. */ +// For each of the *_before_tailcall functions, jumpr lr is executed in parallel +// with deallocframe. That way, the return gets the old value of lr, which is +// where these functions need to return, and at the same time, lr gets the value +// it needs going into the tail call. FUNCTION_BEGIN __restore_r24_through_r27_and_deallocframe_before_tailcall r27:26 = memd(fp+#-16) @@ -74,8 +73,8 @@ FUNCTION_END __restore_r24_through_r25_and_deallocframe_before_tailcall -/* Here we use the extra load bandwidth to restore LR early, allowing the return - to occur in parallel with the deallocframe. */ +// Here we use the extra load bandwidth to restore LR early, allowing the return +// to occur in parallel with the deallocframe. FUNCTION_BEGIN __restore_r24_through_r27_and_deallocframe { @@ -92,7 +91,7 @@ FUNCTION_END __restore_r24_through_r27_and_deallocframe -/* Here the load bandwidth is maximized. */ +// Here the load bandwidth is maximized. FUNCTION_BEGIN __restore_r24_through_r25_and_deallocframe { diff --git a/lib/builtins/hexagon/common_entry_exit_abi2.S b/lib/builtins/hexagon/common_entry_exit_abi2.S index 6f470343d..3b85aea2f 100644 --- a/lib/builtins/hexagon/common_entry_exit_abi2.S +++ b/lib/builtins/hexagon/common_entry_exit_abi2.S @@ -1,14 +1,13 @@ //===----------------------Hexagon builtin routine ------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -/* Functions that implement common sequences in function prologues and epilogues - used to save code size */ +// Functions that implement common sequences in function prologues and epilogues +// used to save code size .macro FUNCTION_BEGIN name .p2align 2 @@ -33,10 +32,10 @@ -/* Save r17:16 at fp+#-8, r19:18 at fp+#-16, r21:20 at fp+#-24, r23:22 at - fp+#-32, r25:24 at fp+#-40, and r27:26 at fp+#-48. - The compiler knows that the __save_* functions clobber LR. No other - registers should be used without informing the compiler. */ +// Save r17:16 at fp+#-8, r19:18 at fp+#-16, r21:20 at fp+#-24, r23:22 at +// fp+#-32, r25:24 at fp+#-40, and r27:26 at fp+#-48. +// The compiler knows that the __save_* functions clobber LR. No other +// registers should be used without informing the compiler. FUNCTION_BEGIN __save_r16_through_r27 { @@ -107,10 +106,10 @@ FUNCTION_BEGIN __save_r16_through_r17 } FUNCTION_END __save_r16_through_r17 -/* For each of the *_before_tailcall functions, jumpr lr is executed in parallel - with deallocframe. That way, the return gets the old value of lr, which is - where these functions need to return, and at the same time, lr gets the value - it needs going into the tail call. */ +// For each of the *_before_tailcall functions, jumpr lr is executed in parallel +// with deallocframe. That way, the return gets the old value of lr, which is +// where these functions need to return, and at the same time, lr gets the value +// it needs going into the tail call. 
FUNCTION_BEGIN __restore_r16_through_r27_and_deallocframe_before_tailcall diff --git a/lib/builtins/hexagon/common_entry_exit_legacy.S b/lib/builtins/hexagon/common_entry_exit_legacy.S index 3258f15a3..8a6044573 100644 --- a/lib/builtins/hexagon/common_entry_exit_legacy.S +++ b/lib/builtins/hexagon/common_entry_exit_legacy.S @@ -1,15 +1,14 @@ //===----------------------Hexagon builtin routine ------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -/* Functions that implement common sequences in function prologues and epilogues - used to save code size */ +// Functions that implement common sequences in function prologues and epilogues +// used to save code size .macro FUNCTION_BEGIN name .text @@ -34,17 +33,17 @@ -/* Save r27:26 at fp+#-8, r25:24 at fp+#-16, r23:22 at fp+#-24, r21:20 at - fp+#-32, r19:18 at fp+#-40, and r17:16 at fp+#-48. */ +// Save r27:26 at fp+#-8, r25:24 at fp+#-16, r23:22 at fp+#-24, r21:20 at +// fp+#-32, r19:18 at fp+#-40, and r17:16 at fp+#-48. -/* The compiler knows that the __save_* functions clobber LR. No other - registers should be used without informing the compiler. */ +// The compiler knows that the __save_* functions clobber LR. No other +// registers should be used without informing the compiler. -/* Since we can only issue one store per packet, we don't hurt performance by - simply jumping to the right point in this sequence of stores. */ +// Since we can only issue one store per packet, we don't hurt performance by +// simply jumping to the right point in this sequence of stores. 
FUNCTION_BEGIN __save_r27_through_r16 memd(fp+#-48) = r17:16 @@ -65,10 +64,10 @@ FUNCTION_END __save_r27_through_r24 -/* For each of the *_before_sibcall functions, jumpr lr is executed in parallel - with deallocframe. That way, the return gets the old value of lr, which is - where these functions need to return, and at the same time, lr gets the value - it needs going into the sibcall. */ +// For each of the *_before_sibcall functions, jumpr lr is executed in parallel +// with deallocframe. That way, the return gets the old value of lr, which is +// where these functions need to return, and at the same time, lr gets the value +// it needs going into the sibcall. FUNCTION_BEGIN __restore_r27_through_r20_and_deallocframe_before_sibcall { @@ -108,8 +107,8 @@ FUNCTION_END __restore_r27_through_r26_and_deallocframe_before_sibcall -/* Here we use the extra load bandwidth to restore LR early, allowing the return - to occur in parallel with the deallocframe. */ +// Here we use the extra load bandwidth to restore LR early, allowing the return +// to occur in parallel with the deallocframe. FUNCTION_BEGIN __restore_r27_through_r16_and_deallocframe { @@ -136,7 +135,7 @@ FUNCTION_END __restore_r27_through_r24_and_deallocframe -/* Here the load bandwidth is maximized for all three functions. */ +// Here the load bandwidth is maximized for all three functions. FUNCTION_BEGIN __restore_r27_through_r18_and_deallocframe { diff --git a/lib/builtins/hexagon/dfaddsub.S b/lib/builtins/hexagon/dfaddsub.S index 4173f86a4..1b0d34550 100644 --- a/lib/builtins/hexagon/dfaddsub.S +++ b/lib/builtins/hexagon/dfaddsub.S @@ -1,13 +1,12 @@ //===----------------------Hexagon builtin routine ------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -/* Double Precision Multiply */ +// Double Precision Add/Subtract #define A r1:0 #define AH r1 @@ -179,18 +178,17 @@ __hexagon_subdf3: .Ladd_ovf_unf: // Overflow or Denormal is possible // Good news: Underflow flag is not possible! - /* - * ATMP has 2's complement value - * - * EXPA has A's exponent, EXPB has EXPA-BIAS-60 - * - * Convert, extract exponent, add adjustment. - * If > 2046, overflow - * If <= 0, denormal - * - * Note that we've not done our zero check yet, so do that too - * - */ + + // ATMP has 2's complement value + // + // EXPA has A's exponent, EXPB has EXPA-BIAS-60 + // + // Convert, extract exponent, add adjustment. + // If > 2046, overflow + // If <= 0, denormal + // + // Note that we've not done our zero check yet, so do that too + { A = convert_d2df(ATMP) p0 = cmp.eq(ATMPH,#0) diff --git a/lib/builtins/hexagon/dfdiv.S b/lib/builtins/hexagon/dfdiv.S index 0c5dbe272..202965ec4 100644 --- a/lib/builtins/hexagon/dfdiv.S +++ b/lib/builtins/hexagon/dfdiv.S @@ -1,13 +1,12 @@ //===----------------------Hexagon builtin routine ------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -/* Double Precision Divide */ +// Double Precision Divide #define A r1:0 #define AH r1 @@ -237,10 +236,10 @@ __hexagon_divdf3: P_TMP = cmp.gt(EXPA,#0) if (P_TMP.new) jump:nt .Lpossible_unf // round up to normal possible... 
} - /* Underflow */ - /* We know what the infinite range exponent should be (EXPA) */ - /* Q is 2's complement, PROD is abs(Q) */ - /* Normalize Q, shift right, add a high bit, convert, change exponent */ + // Underflow + // We know what the infinite range exponent should be (EXPA) + // Q is 2's complement, PROD is abs(Q) + // Normalize Q, shift right, add a high bit, convert, change exponent #define FUDGE1 7 // how much to shift right #define FUDGE2 4 // how many guard/round to keep at lsbs @@ -287,8 +286,8 @@ __hexagon_divdf3: .Lpossible_unf: - /* If upper parts of Q were all F's, but abs(A) == 0x00100000_00000000, we rounded up to min_normal */ - /* The answer is correct, but we need to raise Underflow */ + // If upper parts of Q were all F's, but abs(A) == 0x00100000_00000000, we rounded up to min_normal + // The answer is correct, but we need to raise Underflow { B = extractu(A,#63,#0) TMPPAIR = combine(##0x00100000,#0) // min normal @@ -321,9 +320,9 @@ __hexagon_divdf3: } .Ldiv_ovf: - /* - * Raise Overflow, and choose the correct overflow value (saturated normal or infinity) - */ + + // Raise Overflow, and choose the correct overflow value (saturated normal or infinity) + { TMP = USR B = combine(##0x7fefffff,#-1) @@ -389,8 +388,8 @@ __hexagon_divdf3: if (!P_ZERO) jump .Ldiv_zero_result if (!P_INF) jump .Ldiv_inf_result } - /* Now we've narrowed it down to (de)normal / (de)normal */ - /* Set up A/EXPA B/EXPB and go back */ + // Now we've narrowed it down to (de)normal / (de)normal + // Set up A/EXPA B/EXPB and go back #undef P_ZERO #undef P_INF #define P_TMP2 p1 diff --git a/lib/builtins/hexagon/dffma.S b/lib/builtins/hexagon/dffma.S index 97b885a3b..c201d3d8b 100644 --- a/lib/builtins/hexagon/dffma.S +++ b/lib/builtins/hexagon/dffma.S @@ -1,16 +1,15 @@ //===----------------------Hexagon builtin routine ------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// 
Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG #define END(TAG) .size TAG,.-TAG -/* Double Precision Multiply */ +// Double Precision Fused Multiply-Add #define A r1:0 @@ -76,33 +75,29 @@ #define SR_ROUND_OFF 22 #endif - /* - * First, classify for normal values, and abort if abnormal - * - * Next, unpack mantissa into 0x1000_0000_0000_0000 + mant<<8 - * - * Since we know that the 2 MSBs of the H registers is zero, we should never carry - * the partial products that involve the H registers - * - * Try to buy X slots, at the expense of latency if needed - * - * We will have PP_HH with the upper bits of the product, PP_LL with the lower - * PP_HH can have a maximum of 0x03FF_FFFF_FFFF_FFFF or thereabouts - * PP_HH can have a minimum of 0x0100_0000_0000_0000 - * - * 0x0100_0000_0000_0000 has EXP of EXPA+EXPB-BIAS - * - * We need to align CTMP. - * If CTMP >> PP, convert PP to 64 bit with sticky, align CTMP, and follow normal add - * If CTMP << PP align CTMP and add 128 bits. Then compute sticky - * If CTMP ~= PP, align CTMP and add 128 bits. May have massive cancellation. - * - * Convert partial product and CTMP to 2's complement prior to addition - * - * After we add, we need to normalize into upper 64 bits, then compute sticky. 
- * - * - */ + // First, classify for normal values, and abort if abnormal + // + // Next, unpack mantissa into 0x1000_0000_0000_0000 + mant<<8 + // + // Since we know that the 2 MSBs of the H registers is zero, we should never carry + // the partial products that involve the H registers + // + // Try to buy X slots, at the expense of latency if needed + // + // We will have PP_HH with the upper bits of the product, PP_LL with the lower + // PP_HH can have a maximum of 0x03FF_FFFF_FFFF_FFFF or thereabouts + // PP_HH can have a minimum of 0x0100_0000_0000_0000 + // + // 0x0100_0000_0000_0000 has EXP of EXPA+EXPB-BIAS + // + // We need to align CTMP. + // If CTMP >> PP, convert PP to 64 bit with sticky, align CTMP, and follow normal add + // If CTMP << PP align CTMP and add 128 bits. Then compute sticky + // If CTMP ~= PP, align CTMP and add 128 bits. May have massive cancellation. + // + // Convert partial product and CTMP to 2's complement prior to addition + // + // After we add, we need to normalize into upper 64 bits, then compute sticky. .text .global __hexagon_fmadf4 @@ -182,14 +177,12 @@ fma: #define EXPCA r19:18 EXPC = extractu(CH,#EXPBITS,#HI_MANTBITS) } - /* PP_HH:PP_LL now has product */ - /* CTMP is negated */ - /* EXPA,B,C are extracted */ - /* - * We need to negate PP - * Since we will be adding with carry later, if we need to negate, - * just invert all bits now, which we can do conditionally and in parallel - */ + // PP_HH:PP_LL now has product + // CTMP is negated + // EXPA,B,C are extracted + // We need to negate PP + // Since we will be adding with carry later, if we need to negate, + // just invert all bits now, which we can do conditionally and in parallel #define PP_HH_TMP r15:14 #define PP_LL_TMP r7:6 { @@ -274,18 +267,16 @@ fma: PP_HH = add(CTMP,PP_HH,P_CARRY):carry TMP = #62 } - /* - * PP_HH:PP_LL now holds the sum - * We may need to normalize left, up to ??? bits. 
- * - * I think that if we have massive cancellation, the range we normalize by - * is still limited - */ + // PP_HH:PP_LL now holds the sum + // We may need to normalize left, up to ??? bits. + // + // I think that if we have massive cancellation, the range we normalize by + // is still limited { LEFTSHIFT = add(clb(PP_HH),#-2) if (!cmp.eq(LEFTSHIFT.new,TMP)) jump:t 1f // all sign bits? } - /* We had all sign bits, shift left by 62. */ + // We had all sign bits, shift left by 62. { CTMP = extractu(PP_LL,#62,#2) PP_LL = asl(PP_LL,#62) @@ -330,7 +321,7 @@ fma: if (!P_TMP) dealloc_return // not zero, return } .Ladd_yields_zero: - /* We had full cancellation. Return +/- zero (-0 when round-down) */ + // We had full cancellation. Return +/- zero (-0 when round-down) { TMP = USR A = #0 @@ -408,9 +399,9 @@ fma: EXPA = sub(#1+5,TMP) // Amount to right shift to denormalize p3 = cmp.gt(CTMPH,#-1) } - /* Underflow */ - /* We know that the infinte range exponent should be EXPA */ - /* CTMP is 2's complement, ATMP is abs(CTMP) */ + // Underflow + // We know that the infinite range exponent should be EXPA + // CTMP is 2's complement, ATMP is abs(CTMP) { EXPA = add(EXPA,EXPB) // how much to shift back right ATMP = asl(ATMP,EXPB) // shift left @@ -593,7 +584,7 @@ fma: p1 = dfclass(C,#0x08) if (p1.new) jump:nt .Lfma_inf_plus_inf } - /* A*B is +/- inf, C is finite. Return A */ + // A*B is +/- inf, C is finite. Return A { jumpr r31 } @@ -649,7 +640,7 @@ fma: if (!p0) A = C // If C is not zero, return C if (!p0) jumpr r31 } - /* B has correctly signed zero, C is also zero */ + // B has correctly signed zero, C is also zero .Lzero_plus_zero: { p0 = cmp.eq(B,C) // yes, scalar equals. +0++0 or -0+-0 @@ -674,8 +665,8 @@ fma: #define CTMP r11:10 .falign .Lfma_abnormal_c: - /* We know that AB is normal * normal */ - /* C is not normal: zero, subnormal, inf, or NaN. */ + // We know that AB is normal * normal + // C is not normal: zero, subnormal, inf, or NaN. 
{ p0 = dfclass(C,#0x10) // is C NaN? if (p0.new) jump:nt .Lnan diff --git a/lib/builtins/hexagon/dfminmax.S b/lib/builtins/hexagon/dfminmax.S index 41122911f..44f031ba1 100644 --- a/lib/builtins/hexagon/dfminmax.S +++ b/lib/builtins/hexagon/dfminmax.S @@ -1,9 +1,8 @@ //===----------------------Hexagon builtin routine ------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// @@ -15,17 +14,14 @@ #define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG #define END(TAG) .size TAG,.-TAG -/* - * Min and Max return A if B is NaN, or B if A is NaN - * Otherwise, they return the smaller or bigger value - * - * If values are equal, we want to favor -0.0 for min and +0.0 for max. - */ +// Min and Max return A if B is NaN, or B if A is NaN +// Otherwise, they return the smaller or bigger value +// +// If values are equal, we want to favor -0.0 for min and +0.0 for max. + +// Compares always return false for NaN +// if (isnan(A)) A = B; if (A > B) A = B will only trigger at most one of those options. -/* - * Compares always return false for NaN - * if (isnan(A)) A = B; if (A > B) A = B will only trigger at most one of those options. 
- */ .text .global __hexagon_mindf3 .global __hexagon_maxdf3 @@ -51,7 +47,7 @@ fmin: p2 = dfcmp.eq(A,B) // if A == B if (!p2.new) jumpr:t r31 } - /* A == B, return A|B to select -0.0 over 0.0 */ + // A == B, return A|B to select -0.0 over 0.0 { A = or(ATMP,B) jumpr r31 @@ -71,7 +67,7 @@ fmax: p2 = dfcmp.eq(A,B) if (!p2.new) jumpr:t r31 } - /* A == B, return A&B to select 0.0 over -0.0 */ + // A == B, return A&B to select 0.0 over -0.0 { A = and(ATMP,B) jumpr r31 diff --git a/lib/builtins/hexagon/dfmul.S b/lib/builtins/hexagon/dfmul.S index fde6d77bd..e6f62c351 100644 --- a/lib/builtins/hexagon/dfmul.S +++ b/lib/builtins/hexagon/dfmul.S @@ -1,13 +1,12 @@ //===----------------------Hexagon builtin routine ------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -/* Double Precision Multiply */ +// Double Precision Multiply #define A r1:0 #define AH r1 #define AL r0 @@ -47,8 +46,8 @@ #define BIAS 1024 #define MANTISSA_TO_INT_BIAS 52 -/* Some constant to adjust normalization amount in error code */ -/* Amount to right shift the partial product to get to a denorm */ +// Some constant to adjust normalization amount in error code +// Amount to right shift the partial product to get to a denorm #define FUDGE 5 #define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG @@ -80,10 +79,10 @@ __hexagon_muldf3: PP_ODD = mpyu(BTMPL,ATMPH) BTMP = insert(ONE,#2,#62) } - /* since we know that the MSB of the H registers is zero, we should never carry */ - /* H <= 2^31-1. L <= 2^32-1. 
Therefore, HL <= 2^63-2^32-2^31+1 */ - /* Adding 2 HLs, we get 2^64-3*2^32+2 maximum. */ - /* Therefore, we can add 3 2^32-1 values safely without carry. We only need one. */ + // since we know that the MSB of the H registers is zero, we should never carry + // H <= 2^31-1. L <= 2^32-1. Therefore, HL <= 2^63-2^32-2^31+1 + // Adding 2 HLs, we get 2^64-3*2^32+2 maximum. + // Therefore, we can add 3 2^32-1 values safely without carry. We only need one. { PP_LL = mpyu(ATMPL,BTMPL) PP_ODD += mpyu(ATMPL,BTMPH) @@ -99,10 +98,10 @@ __hexagon_muldf3: p1 = cmp.eq(PP_LL_L,#0) // 64 lsb's 0? p1 = cmp.eq(PP_ODD_L,#0) // 64 lsb's 0? } - /* - * PP_HH can have a maximum of 0x3FFF_FFFF_FFFF_FFFF or thereabouts - * PP_HH can have a minimum of 0x1000_0000_0000_0000 or so - */ + + // PP_HH can have a maximum of 0x3FFF_FFFF_FFFF_FFFF or thereabouts + // PP_HH can have a minimum of 0x1000_0000_0000_0000 or so + #undef PP_ODD #undef PP_ODD_H #undef PP_ODD_L @@ -137,15 +136,15 @@ __hexagon_muldf3: .falign .Lpossible_unf: - /* We end up with a positive exponent */ - /* But we may have rounded up to an exponent of 1. */ - /* If the exponent is 1, if we rounded up to it - * we need to also raise underflow - * Fortunately, this is pretty easy to detect, we must have +/- 0x0010_0000_0000_0000 - * And the PP should also have more than one bit set - */ - /* Note: ATMP should have abs(PP_HH) */ - /* Note: BTMPL should have 0x7FEFFFFF */ + // We end up with a positive exponent + // But we may have rounded up to an exponent of 1. 
+ // If the exponent is 1, if we rounded up to it + // we need to also raise underflow + // Fortunately, this is pretty easy to detect, we must have +/- 0x0010_0000_0000_0000 + // And the PP should also have more than one bit set + // + // Note: ATMP should have abs(PP_HH) + // Note: BTMPL should have 0x7FEFFFFF { p0 = cmp.eq(AL,#0) p0 = bitsclr(AH,BTMPL) @@ -194,29 +193,25 @@ __hexagon_muldf3: BTMPH = sub(EXP0,BTMPH) TMP = #63 // max amount to shift } - /* Underflow */ - /* - * PP_HH has the partial product with sticky LSB. - * PP_HH can have a maximum of 0x3FFF_FFFF_FFFF_FFFF or thereabouts - * PP_HH can have a minimum of 0x1000_0000_0000_0000 or so - * The exponent of PP_HH is in EXP1, which is non-positive (0 or negative) - * That's the exponent that happens after the normalization - * - * EXP0 has the exponent that, when added to the normalized value, is out of range. - * - * Strategy: - * - * * Shift down bits, with sticky bit, such that the bits are aligned according - * to the LZ count and appropriate exponent, but not all the way to mantissa - * field, keep around the last few bits. - * * Put a 1 near the MSB - * * Check the LSBs for inexact; if inexact also set underflow - * * Convert [u]d2df -- will correctly round according to rounding mode - * * Replace exponent field with zero - * - * - */ - + // Underflow + // + // PP_HH has the partial product with sticky LSB. + // PP_HH can have a maximum of 0x3FFF_FFFF_FFFF_FFFF or thereabouts + // PP_HH can have a minimum of 0x1000_0000_0000_0000 or so + // The exponent of PP_HH is in EXP1, which is non-positive (0 or negative) + // That's the exponent that happens after the normalization + // + // EXP0 has the exponent that, when added to the normalized value, is out of range. + // + // Strategy: + // + // * Shift down bits, with sticky bit, such that the bits are aligned according + // to the LZ count and appropriate exponent, but not all the way to mantissa + // field, keep around the last few bits. 
+ // * Put a 1 near the MSB + // * Check the LSBs for inexact; if inexact also set underflow + // * Convert [u]d2df -- will correctly round according to rounding mode + // * Replace exponent field with zero { BTMPL = #0 // offset for extract diff --git a/lib/builtins/hexagon/dfsqrt.S b/lib/builtins/hexagon/dfsqrt.S index 027d9e1fd..f1435e868 100644 --- a/lib/builtins/hexagon/dfsqrt.S +++ b/lib/builtins/hexagon/dfsqrt.S @@ -1,13 +1,12 @@ //===----------------------Hexagon builtin routine ------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -/* Double Precision square root */ +// Double Precision square root #define EXP r28 @@ -169,9 +168,9 @@ __hexagon_sqrt: #define P_CARRY1 p2 #define P_CARRY2 p3 - /* Iteration 0 */ - /* Maybe we can save a cycle by starting with ERROR=asl(fracrad), then as we multiply */ - /* We can shift and subtract instead of shift and add? */ + // Iteration 0 + // Maybe we can save a cycle by starting with ERROR=asl(fracrad), then as we multiply + // We can shift and subtract instead of shift and add? 
{ ERROR = asl(FRACRAD,#15) PROD = mpyu(ROOTHI,ROOTHI) @@ -194,7 +193,7 @@ __hexagon_sqrt: SHIFTAMT = add(SHIFTAMT,#16) ERROR = asl(FRACRAD,#31) // for next iter } - /* Iteration 1 */ + // Iteration 1 { PROD = mpyu(ROOTHI,ROOTHI) ERROR -= mpyu(ROOTHI,ROOTLO) // amount is 31, no shift needed @@ -214,7 +213,7 @@ __hexagon_sqrt: SHIFTAMT = add(SHIFTAMT,#16) ERROR = asl(FRACRAD,#47) // for next iter } - /* Iteration 2 */ + // Iteration 2 { PROD = mpyu(ROOTHI,ROOTHI) } @@ -245,7 +244,7 @@ __hexagon_sqrt: #undef RECIPEST #undef SHIFTAMT #define TWOROOT_LO r9:8 - /* Adjust Root */ + // Adjust Root { HL = mpyu(ROOTHI,ROOTLO) LL = mpyu(ROOTLO,ROOTLO) diff --git a/lib/builtins/hexagon/divdi3.S b/lib/builtins/hexagon/divdi3.S index 49ee8104f..770601a47 100644 --- a/lib/builtins/hexagon/divdi3.S +++ b/lib/builtins/hexagon/divdi3.S @@ -1,9 +1,8 @@ //===----------------------Hexagon builtin routine ------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/lib/builtins/hexagon/divsi3.S b/lib/builtins/hexagon/divsi3.S index 8e159baa1..5f406524e 100644 --- a/lib/builtins/hexagon/divsi3.S +++ b/lib/builtins/hexagon/divsi3.S @@ -1,9 +1,8 @@ //===----------------------Hexagon builtin routine ------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/lib/builtins/hexagon/fabs_opt.S b/lib/builtins/hexagon/fabs_opt.S index b09b00734..6bf9b84b3 100644 --- a/lib/builtins/hexagon/fabs_opt.S +++ b/lib/builtins/hexagon/fabs_opt.S @@ -1,9 +1,8 @@ //===----------------------Hexagon builtin routine ------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/lib/builtins/hexagon/fastmath2_dlib_asm.S b/lib/builtins/hexagon/fastmath2_dlib_asm.S index 9286df06c..574a04432 100644 --- a/lib/builtins/hexagon/fastmath2_dlib_asm.S +++ b/lib/builtins/hexagon/fastmath2_dlib_asm.S @@ -1,9 +1,8 @@ //===----------------------Hexagon builtin routine ------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// /* ==================================================================== */ diff --git a/lib/builtins/hexagon/fastmath2_ldlib_asm.S b/lib/builtins/hexagon/fastmath2_ldlib_asm.S index 419255535..cf623f94c 100644 --- a/lib/builtins/hexagon/fastmath2_ldlib_asm.S +++ b/lib/builtins/hexagon/fastmath2_ldlib_asm.S @@ -1,9 +1,8 @@ //===----------------------Hexagon builtin routine ------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// /* ==================================================================== * diff --git a/lib/builtins/hexagon/fastmath_dlib_asm.S b/lib/builtins/hexagon/fastmath_dlib_asm.S index 215936b78..3e59526c1 100644 --- a/lib/builtins/hexagon/fastmath_dlib_asm.S +++ b/lib/builtins/hexagon/fastmath_dlib_asm.S @@ -1,9 +1,8 @@ //===----------------------Hexagon builtin routine ------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// /* ==================================================================== */ diff --git a/lib/builtins/hexagon/fma_opt.S b/lib/builtins/hexagon/fma_opt.S index 12378f0da..7f566adff 100644 --- a/lib/builtins/hexagon/fma_opt.S +++ b/lib/builtins/hexagon/fma_opt.S @@ -1,9 +1,8 @@ //===----------------------Hexagon builtin routine ------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/lib/builtins/hexagon/fmax_opt.S b/lib/builtins/hexagon/fmax_opt.S index f3a218c97..81d711dff 100644 --- a/lib/builtins/hexagon/fmax_opt.S +++ b/lib/builtins/hexagon/fmax_opt.S @@ -1,9 +1,8 @@ //===----------------------Hexagon builtin routine ------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/lib/builtins/hexagon/fmin_opt.S b/lib/builtins/hexagon/fmin_opt.S index ef9b0ff85..d043f1d7a 100644 --- a/lib/builtins/hexagon/fmin_opt.S +++ b/lib/builtins/hexagon/fmin_opt.S @@ -1,9 +1,8 @@ //===----------------------Hexagon builtin routine ------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/lib/builtins/hexagon/memcpy_forward_vp4cp4n2.S b/lib/builtins/hexagon/memcpy_forward_vp4cp4n2.S index fbe09086c..10b81f653 100644 --- a/lib/builtins/hexagon/memcpy_forward_vp4cp4n2.S +++ b/lib/builtins/hexagon/memcpy_forward_vp4cp4n2.S @@ -1,9 +1,8 @@ //===----------------------Hexagon builtin routine ------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/builtins/hexagon/memcpy_likely_aligned.S b/lib/builtins/hexagon/memcpy_likely_aligned.S index bbc85c22d..492298f10 100644 --- a/lib/builtins/hexagon/memcpy_likely_aligned.S +++ b/lib/builtins/hexagon/memcpy_likely_aligned.S @@ -1,9 +1,8 @@ //===------------------------- memcopy routines ---------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/lib/builtins/hexagon/moddi3.S b/lib/builtins/hexagon/moddi3.S index 12a0595fe..d4246b61b 100644 --- a/lib/builtins/hexagon/moddi3.S +++ b/lib/builtins/hexagon/moddi3.S @@ -1,9 +1,8 @@ //===----------------------Hexagon builtin routine ------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/lib/builtins/hexagon/modsi3.S b/lib/builtins/hexagon/modsi3.S index 5afda9e29..4015d5e06 100644 --- a/lib/builtins/hexagon/modsi3.S +++ b/lib/builtins/hexagon/modsi3.S @@ -1,9 +1,8 @@ //===----------------------Hexagon builtin routine ------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/lib/builtins/hexagon/sfdiv_opt.S b/lib/builtins/hexagon/sfdiv_opt.S index 6bdd4808c..7c9ae14b7 100644 --- a/lib/builtins/hexagon/sfdiv_opt.S +++ b/lib/builtins/hexagon/sfdiv_opt.S @@ -1,9 +1,8 @@ //===----------------------Hexagon builtin routine ------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/lib/builtins/hexagon/sfsqrt_opt.S b/lib/builtins/hexagon/sfsqrt_opt.S index 7f6190027..532df9a06 100644 --- a/lib/builtins/hexagon/sfsqrt_opt.S +++ b/lib/builtins/hexagon/sfsqrt_opt.S @@ -1,9 +1,8 @@ //===----------------------Hexagon builtin routine ------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/lib/builtins/hexagon/udivdi3.S b/lib/builtins/hexagon/udivdi3.S index 1ca326b75..23f931d4f 100644 --- a/lib/builtins/hexagon/udivdi3.S +++ b/lib/builtins/hexagon/udivdi3.S @@ -1,9 +1,8 @@ //===----------------------Hexagon builtin routine ------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/lib/builtins/hexagon/udivmoddi4.S b/lib/builtins/hexagon/udivmoddi4.S index deb5aae09..6dbfc59bd 100644 --- a/lib/builtins/hexagon/udivmoddi4.S +++ b/lib/builtins/hexagon/udivmoddi4.S @@ -1,9 +1,8 @@ //===----------------------Hexagon builtin routine ------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/lib/builtins/hexagon/udivmodsi4.S b/lib/builtins/hexagon/udivmodsi4.S index 25bbe7cd5..9e231212d 100644 --- a/lib/builtins/hexagon/udivmodsi4.S +++ b/lib/builtins/hexagon/udivmodsi4.S @@ -1,9 +1,8 @@ //===----------------------Hexagon builtin routine ------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/lib/builtins/hexagon/udivsi3.S b/lib/builtins/hexagon/udivsi3.S index 54f0aa409..d68599a8e 100644 --- a/lib/builtins/hexagon/udivsi3.S +++ b/lib/builtins/hexagon/udivsi3.S @@ -1,9 +1,8 @@ //===----------------------Hexagon builtin routine ------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/lib/builtins/hexagon/umoddi3.S b/lib/builtins/hexagon/umoddi3.S index f09152141..646ca128d 100644 --- a/lib/builtins/hexagon/umoddi3.S +++ b/lib/builtins/hexagon/umoddi3.S @@ -1,9 +1,8 @@ //===----------------------Hexagon builtin routine ------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/lib/builtins/hexagon/umodsi3.S b/lib/builtins/hexagon/umodsi3.S index a8270c203..a92394486 100644 --- a/lib/builtins/hexagon/umodsi3.S +++ b/lib/builtins/hexagon/umodsi3.S @@ -1,9 +1,8 @@ //===----------------------Hexagon builtin routine ------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// |