diff options
author | Saleem Abdulrasool <compnerd@compnerd.org> | 2014-05-12 15:23:37 +0000 |
---|---|---|
committer | Saleem Abdulrasool <compnerd@compnerd.org> | 2014-05-12 15:23:37 +0000 |
commit | eee6d0a714aef5d40279f9f4608efc7abffa8c76 (patch) | |
tree | 2eafba93d6b7b4e0c389b8803a3d5c4bc83e9d52 /lib/builtins | |
parent | 1d5cc7bed35dfcb6932e587157b50f0cf6c19175 (diff) | |
download | compiler-rt-eee6d0a714aef5d40279f9f4608efc7abffa8c76.tar.gz |
[CompilerRT] use .p2align, .balign instead of .align
The .align statements in the ARM assembly routines are actually meant to be a
power of 2 alignment (e.g. .align 2 == 4 byte alignment, not 2). Switch to using
.p2align. .p2align is always guaranteed to be a power-of-two alignment and is
much more explicit.
In the case of x86 (the i386 and x86_64 routines), .align is a byte alignment;
use .balign instead of .align.
git-svn-id: https://llvm.org/svn/llvm-project/compiler-rt/trunk@208578 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/builtins')
77 files changed, 94 insertions, 94 deletions
diff --git a/lib/builtins/arm/adddf3vfp.S b/lib/builtins/arm/adddf3vfp.S index 4302b6ab3..2825ae92c 100644 --- a/lib/builtins/arm/adddf3vfp.S +++ b/lib/builtins/arm/adddf3vfp.S @@ -16,7 +16,7 @@ // calling convention where double arguments are passsed in GPR pairs // .syntax unified - .align 2 + .p2align 2 DEFINE_COMPILERRT_FUNCTION(__adddf3vfp) vmov d6, r0, r1 // move first param from r0/r1 pair into d6 vmov d7, r2, r3 // move second param from r2/r3 pair into d7 diff --git a/lib/builtins/arm/addsf3vfp.S b/lib/builtins/arm/addsf3vfp.S index 316328abe..bff5a7e0f 100644 --- a/lib/builtins/arm/addsf3vfp.S +++ b/lib/builtins/arm/addsf3vfp.S @@ -16,7 +16,7 @@ // calling convention where single arguments are passsed in GPRs // .syntax unified - .align 2 + .p2align 2 DEFINE_COMPILERRT_FUNCTION(__addsf3vfp) vmov s14, r0 // move first param from r0 into float register vmov s15, r1 // move second param from r1 into float register diff --git a/lib/builtins/arm/aeabi_dcmp.S b/lib/builtins/arm/aeabi_dcmp.S index ee2946d2d..310c35b74 100644 --- a/lib/builtins/arm/aeabi_dcmp.S +++ b/lib/builtins/arm/aeabi_dcmp.S @@ -20,7 +20,7 @@ #define DEFINE_AEABI_DCMP(cond) \ .syntax unified SEPARATOR \ - .align 2 SEPARATOR \ + .p2align 2 SEPARATOR \ DEFINE_COMPILERRT_FUNCTION(__aeabi_dcmp ## cond) \ push { r4, lr } SEPARATOR \ bl SYMBOL_NAME(__ ## cond ## df2) SEPARATOR \ diff --git a/lib/builtins/arm/aeabi_fcmp.S b/lib/builtins/arm/aeabi_fcmp.S index ac3f54721..55f49a2b5 100644 --- a/lib/builtins/arm/aeabi_fcmp.S +++ b/lib/builtins/arm/aeabi_fcmp.S @@ -20,7 +20,7 @@ #define DEFINE_AEABI_FCMP(cond) \ .syntax unified SEPARATOR \ - .align 2 SEPARATOR \ + .p2align 2 SEPARATOR \ DEFINE_COMPILERRT_FUNCTION(__aeabi_fcmp ## cond) \ push { r4, lr } SEPARATOR \ bl SYMBOL_NAME(__ ## cond ## sf2) SEPARATOR \ diff --git a/lib/builtins/arm/aeabi_idivmod.S b/lib/builtins/arm/aeabi_idivmod.S index ac5dfcc22..384add382 100644 --- a/lib/builtins/arm/aeabi_idivmod.S +++ b/lib/builtins/arm/aeabi_idivmod.S @@ 
-16,7 +16,7 @@ // } .syntax unified - .align 2 + .p2align 2 DEFINE_COMPILERRT_FUNCTION(__aeabi_idivmod) push { lr } sub sp, sp, #4 diff --git a/lib/builtins/arm/aeabi_ldivmod.S b/lib/builtins/arm/aeabi_ldivmod.S index 684252571..ad06f1de2 100644 --- a/lib/builtins/arm/aeabi_ldivmod.S +++ b/lib/builtins/arm/aeabi_ldivmod.S @@ -17,7 +17,7 @@ // } .syntax unified - .align 2 + .p2align 2 DEFINE_COMPILERRT_FUNCTION(__aeabi_ldivmod) push {r11, lr} sub sp, sp, #16 diff --git a/lib/builtins/arm/aeabi_memcmp.S b/lib/builtins/arm/aeabi_memcmp.S index fc20825f4..051ce435b 100644 --- a/lib/builtins/arm/aeabi_memcmp.S +++ b/lib/builtins/arm/aeabi_memcmp.S @@ -11,7 +11,7 @@ // void __aeabi_memcmp(void *dest, void *src, size_t n) { memcmp(dest, src, n); } - .align 2 + .p2align 2 DEFINE_COMPILERRT_FUNCTION(__aeabi_memcmp) b memcmp END_COMPILERRT_FUNCTION(__aeabi_memcmp) diff --git a/lib/builtins/arm/aeabi_memcpy.S b/lib/builtins/arm/aeabi_memcpy.S index f6184632e..cf0233249 100644 --- a/lib/builtins/arm/aeabi_memcpy.S +++ b/lib/builtins/arm/aeabi_memcpy.S @@ -11,7 +11,7 @@ // void __aeabi_memcpy(void *dest, void *src, size_t n) { memcpy(dest, src, n); } - .align 2 + .p2align 2 DEFINE_COMPILERRT_FUNCTION(__aeabi_memcpy) b memcpy END_COMPILERRT_FUNCTION(__aeabi_memcpy) diff --git a/lib/builtins/arm/aeabi_memmove.S b/lib/builtins/arm/aeabi_memmove.S index f3d6b0e1a..4dda06f75 100644 --- a/lib/builtins/arm/aeabi_memmove.S +++ b/lib/builtins/arm/aeabi_memmove.S @@ -11,7 +11,7 @@ // void __aeabi_memmove(void *dest, void *src, size_t n) { memmove(dest, src, n); } - .align 2 + .p2align 2 DEFINE_COMPILERRT_FUNCTION(__aeabi_memmove) b memmove END_COMPILERRT_FUNCTION(__aeabi_memmove) diff --git a/lib/builtins/arm/aeabi_memset.S b/lib/builtins/arm/aeabi_memset.S index 43c085e26..c8b49c780 100644 --- a/lib/builtins/arm/aeabi_memset.S +++ b/lib/builtins/arm/aeabi_memset.S @@ -12,7 +12,7 @@ // void __aeabi_memset(void *dest, size_t n, int c) { memset(dest, c, n); } // void __aeabi_memclr(void 
*dest, size_t n) { __aeabi_memset(dest, n, 0); } - .align 2 + .p2align 2 DEFINE_COMPILERRT_FUNCTION(__aeabi_memset) mov r3, r1 mov r1, r2 diff --git a/lib/builtins/arm/aeabi_uidivmod.S b/lib/builtins/arm/aeabi_uidivmod.S index 4c1e818cb..8ea474d91 100644 --- a/lib/builtins/arm/aeabi_uidivmod.S +++ b/lib/builtins/arm/aeabi_uidivmod.S @@ -17,7 +17,7 @@ // } .syntax unified - .align 2 + .p2align 2 DEFINE_COMPILERRT_FUNCTION(__aeabi_uidivmod) push { lr } sub sp, sp, #4 diff --git a/lib/builtins/arm/aeabi_uldivmod.S b/lib/builtins/arm/aeabi_uldivmod.S index 62a2113cc..4e1f8e2a6 100644 --- a/lib/builtins/arm/aeabi_uldivmod.S +++ b/lib/builtins/arm/aeabi_uldivmod.S @@ -17,7 +17,7 @@ // } .syntax unified - .align 2 + .p2align 2 DEFINE_COMPILERRT_FUNCTION(__aeabi_uldivmod) push {r11, lr} sub sp, sp, #16 diff --git a/lib/builtins/arm/bswapdi2.S b/lib/builtins/arm/bswapdi2.S index c6e1b79c6..14070fdfb 100644 --- a/lib/builtins/arm/bswapdi2.S +++ b/lib/builtins/arm/bswapdi2.S @@ -14,7 +14,7 @@ // // Reverse all the bytes in a 64-bit integer. // -.align 2 +.p2align 2 DEFINE_COMPILERRT_FUNCTION(__bswapdi2) #if __ARM_ARCH < 6 // before armv6 does not have "rev" instruction diff --git a/lib/builtins/arm/bswapsi2.S b/lib/builtins/arm/bswapsi2.S index 5842da263..0fa2d986b 100644 --- a/lib/builtins/arm/bswapsi2.S +++ b/lib/builtins/arm/bswapsi2.S @@ -14,7 +14,7 @@ // // Reverse all the bytes in a 32-bit integer. 
// -.align 2 +.p2align 2 DEFINE_COMPILERRT_FUNCTION(__bswapsi2) #if __ARM_ARCH < 6 // before armv6 does not have "rev" instruction diff --git a/lib/builtins/arm/clzdi2.S b/lib/builtins/arm/clzdi2.S index 33284cd3a..841ba7ba5 100644 --- a/lib/builtins/arm/clzdi2.S +++ b/lib/builtins/arm/clzdi2.S @@ -16,7 +16,7 @@ .syntax unified .text - .align 2 + .p2align 2 DEFINE_COMPILERRT_FUNCTION(__clzdi2) #ifdef __ARM_FEATURE_CLZ #ifdef __ARMEB__ diff --git a/lib/builtins/arm/clzsi2.S b/lib/builtins/arm/clzsi2.S index 8081d3284..de53f4f85 100644 --- a/lib/builtins/arm/clzsi2.S +++ b/lib/builtins/arm/clzsi2.S @@ -16,7 +16,7 @@ .syntax unified .text - .align 2 + .p2align 2 DEFINE_COMPILERRT_FUNCTION(__clzsi2) #ifdef __ARM_FEATURE_CLZ clz r0, r0 diff --git a/lib/builtins/arm/comparesf2.S b/lib/builtins/arm/comparesf2.S index ad1b10a7f..09f72858b 100644 --- a/lib/builtins/arm/comparesf2.S +++ b/lib/builtins/arm/comparesf2.S @@ -40,7 +40,7 @@ #include "../assembly.h" .syntax unified -.align 2 +.p2align 2 DEFINE_COMPILERRT_FUNCTION(__eqsf2) // Make copies of a and b with the sign bit shifted off the top. These will // be used to detect zeros and NaNs. @@ -105,7 +105,7 @@ DEFINE_COMPILERRT_FUNCTION_ALIAS(__lesf2, __eqsf2) DEFINE_COMPILERRT_FUNCTION_ALIAS(__ltsf2, __eqsf2) DEFINE_COMPILERRT_FUNCTION_ALIAS(__nesf2, __eqsf2) -.align 2 +.p2align 2 DEFINE_COMPILERRT_FUNCTION(__gtsf2) // Identical to the preceeding except in that we return -1 for NaN values. // Given that the two paths share so much code, one might be tempted to @@ -132,7 +132,7 @@ DEFINE_COMPILERRT_FUNCTION(__gtsf2) END_COMPILERRT_FUNCTION(__gtsf2) DEFINE_COMPILERRT_FUNCTION_ALIAS(__gesf2, __gtsf2) -.align 2 +.p2align 2 DEFINE_COMPILERRT_FUNCTION(__unordsf2) // Return 1 for NaN values, 0 otherwise. 
mov r2, r0, lsl #1 diff --git a/lib/builtins/arm/divdf3vfp.S b/lib/builtins/arm/divdf3vfp.S index ec2f99721..6eebef167 100644 --- a/lib/builtins/arm/divdf3vfp.S +++ b/lib/builtins/arm/divdf3vfp.S @@ -16,7 +16,7 @@ // calling convention where double arguments are passsed in GPR pairs // .syntax unified - .align 2 + .p2align 2 DEFINE_COMPILERRT_FUNCTION(__divdf3vfp) vmov d6, r0, r1 // move first param from r0/r1 pair into d6 vmov d7, r2, r3 // move second param from r2/r3 pair into d7 diff --git a/lib/builtins/arm/divmodsi4.S b/lib/builtins/arm/divmodsi4.S index 24e3657dd..ff37d9f9f 100644 --- a/lib/builtins/arm/divmodsi4.S +++ b/lib/builtins/arm/divmodsi4.S @@ -22,7 +22,7 @@ pop {r4-r7, pc} .syntax unified -.align 3 +.p2align 3 DEFINE_COMPILERRT_FUNCTION(__divmodsi4) #if __ARM_ARCH_EXT_IDIV__ tst r1, r1 diff --git a/lib/builtins/arm/divsf3vfp.S b/lib/builtins/arm/divsf3vfp.S index a4e2f8148..fdbaebc88 100644 --- a/lib/builtins/arm/divsf3vfp.S +++ b/lib/builtins/arm/divsf3vfp.S @@ -16,7 +16,7 @@ // calling convention where single arguments are passsed like 32-bit ints. // .syntax unified - .align 2 + .p2align 2 DEFINE_COMPILERRT_FUNCTION(__divsf3vfp) vmov s14, r0 // move first param from r0 into float register vmov s15, r1 // move second param from r1 into float register diff --git a/lib/builtins/arm/divsi3.S b/lib/builtins/arm/divsi3.S index 76b679352..08f3aba14 100644 --- a/lib/builtins/arm/divsi3.S +++ b/lib/builtins/arm/divsi3.S @@ -21,7 +21,7 @@ pop {r4, r7, pc} .syntax unified -.align 3 +.p2align 3 // Ok, APCS and AAPCS agree on 32 bit args, so it's safe to use the same routine. DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_idiv, __divsi3) DEFINE_COMPILERRT_FUNCTION(__divsi3) diff --git a/lib/builtins/arm/eqdf2vfp.S b/lib/builtins/arm/eqdf2vfp.S index 198d42278..7f2fbc307 100644 --- a/lib/builtins/arm/eqdf2vfp.S +++ b/lib/builtins/arm/eqdf2vfp.S @@ -17,7 +17,7 @@ // like in GPR pairs. 
// .syntax unified - .align 2 + .p2align 2 DEFINE_COMPILERRT_FUNCTION(__eqdf2vfp) vmov d6, r0, r1 // load r0/r1 pair in double register vmov d7, r2, r3 // load r2/r3 pair in double register diff --git a/lib/builtins/arm/eqsf2vfp.S b/lib/builtins/arm/eqsf2vfp.S index 395187f47..a318b336a 100644 --- a/lib/builtins/arm/eqsf2vfp.S +++ b/lib/builtins/arm/eqsf2vfp.S @@ -17,7 +17,7 @@ // like 32-bit ints // .syntax unified - .align 2 + .p2align 2 DEFINE_COMPILERRT_FUNCTION(__eqsf2vfp) vmov s14, r0 // move from GPR 0 to float register vmov s15, r1 // move from GPR 1 to float register diff --git a/lib/builtins/arm/extendsfdf2vfp.S b/lib/builtins/arm/extendsfdf2vfp.S index ba31f3e0c..b998e5894 100644 --- a/lib/builtins/arm/extendsfdf2vfp.S +++ b/lib/builtins/arm/extendsfdf2vfp.S @@ -17,7 +17,7 @@ // passed in a GPR and a double precision result is returned in R0/R1 pair. // .syntax unified - .align 2 + .p2align 2 DEFINE_COMPILERRT_FUNCTION(__extendsfdf2vfp) vmov s15, r0 // load float register from R0 vcvt.f64.f32 d7, s15 // convert single to double diff --git a/lib/builtins/arm/fixdfsivfp.S b/lib/builtins/arm/fixdfsivfp.S index 26adbe389..e3bd8e05e 100644 --- a/lib/builtins/arm/fixdfsivfp.S +++ b/lib/builtins/arm/fixdfsivfp.S @@ -17,7 +17,7 @@ // passed in GPR register pair. // .syntax unified - .align 2 + .p2align 2 DEFINE_COMPILERRT_FUNCTION(__fixdfsivfp) vmov d7, r0, r1 // load double register from R0/R1 vcvt.s32.f64 s15, d7 // convert double to 32-bit int into s15 diff --git a/lib/builtins/arm/fixsfsivfp.S b/lib/builtins/arm/fixsfsivfp.S index c1e0dadb7..3d0d0f56d 100644 --- a/lib/builtins/arm/fixsfsivfp.S +++ b/lib/builtins/arm/fixsfsivfp.S @@ -17,7 +17,7 @@ // passed in a GPR.. 
// .syntax unified - .align 2 + .p2align 2 DEFINE_COMPILERRT_FUNCTION(__fixsfsivfp) vmov s15, r0 // load float register from R0 vcvt.s32.f32 s15, s15 // convert single to 32-bit int into s15 diff --git a/lib/builtins/arm/fixunsdfsivfp.S b/lib/builtins/arm/fixunsdfsivfp.S index fb91da752..35dda5b9b 100644 --- a/lib/builtins/arm/fixunsdfsivfp.S +++ b/lib/builtins/arm/fixunsdfsivfp.S @@ -18,7 +18,7 @@ // passed in GPR register pair. // .syntax unified - .align 2 + .p2align 2 DEFINE_COMPILERRT_FUNCTION(__fixunsdfsivfp) vmov d7, r0, r1 // load double register from R0/R1 vcvt.u32.f64 s15, d7 // convert double to 32-bit int into s15 diff --git a/lib/builtins/arm/fixunssfsivfp.S b/lib/builtins/arm/fixunssfsivfp.S index f12dff61a..5c3a7d926 100644 --- a/lib/builtins/arm/fixunssfsivfp.S +++ b/lib/builtins/arm/fixunssfsivfp.S @@ -18,7 +18,7 @@ // passed in a GPR.. // .syntax unified - .align 2 + .p2align 2 DEFINE_COMPILERRT_FUNCTION(__fixunssfsivfp) vmov s15, r0 // load float register from R0 vcvt.u32.f32 s15, s15 // convert single to 32-bit unsigned into s15 diff --git a/lib/builtins/arm/floatsidfvfp.S b/lib/builtins/arm/floatsidfvfp.S index ab48933b9..d69184914 100644 --- a/lib/builtins/arm/floatsidfvfp.S +++ b/lib/builtins/arm/floatsidfvfp.S @@ -17,7 +17,7 @@ // return in GPR register pair. // .syntax unified - .align 2 + .p2align 2 DEFINE_COMPILERRT_FUNCTION(__floatsidfvfp) vmov s15, r0 // move int to float register s15 vcvt.f64.s32 d7, s15 // convert 32-bit int in s15 to double in d7 diff --git a/lib/builtins/arm/floatsisfvfp.S b/lib/builtins/arm/floatsisfvfp.S index eb265e990..4a0cb39d0 100644 --- a/lib/builtins/arm/floatsisfvfp.S +++ b/lib/builtins/arm/floatsisfvfp.S @@ -17,7 +17,7 @@ // return in a GPR.. 
// .syntax unified - .align 2 + .p2align 2 DEFINE_COMPILERRT_FUNCTION(__floatsisfvfp) vmov s15, r0 // move int to float register s15 vcvt.f32.s32 s15, s15 // convert 32-bit int in s15 to float in s15 diff --git a/lib/builtins/arm/floatunssidfvfp.S b/lib/builtins/arm/floatunssidfvfp.S index 96e2eb21e..d92969ea3 100644 --- a/lib/builtins/arm/floatunssidfvfp.S +++ b/lib/builtins/arm/floatunssidfvfp.S @@ -17,7 +17,7 @@ // return in GPR register pair. // .syntax unified - .align 2 + .p2align 2 DEFINE_COMPILERRT_FUNCTION(__floatunssidfvfp) vmov s15, r0 // move int to float register s15 vcvt.f64.u32 d7, s15 // convert 32-bit int in s15 to double in d7 diff --git a/lib/builtins/arm/floatunssisfvfp.S b/lib/builtins/arm/floatunssisfvfp.S index 05471c4a5..f6aeba56a 100644 --- a/lib/builtins/arm/floatunssisfvfp.S +++ b/lib/builtins/arm/floatunssisfvfp.S @@ -17,7 +17,7 @@ // return in a GPR.. // .syntax unified - .align 2 + .p2align 2 DEFINE_COMPILERRT_FUNCTION(__floatunssisfvfp) vmov s15, r0 // move int to float register s15 vcvt.f32.u32 s15, s15 // convert 32-bit int in s15 to float in s15 diff --git a/lib/builtins/arm/gedf2vfp.S b/lib/builtins/arm/gedf2vfp.S index aa636aa14..9e2352701 100644 --- a/lib/builtins/arm/gedf2vfp.S +++ b/lib/builtins/arm/gedf2vfp.S @@ -17,7 +17,7 @@ // like in GPR pairs. 
// .syntax unified - .align 2 + .p2align 2 DEFINE_COMPILERRT_FUNCTION(__gedf2vfp) vmov d6, r0, r1 // load r0/r1 pair in double register vmov d7, r2, r3 // load r2/r3 pair in double register diff --git a/lib/builtins/arm/gesf2vfp.S b/lib/builtins/arm/gesf2vfp.S index 087ce15f1..0ff608477 100644 --- a/lib/builtins/arm/gesf2vfp.S +++ b/lib/builtins/arm/gesf2vfp.S @@ -17,7 +17,7 @@ // like 32-bit ints // .syntax unified - .align 2 + .p2align 2 DEFINE_COMPILERRT_FUNCTION(__gesf2vfp) vmov s14, r0 // move from GPR 0 to float register vmov s15, r1 // move from GPR 1 to float register diff --git a/lib/builtins/arm/gtdf2vfp.S b/lib/builtins/arm/gtdf2vfp.S index 230952a9f..3dc5d5b59 100644 --- a/lib/builtins/arm/gtdf2vfp.S +++ b/lib/builtins/arm/gtdf2vfp.S @@ -17,7 +17,7 @@ // like in GPR pairs. // .syntax unified - .align 2 + .p2align 2 DEFINE_COMPILERRT_FUNCTION(__gtdf2vfp) vmov d6, r0, r1 // load r0/r1 pair in double register vmov d7, r2, r3 // load r2/r3 pair in double register diff --git a/lib/builtins/arm/gtsf2vfp.S b/lib/builtins/arm/gtsf2vfp.S index c1f5db053..ddd843acf 100644 --- a/lib/builtins/arm/gtsf2vfp.S +++ b/lib/builtins/arm/gtsf2vfp.S @@ -17,7 +17,7 @@ // like 32-bit ints // .syntax unified - .align 2 + .p2align 2 DEFINE_COMPILERRT_FUNCTION(__gtsf2vfp) vmov s14, r0 // move from GPR 0 to float register vmov s15, r1 // move from GPR 1 to float register diff --git a/lib/builtins/arm/ledf2vfp.S b/lib/builtins/arm/ledf2vfp.S index 94980a2ef..b06ff6db5 100644 --- a/lib/builtins/arm/ledf2vfp.S +++ b/lib/builtins/arm/ledf2vfp.S @@ -17,7 +17,7 @@ // like in GPR pairs. 
// .syntax unified - .align 2 + .p2align 2 DEFINE_COMPILERRT_FUNCTION(__ledf2vfp) vmov d6, r0, r1 // load r0/r1 pair in double register vmov d7, r2, r3 // load r2/r3 pair in double register diff --git a/lib/builtins/arm/lesf2vfp.S b/lib/builtins/arm/lesf2vfp.S index 362352a83..9b33c0c53 100644 --- a/lib/builtins/arm/lesf2vfp.S +++ b/lib/builtins/arm/lesf2vfp.S @@ -17,7 +17,7 @@ // like 32-bit ints // .syntax unified - .align 2 + .p2align 2 DEFINE_COMPILERRT_FUNCTION(__lesf2vfp) vmov s14, r0 // move from GPR 0 to float register vmov s15, r1 // move from GPR 1 to float register diff --git a/lib/builtins/arm/ltdf2vfp.S b/lib/builtins/arm/ltdf2vfp.S index 391ff2924..9f794b026 100644 --- a/lib/builtins/arm/ltdf2vfp.S +++ b/lib/builtins/arm/ltdf2vfp.S @@ -17,7 +17,7 @@ // like in GPR pairs. // .syntax unified - .align 2 + .p2align 2 DEFINE_COMPILERRT_FUNCTION(__ltdf2vfp) vmov d6, r0, r1 // load r0/r1 pair in double register vmov d7, r2, r3 // load r2/r3 pair in double register diff --git a/lib/builtins/arm/ltsf2vfp.S b/lib/builtins/arm/ltsf2vfp.S index dcaa03561..ba190d9d8 100644 --- a/lib/builtins/arm/ltsf2vfp.S +++ b/lib/builtins/arm/ltsf2vfp.S @@ -17,7 +17,7 @@ // like 32-bit ints // .syntax unified - .align 2 + .p2align 2 DEFINE_COMPILERRT_FUNCTION(__ltsf2vfp) vmov s14, r0 // move from GPR 0 to float register vmov s15, r1 // move from GPR 1 to float register diff --git a/lib/builtins/arm/modsi3.S b/lib/builtins/arm/modsi3.S index 07c5f9381..b7933ea9a 100644 --- a/lib/builtins/arm/modsi3.S +++ b/lib/builtins/arm/modsi3.S @@ -21,7 +21,7 @@ pop {r4, r7, pc} .syntax unified -.align 3 +.p2align 3 DEFINE_COMPILERRT_FUNCTION(__modsi3) #if __ARM_ARCH_EXT_IDIV__ tst r1, r1 diff --git a/lib/builtins/arm/muldf3vfp.S b/lib/builtins/arm/muldf3vfp.S index 2c1d70377..636cc711a 100644 --- a/lib/builtins/arm/muldf3vfp.S +++ b/lib/builtins/arm/muldf3vfp.S @@ -16,7 +16,7 @@ // calling convention where double arguments are passsed in GPR pairs // .syntax unified - .align 2 + .p2align 2 
DEFINE_COMPILERRT_FUNCTION(__muldf3vfp) vmov d6, r0, r1 // move first param from r0/r1 pair into d6 vmov d7, r2, r3 // move second param from r2/r3 pair into d7 diff --git a/lib/builtins/arm/mulsf3vfp.S b/lib/builtins/arm/mulsf3vfp.S index 98af81d23..7f4008266 100644 --- a/lib/builtins/arm/mulsf3vfp.S +++ b/lib/builtins/arm/mulsf3vfp.S @@ -16,7 +16,7 @@ // calling convention where single arguments are passsed like 32-bit ints. // .syntax unified - .align 2 + .p2align 2 DEFINE_COMPILERRT_FUNCTION(__mulsf3vfp) vmov s14, r0 // move first param from r0 into float register vmov s15, r1 // move second param from r1 into float register diff --git a/lib/builtins/arm/nedf2vfp.S b/lib/builtins/arm/nedf2vfp.S index c1a832e1a..7ab2f5501 100644 --- a/lib/builtins/arm/nedf2vfp.S +++ b/lib/builtins/arm/nedf2vfp.S @@ -17,7 +17,7 @@ // like in GPR pairs. // .syntax unified - .align 2 + .p2align 2 DEFINE_COMPILERRT_FUNCTION(__nedf2vfp) vmov d6, r0, r1 // load r0/r1 pair in double register vmov d7, r2, r3 // load r2/r3 pair in double register diff --git a/lib/builtins/arm/negdf2vfp.S b/lib/builtins/arm/negdf2vfp.S index ad9b87418..56d73c676 100644 --- a/lib/builtins/arm/negdf2vfp.S +++ b/lib/builtins/arm/negdf2vfp.S @@ -16,7 +16,7 @@ // Darwin calling convention where double arguments are passsed in GPR pairs. 
// .syntax unified - .align 2 + .p2align 2 DEFINE_COMPILERRT_FUNCTION(__negdf2vfp) eor r1, r1, #-2147483648 // flip sign bit on double in r0/r1 pair bx lr diff --git a/lib/builtins/arm/negsf2vfp.S b/lib/builtins/arm/negsf2vfp.S index 482eb2640..a6e32e1ff 100644 --- a/lib/builtins/arm/negsf2vfp.S +++ b/lib/builtins/arm/negsf2vfp.S @@ -16,7 +16,7 @@ // Darwin calling convention where single arguments are passsed like 32-bit ints // .syntax unified - .align 2 + .p2align 2 DEFINE_COMPILERRT_FUNCTION(__negsf2vfp) eor r0, r0, #-2147483648 // flip sign bit on float in r0 bx lr diff --git a/lib/builtins/arm/nesf2vfp.S b/lib/builtins/arm/nesf2vfp.S index 127145ec4..9fe8ecdef 100644 --- a/lib/builtins/arm/nesf2vfp.S +++ b/lib/builtins/arm/nesf2vfp.S @@ -17,7 +17,7 @@ // like 32-bit ints // .syntax unified - .align 2 + .p2align 2 DEFINE_COMPILERRT_FUNCTION(__nesf2vfp) vmov s14, r0 // move from GPR 0 to float register vmov s15, r1 // move from GPR 1 to float register diff --git a/lib/builtins/arm/restore_vfp_d8_d15_regs.S b/lib/builtins/arm/restore_vfp_d8_d15_regs.S index 5d5547415..909cfa4fe 100644 --- a/lib/builtins/arm/restore_vfp_d8_d15_regs.S +++ b/lib/builtins/arm/restore_vfp_d8_d15_regs.S @@ -25,7 +25,7 @@ // // Restore registers d8-d15 from stack // - .align 2 + .p2align 2 DEFINE_COMPILERRT_PRIVATE_FUNCTION(__restore_vfp_d8_d15_regs) vldmia sp!, {d8-d15} // pop registers d8-d15 off stack bx lr // return to prolog diff --git a/lib/builtins/arm/save_vfp_d8_d15_regs.S b/lib/builtins/arm/save_vfp_d8_d15_regs.S index 4be3ff3a5..f5f287021 100644 --- a/lib/builtins/arm/save_vfp_d8_d15_regs.S +++ b/lib/builtins/arm/save_vfp_d8_d15_regs.S @@ -25,7 +25,7 @@ // // Save registers d8-d15 onto stack // - .align 2 + .p2align 2 DEFINE_COMPILERRT_PRIVATE_FUNCTION(__save_vfp_d8_d15_regs) vstmdb sp!, {d8-d15} // push registers d8-d15 onto stack bx lr // return to prolog diff --git a/lib/builtins/arm/subdf3vfp.S b/lib/builtins/arm/subdf3vfp.S index 747d6596c..5f3c0f70d 100644 --- 
a/lib/builtins/arm/subdf3vfp.S +++ b/lib/builtins/arm/subdf3vfp.S @@ -16,7 +16,7 @@ // the Darwin calling convention where double arguments are passsed in GPR pairs // .syntax unified - .align 2 + .p2align 2 DEFINE_COMPILERRT_FUNCTION(__subdf3vfp) vmov d6, r0, r1 // move first param from r0/r1 pair into d6 vmov d7, r2, r3 // move second param from r2/r3 pair into d7 diff --git a/lib/builtins/arm/subsf3vfp.S b/lib/builtins/arm/subsf3vfp.S index 7cc63cccc..d6e06df51 100644 --- a/lib/builtins/arm/subsf3vfp.S +++ b/lib/builtins/arm/subsf3vfp.S @@ -17,7 +17,7 @@ // like 32-bit ints. // .syntax unified - .align 2 + .p2align 2 DEFINE_COMPILERRT_FUNCTION(__subsf3vfp) vmov s14, r0 // move first param from r0 into float register vmov s15, r1 // move second param from r1 into float register diff --git a/lib/builtins/arm/switch16.S b/lib/builtins/arm/switch16.S index 0dedc598a..b13e347d7 100644 --- a/lib/builtins/arm/switch16.S +++ b/lib/builtins/arm/switch16.S @@ -29,7 +29,7 @@ // The table contains signed 2-byte sized elements which are 1/2 the distance // from lr to the target label. // - .align 2 + .p2align 2 DEFINE_COMPILERRT_PRIVATE_FUNCTION(__switch16) ldrh ip, [lr, #-1] // get first 16-bit word in table cmp r0, ip // compare with index diff --git a/lib/builtins/arm/switch32.S b/lib/builtins/arm/switch32.S index 64d558eb8..f1be578de 100644 --- a/lib/builtins/arm/switch32.S +++ b/lib/builtins/arm/switch32.S @@ -29,7 +29,7 @@ // The table contains signed 4-byte sized elements which are the distance // from lr to the target label. 
// - .align 2 + .p2align 2 DEFINE_COMPILERRT_PRIVATE_FUNCTION(__switch32) ldr ip, [lr, #-1] // get first 32-bit word in table cmp r0, ip // compare with index diff --git a/lib/builtins/arm/switch8.S b/lib/builtins/arm/switch8.S index b5008849a..746a8561f 100644 --- a/lib/builtins/arm/switch8.S +++ b/lib/builtins/arm/switch8.S @@ -29,7 +29,7 @@ // The table contains signed byte sized elements which are 1/2 the distance // from lr to the target label. // - .align 2 + .p2align 2 DEFINE_COMPILERRT_PRIVATE_FUNCTION(__switch8) ldrb ip, [lr, #-1] // get first byte in table cmp r0, ip // signed compare with index diff --git a/lib/builtins/arm/switchu8.S b/lib/builtins/arm/switchu8.S index 488d4e74e..0b05c5478 100644 --- a/lib/builtins/arm/switchu8.S +++ b/lib/builtins/arm/switchu8.S @@ -29,7 +29,7 @@ // The table contains unsigned byte sized elements which are 1/2 the distance // from lr to the target label. // - .align 2 + .p2align 2 DEFINE_COMPILERRT_PRIVATE_FUNCTION(__switchu8) ldrb ip, [lr, #-1] // get first byte in table cmp r0, ip // compare with index diff --git a/lib/builtins/arm/sync-ops.h b/lib/builtins/arm/sync-ops.h index 777b121d3..87cc3c27c 100644 --- a/lib/builtins/arm/sync-ops.h +++ b/lib/builtins/arm/sync-ops.h @@ -16,7 +16,7 @@ #include "../assembly.h" #define SYNC_OP_4(op) \ - .align 2 ; \ + .p2align 2 ; \ .thumb ; \ DEFINE_COMPILERRT_FUNCTION(__sync_fetch_and_ ## op) \ dmb ; \ @@ -30,7 +30,7 @@ bx lr #define SYNC_OP_8(op) \ - .align 2 ; \ + .p2align 2 ; \ .thumb ; \ DEFINE_COMPILERRT_FUNCTION(__sync_fetch_and_ ## op) \ push {r4, r5, r6, lr} ; \ diff --git a/lib/builtins/arm/sync_synchronize.S b/lib/builtins/arm/sync_synchronize.S index aa18f04fb..178f24534 100644 --- a/lib/builtins/arm/sync_synchronize.S +++ b/lib/builtins/arm/sync_synchronize.S @@ -21,7 +21,7 @@ #if __APPLE__ - .align 2 + .p2align 2 DEFINE_COMPILERRT_PRIVATE_FUNCTION(__sync_synchronize) stmfd sp!, {r7, lr} add r7, sp, #0 diff --git a/lib/builtins/arm/truncdfsf2vfp.S 
b/lib/builtins/arm/truncdfsf2vfp.S index 55bf324cc..fa4362c45 100644 --- a/lib/builtins/arm/truncdfsf2vfp.S +++ b/lib/builtins/arm/truncdfsf2vfp.S @@ -17,7 +17,7 @@ // passed in a R0/R1 pair and a signle precision result is returned in R0. // .syntax unified - .align 2 + .p2align 2 DEFINE_COMPILERRT_FUNCTION(__truncdfsf2vfp) vmov d7, r0, r1 // load double from r0/r1 pair vcvt.f32.f64 s15, d7 // convert double to single (trucate precision) diff --git a/lib/builtins/arm/unorddf2vfp.S b/lib/builtins/arm/unorddf2vfp.S index ee7de4025..c4bea2d5e 100644 --- a/lib/builtins/arm/unorddf2vfp.S +++ b/lib/builtins/arm/unorddf2vfp.S @@ -17,7 +17,7 @@ // like in GPR pairs. // .syntax unified - .align 2 + .p2align 2 DEFINE_COMPILERRT_FUNCTION(__unorddf2vfp) vmov d6, r0, r1 // load r0/r1 pair in double register vmov d7, r2, r3 // load r2/r3 pair in double register diff --git a/lib/builtins/arm/unordsf2vfp.S b/lib/builtins/arm/unordsf2vfp.S index eb419a541..886e96568 100644 --- a/lib/builtins/arm/unordsf2vfp.S +++ b/lib/builtins/arm/unordsf2vfp.S @@ -17,7 +17,7 @@ // like 32-bit ints // .syntax unified - .align 2 + .p2align 2 DEFINE_COMPILERRT_FUNCTION(__unordsf2vfp) vmov s14, r0 // move from GPR 0 to float register vmov s15, r1 // move from GPR 1 to float register diff --git a/lib/builtins/i386/ashldi3.S b/lib/builtins/i386/ashldi3.S index a5da1ace1..3fbd73903 100644 --- a/lib/builtins/i386/ashldi3.S +++ b/lib/builtins/i386/ashldi3.S @@ -16,7 +16,7 @@ #ifdef __SSE2__ .text -.align 4 +.balign 4 DEFINE_COMPILERRT_FUNCTION(__ashldi3) movd 12(%esp), %xmm2 // Load count #ifndef TRUST_CALLERS_USE_64_BIT_STORES @@ -36,7 +36,7 @@ END_COMPILERRT_FUNCTION(__ashldi3) #else // Use GPRs instead of SSE2 instructions, if they aren't available. 
.text -.align 4 +.balign 4 DEFINE_COMPILERRT_FUNCTION(__ashldi3) movl 12(%esp), %ecx // Load count movl 8(%esp), %edx // Load high diff --git a/lib/builtins/i386/ashrdi3.S b/lib/builtins/i386/ashrdi3.S index ab0fcfcc9..8f4742481 100644 --- a/lib/builtins/i386/ashrdi3.S +++ b/lib/builtins/i386/ashrdi3.S @@ -9,7 +9,7 @@ #ifdef __SSE2__ .text -.align 4 +.balign 4 DEFINE_COMPILERRT_FUNCTION(__ashrdi3) movd 12(%esp), %xmm2 // Load count movl 8(%esp), %eax @@ -46,7 +46,7 @@ END_COMPILERRT_FUNCTION(__ashrdi3) #else // Use GPRs instead of SSE2 instructions, if they aren't available. .text -.align 4 +.balign 4 DEFINE_COMPILERRT_FUNCTION(__ashrdi3) movl 12(%esp), %ecx // Load count movl 8(%esp), %edx // Load high diff --git a/lib/builtins/i386/divdi3.S b/lib/builtins/i386/divdi3.S index 5afec5f9c..2cb0ddd4c 100644 --- a/lib/builtins/i386/divdi3.S +++ b/lib/builtins/i386/divdi3.S @@ -19,7 +19,7 @@ #ifdef __i386__ .text -.align 4 +.balign 4 DEFINE_COMPILERRT_FUNCTION(__divdi3) /* This is currently implemented by wrapping the unsigned divide up in an absolute diff --git a/lib/builtins/i386/floatdidf.S b/lib/builtins/i386/floatdidf.S index 493608886..5c45ee9e6 100644 --- a/lib/builtins/i386/floatdidf.S +++ b/lib/builtins/i386/floatdidf.S @@ -10,14 +10,14 @@ #ifndef __ELF__ .const #endif -.align 4 +.balign 4 twop52: .quad 0x4330000000000000 twop32: .quad 0x41f0000000000000 #define REL_ADDR(_a) (_a)-0b(%eax) .text -.align 4 +.balign 4 DEFINE_COMPILERRT_FUNCTION(__floatdidf) cvtsi2sd 8(%esp), %xmm1 movss 4(%esp), %xmm0 // low 32 bits of a diff --git a/lib/builtins/i386/floatdisf.S b/lib/builtins/i386/floatdisf.S index 8340331b8..f64276703 100644 --- a/lib/builtins/i386/floatdisf.S +++ b/lib/builtins/i386/floatdisf.S @@ -15,7 +15,7 @@ #ifdef __i386__ .text -.align 4 +.balign 4 DEFINE_COMPILERRT_FUNCTION(__floatdisf) #ifndef TRUST_CALLERS_USE_64_BIT_STORES movd 4(%esp), %xmm0 diff --git a/lib/builtins/i386/floatdixf.S b/lib/builtins/i386/floatdixf.S index eaa34d744..839b0434c 100644 
--- a/lib/builtins/i386/floatdixf.S +++ b/lib/builtins/i386/floatdixf.S @@ -15,7 +15,7 @@ // It can be turned off by defining the TRUST_CALLERS_USE_64_BIT_STORES macro. .text -.align 4 +.balign 4 DEFINE_COMPILERRT_FUNCTION(__floatdixf) #ifndef TRUST_CALLERS_USE_64_BIT_STORES movd 4(%esp), %xmm0 diff --git a/lib/builtins/i386/floatundidf.S b/lib/builtins/i386/floatundidf.S index 5b8787ba6..b00627848 100644 --- a/lib/builtins/i386/floatundidf.S +++ b/lib/builtins/i386/floatundidf.S @@ -20,7 +20,7 @@ #ifndef __ELF__ .const #endif -.align 4 +.balign 4 twop52: .quad 0x4330000000000000 twop84_plus_twop52: .quad 0x4530000000100000 @@ -29,7 +29,7 @@ twop84: .quad 0x4530000000000000 #define REL_ADDR(_a) (_a)-0b(%eax) .text -.align 4 +.balign 4 DEFINE_COMPILERRT_FUNCTION(__floatundidf) movss 8(%esp), %xmm1 // high 32 bits of a movss 4(%esp), %xmm0 // low 32 bits of a diff --git a/lib/builtins/i386/floatundisf.S b/lib/builtins/i386/floatundisf.S index 2253021bc..c5743492a 100644 --- a/lib/builtins/i386/floatundisf.S +++ b/lib/builtins/i386/floatundisf.S @@ -19,7 +19,7 @@ #ifdef __i386__ .const -.align 3 +.balign 3 .quad 0x43f0000000000000 twop64: .quad 0x0000000000000000 @@ -27,7 +27,7 @@ twop64: .quad 0x0000000000000000 #define TWOp64 twop64-0b(%ecx,%eax,8) .text -.align 4 +.balign 4 DEFINE_COMPILERRT_FUNCTION(__floatundisf) movl 8(%esp), %eax movd 8(%esp), %xmm1 @@ -54,9 +54,9 @@ END_COMPILERRT_FUNCTION(__floatundisf) #ifndef __ELF__ .const -.align 3 +.balign 3 #else -.align 8 +.balign 8 #endif twop52: .quad 0x4330000000000000 .quad 0x0000000000000fff @@ -68,7 +68,7 @@ twelve: .long 0x00000000 #define STICKY sticky-0b(%ecx,%eax,8) .text -.align 4 +.balign 4 DEFINE_COMPILERRT_FUNCTION(__floatundisf) movl 8(%esp), %eax movd 8(%esp), %xmm1 diff --git a/lib/builtins/i386/floatundixf.S b/lib/builtins/i386/floatundixf.S index b728d0697..9d2f31faa 100644 --- a/lib/builtins/i386/floatundixf.S +++ b/lib/builtins/i386/floatundixf.S @@ -10,7 +10,7 @@ #ifndef __ELF__ .const #endif 
-.align 4 +.balign 4 twop52: .quad 0x4330000000000000 twop84_plus_twop52_neg: .quad 0xc530000000100000 @@ -19,7 +19,7 @@ twop84: .quad 0x4530000000000000 #define REL_ADDR(_a) (_a)-0b(%eax) .text -.align 4 +.balign 4 DEFINE_COMPILERRT_FUNCTION(__floatundixf) calll 0f 0: popl %eax diff --git a/lib/builtins/i386/lshrdi3.S b/lib/builtins/i386/lshrdi3.S index dcfc33169..b80f11a38 100644 --- a/lib/builtins/i386/lshrdi3.S +++ b/lib/builtins/i386/lshrdi3.S @@ -16,7 +16,7 @@ #ifdef __SSE2__ .text -.align 4 +.balign 4 DEFINE_COMPILERRT_FUNCTION(__lshrdi3) movd 12(%esp), %xmm2 // Load count #ifndef TRUST_CALLERS_USE_64_BIT_STORES @@ -36,7 +36,7 @@ END_COMPILERRT_FUNCTION(__lshrdi3) #else // Use GPRs instead of SSE2 instructions, if they aren't available. .text -.align 4 +.balign 4 DEFINE_COMPILERRT_FUNCTION(__lshrdi3) movl 12(%esp), %ecx // Load count movl 8(%esp), %edx // Load high diff --git a/lib/builtins/i386/moddi3.S b/lib/builtins/i386/moddi3.S index 79e4fc213..b9cee9d7a 100644 --- a/lib/builtins/i386/moddi3.S +++ b/lib/builtins/i386/moddi3.S @@ -20,7 +20,7 @@ #ifdef __i386__ .text -.align 4 +.balign 4 DEFINE_COMPILERRT_FUNCTION(__moddi3) /* This is currently implemented by wrapping the unsigned modulus up in an absolute diff --git a/lib/builtins/i386/muldi3.S b/lib/builtins/i386/muldi3.S index 5af1136c1..15b6b4998 100644 --- a/lib/builtins/i386/muldi3.S +++ b/lib/builtins/i386/muldi3.S @@ -8,7 +8,7 @@ #ifdef __i386__ .text -.align 4 +.balign 4 DEFINE_COMPILERRT_FUNCTION(__muldi3) pushl %ebx movl 16(%esp), %eax // b.lo diff --git a/lib/builtins/i386/udivdi3.S b/lib/builtins/i386/udivdi3.S index 5072b221c..41b2edf03 100644 --- a/lib/builtins/i386/udivdi3.S +++ b/lib/builtins/i386/udivdi3.S @@ -19,7 +19,7 @@ #ifdef __i386__ .text -.align 4 +.balign 4 DEFINE_COMPILERRT_FUNCTION(__udivdi3) pushl %ebx diff --git a/lib/builtins/i386/umoddi3.S b/lib/builtins/i386/umoddi3.S index 63cefc207..a190a7d39 100644 --- a/lib/builtins/i386/umoddi3.S +++ b/lib/builtins/i386/umoddi3.S @@ 
-20,7 +20,7 @@ #ifdef __i386__ .text -.align 4 +.balign 4 DEFINE_COMPILERRT_FUNCTION(__umoddi3) pushl %ebx diff --git a/lib/builtins/x86_64/floatundidf.S b/lib/builtins/x86_64/floatundidf.S index 1df3d7479..28babfdcf 100644 --- a/lib/builtins/x86_64/floatundidf.S +++ b/lib/builtins/x86_64/floatundidf.S @@ -20,7 +20,7 @@ #ifndef __ELF__ .const #endif -.align 4 +.balign 4 twop52: .quad 0x4330000000000000 twop84_plus_twop52: .quad 0x4530000000100000 @@ -29,7 +29,7 @@ twop84: .quad 0x4530000000000000 #define REL_ADDR(_a) (_a)(%rip) .text -.align 4 +.balign 4 DEFINE_COMPILERRT_FUNCTION(__floatundidf) movd %edi, %xmm0 // low 32 bits of a shrq $32, %rdi // high 32 bits of a diff --git a/lib/builtins/x86_64/floatundisf.S b/lib/builtins/x86_64/floatundisf.S index 7dd5c329b..b5ca4f395 100644 --- a/lib/builtins/x86_64/floatundisf.S +++ b/lib/builtins/x86_64/floatundisf.S @@ -15,7 +15,7 @@ two: .single 2.0 #define REL_ADDR(_a) (_a)(%rip) .text -.align 4 +.balign 4 DEFINE_COMPILERRT_FUNCTION(__floatundisf) movq $1, %rsi testq %rdi, %rdi diff --git a/lib/builtins/x86_64/floatundixf.S b/lib/builtins/x86_64/floatundixf.S index b05954a0b..36b837ce5 100644 --- a/lib/builtins/x86_64/floatundixf.S +++ b/lib/builtins/x86_64/floatundixf.S @@ -10,13 +10,13 @@ #ifndef __ELF__ .const #endif -.align 4 +.balign 4 twop64: .quad 0x43f0000000000000 #define REL_ADDR(_a) (_a)(%rip) .text -.align 4 +.balign 4 DEFINE_COMPILERRT_FUNCTION(__floatundixf) movq %rdi, -8(%rsp) fildq -8(%rsp) @@ -36,7 +36,7 @@ END_COMPILERRT_FUNCTION(__floatundixf) #ifdef __x86_64__ .const -.align 4 +.balign 4 twop52: .quad 0x4330000000000000 twop84_plus_twop52_neg: .quad 0xc530000000100000 @@ -45,7 +45,7 @@ twop84: .quad 0x4530000000000000 #define REL_ADDR(_a) (_a)(%rip) .text -.align 4 +.balign 4 DEFINE_COMPILERRT_FUNCTION(__floatundixf) movl %edi, %esi // low 32 bits of input shrq $32, %rdi // hi 32 bits of input |