diff options
author | Niels Möller <nisse@lysator.liu.se> | 2020-09-13 20:11:09 +0200 |
---|---|---|
committer | Niels Möller <nisse@lysator.liu.se> | 2020-09-13 20:11:09 +0200 |
commit | 80e7cec60267594563ca9a5dcb22661c01fbc802 (patch) | |
tree | c70d05a2f1d414d2ad0b0e9f0dc9ff16361a4a32 | |
parent | 2325920519e459cbd3dbccc0a29778aae3d7b36e (diff) | |
download | nettle-80e7cec60267594563ca9a5dcb22661c01fbc802.tar.gz |
Use default m4 quote character in asm files, part 2
Update arm files.
37 files changed, 602 insertions, 602 deletions
diff --git a/arm/aes-decrypt-internal.asm b/arm/aes-decrypt-internal.asm index 3da333c8..1b04ed9a 100644 --- a/arm/aes-decrypt-internal.asm +++ b/arm/aes-decrypt-internal.asm @@ -1,6 +1,6 @@ C arm/aes-decrypt-internal.asm -ifelse(< +ifelse(` Copyright (C) 2013 Niels Möller This file is part of GNU Nettle. @@ -28,39 +28,39 @@ ifelse(< You should have received copies of the GNU General Public License and the GNU Lesser General Public License along with this program. If not, see http://www.gnu.org/licenses/. ->) +') -include_src(<arm/aes.m4>) +include_src(`arm/aes.m4') -define(<PARAM_ROUNDS>, <r0>) -define(<PARAM_KEYS>, <r1>) -define(<TABLE>, <r2>) -define(<PARAM_LENGTH>, <r3>) +define(`PARAM_ROUNDS', `r0') +define(`PARAM_KEYS', `r1') +define(`TABLE', `r2') +define(`PARAM_LENGTH', `r3') C On stack: DST, SRC -define(<W0>, <r4>) -define(<W1>, <r5>) -define(<W2>, <r6>) -define(<W3>, <r7>) -define(<T0>, <r8>) -define(<COUNT>, <r10>) -define(<KEY>, <r11>) - -define(<MASK>, <r0>) C Overlaps inputs, except TABLE -define(<X0>, <r1>) -define(<X1>, <r3>) -define(<X2>, <r12>) -define(<X3>, <r14>) C lr - -define(<FRAME_ROUNDS>, <[sp]>) -define(<FRAME_KEYS>, <[sp, #+4]>) -define(<FRAME_LENGTH>, <[sp, #+8]>) +define(`W0', `r4') +define(`W1', `r5') +define(`W2', `r6') +define(`W3', `r7') +define(`T0', `r8') +define(`COUNT', `r10') +define(`KEY', `r11') + +define(`MASK', `r0') C Overlaps inputs, except TABLE +define(`X0', `r1') +define(`X1', `r3') +define(`X2', `r12') +define(`X3', `r14') C lr + +define(`FRAME_ROUNDS', `[sp]') +define(`FRAME_KEYS', `[sp, #+4]') +define(`FRAME_LENGTH', `[sp, #+8]') C 8 saved registers -define(<FRAME_DST>, <[sp, #+44]>) -define(<FRAME_SRC>, <[sp, #+48]>) +define(`FRAME_DST', `[sp, #+44]') +define(`FRAME_SRC', `[sp, #+48]') -define(<AES_DECRYPT_ROUND>, < +define(`AES_DECRYPT_ROUND', ` and T0, MASK, $1, lsl #2 ldr $5, [TABLE, T0] and T0, MASK, $2, lsl #2 @@ -118,7 +118,7 @@ define(<AES_DECRYPT_ROUND>, < eor $6, $6, $2 eor $7, $7, $3 eor $8, $8, $4 ->) +') .file "aes-decrypt-internal.asm" diff --git a/arm/aes-encrypt-internal.asm b/arm/aes-encrypt-internal.asm index e8b3df6b..a16a9d57 100644 --- a/arm/aes-encrypt-internal.asm +++ b/arm/aes-encrypt-internal.asm @@ -1,6 +1,6 @@ C arm/aes-encrypt-internal.asm -ifelse(< +ifelse(` Copyright (C) 2013 Niels Möller This file is part of GNU Nettle. @@ -28,9 +28,9 @@ ifelse(< You should have received copies of the GNU General Public License and the GNU Lesser General Public License along with this program. If not, see http://www.gnu.org/licenses/. ->) +') -include_src(<arm/aes.m4>) +include_src(`arm/aes.m4') C Benchmarked at at 725, 815, 990 cycles/block on cortex A9, C for 128, 192 and 256 bit key sizes. @@ -38,37 +38,37 @@ C for 128, 192 and 256 bit key sizes. C Possible improvements: More efficient load and store with C aligned accesses. Better scheduling. -define(<PARAM_ROUNDS>, <r0>) -define(<PARAM_KEYS>, <r1>) -define(<TABLE>, <r2>) -define(<PARAM_LENGTH>, <r3>) +define(`PARAM_ROUNDS', `r0') +define(`PARAM_KEYS', `r1') +define(`TABLE', `r2') +define(`PARAM_LENGTH', `r3') C On stack: DST, SRC -define(<W0>, <r4>) -define(<W1>, <r5>) -define(<W2>, <r6>) -define(<W3>, <r7>) -define(<T0>, <r8>) -define(<COUNT>, <r10>) -define(<KEY>, <r11>) - -define(<MASK>, <r0>) C Overlaps inputs, except TABLE -define(<X0>, <r1>) -define(<X1>, <r3>) -define(<X2>, <r12>) -define(<X3>, <r14>) C lr - -define(<FRAME_ROUNDS>, <[sp]>) -define(<FRAME_KEYS>, <[sp, #+4]>) -define(<FRAME_LENGTH>, <[sp, #+8]>) +define(`W0', `r4') +define(`W1', `r5') +define(`W2', `r6') +define(`W3', `r7') +define(`T0', `r8') +define(`COUNT', `r10') +define(`KEY', `r11') + +define(`MASK', `r0') C Overlaps inputs, except TABLE +define(`X0', `r1') +define(`X1', `r3') +define(`X2', `r12') +define(`X3', `r14') C lr + +define(`FRAME_ROUNDS', `[sp]') +define(`FRAME_KEYS', `[sp, #+4]') +define(`FRAME_LENGTH', `[sp, #+8]') C 8 saved registers -define(<FRAME_DST>, <[sp, #+44]>) -define(<FRAME_SRC>, <[sp, #+48]>) +define(`FRAME_DST', `[sp, #+44]') +define(`FRAME_SRC', `[sp, #+48]') C AES_ENCRYPT_ROUND(x0,x1,x2,x3,w0,w1,w2,w3,key) C MASK should hold the constant 0x3fc. -define(<AES_ENCRYPT_ROUND>, < +define(`AES_ENCRYPT_ROUND', ` and T0, MASK, $1, lsl #2 ldr $5, [TABLE, T0] @@ -127,7 +127,7 @@ define(<AES_ENCRYPT_ROUND>, < eor $6, $6, $2 eor $7, $7, $3 eor $8, $8, $4 ->) +') .file "aes-encrypt-internal.asm" @@ -1,6 +1,6 @@ C Loads one word, and adds it to the subkey. Uses T0 C AES_LOAD(SRC, KEY, REG) -define(<AES_LOAD>, < +define(`AES_LOAD', ` ldrb $3, [$1], #+1 ldrb T0, [$1], #+1 orr $3, T0, lsl #8 @@ -10,10 +10,10 @@ define(<AES_LOAD>, < orr $3, T0, lsl #24 ldr T0, [$2], #+4 eor $3, T0 ->) +') C Stores one word. Destroys input. C AES_STORE(DST, X) -define(<AES_STORE>, < +define(`AES_STORE', ` strb $2, [$1], #+1 ror $2, $2, #8 strb $2, [$1], #+1 @@ -21,10 +21,10 @@ define(<AES_STORE>, < strb $2, [$1], #+1 ror $2, $2, #8 strb $2, [$1], #+1 ->) +') C AES_FINAL_ROUND_V6(a,b,c,d,key,res) -define(<AES_FINAL_ROUND_V6>, < +define(`AES_FINAL_ROUND_V6', ` uxtb T0, $1 ldrb $6, [TABLE, T0] uxtb T0, $2, ror #8 @@ -37,12 +37,12 @@ define(<AES_FINAL_ROUND_V6>, < eor $6, $6, T0, lsl #24 ldr T0, [$5], #+4 eor $6, $6, T0 ->) +') C AES_FINAL_ROUND_V5(a,b,c,d,key,res,mask) C Avoids the uxtb instruction, introduced in ARMv6. C The mask argument should hold the constant 0xff -define(<AES_FINAL_ROUND_V5>, < +define(`AES_FINAL_ROUND_V5', ` and T0, $7, $1 ldrb $6, [TABLE, T0] and T0, $7, $2, ror #8 @@ -55,4 +55,4 @@ define(<AES_FINAL_ROUND_V5>, < eor $6, $6, T0, lsl #24 ldr T0, [$5], #+4 eor $6, T0 ->) +') diff --git a/arm/ecc-secp192r1-modp.asm b/arm/ecc-secp192r1-modp.asm index 4c596a16..72a81a54 100644 --- a/arm/ecc-secp192r1-modp.asm +++ b/arm/ecc-secp192r1-modp.asm @@ -1,6 +1,6 @@ C arm/ecc-secp192r1-modp.asm -ifelse(< +ifelse(` Copyright (C) 2013 Niels Möller This file is part of GNU Nettle. @@ -28,26 +28,26 @@ ifelse(< You should have received copies of the GNU General Public License and the GNU Lesser General Public License along with this program. If not, see http://www.gnu.org/licenses/. ->) +') .file "ecc-secp192r1-modp.asm" .arm -define(<HP>, <r0>) C Overlaps unused modulo argument -define(<RP>, <r1>) - -define(<T0>, <r2>) -define(<T1>, <r3>) -define(<T2>, <r4>) -define(<T3>, <r5>) -define(<T4>, <r6>) -define(<T5>, <r7>) -define(<T6>, <r8>) -define(<T7>, <r10>) -define(<H0>, <T0>) C Overlaps T0 and T1 -define(<H1>, <T1>) -define(<C2>, <HP>) -define(<C4>, <r12>) +define(`HP', `r0') C Overlaps unused modulo argument +define(`RP', `r1') + +define(`T0', `r2') +define(`T1', `r3') +define(`T2', `r4') +define(`T3', `r5') +define(`T4', `r6') +define(`T5', `r7') +define(`T6', `r8') +define(`T7', `r10') +define(`H0', `T0') C Overlaps T0 and T1 +define(`H1', `T1') +define(`C2', `HP') +define(`C4', `r12') C ecc_secp192r1_modp (const struct ecc_modulo *m, mp_limb_t *rp) .text diff --git a/arm/ecc-secp224r1-modp.asm b/arm/ecc-secp224r1-modp.asm index 67089a0c..3256601c 100644 --- a/arm/ecc-secp224r1-modp.asm +++ b/arm/ecc-secp224r1-modp.asm @@ -1,6 +1,6 @@ C arm/ecc-secp224r1-modp.asm -ifelse(< +ifelse(` Copyright (C) 2013 Niels Möller This file is part of GNU Nettle. @@ -28,25 +28,25 @@ ifelse(< You should have received copies of the GNU General Public License and the GNU Lesser General Public License along with this program. If not, see http://www.gnu.org/licenses/. ->) +') .file "ecc-secp224r1-modp.asm" .arm -define(<RP>, <r1>) -define(<H>, <r0>) C Overlaps unused modulo argument - -define(<T0>, <r2>) -define(<T1>, <r3>) -define(<T2>, <r4>) -define(<T3>, <r5>) -define(<T4>, <r6>) -define(<T5>, <r7>) -define(<T6>, <r8>) -define(<N3>, <r10>) -define(<L0>, <r11>) -define(<L1>, <r12>) -define(<L2>, <lr>) +define(`RP', `r1') +define(`H', `r0') C Overlaps unused modulo argument + +define(`T0', `r2') +define(`T1', `r3') +define(`T2', `r4') +define(`T3', `r5') +define(`T4', `r6') +define(`T5', `r7') +define(`T6', `r8') +define(`N3', `r10') +define(`L0', `r11') +define(`L1', `r12') +define(`L2', `lr') C ecc_secp224r1_modp (const struct ecc_modulo *m, mp_limb_t *rp) .text diff --git a/arm/ecc-secp256r1-redc.asm b/arm/ecc-secp256r1-redc.asm index f8386c39..e127a2f2 100644 --- a/arm/ecc-secp256r1-redc.asm +++ b/arm/ecc-secp256r1-redc.asm @@ -1,6 +1,6 @@ C arm/ecc-secp256r1-redc.asm -ifelse(< +ifelse(` Copyright (C) 2013 Niels Möller This file is part of GNU Nettle. @@ -28,25 +28,25 @@ ifelse(< You should have received copies of the GNU General Public License and the GNU Lesser General Public License along with this program. If not, see http://www.gnu.org/licenses/. ->) +') .file "ecc-secp256r1-redc.asm" .arm -define(<RP>, <r1>) - -define(<T0>, <r0>) C Overlaps unused modulo argument -define(<T1>, <r2>) -define(<T2>, <r3>) -define(<T3>, <r4>) -define(<T4>, <r5>) -define(<T5>, <r6>) -define(<T6>, <r7>) -define(<T7>, <r8>) -define(<F0>, <r10>) -define(<F1>, <r11>) -define(<F2>, <r12>) -define(<F3>, <lr>) +define(`RP', `r1') + +define(`T0', `r0') C Overlaps unused modulo argument +define(`T1', `r2') +define(`T2', `r3') +define(`T3', `r4') +define(`T4', `r5') +define(`T5', `r6') +define(`T6', `r7') +define(`T7', `r8') +define(`F0', `r10') +define(`F1', `r11') +define(`F2', `r12') +define(`F3', `lr') C ecc_secp256r1_redc (const struct ecc_modulo *m, mp_limb_t *rp) .text diff --git a/arm/ecc-secp384r1-modp.asm b/arm/ecc-secp384r1-modp.asm index 1983ee68..96744ee9 100644 --- a/arm/ecc-secp384r1-modp.asm +++ b/arm/ecc-secp384r1-modp.asm @@ -1,6 +1,6 @@ C arm/ecc-secp384r1-modp.asm -ifelse(< +ifelse(` Copyright (C) 2013 Niels Möller This file is part of GNU Nettle. @@ -28,23 +28,23 @@ ifelse(< You should have received copies of the GNU General Public License and the GNU Lesser General Public License along with this program. If not, see http://www.gnu.org/licenses/. ->) +') .file "ecc-secp384r1-modp.asm" .arm -define(<RP>, <r1>) -define(<T0>, <r0>) -define(<T1>, <r2>) -define(<T2>, <r3>) -define(<T3>, <r4>) -define(<F0>, <r5>) -define(<F1>, <r6>) -define(<F2>, <r7>) -define(<F3>, <r8>) -define(<F4>, <r10>) -define(<N>, <r12>) -define(<H>, <lr>) +define(`RP', `r1') +define(`T0', `r0') +define(`T1', `r2') +define(`T2', `r3') +define(`T3', `r4') +define(`F0', `r5') +define(`F1', `r6') +define(`F2', `r7') +define(`F3', `r8') +define(`F4', `r10') +define(`N', `r12') +define(`H', `lr') C ecc_secp384r1_modp (const struct ecc_modulo *m, mp_limb_t *rp) .text diff --git a/arm/ecc-secp521r1-modp.asm b/arm/ecc-secp521r1-modp.asm index 6d1759ec..22e8dd4e 100644 --- a/arm/ecc-secp521r1-modp.asm +++ b/arm/ecc-secp521r1-modp.asm @@ -1,6 +1,6 @@ C arm/ecc-secp521r1-modp.asm -ifelse(< +ifelse(` Copyright (C) 2013 Niels Möller This file is part of GNU Nettle. @@ -28,22 +28,22 @@ ifelse(< You should have received copies of the GNU General Public License and the GNU Lesser General Public License along with this program. If not, see http://www.gnu.org/licenses/. ->) +') .file "ecc-secp521r1-modp.asm" .arm -define(<HP>, <r0>) -define(<RP>, <r1>) -define(<T0>, <r2>) -define(<T1>, <r3>) -define(<T2>, <r4>) -define(<F0>, <r5>) -define(<F1>, <r6>) -define(<F2>, <r7>) -define(<F3>, <r8>) -define(<H>, <r12>) -define(<N>, <lr>) +define(`HP', `r0') +define(`RP', `r1') +define(`T0', `r2') +define(`T1', `r3') +define(`T2', `r4') +define(`F0', `r5') +define(`F1', `r6') +define(`F2', `r7') +define(`F3', `r8') +define(`H', `r12') +define(`N', `lr') C ecc_secp521r1_modp (const struct ecc_modulo *m, mp_limb_t *rp) .text diff --git a/arm/fat/aes-decrypt-internal-2.asm b/arm/fat/aes-decrypt-internal-2.asm index 2110f310..d51ae411 100644 --- a/arm/fat/aes-decrypt-internal-2.asm +++ b/arm/fat/aes-decrypt-internal-2.asm @@ -1,7 +1,7 @@ C arm/fat/aes-decrypt-internal-2.asm -ifelse(< +ifelse(` Copyright (C) 2015 Niels Möller This file is part of GNU Nettle. @@ -29,7 +29,7 @@ ifelse(< You should have received copies of the GNU General Public License and the GNU Lesser General Public License along with this program. If not, see http://www.gnu.org/licenses/. ->) +') -define(<fat_transform>, <$1_armv6>) -include_src(<arm/v6/aes-decrypt-internal.asm>) +define(`fat_transform', `$1_armv6') +include_src(`arm/v6/aes-decrypt-internal.asm') diff --git a/arm/fat/aes-decrypt-internal.asm b/arm/fat/aes-decrypt-internal.asm index 8d763889..9994fc07 100644 --- a/arm/fat/aes-decrypt-internal.asm +++ b/arm/fat/aes-decrypt-internal.asm @@ -1,7 +1,7 @@ C arm/fat/aes-decrypt-internal.asm -ifelse(< +ifelse(` Copyright (C) 2015 Niels Möller This file is part of GNU Nettle. @@ -29,7 +29,7 @@ ifelse(< You should have received copies of the GNU General Public License and the GNU Lesser General Public License along with this program. If not, see http://www.gnu.org/licenses/. ->) +') -define(<fat_transform>, <$1_arm>) -include_src(<arm/aes-decrypt-internal.asm>) +define(`fat_transform', `$1_arm') +include_src(`arm/aes-decrypt-internal.asm') diff --git a/arm/fat/aes-encrypt-internal-2.asm b/arm/fat/aes-encrypt-internal-2.asm index 490a52be..aeeab39e 100644 --- a/arm/fat/aes-encrypt-internal-2.asm +++ b/arm/fat/aes-encrypt-internal-2.asm @@ -1,7 +1,7 @@ C arm/fat/aes-encrypt-internal-2.asm -ifelse(< +ifelse(` Copyright (C) 2015 Niels Möller This file is part of GNU Nettle. @@ -29,7 +29,7 @@ ifelse(< You should have received copies of the GNU General Public License and the GNU Lesser General Public License along with this program. If not, see http://www.gnu.org/licenses/. ->) +') -define(<fat_transform>, <$1_armv6>) -include_src(<arm/v6/aes-encrypt-internal.asm>) +define(`fat_transform', `$1_armv6') +include_src(`arm/v6/aes-encrypt-internal.asm') diff --git a/arm/fat/aes-encrypt-internal.asm b/arm/fat/aes-encrypt-internal.asm index e695a289..efd14400 100644 --- a/arm/fat/aes-encrypt-internal.asm +++ b/arm/fat/aes-encrypt-internal.asm @@ -1,7 +1,7 @@ C arm/fat/aes-encrypt-internal.asm -ifelse(< +ifelse(` Copyright (C) 2015 Niels Möller This file is part of GNU Nettle. @@ -29,7 +29,7 @@ ifelse(< You should have received copies of the GNU General Public License and the GNU Lesser General Public License along with this program. If not, see http://www.gnu.org/licenses/. ->) +') -define(<fat_transform>, <$1_arm>) -include_src(<arm/aes-encrypt-internal.asm>) +define(`fat_transform', `$1_arm') +include_src(`arm/aes-encrypt-internal.asm') diff --git a/arm/fat/chacha-3core.asm b/arm/fat/chacha-3core.asm index 7938ee89..af6189b7 100644 --- a/arm/fat/chacha-3core.asm +++ b/arm/fat/chacha-3core.asm @@ -1,7 +1,7 @@ C arm/fat/chacha-3core.asm -ifelse(< +ifelse(` Copyright (C) 2020 Niels Möller This file is part of GNU Nettle. @@ -29,8 +29,8 @@ ifelse(< You should have received copies of the GNU General Public License and the GNU Lesser General Public License along with this program. If not, see http://www.gnu.org/licenses/. ->) +') dnl PROLOGUE(_nettle_fat_chacha_3core) picked up by configure -include_src(<arm/neon/chacha-3core.asm>) +include_src(`arm/neon/chacha-3core.asm') diff --git a/arm/fat/chacha-core-internal-2.asm b/arm/fat/chacha-core-internal-2.asm index 66a5c145..3715471c 100644 --- a/arm/fat/chacha-core-internal-2.asm +++ b/arm/fat/chacha-core-internal-2.asm @@ -1,7 +1,7 @@ C arm/fat/chacha-core-internal-2.asm -ifelse(< +ifelse(` Copyright (C) 2015 Niels Möller This file is part of GNU Nettle. @@ -29,9 +29,9 @@ ifelse(< You should have received copies of the GNU General Public License and the GNU Lesser General Public License along with this program. If not, see http://www.gnu.org/licenses/. ->) +') dnl PROLOGUE(_nettle_chacha_core) picked up by configure -define(<fat_transform>, <$1_neon>) -include_src(<arm/neon/chacha-core-internal.asm>) +define(`fat_transform', `$1_neon') +include_src(`arm/neon/chacha-core-internal.asm') diff --git a/arm/fat/salsa20-2core.asm b/arm/fat/salsa20-2core.asm index 43d9a1d0..2d5c6e24 100644 --- a/arm/fat/salsa20-2core.asm +++ b/arm/fat/salsa20-2core.asm @@ -1,7 +1,7 @@ C arm/fat/salsa20-2core.asm -ifelse(< +ifelse(` Copyright (C) 2020 Niels Möller This file is part of GNU Nettle. @@ -29,8 +29,8 @@ ifelse(< You should have received copies of the GNU General Public License and the GNU Lesser General Public License along with this program. If not, see http://www.gnu.org/licenses/. ->) +') dnl PROLOGUE(_nettle_fat_salsa20_2core) picked up by configure -include_src(<arm/neon/salsa20-2core.asm>) +include_src(`arm/neon/salsa20-2core.asm') diff --git a/arm/fat/salsa20-core-internal-2.asm b/arm/fat/salsa20-core-internal-2.asm index 64d90302..f88afd86 100644 --- a/arm/fat/salsa20-core-internal-2.asm +++ b/arm/fat/salsa20-core-internal-2.asm @@ -1,7 +1,7 @@ C arm/fat/salsa20-core-internal-2.asm -ifelse(< +ifelse(` Copyright (C) 2015 Niels Möller This file is part of GNU Nettle. @@ -29,9 +29,9 @@ ifelse(< You should have received copies of the GNU General Public License and the GNU Lesser General Public License along with this program. If not, see http://www.gnu.org/licenses/. ->) +') dnl PROLOGUE(_nettle_salsa20_core) picked up by configure -define(<fat_transform>, <$1_neon>) -include_src(<arm/neon/salsa20-core-internal.asm>) +define(`fat_transform', `$1_neon') +include_src(`arm/neon/salsa20-core-internal.asm') diff --git a/arm/fat/sha1-compress-2.asm b/arm/fat/sha1-compress-2.asm index 4c26c3c6..8586499d 100644 --- a/arm/fat/sha1-compress-2.asm +++ b/arm/fat/sha1-compress-2.asm @@ -1,7 +1,7 @@ C arm/fat/sha1-compress-2.asm -ifelse(< +ifelse(` Copyright (C) 2015 Niels Möller This file is part of GNU Nettle. @@ -29,9 +29,9 @@ ifelse(< You should have received copies of the GNU General Public License and the GNU Lesser General Public License along with this program. If not, see http://www.gnu.org/licenses/. ->) +') dnl PROLOGUE(nettle_sha1_compress) picked up by configure -define(<fat_transform>, <_$1_armv6>) -include_src(<arm/v6/sha1-compress.asm>) +define(`fat_transform', `_$1_armv6') +include_src(`arm/v6/sha1-compress.asm') diff --git a/arm/fat/sha256-compress-2.asm b/arm/fat/sha256-compress-2.asm index e1babb37..36d55e4b 100644 --- a/arm/fat/sha256-compress-2.asm +++ b/arm/fat/sha256-compress-2.asm @@ -1,7 +1,7 @@ C arm/fat/sha256-compress-2.asm -ifelse(< +ifelse(` Copyright (C) 2015 Niels Möller This file is part of GNU Nettle. @@ -29,9 +29,9 @@ ifelse(< You should have received copies of the GNU General Public License and the GNU Lesser General Public License along with this program. If not, see http://www.gnu.org/licenses/. ->) +') dnl PROLOGUE(_nettle_sha256_compress) picked up by configure -define(<fat_transform>, <$1_armv6>) -include_src(<arm/v6/sha256-compress.asm>) +define(`fat_transform', `$1_armv6') +include_src(`arm/v6/sha256-compress.asm') diff --git a/arm/fat/sha3-permute-2.asm b/arm/fat/sha3-permute-2.asm index b423a762..07e1a6e9 100644 --- a/arm/fat/sha3-permute-2.asm +++ b/arm/fat/sha3-permute-2.asm @@ -1,7 +1,7 @@ C arm/fat/sha3-permute-2.asm -ifelse(< +ifelse(` Copyright (C) 2015 Niels Möller This file is part of GNU Nettle. @@ -29,9 +29,9 @@ ifelse(< You should have received copies of the GNU General Public License and the GNU Lesser General Public License along with this program. If not, see http://www.gnu.org/licenses/. ->) +') dnl PROLOGUE(_nettle_sha3_permute) picked up by configure -define(<fat_transform>, <_$1_neon>) -include_src(<arm/neon/sha3-permute.asm>) +define(`fat_transform', `_$1_neon') +include_src(`arm/neon/sha3-permute.asm') diff --git a/arm/fat/sha512-compress-2.asm b/arm/fat/sha512-compress-2.asm index 428604e0..a753ce8c 100644 --- a/arm/fat/sha512-compress-2.asm +++ b/arm/fat/sha512-compress-2.asm @@ -1,7 +1,7 @@ C arm/fat/sha3-compress-2.asm -ifelse(< +ifelse(` Copyright (C) 2015 Niels Möller This file is part of GNU Nettle. @@ -29,9 +29,9 @@ ifelse(< You should have received copies of the GNU General Public License and the GNU Lesser General Public License along with this program. If not, see http://www.gnu.org/licenses/. ->) +') dnl PROLOGUE(_nettle_sha512_compress) picked up by configure -define(<fat_transform>, <$1_neon>) -include_src(<arm/neon/sha512-compress.asm>) +define(`fat_transform', `$1_neon') +include_src(`arm/neon/sha512-compress.asm') diff --git a/arm/fat/umac-nh-2.asm b/arm/fat/umac-nh-2.asm index fc97cc6b..cb3a191a 100644 --- a/arm/fat/umac-nh-2.asm +++ b/arm/fat/umac-nh-2.asm @@ -1,7 +1,7 @@ C arm/fat/umac-nh-2.asm -ifelse(< +ifelse(` Copyright (C) 2015 Niels Möller This file is part of GNU Nettle. @@ -29,9 +29,9 @@ ifelse(< You should have received copies of the GNU General Public License and the GNU Lesser General Public License along with this program. If not, see http://www.gnu.org/licenses/. ->) +') dnl PROLOGUE(_nettle_umac_nh) picked up by configure -define(<fat_transform>, <$1_neon>) -include_src(<arm/neon/umac-nh.asm>) +define(`fat_transform', `$1_neon') +include_src(`arm/neon/umac-nh.asm') diff --git a/arm/fat/umac-nh-n-2.asm b/arm/fat/umac-nh-n-2.asm index 32b7a830..9b005acf 100644 --- a/arm/fat/umac-nh-n-2.asm +++ b/arm/fat/umac-nh-n-2.asm @@ -1,7 +1,7 @@ C arm/fat/umac-nh-n-2.asm -ifelse(< +ifelse(` Copyright (C) 2015 Niels Möller This file is part of GNU Nettle. @@ -29,9 +29,9 @@ ifelse(< You should have received copies of the GNU General Public License and the GNU Lesser General Public License along with this program. If not, see http://www.gnu.org/licenses/. ->) +') dnl PROLOGUE(_nettle_umac_nh_n) picked up by configure -define(<fat_transform>, <$1_neon>) -include_src(<arm/neon/umac-nh-n.asm>) +define(`fat_transform', `$1_neon') +include_src(`arm/neon/umac-nh-n.asm') diff --git a/arm/machine.m4 b/arm/machine.m4 index f982a66a..ccaa79d2 100644 --- a/arm/machine.m4 +++ b/arm/machine.m4 @@ -1,4 +1,4 @@ -define(<QREG>, <ifelse( +define(`QREG', `ifelse( $1, d0, q0, $1, d2, q1, $1, d4, q2, @@ -15,9 +15,9 @@ define(<QREG>, <ifelse( $1, d26, q13, $1, d28, q14, $1, d30, q15, - <NO REGISTER>)>)dnl + `NO REGISTER')')dnl -define(<D0REG>, <ifelse( +define(`D0REG', `ifelse( $1, q0, d0, $1, q1, d2, $1, q2, d4, @@ -34,9 +34,9 @@ define(<D0REG>, <ifelse( $1, q13, d26, $1, q14, d28, $1, q15, d30, - <NO REGISTER>)>)dnl + `NO REGISTER')')dnl -define(<D1REG>, <ifelse( +define(`D1REG', `ifelse( $1, q0, d1, $1, q1, d3, $1, q2, d5, @@ -53,4 +53,4 @@ define(<D1REG>, <ifelse( $1, q13, d27, $1, q14, d29, $1, q15, d31, - <NO REGISTER>)>)dnl + `NO REGISTER')')dnl diff --git a/arm/memxor.asm b/arm/memxor.asm index e4619629..1431a9e7 100644 --- a/arm/memxor.asm +++ b/arm/memxor.asm @@ -1,6 +1,6 @@ C arm/memxor.asm -ifelse(< +ifelse(` Copyright (C) 2013 Niels Möller This file is part of GNU Nettle. @@ -28,7 +28,7 @@ ifelse(< You should have received copies of the GNU General Public License and the GNU Lesser General Public License along with this program. If not, see http://www.gnu.org/licenses/. ->) +') C Possible speedups: C @@ -38,16 +38,16 @@ C cycles, regardless of alignment. C Register usage: -define(<DST>, <r0>) -define(<SRC>, <r1>) -define(<N>, <r2>) -define(<CNT>, <r6>) -define(<TNC>, <r12>) +define(`DST', `r0') +define(`SRC', `r1') +define(`N', `r2') +define(`CNT', `r6') +define(`TNC', `r12') C little-endian and big-endian need to shift in different directions for C alignment correction -define(<S0ADJ>, IF_LE(<lsr>, <lsl>)) -define(<S1ADJ>, IF_LE(<lsl>, <lsr>)) +define(`S0ADJ', IF_LE(`lsr', `lsl')) +define(`S1ADJ', IF_LE(`lsl', `lsr')) .syntax unified @@ -150,13 +150,13 @@ PROLOGUE(nettle_memxor) C Store bytes, one by one. .Lmemxor_leftover: C bring uppermost byte down for saving while preserving lower ones -IF_BE(< ror r3, #24>) +IF_BE(` ror r3, #24') strb r3, [DST], #+1 subs N, #1 beq .Lmemxor_done subs TNC, #8 C bring down next byte, no need to preserve -IF_LE(< lsr r3, #8>) +IF_LE(` lsr r3, #8') bne .Lmemxor_leftover b .Lmemxor_bytes .Lmemxor_odd_done: diff --git a/arm/memxor3.asm b/arm/memxor3.asm index b6c6da49..c2b43c13 100644 --- a/arm/memxor3.asm +++ b/arm/memxor3.asm @@ -1,6 +1,6 @@ C arm/memxor3.asm -ifelse(< +ifelse(` Copyright (C) 2013, 2015 Niels Möller This file is part of GNU Nettle. @@ -28,7 +28,7 @@ ifelse(< You should have received copies of the GNU General Public License and the GNU Lesser General Public License along with this program. If not, see http://www.gnu.org/licenses/. ->) +') C Possible speedups: C @@ -38,21 +38,21 @@ C cycles, regardless of alignment. C Register usage: -define(<DST>, <r0>) -define(<AP>, <r1>) -define(<BP>, <r2>) -define(<N>, <r3>) +define(`DST', `r0') +define(`AP', `r1') +define(`BP', `r2') +define(`N', `r3') C Temporaries r4-r7 -define(<ACNT>, <r8>) -define(<ATNC>, <r10>) -define(<BCNT>, <r11>) -define(<BTNC>, <r12>) +define(`ACNT', `r8') +define(`ATNC', `r10') +define(`BCNT', `r11') +define(`BTNC', `r12') C little-endian and big-endian need to shift in different directions for C alignment correction -define(<S0ADJ>, IF_LE(<lsr>, <lsl>)) -define(<S1ADJ>, IF_LE(<lsl>, <lsr>)) +define(`S0ADJ', IF_LE(`lsr', `lsl')) +define(`S1ADJ', IF_LE(`lsl', `lsr')) .syntax unified @@ -169,13 +169,13 @@ PROLOGUE(nettle_memxor3) .Lmemxor3_au_leftover: C Store a byte at a time C bring uppermost byte down for saving while preserving lower ones -IF_LE(< ror r4, #24>) +IF_LE(` ror r4, #24') strb r4, [DST, #-1]! subs N, #1 beq .Lmemxor3_done subs ACNT, #8 C bring down next byte, no need to preserve -IF_BE(< lsr r4, #8>) +IF_BE(` lsr r4, #8') sub AP, #1 bne .Lmemxor3_au_leftover b .Lmemxor3_bytes @@ -277,19 +277,19 @@ IF_BE(< lsr r4, #8>) C Leftover bytes in r4, low end on LE and high end on BE before C preparatory alignment correction -IF_LE(< ror r4, ACNT>) -IF_BE(< ror r4, ATNC>) +IF_LE(` ror r4, ACNT') +IF_BE(` ror r4, ATNC') C now byte-aligned in high end on LE and low end on BE because we're C working downwards in saving the very first bytes of the buffer .Lmemxor3_uu_leftover: C bring uppermost byte down for saving while preserving lower ones -IF_LE(< ror r4, #24>) +IF_LE(` ror r4, #24') strb r4, [DST, #-1]! subs N, #1 beq .Lmemxor3_done subs ACNT, #8 C bring down next byte, no need to preserve -IF_BE(< lsr r4, #8>) +IF_BE(` lsr r4, #8') bne .Lmemxor3_uu_leftover b .Lmemxor3_bytes diff --git a/arm/neon/chacha-3core.asm b/arm/neon/chacha-3core.asm index 708494b2..bd1cf63c 100644 --- a/arm/neon/chacha-3core.asm +++ b/arm/neon/chacha-3core.asm @@ -1,6 +1,6 @@ C arm/neon/chacha-3core.asm -ifelse(< +ifelse(` Copyright (C) 2020 Niels Möller This file is part of GNU Nettle. @@ -28,33 +28,33 @@ ifelse(< You should have received copies of the GNU General Public License and the GNU Lesser General Public License along with this program. If not, see http://www.gnu.org/licenses/. ->) +') .file "chacha-3core.asm" .fpu neon -define(<DST>, <r0>) -define(<SRC>, <r1>) -define(<ROUNDS>, <r2>) +define(`DST', `r0') +define(`SRC', `r1') +define(`ROUNDS', `r2') C State, X, Y and Z representing consecutive blocks -define(<X0>, <q0>) -define(<X1>, <q1>) -define(<X2>, <q2>) -define(<X3>, <q3>) -define(<Y0>, <q8>) -define(<Y1>, <q9>) -define(<Y2>, <q10>) -define(<Y3>, <q11>) -define(<Z0>, <q12>) -define(<Z1>, <q13>) -define(<Z2>, <q14>) -define(<Z3>, <q15>) - -define(<T0>, <q4>) -define(<T1>, <q5>) -define(<T2>, <q6>) -define(<T3>, <q7>) +define(`X0', `q0') +define(`X1', `q1') +define(`X2', `q2') +define(`X3', `q3') +define(`Y0', `q8') +define(`Y1', `q9') +define(`Y2', `q10') +define(`Y3', `q11') +define(`Z0', `q12') +define(`Z1', `q13') +define(`Z2', `q14') +define(`Z3', `q15') + +define(`T0', `q4') +define(`T1', `q5') +define(`T2', `q6') +define(`T3', `q7') .text .align 4 diff --git a/arm/neon/chacha-core-internal.asm b/arm/neon/chacha-core-internal.asm index 22f843e8..b0a775bd 100644 --- a/arm/neon/chacha-core-internal.asm +++ b/arm/neon/chacha-core-internal.asm @@ -1,6 +1,6 @@ C arm/neon/chacha-core-internal.asm -ifelse(< +ifelse(` Copyright (C) 2013, 2015 Niels Möller This file is part of GNU Nettle. @@ -28,26 +28,26 @@ ifelse(< You should have received copies of the GNU General Public License and the GNU Lesser General Public License along with this program. If not, see http://www.gnu.org/licenses/. ->) +') .file "chacha-core-internal.asm" .fpu neon -define(<DST>, <r0>) -define(<SRC>, <r1>) -define(<ROUNDS>, <r2>) - -define(<X0>, <q0>) -define(<X1>, <q1>) -define(<X2>, <q2>) -define(<X3>, <q3>) -define(<T0>, <q8>) -define(<S0>, <q12>) -define(<S1>, <q13>) -define(<S2>, <q14>) -define(<S3>, <q15>) - -define(<QROUND>, < +define(`DST', `r0') +define(`SRC', `r1') +define(`ROUNDS', `r2') + +define(`X0', `q0') +define(`X1', `q1') +define(`X2', `q2') +define(`X3', `q3') +define(`T0', `q8') +define(`S0', `q12') +define(`S1', `q13') +define(`S2', `q14') +define(`S3', `q15') + +define(`QROUND', ` C x0 += x1, x3 ^= x0, x3 lrot 16 C x2 += x3, x1 ^= x2, x1 lrot 12 C x0 += x1, x3 ^= x0, x3 lrot 8 @@ -76,7 +76,7 @@ define(<QROUND>, < vshl.i32 T0, $2, #7 vshr.u32 $2, $2, #25 veor $2, $2, T0 ->) +') .text .align 4 @@ -121,21 +121,21 @@ PROLOGUE(_nettle_chacha_core) C 12 15 14 13 >>> 3 C different number of elements needs to be C extracted on BE because of different column order -IF_LE(< vext.32 X1, X1, X1, #1>) -IF_BE(< vext.32 X1, X1, X1, #3>) +IF_LE(` vext.32 X1, X1, X1, #1') +IF_BE(` vext.32 X1, X1, X1, #3') vext.32 X2, X2, X2, #2 -IF_LE(< vext.32 X3, X3, X3, #3>) -IF_BE(< vext.32 X3, X3, X3, #1>) +IF_LE(` vext.32 X3, X3, X3, #3') +IF_BE(` vext.32 X3, X3, X3, #1') QROUND(X0, X1, X2, X3) subs ROUNDS, ROUNDS, #2 C Inverse rotation -IF_LE(< vext.32 X1, X1, X1, #3>) -IF_BE(< vext.32 X1, X1, X1, #1>) +IF_LE(` vext.32 X1, X1, X1, #3') +IF_BE(` vext.32 X1, X1, X1, #1') vext.32 X2, X2, X2, #2 -IF_LE(< vext.32 X3, X3, X3, #1>) -IF_BE(< vext.32 X3, X3, X3, #3>) +IF_LE(` vext.32 X3, X3, X3, #1') +IF_BE(` vext.32 X3, X3, X3, #3') bhi .Loop @@ -145,10 +145,10 @@ IF_BE(< vext.32 X3, X3, X3, #3>) vadd.u32 X3, X3, S3 C caller expects result little-endian -IF_BE(< vrev32.u8 X0, X0 +IF_BE(` vrev32.u8 X0, X0 vrev32.u8 X1, X1 vrev32.u8 X2, X2 - vrev32.u8 X3, X3>) + vrev32.u8 X3, X3') vstm DST, {X0,X1,X2,X3} bx lr diff --git a/arm/neon/salsa20-2core.asm b/arm/neon/salsa20-2core.asm index cdb6133a..d622edd6 100644 --- a/arm/neon/salsa20-2core.asm +++ b/arm/neon/salsa20-2core.asm @@ -1,6 +1,6 @@ C arm/neon/salsa20-2core.asm -ifelse(< +ifelse(` Copyright (C) 2020 Niels Möller This file is part of GNU Nettle. @@ -28,28 +28,28 @@ ifelse(< You should have received copies of the GNU General Public License and the GNU Lesser General Public License along with this program. If not, see http://www.gnu.org/licenses/. ->) +') .file "salsa20-2core.asm" .fpu neon -define(<DST>, <r0>) -define(<SRC>, <r1>) -define(<ROUNDS>, <r2>) +define(`DST', `r0') +define(`SRC', `r1') +define(`ROUNDS', `r2') C State, even elements in X, odd elements in Y -define(<X0>, <q0>) -define(<X1>, <q1>) -define(<X2>, <q2>) -define(<X3>, <q3>) -define(<Y0>, <q8>) -define(<Y1>, <q9>) -define(<Y2>, <q10>) -define(<Y3>, <q11>) -define(<T0>, <q12>) -define(<T1>, <q13>) -define(<T2>, <q14>) -define(<T3>, <q15>) +define(`X0', `q0') +define(`X1', `q1') +define(`X2', `q2') +define(`X3', `q3') +define(`Y0', `q8') +define(`Y1', `q9') +define(`Y2', `q10') +define(`Y3', `q11') +define(`T0', `q12') +define(`T1', `q13') +define(`T2', `q14') +define(`T3', `q15') .text .align 4 diff --git a/arm/neon/salsa20-core-internal.asm b/arm/neon/salsa20-core-internal.asm index 20710499..d59d7b80 100644 --- a/arm/neon/salsa20-core-internal.asm +++ b/arm/neon/salsa20-core-internal.asm @@ -1,6 +1,6 @@ C arm/neon/salsa20-core-internal.asm -ifelse(< +ifelse(` Copyright (C) 2013 Niels Möller This file is part of GNU Nettle. @@ -28,29 +28,29 @@ ifelse(< You should have received copies of the GNU General Public License and the GNU Lesser General Public License along with this program. If not, see http://www.gnu.org/licenses/. ->) +') .file "salsa20-core-internal.asm" .fpu neon -define(<DST>, <r0>) -define(<SRC>, <r1>) -define(<ROUNDS>, <r2>) - -define(<X0>, <q0>) -define(<X1>, <q1>) -define(<X2>, <q2>) -define(<X3>, <q3>) -define(<T0>, <q8>) -define(<T1>, <q9>) -define(<M0101>, <q10>) -define(<M0110>, <q11>) -define(<M0011>, <q12>) -define(<S1>, <q13>) -define(<S2>, <q14>) -define(<S3>, <q15>) +define(`DST', `r0') +define(`SRC', `r1') +define(`ROUNDS', `r2') + +define(`X0', `q0') +define(`X1', `q1') +define(`X2', `q2') +define(`X3', `q3') +define(`T0', `q8') +define(`T1', `q9') +define(`M0101', `q10') +define(`M0110', `q11') +define(`M0011', `q12') +define(`S1', `q13') +define(`S2', `q14') +define(`S3', `q15') -define(<QROUND>, < +define(`QROUND', ` vadd.i32 T0, $1, $4 vshl.i32 T1, T0, #7 vshr.u32 T0, T0, #25 @@ -74,7 +74,7 @@ define(<QROUND>, < vshr.u32 T0, T0, #14 veor $1, $1, T0 veor $1, $1, T1 ->) +') .text .align 4 @@ -168,21 +168,21 @@ PROLOGUE(_nettle_salsa20_core) C 6 1 12 11 >>> 1 C different number of elements needs to be C extracted on BE because of different column order -IF_LE(< vext.32 X1, X1, X1, #3>) -IF_BE(< vext.32 X1, X1, X1, #1>) +IF_LE(` vext.32 X1, X1, X1, #3') +IF_BE(` vext.32 X1, X1, X1, #1') vext.32 X2, X2, X2, #2 -IF_LE(< vext.32 X3, X3, X3, #1>) -IF_BE(< vext.32 X3, X3, X3, #3>) +IF_LE(` vext.32 X3, X3, X3, #1') +IF_BE(` vext.32 X3, X3, X3, #3') QROUND(X0, X3, X2, X1) subs ROUNDS, ROUNDS, #2 C Inverse rotation -IF_LE(< vext.32 X1, X1, X1, #1>) -IF_BE(< vext.32 X1, X1, X1, #3>) +IF_LE(` vext.32 X1, X1, X1, #1') +IF_BE(` vext.32 X1, X1, X1, #3') vext.32 X2, X2, X2, #2 -IF_LE(< vext.32 X3, X3, X3, #3>) -IF_BE(< vext.32 X3, X3, X3, #1>) +IF_LE(` vext.32 X3, X3, X3, #3') +IF_BE(` vext.32 X3, X3, X3, #1') bhi .Loop @@ -209,10 +209,10 @@ IF_BE(< vext.32 X3, X3, X3, #1>) vadd.u32 X3, X3, S3 C caller expects result little-endian -IF_BE(< vrev32.u8 X0, X0 +IF_BE(` vrev32.u8 X0, X0 vrev32.u8 X1, X1 vrev32.u8 X2, X2 - vrev32.u8 X3, X3>) + vrev32.u8 X3, X3') vstm DST, {X0,X1,X2,X3} bx lr diff --git a/arm/neon/sha3-permute.asm b/arm/neon/sha3-permute.asm index 43a523f8..46be4bc0 100644 --- a/arm/neon/sha3-permute.asm +++ b/arm/neon/sha3-permute.asm @@ -1,6 +1,6 @@ C arm/neon/sha3-permute.asm -ifelse(< +ifelse(` Copyright (C) 2013 Niels Möller This file is part of GNU Nettle. @@ -28,62 +28,62 @@ ifelse(< You should have received copies of the GNU General Public License and the GNU Lesser General Public License along with this program. If not, see http://www.gnu.org/licenses/. ->) +') .file "sha3-permute.asm" .fpu neon -define(<CTX>, <r0>) -define(<COUNT>, <r1>) -define(<RC>, <r2>) +define(`CTX', `r0') +define(`COUNT', `r1') +define(`RC', `r2') C First column -define(<A0>, <d0>) -define(<A5>, <d2>) -define(<A10>, <d3>) -define(<A15>, <d4>) -define(<A20>, <d5>) - -define(<A1>, <d6>) -define(<A2>, <d7>) -define(<A3>, <d8>) -define(<A4>, <d9>) - -define(<A6>, <d16>) -define(<A7>, <d17>) -define(<A8>, <d18>) -define(<A9>, <d19>) - -define(<A11>, <d20>) -define(<A12>, <d21>) -define(<A13>, <d22>) -define(<A14>, <d23>) - -define(<A16>, <d24>) -define(<A17>, <d25>) -define(<A18>, <d26>) -define(<A19>, <d27>) - -define(<A21>, <d28>) -define(<A22>, <d29>) -define(<A23>, <d30>) -define(<A24>, <d31>) - -define(<T0>, <d10>) -define(<T1>, <d11>) - -define(<C0>, <d1>) -define(<C1>, <d12>) -define(<C2>, <d13>) -define(<C3>, <d14>) -define(<C4>, <d15>) +define(`A0', `d0') +define(`A5', `d2') +define(`A10', `d3') +define(`A15', `d4') +define(`A20', `d5') + +define(`A1', `d6') +define(`A2', `d7') +define(`A3', `d8') +define(`A4', `d9') + +define(`A6', `d16') +define(`A7', `d17') +define(`A8', `d18') +define(`A9', `d19') + +define(`A11', `d20') +define(`A12', `d21') +define(`A13', `d22') +define(`A14', `d23') + +define(`A16', `d24') +define(`A17', `d25') +define(`A18', `d26') +define(`A19', `d27') + +define(`A21', `d28') +define(`A22', `d29') +define(`A23', `d30') +define(`A24', `d31') + +define(`T0', `d10') +define(`T1', `d11') + +define(`C0', `d1') +define(`C1', `d12') +define(`C2', `d13') +define(`C3', `d14') +define(`C4', `d15') C ROL(DST, SRC, COUNT) C Must have SRC != DST -define(<ROL>, < +define(`ROL', ` vshr.u64 $1, $2, #eval(64-$3) vsli.i64 $1, $2, #$3 - >) + ') C sha3_permute(struct sha3_ctx *ctx) .text diff --git a/arm/neon/sha512-compress.asm b/arm/neon/sha512-compress.asm index 828d9ce2..00633c16 100644 --- a/arm/neon/sha512-compress.asm +++ b/arm/neon/sha512-compress.asm @@ -1,6 +1,6 @@ C arm/neon/sha512-compress.asm -ifelse(< +ifelse(` Copyright (C) 2013 Niels Möller This file is part of GNU Nettle. @@ -28,73 +28,73 @@ ifelse(< You should have received copies of the GNU General Public License and the GNU Lesser General Public License along with this program. If not, see http://www.gnu.org/licenses/. ->) +') .file "sha512-compress.asm" .fpu neon -define(<STATE>, <r0>) -define(<INPUT>, <r1>) -define(<K>, <r2>) -define(<COUNT>, <r3>) -define(<SHIFT>, <r12>) - -define(<SA>, <d0>) -define(<SB>, <d1>) -define(<SC>, <d2>) -define(<SD>, <d3>) -define(<SE>, <d4>) -define(<SF>, <d5>) -define(<SG>, <d6>) -define(<SH>, <d7>) -define(<QSAB>, <q0>) -define(<QSCD>, <q1>) -define(<QSEF>, <q2>) -define(<QSGH>, <q3>) +define(`STATE', `r0') +define(`INPUT', `r1') +define(`K', `r2') +define(`COUNT', `r3') +define(`SHIFT', `r12') + +define(`SA', `d0') +define(`SB', `d1') +define(`SC', `d2') +define(`SD', `d3') +define(`SE', `d4') +define(`SF', `d5') +define(`SG', `d6') +define(`SH', `d7') +define(`QSAB', `q0') +define(`QSCD', `q1') +define(`QSEF', `q2') +define(`QSGH', `q3') C d8-d15 are callee-save -define(<DT0>, <d8>) -define(<DT1>, <d9>) -define(<QT01>, <q4>) -define(<DT2>, <d10>) -define(<DT3>, <d11>) -define(<QT23>, <q5>) -define(<DT4>, <d12>) -define(<DT5>, <d13>) -define(<QT45>, <q6>) +define(`DT0', `d8') +define(`DT1', `d9') +define(`QT01', `q4') +define(`DT2', `d10') +define(`DT3', `d11') +define(`QT23', `q5') +define(`DT4', `d12') +define(`DT5', `d13') +define(`QT45', `q6') C Used only when reading the input, can overlap with state -define(<DT6>, <d0>) -define(<DT7>, <d1>) -define(<QT67>, <q0>) - -define(<DW0>, <d16>) -define(<DW1>, <d17>) -define(<DW2>, <d18>) -define(<DW3>, <d19>) -define(<DW4>, <d20>) -define(<DW5>, <d21>) -define(<DW6>, <d22>) -define(<DW7>, <d23>) -define(<DW8>, <d24>) -define(<DW9>, <d25>) -define(<DW10>, <d26>) -define(<DW11>, <d27>) -define(<DW12>, <d28>) -define(<DW13>, <d29>) -define(<DW14>, <d30>) -define(<DW15>, <d31>) -define(<QW0001>, <q8>) -define(<QW0203>, <q9>) -define(<QW0405>, <q10>) -define(<QW0607>, <q11>) -define(<QW0809>, <q12>) -define(<QW1011>, <q13>) -define(<QW1213>, <q14>) -define(<QW1415>, <q15>) - -define(<EXPAND_ME>, <$1>) -define(<W>, <EXPAND_ME(<DW>eval(($1) % 16))>) +define(`DT6', `d0') +define(`DT7', `d1') +define(`QT67', `q0') + +define(`DW0', `d16') +define(`DW1', `d17') +define(`DW2', `d18') +define(`DW3', `d19') +define(`DW4', `d20') +define(`DW5', `d21') +define(`DW6', `d22') +define(`DW7', `d23') +define(`DW8', `d24') +define(`DW9', `d25') +define(`DW10', `d26') +define(`DW11', `d27') +define(`DW12', `d28') +define(`DW13', `d29') +define(`DW14', `d30') +define(`DW15', `d31') +define(`QW0001', `q8') +define(`QW0203', `q9') +define(`QW0405', `q10') +define(`QW0607', `q11') +define(`QW0809', `q12') +define(`QW1011', `q13') +define(`QW1213', `q14') +define(`QW1415', `q15') + +define(`EXPAND_ME', `$1') +define(`W', `EXPAND_ME(`DW'eval(($1) % 16))') C If x = W(i+14), y = w(i+1), we xor in parallel C @@ -105,7 +105,7 @@ C x >> 61 y >> 8 C xor x >> 6 y >> 7 C ----------------------------- C DT0 DT1 -define(<EXPN>, < +define(`EXPN', ` vshl.i64 DT0, W($1+14), #45 vshl.i64 DT1, W($1 + 1), #63 vshr.u64 DT2, W($1+14), #19 @@ -123,7 +123,7 @@ define(<EXPN>, < veor.i64 QT01, QT01, QT45 vadd.i64 W($1), W($1), DT0 vadd.i64 W($1), W($1), DT1 ->) +') C ROUND(A,B,C,D,E,F,G,H,i) C @@ -148,7 +148,7 @@ C e << 23 a << 25 C xor e >> 41 a >> 39 C ---------------------------- C DT0 DT1 -define(<ROUND>, < +define(`ROUND', ` vshl.i64 DT0, $5, #50 vshl.i64 DT1, $1, #36 vshr.u64 DT2, $5, #14 @@ -180,7 +180,7 @@ define(<ROUND>, < vadd.i64 DT1, DT1, DT2 vadd.i64 $4, $4, $8 vadd.i64 $8, $8, DT1 ->) +') C void C _nettle_sha512_compress(uint64_t *state, const uint8_t *input, const uint64_t *k) diff --git a/arm/neon/umac-nh-n.asm b/arm/neon/umac-nh-n.asm index 42686e02..7e36afe2 100644 --- a/arm/neon/umac-nh-n.asm +++ b/arm/neon/umac-nh-n.asm @@ -1,6 +1,6 @@ C arm/neon/umac-nh-n.asm -ifelse(< +ifelse(` Copyright (C) 2013 Niels Möller This file is part of GNU Nettle. @@ -28,33 +28,33 @@ ifelse(< You should have received copies of the GNU General Public License and the GNU Lesser General Public License along with this program. If not, see http://www.gnu.org/licenses/. ->) +') .file "umac-nh.asm" .fpu neon -define(<OUT>, <r0>) -define(<ITERS>, <r1>) -define(<KEY>, <r2>) -define(<LENGTH>, <r3>) -define(<MSG>, <r12>) -define(<SHIFT>, <r14>) - -define(<QA>, <q0>) -define(<QB>, <q1>) -define(<QY0>, <q3>) C Accumulates for the first two operations. -define(<DM>, <d4>) -define(<QY1>, <q4>) C Used for 3 and 4 iterations. -define(<QC>, <q5>) -define(<QD>, <q6>) -define(<QLEFT>, <q8>) -define(<QRIGHT>, <q9>) -define(<QT0>, <q10>) -define(<QT1>, <q11>) -define(<QT2>, <q12>) -define(<QK0>, <q13>) -define(<QK1>, <q14>) -define(<QK2>, <q15>) +define(`OUT', `r0') +define(`ITERS', `r1') +define(`KEY', `r2') +define(`LENGTH', `r3') +define(`MSG', `r12') +define(`SHIFT', `r14') + +define(`QA', `q0') +define(`QB', `q1') +define(`QY0', `q3') C Accumulates for the first two operations. +define(`DM', `d4') +define(`QY1', `q4') C Used for 3 and 4 iterations. +define(`QC', `q5') +define(`QD', `q6') +define(`QLEFT', `q8') +define(`QRIGHT', `q9') +define(`QT0', `q10') +define(`QT1', `q11') +define(`QT2', `q12') +define(`QK0', `q13') +define(`QK1', `q14') +define(`QK2', `q15') C FIXME: Try permuting subkeys using vld4, vzip or similar. diff --git a/arm/neon/umac-nh.asm b/arm/neon/umac-nh.asm index 38be654c..56ea6454 100644 --- a/arm/neon/umac-nh.asm +++ b/arm/neon/umac-nh.asm @@ -1,6 +1,6 @@ C arm/neon/umac-nh.asm -ifelse(< +ifelse(` Copyright (C) 2013 Niels Möller This file is part of GNU Nettle. @@ -28,26 +28,26 @@ ifelse(< You should have received copies of the GNU General Public License and the GNU Lesser General Public License along with this program. If not, see http://www.gnu.org/licenses/. ->) +') .file "umac-nh.asm" .fpu neon -define(<KEY>, <r0>) -define(<LENGTH>, <r1>) -define(<MSG>, <r2>) -define(<SHIFT>, <r3>) - -define(<QA>, <q0>) -define(<QB>, <q1>) -define(<DM>, <d16>) -define(<QLEFT>, <q9>) -define(<QRIGHT>, <q10>) -define(<QY>, <q11>) -define(<QT0>, <q12>) -define(<QT1>, <q13>) -define(<QK0>, <q14>) -define(<QK1>, <q15>) +define(`KEY', `r0') +define(`LENGTH', `r1') +define(`MSG', `r2') +define(`SHIFT', `r3') + +define(`QA', `q0') +define(`QB', `q1') +define(`DM', `d16') +define(`QLEFT', `q9') +define(`QRIGHT', `q10') +define(`QY', `q11') +define(`QT0', `q12') +define(`QT1', `q13') +define(`QK0', `q14') +define(`QK1', `q15') .text .align 3 @@ -98,7 +98,7 @@ PROLOGUE(_nettle_umac_nh) vadd.i64 D0REG(QY), D0REG(QY), D1REG(QY) C return value needs to respect word order mandated by AAPCS -IF_LE(< vmov r0, r1, D0REG(QY)>) -IF_BE(< vmov r1, r0, D0REG(QY)>) +IF_LE(` vmov r0, r1, D0REG(QY)') +IF_BE(` vmov r1, r0, D0REG(QY)') bx lr EPILOGUE(_nettle_umac_nh) diff --git a/arm/v6/aes-decrypt-internal.asm b/arm/v6/aes-decrypt-internal.asm index 45801050..e8c6e91a 100644 --- a/arm/v6/aes-decrypt-internal.asm +++ b/arm/v6/aes-decrypt-internal.asm @@ -1,6 +1,6 @@ C arm/v6/aes-decrypt-internal.asm -ifelse(< +ifelse(` Copyright (C) 2013 Niels Möller This file is part of GNU Nettle. @@ -28,42 +28,42 @@ ifelse(< You should have received copies of the GNU General Public License and the GNU Lesser General Public License along with this program. If not, see http://www.gnu.org/licenses/. ->) +') .arch armv6 -include_src(<arm/aes.m4>) +include_src(`arm/aes.m4') -define(<PARAM_ROUNDS>, <r0>) -define(<PARAM_KEYS>, <r1>) -define(<TABLE>, <r2>) -define(<LENGTH>, <r3>) +define(`PARAM_ROUNDS', `r0') +define(`PARAM_KEYS', `r1') +define(`TABLE', `r2') +define(`LENGTH', `r3') C On stack: DST, SRC -define(<W0>, <r4>) -define(<W1>, <r5>) -define(<W2>, <r6>) -define(<W3>, <r7>) -define(<T0>, <r8>) -define(<COUNT>, <r10>) -define(<KEY>, <r11>) - -define(<X0>, <r0>) C Overlaps PARAM_ROUNDS and PARAM_KEYS -define(<X1>, <r1>) -define(<X2>, <r12>) -define(<X3>, <r14>) C lr - -define(<FRAME_ROUNDS>>, <[sp]>) -define(<FRAME_KEYS>, <[sp, #+4]>) +define(`W0', `r4') +define(`W1', `r5') +define(`W2', `r6') +define(`W3', `r7') +define(`T0', `r8') +define(`COUNT', `r10') +define(`KEY', `r11') + +define(`X0', `r0') C Overlaps PARAM_ROUNDS and PARAM_KEYS +define(`X1', `r1') +define(`X2', `r12') +define(`X3', `r14') C lr + +define(`FRAME_ROUNDS', `[sp]') +define(`FRAME_KEYS', `[sp, #+4]') C 8 saved registers -define(<FRAME_DST>, <[sp, #+40]>) -define(<FRAME_SRC>, <[sp, #+44]>) +define(`FRAME_DST', `[sp, #+40]') +define(`FRAME_SRC', `[sp, #+44]') -define(<SRC>, <r12>) C Overlap registers used in inner loop. -define(<DST>, <COUNT>) +define(`SRC', `r12') C Overlap registers used in inner loop. +define(`DST', `COUNT') C AES_DECRYPT_ROUND(x0,x1,x2,x3,w0,w1,w2,w3,key) -define(<AES_DECRYPT_ROUND>, < +define(`AES_DECRYPT_ROUND', ` uxtb T0, $1 ldr $5, [TABLE, T0, lsl #2] uxtb T0, $2 @@ -121,7 +121,7 @@ define(<AES_DECRYPT_ROUND>, < eor $6, $6, $2 eor $7, $7, $3 eor $8, $8, $4 ->) +') .file "aes-decrypt-internal.asm" diff --git a/arm/v6/aes-encrypt-internal.asm b/arm/v6/aes-encrypt-internal.asm index 576cf8e0..6cbd66d6 100644 --- a/arm/v6/aes-encrypt-internal.asm +++ b/arm/v6/aes-encrypt-internal.asm @@ -1,6 +1,6 @@ C arm/v6/aes-encrypt-internal.asm -ifelse(< +ifelse(` Copyright (C) 2013 Niels Möller This file is part of GNU Nettle. @@ -28,11 +28,11 @@ ifelse(< You should have received copies of the GNU General Public License and the GNU Lesser General Public License along with this program. If not, see http://www.gnu.org/licenses/. ->) +') .arch armv6 -include_src(<arm/aes.m4>) +include_src(`arm/aes.m4') C Benchmarked at at 706, 870, 963 cycles/block on cortex A9, C for 128, 192 and 256 bit key sizes. @@ -40,38 +40,38 @@ C for 128, 192 and 256 bit key sizes. C Possible improvements: More efficient load and store with C aligned accesses. Better scheduling. -define(<PARAM_ROUNDS>, <r0>) -define(<PARAM_KEYS>, <r1>) -define(<TABLE>, <r2>) -define(<LENGTH>, <r3>) +define(`PARAM_ROUNDS', `r0') +define(`PARAM_KEYS', `r1') +define(`TABLE', `r2') +define(`LENGTH', `r3') C On stack: DST, SRC -define(<W0>, <r4>) -define(<W1>, <r5>) -define(<W2>, <r6>) -define(<W3>, <r7>) -define(<T0>, <r8>) -define(<COUNT>, <r10>) -define(<KEY>, <r11>) - -define(<X0>, <r0>) C Overlaps PARAM_ROUNDS and PARAM_KEYS -define(<X1>, <r1>) -define(<X2>, <r12>) -define(<X3>, <r14>) C lr - -define(<FRAME_ROUNDS>>, <[sp]>) -define(<FRAME_KEYS>, <[sp, #+4]>) +define(`W0', `r4') +define(`W1', `r5') +define(`W2', `r6') +define(`W3', `r7') +define(`T0', `r8') +define(`COUNT', `r10') +define(`KEY', `r11') + +define(`X0', `r0') C Overlaps PARAM_ROUNDS and PARAM_KEYS +define(`X1', `r1') +define(`X2', `r12') +define(`X3', `r14') C lr + +define(`FRAME_ROUNDS', `[sp]') +define(`FRAME_KEYS', `[sp, #+4]') C 8 saved registers -define(<FRAME_DST>, <[sp, #+40]>) -define(<FRAME_SRC>, <[sp, #+44]>) +define(`FRAME_DST', `[sp, #+40]') +define(`FRAME_SRC', `[sp, #+44]') -define(<SRC>, <r12>) C Overlap registers used in inner loop. -define(<DST>, <COUNT>) +define(`SRC', `r12') C Overlap registers used in inner loop. +define(`DST', `COUNT') C 53 instr. C It's tempting to use eor with rotation, but that's slower. C AES_ENCRYPT_ROUND(x0,x1,x2,x3,w0,w1,w2,w3,key) -define(<AES_ENCRYPT_ROUND>, < +define(`AES_ENCRYPT_ROUND', ` uxtb T0, $1 ldr $5, [TABLE, T0, lsl #2] uxtb T0, $2 @@ -129,7 +129,7 @@ define(<AES_ENCRYPT_ROUND>, < eor $6, $6, $2 eor $7, $7, $3 eor $8, $8, $4 ->) +') .file "aes-encrypt-internal.asm" diff --git a/arm/v6/sha1-compress.asm b/arm/v6/sha1-compress.asm index f60b4230..be6170b3 100644 --- a/arm/v6/sha1-compress.asm +++ b/arm/v6/sha1-compress.asm @@ -1,6 +1,6 @@ C arm/v6/sha1-compress.asm -ifelse(< +ifelse(` Copyright (C) 2013 Niels Möller This file is part of GNU Nettle. @@ -28,34 +28,34 @@ ifelse(< You should have received copies of the GNU General Public License and the GNU Lesser General Public License along with this program. If not, see http://www.gnu.org/licenses/. ->) +') .file "sha1-compress.asm" .arch armv6 -define(<STATE>, <r0>) -define(<INPUT>, <r1>) -define(<SA>, <r2>) -define(<SB>, <r3>) -define(<SC>, <r4>) -define(<SD>, <r5>) -define(<SE>, <r6>) -define(<T0>, <r7>) -define(<SHIFT>, <r8>) -define(<WPREV>, <r10>) -define(<W>, <r12>) -define(<K>, <lr>) +define(`STATE', `r0') +define(`INPUT', `r1') +define(`SA', `r2') +define(`SB', `r3') +define(`SC', `r4') +define(`SD', `r5') +define(`SE', `r6') +define(`T0', `r7') +define(`SHIFT', `r8') +define(`WPREV', `r10') +define(`W', `r12') +define(`K', `lr') C FIXME: Could avoid a mov with even and odd variants. -define(<LOAD>, < +define(`LOAD', ` ldr T0, [INPUT], #+4 sel W, WPREV, T0 ror W, W, SHIFT mov WPREV, T0 -IF_LE(< rev W, W>) +IF_LE(` rev W, W') str W, [SP,#eval(4*$1)] ->) -define(<EXPN>, < +') +define(`EXPN', ` ldr W, [sp, #+eval(4*$1)] ldr T0, [sp, #+eval(4*(($1 + 2) % 16))] eor W, W, T0 @@ -65,11 +65,11 @@ define(<EXPN>, < eor W, W, T0 ror W, W, #31 str W, [sp, #+eval(4*$1)] ->) +') C F1(B,C,D) = D^(B&(C^D)) C ROUND1(A,B,C,D,E) -define(<ROUND1>, < +define(`ROUND1', ` eor T0, $3, $4 add $5, $5, K and T0, T0, $2 @@ -78,9 +78,9 @@ define(<ROUND1>, < add $5, $5, W ror $2, $2, #2 add $5, $5, T0 ->) +') C F2(B,C,D) = B^C^D -define(<ROUND2>, < +define(`ROUND2', ` eor T0, $2, $4 add $5, $5, K eor T0, T0, $3 @@ -88,9 +88,9 @@ define(<ROUND2>, < add $5, $5, W ror $2, $2, #2 add $5, $5, T0 ->) +') C F3(B,C,D) = (B&C) | (D & (B|C)) = (B & (C ^ D)) + (C & D) -define(<ROUND3>, < +define(`ROUND3', ` eor T0, $3, $4 add $5, $5, K and T0, T0, $2 @@ -100,7 +100,7 @@ define(<ROUND3>, < and T0, $3, $4 ror $2, $2, #2 add $5, $5, T0 ->) +') C void nettle_sha1_compress(uint32_t *state, const uint8_t *input) .text @@ -127,12 +127,12 @@ PROLOGUE(nettle_sha1_compress) lsl SHIFT, SHIFT, #3 mov T0, #0 movne T0, #-1 -IF_LE(< lsl W, T0, SHIFT>) -IF_BE(< lsr W, T0, SHIFT>) +IF_LE(` lsl W, T0, SHIFT') +IF_BE(` lsr W, T0, SHIFT') uadd8 T0, T0, W C Sets APSR.GE bits C on BE rotate right by 32-SHIFT bits C because there is no rotate left -IF_BE(< rsb SHIFT, SHIFT, #32>) +IF_BE(` rsb SHIFT, SHIFT, #32') ldr K, .LK1 ldm STATE, {SA,SB,SC,SD,SE} diff --git a/arm/v6/sha256-compress.asm b/arm/v6/sha256-compress.asm index 324730c7..3c021284 100644 --- a/arm/v6/sha256-compress.asm +++ b/arm/v6/sha256-compress.asm @@ -1,6 +1,6 @@ C arm/v6/sha256-compress.asm -ifelse(< +ifelse(` Copyright (C) 2013 Niels Möller This file is part of GNU Nettle. @@ -28,38 +28,38 @@ ifelse(< You should have received copies of the GNU General Public License and the GNU Lesser General Public License along with this program. If not, see http://www.gnu.org/licenses/. ->) +') .file "sha256-compress.asm" .arch armv6 -define(<STATE>, <r0>) -define(<INPUT>, <r1>) -define(<K>, <r2>) -define(<SA>, <r3>) -define(<SB>, <r4>) -define(<SC>, <r5>) -define(<SD>, <r6>) -define(<SE>, <r7>) -define(<SF>, <r8>) -define(<SG>, <r10>) -define(<SH>, <r11>) -define(<T0>, <r12>) -define(<T1>, <r1>) C Overlap INPUT -define(<COUNT>, <r0>) C Overlap STATE -define(<W>, <r14>) +define(`STATE', `r0') +define(`INPUT', `r1') +define(`K', `r2') +define(`SA', `r3') +define(`SB', `r4') +define(`SC', `r5') +define(`SD', `r6') +define(`SE', `r7') +define(`SF', `r8') +define(`SG', `r10') +define(`SH', `r11') +define(`T0', `r12') +define(`T1', `r1') C Overlap INPUT +define(`COUNT', `r0') C Overlap STATE +define(`W', `r14') C Used for data load -define(<I0>, <r3>) -define(<I1>, <r4>) -define(<I2>, <r5>) -define(<I3>, <r6>) -define(<I4>, <r7>) -define(<DST>, <r8>) -define(<SHIFT>, <r10>) -define(<ILEFT>, <r11>) - -define(<EXPN>, < +define(`I0', `r3') +define(`I1', `r4') +define(`I2', `r5') +define(`I3', `r6') +define(`I4', `r7') +define(`DST', `r8') +define(`SHIFT', `r10') +define(`ILEFT', `r11') + +define(`EXPN', ` ldr W, [sp, #+eval(4*$1)] ldr T0, [sp, #+eval(4*(($1 + 14) % 16))] ror T1, T0, #17 @@ -74,7 +74,7 @@ define(<EXPN>, < eor T1, T1, T0, lsr #3 add W, W, T1 str W, [sp, #+eval(4*$1)] ->) +') C ROUND(A,B,C,D,E,F,G,H) C @@ -89,7 +89,7 @@ C S0(A) = A<<<30 ^ A<<<19 ^ A<<<10 C Choice (E, F, G) = G^(E&(F^G)) C Majority (A,B,C) = (A&B) + (C&(A^B)) -define(<ROUND>, < +define(`ROUND', ` ror T0, $5, #6 eor T0, T0, $5, ror #11 eor T0, T0, $5, ror #25 @@ -111,12 +111,12 @@ define(<ROUND>, < eor T0, $1, $2 and T0, T0, $3 add $8, $8, T0 ->) +') -define(<NOEXPN>, < +define(`NOEXPN', ` ldr W, [sp, + $1] add $1, $1, #4 ->) +') C void C _nettle_sha256_compress(uint32_t *state, const uint8_t *input, const uint32_t *k) @@ -137,12 +137,12 @@ PROLOGUE(_nettle_sha256_compress) lsl SHIFT, SHIFT, #3 mov T0, #0 movne T0, #-1 -IF_LE(< lsl I1, T0, SHIFT>) -IF_BE(< lsr I1, T0, SHIFT>) +IF_LE(` lsl I1, T0, SHIFT') +IF_BE(` lsr I1, T0, SHIFT') uadd8 T0, T0, I1 C Sets APSR.GE bits C on BE rotate right by 32-SHIFT bits C because there is no rotate left -IF_BE(< rsb SHIFT, SHIFT, #32>) +IF_BE(` rsb SHIFT, SHIFT, #32') mov DST, sp mov ILEFT, #4 @@ -150,16 +150,16 @@ IF_BE(< rsb SHIFT, SHIFT, #32>) ldm INPUT!, {I1,I2,I3,I4} sel I0, I0, I1 ror I0, I0, SHIFT -IF_LE(< rev I0, I0>) +IF_LE(` rev I0, I0') sel I1, I1, I2 ror I1, I1, SHIFT -IF_LE(< rev I1, I1>) +IF_LE(` rev I1, I1') sel I2, I2, I3 ror I2, I2, SHIFT -IF_LE(< rev I2, I2>) +IF_LE(` rev I2, I2') sel I3, I3, I4 ror I3, I3, SHIFT -IF_LE(< rev I3, I3>) +IF_LE(` rev I3, I3') subs ILEFT, ILEFT, #1 stm DST!, {I0,I1,I2,I3} mov I0, I4 |