diff options
author | Niels Möller <nisse@lysator.liu.se> | 2005-10-02 22:43:42 +0200 |
---|---|---|
committer | Niels Möller <nisse@lysator.liu.se> | 2005-10-02 22:43:42 +0200 |
commit | 54467c823ecf5bfd5a7047661a5f2d26025d56bf (patch) | |
tree | ab58bbd724d35ff971c17d97bb3112bb9c947cf5 /x86 | |
parent | 40dba09bd40ada9b216a75ed355c9c1ebd5f9432 (diff) | |
download | nettle-54467c823ecf5bfd5a7047661a5f2d26025d56bf.tar.gz |
(_nettle_sha1_compress): Do byte order
conversion, and store the input data on the stack. This leaves one
more register free for other uses.
Rev: src/nettle/x86/sha1-compress.asm:1.8
Diffstat (limited to 'x86')
-rw-r--r-- | x86/sha1-compress.asm | 60 |
1 files changed, 44 insertions, 16 deletions
diff --git a/x86/sha1-compress.asm b/x86/sha1-compress.asm index 00d8d836..cff69643 100644 --- a/x86/sha1-compress.asm +++ b/x86/sha1-compress.asm @@ -23,11 +23,11 @@ define(<SB>,<%ebx>) define(<SC>,<%ecx>) define(<SD>,<%edx>) define(<SE>,<%ebp>) -define(<DATA>,<%esi>) +define(<DATA>,<%esp>) define(<TMP>,<%edi>) -C Extra temporary needed by F3. Can we get rid of it? -define(<TMP2>,<(%esp)>) -define(<K>, <4(%esp)>)dnl +define(<TMP2>,<%esi>) + +define(<K>, <64(%esp)>)dnl C Constants define(<K1VALUE>, <<$>0x5A827999>) C Rounds 0-19 define(<K2VALUE>, <<$>0x6ED9EBA1>) C Rounds 20-39 @@ -81,6 +81,7 @@ define(<F3>, < andl $3, TMP orl TMP2, TMP >)dnl + C The form of one sha1 round is C C a' = e + a <<< 5 + f( b, c, d ) + k + w; @@ -110,9 +111,16 @@ C adding, and then rotating back. roll <$>30, $2 >)dnl +C SWAP(from, to, register) +define(<SWAP>, < + movl $1, $3 + bswap $3 + movl $3, $2 +>)dnl + .file "sha1-compress.asm" - C sha1_compress(uint32_t *state, uint32_t *data) + C sha1_compress(uint32_t *state, uint8_t *data) .text .align 16 @@ -121,24 +129,44 @@ C adding, and then rotating back. C_NAME(_nettle_sha1_compress): C save all registers that need to be saved - pushl %ebx C 20(%esp) - pushl %ebp C 16(%esp) - pushl %esi C 12(%esp) - pushl %edi C 8(%esp) + pushl %ebx C 80(%esp) + pushl %ebp C 76(%esp) + pushl %esi C 72(%esp) + pushl %edi C 68(%esp) - pushl K1VALUE C 4(%esp) - subl $4, %esp C 0(%esp) = TMP + pushl K1VALUE C 64(%esp) + subl $64, %esp C %esp = W + C Load and byteswap data + movl 92(%esp), TMP + + C No scheduling of these instructions, just use a couple of registers, + C and hope the out-of-order unit can keep up. + SWAP( (TMP), (DATA), %eax) + SWAP( 4(TMP), 4(DATA), %ebx) + SWAP( 8(TMP), 8(DATA), %ecx) + SWAP(12(TMP), 12(DATA), %edx) + SWAP(16(TMP), 16(DATA), %eax) + SWAP(20(TMP), 20(DATA), %ebx) + SWAP(24(TMP), 24(DATA), %ecx) + SWAP(28(TMP), 28(DATA), %edx) + SWAP(32(TMP), 32(DATA), %eax) + SWAP(36(TMP), 36(DATA), %ebx) + SWAP(40(TMP), 40(DATA), %ecx) + SWAP(44(TMP), 44(DATA), %edx) + SWAP(48(TMP), 48(DATA), %eax) + SWAP(52(TMP), 52(DATA), %ebx) + SWAP(56(TMP), 56(DATA), %ecx) + SWAP(60(TMP), 60(DATA), %edx) + C load the state vector - movl 28(%esp),TMP + movl 88(%esp),TMP movl (TMP), SA movl 4(TMP), SB movl 8(TMP), SC movl 12(TMP), SD movl 16(TMP), SE - movl 32(%esp), DATA - ROUND(SA, SB, SC, SD, SE, <F1>, NOEXPAND( 0)) ROUND(SE, SA, SB, SC, SD, <F1>, NOEXPAND( 1)) ROUND(SD, SE, SA, SB, SC, <F1>, NOEXPAND( 2)) @@ -239,14 +267,14 @@ C_NAME(_nettle_sha1_compress): EXPAND(79) ROUND(SB, SC, SD, SE, SA, <F2>, TMP) C Update the state vector - movl 28(%esp),TMP + movl 88(%esp),TMP addl SA, (TMP) addl SB, 4(TMP) addl SC, 8(TMP) addl SD, 12(TMP) addl SE, 16(TMP) - addl $8, %esp + addl $68, %esp popl %edi popl %esi popl %ebp |