diff options
author | Niels Möller <nisse@lysator.liu.se> | 2005-10-03 21:25:52 +0200 |
---|---|---|
committer | Niels Möller <nisse@lysator.liu.se> | 2005-10-03 21:25:52 +0200 |
commit | 49e411a403ff6bd37044c0e619b704d1d7921a4b (patch) | |
tree | 87668605cad278b1c72e628a43bcd3263f0d3915 /x86 | |
parent | 27c84453295ea116a814b62be95c7e770685c252 (diff) | |
download | nettle-49e411a403ff6bd37044c0e619b704d1d7921a4b.tar.gz |
(OFFSET): New macro.
(F3): Eliminated a movl.
(ROUNd): New argument, for k. When using F3, it's TMP3, on the
stack, otherwise, it is kept in TMP2, a register.
Rev: src/nettle/x86/sha1-compress.asm:1.9
Diffstat (limited to 'x86')
-rw-r--r-- | x86/sha1-compress.asm | 298 |
1 files changed, 143 insertions, 155 deletions
diff --git a/x86/sha1-compress.asm b/x86/sha1-compress.asm index cff69643..a6b02aa6 100644 --- a/x86/sha1-compress.asm +++ b/x86/sha1-compress.asm @@ -25,15 +25,27 @@ define(<SD>,<%edx>) define(<SE>,<%ebp>) define(<DATA>,<%esp>) define(<TMP>,<%edi>) -define(<TMP2>,<%esi>) +define(<TMP2>,<%esi>) C Used by SWAP and F3 -define(<K>, <64(%esp)>)dnl +define(<TMP3>, <64(%esp)>)dnl C Constants define(<K1VALUE>, <<$>0x5A827999>) C Rounds 0-19 define(<K2VALUE>, <<$>0x6ED9EBA1>) C Rounds 20-39 define(<K3VALUE>, <<$>0x8F1BBCDC>) C Rounds 40-59 define(<K4VALUE>, <<$>0xCA62C1D6>) C Rounds 60-79 + +C OFFSET(i) +C Expands to 4*i, or to the empty string if i is zero +define(<OFFSET>, <ifelse($1,0,,eval(4*$1))>) +C Reads the input via TMP2 into register, byteswaps it, and stores it in the DATA array. +C SWAP(index, register) +define(<SWAP>, < + movl OFFSET($1)(TMP2), $2 + bswap $2 + movl $2, OFFSET($1) (DATA) +>)dnl + C expand(i) is the expansion function C C W[i] = (W[i - 16] ^ W[i - 14] ^ W[i - 8] ^ W[i - 3]) <<< 1 @@ -43,14 +55,14 @@ C C Result is stored back in W[i], and also left in TMP, the only C register that is used. define(<EXPAND>, < - movl eval(4 * ($1 & 15)) (DATA), TMP - xorl eval(4 * (($1 + 2) & 15)) (DATA), TMP - xorl eval(4 * (($1 + 8) & 15)) (DATA), TMP - xorl eval(4 * (($1 + 13) & 15)) (DATA), TMP + movl OFFSET(eval($1 & 15)) (DATA), TMP + xorl OFFSET(eval(($1 + 2) & 15)) (DATA), TMP + xorl OFFSET(eval(($1 + 8) & 15)) (DATA), TMP + xorl OFFSET(eval(($1 + 13) & 15)) (DATA), TMP roll <$>1, TMP - movl TMP, eval(4 * ($1 & 15)) (DATA) ->)dnl -define(<NOEXPAND>, <eval(4 * ($1 & 15)) (DATA)>)dnl + movl TMP, OFFSET(eval($1 & 15)) (DATA)>)dnl +define(<NOEXPAND>, <OFFSET($1) (DATA)>)dnl + C The f functions, C C f1(x,y,z) = z ^ (x & (y ^ z)) @@ -59,28 +71,24 @@ C f3(x,y,z) = (x & y) | (z & (x | y)) C f4 = f2 C C The macro Fk(x,y,z) computes = fk(x,y,z). -C Result is left in TMP. May the TMP2 stackslot. +C Result is left in TMP. define(<F1>, < movl $3, TMP xorl $2, TMP andl $1, TMP - xorl $3, TMP ->)dnl + xorl $3, TMP>)dnl define(<F2>, < movl $1, TMP xorl $2, TMP - xorl $3, TMP ->)dnl -C We have one register too little. Can this be rewritten so we don't need the stack? + xorl $3, TMP>)dnl +C Uses TMP2 define(<F3>, < - movl $1, TMP - andl $2, TMP - movl TMP, TMP2 + movl $1, TMP2 + andl $2, TMP2 movl $1, TMP orl $2, TMP andl $3, TMP - orl TMP2, TMP ->)dnl + orl TMP2, TMP>)dnl C The form of one sha1 round is C @@ -96,10 +104,10 @@ C C e += a <<< 5 + f( b, c, d ) + k + w; C b <<<= 30 C -C ROUND(a,b,c,d,e,f,w) +C ROUND(a,b,c,d,e,f,k,w) define(<ROUND>, < - addl K, $5 addl $7, $5 + addl ifelse($8,,TMP,$8), $5 $6($2,$3,$4) addl TMP, $5 @@ -108,15 +116,7 @@ C adding, and then rotating back. movl $1, TMP roll <$>5, TMP addl TMP, $5 - roll <$>30, $2 ->)dnl - -C SWAP(from, to, register) -define(<SWAP>, < - movl $1, $3 - bswap $3 - movl $3, $2 ->)dnl + roll <$>30, $2>)dnl .file "sha1-compress.asm" @@ -134,31 +134,16 @@ C_NAME(_nettle_sha1_compress): pushl %esi C 72(%esp) pushl %edi C 68(%esp) - pushl K1VALUE C 64(%esp) - subl $64, %esp C %esp = W + subl $68, %esp C %esp = W C Load and byteswap data - movl 92(%esp), TMP - - C No scheduling of these instructions, just use a couple of registers, - C and hope the out-of-order unit can keep up. - SWAP( (TMP), (DATA), %eax) - SWAP( 4(TMP), 4(DATA), %ebx) - SWAP( 8(TMP), 8(DATA), %ecx) - SWAP(12(TMP), 12(DATA), %edx) - SWAP(16(TMP), 16(DATA), %eax) - SWAP(20(TMP), 20(DATA), %ebx) - SWAP(24(TMP), 24(DATA), %ecx) - SWAP(28(TMP), 28(DATA), %edx) - SWAP(32(TMP), 32(DATA), %eax) - SWAP(36(TMP), 36(DATA), %ebx) - SWAP(40(TMP), 40(DATA), %ecx) - SWAP(44(TMP), 44(DATA), %edx) - SWAP(48(TMP), 48(DATA), %eax) - SWAP(52(TMP), 52(DATA), %ebx) - SWAP(56(TMP), 56(DATA), %ecx) - SWAP(60(TMP), 60(DATA), %edx) - + movl 92(%esp), TMP2 + + SWAP( 0, %eax) SWAP( 1, %ebx) SWAP( 2, %ecx) SWAP( 3, %edx) + SWAP( 4, %eax) SWAP( 5, %ebx) SWAP( 6, %ecx) SWAP( 7, %edx) + SWAP( 8, %eax) SWAP( 9, %ebx) SWAP(10, %ecx) SWAP(11, %edx) + SWAP(12, %eax) SWAP(13, %ebx) SWAP(14, %ecx) SWAP(15, %edx) + C load the state vector movl 88(%esp),TMP movl (TMP), SA @@ -167,104 +152,107 @@ C_NAME(_nettle_sha1_compress): movl 12(TMP), SD movl 16(TMP), SE - ROUND(SA, SB, SC, SD, SE, <F1>, NOEXPAND( 0)) - ROUND(SE, SA, SB, SC, SD, <F1>, NOEXPAND( 1)) - ROUND(SD, SE, SA, SB, SC, <F1>, NOEXPAND( 2)) - ROUND(SC, SD, SE, SA, SB, <F1>, NOEXPAND( 3)) - ROUND(SB, SC, SD, SE, SA, <F1>, NOEXPAND( 4)) - - ROUND(SA, SB, SC, SD, SE, <F1>, NOEXPAND( 5)) - ROUND(SE, SA, SB, SC, SD, <F1>, NOEXPAND( 6)) - ROUND(SD, SE, SA, SB, SC, <F1>, NOEXPAND( 7)) - ROUND(SC, SD, SE, SA, SB, <F1>, NOEXPAND( 8)) - ROUND(SB, SC, SD, SE, SA, <F1>, NOEXPAND( 9)) - - ROUND(SA, SB, SC, SD, SE, <F1>, NOEXPAND(10)) - ROUND(SE, SA, SB, SC, SD, <F1>, NOEXPAND(11)) - ROUND(SD, SE, SA, SB, SC, <F1>, NOEXPAND(12)) - ROUND(SC, SD, SE, SA, SB, <F1>, NOEXPAND(13)) - ROUND(SB, SC, SD, SE, SA, <F1>, NOEXPAND(14)) - - ROUND(SA, SB, SC, SD, SE, <F1>, NOEXPAND(15)) - EXPAND(16) ROUND(SE, SA, SB, SC, SD, <F1>, TMP) - EXPAND(17) ROUND(SD, SE, SA, SB, SC, <F1>, TMP) - EXPAND(18) ROUND(SC, SD, SE, SA, SB, <F1>, TMP) - EXPAND(19) ROUND(SB, SC, SD, SE, SA, <F1>, TMP) - - movl K2VALUE, K - EXPAND(20) ROUND(SA, SB, SC, SD, SE, <F2>, TMP) - EXPAND(21) ROUND(SE, SA, SB, SC, SD, <F2>, TMP) - EXPAND(22) ROUND(SD, SE, SA, SB, SC, <F2>, TMP) - EXPAND(23) ROUND(SC, SD, SE, SA, SB, <F2>, TMP) - EXPAND(24) ROUND(SB, SC, SD, SE, SA, <F2>, TMP) - - EXPAND(25) ROUND(SA, SB, SC, SD, SE, <F2>, TMP) - EXPAND(26) ROUND(SE, SA, SB, SC, SD, <F2>, TMP) - EXPAND(27) ROUND(SD, SE, SA, SB, SC, <F2>, TMP) - EXPAND(28) ROUND(SC, SD, SE, SA, SB, <F2>, TMP) - EXPAND(29) ROUND(SB, SC, SD, SE, SA, <F2>, TMP) - - EXPAND(30) ROUND(SA, SB, SC, SD, SE, <F2>, TMP) - EXPAND(31) ROUND(SE, SA, SB, SC, SD, <F2>, TMP) - EXPAND(32) ROUND(SD, SE, SA, SB, SC, <F2>, TMP) - EXPAND(33) ROUND(SC, SD, SE, SA, SB, <F2>, TMP) - EXPAND(34) ROUND(SB, SC, SD, SE, SA, <F2>, TMP) - - EXPAND(35) ROUND(SA, SB, SC, SD, SE, <F2>, TMP) - EXPAND(36) ROUND(SE, SA, SB, SC, SD, <F2>, TMP) - EXPAND(37) ROUND(SD, SE, SA, SB, SC, <F2>, TMP) - EXPAND(38) ROUND(SC, SD, SE, SA, SB, <F2>, TMP) - EXPAND(39) ROUND(SB, SC, SD, SE, SA, <F2>, TMP) - - movl K3VALUE, K - EXPAND(40) ROUND(SA, SB, SC, SD, SE, <F3>, TMP) - EXPAND(41) ROUND(SE, SA, SB, SC, SD, <F3>, TMP) - EXPAND(42) ROUND(SD, SE, SA, SB, SC, <F3>, TMP) - EXPAND(43) ROUND(SC, SD, SE, SA, SB, <F3>, TMP) - EXPAND(44) ROUND(SB, SC, SD, SE, SA, <F3>, TMP) - - EXPAND(45) ROUND(SA, SB, SC, SD, SE, <F3>, TMP) - EXPAND(46) ROUND(SE, SA, SB, SC, SD, <F3>, TMP) - EXPAND(47) ROUND(SD, SE, SA, SB, SC, <F3>, TMP) - EXPAND(48) ROUND(SC, SD, SE, SA, SB, <F3>, TMP) - EXPAND(49) ROUND(SB, SC, SD, SE, SA, <F3>, TMP) - - EXPAND(50) ROUND(SA, SB, SC, SD, SE, <F3>, TMP) - EXPAND(51) ROUND(SE, SA, SB, SC, SD, <F3>, TMP) - EXPAND(52) ROUND(SD, SE, SA, SB, SC, <F3>, TMP) - EXPAND(53) ROUND(SC, SD, SE, SA, SB, <F3>, TMP) - EXPAND(54) ROUND(SB, SC, SD, SE, SA, <F3>, TMP) - - EXPAND(55) ROUND(SA, SB, SC, SD, SE, <F3>, TMP) - EXPAND(56) ROUND(SE, SA, SB, SC, SD, <F3>, TMP) - EXPAND(57) ROUND(SD, SE, SA, SB, SC, <F3>, TMP) - EXPAND(58) ROUND(SC, SD, SE, SA, SB, <F3>, TMP) - EXPAND(59) ROUND(SB, SC, SD, SE, SA, <F3>, TMP) - - movl K4VALUE, K - EXPAND(60) ROUND(SA, SB, SC, SD, SE, <F2>, TMP) - EXPAND(61) ROUND(SE, SA, SB, SC, SD, <F2>, TMP) - EXPAND(62) ROUND(SD, SE, SA, SB, SC, <F2>, TMP) - EXPAND(63) ROUND(SC, SD, SE, SA, SB, <F2>, TMP) - EXPAND(64) ROUND(SB, SC, SD, SE, SA, <F2>, TMP) - - EXPAND(65) ROUND(SA, SB, SC, SD, SE, <F2>, TMP) - EXPAND(66) ROUND(SE, SA, SB, SC, SD, <F2>, TMP) - EXPAND(67) ROUND(SD, SE, SA, SB, SC, <F2>, TMP) - EXPAND(68) ROUND(SC, SD, SE, SA, SB, <F2>, TMP) - EXPAND(69) ROUND(SB, SC, SD, SE, SA, <F2>, TMP) - - EXPAND(70) ROUND(SA, SB, SC, SD, SE, <F2>, TMP) - EXPAND(71) ROUND(SE, SA, SB, SC, SD, <F2>, TMP) - EXPAND(72) ROUND(SD, SE, SA, SB, SC, <F2>, TMP) - EXPAND(73) ROUND(SC, SD, SE, SA, SB, <F2>, TMP) - EXPAND(74) ROUND(SB, SC, SD, SE, SA, <F2>, TMP) - - EXPAND(75) ROUND(SA, SB, SC, SD, SE, <F2>, TMP) - EXPAND(76) ROUND(SE, SA, SB, SC, SD, <F2>, TMP) - EXPAND(77) ROUND(SD, SE, SA, SB, SC, <F2>, TMP) - EXPAND(78) ROUND(SC, SD, SE, SA, SB, <F2>, TMP) - EXPAND(79) ROUND(SB, SC, SD, SE, SA, <F2>, TMP) + movl K1VALUE, TMP2 + ROUND(SA, SB, SC, SD, SE, <F1>, TMP2, NOEXPAND( 0)) + ROUND(SE, SA, SB, SC, SD, <F1>, TMP2, NOEXPAND( 1)) + ROUND(SD, SE, SA, SB, SC, <F1>, TMP2, NOEXPAND( 2)) + ROUND(SC, SD, SE, SA, SB, <F1>, TMP2, NOEXPAND( 3)) + ROUND(SB, SC, SD, SE, SA, <F1>, TMP2, NOEXPAND( 4)) + + ROUND(SA, SB, SC, SD, SE, <F1>, TMP2, NOEXPAND( 5)) + ROUND(SE, SA, SB, SC, SD, <F1>, TMP2, NOEXPAND( 6)) + ROUND(SD, SE, SA, SB, SC, <F1>, TMP2, NOEXPAND( 7)) + ROUND(SC, SD, SE, SA, SB, <F1>, TMP2, NOEXPAND( 8)) + ROUND(SB, SC, SD, SE, SA, <F1>, TMP2, NOEXPAND( 9)) + + ROUND(SA, SB, SC, SD, SE, <F1>, TMP2, NOEXPAND(10)) + ROUND(SE, SA, SB, SC, SD, <F1>, TMP2, NOEXPAND(11)) + ROUND(SD, SE, SA, SB, SC, <F1>, TMP2, NOEXPAND(12)) + ROUND(SC, SD, SE, SA, SB, <F1>, TMP2, NOEXPAND(13)) + ROUND(SB, SC, SD, SE, SA, <F1>, TMP2, NOEXPAND(14)) + + ROUND(SA, SB, SC, SD, SE, <F1>, TMP2, NOEXPAND(15)) + EXPAND(16) ROUND(SE, SA, SB, SC, SD, <F1>, TMP2) + EXPAND(17) ROUND(SD, SE, SA, SB, SC, <F1>, TMP2) + EXPAND(18) ROUND(SC, SD, SE, SA, SB, <F1>, TMP2) + EXPAND(19) ROUND(SB, SC, SD, SE, SA, <F1>, TMP2) + + C TMP2 is free to use in these rounds + movl K2VALUE, TMP2 + EXPAND(20) ROUND(SA, SB, SC, SD, SE, <F2>, TMP2) + EXPAND(21) ROUND(SE, SA, SB, SC, SD, <F2>, TMP2) + EXPAND(22) ROUND(SD, SE, SA, SB, SC, <F2>, TMP2) + EXPAND(23) ROUND(SC, SD, SE, SA, SB, <F2>, TMP2) + EXPAND(24) ROUND(SB, SC, SD, SE, SA, <F2>, TMP2) + + EXPAND(25) ROUND(SA, SB, SC, SD, SE, <F2>, TMP2) + EXPAND(26) ROUND(SE, SA, SB, SC, SD, <F2>, TMP2) + EXPAND(27) ROUND(SD, SE, SA, SB, SC, <F2>, TMP2) + EXPAND(28) ROUND(SC, SD, SE, SA, SB, <F2>, TMP2) + EXPAND(29) ROUND(SB, SC, SD, SE, SA, <F2>, TMP2) + + EXPAND(30) ROUND(SA, SB, SC, SD, SE, <F2>, TMP2) + EXPAND(31) ROUND(SE, SA, SB, SC, SD, <F2>, TMP2) + EXPAND(32) ROUND(SD, SE, SA, SB, SC, <F2>, TMP2) + EXPAND(33) ROUND(SC, SD, SE, SA, SB, <F2>, TMP2) + EXPAND(34) ROUND(SB, SC, SD, SE, SA, <F2>, TMP2) + + EXPAND(35) ROUND(SA, SB, SC, SD, SE, <F2>, TMP2) + EXPAND(36) ROUND(SE, SA, SB, SC, SD, <F2>, TMP2) + EXPAND(37) ROUND(SD, SE, SA, SB, SC, <F2>, TMP2) + EXPAND(38) ROUND(SC, SD, SE, SA, SB, <F2>, TMP2) + EXPAND(39) ROUND(SB, SC, SD, SE, SA, <F2>, TMP2) + + C We have to put this constant on the stack + movl K3VALUE, TMP3 + EXPAND(40) ROUND(SA, SB, SC, SD, SE, <F3>, TMP3) + EXPAND(41) ROUND(SE, SA, SB, SC, SD, <F3>, TMP3) + EXPAND(42) ROUND(SD, SE, SA, SB, SC, <F3>, TMP3) + EXPAND(43) ROUND(SC, SD, SE, SA, SB, <F3>, TMP3) + EXPAND(44) ROUND(SB, SC, SD, SE, SA, <F3>, TMP3) + + EXPAND(45) ROUND(SA, SB, SC, SD, SE, <F3>, TMP3) + EXPAND(46) ROUND(SE, SA, SB, SC, SD, <F3>, TMP3) + EXPAND(47) ROUND(SD, SE, SA, SB, SC, <F3>, TMP3) + EXPAND(48) ROUND(SC, SD, SE, SA, SB, <F3>, TMP3) + EXPAND(49) ROUND(SB, SC, SD, SE, SA, <F3>, TMP3) + + EXPAND(50) ROUND(SA, SB, SC, SD, SE, <F3>, TMP3) + EXPAND(51) ROUND(SE, SA, SB, SC, SD, <F3>, TMP3) + EXPAND(52) ROUND(SD, SE, SA, SB, SC, <F3>, TMP3) + EXPAND(53) ROUND(SC, SD, SE, SA, SB, <F3>, TMP3) + EXPAND(54) ROUND(SB, SC, SD, SE, SA, <F3>, TMP3) + + EXPAND(55) ROUND(SA, SB, SC, SD, SE, <F3>, TMP3) + EXPAND(56) ROUND(SE, SA, SB, SC, SD, <F3>, TMP3) + EXPAND(57) ROUND(SD, SE, SA, SB, SC, <F3>, TMP3) + EXPAND(58) ROUND(SC, SD, SE, SA, SB, <F3>, TMP3) + EXPAND(59) ROUND(SB, SC, SD, SE, SA, <F3>, TMP3) + + movl K4VALUE, TMP2 + EXPAND(60) ROUND(SA, SB, SC, SD, SE, <F2>, TMP2) + EXPAND(61) ROUND(SE, SA, SB, SC, SD, <F2>, TMP2) + EXPAND(62) ROUND(SD, SE, SA, SB, SC, <F2>, TMP2) + EXPAND(63) ROUND(SC, SD, SE, SA, SB, <F2>, TMP2) + EXPAND(64) ROUND(SB, SC, SD, SE, SA, <F2>, TMP2) + + EXPAND(65) ROUND(SA, SB, SC, SD, SE, <F2>, TMP2) + EXPAND(66) ROUND(SE, SA, SB, SC, SD, <F2>, TMP2) + EXPAND(67) ROUND(SD, SE, SA, SB, SC, <F2>, TMP2) + EXPAND(68) ROUND(SC, SD, SE, SA, SB, <F2>, TMP2) + EXPAND(69) ROUND(SB, SC, SD, SE, SA, <F2>, TMP2) + + EXPAND(70) ROUND(SA, SB, SC, SD, SE, <F2>, TMP2) + EXPAND(71) ROUND(SE, SA, SB, SC, SD, <F2>, TMP2) + EXPAND(72) ROUND(SD, SE, SA, SB, SC, <F2>, TMP2) + EXPAND(73) ROUND(SC, SD, SE, SA, SB, <F2>, TMP2) + EXPAND(74) ROUND(SB, SC, SD, SE, SA, <F2>, TMP2) + + EXPAND(75) ROUND(SA, SB, SC, SD, SE, <F2>, TMP2) + EXPAND(76) ROUND(SE, SA, SB, SC, SD, <F2>, TMP2) + EXPAND(77) ROUND(SD, SE, SA, SB, SC, <F2>, TMP2) + EXPAND(78) ROUND(SC, SD, SE, SA, SB, <F2>, TMP2) + EXPAND(79) ROUND(SB, SC, SD, SE, SA, <F2>, TMP2) C Update the state vector movl 88(%esp),TMP @@ -289,8 +277,8 @@ C although it's probably not worth the effort. This is the trick: C C round(a,b,c,d,e,f,k) modifies only b,e. C -C round(a,b,c,d,e,f3,k) load + store -C round(e,a,b,c,d,f3,k) load + store +C round(a,b,c,d,e,f3,k) +C round(e,a,b,c,d,f3,k) C C ; f3(b,c,d) = (b & c) | (d & (b | c)) C @@ -314,7 +302,7 @@ C andl b, c C andl d, tmp C orl c, tmp C -C ; fr(a,b,c) = (a & b) | (c & (a | b)) +C ; f3(a,b,c) = (a & b) | (c & (a | b)) C movl b, tmp C andl a, tmp C movl a, c |