diff options
| field | value | date |
|---|---|---|
| author | weidai <weidai11@users.noreply.github.com> | 2007-05-04 15:24:09 +0000 |
| committer | weidai <weidai11@users.noreply.github.com> | 2007-05-04 15:24:09 +0000 |
| commit | d2510f30c75b341dcbc45432a4bd38c0513f2616 (patch) | |
| tree | 3ddcd92ac078642dfed5375980dc2db4006d1498 /panama.cpp | |
| parent | 460c2d6c6adc5490c77777f8f8b2e96cc4bf4eb3 (diff) | |
| download | cryptopp-git-d2510f30c75b341dcbc45432a4bd38c0513f2616.tar.gz | |
fix compile for x64, DLL and VC 6
Diffstat (limited to 'panama.cpp')
-rw-r--r-- | panama.cpp | 137 |
1 file changed, 68 insertions, 69 deletions
@@ -26,31 +26,31 @@ void Panama_SSE2_Pull(size_t count, word32 *state, word32 *z, const word32 *y) __asm__ __volatile__ ( ".intel_syntax noprefix;" - AS1( push ebx) + AS_PUSH( bx) #else - AS2( mov ecx, count) - AS2( mov esi, state) - AS2( mov edi, z) - AS2( mov edx, y) + AS2( mov WORD_REG(cx), count) + AS2( mov WORD_REG(si), state) + AS2( mov WORD_REG(di), z) + AS2( mov WORD_REG(dx), y) #endif - AS2( shl ecx, 5) + AS2( shl WORD_REG(cx), 5) ASJ( jz, 5, f) - AS2( mov ebx, [esi+4*17]) - AS2( add ecx, ebx) + AS2( mov ebx, [WORD_REG(si)+4*17]) + AS2( add WORD_REG(cx), WORD_REG(bx)) - AS1( push ebp) - AS1( push ecx) + AS_PUSH( bp) + AS_PUSH( cx) - AS2( movdqa xmm0, [esi+0*16]) - AS2( movdqa xmm1, [esi+1*16]) - AS2( movdqa xmm2, [esi+2*16]) - AS2( movdqa xmm3, [esi+3*16]) - AS2( mov eax, [esi+4*16]) + AS2( movdqa xmm0, [WORD_REG(si)+0*16]) + AS2( movdqa xmm1, [WORD_REG(si)+1*16]) + AS2( movdqa xmm2, [WORD_REG(si)+2*16]) + AS2( movdqa xmm3, [WORD_REG(si)+3*16]) + AS2( mov eax, [WORD_REG(si)+4*16]) ASL(4) // gamma and pi #if CRYPTOPP_BOOL_SSSE3_ASM_AVAILABLE - AS2( test ebx, 1) + AS2( test WORD_REG(bx), 1) ASJ( jnz, 6, f) #endif AS2( movdqa xmm6, xmm2) @@ -81,7 +81,7 @@ void Panama_SSE2_Pull(size_t count, word32 *state, word32 *z, const word32 *y) #define pi(i) \ AS2( movd ecx, xmm7)\ AS2( rol ecx, ASM_MOD((ASM_MOD(5*i,17)*(ASM_MOD(5*i,17)+1)/2), 32))\ - AS2( mov [esi+SSE2_Index(ASM_MOD(5*(i), 17))*4], ecx) + AS2( mov [WORD_REG(si)+SSE2_Index(ASM_MOD(5*(i), 17))*4], ecx) #define pi4(x, y, z, a, b, c, d) \ AS2( pcmpeqb xmm7, xmm7)\ @@ -110,65 +110,65 @@ void Panama_SSE2_Pull(size_t count, word32 *state, word32 *z, const word32 *y) AS2( punpckhdq xmm2, xmm0) // 11 12 15 16 // keystream - AS2( test edi, edi) + AS2( test WORD_REG(di), WORD_REG(di)) ASJ( jz, 0, f) AS2( movdqa xmm6, xmm4) AS2( punpcklqdq xmm4, xmm2) AS2( punpckhqdq xmm6, xmm2) - AS2( test edx, 0xf) + AS2( test WORD_REG(dx), 0xf) ASJ( jnz, 2, f) - AS2( test edx, edx) + AS2( test WORD_REG(dx), WORD_REG(dx)) ASJ( 
jz, 1, f) - AS2( pxor xmm4, [edx]) - AS2( pxor xmm6, [edx+16]) - AS2( add edx, 32) + AS2( pxor xmm4, [WORD_REG(dx)]) + AS2( pxor xmm6, [WORD_REG(dx)+16]) + AS2( add WORD_REG(dx), 32) ASJ( jmp, 1, f) ASL(2) - AS2( movdqu xmm0, [edx]) - AS2( movdqu xmm2, [edx+16]) + AS2( movdqu xmm0, [WORD_REG(dx)]) + AS2( movdqu xmm2, [WORD_REG(dx)+16]) AS2( pxor xmm4, xmm0) AS2( pxor xmm6, xmm2) - AS2( add edx, 32) + AS2( add WORD_REG(dx), 32) ASL(1) - AS2( test edi, 0xf) + AS2( test WORD_REG(di), 0xf) ASJ( jnz, 3, f) - AS2( movdqa [edi], xmm4) - AS2( movdqa [edi+16], xmm6) - AS2( add edi, 32) + AS2( movdqa [WORD_REG(di)], xmm4) + AS2( movdqa [WORD_REG(di)+16], xmm6) + AS2( add WORD_REG(di), 32) ASJ( jmp, 0, f) ASL(3) - AS2( movdqu [edi], xmm4) - AS2( movdqu [edi+16], xmm6) - AS2( add edi, 32) + AS2( movdqu [WORD_REG(di)], xmm4) + AS2( movdqu [WORD_REG(di)+16], xmm6) + AS2( add WORD_REG(di), 32) ASL(0) // buffer update - AS2( lea ecx, [ebx + 32]) - AS2( and ecx, 31*32) - AS2( lea ebp, [ebx + (32-24)*32]) - AS2( and ebp, 31*32) + AS2( lea WORD_REG(cx), [WORD_REG(bx) + 32]) + AS2( and WORD_REG(cx), 31*32) + AS2( lea WORD_REG(bp), [WORD_REG(bx) + (32-24)*32]) + AS2( and WORD_REG(bp), 31*32) - AS2( movdqa xmm0, [esi+20*4+ecx+0*8]) + AS2( movdqa xmm0, [WORD_REG(si)+20*4+WORD_REG(cx)+0*8]) AS2( pxor xmm3, xmm0) ASS( pshufd xmm0, xmm0, 2, 3, 0, 1) - AS2( movdqa [esi+20*4+ecx+0*8], xmm3) - AS2( pxor xmm0, [esi+20*4+ebp+2*8]) - AS2( movdqa [esi+20*4+ebp+2*8], xmm0) + AS2( movdqa [WORD_REG(si)+20*4+WORD_REG(cx)+0*8], xmm3) + AS2( pxor xmm0, [WORD_REG(si)+20*4+WORD_REG(bp)+2*8]) + AS2( movdqa [WORD_REG(si)+20*4+WORD_REG(bp)+2*8], xmm0) - AS2( movdqa xmm4, [esi+20*4+ecx+2*8]) + AS2( movdqa xmm4, [WORD_REG(si)+20*4+WORD_REG(cx)+2*8]) AS2( pxor xmm1, xmm4) - AS2( movdqa [esi+20*4+ecx+2*8], xmm1) - AS2( pxor xmm4, [esi+20*4+ebp+0*8]) - AS2( movdqa [esi+20*4+ebp+0*8], xmm4) + AS2( movdqa [WORD_REG(si)+20*4+WORD_REG(cx)+2*8], xmm1) + AS2( pxor xmm4, [WORD_REG(si)+20*4+WORD_REG(bp)+0*8]) + AS2( 
movdqa [WORD_REG(si)+20*4+WORD_REG(bp)+0*8], xmm4) // theta - AS2( movdqa xmm3, [esi+3*16]) - AS2( movdqa xmm2, [esi+2*16]) - AS2( movdqa xmm1, [esi+1*16]) - AS2( movdqa xmm0, [esi+0*16]) + AS2( movdqa xmm3, [WORD_REG(si)+3*16]) + AS2( movdqa xmm2, [WORD_REG(si)+2*16]) + AS2( movdqa xmm1, [WORD_REG(si)+1*16]) + AS2( movdqa xmm0, [WORD_REG(si)+0*16]) #if CRYPTOPP_BOOL_SSSE3_ASM_AVAILABLE - AS2( test ebx, 1) + AS2( test WORD_REG(bx), 1) ASJ( jnz, 8, f) #endif AS2( movd xmm6, eax) @@ -214,21 +214,21 @@ void Panama_SSE2_Pull(size_t count, word32 *state, word32 *z, const word32 *y) AS2( pxor xmm0, xmm4) // sigma - AS2( lea ecx, [ebx + (32-4)*32]) - AS2( and ecx, 31*32) - AS2( lea ebp, [ebx + 16*32]) - AS2( and ebp, 31*32) + AS2( lea WORD_REG(cx), [WORD_REG(bx) + (32-4)*32]) + AS2( and WORD_REG(cx), 31*32) + AS2( lea WORD_REG(bp), [WORD_REG(bx) + 16*32]) + AS2( and WORD_REG(bp), 31*32) - AS2( movdqa xmm4, [esi+20*4+ecx+0*16]) - AS2( movdqa xmm5, [esi+20*4+ebp+0*16]) + AS2( movdqa xmm4, [WORD_REG(si)+20*4+WORD_REG(cx)+0*16]) + AS2( movdqa xmm5, [WORD_REG(si)+20*4+WORD_REG(bp)+0*16]) AS2( movdqa xmm6, xmm4) AS2( punpcklqdq xmm4, xmm5) AS2( punpckhqdq xmm6, xmm5) AS2( pxor xmm3, xmm4) AS2( pxor xmm2, xmm6) - AS2( movdqa xmm4, [esi+20*4+ecx+1*16]) - AS2( movdqa xmm5, [esi+20*4+ebp+1*16]) + AS2( movdqa xmm4, [WORD_REG(si)+20*4+WORD_REG(cx)+1*16]) + AS2( movdqa xmm5, [WORD_REG(si)+20*4+WORD_REG(bp)+1*16]) AS2( movdqa xmm6, xmm4) AS2( punpcklqdq xmm4, xmm5) AS2( punpckhqdq xmm6, xmm5) @@ -236,23 +236,22 @@ void Panama_SSE2_Pull(size_t count, word32 *state, word32 *z, const word32 *y) AS2( pxor xmm0, xmm6) // loop - AS2( add ebx, 32) - AS2( cmp ebx, [esp]) + AS2( add WORD_REG(bx), 32) + AS2( cmp WORD_REG(bx), [WORD_REG(sp)]) ASJ( jne, 4, b) // save state - AS2( mov ebp, [esp+4]) - AS2( add esp, 8) - AS2( mov [esi+4*17], ebx) - AS2( mov [esi+4*16], eax) - AS2( movdqa [esi+3*16], xmm3) - AS2( movdqa [esi+2*16], xmm2) - AS2( movdqa [esi+1*16], xmm1) - AS2( movdqa [esi+0*16], xmm0) 
+ AS2( add WORD_REG(sp), WORD_SZ) + AS_POP( bp) + AS2( mov [WORD_REG(si)+4*16], eax) + AS2( movdqa [WORD_REG(si)+3*16], xmm3) + AS2( movdqa [WORD_REG(si)+2*16], xmm2) + AS2( movdqa [WORD_REG(si)+1*16], xmm1) + AS2( movdqa [WORD_REG(si)+0*16], xmm0) ASL(5) #ifdef __GNUC__ - AS1( pop ebx) + AS_POP( bx) ".att_syntax prefix;" : : "c" (count), "S" (state), "D" (z), "d" (y) |