summary | refs | log | tree | commit | diff
path: root/panama.cpp
diff options
context:
space:
mode:
author: weidai <weidai11@users.noreply.github.com> 2007-05-04 15:24:09 +0000
committer: weidai <weidai11@users.noreply.github.com> 2007-05-04 15:24:09 +0000
commit: d2510f30c75b341dcbc45432a4bd38c0513f2616 (patch)
tree: 3ddcd92ac078642dfed5375980dc2db4006d1498 /panama.cpp
parent: 460c2d6c6adc5490c77777f8f8b2e96cc4bf4eb3 (diff)
download: cryptopp-git-d2510f30c75b341dcbc45432a4bd38c0513f2616.tar.gz
fix compile for x64, DLL and VC 6
Diffstat (limited to 'panama.cpp')
-rw-r--r-- panama.cpp 137
1 files changed, 68 insertions, 69 deletions
diff --git a/panama.cpp b/panama.cpp
index 89a5aeaa..a60e1670 100644
--- a/panama.cpp
+++ b/panama.cpp
@@ -26,31 +26,31 @@ void Panama_SSE2_Pull(size_t count, word32 *state, word32 *z, const word32 *y)
__asm__ __volatile__
(
".intel_syntax noprefix;"
- AS1( push ebx)
+ AS_PUSH( bx)
#else
- AS2( mov ecx, count)
- AS2( mov esi, state)
- AS2( mov edi, z)
- AS2( mov edx, y)
+ AS2( mov WORD_REG(cx), count)
+ AS2( mov WORD_REG(si), state)
+ AS2( mov WORD_REG(di), z)
+ AS2( mov WORD_REG(dx), y)
#endif
- AS2( shl ecx, 5)
+ AS2( shl WORD_REG(cx), 5)
ASJ( jz, 5, f)
- AS2( mov ebx, [esi+4*17])
- AS2( add ecx, ebx)
+ AS2( mov ebx, [WORD_REG(si)+4*17])
+ AS2( add WORD_REG(cx), WORD_REG(bx))
- AS1( push ebp)
- AS1( push ecx)
+ AS_PUSH( bp)
+ AS_PUSH( cx)
- AS2( movdqa xmm0, [esi+0*16])
- AS2( movdqa xmm1, [esi+1*16])
- AS2( movdqa xmm2, [esi+2*16])
- AS2( movdqa xmm3, [esi+3*16])
- AS2( mov eax, [esi+4*16])
+ AS2( movdqa xmm0, [WORD_REG(si)+0*16])
+ AS2( movdqa xmm1, [WORD_REG(si)+1*16])
+ AS2( movdqa xmm2, [WORD_REG(si)+2*16])
+ AS2( movdqa xmm3, [WORD_REG(si)+3*16])
+ AS2( mov eax, [WORD_REG(si)+4*16])
ASL(4)
// gamma and pi
#if CRYPTOPP_BOOL_SSSE3_ASM_AVAILABLE
- AS2( test ebx, 1)
+ AS2( test WORD_REG(bx), 1)
ASJ( jnz, 6, f)
#endif
AS2( movdqa xmm6, xmm2)
@@ -81,7 +81,7 @@ void Panama_SSE2_Pull(size_t count, word32 *state, word32 *z, const word32 *y)
#define pi(i) \
AS2( movd ecx, xmm7)\
AS2( rol ecx, ASM_MOD((ASM_MOD(5*i,17)*(ASM_MOD(5*i,17)+1)/2), 32))\
- AS2( mov [esi+SSE2_Index(ASM_MOD(5*(i), 17))*4], ecx)
+ AS2( mov [WORD_REG(si)+SSE2_Index(ASM_MOD(5*(i), 17))*4], ecx)
#define pi4(x, y, z, a, b, c, d) \
AS2( pcmpeqb xmm7, xmm7)\
@@ -110,65 +110,65 @@ void Panama_SSE2_Pull(size_t count, word32 *state, word32 *z, const word32 *y)
AS2( punpckhdq xmm2, xmm0) // 11 12 15 16
// keystream
- AS2( test edi, edi)
+ AS2( test WORD_REG(di), WORD_REG(di))
ASJ( jz, 0, f)
AS2( movdqa xmm6, xmm4)
AS2( punpcklqdq xmm4, xmm2)
AS2( punpckhqdq xmm6, xmm2)
- AS2( test edx, 0xf)
+ AS2( test WORD_REG(dx), 0xf)
ASJ( jnz, 2, f)
- AS2( test edx, edx)
+ AS2( test WORD_REG(dx), WORD_REG(dx))
ASJ( jz, 1, f)
- AS2( pxor xmm4, [edx])
- AS2( pxor xmm6, [edx+16])
- AS2( add edx, 32)
+ AS2( pxor xmm4, [WORD_REG(dx)])
+ AS2( pxor xmm6, [WORD_REG(dx)+16])
+ AS2( add WORD_REG(dx), 32)
ASJ( jmp, 1, f)
ASL(2)
- AS2( movdqu xmm0, [edx])
- AS2( movdqu xmm2, [edx+16])
+ AS2( movdqu xmm0, [WORD_REG(dx)])
+ AS2( movdqu xmm2, [WORD_REG(dx)+16])
AS2( pxor xmm4, xmm0)
AS2( pxor xmm6, xmm2)
- AS2( add edx, 32)
+ AS2( add WORD_REG(dx), 32)
ASL(1)
- AS2( test edi, 0xf)
+ AS2( test WORD_REG(di), 0xf)
ASJ( jnz, 3, f)
- AS2( movdqa [edi], xmm4)
- AS2( movdqa [edi+16], xmm6)
- AS2( add edi, 32)
+ AS2( movdqa [WORD_REG(di)], xmm4)
+ AS2( movdqa [WORD_REG(di)+16], xmm6)
+ AS2( add WORD_REG(di), 32)
ASJ( jmp, 0, f)
ASL(3)
- AS2( movdqu [edi], xmm4)
- AS2( movdqu [edi+16], xmm6)
- AS2( add edi, 32)
+ AS2( movdqu [WORD_REG(di)], xmm4)
+ AS2( movdqu [WORD_REG(di)+16], xmm6)
+ AS2( add WORD_REG(di), 32)
ASL(0)
// buffer update
- AS2( lea ecx, [ebx + 32])
- AS2( and ecx, 31*32)
- AS2( lea ebp, [ebx + (32-24)*32])
- AS2( and ebp, 31*32)
+ AS2( lea WORD_REG(cx), [WORD_REG(bx) + 32])
+ AS2( and WORD_REG(cx), 31*32)
+ AS2( lea WORD_REG(bp), [WORD_REG(bx) + (32-24)*32])
+ AS2( and WORD_REG(bp), 31*32)
- AS2( movdqa xmm0, [esi+20*4+ecx+0*8])
+ AS2( movdqa xmm0, [WORD_REG(si)+20*4+WORD_REG(cx)+0*8])
AS2( pxor xmm3, xmm0)
ASS( pshufd xmm0, xmm0, 2, 3, 0, 1)
- AS2( movdqa [esi+20*4+ecx+0*8], xmm3)
- AS2( pxor xmm0, [esi+20*4+ebp+2*8])
- AS2( movdqa [esi+20*4+ebp+2*8], xmm0)
+ AS2( movdqa [WORD_REG(si)+20*4+WORD_REG(cx)+0*8], xmm3)
+ AS2( pxor xmm0, [WORD_REG(si)+20*4+WORD_REG(bp)+2*8])
+ AS2( movdqa [WORD_REG(si)+20*4+WORD_REG(bp)+2*8], xmm0)
- AS2( movdqa xmm4, [esi+20*4+ecx+2*8])
+ AS2( movdqa xmm4, [WORD_REG(si)+20*4+WORD_REG(cx)+2*8])
AS2( pxor xmm1, xmm4)
- AS2( movdqa [esi+20*4+ecx+2*8], xmm1)
- AS2( pxor xmm4, [esi+20*4+ebp+0*8])
- AS2( movdqa [esi+20*4+ebp+0*8], xmm4)
+ AS2( movdqa [WORD_REG(si)+20*4+WORD_REG(cx)+2*8], xmm1)
+ AS2( pxor xmm4, [WORD_REG(si)+20*4+WORD_REG(bp)+0*8])
+ AS2( movdqa [WORD_REG(si)+20*4+WORD_REG(bp)+0*8], xmm4)
// theta
- AS2( movdqa xmm3, [esi+3*16])
- AS2( movdqa xmm2, [esi+2*16])
- AS2( movdqa xmm1, [esi+1*16])
- AS2( movdqa xmm0, [esi+0*16])
+ AS2( movdqa xmm3, [WORD_REG(si)+3*16])
+ AS2( movdqa xmm2, [WORD_REG(si)+2*16])
+ AS2( movdqa xmm1, [WORD_REG(si)+1*16])
+ AS2( movdqa xmm0, [WORD_REG(si)+0*16])
#if CRYPTOPP_BOOL_SSSE3_ASM_AVAILABLE
- AS2( test ebx, 1)
+ AS2( test WORD_REG(bx), 1)
ASJ( jnz, 8, f)
#endif
AS2( movd xmm6, eax)
@@ -214,21 +214,21 @@ void Panama_SSE2_Pull(size_t count, word32 *state, word32 *z, const word32 *y)
AS2( pxor xmm0, xmm4)
// sigma
- AS2( lea ecx, [ebx + (32-4)*32])
- AS2( and ecx, 31*32)
- AS2( lea ebp, [ebx + 16*32])
- AS2( and ebp, 31*32)
+ AS2( lea WORD_REG(cx), [WORD_REG(bx) + (32-4)*32])
+ AS2( and WORD_REG(cx), 31*32)
+ AS2( lea WORD_REG(bp), [WORD_REG(bx) + 16*32])
+ AS2( and WORD_REG(bp), 31*32)
- AS2( movdqa xmm4, [esi+20*4+ecx+0*16])
- AS2( movdqa xmm5, [esi+20*4+ebp+0*16])
+ AS2( movdqa xmm4, [WORD_REG(si)+20*4+WORD_REG(cx)+0*16])
+ AS2( movdqa xmm5, [WORD_REG(si)+20*4+WORD_REG(bp)+0*16])
AS2( movdqa xmm6, xmm4)
AS2( punpcklqdq xmm4, xmm5)
AS2( punpckhqdq xmm6, xmm5)
AS2( pxor xmm3, xmm4)
AS2( pxor xmm2, xmm6)
- AS2( movdqa xmm4, [esi+20*4+ecx+1*16])
- AS2( movdqa xmm5, [esi+20*4+ebp+1*16])
+ AS2( movdqa xmm4, [WORD_REG(si)+20*4+WORD_REG(cx)+1*16])
+ AS2( movdqa xmm5, [WORD_REG(si)+20*4+WORD_REG(bp)+1*16])
AS2( movdqa xmm6, xmm4)
AS2( punpcklqdq xmm4, xmm5)
AS2( punpckhqdq xmm6, xmm5)
@@ -236,23 +236,22 @@ void Panama_SSE2_Pull(size_t count, word32 *state, word32 *z, const word32 *y)
AS2( pxor xmm0, xmm6)
// loop
- AS2( add ebx, 32)
- AS2( cmp ebx, [esp])
+ AS2( add WORD_REG(bx), 32)
+ AS2( cmp WORD_REG(bx), [WORD_REG(sp)])
ASJ( jne, 4, b)
// save state
- AS2( mov ebp, [esp+4])
- AS2( add esp, 8)
- AS2( mov [esi+4*17], ebx)
- AS2( mov [esi+4*16], eax)
- AS2( movdqa [esi+3*16], xmm3)
- AS2( movdqa [esi+2*16], xmm2)
- AS2( movdqa [esi+1*16], xmm1)
- AS2( movdqa [esi+0*16], xmm0)
+ AS2( add WORD_REG(sp), WORD_SZ)
+ AS_POP( bp)
+ AS2( mov [WORD_REG(si)+4*16], eax)
+ AS2( movdqa [WORD_REG(si)+3*16], xmm3)
+ AS2( movdqa [WORD_REG(si)+2*16], xmm2)
+ AS2( movdqa [WORD_REG(si)+1*16], xmm1)
+ AS2( movdqa [WORD_REG(si)+0*16], xmm0)
ASL(5)
#ifdef __GNUC__
- AS1( pop ebx)
+ AS_POP( bx)
".att_syntax prefix;"
:
: "c" (count), "S" (state), "D" (z), "d" (y)