diff options
author | weidai <weidai11@users.noreply.github.com> | 2009-03-12 11:24:12 +0000 |
---|---|---|
committer | weidai <weidai11@users.noreply.github.com> | 2009-03-12 11:24:12 +0000 |
commit | 2779fc60506e2042ab1569ffad4061f1187d186c (patch) | |
tree | 68edc0bccf003f5615716b3ae2d6b97067af39c4 /x64dll.asm | |
parent | 64af4560dc8ba66ef0e2ac3b05dec6f445ec96fe (diff) | |
download | cryptopp-git-2779fc60506e2042ab1569ffad4061f1187d186c.tar.gz |
- add EAX mode, XSalsa20
- speed up GCM key setup
- wipe stack in AES assembly code
- speed up CFB mode
Diffstat (limited to 'x64dll.asm')
-rw-r--r-- | x64dll.asm | 467 |
1 file changed, 240 insertions, 227 deletions
@@ -62,357 +62,356 @@ Rijndael_Enc_AdvancedProcessBlocks PROC FRAME rex_push_reg rsi push_reg rdi push_reg rbx -push_reg rbp push_reg r12 .endprolog mov r8, rcx -mov rsi, ?Te@rdtable@CryptoPP@@3PA_KA +mov r11, ?Te@rdtable@CryptoPP@@3PA_KA mov rdi, QWORD PTR [?g_cacheLineSize@CryptoPP@@3IA] -mov rbp, [(r8+16*19)] +mov rsi, [(r8+16*19)] mov rax, 16 -and rax, rbp +and rax, rsi movdqa xmm3, XMMWORD PTR [rdx+16+rax] movdqa [(r8+16*12)], xmm3 lea rax, [rdx+rax+2*16] -sub rax, rbp +sub rax, rsi label0: -movdqa xmm0, [rax+rbp] -movdqa XMMWORD PTR [(r8+0)+rbp], xmm0 -add rbp, 16 -cmp rbp, 16*12 +movdqa xmm0, [rax+rsi] +movdqa XMMWORD PTR [(r8+0)+rsi], xmm0 +add rsi, 16 +cmp rsi, 16*12 jl label0 -movdqa xmm4, [rax+rbp] +movdqa xmm4, [rax+rsi] movdqa xmm1, [rdx] -mov r11d, [rdx+4*4] +mov r12d, [rdx+4*4] mov ebx, [rdx+5*4] mov ecx, [rdx+6*4] mov edx, [rdx+7*4] xor rax, rax label9: -mov ebp, [rsi+rax] +mov esi, [r11+rax] add rax, rdi -mov ebp, [rsi+rax] +mov esi, [r11+rax] add rax, rdi -mov ebp, [rsi+rax] +mov esi, [r11+rax] add rax, rdi -mov ebp, [rsi+rax] +mov esi, [r11+rax] add rax, rdi cmp rax, 2048 jl label9 lfence test DWORD PTR [(r8+16*18+8)], 1 jz label8 -mov rbp, [(r8+16*14)] -movdqa xmm2, [rbp] +mov rsi, [(r8+16*14)] +movdqu xmm2, [rsi] pxor xmm2, xmm1 psrldq xmm1, 14 movd eax, xmm1 -mov al, BYTE PTR [rbp+15] -mov r12d, eax +mov al, BYTE PTR [rsi+15] +mov r10d, eax movd eax, xmm2 psrldq xmm2, 4 movd edi, xmm2 psrldq xmm2, 4 -movzx ebp, al -xor r11d, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)] -movzx ebp, ah -xor edx, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)] +movzx esi, al +xor r12d, DWORD PTR [r11+8*rsi+(((0+3) MOD (4))+1)] +movzx esi, ah +xor edx, DWORD PTR [r11+8*rsi+(((1+3) MOD (4))+1)] shr eax, 16 -movzx ebp, al -xor ecx, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)] -movzx ebp, ah -xor ebx, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)] +movzx esi, al +xor ecx, DWORD PTR [r11+8*rsi+(((2+3) MOD (4))+1)] +movzx esi, ah +xor ebx, DWORD PTR [r11+8*rsi+(((3+3) MOD (4))+1)] mov 
eax, edi movd edi, xmm2 psrldq xmm2, 4 -movzx ebp, al -xor ebx, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)] -movzx ebp, ah -xor r11d, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)] +movzx esi, al +xor ebx, DWORD PTR [r11+8*rsi+(((0+3) MOD (4))+1)] +movzx esi, ah +xor r12d, DWORD PTR [r11+8*rsi+(((1+3) MOD (4))+1)] shr eax, 16 -movzx ebp, al -xor edx, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)] -movzx ebp, ah -xor ecx, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)] +movzx esi, al +xor edx, DWORD PTR [r11+8*rsi+(((2+3) MOD (4))+1)] +movzx esi, ah +xor ecx, DWORD PTR [r11+8*rsi+(((3+3) MOD (4))+1)] mov eax, edi movd edi, xmm2 -movzx ebp, al -xor ecx, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)] -movzx ebp, ah -xor ebx, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)] +movzx esi, al +xor ecx, DWORD PTR [r11+8*rsi+(((0+3) MOD (4))+1)] +movzx esi, ah +xor ebx, DWORD PTR [r11+8*rsi+(((1+3) MOD (4))+1)] shr eax, 16 -movzx ebp, al -xor r11d, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)] -movzx ebp, ah -xor edx, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)] +movzx esi, al +xor r12d, DWORD PTR [r11+8*rsi+(((2+3) MOD (4))+1)] +movzx esi, ah +xor edx, DWORD PTR [r11+8*rsi+(((3+3) MOD (4))+1)] mov eax, edi -movzx ebp, al -xor edx, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)] -movzx ebp, ah -xor ecx, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)] +movzx esi, al +xor edx, DWORD PTR [r11+8*rsi+(((0+3) MOD (4))+1)] +movzx esi, ah +xor ecx, DWORD PTR [r11+8*rsi+(((1+3) MOD (4))+1)] shr eax, 16 -movzx ebp, al -xor ebx, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)] +movzx esi, al +xor ebx, DWORD PTR [r11+8*rsi+(((2+3) MOD (4))+1)] psrldq xmm2, 3 mov eax, [(r8+16*12)+0*4] mov edi, [(r8+16*12)+2*4] -mov r10d, [(r8+16*12)+3*4] -movzx ebp, cl -xor r10d, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)] -movzx ebp, bl -xor edi, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)] -movzx ebp, bh -xor r10d, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)] +mov r9d, [(r8+16*12)+3*4] +movzx esi, cl +xor r9d, DWORD PTR [r11+8*rsi+(((3+3) MOD (4))+1)] +movzx esi, 
bl +xor edi, DWORD PTR [r11+8*rsi+(((3+3) MOD (4))+1)] +movzx esi, bh +xor r9d, DWORD PTR [r11+8*rsi+(((2+3) MOD (4))+1)] shr ebx, 16 -movzx ebp, bl -xor eax, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)] -movzx ebp, bh -mov ebx, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)] +movzx esi, bl +xor eax, DWORD PTR [r11+8*rsi+(((1+3) MOD (4))+1)] +movzx esi, bh +mov ebx, DWORD PTR [r11+8*rsi+(((0+3) MOD (4))+1)] xor ebx, [(r8+16*12)+1*4] -movzx ebp, ch -xor eax, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)] +movzx esi, ch +xor eax, DWORD PTR [r11+8*rsi+(((2+3) MOD (4))+1)] shr ecx, 16 -movzx ebp, dl -xor eax, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)] -movzx ebp, dh -xor ebx, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)] +movzx esi, dl +xor eax, DWORD PTR [r11+8*rsi+(((3+3) MOD (4))+1)] +movzx esi, dh +xor ebx, DWORD PTR [r11+8*rsi+(((2+3) MOD (4))+1)] shr edx, 16 -movzx ebp, ch -xor edi, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)] -movzx ebp, cl -xor ebx, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)] -movzx ebp, dl -xor edi, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)] -movzx ebp, dh -xor r10d, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)] +movzx esi, ch +xor edi, DWORD PTR [r11+8*rsi+(((0+3) MOD (4))+1)] +movzx esi, cl +xor ebx, DWORD PTR [r11+8*rsi+(((1+3) MOD (4))+1)] +movzx esi, dl +xor edi, DWORD PTR [r11+8*rsi+(((1+3) MOD (4))+1)] +movzx esi, dh +xor r9d, DWORD PTR [r11+8*rsi+(((0+3) MOD (4))+1)] movd ecx, xmm2 -mov edx, r11d -mov [(r8+0)+3*4], r10d +mov edx, r12d +mov [(r8+0)+3*4], r9d mov [(r8+0)+0*4], eax mov [(r8+0)+1*4], ebx mov [(r8+0)+2*4], edi jmp label5 label3: -mov r11d, [(r8+16*12)+0*4] +mov r12d, [(r8+16*12)+0*4] mov ebx, [(r8+16*12)+1*4] mov ecx, [(r8+16*12)+2*4] mov edx, [(r8+16*12)+3*4] label8: mov rax, [(r8+16*14)] movdqu xmm2, [rax] -mov rbp, [(r8+16*14)+8] -movdqu xmm5, [rbp] +mov rsi, [(r8+16*14)+8] +movdqu xmm5, [rsi] pxor xmm2, xmm1 pxor xmm2, xmm5 movd eax, xmm2 psrldq xmm2, 4 movd edi, xmm2 psrldq xmm2, 4 -movzx ebp, al -xor r11d, DWORD PTR [rsi+8*rbp+(((0+3) MOD 
(4))+1)] -movzx ebp, ah -xor edx, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)] +movzx esi, al +xor r12d, DWORD PTR [r11+8*rsi+(((0+3) MOD (4))+1)] +movzx esi, ah +xor edx, DWORD PTR [r11+8*rsi+(((1+3) MOD (4))+1)] shr eax, 16 -movzx ebp, al -xor ecx, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)] -movzx ebp, ah -xor ebx, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)] +movzx esi, al +xor ecx, DWORD PTR [r11+8*rsi+(((2+3) MOD (4))+1)] +movzx esi, ah +xor ebx, DWORD PTR [r11+8*rsi+(((3+3) MOD (4))+1)] mov eax, edi movd edi, xmm2 psrldq xmm2, 4 -movzx ebp, al -xor ebx, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)] -movzx ebp, ah -xor r11d, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)] +movzx esi, al +xor ebx, DWORD PTR [r11+8*rsi+(((0+3) MOD (4))+1)] +movzx esi, ah +xor r12d, DWORD PTR [r11+8*rsi+(((1+3) MOD (4))+1)] shr eax, 16 -movzx ebp, al -xor edx, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)] -movzx ebp, ah -xor ecx, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)] +movzx esi, al +xor edx, DWORD PTR [r11+8*rsi+(((2+3) MOD (4))+1)] +movzx esi, ah +xor ecx, DWORD PTR [r11+8*rsi+(((3+3) MOD (4))+1)] mov eax, edi movd edi, xmm2 -movzx ebp, al -xor ecx, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)] -movzx ebp, ah -xor ebx, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)] +movzx esi, al +xor ecx, DWORD PTR [r11+8*rsi+(((0+3) MOD (4))+1)] +movzx esi, ah +xor ebx, DWORD PTR [r11+8*rsi+(((1+3) MOD (4))+1)] shr eax, 16 -movzx ebp, al -xor r11d, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)] -movzx ebp, ah -xor edx, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)] +movzx esi, al +xor r12d, DWORD PTR [r11+8*rsi+(((2+3) MOD (4))+1)] +movzx esi, ah +xor edx, DWORD PTR [r11+8*rsi+(((3+3) MOD (4))+1)] mov eax, edi -movzx ebp, al -xor edx, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)] -movzx ebp, ah -xor ecx, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)] +movzx esi, al +xor edx, DWORD PTR [r11+8*rsi+(((0+3) MOD (4))+1)] +movzx esi, ah +xor ecx, DWORD PTR [r11+8*rsi+(((1+3) MOD (4))+1)] shr eax, 16 -movzx ebp, al -xor ebx, DWORD PTR 
[rsi+8*rbp+(((2+3) MOD (4))+1)] -movzx ebp, ah -xor r11d, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)] -mov eax, r11d +movzx esi, al +xor ebx, DWORD PTR [r11+8*rsi+(((2+3) MOD (4))+1)] +movzx esi, ah +xor r12d, DWORD PTR [r11+8*rsi+(((3+3) MOD (4))+1)] +mov eax, r12d add r8, [(r8+16*19)] add r8, 4*16 jmp label2 label1: -mov ecx, r12d -mov edx, r11d +mov ecx, r10d +mov edx, r12d mov eax, [(r8+0)+0*4] mov ebx, [(r8+0)+1*4] xor cl, ch and rcx, 255 label5: -add r12d, 1 -xor edx, DWORD PTR [rsi+rcx*8+3] -movzx ebp, dl -xor ebx, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)] -movzx ebp, dh -mov ecx, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)] +add r10d, 1 +xor edx, DWORD PTR [r11+rcx*8+3] +movzx esi, dl +xor ebx, DWORD PTR [r11+8*rsi+(((3+3) MOD (4))+1)] +movzx esi, dh +mov ecx, DWORD PTR [r11+8*rsi+(((2+3) MOD (4))+1)] shr edx, 16 xor ecx, [(r8+0)+2*4] -movzx ebp, dh -xor eax, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)] -movzx ebp, dl -mov edx, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)] +movzx esi, dh +xor eax, DWORD PTR [r11+8*rsi+(((0+3) MOD (4))+1)] +movzx esi, dl +mov edx, DWORD PTR [r11+8*rsi+(((1+3) MOD (4))+1)] xor edx, [(r8+0)+3*4] add r8, [(r8+16*19)] add r8, 3*16 jmp label4 label2: -mov r10d, [(r8+0)-4*16+3*4] +mov r9d, [(r8+0)-4*16+3*4] mov edi, [(r8+0)-4*16+2*4] -movzx ebp, cl -xor r10d, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)] +movzx esi, cl +xor r9d, DWORD PTR [r11+8*rsi+(((3+3) MOD (4))+1)] mov cl, al -movzx ebp, ah -xor edi, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)] +movzx esi, ah +xor edi, DWORD PTR [r11+8*rsi+(((2+3) MOD (4))+1)] shr eax, 16 -movzx ebp, bl -xor edi, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)] -movzx ebp, bh -xor r10d, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)] +movzx esi, bl +xor edi, DWORD PTR [r11+8*rsi+(((3+3) MOD (4))+1)] +movzx esi, bh +xor r9d, DWORD PTR [r11+8*rsi+(((2+3) MOD (4))+1)] shr ebx, 16 -movzx ebp, al -xor r10d, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)] -movzx ebp, ah -mov eax, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)] -movzx ebp, bl 
-xor eax, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)] -movzx ebp, bh -mov ebx, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)] -movzx ebp, ch -xor eax, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)] -movzx ebp, cl -xor ebx, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)] +movzx esi, al +xor r9d, DWORD PTR [r11+8*rsi+(((1+3) MOD (4))+1)] +movzx esi, ah +mov eax, DWORD PTR [r11+8*rsi+(((0+3) MOD (4))+1)] +movzx esi, bl +xor eax, DWORD PTR [r11+8*rsi+(((1+3) MOD (4))+1)] +movzx esi, bh +mov ebx, DWORD PTR [r11+8*rsi+(((0+3) MOD (4))+1)] +movzx esi, ch +xor eax, DWORD PTR [r11+8*rsi+(((2+3) MOD (4))+1)] +movzx esi, cl +xor ebx, DWORD PTR [r11+8*rsi+(((3+3) MOD (4))+1)] shr ecx, 16 -movzx ebp, dl -xor eax, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)] -movzx ebp, dh -xor ebx, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)] +movzx esi, dl +xor eax, DWORD PTR [r11+8*rsi+(((3+3) MOD (4))+1)] +movzx esi, dh +xor ebx, DWORD PTR [r11+8*rsi+(((2+3) MOD (4))+1)] shr edx, 16 -movzx ebp, ch -xor edi, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)] -movzx ebp, cl -xor ebx, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)] -movzx ebp, dl -xor edi, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)] -movzx ebp, dh -xor r10d, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)] +movzx esi, ch +xor edi, DWORD PTR [r11+8*rsi+(((0+3) MOD (4))+1)] +movzx esi, cl +xor ebx, DWORD PTR [r11+8*rsi+(((1+3) MOD (4))+1)] +movzx esi, dl +xor edi, DWORD PTR [r11+8*rsi+(((1+3) MOD (4))+1)] +movzx esi, dh +xor r9d, DWORD PTR [r11+8*rsi+(((0+3) MOD (4))+1)] mov ecx, edi xor eax, [(r8+0)-4*16+0*4] xor ebx, [(r8+0)-4*16+1*4] -mov edx, r10d +mov edx, r9d label4: -mov r10d, [(r8+0)-4*16+7*4] +mov r9d, [(r8+0)-4*16+7*4] mov edi, [(r8+0)-4*16+6*4] -movzx ebp, cl -xor r10d, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)] +movzx esi, cl +xor r9d, DWORD PTR [r11+8*rsi+(((3+3) MOD (4))+1)] mov cl, al -movzx ebp, ah -xor edi, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)] +movzx esi, ah +xor edi, DWORD PTR [r11+8*rsi+(((2+3) MOD (4))+1)] shr eax, 16 -movzx ebp, bl -xor edi, DWORD PTR 
[rsi+8*rbp+(((3+3) MOD (4))+1)] -movzx ebp, bh -xor r10d, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)] +movzx esi, bl +xor edi, DWORD PTR [r11+8*rsi+(((3+3) MOD (4))+1)] +movzx esi, bh +xor r9d, DWORD PTR [r11+8*rsi+(((2+3) MOD (4))+1)] shr ebx, 16 -movzx ebp, al -xor r10d, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)] -movzx ebp, ah -mov eax, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)] -movzx ebp, bl -xor eax, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)] -movzx ebp, bh -mov ebx, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)] -movzx ebp, ch -xor eax, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)] -movzx ebp, cl -xor ebx, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)] +movzx esi, al +xor r9d, DWORD PTR [r11+8*rsi+(((1+3) MOD (4))+1)] +movzx esi, ah +mov eax, DWORD PTR [r11+8*rsi+(((0+3) MOD (4))+1)] +movzx esi, bl +xor eax, DWORD PTR [r11+8*rsi+(((1+3) MOD (4))+1)] +movzx esi, bh +mov ebx, DWORD PTR [r11+8*rsi+(((0+3) MOD (4))+1)] +movzx esi, ch +xor eax, DWORD PTR [r11+8*rsi+(((2+3) MOD (4))+1)] +movzx esi, cl +xor ebx, DWORD PTR [r11+8*rsi+(((3+3) MOD (4))+1)] shr ecx, 16 -movzx ebp, dl -xor eax, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)] -movzx ebp, dh -xor ebx, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)] +movzx esi, dl +xor eax, DWORD PTR [r11+8*rsi+(((3+3) MOD (4))+1)] +movzx esi, dh +xor ebx, DWORD PTR [r11+8*rsi+(((2+3) MOD (4))+1)] shr edx, 16 -movzx ebp, ch -xor edi, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)] -movzx ebp, cl -xor ebx, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)] -movzx ebp, dl -xor edi, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)] -movzx ebp, dh -xor r10d, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)] +movzx esi, ch +xor edi, DWORD PTR [r11+8*rsi+(((0+3) MOD (4))+1)] +movzx esi, cl +xor ebx, DWORD PTR [r11+8*rsi+(((1+3) MOD (4))+1)] +movzx esi, dl +xor edi, DWORD PTR [r11+8*rsi+(((1+3) MOD (4))+1)] +movzx esi, dh +xor r9d, DWORD PTR [r11+8*rsi+(((0+3) MOD (4))+1)] mov ecx, edi xor eax, [(r8+0)-4*16+4*4] xor ebx, [(r8+0)-4*16+5*4] -mov edx, r10d +mov edx, r9d add r8, 32 test r8, 255 
jnz label2 sub r8, 16*16 -movzx ebp, ch -movzx edi, BYTE PTR [rsi+rbp*8+1] -movzx ebp, dl -xor edi, DWORD PTR [rsi+rbp*8+0] +movzx esi, ch +movzx edi, BYTE PTR [r11+rsi*8+1] +movzx esi, dl +xor edi, DWORD PTR [r11+rsi*8+0] mov WORD PTR [(r8+16*13)+2], di -movzx ebp, dh -movzx edi, BYTE PTR [rsi+rbp*8+1] -movzx ebp, al -xor edi, DWORD PTR [rsi+rbp*8+0] +movzx esi, dh +movzx edi, BYTE PTR [r11+rsi*8+1] +movzx esi, al +xor edi, DWORD PTR [r11+rsi*8+0] mov WORD PTR [(r8+16*13)+6], di shr edx, 16 -movzx ebp, ah -movzx edi, BYTE PTR [rsi+rbp*8+1] -movzx ebp, bl -xor edi, DWORD PTR [rsi+rbp*8+0] +movzx esi, ah +movzx edi, BYTE PTR [r11+rsi*8+1] +movzx esi, bl +xor edi, DWORD PTR [r11+rsi*8+0] mov WORD PTR [(r8+16*13)+10], di shr eax, 16 -movzx ebp, bh -movzx edi, BYTE PTR [rsi+rbp*8+1] -movzx ebp, cl -xor edi, DWORD PTR [rsi+rbp*8+0] +movzx esi, bh +movzx edi, BYTE PTR [r11+rsi*8+1] +movzx esi, cl +xor edi, DWORD PTR [r11+rsi*8+0] mov WORD PTR [(r8+16*13)+14], di shr ebx, 16 -movzx ebp, dh -movzx edi, BYTE PTR [rsi+rbp*8+1] -movzx ebp, al -xor edi, DWORD PTR [rsi+rbp*8+0] +movzx esi, dh +movzx edi, BYTE PTR [r11+rsi*8+1] +movzx esi, al +xor edi, DWORD PTR [r11+rsi*8+0] mov WORD PTR [(r8+16*13)+12], di shr ecx, 16 -movzx ebp, ah -movzx edi, BYTE PTR [rsi+rbp*8+1] -movzx ebp, bl -xor edi, DWORD PTR [rsi+rbp*8+0] +movzx esi, ah +movzx edi, BYTE PTR [r11+rsi*8+1] +movzx esi, bl +xor edi, DWORD PTR [r11+rsi*8+0] mov WORD PTR [(r8+16*13)+0], di -movzx ebp, bh -movzx edi, BYTE PTR [rsi+rbp*8+1] -movzx ebp, cl -xor edi, DWORD PTR [rsi+rbp*8+0] +movzx esi, bh +movzx edi, BYTE PTR [r11+rsi*8+1] +movzx esi, cl +xor edi, DWORD PTR [r11+rsi*8+0] mov WORD PTR [(r8+16*13)+4], di -movzx ebp, ch -movzx edi, BYTE PTR [rsi+rbp*8+1] -movzx ebp, dl -xor edi, DWORD PTR [rsi+rbp*8+0] +movzx esi, ch +movzx edi, BYTE PTR [r11+rsi*8+1] +movzx esi, dl +xor edi, DWORD PTR [r11+rsi*8+0] mov WORD PTR [(r8+16*13)+8], di mov rax, [(r8+16*14)+16] mov rbx, [(r8+16*14)+24] @@ -430,13 +429,27 @@ mov 
[(r8+16*18+8)], rcx test rcx, 1 jnz label1 movdqa xmm0, [(r8+16*16)] -paddd xmm0, [(r8+16*14)] +paddq xmm0, [(r8+16*14)] movdqa [(r8+16*14)], xmm0 jmp label3 label7: -mov rbp, [(r8+16*18)] +xorps xmm0, xmm0 +lea rax, [(r8+0)+7*16] +movaps [rax-7*16], xmm0 +movaps [rax-6*16], xmm0 +movaps [rax-5*16], xmm0 +movaps [rax-4*16], xmm0 +movaps [rax-3*16], xmm0 +movaps [rax-2*16], xmm0 +movaps [rax-1*16], xmm0 +movaps [rax+0*16], xmm0 +movaps [rax+1*16], xmm0 +movaps [rax+2*16], xmm0 +movaps [rax+3*16], xmm0 +movaps [rax+4*16], xmm0 +movaps [rax+5*16], xmm0 +movaps [rax+6*16], xmm0 pop r12 -pop rbp pop rbx pop rdi pop rsi |