diff options
Diffstat (limited to 'crypto/des/asm')
-rw-r--r-- | crypto/des/asm/d-win32.asm | 475 | ||||
-rw-r--r-- | crypto/des/asm/des686.pl | 458 | ||||
-rw-r--r-- | crypto/des/asm/desboth.pl | 144 | ||||
-rw-r--r-- | crypto/des/asm/dx86unix.cpp | 3199 | ||||
-rw-r--r-- | crypto/des/asm/readme | 262 |
5 files changed, 4039 insertions, 499 deletions
diff --git a/crypto/des/asm/d-win32.asm b/crypto/des/asm/d-win32.asm index fc62e3b78..9e3dc9cd8 100644 --- a/crypto/des/asm/d-win32.asm +++ b/crypto/des/asm/d-win32.asm @@ -1,27 +1,24 @@ ; Don't even think of reading this code - ; It was automatically generated by des-som3.pl + ; It was automatically generated by des-586.pl ; Which is a perl program used to generate the x86 assember for - ; any of elf, a.out, Win32, or Solaris - ; It can be found in SSLeay 0.6.5+ or in libdes 3.26+ + ; any of elf, a.out, BSDI,Win32, or Solaris ; eric <eay@cryptsoft.com> - ; The inner loop instruction sequence and the IP/FP modifications - ; are from Svend Olaf Mikkelsen <svolaf@inet.uni-c.dk> ; - TITLE dx86xxxx.asm + TITLE des-586.asm .386 .model FLAT _TEXT SEGMENT PUBLIC _des_encrypt -EXTRN _des_SPtrans:DWORD +EXTRN _des_SPtrans:DWORD _des_encrypt PROC NEAR - push ebp - push ebx push esi push edi ; ; Load the 2 words - mov esi, DWORD PTR 20[esp] + mov esi, DWORD PTR 12[esp] xor ecx, ecx + push ebx + push ebp mov eax, DWORD PTR [esi] mov ebx, DWORD PTR 28[esp] mov edi, DWORD PTR 4[esi] @@ -63,8 +60,8 @@ _des_encrypt PROC NEAR xor edi, eax ; rol edi, 1 - cmp ebx, 0 mov ebp, DWORD PTR 24[esp] + cmp ebx, 0 je $L000start_decrypt ; ; Round 0 @@ -1294,32 +1291,32 @@ $L001end: ror eax, 4 mov DWORD PTR [edx],eax mov DWORD PTR 4[edx],esi + pop ebp + pop ebx pop edi pop esi - pop ebx - pop ebp ret _des_encrypt ENDP _TEXT ENDS _TEXT SEGMENT PUBLIC _des_encrypt2 -EXTRN _des_SPtrans:DWORD +EXTRN _des_SPtrans:DWORD _des_encrypt2 PROC NEAR - push ebp - push ebx push esi push edi ; ; Load the 2 words - mov eax, DWORD PTR 20[esp] + mov eax, DWORD PTR 12[esp] xor ecx, ecx + push ebx + push ebp mov esi, DWORD PTR [eax] mov ebx, DWORD PTR 28[esp] rol esi, 3 mov edi, DWORD PTR 4[eax] rol edi, 3 - cmp ebx, 0 mov ebp, DWORD PTR 24[esp] + cmp ebx, 0 je $L002start_decrypt ; ; Round 0 @@ -2515,26 +2512,27 @@ $L003end: ror esi, 3 mov DWORD PTR [eax],edi mov DWORD PTR 4[eax],esi + pop ebp + pop ebx pop edi pop esi - pop ebx - pop ebp ret _des_encrypt2 ENDP _TEXT ENDS _TEXT SEGMENT PUBLIC _des_encrypt3 -EXTRN _des_SPtrans:DWORD + _des_encrypt3 PROC NEAR - push ebp push ebx + mov ebx, DWORD PTR 8[esp] + push ebp push esi push edi ; ; Load the data words - mov ebx, DWORD PTR 20[esp] mov edi, DWORD PTR [ebx] mov esi, DWORD PTR 4[ebx] + sub esp, 12 ; ; IP rol edi, 4 @@ -2575,24 +2573,24 @@ _des_encrypt3 PROC NEAR ror edx, 3 ror esi, 2 mov DWORD PTR 4[ebx],esi - mov eax, DWORD PTR 24[esp] + mov eax, DWORD PTR 36[esp] mov DWORD PTR [ebx],edx - mov edi, DWORD PTR 28[esp] - mov esi, DWORD PTR 32[esp] - push 1 - push eax - push ebx + mov edi, DWORD PTR 40[esp] + mov esi, DWORD PTR 44[esp] + mov DWORD PTR 8[esp],1 + mov DWORD PTR 4[esp],eax + mov DWORD PTR [esp],ebx call _des_encrypt2 - push 0 - push edi - push ebx + mov DWORD PTR 8[esp],0 + mov DWORD PTR 4[esp],edi + mov DWORD PTR [esp],ebx call _des_encrypt2 - push 1 - push esi - push ebx + mov DWORD PTR 8[esp],1 + mov DWORD PTR 4[esp],esi + mov DWORD PTR [esp],ebx call _des_encrypt2 + add esp, 12 mov edi, DWORD PTR [ebx] - add esp, 36 mov esi, DWORD PTR 4[ebx] ; ; FP @@ -2637,24 +2635,25 @@ _des_encrypt3 PROC NEAR mov DWORD PTR 4[ebx],esi pop edi pop esi - pop ebx pop ebp + pop ebx ret _des_encrypt3 ENDP _TEXT ENDS _TEXT SEGMENT PUBLIC _des_decrypt3 -EXTRN _des_SPtrans:DWORD + _des_decrypt3 PROC NEAR - push ebp push ebx + mov ebx, DWORD PTR 8[esp] + push ebp push esi push edi ; ; Load the data words - mov ebx, DWORD PTR 20[esp] mov edi, DWORD PTR [ebx] mov esi, DWORD PTR 4[ebx] + sub esp, 12 ; ; IP rol edi, 4 @@ -2695,24 +2694,24 @@ _des_decrypt3 PROC NEAR ror edx, 3 ror esi, 2 mov DWORD PTR 4[ebx],esi - mov esi, DWORD PTR 24[esp] + mov esi, DWORD PTR 36[esp] mov DWORD PTR [ebx],edx - mov edi, DWORD PTR 28[esp] - mov eax, DWORD PTR 32[esp] - push 0 - push eax - push ebx + mov edi, DWORD PTR 40[esp] + mov eax, DWORD PTR 44[esp] + mov DWORD PTR 8[esp],0 + mov DWORD PTR 4[esp],eax + mov DWORD PTR [esp],ebx call _des_encrypt2 - push 1 - push edi - push ebx + mov DWORD PTR 8[esp],1 + mov DWORD PTR 4[esp],edi + mov DWORD PTR [esp],ebx call _des_encrypt2 - push 0 - push esi - push ebx + mov DWORD PTR 8[esp],0 + mov DWORD PTR 4[esp],esi + mov DWORD PTR [esp],ebx call _des_encrypt2 + add esp, 12 mov edi, DWORD PTR [ebx] - add esp, 36 mov esi, DWORD PTR 4[ebx] ; ; FP @@ -2757,9 +2756,377 @@ _des_decrypt3 PROC NEAR mov DWORD PTR 4[ebx],esi pop edi pop esi - pop ebx pop ebp + pop ebx ret _des_decrypt3 ENDP _TEXT ENDS +_TEXT SEGMENT +PUBLIC _des_ncbc_encrypt + +_des_ncbc_encrypt PROC NEAR + ; + push ebp + push ebx + push esi + push edi + mov ebp, DWORD PTR 28[esp] + ; getting iv ptr from parameter 4 + mov ebx, DWORD PTR 36[esp] + mov esi, DWORD PTR [ebx] + mov edi, DWORD PTR 4[ebx] + push edi + push esi + push edi + push esi + mov ebx, esp + mov esi, DWORD PTR 36[esp] + mov edi, DWORD PTR 40[esp] + ; getting encrypt flag from parameter 5 + mov ecx, DWORD PTR 56[esp] + ; get and push parameter 5 + push ecx + ; get and push parameter 3 + mov eax, DWORD PTR 52[esp] + push eax + push ebx + cmp ecx, 0 + jz $L004decrypt + and ebp, 4294967288 + mov eax, DWORD PTR 12[esp] + mov ebx, DWORD PTR 16[esp] + jz $L005encrypt_finish +L006encrypt_loop: + mov ecx, DWORD PTR [esi] + mov edx, DWORD PTR 4[esi] + xor eax, ecx + xor ebx, edx + mov DWORD PTR 12[esp],eax + mov DWORD PTR 16[esp],ebx + call _des_encrypt + mov eax, DWORD PTR 12[esp] + mov ebx, DWORD PTR 16[esp] + mov DWORD PTR [edi],eax + mov DWORD PTR 4[edi],ebx + add esi, 8 + add edi, 8 + sub ebp, 8 + jnz L006encrypt_loop +$L005encrypt_finish: + mov ebp, DWORD PTR 56[esp] + and ebp, 7 + jz $L007finish + xor ecx, ecx + xor edx, edx + mov ebp, DWORD PTR $L008cbc_enc_jmp_table[ebp*4] + jmp ebp +L009ej7: + mov dh, BYTE PTR 6[esi] + shl edx, 8 +L010ej6: + mov dh, BYTE PTR 5[esi] +L011ej5: + mov dl, BYTE PTR 4[esi] +L012ej4: + mov ecx, DWORD PTR [esi] + jmp $L013ejend +L014ej3: + mov ch, BYTE PTR 2[esi] + shl ecx, 8 +L015ej2: + mov ch, BYTE PTR 1[esi] +L016ej1: + mov cl, BYTE PTR [esi] +$L013ejend: + xor eax, ecx + xor ebx, edx + mov DWORD PTR 12[esp],eax + mov DWORD PTR 16[esp],ebx + call _des_encrypt + mov eax, DWORD PTR 12[esp] + mov ebx, DWORD PTR 16[esp] + mov DWORD PTR [edi],eax + mov DWORD PTR 4[edi],ebx + jmp $L007finish +$L004decrypt: + and ebp, 4294967288 + mov eax, DWORD PTR 20[esp] + mov ebx, DWORD PTR 24[esp] + jz $L017decrypt_finish +L018decrypt_loop: + mov eax, DWORD PTR [esi] + mov ebx, DWORD PTR 4[esi] + mov DWORD PTR 12[esp],eax + mov DWORD PTR 16[esp],ebx + call _des_encrypt + mov eax, DWORD PTR 12[esp] + mov ebx, DWORD PTR 16[esp] + mov ecx, DWORD PTR 20[esp] + mov edx, DWORD PTR 24[esp] + xor ecx, eax + xor edx, ebx + mov eax, DWORD PTR [esi] + mov ebx, DWORD PTR 4[esi] + mov DWORD PTR [edi],ecx + mov DWORD PTR 4[edi],edx + mov DWORD PTR 20[esp],eax + mov DWORD PTR 24[esp],ebx + add esi, 8 + add edi, 8 + sub ebp, 8 + jnz L018decrypt_loop +$L017decrypt_finish: + mov ebp, DWORD PTR 56[esp] + and ebp, 7 + jz $L007finish + mov eax, DWORD PTR [esi] + mov ebx, DWORD PTR 4[esi] + mov DWORD PTR 12[esp],eax + mov DWORD PTR 16[esp],ebx + call _des_encrypt + mov eax, DWORD PTR 12[esp] + mov ebx, DWORD PTR 16[esp] + mov ecx, DWORD PTR 20[esp] + mov edx, DWORD PTR 24[esp] + xor ecx, eax + xor edx, ebx + mov eax, DWORD PTR [esi] + mov ebx, DWORD PTR 4[esi] +L019dj7: + ror edx, 16 + mov BYTE PTR 6[edi],dl + shr edx, 16 +L020dj6: + mov BYTE PTR 5[edi],dh +L021dj5: + mov BYTE PTR 4[edi],dl +L022dj4: + mov DWORD PTR [edi],ecx + jmp $L023djend +L024dj3: + ror ecx, 16 + mov BYTE PTR 2[edi],cl + shl ecx, 16 +L025dj2: + mov BYTE PTR 1[esi],ch +L026dj1: + mov BYTE PTR [esi], cl +$L023djend: + jmp $L007finish +$L007finish: + mov ecx, DWORD PTR 64[esp] + add esp, 28 + mov DWORD PTR [ecx],eax + mov DWORD PTR 4[ecx],ebx + pop edi + pop esi + pop ebx + pop ebp + ret +$L008cbc_enc_jmp_table: + DD 0 + DD L016ej1 + DD L015ej2 + DD L014ej3 + DD L012ej4 + DD L011ej5 + DD L010ej6 + DD L009ej7 +L027cbc_dec_jmp_table: + DD 0 + DD L026dj1 + DD L025dj2 + DD L024dj3 + DD L022dj4 + DD L021dj5 + DD L020dj6 + DD L019dj7 +_des_ncbc_encrypt ENDP +_TEXT ENDS +_TEXT SEGMENT +PUBLIC _des_ede3_cbc_encrypt + +_des_ede3_cbc_encrypt PROC NEAR + ; + push ebp + push ebx + push esi + push edi + mov ebp, DWORD PTR 28[esp] + ; getting iv ptr from parameter 6 + mov ebx, DWORD PTR 44[esp] + mov esi, DWORD PTR [ebx] + mov edi, DWORD PTR 4[ebx] + push edi + push esi + push edi + push esi + mov ebx, esp + mov esi, DWORD PTR 36[esp] + mov edi, DWORD PTR 40[esp] + ; getting encrypt flag from parameter 7 + mov ecx, DWORD PTR 64[esp] + ; get and push parameter 5 + mov eax, DWORD PTR 56[esp] + push eax + ; get and push parameter 4 + mov eax, DWORD PTR 56[esp] + push eax + ; get and push parameter 3 + mov eax, DWORD PTR 56[esp] + push eax + push ebx + cmp ecx, 0 + jz $L028decrypt + and ebp, 4294967288 + mov eax, DWORD PTR 16[esp] + mov ebx, DWORD PTR 20[esp] + jz $L029encrypt_finish +L030encrypt_loop: + mov ecx, DWORD PTR [esi] + mov edx, DWORD PTR 4[esi] + xor eax, ecx + xor ebx, edx + mov DWORD PTR 16[esp],eax + mov DWORD PTR 20[esp],ebx + call _des_encrypt3 + mov eax, DWORD PTR 16[esp] + mov ebx, DWORD PTR 20[esp] + mov DWORD PTR [edi],eax + mov DWORD PTR 4[edi],ebx + add esi, 8 + add edi, 8 + sub ebp, 8 + jnz L030encrypt_loop +$L029encrypt_finish: + mov ebp, DWORD PTR 60[esp] + and ebp, 7 + jz $L031finish + xor ecx, ecx + xor edx, edx + mov ebp, DWORD PTR $L032cbc_enc_jmp_table[ebp*4] + jmp ebp +L033ej7: + mov dh, BYTE PTR 6[esi] + shl edx, 8 +L034ej6: + mov dh, BYTE PTR 5[esi] +L035ej5: + mov dl, BYTE PTR 4[esi] +L036ej4: + mov ecx, DWORD PTR [esi] + jmp $L037ejend +L038ej3: + mov ch, BYTE PTR 2[esi] + shl ecx, 8 +L039ej2: + mov ch, BYTE PTR 1[esi] +L040ej1: + mov cl, BYTE PTR [esi] +$L037ejend: + xor eax, ecx + xor ebx, edx + mov DWORD PTR 16[esp],eax + mov DWORD PTR 20[esp],ebx + call _des_encrypt3 + mov eax, DWORD PTR 16[esp] + mov ebx, DWORD PTR 20[esp] + mov DWORD PTR [edi],eax + mov DWORD PTR 4[edi],ebx + jmp $L031finish +$L028decrypt: + and ebp, 4294967288 + mov eax, DWORD PTR 24[esp] + mov ebx, DWORD PTR 28[esp] + jz $L041decrypt_finish +L042decrypt_loop: + mov eax, DWORD PTR [esi] + mov ebx, DWORD PTR 4[esi] + mov DWORD PTR 16[esp],eax + mov DWORD PTR 20[esp],ebx + call _des_decrypt3 + mov eax, DWORD PTR 16[esp] + mov ebx, DWORD PTR 20[esp] + mov ecx, DWORD PTR 24[esp] + mov edx, DWORD PTR 28[esp] + xor ecx, eax + xor edx, ebx + mov eax, DWORD PTR [esi] + mov ebx, DWORD PTR 4[esi] + mov DWORD PTR [edi],ecx + mov DWORD PTR 4[edi],edx + mov DWORD PTR 24[esp],eax + mov DWORD PTR 28[esp],ebx + add esi, 8 + add edi, 8 + sub ebp, 8 + jnz L042decrypt_loop +$L041decrypt_finish: + mov ebp, DWORD PTR 60[esp] + and ebp, 7 + jz $L031finish + mov eax, DWORD PTR [esi] + mov ebx, DWORD PTR 4[esi] + mov DWORD PTR 16[esp],eax + mov DWORD PTR 20[esp],ebx + call _des_decrypt3 + mov eax, DWORD PTR 16[esp] + mov ebx, DWORD PTR 20[esp] + mov ecx, DWORD PTR 24[esp] + mov edx, DWORD PTR 28[esp] + xor ecx, eax + xor edx, ebx + mov eax, DWORD PTR [esi] + mov ebx, DWORD PTR 4[esi] +L043dj7: + ror edx, 16 + mov BYTE PTR 6[edi],dl + shr edx, 16 +L044dj6: + mov BYTE PTR 5[edi],dh +L045dj5: + mov BYTE PTR 4[edi],dl +L046dj4: + mov DWORD PTR [edi],ecx + jmp $L047djend +L048dj3: + ror ecx, 16 + mov BYTE PTR 2[edi],cl + shl ecx, 16 +L049dj2: + mov BYTE PTR 1[esi],ch +L050dj1: + mov BYTE PTR [esi], cl +$L047djend: + jmp $L031finish +$L031finish: + mov ecx, DWORD PTR 76[esp] + add esp, 32 + mov DWORD PTR [ecx],eax + mov DWORD PTR 4[ecx],ebx + pop edi + pop esi + pop ebx + pop ebp + ret +$L032cbc_enc_jmp_table: + DD 0 + DD L040ej1 + DD L039ej2 + DD L038ej3 + DD L036ej4 + DD L035ej5 + DD L034ej6 + DD L033ej7 +L051cbc_dec_jmp_table: + DD 0 + DD L050dj1 + DD L049dj2 + DD L048dj3 + DD L046dj4 + DD L045dj5 + DD L044dj6 + DD L043dj7 +_des_ede3_cbc_encrypt ENDP +_TEXT ENDS END diff --git a/crypto/des/asm/des686.pl b/crypto/des/asm/des686.pl index efd9f592e..77dc5b51c 100644 --- a/crypto/des/asm/des686.pl +++ b/crypto/des/asm/des686.pl @@ -1,230 +1,230 @@ #!/usr/local/bin/perl -
-$prog="des686.pl";
-
-# base code is in microsft
-# op dest, source
-# format.
-#
-
-# WILL NOT WORK ANYMORE WITH desboth.pl
-require "desboth.pl";
-
-if ( ($ARGV[0] eq "elf"))
- { require "x86unix.pl"; }
-elsif ( ($ARGV[0] eq "a.out"))
- { $aout=1; require "x86unix.pl"; }
-elsif ( ($ARGV[0] eq "sol"))
- { $sol=1; require "x86unix.pl"; }
-elsif ( ($ARGV[0] eq "cpp"))
- { $cpp=1; require "x86unix.pl"; }
-elsif ( ($ARGV[0] eq "win32"))
- { require "x86ms.pl"; }
-else
- {
- print STDERR <<"EOF";
-Pick one target type from
- elf - linux, FreeBSD etc
- a.out - old linux
- sol - x86 solaris
- cpp - format so x86unix.cpp can be used
- win32 - Windows 95/Windows NT
-EOF
- exit(1);
- }
-
-&comment("Don't even think of reading this code");
-&comment("It was automatically generated by $prog");
-&comment("Which is a perl program used to generate the x86 assember for");
-&comment("any of elf, a.out, Win32, or Solaris");
-&comment("It can be found in SSLeay 0.6.5+ or in libdes 3.26+");
-&comment("eric <eay\@cryptsoft.com>");
-&comment("");
-
-&file("dx86xxxx");
-
-$L="edi";
-$R="esi";
-
-&des_encrypt("des_encrypt",1);
-&des_encrypt("des_encrypt2",0);
-
-&des_encrypt3("des_encrypt3",1);
-&des_encrypt3("des_decrypt3",0);
-
-&file_end();
-
-sub des_encrypt
- {
- local($name,$do_ip)=@_;
-
- &function_begin($name,3,"EXTRN _des_SPtrans:DWORD");
-
- &comment("");
- &comment("Load the 2 words");
- &mov("eax",&wparam(0));
- &mov($L,&DWP(0,"eax","",0));
- &mov($R,&DWP(4,"eax","",0));
-
- $ksp=&wparam(1);
-
- if ($do_ip)
- {
- &comment("");
- &comment("IP");
- &IP($L,$R,"eax");
- }
-
- &comment("");
- &comment("fixup rotate");
- &rotl($R,3);
- &rotl($L,3);
- &exch($L,$R);
-
- &comment("");
- &comment("load counter, key_schedule and enc flag");
- &mov("eax",&wparam(2)); # get encrypt flag
- &mov("ebp",&wparam(1)); # get ks
- &cmp("eax","0");
- &je(&label("start_decrypt"));
-
- # encrypting part
-
- for ($i=0; $i<16; $i+=2)
- {
- &comment("");
- &comment("Round $i");
- &D_ENCRYPT($L,$R,$i*2,"ebp","des_SPtrans","ecx","edx","eax","ebx");
-
- &comment("");
- &comment("Round ".sprintf("%d",$i+1));
- &D_ENCRYPT($R,$L,($i+1)*2,"ebp","des_SPtrans","ecx","edx","eax","ebx");
- }
- &jmp(&label("end"));
-
- &set_label("start_decrypt");
-
- for ($i=15; $i>0; $i-=2)
- {
- &comment("");
- &comment("Round $i");
- &D_ENCRYPT($L,$R,$i*2,"ebp","des_SPtrans","ecx","edx","eax","ebx");
- &comment("");
- &comment("Round ".sprintf("%d",$i-1));
- &D_ENCRYPT($R,$L,($i-1)*2,"ebp","des_SPtrans","ecx","edx","eax","ebx");
- }
-
- &set_label("end");
-
- &comment("");
- &comment("Fixup");
- &rotr($L,3); # r
- &rotr($R,3); # l
-
- if ($do_ip)
- {
- &comment("");
- &comment("FP");
- &FP($R,$L,"eax");
- }
-
- &mov("eax",&wparam(0));
- &mov(&DWP(0,"eax","",0),$L);
- &mov(&DWP(4,"eax","",0),$R);
-
- &function_end($name);
- }
-
-
-# The logic is to load R into 2 registers and operate on both at the same time.
-# We also load the 2 R's into 2 more registers so we can do the 'move word down a byte'
-# while also masking the other copy and doing a lookup. We then also accumulate the
-# L value in 2 registers then combine them at the end.
-sub D_ENCRYPT
- {
- local($L,$R,$S,$ks,$desSP,$u,$t,$tmp1,$tmp2,$tmp3)=@_;
-
- &mov( $u, &DWP(&n2a($S*4),$ks,"",0));
- &mov( $t, &DWP(&n2a(($S+1)*4),$ks,"",0));
- &xor( $u, $R );
- &xor( $t, $R );
- &rotr( $t, 4 );
-
- # the numbers at the end of the line are origional instruction order
- &mov( $tmp2, $u ); # 1 2
- &mov( $tmp1, $t ); # 1 1
- &and( $tmp2, "0xfc" ); # 1 4
- &and( $tmp1, "0xfc" ); # 1 3
- &shr( $t, 8 ); # 1 5
- &xor( $L, &DWP("0x100+$desSP",$tmp1,"",0)); # 1 7
- &shr( $u, 8 ); # 1 6
- &mov( $tmp1, &DWP(" $desSP",$tmp2,"",0)); # 1 8
-
- &mov( $tmp2, $u ); # 2 2
- &xor( $L, $tmp1 ); # 1 9
- &and( $tmp2, "0xfc" ); # 2 4
- &mov( $tmp1, $t ); # 2 1
- &and( $tmp1, "0xfc" ); # 2 3
- &shr( $t, 8 ); # 2 5
- &xor( $L, &DWP("0x300+$desSP",$tmp1,"",0)); # 2 7
- &shr( $u, 8 ); # 2 6
- &mov( $tmp1, &DWP("0x200+$desSP",$tmp2,"",0)); # 2 8
- &mov( $tmp2, $u ); # 3 2
-
- &xor( $L, $tmp1 ); # 2 9
- &and( $tmp2, "0xfc" ); # 3 4
-
- &mov( $tmp1, $t ); # 3 1
- &shr( $u, 8 ); # 3 6
- &and( $tmp1, "0xfc" ); # 3 3
- &shr( $t, 8 ); # 3 5
- &xor( $L, &DWP("0x500+$desSP",$tmp1,"",0)); # 3 7
- &mov( $tmp1, &DWP("0x400+$desSP",$tmp2,"",0)); # 3 8
-
- &and( $t, "0xfc" ); # 4 1
- &xor( $L, $tmp1 ); # 3 9
-
- &and( $u, "0xfc" ); # 4 2
- &xor( $L, &DWP("0x700+$desSP",$t,"",0)); # 4 3
- &xor( $L, &DWP("0x600+$desSP",$u,"",0)); # 4 4
- }
-
-sub PERM_OP
- {
- local($a,$b,$tt,$shift,$mask)=@_;
-
- &mov( $tt, $a );
- &shr( $tt, $shift );
- &xor( $tt, $b );
- &and( $tt, $mask );
- &xor( $b, $tt );
- &shl( $tt, $shift );
- &xor( $a, $tt );
- }
-
-sub IP
- {
- local($l,$r,$tt)=@_;
-
- &PERM_OP($r,$l,$tt, 4,"0x0f0f0f0f");
- &PERM_OP($l,$r,$tt,16,"0x0000ffff");
- &PERM_OP($r,$l,$tt, 2,"0x33333333");
- &PERM_OP($l,$r,$tt, 8,"0x00ff00ff");
- &PERM_OP($r,$l,$tt, 1,"0x55555555");
- }
-
-sub FP
- {
- local($l,$r,$tt)=@_;
-
- &PERM_OP($l,$r,$tt, 1,"0x55555555");
- &PERM_OP($r,$l,$tt, 8,"0x00ff00ff");
- &PERM_OP($l,$r,$tt, 2,"0x33333333");
- &PERM_OP($r,$l,$tt,16,"0x0000ffff");
- &PERM_OP($l,$r,$tt, 4,"0x0f0f0f0f");
- }
-
-sub n2a
- {
- sprintf("%d",$_[0]);
- }
+ +$prog="des686.pl"; + +# base code is in microsft +# op dest, source +# format. +# + +# WILL NOT WORK ANYMORE WITH desboth.pl +require "desboth.pl"; + +if ( ($ARGV[0] eq "elf")) + { require "x86unix.pl"; } +elsif ( ($ARGV[0] eq "a.out")) + { $aout=1; require "x86unix.pl"; } +elsif ( ($ARGV[0] eq "sol")) + { $sol=1; require "x86unix.pl"; } +elsif ( ($ARGV[0] eq "cpp")) + { $cpp=1; require "x86unix.pl"; } +elsif ( ($ARGV[0] eq "win32")) + { require "x86ms.pl"; } +else + { + print STDERR <<"EOF"; +Pick one target type from + elf - linux, FreeBSD etc + a.out - old linux + sol - x86 solaris + cpp - format so x86unix.cpp can be used + win32 - Windows 95/Windows NT +EOF + exit(1); + } + +&comment("Don't even think of reading this code"); +&comment("It was automatically generated by $prog"); +&comment("Which is a perl program used to generate the x86 assember for"); +&comment("any of elf, a.out, Win32, or Solaris"); +&comment("It can be found in SSLeay 0.6.5+ or in libdes 3.26+"); +&comment("eric <eay\@cryptsoft.com>"); +&comment(""); + +&file("dx86xxxx"); + +$L="edi"; +$R="esi"; + +&des_encrypt("des_encrypt",1); +&des_encrypt("des_encrypt2",0); + +&des_encrypt3("des_encrypt3",1); +&des_encrypt3("des_decrypt3",0); + +&file_end(); + +sub des_encrypt + { + local($name,$do_ip)=@_; + + &function_begin($name,"EXTRN _des_SPtrans:DWORD"); + + &comment(""); + &comment("Load the 2 words"); + &mov("eax",&wparam(0)); + &mov($L,&DWP(0,"eax","",0)); + &mov($R,&DWP(4,"eax","",0)); + + $ksp=&wparam(1); + + if ($do_ip) + { + &comment(""); + &comment("IP"); + &IP_new($L,$R,"eax"); + } + + &comment(""); + &comment("fixup rotate"); + &rotl($R,3); + &rotl($L,3); + &exch($L,$R); + + &comment(""); + &comment("load counter, key_schedule and enc flag"); + &mov("eax",&wparam(2)); # get encrypt flag + &mov("ebp",&wparam(1)); # get ks + &cmp("eax","0"); + &je(&label("start_decrypt")); + + # encrypting part + + for ($i=0; $i<16; $i+=2) + { + &comment(""); + &comment("Round $i"); + &D_ENCRYPT($L,$R,$i*2,"ebp","des_SPtrans","ecx","edx","eax","ebx"); + + &comment(""); + &comment("Round ".sprintf("%d",$i+1)); + &D_ENCRYPT($R,$L,($i+1)*2,"ebp","des_SPtrans","ecx","edx","eax","ebx"); + } + &jmp(&label("end")); + + &set_label("start_decrypt"); + + for ($i=15; $i>0; $i-=2) + { + &comment(""); + &comment("Round $i"); + &D_ENCRYPT($L,$R,$i*2,"ebp","des_SPtrans","ecx","edx","eax","ebx"); + &comment(""); + &comment("Round ".sprintf("%d",$i-1)); + &D_ENCRYPT($R,$L,($i-1)*2,"ebp","des_SPtrans","ecx","edx","eax","ebx"); + } + + &set_label("end"); + + &comment(""); + &comment("Fixup"); + &rotr($L,3); # r + &rotr($R,3); # l + + if ($do_ip) + { + &comment(""); + &comment("FP"); + &FP_new($R,$L,"eax"); + } + + &mov("eax",&wparam(0)); + &mov(&DWP(0,"eax","",0),$L); + &mov(&DWP(4,"eax","",0),$R); + + &function_end($name); + } + + +# The logic is to load R into 2 registers and operate on both at the same time. +# We also load the 2 R's into 2 more registers so we can do the 'move word down a byte' +# while also masking the other copy and doing a lookup. We then also accumulate the +# L value in 2 registers then combine them at the end. +sub D_ENCRYPT + { + local($L,$R,$S,$ks,$desSP,$u,$t,$tmp1,$tmp2,$tmp3)=@_; + + &mov( $u, &DWP(&n2a($S*4),$ks,"",0)); + &mov( $t, &DWP(&n2a(($S+1)*4),$ks,"",0)); + &xor( $u, $R ); + &xor( $t, $R ); + &rotr( $t, 4 ); + + # the numbers at the end of the line are origional instruction order + &mov( $tmp2, $u ); # 1 2 + &mov( $tmp1, $t ); # 1 1 + &and( $tmp2, "0xfc" ); # 1 4 + &and( $tmp1, "0xfc" ); # 1 3 + &shr( $t, 8 ); # 1 5 + &xor( $L, &DWP("0x100+$desSP",$tmp1,"",0)); # 1 7 + &shr( $u, 8 ); # 1 6 + &mov( $tmp1, &DWP(" $desSP",$tmp2,"",0)); # 1 8 + + &mov( $tmp2, $u ); # 2 2 + &xor( $L, $tmp1 ); # 1 9 + &and( $tmp2, "0xfc" ); # 2 4 + &mov( $tmp1, $t ); # 2 1 + &and( $tmp1, "0xfc" ); # 2 3 + &shr( $t, 8 ); # 2 5 + &xor( $L, &DWP("0x300+$desSP",$tmp1,"",0)); # 2 7 + &shr( $u, 8 ); # 2 6 + &mov( $tmp1, &DWP("0x200+$desSP",$tmp2,"",0)); # 2 8 + &mov( $tmp2, $u ); # 3 2 + + &xor( $L, $tmp1 ); # 2 9 + &and( $tmp2, "0xfc" ); # 3 4 + + &mov( $tmp1, $t ); # 3 1 + &shr( $u, 8 ); # 3 6 + &and( $tmp1, "0xfc" ); # 3 3 + &shr( $t, 8 ); # 3 5 + &xor( $L, &DWP("0x500+$desSP",$tmp1,"",0)); # 3 7 + &mov( $tmp1, &DWP("0x400+$desSP",$tmp2,"",0)); # 3 8 + + &and( $t, "0xfc" ); # 4 1 + &xor( $L, $tmp1 ); # 3 9 + + &and( $u, "0xfc" ); # 4 2 + &xor( $L, &DWP("0x700+$desSP",$t,"",0)); # 4 3 + &xor( $L, &DWP("0x600+$desSP",$u,"",0)); # 4 4 + } + +sub PERM_OP + { + local($a,$b,$tt,$shift,$mask)=@_; + + &mov( $tt, $a ); + &shr( $tt, $shift ); + &xor( $tt, $b ); + &and( $tt, $mask ); + &xor( $b, $tt ); + &shl( $tt, $shift ); + &xor( $a, $tt ); + } + +sub IP_new + { + local($l,$r,$tt)=@_; + + &PERM_OP($r,$l,$tt, 4,"0x0f0f0f0f"); + &PERM_OP($l,$r,$tt,16,"0x0000ffff"); + &PERM_OP($r,$l,$tt, 2,"0x33333333"); + &PERM_OP($l,$r,$tt, 8,"0x00ff00ff"); + &PERM_OP($r,$l,$tt, 1,"0x55555555"); + } + +sub FP_new + { + local($l,$r,$tt)=@_; + + &PERM_OP($l,$r,$tt, 1,"0x55555555"); + &PERM_OP($r,$l,$tt, 8,"0x00ff00ff"); + &PERM_OP($l,$r,$tt, 2,"0x33333333"); + &PERM_OP($r,$l,$tt,16,"0x0000ffff"); + &PERM_OP($l,$r,$tt, 4,"0x0f0f0f0f"); + } + +sub n2a + { + sprintf("%d",$_[0]); + } diff --git a/crypto/des/asm/desboth.pl b/crypto/des/asm/desboth.pl index b94138ce6..288984d13 100644 --- a/crypto/des/asm/desboth.pl +++ b/crypto/des/asm/desboth.pl @@ -1,67 +1,79 @@ #!/usr/local/bin/perl -
-$L="edi";
-$R="esi";
-
-sub des_encrypt3
- {
- local($name,$enc)=@_;
-
- &function_begin($name,4,"");
-
- &comment("");
- &comment("Load the data words");
- &mov("ebx",&wparam(0));
- &mov($L,&DWP(0,"ebx","",0));
- &mov($R,&DWP(4,"ebx","",0));
-
- &comment("");
- &comment("IP");
- &IP_new($L,$R,"edx",0);
-
- # put them back
-
- if ($enc)
- {
- &mov(&DWP(4,"ebx","",0),$R);
- &mov("eax",&wparam(1));
- &mov(&DWP(0,"ebx","",0),"edx");
- &mov("edi",&wparam(2));
- &mov("esi",&wparam(3));
- }
- else
- {
- &mov(&DWP(4,"ebx","",0),$R);
- &mov("esi",&wparam(1));
- &mov(&DWP(0,"ebx","",0),"edx");
- &mov("edi",&wparam(2));
- &mov("eax",&wparam(3));
- }
- &push(($enc)?"1":"0");
- &push("eax");
- &push("ebx");
- &call("des_encrypt2");
- &push(($enc)?"0":"1");
- &push("edi");
- &push("ebx");
- &call("des_encrypt2");
- &push(($enc)?"1":"0");
- &push("esi");
- &push("ebx");
- &call("des_encrypt2");
-
- &mov($L,&DWP(0,"ebx","",0));
- &add("esp",36);
- &mov($R,&DWP(4,"ebx","",0));
-
- &comment("");
- &comment("FP");
- &FP_new($L,$R,"eax",0);
-
- &mov(&DWP(0,"ebx","",0),"eax");
- &mov(&DWP(4,"ebx","",0),$R);
-
- &function_end($name);
- }
-
-
+ +$L="edi"; +$R="esi"; + +sub des_encrypt3 + { + local($name,$enc)=@_; + + &function_begin_B($name,""); + &push("ebx"); + &mov("ebx",&wparam(0)); + + &push("ebp"); + &push("esi"); + + &push("edi"); + + &comment(""); + &comment("Load the data words"); + &mov($L,&DWP(0,"ebx","",0)); + &mov($R,&DWP(4,"ebx","",0)); + &stack_push(3); + + &comment(""); + &comment("IP"); + &IP_new($L,$R,"edx",0); + + # put them back + + if ($enc) + { + &mov(&DWP(4,"ebx","",0),$R); + &mov("eax",&wparam(1)); + &mov(&DWP(0,"ebx","",0),"edx"); + &mov("edi",&wparam(2)); + &mov("esi",&wparam(3)); + } + else + { + &mov(&DWP(4,"ebx","",0),$R); + &mov("esi",&wparam(1)); + &mov(&DWP(0,"ebx","",0),"edx"); + &mov("edi",&wparam(2)); + &mov("eax",&wparam(3)); + } + &mov(&swtmp(2), (($enc)?"1":"0")); + &mov(&swtmp(1), "eax"); + &mov(&swtmp(0), "ebx"); + &call("des_encrypt2"); + &mov(&swtmp(2), (($enc)?"0":"1")); + &mov(&swtmp(1), "edi"); + &mov(&swtmp(0), "ebx"); + &call("des_encrypt2"); + &mov(&swtmp(2), (($enc)?"1":"0")); + &mov(&swtmp(1), "esi"); + &mov(&swtmp(0), "ebx"); + &call("des_encrypt2"); + + &stack_pop(3); + &mov($L,&DWP(0,"ebx","",0)); + &mov($R,&DWP(4,"ebx","",0)); + + &comment(""); + &comment("FP"); + &FP_new($L,$R,"eax",0); + + &mov(&DWP(0,"ebx","",0),"eax"); + &mov(&DWP(4,"ebx","",0),$R); + + &pop("edi"); + &pop("esi"); + &pop("ebp"); + &pop("ebx"); + &ret(); + &function_end_B($name); + } + + diff --git a/crypto/des/asm/dx86unix.cpp b/crypto/des/asm/dx86unix.cpp index 941cf1f60..6fca9afa1 100644 --- a/crypto/des/asm/dx86unix.cpp +++ b/crypto/des/asm/dx86unix.cpp @@ -1,32 +1,41 @@ +/* Run the C pre-processor over this file with one of the following defined + * ELF - elf object files, + * OUT - a.out object files, + * BSDI - BSDI style a.out object files + * SOL - Solaris style elf + */ -#define TYPE(a,b) .type a,b -#define SIZE(a,b) .size a,b +#define TYPE(a,b) .type a,b +#define SIZE(a,b) .size a,b + +#if defined(OUT) || defined(BSDI) +#define des_SPtrans _des_SPtrans +#define des_encrypt _des_encrypt +#define des_encrypt2 _des_encrypt2 +#define des_encrypt3 _des_encrypt3 +#define des_decrypt3 _des_decrypt3 +#define des_ncbc_encrypt _des_ncbc_encrypt +#define des_ede3_cbc_encrypt _des_ede3_cbc_encrypt + +#endif #ifdef OUT -#define OK 1 -#define des_SPtrans _des_SPtrans -#define des_encrypt _des_encrypt -#define des_encrypt2 _des_encrypt2 -#define des_encrypt3 _des_encrypt3 -#define des_decrypt3 _des_decrypt3 -#define ALIGN 4 +#define OK 1 +#define ALIGN 4 #endif #ifdef BSDI -#define OK 1 -#define des_SPtrans _des_SPtrans -#define des_encrypt _des_encrypt -#define des_encrypt2 _des_encrypt2 -#define des_encrypt3 _des_encrypt3 -#define des_decrypt3 _des_decrypt3 -#define ALIGN 4 +#define OK 1 +#define ALIGN 4 #undef SIZE #undef TYPE +#define SIZE(a,b) +#define TYPE(a,b) #endif #if defined(ELF) || defined(SOL) -#define OK 1 -#define ALIGN 16 +#define OK 1 +#define ALIGN 16 #endif #ifndef OK @@ -37,5 +46,3157 @@ SOL - solaris systems, which are elf with strange comment lines BSDI - a.out with a very primative version of as. #endif -#include "dx86-cpp.s" +/* Let the Assembler begin :-) */ + /* Don't even think of reading this code */ + /* It was automatically generated by des-586.pl */ + /* Which is a perl program used to generate the x86 assember for */ + /* any of elf, a.out, BSDI,Win32, or Solaris */ + /* eric <eay@cryptsoft.com> */ + + .file "des-586.s" + .version "01.01" +gcc2_compiled.: +.text + .align ALIGN +.globl des_encrypt + TYPE(des_encrypt,@function) +des_encrypt: + pushl %esi + pushl %edi + + /* Load the 2 words */ + movl 12(%esp), %esi + xorl %ecx, %ecx + pushl %ebx + pushl %ebp + movl (%esi), %eax + movl 28(%esp), %ebx + movl 4(%esi), %edi + + /* IP */ + roll $4, %eax + movl %eax, %esi + xorl %edi, %eax + andl $0xf0f0f0f0, %eax + xorl %eax, %esi + xorl %eax, %edi + + roll $20, %edi + movl %edi, %eax + xorl %esi, %edi + andl $0xfff0000f, %edi + xorl %edi, %eax + xorl %edi, %esi + + roll $14, %eax + movl %eax, %edi + xorl %esi, %eax + andl $0x33333333, %eax + xorl %eax, %edi + xorl %eax, %esi + + roll $22, %esi + movl %esi, %eax + xorl %edi, %esi + andl $0x03fc03fc, %esi + xorl %esi, %eax + xorl %esi, %edi + + roll $9, %eax + movl %eax, %esi + xorl %edi, %eax + andl $0xaaaaaaaa, %eax + xorl %eax, %esi + xorl %eax, %edi + +.byte 209 +.byte 199 /* roll $1 %edi */ + movl 24(%esp), %ebp + cmpl $0, %ebx + je .L000start_decrypt + + /* Round 0 */ + movl (%ebp), %eax + xorl %ebx, %ebx + movl 4(%ebp), %edx + xorl %esi, %eax + xorl %esi, %edx + andl $0xfcfcfcfc, %eax + andl $0xcfcfcfcf, %edx + movb %al, %bl + movb %ah, %cl + rorl $4, %edx + movl des_SPtrans(%ebx),%ebp + movb %dl, %bl + xorl %ebp, %edi + movl 0x200+des_SPtrans(%ecx),%ebp + xorl %ebp, %edi + movb %dh, %cl + shrl $16, %eax + movl 0x100+des_SPtrans(%ebx),%ebp + xorl %ebp, %edi + movb %ah, %bl + shrl $16, %edx + movl 0x300+des_SPtrans(%ecx),%ebp + xorl %ebp, %edi + movl 24(%esp), %ebp + movb %dh, %cl + andl $0xff, %eax + andl $0xff, %edx + movl 0x600+des_SPtrans(%ebx),%ebx + xorl %ebx, %edi + movl 0x700+des_SPtrans(%ecx),%ebx + xorl %ebx, %edi + movl 0x400+des_SPtrans(%eax),%ebx + xorl %ebx, %edi + movl 0x500+des_SPtrans(%edx),%ebx + xorl %ebx, %edi + + /* Round 1 */ + movl 8(%ebp), %eax + xorl %ebx, %ebx + movl 12(%ebp), %edx + xorl %edi, %eax + xorl %edi, %edx + andl $0xfcfcfcfc, %eax + andl $0xcfcfcfcf, %edx + movb %al, %bl + movb %ah, %cl + rorl $4, %edx + movl des_SPtrans(%ebx),%ebp + movb %dl, %bl + xorl %ebp, %esi + movl 0x200+des_SPtrans(%ecx),%ebp + xorl %ebp, %esi + movb %dh, %cl + shrl $16, %eax + movl 0x100+des_SPtrans(%ebx),%ebp + xorl %ebp, %esi + movb %ah, %bl + shrl $16, %edx + movl 0x300+des_SPtrans(%ecx),%ebp + xorl %ebp, %esi + movl 24(%esp), %ebp + movb %dh, %cl + andl $0xff, %eax + andl $0xff, %edx + movl 0x600+des_SPtrans(%ebx),%ebx + xorl %ebx, %esi + movl 0x700+des_SPtrans(%ecx),%ebx + xorl %ebx, %esi + movl 0x400+des_SPtrans(%eax),%ebx + xorl %ebx, %esi + movl 0x500+des_SPtrans(%edx),%ebx + xorl %ebx, %esi + + /* Round 2 */ + movl 16(%ebp), %eax + xorl %ebx, %ebx + movl 20(%ebp), %edx + xorl %esi, %eax + xorl %esi, %edx + andl $0xfcfcfcfc, %eax + andl $0xcfcfcfcf, %edx + movb %al, %bl + movb %ah, %cl + rorl $4, %edx + movl des_SPtrans(%ebx),%ebp + movb %dl, %bl + xorl %ebp, %edi + movl 0x200+des_SPtrans(%ecx),%ebp + xorl %ebp, %edi + movb %dh, %cl + shrl $16, %eax + movl 0x100+des_SPtrans(%ebx),%ebp + xorl %ebp, %edi + movb %ah, %bl + shrl $16, %edx + movl 0x300+des_SPtrans(%ecx),%ebp + xorl %ebp, %edi + movl 24(%esp), %ebp + movb %dh, %cl + andl $0xff, %eax + andl $0xff, %edx + movl 0x600+des_SPtrans(%ebx),%ebx + xorl %ebx, %edi + movl 0x700+des_SPtrans(%ecx),%ebx + xorl %ebx, %edi + movl 0x400+des_SPtrans(%eax),%ebx + xorl %ebx, %edi + movl 0x500+des_SPtrans(%edx),%ebx + xorl %ebx, %edi + + /* Round 3 */ + movl 24(%ebp), %eax + xorl %ebx, %ebx + movl 28(%ebp), %edx + xorl %edi, %eax + xorl %edi, %edx + andl $0xfcfcfcfc, %eax + andl $0xcfcfcfcf, %edx + movb %al, %bl + movb %ah, %cl + rorl $4, %edx + movl des_SPtrans(%ebx),%ebp + movb %dl, %bl + xorl %ebp, %esi + movl 0x200+des_SPtrans(%ecx),%ebp + xorl %ebp, %esi + movb %dh, %cl + shrl $16, %eax + movl 0x100+des_SPtrans(%ebx),%ebp + xorl %ebp, %esi + movb %ah, %bl + shrl $16, %edx + movl 0x300+des_SPtrans(%ecx),%ebp + xorl %ebp, %esi + movl 24(%esp), %ebp + movb %dh, %cl + andl $0xff, %eax + andl $0xff, %edx + movl 0x600+des_SPtrans(%ebx),%ebx + xorl %ebx, %esi + movl 0x700+des_SPtrans(%ecx),%ebx + xorl %ebx, %esi + movl 0x400+des_SPtrans(%eax),%ebx + xorl %ebx, %esi + movl 0x500+des_SPtrans(%edx),%ebx + xorl %ebx, %esi + + /* Round 4 */ + movl 32(%ebp), %eax + xorl %ebx, %ebx + movl 36(%ebp), %edx + xorl %esi, %eax + xorl %esi, %edx + andl $0xfcfcfcfc, %eax + andl $0xcfcfcfcf, %edx + movb %al, %bl + movb %ah, %cl + rorl $4, %edx + movl des_SPtrans(%ebx),%ebp + movb %dl, %bl + xorl %ebp, %edi + movl 0x200+des_SPtrans(%ecx),%ebp + xorl %ebp, %edi + movb %dh, %cl + shrl $16, %eax + movl 0x100+des_SPtrans(%ebx),%ebp + xorl %ebp, %edi + movb %ah, %bl + shrl $16, %edx + movl 0x300+des_SPtrans(%ecx),%ebp + xorl %ebp, %edi + movl 24(%esp), %ebp + movb %dh, %cl + andl $0xff, %eax + andl $0xff, %edx + movl 0x600+des_SPtrans(%ebx),%ebx + xorl %ebx, %edi + movl 0x700+des_SPtrans(%ecx),%ebx + xorl %ebx, %edi + movl 0x400+des_SPtrans(%eax),%ebx + xorl %ebx, %edi + movl 0x500+des_SPtrans(%edx),%ebx + xorl %ebx, %edi + + /* Round 5 */ + movl 40(%ebp), %eax + xorl %ebx, %ebx + movl 44(%ebp), %edx + xorl %edi, %eax + xorl %edi, %edx + andl $0xfcfcfcfc, %eax + andl $0xcfcfcfcf, %edx + movb %al, %bl + movb %ah, %cl + rorl $4, %edx + movl des_SPtrans(%ebx),%ebp + movb %dl, %bl + xorl %ebp, %esi + movl 0x200+des_SPtrans(%ecx),%ebp + xorl %ebp, %esi + movb %dh, %cl + shrl $16, %eax + movl 0x100+des_SPtrans(%ebx),%ebp + xorl %ebp, %esi + movb %ah, %bl + shrl $16, %edx + movl 0x300+des_SPtrans(%ecx),%ebp + xorl %ebp, %esi + movl 24(%esp), %ebp + movb %dh, %cl + andl $0xff, %eax + andl $0xff, %edx + movl 0x600+des_SPtrans(%ebx),%ebx + xorl %ebx, %esi + movl 0x700+des_SPtrans(%ecx),%ebx + xorl %ebx, %esi + movl 0x400+des_SPtrans(%eax),%ebx + xorl %ebx, %esi + movl 0x500+des_SPtrans(%edx),%ebx + xorl %ebx, %esi + + /* Round 6 */ + movl 48(%ebp), %eax + xorl %ebx, %ebx + movl 52(%ebp), %edx + xorl %esi, %eax + xorl %esi, %edx + andl $0xfcfcfcfc, %eax + andl $0xcfcfcfcf, %edx + movb %al, %bl + movb %ah, %cl + rorl $4, %edx + movl des_SPtrans(%ebx),%ebp + movb %dl, %bl + xorl %ebp, %edi + movl 0x200+des_SPtrans(%ecx),%ebp + xorl %ebp, %edi + movb %dh, %cl + shrl $16, %eax + movl 0x100+des_SPtrans(%ebx),%ebp + xorl %ebp, %edi + movb %ah, %bl + shrl $16, %edx + movl 0x300+des_SPtrans(%ecx),%ebp + xorl %ebp, %edi + movl 24(%esp), %ebp + movb %dh, %cl + andl $0xff, %eax + andl $0xff, %edx + movl 0x600+des_SPtrans(%ebx),%ebx + xorl %ebx, %edi + movl 0x700+des_SPtrans(%ecx),%ebx + xorl %ebx, %edi + movl 0x400+des_SPtrans(%eax),%ebx + xorl %ebx, %edi + movl 0x500+des_SPtrans(%edx),%ebx + xorl %ebx, %edi + + /* Round 7 */ + movl 56(%ebp), %eax + xorl %ebx, %ebx + movl 60(%ebp), %edx + xorl %edi, %eax + xorl %edi, %edx + andl $0xfcfcfcfc, %eax + andl $0xcfcfcfcf, %edx + movb %al, %bl + movb %ah, %cl + rorl $4, %edx + movl des_SPtrans(%ebx),%ebp + movb %dl, %bl + xorl %ebp, %esi + movl 0x200+des_SPtrans(%ecx),%ebp + xorl %ebp, %esi + movb %dh, %cl + shrl $16, %eax + movl 0x100+des_SPtrans(%ebx),%ebp + xorl %ebp, %esi + movb %ah, %bl + shrl $16, %edx + movl 0x300+des_SPtrans(%ecx),%ebp + xorl %ebp, %esi + movl 24(%esp), %ebp + movb %dh, %cl + andl $0xff, %eax + andl $0xff, %edx + movl 0x600+des_SPtrans(%ebx),%ebx + xorl %ebx, %esi + movl 0x700+des_SPtrans(%ecx),%ebx + xorl %ebx, %esi + movl 0x400+des_SPtrans(%eax),%ebx + xorl %ebx, %esi + movl 0x500+des_SPtrans(%edx),%ebx + xorl %ebx, %esi + + /* Round 8 */ + movl 64(%ebp), %eax + xorl %ebx, %ebx + movl 68(%ebp), %edx + xorl %esi, %eax + xorl %esi, %edx + andl $0xfcfcfcfc, %eax + andl $0xcfcfcfcf, %edx + movb %al, %bl + movb %ah, %cl + rorl $4, %edx + movl des_SPtrans(%ebx),%ebp + movb %dl, %bl + xorl %ebp, %edi + movl 0x200+des_SPtrans(%ecx),%ebp + xorl %ebp, %edi + movb %dh, %cl + shrl $16, %eax + movl 0x100+des_SPtrans(%ebx),%ebp + xorl %ebp, %edi + movb %ah, %bl + shrl $16, %edx + movl 0x300+des_SPtrans(%ecx),%ebp + xorl %ebp, %edi + movl 24(%esp), %ebp + movb %dh, %cl + andl $0xff, %eax + andl $0xff, %edx + movl 0x600+des_SPtrans(%ebx),%ebx + xorl %ebx, %edi + movl 0x700+des_SPtrans(%ecx),%ebx + xorl %ebx, %edi + movl 0x400+des_SPtrans(%eax),%ebx + xorl %ebx, %edi + movl 0x500+des_SPtrans(%edx),%ebx + xorl %ebx, %edi + + /* Round 9 */ + movl 72(%ebp), %eax + xorl %ebx, %ebx + movl 76(%ebp), %edx + xorl %edi, %eax + xorl %edi, %edx + andl $0xfcfcfcfc, %eax + andl $0xcfcfcfcf, %edx + movb %al, %bl + movb %ah, %cl + rorl $4, %edx + movl des_SPtrans(%ebx),%ebp + movb %dl, %bl + xorl %ebp, %esi + movl 0x200+des_SPtrans(%ecx),%ebp + xorl %ebp, %esi + movb %dh, %cl + shrl $16, %eax + movl 0x100+des_SPtrans(%ebx),%ebp + xorl %ebp, %esi + movb %ah, %bl + shrl $16, %edx + movl 0x300+des_SPtrans(%ecx),%ebp + xorl %ebp, %esi + movl 24(%esp), %ebp + movb %dh, %cl + andl $0xff, %eax + andl $0xff, %edx + movl 0x600+des_SPtrans(%ebx),%ebx + xorl %ebx, %esi + movl 0x700+des_SPtrans(%ecx),%ebx + xorl %ebx, %esi + movl 0x400+des_SPtrans(%eax),%ebx + xorl %ebx, %esi + movl 0x500+des_SPtrans(%edx),%ebx + xorl %ebx, %esi + + /* Round 10 */ + movl 80(%ebp), %eax + xorl %ebx, %ebx + movl 84(%ebp), %edx + xorl %esi, %eax + xorl %esi, %edx + andl $0xfcfcfcfc, %eax + andl $0xcfcfcfcf, %edx + movb %al, %bl + movb %ah, %cl + rorl $4, %edx + movl des_SPtrans(%ebx),%ebp + movb %dl, %bl + xorl %ebp, %edi + movl 0x200+des_SPtrans(%ecx),%ebp + xorl %ebp, %edi + movb %dh, %cl + shrl $16, %eax + movl 0x100+des_SPtrans(%ebx),%ebp + xorl %ebp, %edi + movb %ah, %bl + shrl $16, %edx + movl 0x300+des_SPtrans(%ecx),%ebp + xorl %ebp, %edi + movl 24(%esp), %ebp + movb %dh, %cl + andl $0xff, %eax + andl $0xff, %edx + movl 0x600+des_SPtrans(%ebx),%ebx + xorl %ebx, %edi + movl 0x700+des_SPtrans(%ecx),%ebx + xorl %ebx, %edi + movl 0x400+des_SPtrans(%eax),%ebx + xorl %ebx, %edi + movl 0x500+des_SPtrans(%edx),%ebx + xorl %ebx, %edi + + /* Round 11 */ + movl 88(%ebp), %eax + xorl %ebx, %ebx + movl 92(%ebp), %edx + xorl %edi, %eax + xorl %edi, %edx + andl $0xfcfcfcfc, %eax + andl $0xcfcfcfcf, %edx + movb %al, %bl + movb %ah, %cl + rorl $4, %edx + movl des_SPtrans(%ebx),%ebp + movb %dl, %bl + xorl %ebp, %esi + movl 0x200+des_SPtrans(%ecx),%ebp + xorl %ebp, %esi + movb %dh, %cl + shrl $16, %eax + movl 0x100+des_SPtrans(%ebx),%ebp + xorl %ebp, %esi + movb %ah, %bl + shrl $16, %edx + movl 0x300+des_SPtrans(%ecx),%ebp + xorl %ebp, %esi + movl 24(%esp), %ebp + movb %dh, %cl + andl $0xff, %eax + andl $0xff, %edx + movl 0x600+des_SPtrans(%ebx),%ebx + xorl %ebx, %esi + movl 0x700+des_SPtrans(%ecx),%ebx + xorl %ebx, %esi + movl 0x400+des_SPtrans(%eax),%ebx + xorl %ebx, %esi + movl 0x500+des_SPtrans(%edx),%ebx + xorl %ebx, %esi + + /* Round 12 */ + movl 96(%ebp), %eax + xorl %ebx, %ebx + movl 100(%ebp), %edx + xorl %esi, %eax + xorl %esi, %edx + andl $0xfcfcfcfc, %eax + andl $0xcfcfcfcf, %edx + movb %al, %bl + movb %ah, %cl + rorl $4, %edx + movl des_SPtrans(%ebx),%ebp + movb %dl, %bl + xorl %ebp, %edi + movl 0x200+des_SPtrans(%ecx),%ebp + xorl %ebp, %edi + movb %dh, %cl + shrl $16, %eax + movl 0x100+des_SPtrans(%ebx),%ebp + xorl %ebp, %edi + movb %ah, %bl + shrl $16, %edx + movl 0x300+des_SPtrans(%ecx),%ebp + xorl %ebp, %edi + movl 24(%esp), %ebp + movb %dh, %cl + andl $0xff, %eax + andl $0xff, %edx + movl 0x600+des_SPtrans(%ebx),%ebx + xorl %ebx, %edi + movl 0x700+des_SPtrans(%ecx),%ebx + xorl %ebx, %edi + movl 0x400+des_SPtrans(%eax),%ebx + xorl %ebx, %edi + movl 0x500+des_SPtrans(%edx),%ebx + xorl %ebx, %edi + + /* Round 13 */ + movl 104(%ebp), %eax + xorl %ebx, %ebx + movl 108(%ebp), %edx + xorl %edi, %eax + xorl %edi, %edx + andl $0xfcfcfcfc, %eax + andl $0xcfcfcfcf, %edx + movb %al, %bl + movb %ah, %cl + rorl $4, %edx + movl des_SPtrans(%ebx),%ebp + movb %dl, %bl + xorl %ebp, %esi + movl 0x200+des_SPtrans(%ecx),%ebp + xorl %ebp, %esi + movb %dh, %cl + shrl $16, %eax + movl 0x100+des_SPtrans(%ebx),%ebp + xorl %ebp, %esi + movb %ah, %bl + shrl $16, %edx + movl 0x300+des_SPtrans(%ecx),%ebp + xorl %ebp, %esi + movl 24(%esp), %ebp + movb %dh, %cl + andl $0xff, %eax + andl $0xff, %edx + movl 0x600+des_SPtrans(%ebx),%ebx + xorl %ebx, %esi + movl 0x700+des_SPtrans(%ecx),%ebx + xorl %ebx, %esi + movl 0x400+des_SPtrans(%eax),%ebx + xorl %ebx, %esi + movl 0x500+des_SPtrans(%edx),%ebx + xorl %ebx, %esi + + /* Round 14 */ + movl 112(%ebp), %eax + xorl %ebx, %ebx + movl 116(%ebp), %edx + xorl %esi, %eax + xorl %esi, %edx + andl $0xfcfcfcfc, %eax + andl $0xcfcfcfcf, %edx + movb %al, %bl + movb %ah, %cl + rorl $4, %edx + movl des_SPtrans(%ebx),%ebp + movb %dl, %bl + xorl %ebp, %edi + movl 0x200+des_SPtrans(%ecx),%ebp + xorl %ebp, %edi + movb %dh, %cl + shrl $16, %eax + movl 0x100+des_SPtrans(%ebx),%ebp + xorl %ebp, %edi + movb %ah, %bl + shrl $16, %edx + movl 0x300+des_SPtrans(%ecx),%ebp + xorl %ebp, %edi + movl 24(%esp), %ebp + movb %dh, %cl + andl $0xff, %eax + andl $0xff, %edx + movl 0x600+des_SPtrans(%ebx),%ebx + xorl %ebx, %edi + movl 0x700+des_SPtrans(%ecx),%ebx + xorl %ebx, %edi + movl 0x400+des_SPtrans(%eax),%ebx + xorl %ebx, %edi + movl 0x500+des_SPtrans(%edx),%ebx + xorl %ebx, %edi + + /* Round 15 */ + movl 120(%ebp), %eax + xorl %ebx, %ebx + movl 124(%ebp), %edx + xorl %edi, %eax + xorl %edi, %edx + andl $0xfcfcfcfc, %eax + andl $0xcfcfcfcf, %edx + movb %al, %bl + movb %ah, %cl + rorl $4, %edx + movl des_SPtrans(%ebx),%ebp + movb %dl, %bl + xorl %ebp, %esi + movl 0x200+des_SPtrans(%ecx),%ebp + xorl %ebp, %esi + movb %dh, %cl + shrl $16, %eax + movl 0x100+des_SPtrans(%ebx),%ebp + xorl %ebp, %esi + movb %ah, %bl + shrl $16, %edx + movl 0x300+des_SPtrans(%ecx),%ebp + xorl %ebp, %esi + movl 24(%esp), %ebp + movb %dh, %cl + andl $0xff, %eax + andl $0xff, %edx + movl 0x600+des_SPtrans(%ebx),%ebx + xorl %ebx, %esi + movl 0x700+des_SPtrans(%ecx),%ebx + xorl %ebx, %esi + movl 0x400+des_SPtrans(%eax),%ebx + xorl %ebx, %esi + movl 0x500+des_SPtrans(%edx),%ebx + xorl %ebx, %esi + jmp .L001end +.L000start_decrypt: + + /* Round 15 */ + movl 120(%ebp), %eax + xorl %ebx, %ebx + movl 124(%ebp), %edx + xorl %esi, %eax + xorl %esi, %edx + andl $0xfcfcfcfc, %eax + andl $0xcfcfcfcf, %edx + movb %al, %bl + movb %ah, %cl + rorl $4, %edx + movl des_SPtrans(%ebx),%ebp + movb %dl, %bl + xorl %ebp, %edi + movl 0x200+des_SPtrans(%ecx),%ebp + xorl %ebp, %edi + movb %dh, %cl + shrl $16, %eax + movl 0x100+des_SPtrans(%ebx),%ebp + xorl %ebp, %edi + movb %ah, %bl + shrl $16, %edx + movl 0x300+des_SPtrans(%ecx),%ebp + xorl %ebp, %edi + movl 24(%esp), %ebp + movb %dh, %cl + andl $0xff, %eax + andl $0xff, %edx + movl 0x600+des_SPtrans(%ebx),%ebx + xorl %ebx, %edi + movl 0x700+des_SPtrans(%ecx),%ebx + xorl %ebx, %edi + movl 0x400+des_SPtrans(%eax),%ebx + xorl %ebx, %edi + movl 0x500+des_SPtrans(%edx),%ebx + xorl %ebx, %edi + + /* Round 14 */ + movl 112(%ebp), %eax + xorl %ebx, %ebx + movl 116(%ebp), %edx + xorl %edi, %eax + xorl %edi, %edx + andl $0xfcfcfcfc, %eax + andl $0xcfcfcfcf, %edx + movb %al, %bl + movb %ah, %cl + rorl $4, %edx + movl des_SPtrans(%ebx),%ebp + movb %dl, %bl + xorl %ebp, %esi + movl 0x200+des_SPtrans(%ecx),%ebp + xorl %ebp, %esi + movb %dh, %cl + shrl $16, %eax + movl 0x100+des_SPtrans(%ebx),%ebp + xorl %ebp, %esi + movb %ah, %bl + shrl $16, %edx + movl 0x300+des_SPtrans(%ecx),%ebp + xorl %ebp, %esi + movl 24(%esp), %ebp + movb %dh, %cl + andl $0xff, %eax + andl $0xff, %edx + movl 0x600+des_SPtrans(%ebx),%ebx + xorl %ebx, %esi + movl 0x700+des_SPtrans(%ecx),%ebx + xorl %ebx, %esi + movl 0x400+des_SPtrans(%eax),%ebx + xorl %ebx, %esi + movl 0x500+des_SPtrans(%edx),%ebx + xorl %ebx, %esi + + /* Round 13 */ + movl 104(%ebp), %eax + xorl %ebx, %ebx + movl 108(%ebp), %edx + xorl %esi, %eax + xorl %esi, %edx + andl $0xfcfcfcfc, %eax + andl $0xcfcfcfcf, %edx + movb %al, %bl + movb %ah, %cl + rorl $4, %edx + movl des_SPtrans(%ebx),%ebp + movb %dl, %bl + xorl %ebp, %edi + movl 0x200+des_SPtrans(%ecx),%ebp + xorl %ebp, %edi + movb %dh, %cl + shrl $16, %eax + movl 0x100+des_SPtrans(%ebx),%ebp + xorl %ebp, %edi + movb %ah, %bl + shrl $16, %edx + movl 0x300+des_SPtrans(%ecx),%ebp + xorl %ebp, %edi + movl 24(%esp), %ebp + movb %dh, %cl + andl $0xff, %eax + andl $0xff, %edx + movl 0x600+des_SPtrans(%ebx),%ebx + xorl %ebx, %edi + movl 0x700+des_SPtrans(%ecx),%ebx + xorl %ebx, %edi + movl 0x400+des_SPtrans(%eax),%ebx + xorl %ebx, %edi + movl 0x500+des_SPtrans(%edx),%ebx + xorl %ebx, %edi + + /* Round 12 */ + movl 96(%ebp), %eax + xorl %ebx, %ebx + movl 100(%ebp), %edx + xorl %edi, %eax + xorl %edi, %edx + andl $0xfcfcfcfc, %eax + andl $0xcfcfcfcf, %edx + movb %al, %bl + movb %ah, %cl + rorl $4, %edx + movl des_SPtrans(%ebx),%ebp + movb %dl, %bl + xorl %ebp, %esi + movl 0x200+des_SPtrans(%ecx),%ebp + xorl %ebp, %esi + movb %dh, %cl + shrl $16, %eax + movl 0x100+des_SPtrans(%ebx),%ebp + xorl %ebp, %esi + movb %ah, %bl + shrl $16, %edx + movl 0x300+des_SPtrans(%ecx),%ebp + xorl %ebp, %esi + movl 24(%esp), %ebp + movb %dh, %cl + andl $0xff, %eax + andl $0xff, %edx + movl 0x600+des_SPtrans(%ebx),%ebx + xorl %ebx, %esi + movl 0x700+des_SPtrans(%ecx),%ebx + xorl %ebx, %esi + movl 0x400+des_SPtrans(%eax),%ebx + xorl %ebx, %esi + movl 0x500+des_SPtrans(%edx),%ebx + xorl %ebx, %esi + + /* Round 11 */ + movl 88(%ebp), %eax + xorl %ebx, %ebx + movl 92(%ebp), %edx + xorl %esi, %eax + xorl %esi, %edx + andl $0xfcfcfcfc, %eax + andl $0xcfcfcfcf, %edx + movb %al, %bl + movb %ah, %cl + rorl $4, %edx + movl des_SPtrans(%ebx),%ebp + movb %dl, %bl + xorl %ebp, %edi + movl 0x200+des_SPtrans(%ecx),%ebp + xorl %ebp, %edi + movb %dh, %cl + shrl $16, %eax + movl 0x100+des_SPtrans(%ebx),%ebp + xorl %ebp, %edi + movb %ah, %bl + shrl $16, %edx + movl 0x300+des_SPtrans(%ecx),%ebp + xorl %ebp, %edi + movl 24(%esp), %ebp + movb %dh, %cl + andl $0xff, %eax + andl $0xff, %edx + movl 0x600+des_SPtrans(%ebx),%ebx + xorl %ebx, %edi + movl 0x700+des_SPtrans(%ecx),%ebx + xorl %ebx, %edi + movl 0x400+des_SPtrans(%eax),%ebx + xorl %ebx, %edi + movl 0x500+des_SPtrans(%edx),%ebx + xorl %ebx, %edi + + /* Round 10 */ + movl 80(%ebp), %eax + xorl %ebx, %ebx + movl 84(%ebp), %edx + xorl %edi, %eax + xorl %edi, %edx + andl $0xfcfcfcfc, %eax + andl $0xcfcfcfcf, %edx + movb %al, %bl + movb %ah, %cl + rorl $4, %edx + movl des_SPtrans(%ebx),%ebp + movb %dl, %bl + xorl %ebp, %esi + movl 0x200+des_SPtrans(%ecx),%ebp + xorl %ebp, %esi + movb %dh, %cl + shrl $16, %eax + movl 0x100+des_SPtrans(%ebx),%ebp + xorl %ebp, %esi + movb %ah, %bl + shrl $16, %edx + movl 0x300+des_SPtrans(%ecx),%ebp + xorl %ebp, %esi + movl 24(%esp), %ebp + movb %dh, %cl + andl $0xff, %eax + andl $0xff, %edx + movl 0x600+des_SPtrans(%ebx),%ebx + xorl %ebx, %esi + movl 0x700+des_SPtrans(%ecx),%ebx + xorl %ebx, %esi + movl 0x400+des_SPtrans(%eax),%ebx + xorl %ebx, %esi + movl 0x500+des_SPtrans(%edx),%ebx + xorl %ebx, %esi + + /* Round 9 */ + movl 72(%ebp), %eax + xorl %ebx, %ebx + movl 76(%ebp), %edx + xorl %esi, %eax + xorl %esi, %edx + andl $0xfcfcfcfc, %eax + andl $0xcfcfcfcf, %edx + movb %al, %bl + movb %ah, %cl + rorl $4, %edx + movl des_SPtrans(%ebx),%ebp + movb %dl, %bl + xorl %ebp, %edi + movl 0x200+des_SPtrans(%ecx),%ebp + xorl %ebp, %edi + movb %dh, %cl + shrl $16, %eax + movl 0x100+des_SPtrans(%ebx),%ebp + xorl %ebp, %edi + movb %ah, %bl + shrl $16, %edx + movl 0x300+des_SPtrans(%ecx),%ebp + xorl %ebp, %edi + movl 24(%esp), %ebp + movb %dh, %cl + andl $0xff, %eax + andl $0xff, %edx + movl 0x600+des_SPtrans(%ebx),%ebx + xorl %ebx, %edi + movl 0x700+des_SPtrans(%ecx),%ebx + xorl %ebx, %edi + movl 0x400+des_SPtrans(%eax),%ebx + xorl %ebx, %edi + movl 0x500+des_SPtrans(%edx),%ebx + xorl %ebx, %edi + + /* Round 8 */ + movl 64(%ebp), %eax + xorl %ebx, %ebx + movl 68(%ebp), %edx + xorl %edi, %eax + xorl %edi, %edx + andl $0xfcfcfcfc, %eax + andl $0xcfcfcfcf, %edx + movb %al, %bl + movb %ah, %cl + rorl $4, %edx + movl des_SPtrans(%ebx),%ebp + movb %dl, %bl + xorl %ebp, %esi + movl 0x200+des_SPtrans(%ecx),%ebp + xorl %ebp, %esi + movb %dh, %cl + shrl $16, %eax + movl 0x100+des_SPtrans(%ebx),%ebp + xorl %ebp, %esi + movb %ah, %bl + shrl $16, %edx + movl 0x300+des_SPtrans(%ecx),%ebp + xorl %ebp, %esi + movl 24(%esp), %ebp + movb %dh, %cl + andl $0xff, %eax + andl $0xff, %edx + movl 0x600+des_SPtrans(%ebx),%ebx + xorl %ebx, %esi + movl 0x700+des_SPtrans(%ecx),%ebx + xorl %ebx, %esi + movl 0x400+des_SPtrans(%eax),%ebx + xorl %ebx, %esi + movl 0x500+des_SPtrans(%edx),%ebx + xorl %ebx, %esi + + /* Round 7 */ + movl 56(%ebp), %eax + xorl %ebx, %ebx + movl 60(%ebp), %edx + xorl %esi, %eax + xorl %esi, %edx + andl $0xfcfcfcfc, %eax + andl $0xcfcfcfcf, %edx + movb %al, %bl + movb %ah, %cl + rorl $4, %edx + movl des_SPtrans(%ebx),%ebp + movb %dl, %bl + xorl %ebp, %edi + movl 0x200+des_SPtrans(%ecx),%ebp + xorl %ebp, %edi + movb %dh, %cl + shrl $16, %eax + movl 0x100+des_SPtrans(%ebx),%ebp + xorl %ebp, %edi + movb %ah, %bl + shrl $16, %edx + movl 0x300+des_SPtrans(%ecx),%ebp + xorl %ebp, %edi + movl 24(%esp), %ebp + movb %dh, %cl + andl $0xff, %eax + andl $0xff, %edx + movl 0x600+des_SPtrans(%ebx),%ebx + xorl %ebx, %edi + movl 0x700+des_SPtrans(%ecx),%ebx + xorl %ebx, %edi + movl 0x400+des_SPtrans(%eax),%ebx + xorl %ebx, %edi + movl 0x500+des_SPtrans(%edx),%ebx + xorl %ebx, %edi + + /* Round 6 */ + movl 48(%ebp), %eax + xorl %ebx, %ebx + movl 52(%ebp), %edx + xorl %edi, %eax + xorl %edi, %edx + andl $0xfcfcfcfc, %eax + andl $0xcfcfcfcf, %edx + movb %al, %bl + movb %ah, %cl + rorl $4, %edx + movl des_SPtrans(%ebx),%ebp + movb %dl, %bl + xorl %ebp, %esi + movl 0x200+des_SPtrans(%ecx),%ebp + xorl %ebp, %esi + movb %dh, %cl + shrl $16, %eax + movl 0x100+des_SPtrans(%ebx),%ebp + xorl %ebp, %esi + movb %ah, %bl + shrl $16, %edx + movl 0x300+des_SPtrans(%ecx),%ebp + xorl %ebp, %esi + movl 24(%esp), %ebp + movb %dh, %cl + andl $0xff, %eax + andl $0xff, %edx + movl 0x600+des_SPtrans(%ebx),%ebx + xorl %ebx, %esi + movl 0x700+des_SPtrans(%ecx),%ebx + xorl %ebx, %esi + movl 0x400+des_SPtrans(%eax),%ebx + xorl %ebx, %esi + movl 0x500+des_SPtrans(%edx),%ebx + xorl %ebx, %esi + + /* Round 5 */ + movl 40(%ebp), %eax + xorl %ebx, %ebx + movl 44(%ebp), %edx + xorl %esi, %eax + xorl %esi, %edx + andl $0xfcfcfcfc, %eax + andl $0xcfcfcfcf, %edx + movb %al, %bl + movb %ah, %cl + rorl $4, %edx + movl des_SPtrans(%ebx),%ebp + movb %dl, %bl + xorl %ebp, %edi + movl 0x200+des_SPtrans(%ecx),%ebp + xorl %ebp, %edi + movb %dh, %cl + shrl $16, %eax + movl 0x100+des_SPtrans(%ebx),%ebp + xorl %ebp, %edi + movb %ah, %bl + shrl $16, %edx + movl 0x300+des_SPtrans(%ecx),%ebp + xorl %ebp, %edi + movl 24(%esp), %ebp + movb %dh, %cl + andl $0xff, %eax + andl $0xff, %edx + movl 0x600+des_SPtrans(%ebx),%ebx + xorl %ebx, %edi + movl 0x700+des_SPtrans(%ecx),%ebx + xorl %ebx, %edi + movl 0x400+des_SPtrans(%eax),%ebx + xorl %ebx, %edi + movl 0x500+des_SPtrans(%edx),%ebx + xorl %ebx, %edi + + /* Round 4 */ + movl 32(%ebp), %eax + xorl %ebx, %ebx + movl 36(%ebp), %edx + xorl %edi, %eax + xorl %edi, %edx + andl $0xfcfcfcfc, %eax + andl $0xcfcfcfcf, %edx + movb %al, %bl + movb %ah, %cl + rorl $4, %edx + movl des_SPtrans(%ebx),%ebp + movb %dl, %bl + xorl %ebp, %esi + movl 0x200+des_SPtrans(%ecx),%ebp + xorl %ebp, %esi + movb %dh, %cl + shrl $16, %eax + movl 0x100+des_SPtrans(%ebx),%ebp + xorl %ebp, %esi + movb %ah, %bl + shrl $16, %edx + movl 0x300+des_SPtrans(%ecx),%ebp + xorl %ebp, %esi + movl 24(%esp), %ebp + movb %dh, %cl + andl $0xff, %eax + andl $0xff, %edx + movl 0x600+des_SPtrans(%ebx),%ebx + xorl %ebx, %esi + movl 0x700+des_SPtrans(%ecx),%ebx + xorl %ebx, %esi + movl 0x400+des_SPtrans(%eax),%ebx + xorl %ebx, %esi + movl 0x500+des_SPtrans(%edx),%ebx + xorl %ebx, %esi + + /* Round 3 */ + movl 24(%ebp), %eax + xorl %ebx, %ebx + movl 28(%ebp), %edx + xorl %esi, %eax + xorl %esi, %edx + andl $0xfcfcfcfc, %eax + andl $0xcfcfcfcf, %edx + movb %al, %bl + movb %ah, %cl + rorl $4, %edx + movl des_SPtrans(%ebx),%ebp + movb %dl, %bl + xorl %ebp, %edi + movl 0x200+des_SPtrans(%ecx),%ebp + xorl %ebp, %edi + movb %dh, %cl + shrl $16, %eax + movl 0x100+des_SPtrans(%ebx),%ebp + xorl %ebp, %edi + movb %ah, %bl + shrl $16, %edx + movl 0x300+des_SPtrans(%ecx),%ebp + xorl %ebp, %edi + movl 24(%esp), %ebp + movb %dh, %cl + andl $0xff, %eax + andl $0xff, %edx + movl 0x600+des_SPtrans(%ebx),%ebx + xorl %ebx, %edi + movl 0x700+des_SPtrans(%ecx),%ebx + xorl %ebx, %edi + movl 0x400+des_SPtrans(%eax),%ebx + xorl %ebx, %edi + movl 0x500+des_SPtrans(%edx),%ebx + xorl %ebx, %edi + + /* Round 2 */ + movl 16(%ebp), %eax + xorl %ebx, %ebx + movl 20(%ebp), %edx + xorl %edi, %eax + xorl %edi, %edx + andl $0xfcfcfcfc, %eax + andl $0xcfcfcfcf, %edx + movb %al, %bl + movb %ah, %cl + rorl $4, %edx + movl des_SPtrans(%ebx),%ebp + movb %dl, %bl + xorl %ebp, %esi + movl 0x200+des_SPtrans(%ecx),%ebp + xorl %ebp, %esi + movb %dh, %cl + shrl $16, %eax + movl 0x100+des_SPtrans(%ebx),%ebp + xorl %ebp, %esi + movb %ah, %bl + shrl $16, %edx + movl 0x300+des_SPtrans(%ecx),%ebp + xorl %ebp, %esi + movl 24(%esp), %ebp + movb %dh, %cl + andl $0xff, %eax + andl $0xff, %edx + movl 0x600+des_SPtrans(%ebx),%ebx + xorl %ebx, %esi + movl 0x700+des_SPtrans(%ecx),%ebx + xorl %ebx, %esi + movl 0x400+des_SPtrans(%eax),%ebx + xorl %ebx, %esi + movl 0x500+des_SPtrans(%edx),%ebx + xorl %ebx, %esi + + /* Round 1 */ + movl 8(%ebp), %eax + xorl %ebx, %ebx + movl 12(%ebp), %edx + xorl %esi, %eax + xorl %esi, %edx + andl $0xfcfcfcfc, %eax + andl $0xcfcfcfcf, %edx + movb %al, %bl + movb %ah, %cl + rorl $4, %edx + movl des_SPtrans(%ebx),%ebp + movb %dl, %bl + xorl %ebp, %edi + movl 0x200+des_SPtrans(%ecx),%ebp + xorl %ebp, %edi + movb %dh, %cl + shrl $16, %eax + movl 0x100+des_SPtrans(%ebx),%ebp + xorl %ebp, %edi + movb %ah, %bl + shrl $16, %edx + movl 0x300+des_SPtrans(%ecx),%ebp + xorl %ebp, %edi + movl 24(%esp), %ebp + movb %dh, %cl + andl $0xff, %eax + andl $0xff, %edx + movl 0x600+des_SPtrans(%ebx),%ebx + xorl %ebx, %edi + movl 0x700+des_SPtrans(%ecx),%ebx + xorl %ebx, %edi + movl 0x400+des_SPtrans(%eax),%ebx + xorl %ebx, %edi + movl 0x500+des_SPtrans(%edx),%ebx + xorl %ebx, %edi + + /* Round 0 */ + movl (%ebp), %eax + xorl %ebx, %ebx + movl 4(%ebp), %edx + xorl %edi, %eax + xorl %edi, %edx + andl $0xfcfcfcfc, %eax + andl $0xcfcfcfcf, %edx + movb %al, %bl + movb %ah, %cl + rorl $4, %edx + movl des_SPtrans(%ebx),%ebp + movb %dl, %bl + xorl %ebp, %esi + movl 0x200+des_SPtrans(%ecx),%ebp + xorl %ebp, %esi + movb %dh, %cl + shrl $16, %eax + movl 0x100+des_SPtrans(%ebx),%ebp + xorl %ebp, %esi + movb %ah, %bl + shrl $16, %edx + movl 0x300+des_SPtrans(%ecx),%ebp + xorl %ebp, %esi + movl 24(%esp), %ebp + movb %dh, %cl + andl $0xff, %eax + andl $0xff, %edx + movl 0x600+des_SPtrans(%ebx),%ebx + xorl %ebx, %esi + movl 0x700+des_SPtrans(%ecx),%ebx + xorl %ebx, %esi + movl 0x400+des_SPtrans(%eax),%ebx + xorl %ebx, %esi + movl 0x500+des_SPtrans(%edx),%ebx + xorl %ebx, %esi +.L001end: + + /* FP */ + movl 20(%esp), %edx +.byte 209 +.byte 206 /* rorl $1 %esi */ + movl %edi, %eax + xorl %esi, %edi + andl $0xaaaaaaaa, %edi + xorl %edi, %eax + xorl %edi, %esi + + roll $23, %eax + movl %eax, %edi + xorl %esi, %eax + andl $0x03fc03fc, %eax + xorl %eax, %edi + xorl %eax, %esi + + roll $10, %edi + movl %edi, %eax + xorl %esi, %edi + andl $0x33333333, %edi + xorl %edi, %eax + xorl %edi, %esi + + roll $18, %esi + movl %esi, %edi + xorl %eax, %esi + andl $0xfff0000f, %esi + xorl %esi, %edi + xorl %esi, %eax + + roll $12, %edi + movl %edi, %esi + xorl %eax, %edi + andl $0xf0f0f0f0, %edi + xorl %edi, %esi + xorl %edi, %eax + + rorl $4, %eax + movl %eax, (%edx) + movl %esi, 4(%edx) + popl %ebp + popl %ebx + popl %edi + popl %esi + ret +.des_encrypt_end: + SIZE(des_encrypt,.des_encrypt_end-des_encrypt) +.ident "desasm.pl" +.text + .align ALIGN +.globl des_encrypt2 + TYPE(des_encrypt2,@function) +des_encrypt2: + pushl %esi + pushl %edi + + /* Load the 2 words */ + movl 12(%esp), %eax + xorl %ecx, %ecx + pushl %ebx + pushl %ebp + movl (%eax), %esi + movl 28(%esp), %ebx + roll $3, %esi + movl 4(%eax), %edi + roll $3, %edi + movl 24(%esp), %ebp + cmpl $0, %ebx + je .L002start_decrypt + + /* Round 0 */ + movl (%ebp), %eax + xorl %ebx, %ebx + movl 4(%ebp), %edx + xorl %esi, %eax + xorl %esi, %edx + andl $0xfcfcfcfc, %eax + andl $0xcfcfcfcf, %edx + movb %al, %bl + movb %ah, %cl + rorl $4, %edx + movl des_SPtrans(%ebx),%ebp + movb %dl, %bl + xorl %ebp, %edi + movl 0x200+des_SPtrans(%ecx),%ebp + xorl %ebp, %edi + movb %dh, %cl + shrl $16, %eax + movl 0x100+des_SPtrans(%ebx),%ebp + xorl %ebp, %edi + movb %ah, %bl + shrl $16, %edx + movl 0x300+des_SPtrans(%ecx),%ebp + xorl %ebp, %edi + movl 24(%esp), %ebp + movb %dh, %cl + andl $0xff, %eax + andl $0xff, %edx + movl 0x600+des_SPtrans(%ebx),%ebx + xorl %ebx, %edi + movl 0x700+des_SPtrans(%ecx),%ebx + xorl %ebx, %edi + movl 0x400+des_SPtrans(%eax),%ebx + xorl %ebx, %edi + movl 0x500+des_SPtrans(%edx),%ebx + xorl %ebx, %edi + + /* Round 1 */ + movl 8(%ebp), %eax + xorl %ebx, %ebx + movl 12(%ebp), %edx + xorl %edi, %eax + xorl %edi, %edx + andl $0xfcfcfcfc, %eax + andl $0xcfcfcfcf, %edx + movb %al, %bl + movb %ah, %cl + rorl $4, %edx + movl des_SPtrans(%ebx),%ebp + movb %dl, %bl + xorl %ebp, %esi + movl 0x200+des_SPtrans(%ecx),%ebp + xorl %ebp, %esi + movb %dh, %cl + shrl $16, %eax + movl 0x100+des_SPtrans(%ebx),%ebp + xorl %ebp, %esi + movb %ah, %bl + shrl $16, %edx + movl 0x300+des_SPtrans(%ecx),%ebp + xorl %ebp, %esi + movl 24(%esp), %ebp + movb %dh, %cl + andl $0xff, %eax + andl $0xff, %edx + movl 0x600+des_SPtrans(%ebx),%ebx + xorl %ebx, %esi + movl 0x700+des_SPtrans(%ecx),%ebx + xorl %ebx, %esi + movl 0x400+des_SPtrans(%eax),%ebx + xorl %ebx, %esi + movl 0x500+des_SPtrans(%edx),%ebx + xorl %ebx, %esi + + /* Round 2 */ + movl 16(%ebp), %eax + xorl %ebx, %ebx + movl 20(%ebp), %edx + xorl %esi, %eax + xorl %esi, %edx + andl $0xfcfcfcfc, %eax + andl $0xcfcfcfcf, %edx + movb %al, %bl + movb %ah, %cl + rorl $4, %edx + movl des_SPtrans(%ebx),%ebp + movb %dl, %bl + xorl %ebp, %edi + movl 0x200+des_SPtrans(%ecx),%ebp + xorl %ebp, %edi + movb %dh, %cl + shrl $16, %eax + movl 0x100+des_SPtrans(%ebx),%ebp + xorl %ebp, %edi + movb %ah, %bl + shrl $16, %edx + movl 0x300+des_SPtrans(%ecx),%ebp + xorl %ebp, %edi + movl 24(%esp), %ebp + movb %dh, %cl + andl $0xff, %eax + andl $0xff, %edx + movl 0x600+des_SPtrans(%ebx),%ebx + xorl %ebx, %edi + movl 0x700+des_SPtrans(%ecx),%ebx + xorl %ebx, %edi + movl 0x400+des_SPtrans(%eax),%ebx + xorl %ebx, %edi + movl 0x500+des_SPtrans(%edx),%ebx + xorl %ebx, %edi + + /* Round 3 */ + movl 24(%ebp), %eax + xorl %ebx, %ebx + movl 28(%ebp), %edx + xorl %edi, %eax + xorl %edi, %edx + andl $0xfcfcfcfc, %eax + andl $0xcfcfcfcf, %edx + movb %al, %bl + movb %ah, %cl + rorl $4, %edx + movl des_SPtrans(%ebx),%ebp + movb %dl, %bl + xorl %ebp, %esi + movl 0x200+des_SPtrans(%ecx),%ebp + xorl %ebp, %esi + movb %dh, %cl + shrl $16, %eax + movl 0x100+des_SPtrans(%ebx),%ebp + xorl %ebp, %esi + movb %ah, %bl + shrl $16, %edx + movl 0x300+des_SPtrans(%ecx),%ebp + xorl %ebp, %esi + movl 24(%esp), %ebp + movb %dh, %cl + andl $0xff, %eax + andl $0xff, %edx + movl 0x600+des_SPtrans(%ebx),%ebx + xorl %ebx, %esi + movl 0x700+des_SPtrans(%ecx),%ebx + xorl %ebx, %esi + movl 0x400+des_SPtrans(%eax),%ebx + xorl %ebx, %esi + movl 0x500+des_SPtrans(%edx),%ebx + xorl %ebx, %esi + + /* Round 4 */ + movl 32(%ebp), %eax + xorl %ebx, %ebx + movl 36(%ebp), %edx + xorl %esi, %eax + xorl %esi, %edx + andl $0xfcfcfcfc, %eax + andl $0xcfcfcfcf, %edx + movb %al, %bl + movb %ah, %cl + rorl $4, %edx + movl des_SPtrans(%ebx),%ebp + movb %dl, %bl + xorl %ebp, %edi + movl 0x200+des_SPtrans(%ecx),%ebp + xorl %ebp, %edi + movb %dh, %cl + shrl $16, %eax + movl 0x100+des_SPtrans(%ebx),%ebp + xorl %ebp, %edi + movb %ah, %bl + shrl $16, %edx + movl 0x300+des_SPtrans(%ecx),%ebp + xorl %ebp, %edi + movl 24(%esp), %ebp + movb %dh, %cl + andl $0xff, %eax + andl $0xff, %edx + movl 0x600+des_SPtrans(%ebx),%ebx + xorl %ebx, %edi + movl 0x700+des_SPtrans(%ecx),%ebx + xorl %ebx, %edi + movl 0x400+des_SPtrans(%eax),%ebx + xorl %ebx, %edi + movl 0x500+des_SPtrans(%edx),%ebx + xorl %ebx, %edi + + /* Round 5 */ + movl 40(%ebp), %eax + xorl %ebx, %ebx + movl 44(%ebp), %edx + xorl %edi, %eax + xorl %edi, %edx + andl $0xfcfcfcfc, %eax + andl $0xcfcfcfcf, %edx + movb %al, %bl + movb %ah, %cl + rorl $4, %edx + movl des_SPtrans(%ebx),%ebp + movb %dl, %bl + xorl %ebp, %esi + movl 0x200+des_SPtrans(%ecx),%ebp + xorl %ebp, %esi + movb %dh, %cl + shrl $16, %eax + movl 0x100+des_SPtrans(%ebx),%ebp + xorl %ebp, %esi + movb %ah, %bl + shrl $16, %edx + movl 0x300+des_SPtrans(%ecx),%ebp + xorl %ebp, %esi + movl 24(%esp), %ebp + movb %dh, %cl + andl $0xff, %eax + andl $0xff, %edx + movl 0x600+des_SPtrans(%ebx),%ebx + xorl %ebx, %esi + movl 0x700+des_SPtrans(%ecx),%ebx + xorl %ebx, %esi + movl 0x400+des_SPtrans(%eax),%ebx + xorl %ebx, %esi + movl 0x500+des_SPtrans(%edx),%ebx + xorl %ebx, %esi + + /* Round 6 */ + movl 48(%ebp), %eax + xorl %ebx, %ebx + movl 52(%ebp), %edx + xorl %esi, %eax + xorl %esi, %edx + andl $0xfcfcfcfc, %eax + andl $0xcfcfcfcf, %edx + movb %al, %bl + movb %ah, %cl + rorl $4, %edx + movl des_SPtrans(%ebx),%ebp + movb %dl, %bl + xorl %ebp, %edi + movl 0x200+des_SPtrans(%ecx),%ebp + xorl %ebp, %edi + movb %dh, %cl + shrl $16, %eax + movl 0x100+des_SPtrans(%ebx),%ebp + xorl %ebp, %edi + movb %ah, %bl + shrl $16, %edx + movl 0x300+des_SPtrans(%ecx),%ebp + xorl %ebp, %edi + movl 24(%esp), %ebp + movb %dh, %cl + andl $0xff, %eax + andl $0xff, %edx + movl 0x600+des_SPtrans(%ebx),%ebx + xorl %ebx, %edi + movl 0x700+des_SPtrans(%ecx),%ebx + xorl %ebx, %edi + movl 0x400+des_SPtrans(%eax),%ebx + xorl %ebx, %edi + movl 0x500+des_SPtrans(%edx),%ebx + xorl %ebx, %edi + + /* Round 7 */ + movl 56(%ebp), %eax + xorl %ebx, %ebx + movl 60(%ebp), %edx + xorl %edi, %eax + xorl %edi, %edx + andl $0xfcfcfcfc, %eax + andl $0xcfcfcfcf, %edx + movb %al, %bl + movb %ah, %cl + rorl $4, %edx + movl des_SPtrans(%ebx),%ebp + movb %dl, %bl + xorl %ebp, %esi + movl 0x200+des_SPtrans(%ecx),%ebp + xorl %ebp, %esi + movb %dh, %cl + shrl $16, %eax + movl 0x100+des_SPtrans(%ebx),%ebp + xorl %ebp, %esi + movb %ah, %bl + shrl $16, %edx + movl 0x300+des_SPtrans(%ecx),%ebp + xorl %ebp, %esi + movl 24(%esp), %ebp + movb %dh, %cl + andl $0xff, %eax + andl $0xff, %edx + movl 0x600+des_SPtrans(%ebx),%ebx + xorl %ebx, %esi + movl 0x700+des_SPtrans(%ecx),%ebx + xorl %ebx, %esi + movl 0x400+des_SPtrans(%eax),%ebx + xorl %ebx, %esi + movl 0x500+des_SPtrans(%edx),%ebx + xorl %ebx, %esi + + /* Round 8 */ + movl 64(%ebp), %eax + xorl %ebx, %ebx + movl 68(%ebp), %edx + xorl %esi, %eax + xorl %esi, %edx + andl $0xfcfcfcfc, %eax + andl $0xcfcfcfcf, %edx + movb %al, %bl + movb %ah, %cl + rorl $4, %edx + movl des_SPtrans(%ebx),%ebp + movb %dl, %bl + xorl %ebp, %edi + movl 0x200+des_SPtrans(%ecx),%ebp + xorl %ebp, %edi + movb %dh, %cl + shrl $16, %eax + movl 0x100+des_SPtrans(%ebx),%ebp + xorl %ebp, %edi + movb %ah, %bl + shrl $16, %edx + movl 0x300+des_SPtrans(%ecx),%ebp + xorl %ebp, %edi + movl 24(%esp), %ebp + movb %dh, %cl + andl $0xff, %eax + andl $0xff, %edx + movl 0x600+des_SPtrans(%ebx),%ebx + xorl %ebx, %edi + movl 0x700+des_SPtrans(%ecx),%ebx + xorl %ebx, %edi + movl 0x400+des_SPtrans(%eax),%ebx + xorl %ebx, %edi + movl 0x500+des_SPtrans(%edx),%ebx + xorl %ebx, %edi + + /* Round 9 */ + movl 72(%ebp), %eax + xorl %ebx, %ebx + movl 76(%ebp), %edx + xorl %edi, %eax + xorl %edi, %edx + andl $0xfcfcfcfc, %eax + andl $0xcfcfcfcf, %edx + movb %al, %bl + movb %ah, %cl + rorl $4, %edx + movl des_SPtrans(%ebx),%ebp + movb %dl, %bl + xorl %ebp, %esi + movl 0x200+des_SPtrans(%ecx),%ebp + xorl %ebp, %esi + movb %dh, %cl + shrl $16, %eax + movl 0x100+des_SPtrans(%ebx),%ebp + xorl %ebp, %esi + movb %ah, %bl + shrl $16, %edx + movl 0x300+des_SPtrans(%ecx),%ebp + xorl %ebp, %esi + movl 24(%esp), %ebp + movb %dh, %cl + andl $0xff, %eax + andl $0xff, %edx + movl 0x600+des_SPtrans(%ebx),%ebx + xorl %ebx, %esi + movl 0x700+des_SPtrans(%ecx),%ebx + xorl %ebx, %esi + movl 0x400+des_SPtrans(%eax),%ebx + xorl %ebx, %esi + movl 0x500+des_SPtrans(%edx),%ebx + xorl %ebx, %esi + + /* Round 10 */ + movl 80(%ebp), %eax + xorl %ebx, %ebx + movl 84(%ebp), %edx + xorl %esi, %eax + xorl %esi, %edx + andl $0xfcfcfcfc, %eax + andl $0xcfcfcfcf, %edx + movb %al, %bl + movb %ah, %cl + rorl $4, %edx + movl des_SPtrans(%ebx),%ebp + movb %dl, %bl + xorl %ebp, %edi + movl 0x200+des_SPtrans(%ecx),%ebp + xorl %ebp, %edi + movb %dh, %cl + shrl $16, %eax + movl 0x100+des_SPtrans(%ebx),%ebp + xorl %ebp, %edi + movb %ah, %bl + shrl $16, %edx + movl 0x300+des_SPtrans(%ecx),%ebp + xorl %ebp, %edi + movl 24(%esp), %ebp + movb %dh, %cl + andl $0xff, %eax + andl $0xff, %edx + movl 0x600+des_SPtrans(%ebx),%ebx + xorl %ebx, %edi + movl 0x700+des_SPtrans(%ecx),%ebx + xorl %ebx, %edi + movl 0x400+des_SPtrans(%eax),%ebx + xorl %ebx, %edi + movl 0x500+des_SPtrans(%edx),%ebx + xorl %ebx, %edi + + /* Round 11 */ + movl 88(%ebp), %eax + xorl %ebx, %ebx + movl 92(%ebp), %edx + xorl %edi, %eax + xorl %edi, %edx + andl $0xfcfcfcfc, %eax + andl $0xcfcfcfcf, %edx + movb %al, %bl + movb %ah, %cl + rorl $4, %edx + movl des_SPtrans(%ebx),%ebp + movb %dl, %bl + xorl %ebp, %esi + movl 0x200+des_SPtrans(%ecx),%ebp + xorl %ebp, %esi + movb %dh, %cl + shrl $16, %eax + movl 0x100+des_SPtrans(%ebx),%ebp + xorl %ebp, %esi + movb %ah, %bl + shrl $16, %edx + movl 0x300+des_SPtrans(%ecx),%ebp + xorl %ebp, %esi + movl 24(%esp), %ebp + movb %dh, %cl + andl $0xff, %eax + andl $0xff, %edx + movl 0x600+des_SPtrans(%ebx),%ebx + xorl %ebx, %esi + movl 0x700+des_SPtrans(%ecx),%ebx + xorl %ebx, %esi + movl 0x400+des_SPtrans(%eax),%ebx + xorl %ebx, %esi + movl 0x500+des_SPtrans(%edx),%ebx + xorl %ebx, %esi + + /* Round 12 */ + movl 96(%ebp), %eax + xorl %ebx, %ebx + movl 100(%ebp), %edx + xorl %esi, %eax + xorl %esi, %edx + andl $0xfcfcfcfc, %eax + andl $0xcfcfcfcf, %edx + movb %al, %bl + movb %ah, %cl + rorl $4, %edx + movl des_SPtrans(%ebx),%ebp + movb %dl, %bl + xorl %ebp, %edi + movl 0x200+des_SPtrans(%ecx),%ebp + xorl %ebp, %edi + movb %dh, %cl + shrl $16, %eax + movl 0x100+des_SPtrans(%ebx),%ebp + xorl %ebp, %edi + movb %ah, %bl + shrl $16, %edx + movl 0x300+des_SPtrans(%ecx),%ebp + xorl %ebp, %edi + movl 24(%esp), %ebp + movb %dh, %cl + andl $0xff, %eax + andl $0xff, %edx + movl 0x600+des_SPtrans(%ebx),%ebx + xorl %ebx, %edi + movl 0x700+des_SPtrans(%ecx),%ebx + xorl %ebx, %edi + movl 0x400+des_SPtrans(%eax),%ebx + xorl %ebx, %edi + movl 0x500+des_SPtrans(%edx),%ebx + xorl %ebx, %edi + + /* Round 13 */ + movl 104(%ebp), %eax + xorl %ebx, %ebx + movl 108(%ebp), %edx + xorl %edi, %eax + xorl %edi, %edx + andl $0xfcfcfcfc, %eax + andl $0xcfcfcfcf, %edx + movb %al, %bl + movb %ah, %cl + rorl $4, %edx + movl des_SPtrans(%ebx),%ebp + movb %dl, %bl + xorl %ebp, %esi + movl 0x200+des_SPtrans(%ecx),%ebp + xorl %ebp, %esi + movb %dh, %cl + shrl $16, %eax + movl 0x100+des_SPtrans(%ebx),%ebp + xorl %ebp, %esi + movb %ah, %bl + shrl $16, %edx + movl 0x300+des_SPtrans(%ecx),%ebp + xorl %ebp, %esi + movl 24(%esp), %ebp + movb %dh, %cl + andl $0xff, %eax + andl $0xff, %edx + movl 0x600+des_SPtrans(%ebx),%ebx + xorl %ebx, %esi + movl 0x700+des_SPtrans(%ecx),%ebx + xorl %ebx, %esi + movl 0x400+des_SPtrans(%eax),%ebx + xorl %ebx, %esi + movl 0x500+des_SPtrans(%edx),%ebx + xorl %ebx, %esi + + /* Round 14 */ + movl 112(%ebp), %eax + xorl %ebx, %ebx + movl 116(%ebp), %edx + xorl %esi, %eax + xorl %esi, %edx + andl $0xfcfcfcfc, %eax + andl $0xcfcfcfcf, %edx + movb %al, %bl + movb %ah, %cl + rorl $4, %edx + movl des_SPtrans(%ebx),%ebp + movb %dl, %bl + xorl %ebp, %edi + movl 0x200+des_SPtrans(%ecx),%ebp + xorl %ebp, %edi + movb %dh, %cl + shrl $16, %eax + movl 0x100+des_SPtrans(%ebx),%ebp + xorl %ebp, %edi + movb %ah, %bl + shrl $16, %edx + movl 0x300+des_SPtrans(%ecx),%ebp + xorl %ebp, %edi + movl 24(%esp), %ebp + movb %dh, %cl + andl $0xff, %eax + andl $0xff, %edx + movl 0x600+des_SPtrans(%ebx),%ebx + xorl %ebx, %edi + movl 0x700+des_SPtrans(%ecx),%ebx + xorl %ebx, %edi + movl 0x400+des_SPtrans(%eax),%ebx + xorl %ebx, %edi + movl 0x500+des_SPtrans(%edx),%ebx + xorl %ebx, %edi + + /* Round 15 */ + movl 120(%ebp), %eax + xorl %ebx, %ebx + movl 124(%ebp), %edx + xorl %edi, %eax + xorl %edi, %edx + andl $0xfcfcfcfc, %eax + andl $0xcfcfcfcf, %edx + movb %al, %bl + movb %ah, %cl + rorl $4, %edx + movl des_SPtrans(%ebx),%ebp + movb %dl, %bl + xorl %ebp, %esi + movl 0x200+des_SPtrans(%ecx),%ebp + xorl %ebp, %esi + movb %dh, %cl + shrl $16, %eax + movl 0x100+des_SPtrans(%ebx),%ebp + xorl %ebp, %esi + movb %ah, %bl + shrl $16, %edx + movl 0x300+des_SPtrans(%ecx),%ebp + xorl %ebp, %esi + movl 24(%esp), %ebp + movb %dh, %cl + andl $0xff, %eax + andl $0xff, %edx + movl 0x600+des_SPtrans(%ebx),%ebx + xorl %ebx, %esi + movl 0x700+des_SPtrans(%ecx),%ebx + xorl %ebx, %esi + movl 0x400+des_SPtrans(%eax),%ebx + xorl %ebx, %esi + movl 0x500+des_SPtrans(%edx),%ebx + xorl %ebx, %esi + jmp .L003end +.L002start_decrypt: + + /* Round 15 */ + movl 120(%ebp), %eax + xorl %ebx, %ebx + movl 124(%ebp), %edx + xorl %esi, %eax + xorl %esi, %edx + andl $0xfcfcfcfc, %eax + andl $0xcfcfcfcf, %edx + movb %al, %bl + movb %ah, %cl + rorl $4, %edx + movl des_SPtrans(%ebx),%ebp + movb %dl, %bl + xorl %ebp, %edi + movl 0x200+des_SPtrans(%ecx),%ebp + xorl %ebp, %edi + movb %dh, %cl + shrl $16, %eax + movl 0x100+des_SPtrans(%ebx),%ebp + xorl %ebp, %edi + movb %ah, %bl + shrl $16, %edx + movl 0x300+des_SPtrans(%ecx),%ebp + xorl %ebp, %edi + movl 24(%esp), %ebp + movb %dh, %cl + andl $0xff, %eax + andl $0xff, %edx + movl 0x600+des_SPtrans(%ebx),%ebx + xorl %ebx, %edi + movl 0x700+des_SPtrans(%ecx),%ebx + xorl %ebx, %edi + movl 0x400+des_SPtrans(%eax),%ebx + xorl %ebx, %edi + movl 0x500+des_SPtrans(%edx),%ebx + xorl %ebx, %edi + + /* Round 14 */ + movl 112(%ebp), %eax + xorl %ebx, %ebx + movl 116(%ebp), %edx + xorl %edi, %eax + xorl %edi, %edx + andl $0xfcfcfcfc, %eax + andl $0xcfcfcfcf, %edx + movb %al, %bl + movb %ah, %cl + rorl $4, %edx + movl des_SPtrans(%ebx),%ebp + movb %dl, %bl + xorl %ebp, %esi + movl 0x200+des_SPtrans(%ecx),%ebp + xorl %ebp, %esi + movb %dh, %cl + shrl $16, %eax + movl 0x100+des_SPtrans(%ebx),%ebp + xorl %ebp, %esi + movb %ah, %bl + shrl $16, %edx + movl 0x300+des_SPtrans(%ecx),%ebp + xorl %ebp, %esi + movl 24(%esp), %ebp + movb %dh, %cl + andl $0xff, %eax + andl $0xff, %edx + movl 0x600+des_SPtrans(%ebx),%ebx + xorl %ebx, %esi + movl 0x700+des_SPtrans(%ecx),%ebx + xorl %ebx, %esi + movl 0x400+des_SPtrans(%eax),%ebx + xorl %ebx, %esi + movl 0x500+des_SPtrans(%edx),%ebx + xorl %ebx, %esi + + /* Round 13 */ + movl 104(%ebp), %eax + xorl %ebx, %ebx + movl 108(%ebp), %edx + xorl %esi, %eax + xorl %esi, %edx + andl $0xfcfcfcfc, %eax + andl $0xcfcfcfcf, %edx + movb %al, %bl + movb %ah, %cl + rorl $4, %edx + movl des_SPtrans(%ebx),%ebp + movb %dl, %bl + xorl %ebp, %edi + movl 0x200+des_SPtrans(%ecx),%ebp + xorl %ebp, %edi + movb %dh, %cl + shrl $16, %eax + movl 0x100+des_SPtrans(%ebx),%ebp + xorl %ebp, %edi + movb %ah, %bl + shrl $16, %edx + movl 0x300+des_SPtrans(%ecx),%ebp + xorl %ebp, %edi + movl 24(%esp), %ebp + movb %dh, %cl + andl $0xff, %eax + andl $0xff, %edx + movl 0x600+des_SPtrans(%ebx),%ebx + xorl %ebx, %edi + movl 0x700+des_SPtrans(%ecx),%ebx + xorl %ebx, %edi + movl 0x400+des_SPtrans(%eax),%ebx + xorl %ebx, %edi + movl 0x500+des_SPtrans(%edx),%ebx + xorl %ebx, %edi + + /* Round 12 */ + movl 96(%ebp), %eax + xorl %ebx, %ebx + movl 100(%ebp), %edx + xorl %edi, %eax + xorl %edi, %edx + andl $0xfcfcfcfc, %eax + andl $0xcfcfcfcf, %edx + movb %al, %bl + movb %ah, %cl + rorl $4, %edx + movl des_SPtrans(%ebx),%ebp + movb %dl, %bl + xorl %ebp, %esi + movl 0x200+des_SPtrans(%ecx),%ebp + xorl %ebp, %esi + movb %dh, %cl + shrl $16, %eax + movl 0x100+des_SPtrans(%ebx),%ebp + xorl %ebp, %esi + movb %ah, %bl + shrl $16, %edx + movl 0x300+des_SPtrans(%ecx),%ebp + xorl %ebp, %esi + movl 24(%esp), %ebp + movb %dh, %cl + andl $0xff, %eax + andl $0xff, %edx + movl 0x600+des_SPtrans(%ebx),%ebx + xorl %ebx, %esi + movl 0x700+des_SPtrans(%ecx),%ebx + xorl %ebx, %esi + movl 0x400+des_SPtrans(%eax),%ebx + xorl %ebx, %esi + movl 0x500+des_SPtrans(%edx),%ebx + xorl %ebx, %esi + + /* Round 11 */ + movl 88(%ebp), %eax + xorl %ebx, %ebx + movl 92(%ebp), %edx + xorl %esi, %eax + xorl %esi, %edx + andl $0xfcfcfcfc, %eax + andl $0xcfcfcfcf, %edx + movb %al, %bl + movb %ah, %cl + rorl $4, %edx + movl des_SPtrans(%ebx),%ebp + movb %dl, %bl + xorl %ebp, %edi + movl 0x200+des_SPtrans(%ecx),%ebp + xorl %ebp, %edi + movb %dh, %cl + shrl $16, %eax + movl 0x100+des_SPtrans(%ebx),%ebp + xorl %ebp, %edi + movb %ah, %bl + shrl $16, %edx + movl 0x300+des_SPtrans(%ecx),%ebp + xorl %ebp, %edi + movl 24(%esp), %ebp + movb %dh, %cl + andl $0xff, %eax + andl $0xff, %edx + movl 0x600+des_SPtrans(%ebx),%ebx + xorl %ebx, %edi + movl 0x700+des_SPtrans(%ecx),%ebx + xorl %ebx, %edi + movl 0x400+des_SPtrans(%eax),%ebx + xorl %ebx, %edi + movl 0x500+des_SPtrans(%edx),%ebx + xorl %ebx, %edi + + /* Round 10 */ + movl 80(%ebp), %eax + xorl %ebx, %ebx + movl 84(%ebp), %edx + xorl %edi, %eax + xorl %edi, %edx + andl $0xfcfcfcfc, %eax + andl $0xcfcfcfcf, %edx + movb %al, %bl + movb %ah, %cl + rorl $4, %edx + movl des_SPtrans(%ebx),%ebp + movb %dl, %bl + xorl %ebp, %esi + movl 0x200+des_SPtrans(%ecx),%ebp + xorl %ebp, %esi + movb %dh, %cl + shrl $16, %eax + movl 0x100+des_SPtrans(%ebx),%ebp + xorl %ebp, %esi + movb %ah, %bl + shrl $16, %edx + movl 0x300+des_SPtrans(%ecx),%ebp + xorl %ebp, %esi + movl 24(%esp), %ebp + movb %dh, %cl + andl $0xff, %eax + andl $0xff, %edx + movl 0x600+des_SPtrans(%ebx),%ebx + xorl %ebx, %esi + movl 0x700+des_SPtrans(%ecx),%ebx + xorl %ebx, %esi + movl 0x400+des_SPtrans(%eax),%ebx + xorl %ebx, %esi + movl 0x500+des_SPtrans(%edx),%ebx + xorl %ebx, %esi + + /* Round 9 */ + movl 72(%ebp), %eax + xorl %ebx, %ebx + movl 76(%ebp), %edx + xorl %esi, %eax + xorl %esi, %edx + andl $0xfcfcfcfc, %eax + andl $0xcfcfcfcf, %edx + movb %al, %bl + movb %ah, %cl + rorl $4, %edx + movl des_SPtrans(%ebx),%ebp + movb %dl, %bl + xorl %ebp, %edi + movl 0x200+des_SPtrans(%ecx),%ebp + xorl %ebp, %edi + movb %dh, %cl + shrl $16, %eax + movl 0x100+des_SPtrans(%ebx),%ebp + xorl %ebp, %edi + movb %ah, %bl + shrl $16, %edx + movl 0x300+des_SPtrans(%ecx),%ebp + xorl %ebp, %edi + movl 24(%esp), %ebp + movb %dh, %cl + andl $0xff, %eax + andl $0xff, %edx + movl 0x600+des_SPtrans(%ebx),%ebx + xorl %ebx, %edi + movl 0x700+des_SPtrans(%ecx),%ebx + xorl %ebx, %edi + movl 0x400+des_SPtrans(%eax),%ebx + xorl %ebx, %edi + movl 0x500+des_SPtrans(%edx),%ebx + xorl %ebx, %edi + + /* Round 8 */ + movl 64(%ebp), %eax + xorl %ebx, %ebx + movl 68(%ebp), %edx + xorl %edi, %eax + xorl %edi, %edx + andl $0xfcfcfcfc, %eax + andl $0xcfcfcfcf, %edx + movb %al, %bl + movb %ah, %cl + rorl $4, %edx + movl des_SPtrans(%ebx),%ebp + movb %dl, %bl + xorl %ebp, %esi + movl 0x200+des_SPtrans(%ecx),%ebp + xorl %ebp, %esi + movb %dh, %cl + shrl $16, %eax + movl 0x100+des_SPtrans(%ebx),%ebp + xorl %ebp, %esi + movb %ah, %bl + shrl $16, %edx + movl 0x300+des_SPtrans(%ecx),%ebp + xorl %ebp, %esi + movl 24(%esp), %ebp + movb %dh, %cl + andl $0xff, %eax + andl $0xff, %edx + movl 0x600+des_SPtrans(%ebx),%ebx + xorl %ebx, %esi + movl 0x700+des_SPtrans(%ecx),%ebx + xorl %ebx, %esi + movl 0x400+des_SPtrans(%eax),%ebx + xorl %ebx, %esi + movl 0x500+des_SPtrans(%edx),%ebx + xorl %ebx, %esi + + /* Round 7 */ + movl 56(%ebp), %eax + xorl %ebx, %ebx + movl 60(%ebp), %edx + xorl %esi, %eax + xorl %esi, %edx + andl $0xfcfcfcfc, %eax + andl $0xcfcfcfcf, %edx + movb %al, %bl + movb %ah, %cl + rorl $4, %edx + movl des_SPtrans(%ebx),%ebp + movb %dl, %bl + xorl %ebp, %edi + movl 0x200+des_SPtrans(%ecx),%ebp + xorl %ebp, %edi + movb %dh, %cl + shrl $16, %eax + movl 0x100+des_SPtrans(%ebx),%ebp + xorl %ebp, %edi + movb %ah, %bl + shrl $16, %edx + movl 0x300+des_SPtrans(%ecx),%ebp + xorl %ebp, %edi + movl 24(%esp), %ebp + movb %dh, %cl + andl $0xff, %eax + andl $0xff, %edx + movl 0x600+des_SPtrans(%ebx),%ebx + xorl %ebx, %edi + movl 0x700+des_SPtrans(%ecx),%ebx + xorl %ebx, %edi + movl 0x400+des_SPtrans(%eax),%ebx + xorl %ebx, %edi + movl 0x500+des_SPtrans(%edx),%ebx + xorl %ebx, %edi + + /* Round 6 */ + movl 48(%ebp), %eax + xorl %ebx, %ebx + movl 52(%ebp), %edx + xorl %edi, %eax + xorl %edi, %edx + andl $0xfcfcfcfc, %eax + andl $0xcfcfcfcf, %edx + movb %al, %bl + movb %ah, %cl + rorl $4, %edx + movl des_SPtrans(%ebx),%ebp + movb %dl, %bl + xorl %ebp, %esi + movl 0x200+des_SPtrans(%ecx),%ebp + xorl %ebp, %esi + movb %dh, %cl + shrl $16, %eax + movl 0x100+des_SPtrans(%ebx),%ebp + xorl %ebp, %esi + movb %ah, %bl + shrl $16, %edx + movl 0x300+des_SPtrans(%ecx),%ebp + xorl %ebp, %esi + movl 24(%esp), %ebp + movb %dh, %cl + andl $0xff, %eax + andl $0xff, %edx + movl 0x600+des_SPtrans(%ebx),%ebx + xorl %ebx, %esi + movl 0x700+des_SPtrans(%ecx),%ebx + xorl %ebx, %esi + movl 0x400+des_SPtrans(%eax),%ebx + xorl %ebx, %esi + movl 0x500+des_SPtrans(%edx),%ebx + xorl %ebx, %esi + + /* Round 5 */ + movl 40(%ebp), %eax + xorl %ebx, %ebx + movl 44(%ebp), %edx + xorl %esi, %eax + xorl %esi, %edx + andl $0xfcfcfcfc, %eax + andl $0xcfcfcfcf, %edx + movb %al, %bl + movb %ah, %cl + rorl $4, %edx + movl des_SPtrans(%ebx),%ebp + movb %dl, %bl + xorl %ebp, %edi + movl 0x200+des_SPtrans(%ecx),%ebp + xorl %ebp, %edi + movb %dh, %cl + shrl $16, %eax + movl 0x100+des_SPtrans(%ebx),%ebp + xorl %ebp, %edi + movb %ah, %bl + shrl $16, %edx + movl 0x300+des_SPtrans(%ecx),%ebp + xorl %ebp, %edi + movl 24(%esp), %ebp + movb %dh, %cl + andl $0xff, %eax + andl $0xff, %edx + movl 0x600+des_SPtrans(%ebx),%ebx + xorl %ebx, %edi + movl 0x700+des_SPtrans(%ecx),%ebx + xorl %ebx, %edi + movl 0x400+des_SPtrans(%eax),%ebx + xorl %ebx, %edi + movl 0x500+des_SPtrans(%edx),%ebx + xorl %ebx, %edi + + /* Round 4 */ + movl 32(%ebp), %eax + xorl %ebx, %ebx + movl 36(%ebp), %edx + xorl %edi, %eax + xorl %edi, %edx + andl $0xfcfcfcfc, %eax + andl $0xcfcfcfcf, %edx + movb %al, %bl + movb %ah, %cl + rorl $4, %edx + movl des_SPtrans(%ebx),%ebp + movb %dl, %bl + xorl %ebp, %esi + movl 0x200+des_SPtrans(%ecx),%ebp + xorl %ebp, %esi + movb %dh, %cl + shrl $16, %eax + movl 0x100+des_SPtrans(%ebx),%ebp + xorl %ebp, %esi + movb %ah, %bl + shrl $16, %edx + movl 0x300+des_SPtrans(%ecx),%ebp + xorl %ebp, %esi + movl 24(%esp), %ebp + movb %dh, %cl + andl $0xff, %eax + andl $0xff, %edx + movl 0x600+des_SPtrans(%ebx),%ebx + xorl %ebx, %esi + movl 0x700+des_SPtrans(%ecx),%ebx + xorl %ebx, %esi + movl 0x400+des_SPtrans(%eax),%ebx + xorl %ebx, %esi + movl 0x500+des_SPtrans(%edx),%ebx + xorl %ebx, %esi + + /* Round 3 */ + movl 24(%ebp), %eax + xorl %ebx, %ebx + movl 28(%ebp), %edx + xorl %esi, %eax + xorl %esi, %edx + andl $0xfcfcfcfc, %eax + andl $0xcfcfcfcf, %edx + movb %al, %bl + movb %ah, %cl + rorl $4, %edx + movl des_SPtrans(%ebx),%ebp + movb %dl, %bl + xorl %ebp, %edi + movl 0x200+des_SPtrans(%ecx),%ebp + xorl %ebp, %edi + movb %dh, %cl + shrl $16, %eax + movl 0x100+des_SPtrans(%ebx),%ebp + xorl %ebp, %edi + movb %ah, %bl + shrl $16, %edx + movl 0x300+des_SPtrans(%ecx),%ebp + xorl %ebp, %edi + movl 24(%esp), %ebp + movb %dh, %cl + andl $0xff, %eax + andl $0xff, %edx + movl 0x600+des_SPtrans(%ebx),%ebx + xorl %ebx, %edi + movl 0x700+des_SPtrans(%ecx),%ebx + xorl %ebx, %edi + movl 0x400+des_SPtrans(%eax),%ebx + xorl %ebx, %edi + movl 0x500+des_SPtrans(%edx),%ebx + xorl %ebx, %edi + + /* Round 2 */ + movl 16(%ebp), %eax + xorl %ebx, %ebx + movl 20(%ebp), %edx + xorl %edi, %eax + xorl %edi, %edx + andl $0xfcfcfcfc, %eax + andl $0xcfcfcfcf, %edx + movb %al, %bl + movb %ah, %cl + rorl $4, %edx + movl des_SPtrans(%ebx),%ebp + movb %dl, %bl + xorl %ebp, %esi + movl 0x200+des_SPtrans(%ecx),%ebp + xorl %ebp, %esi + movb %dh, %cl + shrl $16, %eax + movl 0x100+des_SPtrans(%ebx),%ebp + xorl %ebp, %esi + movb %ah, %bl + shrl $16, %edx + movl 0x300+des_SPtrans(%ecx),%ebp + xorl %ebp, %esi + movl 24(%esp), %ebp + movb %dh, %cl + andl $0xff, %eax + andl $0xff, %edx + movl 0x600+des_SPtrans(%ebx),%ebx + xorl %ebx, %esi + movl 0x700+des_SPtrans(%ecx),%ebx + xorl %ebx, %esi + movl 0x400+des_SPtrans(%eax),%ebx + xorl %ebx, %esi + movl 0x500+des_SPtrans(%edx),%ebx + xorl %ebx, %esi + + /* Round 1 */ + movl 8(%ebp), %eax + xorl %ebx, %ebx + movl 12(%ebp), %edx + xorl %esi, %eax + xorl %esi, %edx + andl $0xfcfcfcfc, %eax + andl $0xcfcfcfcf, %edx + movb %al, %bl + movb %ah, %cl + rorl $4, %edx + movl des_SPtrans(%ebx),%ebp + movb %dl, %bl + xorl %ebp, %edi + movl 0x200+des_SPtrans(%ecx),%ebp + xorl %ebp, %edi + movb %dh, %cl + shrl $16, %eax + movl 0x100+des_SPtrans(%ebx),%ebp + xorl %ebp, %edi + movb %ah, %bl + shrl $16, %edx + movl 0x300+des_SPtrans(%ecx),%ebp + xorl %ebp, %edi + movl 24(%esp), %ebp + movb %dh, %cl + andl $0xff, %eax + andl $0xff, %edx + movl 0x600+des_SPtrans(%ebx),%ebx + xorl %ebx, %edi + movl 0x700+des_SPtrans(%ecx),%ebx + xorl %ebx, %edi + movl 0x400+des_SPtrans(%eax),%ebx + xorl %ebx, %edi + movl 0x500+des_SPtrans(%edx),%ebx + xorl %ebx, %edi + + /* Round 0 */ + movl (%ebp), %eax + xorl %ebx, %ebx + movl 4(%ebp), %edx + xorl %edi, %eax + xorl %edi, %edx + andl $0xfcfcfcfc, %eax + andl $0xcfcfcfcf, %edx + movb %al, %bl + movb %ah, %cl + rorl $4, %edx + movl des_SPtrans(%ebx),%ebp + movb %dl, %bl + xorl %ebp, %esi + movl 0x200+des_SPtrans(%ecx),%ebp + xorl %ebp, %esi + movb %dh, %cl + shrl $16, %eax + movl 0x100+des_SPtrans(%ebx),%ebp + xorl %ebp, %esi + movb %ah, %bl + shrl $16, %edx + movl 0x300+des_SPtrans(%ecx),%ebp + xorl %ebp, %esi + movl 24(%esp), %ebp + movb %dh, %cl + andl $0xff, %eax + andl $0xff, %edx + movl 0x600+des_SPtrans(%ebx),%ebx + xorl %ebx, %esi + movl 0x700+des_SPtrans(%ecx),%ebx + xorl %ebx, %esi + movl 0x400+des_SPtrans(%eax),%ebx + xorl %ebx, %esi + movl 0x500+des_SPtrans(%edx),%ebx + xorl %ebx, %esi +.L003end: + + /* Fixup */ + rorl $3, %edi + movl 20(%esp), %eax + rorl $3, %esi + movl %edi, (%eax) + movl %esi, 4(%eax) + popl %ebp + popl %ebx + popl %edi + popl %esi + ret +.des_encrypt2_end: + SIZE(des_encrypt2,.des_encrypt2_end-des_encrypt2) +.ident "desasm.pl" +.text + .align ALIGN +.globl des_encrypt3 + TYPE(des_encrypt3,@function) +des_encrypt3: + pushl %ebx + movl 8(%esp), %ebx + pushl %ebp + pushl %esi + pushl %edi + + /* Load the data words */ + movl (%ebx), %edi + movl 4(%ebx), %esi + subl $12, %esp + + /* IP */ + roll $4, %edi + movl %edi, %edx + xorl %esi, %edi + andl $0xf0f0f0f0, %edi + xorl %edi, %edx + xorl %edi, %esi + + roll $20, %esi + movl %esi, %edi + xorl %edx, %esi + andl $0xfff0000f, %esi + xorl %esi, %edi + xorl %esi, %edx + + roll $14, %edi + movl %edi, %esi + xorl %edx, %edi + andl $0x33333333, %edi + xorl %edi, %esi + xorl %edi, %edx + + roll $22, %edx + movl %edx, %edi + xorl %esi, %edx + andl $0x03fc03fc, %edx + xorl %edx, %edi + xorl %edx, %esi + + roll $9, %edi + movl %edi, %edx + xorl %esi, %edi + andl $0xaaaaaaaa, %edi + xorl %edi, %edx + xorl %edi, %esi + + rorl $3, %edx + rorl $2, %esi + movl %esi, 4(%ebx) + movl 36(%esp), %eax + movl %edx, (%ebx) + movl 40(%esp), %edi + movl 44(%esp), %esi + movl $1, 8(%esp) + movl %eax, 4(%esp) + movl %ebx, (%esp) + call des_encrypt2 + movl $0, 8(%esp) + movl %edi, 4(%esp) + movl %ebx, (%esp) + call des_encrypt2 + movl $1, 8(%esp) + movl %esi, 4(%esp) + movl %ebx, (%esp) + call des_encrypt2 + addl $12, %esp + movl (%ebx), %edi + movl 4(%ebx), %esi + + /* FP */ + roll $2, %esi + roll $3, %edi + movl %edi, %eax + xorl %esi, %edi + andl $0xaaaaaaaa, %edi + xorl %edi, %eax + xorl %edi, %esi + + roll $23, %eax + movl %eax, %edi + xorl %esi, %eax + andl $0x03fc03fc, %eax + xorl %eax, %edi + xorl %eax, %esi + + roll $10, %edi + movl %edi, %eax + xorl %esi, %edi + andl $0x33333333, %edi + xorl %edi, %eax + xorl %edi, %esi + + roll $18, %esi + movl %esi, %edi + xorl %eax, %esi + andl $0xfff0000f, %esi + xorl %esi, %edi + xorl %esi, %eax + + roll $12, %edi + movl %edi, %esi + xorl %eax, %edi + andl $0xf0f0f0f0, %edi + xorl %edi, %esi + xorl %edi, %eax + + rorl $4, %eax + movl %eax, (%ebx) + movl %esi, 4(%ebx) + popl %edi + popl %esi + popl %ebp + popl %ebx + ret +.des_encrypt3_end: + SIZE(des_encrypt3,.des_encrypt3_end-des_encrypt3) +.ident "desasm.pl" +.text + .align ALIGN +.globl des_decrypt3 + TYPE(des_decrypt3,@function) +des_decrypt3: + pushl %ebx + movl 8(%esp), %ebx + pushl %ebp + pushl %esi + pushl %edi + + /* Load the data words */ + movl (%ebx), %edi + movl 4(%ebx), %esi + subl $12, %esp + + /* IP */ + roll $4, %edi + movl %edi, %edx + xorl %esi, %edi + andl $0xf0f0f0f0, %edi + xorl %edi, %edx + xorl %edi, %esi + + roll $20, %esi + movl %esi, %edi + xorl %edx, %esi + andl $0xfff0000f, %esi + xorl %esi, %edi + xorl %esi, %edx + + roll $14, %edi + movl %edi, %esi + xorl %edx, %edi + andl $0x33333333, %edi + xorl %edi, %esi + xorl %edi, %edx + + roll $22, %edx + movl %edx, %edi + xorl %esi, %edx + andl $0x03fc03fc, %edx + xorl %edx, %edi + xorl %edx, %esi + + roll $9, %edi + movl %edi, %edx + xorl %esi, %edi + andl $0xaaaaaaaa, %edi + xorl %edi, %edx + xorl %edi, %esi + + rorl $3, %edx + rorl $2, %esi + movl %esi, 4(%ebx) + movl 36(%esp), %esi + movl %edx, (%ebx) + movl 40(%esp), %edi + movl 44(%esp), %eax + movl $0, 8(%esp) + movl %eax, 4(%esp) + movl %ebx, (%esp) + call des_encrypt2 + movl $1, 8(%esp) + movl %edi, 4(%esp) + movl %ebx, (%esp) + call des_encrypt2 + movl $0, 8(%esp) + movl %esi, 4(%esp) + movl %ebx, (%esp) + call des_encrypt2 + addl $12, %esp + movl (%ebx), %edi + movl 4(%ebx), %esi + + /* FP */ + roll $2, %esi + roll $3, %edi + movl %edi, %eax + xorl %esi, %edi + andl $0xaaaaaaaa, %edi + xorl %edi, %eax + xorl %edi, %esi + + roll $23, %eax + movl %eax, %edi + xorl %esi, %eax + andl $0x03fc03fc, %eax + xorl %eax, %edi + xorl %eax, %esi + + roll $10, %edi + movl %edi, %eax + xorl %esi, %edi + andl $0x33333333, %edi + xorl %edi, %eax + xorl %edi, %esi + + roll $18, %esi + movl %esi, %edi + xorl %eax, %esi + andl $0xfff0000f, %esi + xorl %esi, %edi + xorl %esi, %eax + + roll $12, %edi + movl %edi, %esi + xorl %eax, %edi + andl $0xf0f0f0f0, %edi + xorl %edi, %esi + xorl %edi, %eax + + rorl $4, %eax + movl %eax, (%ebx) + movl %esi, 4(%ebx) + popl %edi + popl %esi + popl %ebp + popl %ebx + ret +.des_decrypt3_end: + SIZE(des_decrypt3,.des_decrypt3_end-des_decrypt3) +.ident "desasm.pl" +.text + .align ALIGN +.globl des_ncbc_encrypt + TYPE(des_ncbc_encrypt,@function) +des_ncbc_encrypt: + + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 28(%esp), %ebp + /* getting iv ptr from parameter 4 */ + movl 36(%esp), %ebx + movl (%ebx), %esi + movl 4(%ebx), %edi + pushl %edi + pushl %esi + pushl %edi + pushl %esi + movl %esp, %ebx + movl 36(%esp), %esi + movl 40(%esp), %edi + /* getting encrypt flag from parameter 5 */ + movl 56(%esp), %ecx + /* get and push parameter 5 */ + pushl %ecx + /* get and push parameter 3 */ + movl 52(%esp), %eax + pushl %eax + pushl %ebx + cmpl $0, %ecx + jz .L004decrypt + andl $4294967288, %ebp + movl 12(%esp), %eax + movl 16(%esp), %ebx + jz .L005encrypt_finish +.L006encrypt_loop: + movl (%esi), %ecx + movl 4(%esi), %edx + xorl %ecx, %eax + xorl %edx, %ebx + movl %eax, 12(%esp) + movl %ebx, 16(%esp) + call des_encrypt + movl 12(%esp), %eax + movl 16(%esp), %ebx + movl %eax, (%edi) + movl %ebx, 4(%edi) + addl $8, %esi + addl $8, %edi + subl $8, %ebp + jnz .L006encrypt_loop +.L005encrypt_finish: + movl 56(%esp), %ebp + andl $7, %ebp + jz .L007finish + xorl %ecx, %ecx + xorl %edx, %edx + movl .L008cbc_enc_jmp_table(,%ebp,4),%ebp + jmp *%ebp +.L009ej7: + movb 6(%esi), %dh + sall $8, %edx +.L010ej6: + movb 5(%esi), %dh +.L011ej5: + movb 4(%esi), %dl +.L012ej4: + movl (%esi), %ecx + jmp .L013ejend +.L014ej3: + movb 2(%esi), %ch + sall $8, %ecx +.L015ej2: + movb 1(%esi), %ch +.L016ej1: + movb (%esi), %cl +.L013ejend: + xorl %ecx, %eax + xorl %edx, %ebx + movl %eax, 12(%esp) + movl %ebx, 16(%esp) + call des_encrypt + movl 12(%esp), %eax + movl 16(%esp), %ebx + movl %eax, (%edi) + movl %ebx, 4(%edi) + jmp .L007finish +.align ALIGN +.L004decrypt: + andl $4294967288, %ebp + movl 20(%esp), %eax + movl 24(%esp), %ebx + jz .L017decrypt_finish +.L018decrypt_loop: + movl (%esi), %eax + movl 4(%esi), %ebx + movl %eax, 12(%esp) + movl %ebx, 16(%esp) + call des_encrypt + movl 12(%esp), %eax + movl 16(%esp), %ebx + movl 20(%esp), %ecx + movl 24(%esp), %edx + xorl %eax, %ecx + xorl %ebx, %edx + movl (%esi), %eax + movl 4(%esi), %ebx + movl %ecx, (%edi) + movl %edx, 4(%edi) + movl %eax, 20(%esp) + movl %ebx, 24(%esp) + addl $8, %esi + addl $8, %edi + subl $8, %ebp + jnz .L018decrypt_loop +.L017decrypt_finish: + movl 56(%esp), %ebp + andl $7, %ebp + jz .L007finish + movl (%esi), %eax + movl 4(%esi), %ebx + movl %eax, 12(%esp) + movl %ebx, 16(%esp) + call des_encrypt + movl 12(%esp), %eax + movl 16(%esp), %ebx + movl 20(%esp), %ecx + movl 24(%esp), %edx + xorl %eax, %ecx + xorl %ebx, %edx + movl (%esi), %eax + movl 4(%esi), %ebx +.L019dj7: + rorl $16, %edx + movb %dl, 6(%edi) + shrl $16, %edx +.L020dj6: + movb %dh, 5(%edi) +.L021dj5: + movb %dl, 4(%edi) +.L022dj4: + movl %ecx, (%edi) + jmp .L023djend +.L024dj3: + rorl $16, %ecx + movb %cl, 2(%edi) + sall $16, %ecx +.L025dj2: + movb %ch, 1(%esi) +.L026dj1: + movb %cl, (%esi) +.L023djend: + jmp .L007finish +.align ALIGN +.L007finish: + movl 64(%esp), %ecx + addl $28, %esp + movl %eax, (%ecx) + movl %ebx, 4(%ecx) + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align ALIGN +.L008cbc_enc_jmp_table: + .long 0 + .long .L016ej1 + .long .L015ej2 + .long .L014ej3 + .long .L012ej4 + .long .L011ej5 + .long .L010ej6 + .long .L009ej7 +.align ALIGN +.L027cbc_dec_jmp_table: + .long 0 + .long .L026dj1 + .long .L025dj2 + .long .L024dj3 + .long .L022dj4 + .long .L021dj5 + .long .L020dj6 + .long .L019dj7 +.des_ncbc_encrypt_end: + SIZE(des_ncbc_encrypt,.des_ncbc_encrypt_end-des_ncbc_encrypt) +.ident "desasm.pl" +.text + .align ALIGN +.globl des_ede3_cbc_encrypt + TYPE(des_ede3_cbc_encrypt,@function) +des_ede3_cbc_encrypt: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 28(%esp), %ebp + /* getting iv ptr from parameter 6 */ + movl 44(%esp), %ebx + movl (%ebx), %esi + movl 4(%ebx), %edi + pushl %edi + pushl %esi + pushl %edi + pushl %esi + movl %esp, %ebx + movl 36(%esp), %esi + movl 40(%esp), %edi + /* getting encrypt flag from parameter 7 */ + movl 64(%esp), %ecx + /* get and push parameter 5 */ + movl 56(%esp), %eax + pushl %eax + /* get and push parameter 4 */ + movl 56(%esp), %eax + pushl %eax + /* get and push parameter 3 */ + movl 56(%esp), %eax + pushl %eax + pushl %ebx + cmpl $0, %ecx + jz .L028decrypt + andl $4294967288, %ebp + movl 16(%esp), %eax + movl 20(%esp), %ebx + jz .L029encrypt_finish +.L030encrypt_loop: + movl (%esi), %ecx + movl 4(%esi), %edx + xorl %ecx, %eax + xorl %edx, %ebx + movl %eax, 16(%esp) + movl %ebx, 20(%esp) + call des_encrypt3 + movl 16(%esp), %eax + movl 20(%esp), %ebx + movl %eax, (%edi) + movl %ebx, 4(%edi) + addl $8, %esi + addl $8, %edi + subl $8, %ebp + jnz .L030encrypt_loop +.L029encrypt_finish: + movl 60(%esp), %ebp + andl $7, %ebp + jz .L031finish + xorl %ecx, %ecx + xorl %edx, %edx + movl .L032cbc_enc_jmp_table(,%ebp,4),%ebp + jmp *%ebp +.L033ej7: + movb 6(%esi), %dh + sall $8, %edx +.L034ej6: + movb 5(%esi), %dh +.L035ej5: + movb 4(%esi), %dl +.L036ej4: + movl (%esi), %ecx + jmp .L037ejend +.L038ej3: + movb 2(%esi), %ch + sall $8, %ecx +.L039ej2: + movb 1(%esi), %ch +.L040ej1: + movb (%esi), %cl +.L037ejend: + xorl %ecx, %eax + xorl %edx, %ebx + movl %eax, 16(%esp) + movl %ebx, 20(%esp) + call des_encrypt3 + movl 16(%esp), %eax + movl 20(%esp), %ebx + movl %eax, (%edi) + movl %ebx, 4(%edi) + jmp .L031finish +.align ALIGN +.L028decrypt: + andl $4294967288, %ebp + movl 24(%esp), %eax + movl 28(%esp), %ebx + jz .L041decrypt_finish +.L042decrypt_loop: + movl (%esi), %eax + movl 4(%esi), %ebx + movl %eax, 16(%esp) + movl %ebx, 20(%esp) + call des_decrypt3 + movl 16(%esp), %eax + movl 20(%esp), %ebx + movl 24(%esp), %ecx + movl 28(%esp), %edx + xorl %eax, %ecx + xorl %ebx, %edx + movl (%esi), %eax + movl 4(%esi), %ebx + movl %ecx, (%edi) + movl %edx, 4(%edi) + movl %eax, 24(%esp) + movl %ebx, 28(%esp) + addl $8, %esi + addl $8, %edi + subl $8, %ebp + jnz .L042decrypt_loop +.L041decrypt_finish: + movl 60(%esp), %ebp + andl $7, %ebp + jz .L031finish + movl (%esi), %eax + movl 4(%esi), %ebx + movl %eax, 16(%esp) + movl %ebx, 20(%esp) + call des_decrypt3 + movl 16(%esp), %eax + movl 20(%esp), %ebx + movl 24(%esp), %ecx + movl 28(%esp), %edx + xorl %eax, %ecx + xorl %ebx, %edx + movl (%esi), %eax + movl 4(%esi), %ebx +.L043dj7: + rorl $16, %edx + movb %dl, 6(%edi) + shrl $16, %edx +.L044dj6: + movb %dh, 5(%edi) +.L045dj5: + movb %dl, 4(%edi) +.L046dj4: + movl %ecx, (%edi) + jmp .L047djend +.L048dj3: + rorl $16, %ecx + movb %cl, 2(%edi) + sall $16, %ecx +.L049dj2: + movb %ch, 1(%esi) +.L050dj1: + movb %cl, (%esi) +.L047djend: + jmp .L031finish +.align ALIGN +.L031finish: + movl 76(%esp), %ecx + addl $32, %esp + movl %eax, (%ecx) + movl %ebx, 4(%ecx) + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align ALIGN +.L032cbc_enc_jmp_table: + .long 0 + .long .L040ej1 + .long .L039ej2 + .long .L038ej3 + .long .L036ej4 + .long .L035ej5 + .long .L034ej6 + .long .L033ej7 +.align ALIGN +.L051cbc_dec_jmp_table: + .long 0 + .long .L050dj1 + .long .L049dj2 + .long .L048dj3 + .long .L046dj4 + .long .L045dj5 + .long .L044dj6 + .long .L043dj7 +.des_ede3_cbc_encrypt_end: + SIZE(des_ede3_cbc_encrypt,.des_ede3_cbc_encrypt_end-des_ede3_cbc_encrypt) +.ident "desasm.pl" diff --git a/crypto/des/asm/readme b/crypto/des/asm/readme index da2561a6b..f8529d930 100644 --- a/crypto/des/asm/readme +++ b/crypto/des/asm/readme @@ -1,131 +1,131 @@ -First up, let me say I don't like writing in assembler. It is not portable,
-dependant on the particular CPU architecture release and is generally a pig
-to debug and get right. Having said that, the x86 architecture is probably
-the most important for speed due to number of boxes and since
-it appears to be the worst architecture to to get
-good C compilers for. So due to this, I have lowered myself to do
-assembler for the inner DES routines in libdes :-).
-
-The file to implement in assembler is des_enc.c. Replace the following
-4 functions
-des_encrypt(DES_LONG data[2],des_key_schedule ks, int encrypt);
-des_encrypt2(DES_LONG data[2],des_key_schedule ks, int encrypt);
-des_encrypt3(DES_LONG data[2],des_key_schedule ks1,ks2,ks3);
-des_decrypt3(DES_LONG data[2],des_key_schedule ks1,ks2,ks3);
-
-They encrypt/decrypt the 64 bits held in 'data' using
-the 'ks' key schedules. The only difference between the 4 functions is that
-des_encrypt2() does not perform IP() or FP() on the data (this is an
-optimization for when doing triple DES and des_encrypt3() and des_decrypt3()
-perform triple des. The triple DES routines are in here because it does
-make a big difference to have them located near the des_encrypt2 function
-at link time..
-
-Now as we all know, there are lots of different operating systems running on
-x86 boxes, and unfortunately they normally try to make sure their assembler
-formating is not the same as the other peoples.
-The 4 main formats I know of are
-Microsoft Windows 95/Windows NT
-Elf Includes Linux and FreeBSD(?).
-a.out The older Linux.
-Solaris Same as Elf but different comments :-(.
-
-Now I was not overly keen to write 4 different copies of the same code,
-so I wrote a few perl routines to output the correct assembler, given
-a target assembler type. This code is ugly and is just a hack.
-The libraries are x86unix.pl and x86ms.pl.
-des586.pl, des686.pl and des-som[23].pl are the programs to actually
-generate the assembler.
-
-So to generate elf assembler
-perl des-som3.pl elf >dx86-elf.s
-For Windows 95/NT
-perl des-som2.pl win32 >win32.asm
-
-[ update 4 Jan 1996 ]
-I have added another way to do things.
-perl des-som3.pl cpp >dx86-cpp.s
-generates a file that will be included by dx86unix.cpp when it is compiled.
-To build for elf, a.out, solaris, bsdi etc,
-cc -E -DELF asm/dx86unix.cpp | as -o asm/dx86-elf.o
-cc -E -DSOL asm/dx86unix.cpp | as -o asm/dx86-sol.o
-cc -E -DOUT asm/dx86unix.cpp | as -o asm/dx86-out.o
-cc -E -DBSDI asm/dx86unix.cpp | as -o asm/dx86bsdi.o
-This was done to cut down the number of files in the distribution.
-
-Now the ugly part. I acquired my copy of Intels
-"Optimization's For Intel's 32-Bit Processors" and found a few interesting
-things. First, the aim of the exersize is to 'extract' one byte at a time
-from a word and do an array lookup. This involves getting the byte from
-the 4 locations in the word and moving it to a new word and doing the lookup.
-The most obvious way to do this is
-xor eax, eax # clear word
-movb al, cl # get low byte
-xor edi DWORD PTR 0x100+des_SP[eax] # xor in word
-movb al, ch # get next byte
-xor edi DWORD PTR 0x300+des_SP[eax] # xor in word
-shr ecx 16
-which seems ok. For the pentium, this system appears to be the best.
-One has to do instruction interleaving to keep both functional units
-operating, but it is basically very efficient.
-
-Now the crunch. When a full register is used after a partial write, eg.
-mov al, cl
-xor edi, DWORD PTR 0x100+des_SP[eax]
-386 - 1 cycle stall
-486 - 1 cycle stall
-586 - 0 cycle stall
-686 - at least 7 cycle stall (page 22 of the above mentioned document).
-
-So the technique that produces the best results on a pentium, according to
-the documentation, will produce hideous results on a pentium pro.
-
-To get around this, des686.pl will generate code that is not as fast on
-a pentium, should be very good on a pentium pro.
-mov eax, ecx # copy word
-shr ecx, 8 # line up next byte
-and eax, 0fch # mask byte
-xor edi DWORD PTR 0x100+des_SP[eax] # xor in array lookup
-mov eax, ecx # get word
-shr ecx 8 # line up next byte
-and eax, 0fch # mask byte
-xor edi DWORD PTR 0x300+des_SP[eax] # xor in array lookup
-
-Due to the execution units in the pentium, this actually works quite well.
-For a pentium pro it should be very good. This is the type of output
-Visual C++ generates.
-
-There is a third option. instead of using
-mov al, ch
-which is bad on the pentium pro, one may be able to use
-movzx eax, ch
-which may not incur the partial write penalty. On the pentium,
-this instruction takes 4 cycles so is not worth using but on the
-pentium pro it appears it may be worth while. I need access to one to
-experiment :-).
-
-eric (20 Oct 1996)
-
-22 Nov 1996 - I have asked people to run the 2 different version on pentium
-pros and it appears that the intel documentation is wrong. The
-mov al,bh is still faster on a pentium pro, so just use the des586.pl
-install des686.pl
-
-3 Dec 1996 - I added des_encrypt3/des_decrypt3 because I have moved these
-functions into des_enc.c because it does make a massive performance
-difference on some boxes to have the functions code located close to
-the des_encrypt2() function.
-
-9 Jan 1997 - des-som2.pl is now the correct perl script to use for
-pentiums. It contains an inner loop from
-Svend Olaf Mikkelsen <svolaf@inet.uni-c.dk> which does raw ecb DES calls at
-273,000 per second. He had a previous version at 250,000 and the best
-I was able to get was 203,000. The content has not changed, this is all
-due to instruction sequencing (and actual instructions choice) which is able
-to keep both functional units of the pentium going.
-We may have lost the ugly register usage restrictions when x86 went 32 bit
-but for the pentium it has been replaced by evil instruction ordering tricks.
-
-13 Jan 1997 - des-som3.pl, more optimizations from Svend Olaf.
-raw DES at 281,000 per second on a pentium 100.
-
+First up, let me say I don't like writing in assembler. It is not portable, +dependant on the particular CPU architecture release and is generally a pig +to debug and get right. Having said that, the x86 architecture is probably +the most important for speed due to number of boxes and since +it appears to be the worst architecture to to get +good C compilers for. So due to this, I have lowered myself to do +assembler for the inner DES routines in libdes :-). + +The file to implement in assembler is des_enc.c. Replace the following +4 functions +des_encrypt(DES_LONG data[2],des_key_schedule ks, int encrypt); +des_encrypt2(DES_LONG data[2],des_key_schedule ks, int encrypt); +des_encrypt3(DES_LONG data[2],des_key_schedule ks1,ks2,ks3); +des_decrypt3(DES_LONG data[2],des_key_schedule ks1,ks2,ks3); + +They encrypt/decrypt the 64 bits held in 'data' using +the 'ks' key schedules. The only difference between the 4 functions is that +des_encrypt2() does not perform IP() or FP() on the data (this is an +optimization for when doing triple DES and des_encrypt3() and des_decrypt3() +perform triple des. The triple DES routines are in here because it does +make a big difference to have them located near the des_encrypt2 function +at link time.. + +Now as we all know, there are lots of different operating systems running on +x86 boxes, and unfortunately they normally try to make sure their assembler +formating is not the same as the other peoples. +The 4 main formats I know of are +Microsoft Windows 95/Windows NT +Elf Includes Linux and FreeBSD(?). +a.out The older Linux. +Solaris Same as Elf but different comments :-(. + +Now I was not overly keen to write 4 different copies of the same code, +so I wrote a few perl routines to output the correct assembler, given +a target assembler type. This code is ugly and is just a hack. +The libraries are x86unix.pl and x86ms.pl. +des586.pl, des686.pl and des-som[23].pl are the programs to actually +generate the assembler. + +So to generate elf assembler +perl des-som3.pl elf >dx86-elf.s +For Windows 95/NT +perl des-som2.pl win32 >win32.asm + +[ update 4 Jan 1996 ] +I have added another way to do things. +perl des-som3.pl cpp >dx86-cpp.s +generates a file that will be included by dx86unix.cpp when it is compiled. +To build for elf, a.out, solaris, bsdi etc, +cc -E -DELF asm/dx86unix.cpp | as -o asm/dx86-elf.o +cc -E -DSOL asm/dx86unix.cpp | as -o asm/dx86-sol.o +cc -E -DOUT asm/dx86unix.cpp | as -o asm/dx86-out.o +cc -E -DBSDI asm/dx86unix.cpp | as -o asm/dx86bsdi.o +This was done to cut down the number of files in the distribution. + +Now the ugly part. I acquired my copy of Intels +"Optimization's For Intel's 32-Bit Processors" and found a few interesting +things. First, the aim of the exersize is to 'extract' one byte at a time +from a word and do an array lookup. This involves getting the byte from +the 4 locations in the word and moving it to a new word and doing the lookup. +The most obvious way to do this is +xor eax, eax # clear word +movb al, cl # get low byte +xor edi DWORD PTR 0x100+des_SP[eax] # xor in word +movb al, ch # get next byte +xor edi DWORD PTR 0x300+des_SP[eax] # xor in word +shr ecx 16 +which seems ok. For the pentium, this system appears to be the best. +One has to do instruction interleaving to keep both functional units +operating, but it is basically very efficient. + +Now the crunch. When a full register is used after a partial write, eg. +mov al, cl +xor edi, DWORD PTR 0x100+des_SP[eax] +386 - 1 cycle stall +486 - 1 cycle stall +586 - 0 cycle stall +686 - at least 7 cycle stall (page 22 of the above mentioned document). + +So the technique that produces the best results on a pentium, according to +the documentation, will produce hideous results on a pentium pro. + +To get around this, des686.pl will generate code that is not as fast on +a pentium, should be very good on a pentium pro. +mov eax, ecx # copy word +shr ecx, 8 # line up next byte +and eax, 0fch # mask byte +xor edi DWORD PTR 0x100+des_SP[eax] # xor in array lookup +mov eax, ecx # get word +shr ecx 8 # line up next byte +and eax, 0fch # mask byte +xor edi DWORD PTR 0x300+des_SP[eax] # xor in array lookup + +Due to the execution units in the pentium, this actually works quite well. +For a pentium pro it should be very good. This is the type of output +Visual C++ generates. + +There is a third option. instead of using +mov al, ch +which is bad on the pentium pro, one may be able to use +movzx eax, ch +which may not incur the partial write penalty. On the pentium, +this instruction takes 4 cycles so is not worth using but on the +pentium pro it appears it may be worth while. I need access to one to +experiment :-). + +eric (20 Oct 1996) + +22 Nov 1996 - I have asked people to run the 2 different version on pentium +pros and it appears that the intel documentation is wrong. The +mov al,bh is still faster on a pentium pro, so just use the des586.pl +install des686.pl + +3 Dec 1996 - I added des_encrypt3/des_decrypt3 because I have moved these +functions into des_enc.c because it does make a massive performance +difference on some boxes to have the functions code located close to +the des_encrypt2() function. + +9 Jan 1997 - des-som2.pl is now the correct perl script to use for +pentiums. It contains an inner loop from +Svend Olaf Mikkelsen <svolaf@inet.uni-c.dk> which does raw ecb DES calls at +273,000 per second. He had a previous version at 250,000 and the best +I was able to get was 203,000. The content has not changed, this is all +due to instruction sequencing (and actual instructions choice) which is able +to keep both functional units of the pentium going. +We may have lost the ugly register usage restrictions when x86 went 32 bit +but for the pentium it has been replaced by evil instruction ordering tricks. + +13 Jan 1997 - des-som3.pl, more optimizations from Svend Olaf. +raw DES at 281,000 per second on a pentium 100. + |