Diffstat (limited to 'lib/accelerated/x86/coff/padlock-x86-coff.s')
-rw-r--r-- | lib/accelerated/x86/coff/padlock-x86-coff.s | 615
1 file changed, 615 insertions, 0 deletions
diff --git a/lib/accelerated/x86/coff/padlock-x86-coff.s b/lib/accelerated/x86/coff/padlock-x86-coff.s
new file mode 100644
index 0000000000..c9231f19a7
--- /dev/null
+++ b/lib/accelerated/x86/coff/padlock-x86-coff.s
@@ -0,0 +1,615 @@
+# Copyright (c) 2011, Andy Polyakov by <appro@openssl.org>
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain copyright notices,
+# this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following
+# disclaimer in the documentation and/or other materials
+# provided with the distribution.
+#
+# * Neither the name of the Andy Polyakov nor the names of its
+# copyright holder and contributors may be used to endorse or
+# promote products derived from this software without specific
+# prior written permission.
+#
+# ALTERNATIVELY, provided that this notice is retained in full, this
+# product may be distributed under the terms of the GNU General Public
+# License (GPL), in which case the provisions of the GPL apply INSTEAD OF
+# those given above.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+.file "devel/perlasm/e_padlock-x86.s"
+.text
+.globl _padlock_capability
+.def _padlock_capability; .scl 2; .type 32; .endef
+.align 16
+_padlock_capability:
+.L_padlock_capability_begin:
+ pushl %ebx
+ pushfl
+ popl %eax
+ movl %eax,%ecx
+ xorl $2097152,%eax
+ pushl %eax
+ popfl
+ pushfl
+ popl %eax
+ xorl %eax,%ecx
+ xorl %eax,%eax
+ btl $21,%ecx
+ jnc .L000noluck
+ .byte 0x0f,0xa2
+ xorl %eax,%eax
+ cmpl $0x746e6543,%ebx
+ jne .L000noluck
+ cmpl $0x48727561,%edx
+ jne .L000noluck
+ cmpl $0x736c7561,%ecx
+ jne .L000noluck
+ movl $3221225472,%eax
+ .byte 0x0f,0xa2
+ movl %eax,%edx
+ xorl %eax,%eax
+ cmpl $3221225473,%edx
+ jb .L000noluck
+ movl $1,%eax
+ .byte 0x0f,0xa2
+ orl $15,%eax
+ xorl %ebx,%ebx
+ andl $4095,%eax
+ cmpl $1791,%eax
+ sete %bl
+ movl $3221225473,%eax
+ pushl %ebx
+ .byte 0x0f,0xa2
+ popl %ebx
+ movl %edx,%eax
+ shll $4,%ebx
+ andl $4294967279,%eax
+ orl %ebx,%eax
+.L000noluck:
+ popl %ebx
+ ret
+.globl _padlock_key_bswap
+.def _padlock_key_bswap; .scl 2; .type 32; .endef
+.align 16
+_padlock_key_bswap:
+.L_padlock_key_bswap_begin:
+ movl 4(%esp),%edx
+ movl 240(%edx),%ecx
+.L001bswap_loop:
+ movl (%edx),%eax
+ bswap %eax
+ movl %eax,(%edx)
+ leal 4(%edx),%edx
+ subl $1,%ecx
+ jnz .L001bswap_loop
+ ret
+.globl _padlock_verify_context
+.def _padlock_verify_context; .scl 2; .type 32; .endef
+.align 16
+_padlock_verify_context:
+.L_padlock_verify_context_begin:
+ movl 4(%esp),%edx
+ leal .Lpadlock_saved_context,%eax
+ pushfl
+ call __padlock_verify_ctx
+.L002verify_pic_point:
+ leal 4(%esp),%esp
+ ret
+.def __padlock_verify_ctx; .scl 3; .type 32; .endef
+.align 16
+__padlock_verify_ctx:
+ btl $30,4(%esp)
+ jnc .L003verified
+ cmpl (%eax),%edx
+ je .L003verified
+ pushfl
+ popfl
+.L003verified:
+ movl %edx,(%eax)
+ ret
+.globl _padlock_reload_key
+.def _padlock_reload_key; .scl 2; .type 32; .endef
+.align 16
+_padlock_reload_key:
+.L_padlock_reload_key_begin:
+ pushfl
+ popfl
+ ret
+.globl _padlock_aes_block
+.def _padlock_aes_block; .scl 2; .type 32; .endef
+.align 16
+_padlock_aes_block:
+.L_padlock_aes_block_begin:
+ pushl %edi
+ pushl %esi
+ pushl %ebx
+ movl 16(%esp),%edi
+ movl 20(%esp),%esi
+ movl 24(%esp),%edx
+ movl $1,%ecx
+ leal 32(%edx),%ebx
+ leal 16(%edx),%edx
+.byte 243,15,167,200
+ popl %ebx
+ popl %esi
+ popl %edi
+ ret
+.globl _padlock_ecb_encrypt
+.def _padlock_ecb_encrypt; .scl 2; .type 32; .endef
+.align 16
+_padlock_ecb_encrypt:
+.L_padlock_ecb_encrypt_begin:
+ pushl %ebp
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+ movl 20(%esp),%edi
+ movl 24(%esp),%esi
+ movl 28(%esp),%edx
+ movl 32(%esp),%ecx
+ testl $15,%edx
+ jnz .L004ecb_abort
+ testl $15,%ecx
+ jnz .L004ecb_abort
+ leal .Lpadlock_saved_context,%eax
+ pushfl
+ cld
+ call __padlock_verify_ctx
+.L005ecb_pic_point:
+ leal 16(%edx),%edx
+ xorl %eax,%eax
+ xorl %ebx,%ebx
+ cmpl $128,%ecx
+ jbe .L006ecb_short
+ testl $32,(%edx)
+ jnz .L007ecb_aligned
+ testl $15,%edi
+ setz %al
+ testl $15,%esi
+ setz %bl
+ testl %ebx,%eax
+ jnz .L007ecb_aligned
+ negl %eax
+ movl $512,%ebx
+ notl %eax
+ leal -24(%esp),%ebp
+ cmpl %ebx,%ecx
+ cmovcl %ecx,%ebx
+ andl %ebx,%eax
+ movl %ecx,%ebx
+ negl %eax
+ andl $511,%ebx
+ leal (%eax,%ebp,1),%esp
+ andl $-16,%esp
+ jmp .L008ecb_loop
+.align 16
+.L008ecb_loop:
+ movl %edi,(%ebp)
+ movl %esi,4(%ebp)
+ movl %ecx,8(%ebp)
+ movl %ebx,%ecx
+ movl %ebx,12(%ebp)
+ testl $15,%edi
+ cmovnzl %esp,%edi
+ testl $15,%esi
+ jz .L009ecb_inp_aligned
+ shrl $2,%ecx
+.byte 243,165
+ subl %ebx,%edi
+ movl %ebx,%ecx
+ movl %edi,%esi
+.L009ecb_inp_aligned:
+ leal -16(%edx),%eax
+ leal 16(%edx),%ebx
+ shrl $4,%ecx
+.byte 243,15,167,200
+ movl (%ebp),%edi
+ movl 12(%ebp),%ebx
+ testl $15,%edi
+ jz .L010ecb_out_aligned
+ movl %ebx,%ecx
+ shrl $2,%ecx
+ leal (%esp),%esi
+.byte 243,165
+ subl %ebx,%edi
+.L010ecb_out_aligned:
+ movl 4(%ebp),%esi
+ movl 8(%ebp),%ecx
+ addl %ebx,%edi
+ addl %ebx,%esi
+ subl %ebx,%ecx
+ movl $512,%ebx
+ jnz .L008ecb_loop
+ cmpl %ebp,%esp
+ je .L011ecb_done
+ pxor %xmm0,%xmm0
+ leal (%esp),%eax
+.L012ecb_bzero:
+ movaps %xmm0,(%eax)
+ leal 16(%eax),%eax
+ cmpl %eax,%ebp
+ ja .L012ecb_bzero
+.L011ecb_done:
+ leal 24(%ebp),%esp
+ jmp .L013ecb_exit
+.align 16
+.L006ecb_short:
+ xorl %eax,%eax
+ leal -24(%esp),%ebp
+ subl %ecx,%eax
+ leal (%eax,%ebp,1),%esp
+ andl $-16,%esp
+ xorl %ebx,%ebx
+.L014ecb_short_copy:
+ movups (%esi,%ebx,1),%xmm0
+ leal 16(%ebx),%ebx
+ cmpl %ebx,%ecx
+ movaps %xmm0,-16(%esp,%ebx,1)
+ ja .L014ecb_short_copy
+ movl %esp,%esi
+ movl %ecx,%ebx
+ jmp .L008ecb_loop
+.align 16
+.L007ecb_aligned:
+ leal -16(%edx),%eax
+ leal 16(%edx),%ebx
+ shrl $4,%ecx
+.byte 243,15,167,200
+.L013ecb_exit:
+ movl $1,%eax
+ leal 4(%esp),%esp
+.L004ecb_abort:
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.globl _padlock_cbc_encrypt
+.def _padlock_cbc_encrypt; .scl 2; .type 32; .endef
+.align 16
+_padlock_cbc_encrypt:
+.L_padlock_cbc_encrypt_begin:
+ pushl %ebp
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+ movl 20(%esp),%edi
+ movl 24(%esp),%esi
+ movl 28(%esp),%edx
+ movl 32(%esp),%ecx
+ testl $15,%edx
+ jnz .L015cbc_abort
+ testl $15,%ecx
+ jnz .L015cbc_abort
+ leal .Lpadlock_saved_context,%eax
+ pushfl
+ cld
+ call __padlock_verify_ctx
+.L016cbc_pic_point:
+ leal 16(%edx),%edx
+ xorl %eax,%eax
+ xorl %ebx,%ebx
+ cmpl $64,%ecx
+ jbe .L017cbc_short
+ testl $32,(%edx)
+ jnz .L018cbc_aligned
+ testl $15,%edi
+ setz %al
+ testl $15,%esi
+ setz %bl
+ testl %ebx,%eax
+ jnz .L018cbc_aligned
+ negl %eax
+ movl $512,%ebx
+ notl %eax
+ leal -24(%esp),%ebp
+ cmpl %ebx,%ecx
+ cmovcl %ecx,%ebx
+ andl %ebx,%eax
+ movl %ecx,%ebx
+ negl %eax
+ andl $511,%ebx
+ leal (%eax,%ebp,1),%esp
+ andl $-16,%esp
+ jmp .L019cbc_loop
+.align 16
+.L019cbc_loop:
+ movl %edi,(%ebp)
+ movl %esi,4(%ebp)
+ movl %ecx,8(%ebp)
+ movl %ebx,%ecx
+ movl %ebx,12(%ebp)
+ testl $15,%edi
+ cmovnzl %esp,%edi
+ testl $15,%esi
+ jz .L020cbc_inp_aligned
+ shrl $2,%ecx
+.byte 243,165
+ subl %ebx,%edi
+ movl %ebx,%ecx
+ movl %edi,%esi
+.L020cbc_inp_aligned:
+ leal -16(%edx),%eax
+ leal 16(%edx),%ebx
+ shrl $4,%ecx
+.byte 243,15,167,208
+ movaps (%eax),%xmm0
+ movaps %xmm0,-16(%edx)
+ movl (%ebp),%edi
+ movl 12(%ebp),%ebx
+ testl $15,%edi
+ jz .L021cbc_out_aligned
+ movl %ebx,%ecx
+ shrl $2,%ecx
+ leal (%esp),%esi
+.byte 243,165
+ subl %ebx,%edi
+.L021cbc_out_aligned:
+ movl 4(%ebp),%esi
+ movl 8(%ebp),%ecx
+ addl %ebx,%edi
+ addl %ebx,%esi
+ subl %ebx,%ecx
+ movl $512,%ebx
+ jnz .L019cbc_loop
+ cmpl %ebp,%esp
+ je .L022cbc_done
+ pxor %xmm0,%xmm0
+ leal (%esp),%eax
+.L023cbc_bzero:
+ movaps %xmm0,(%eax)
+ leal 16(%eax),%eax
+ cmpl %eax,%ebp
+ ja .L023cbc_bzero
+.L022cbc_done:
+ leal 24(%ebp),%esp
+ jmp .L024cbc_exit
+.align 16
+.L017cbc_short:
+ xorl %eax,%eax
+ leal -24(%esp),%ebp
+ subl %ecx,%eax
+ leal (%eax,%ebp,1),%esp
+ andl $-16,%esp
+ xorl %ebx,%ebx
+.L025cbc_short_copy:
+ movups (%esi,%ebx,1),%xmm0
+ leal 16(%ebx),%ebx
+ cmpl %ebx,%ecx
+ movaps %xmm0,-16(%esp,%ebx,1)
+ ja .L025cbc_short_copy
+ movl %esp,%esi
+ movl %ecx,%ebx
+ jmp .L019cbc_loop
+.align 16
+.L018cbc_aligned:
+ leal -16(%edx),%eax
+ leal 16(%edx),%ebx
+ shrl $4,%ecx
+.byte 243,15,167,208
+ movaps (%eax),%xmm0
+ movaps %xmm0,-16(%edx)
+.L024cbc_exit:
+ movl $1,%eax
+ leal 4(%esp),%esp
+.L015cbc_abort:
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.globl _padlock_xstore
+.def _padlock_xstore; .scl 2; .type 32; .endef
+.align 16
+_padlock_xstore:
+.L_padlock_xstore_begin:
+ pushl %edi
+ movl 8(%esp),%edi
+ movl 12(%esp),%edx
+.byte 15,167,192
+ popl %edi
+ ret
+.def __win32_segv_handler; .scl 3; .type 32; .endef
+.align 16
+__win32_segv_handler:
+ movl $1,%eax
+ movl 4(%esp),%edx
+ movl 12(%esp),%ecx
+ cmpl $3221225477,(%edx)
+ jne .L026ret
+ addl $4,184(%ecx)
+ movl $0,%eax
+.L026ret:
+ ret
+.globl _padlock_sha1_oneshot
+.def _padlock_sha1_oneshot; .scl 2; .type 32; .endef
+.align 16
+_padlock_sha1_oneshot:
+.L_padlock_sha1_oneshot_begin:
+ pushl %edi
+ pushl %esi
+ xorl %eax,%eax
+ movl 12(%esp),%edi
+ movl 16(%esp),%esi
+ movl 20(%esp),%ecx
+ pushl __win32_segv_handler
+.byte 100,255,48
+.byte 100,137,32
+ movl %esp,%edx
+ addl $-128,%esp
+ movups (%edi),%xmm0
+ andl $-16,%esp
+ movl 16(%edi),%eax
+ movaps %xmm0,(%esp)
+ movl %esp,%edi
+ movl %eax,16(%esp)
+ xorl %eax,%eax
+.byte 243,15,166,200
+ movaps (%esp),%xmm0
+ movl 16(%esp),%eax
+ movl %edx,%esp
+.byte 100,143,5,0,0,0,0
+ leal 4(%esp),%esp
+ movl 16(%esp),%edi
+ movups %xmm0,(%edi)
+ movl %eax,16(%edi)
+ popl %esi
+ popl %edi
+ ret
+.globl _padlock_sha1_blocks
+.def _padlock_sha1_blocks; .scl 2; .type 32; .endef
+.align 16
+_padlock_sha1_blocks:
+.L_padlock_sha1_blocks_begin:
+ pushl %edi
+ pushl %esi
+ movl 12(%esp),%edi
+ movl 16(%esp),%esi
+ movl %esp,%edx
+ movl 20(%esp),%ecx
+ addl $-128,%esp
+ movups (%edi),%xmm0
+ andl $-16,%esp
+ movl 16(%edi),%eax
+ movaps %xmm0,(%esp)
+ movl %esp,%edi
+ movl %eax,16(%esp)
+ movl $-1,%eax
+.byte 243,15,166,200
+ movaps (%esp),%xmm0
+ movl 16(%esp),%eax
+ movl %edx,%esp
+ movl 12(%esp),%edi
+ movups %xmm0,(%edi)
+ movl %eax,16(%edi)
+ popl %esi
+ popl %edi
+ ret
+.globl _padlock_sha256_oneshot
+.def _padlock_sha256_oneshot; .scl 2; .type 32; .endef
+.align 16
+_padlock_sha256_oneshot:
+.L_padlock_sha256_oneshot_begin:
+ pushl %edi
+ pushl %esi
+ xorl %eax,%eax
+ movl 12(%esp),%edi
+ movl 16(%esp),%esi
+ movl 20(%esp),%ecx
+ pushl __win32_segv_handler
+.byte 100,255,48
+.byte 100,137,32
+ movl %esp,%edx
+ addl $-128,%esp
+ movups (%edi),%xmm0
+ andl $-16,%esp
+ movups 16(%edi),%xmm1
+ movaps %xmm0,(%esp)
+ movl %esp,%edi
+ movaps %xmm1,16(%esp)
+ xorl %eax,%eax
+.byte 243,15,166,208
+ movaps (%esp),%xmm0
+ movaps 16(%esp),%xmm1
+ movl %edx,%esp
+.byte 100,143,5,0,0,0,0
+ leal 4(%esp),%esp
+ movl 16(%esp),%edi
+ movups %xmm0,(%edi)
+ movups %xmm1,16(%edi)
+ popl %esi
+ popl %edi
+ ret
+.globl _padlock_sha256_blocks
+.def _padlock_sha256_blocks; .scl 2; .type 32; .endef
+.align 16
+_padlock_sha256_blocks:
+.L_padlock_sha256_blocks_begin:
+ pushl %edi
+ pushl %esi
+ movl 12(%esp),%edi
+ movl 16(%esp),%esi
+ movl 20(%esp),%ecx
+ movl %esp,%edx
+ addl $-128,%esp
+ movups (%edi),%xmm0
+ andl $-16,%esp
+ movups 16(%edi),%xmm1
+ movaps %xmm0,(%esp)
+ movl %esp,%edi
+ movaps %xmm1,16(%esp)
+ movl $-1,%eax
+.byte 243,15,166,208
+ movaps (%esp),%xmm0
+ movaps 16(%esp),%xmm1
+ movl %edx,%esp
+ movl 12(%esp),%edi
+ movups %xmm0,(%edi)
+ movups %xmm1,16(%edi)
+ popl %esi
+ popl %edi
+ ret
+.globl _padlock_sha512_blocks
+.def _padlock_sha512_blocks; .scl 2; .type 32; .endef
+.align 16
+_padlock_sha512_blocks:
+.L_padlock_sha512_blocks_begin:
+ pushl %edi
+ pushl %esi
+ movl 12(%esp),%edi
+ movl 16(%esp),%esi
+ movl 20(%esp),%ecx
+ movl %esp,%edx
+ addl $-128,%esp
+ movups (%edi),%xmm0
+ andl $-16,%esp
+ movups 16(%edi),%xmm1
+ movups 32(%edi),%xmm2
+ movups 48(%edi),%xmm3
+ movaps %xmm0,(%esp)
+ movl %esp,%edi
+ movaps %xmm1,16(%esp)
+ movaps %xmm2,32(%esp)
+ movaps %xmm3,48(%esp)
+.byte 243,15,166,224
+ movaps (%esp),%xmm0
+ movaps 16(%esp),%xmm1
+ movaps 32(%esp),%xmm2
+ movaps 48(%esp),%xmm3
+ movl %edx,%esp
+ movl 12(%esp),%edi
+ movups %xmm0,(%edi)
+ movups %xmm1,16(%edi)
+ movups %xmm2,32(%edi)
+ movups %xmm3,48(%edi)
+ popl %esi
+ popl %edi
+ ret
+.byte 86,73,65,32,80,97,100,108,111,99,107,32,120,56,54,32
+.byte 109,111,100,117,108,101,44,32,67,82,89,80,84,79,71,65
+.byte 77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101
+.byte 110,115,115,108,46,111,114,103,62,0
+.align 16
+.data
+.align 4
+.Lpadlock_saved_context:
+.long 0
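
The ".byte" sequences in this file hand-assemble the VIA PadLock instructions
that assemblers of the time lacked mnemonics for: 243,15,167,200 is
"rep xcryptecb", 243,15,167,208 is "rep xcryptcbc", 243,15,166,200/208/224 are
"rep xsha1"/"rep xsha256"/"rep xsha512", 15,167,192 is "xstore",
0x0f,0xa2 is "cpuid", and 243,165 is an ordinary "rep movsl" used to bounce
misaligned buffers through the on-stack staging area. As a rough illustration
of how the exported symbols are driven, here is a hedged C caller sketch: the
prototypes follow the comments in OpenSSL's e_padlock-x86.pl, while the
PADLOCK_ACE mask and the context layout are inferences from the code above,
not definitions shipped in this commit.

/*
 * Caller sketch (assumed prototypes, not part of this file).  On 32-bit
 * Windows the C compiler adds the leading underscore that the COFF
 * symbols above carry (_padlock_capability and so on).
 */
#include <stddef.h>
#include <stdio.h>

extern int padlock_capability(void);
extern int padlock_ecb_encrypt(void *out, const void *inp,
                               void *ctx, size_t len);

/* ACE (the PadLock AES engine): bit 6 = present, bit 7 = enabled in the
 * CPUID 0xC0000001 %edx flags that padlock_capability() returns. */
#define PADLOCK_ACE ((1 << 6) | (1 << 7))

int main(void)
{
    int cap = padlock_capability();   /* 0 on non-VIA ("CentaurHauls") CPUs */

    if ((cap & PADLOCK_ACE) != PADLOCK_ACE) {
        fprintf(stderr, "VIA PadLock ACE not available\n");
        return 1;
    }
    /*
     * A real caller builds a 16-byte-aligned context with the layout the
     * assembly implies: saved IV at ctx+0, control word (key size, rounds,
     * direction) at ctx+16, and AES round keys from ctx+32 -- note the
     * "leal 16(%edx),%edx" / "leal 32(%edx),%ebx" address math -- then
     * calls padlock_ecb_encrypt(out, in, ctx, len) with len a multiple of
     * the 16-byte AES block size.
     */
    return 0;
}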