diff options
Diffstat (limited to 'mozilla/security/nss/lib/freebl/arcfour-amd64-gas.s')
-rw-r--r-- | mozilla/security/nss/lib/freebl/arcfour-amd64-gas.s | 120 |
1 files changed, 120 insertions, 0 deletions
diff --git a/mozilla/security/nss/lib/freebl/arcfour-amd64-gas.s b/mozilla/security/nss/lib/freebl/arcfour-amd64-gas.s new file mode 100644 index 0000000..e131fd1 --- /dev/null +++ b/mozilla/security/nss/lib/freebl/arcfour-amd64-gas.s @@ -0,0 +1,120 @@ +# ***** BEGIN LICENSE BLOCK ***** +# Version: MPL 1.1/GPL 2.0/LGPL 2.1 +# +# The contents of this file are subject to the Mozilla Public License Version +# 1.1 (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# http://www.mozilla.org/MPL/ +# +# Software distributed under the License is distributed on an "AS IS" basis, +# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License +# for the specific language governing rights and limitations under the +# License. +# +# The Original Code is "Marc Bevand's fast AMD64 ARCFOUR source" +# +# The Initial Developer of the Original Code is +# Marc Bevand <bevand_m@epita.fr> . +# Portions created by the Initial Developer are +# Copyright (C) 2004 the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# +# Alternatively, the contents of this file may be used under the terms of +# either the GNU General Public License Version 2 or later (the "GPL"), or +# the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), +# in which case the provisions of the GPL or the LGPL are applicable instead +# of those above. If you wish to allow use of your version of this file only +# under the terms of either the GPL or the LGPL, and not to allow others to +# use your version of this file under the terms of the MPL, indicate your +# decision by deleting the provisions above and replace them with the notice +# and other provisions required by the GPL or the LGPL. If you do not delete +# the provisions above, a recipient may use your version of this file under +# the terms of any one of the MPL, the GPL or the LGPL. 
#
# ***** END LICENSE BLOCK *****

# ** ARCFOUR implementation optimized for AMD64.
# **
# ** The throughput achieved by this code is about 320 MBytes/sec, on
# ** a 1.8 GHz AMD Opteron (rev C0) processor.

# ----------------------------------------------------------------------
# ARCFOUR(key, len, in, out)
#
# Syntax: GNU as, AT&T operand order (op src, dst).
# Arguments are taken from %rdi, %rsi, %rdx, %rcx, in that order:
#   %rdi = key   state block, laid out as this code accesses it:
#                    0(key)  x index  (only the low byte is used/stored)
#                    8(key)  y index  (only the low byte is used/stored)
#                   16(key)  d[256]   one table entry per 8-byte slot,
#                                     read and written as 32-bit values
#   %rsi = len   number of bytes to process
#   %rdx = in    input buffer
#   %rcx = out   output buffer
#
# Effects: writes len bytes to out (input XOR generated stream), swaps
# entries of d[] as it goes, and stores the low byte of the updated x
# and y back into the key block.  %rax is not set to a meaningful
# return value (it holds a table entry on exit).
#
# Register roles inside the function:
#   %rbp = d (key+16)      %rcx = x             %rdx = y
#   %rax = tx = d[x], always preloaded for the NEXT round
#   %rbx = ty, then the output index (ty + tx) & 0xff
#   %rsi = in              %rdi = out
#   %r9  = in+len-8 during the block loop, in+len during the tail loop
#   %r8  = 8-byte stream accumulator      %r11 = per-block byte counter
#
# %rbx and %rbp are saved and restored.  %rax, %rcx, %rdx, %rsi, %rdi,
# %r8, %r9, %r11 and the flags are clobbered.
#
# Invariant relied upon by every indexed access: %rcx, %rdx and %rbx
# stay in 0..255.  x is masked after its load, y is loaded with a
# zero-extending byte load, and table entries are assumed to be < 256.
# ----------------------------------------------------------------------

.text
.align 16
.globl ARCFOUR
.type ARCFOUR,@function
ARCFOUR:
        pushq   %rbp
        pushq   %rbx
        movq    %rdi, %rbp              # key = ARG(key)
        movq    %rsi, %rbx              # rbx = ARG(len)
        movq    %rdx, %rsi              # in  = ARG(in)
        movq    %rcx, %rdi              # out = ARG(out)
        movq    (%rbp), %rcx            # x = key->x
        # Only the low byte of y is ever written back (see .Lfinished),
        # so load just that byte.  A full 64-bit load would let stale
        # upper bytes turn y into an out-of-range table index.
        movzbl  8(%rbp), %edx           # y = key->y & 0xff
        addq    $16, %rbp               # d = key->data
        incq    %rcx                    # x++
        andq    $255, %rcx              # x &= 0xff
        leaq    -8(%rbx,%rsi), %rbx     # rbx = in+len-8
        movq    %rbx, %r9               # tmp = in+len-8
        # 32-bit load like every other table access; it zero-extends
        # into %rax, whose upper half is never consumed.
        movl    0(%rbp,%rcx,8), %eax    # tx = d[x]
        cmpq    %rsi, %rbx              # cmp in with in+len-8 (signed)
        jl      .Lend                   # fewer than 8 bytes: tail only

        # ---- main loop: 8 input bytes per iteration ------------------
.Lstart:
        addq    $8, %rsi                # increment in
        addq    $8, %rdi                # increment out

        # Generate the next 8 stream bytes into %r8.  Each new byte is
        # placed in %r8b and the register is rotated right by 8, so
        # after 8 rounds the first byte generated sits in the lowest
        # byte, matching little-endian memory order.
        movq    $8, %r11                # byte counter
.Lgen_byte:
        addb    %al, %dl                # y += tx
        movl    0(%rbp,%rdx,8), %ebx    # ty = d[y]
        movl    %ebx, 0(%rbp,%rcx,8)    # d[x] = ty
        addb    %al, %bl                # val = ty + tx
        movl    %eax, 0(%rbp,%rdx,8)    # d[y] = tx
        incb    %cl                     # x++ (NEXT ROUND)
        movl    0(%rbp,%rcx,8), %eax    # tx = d[x] (NEXT ROUND)
        movb    0(%rbp,%rbx,8), %r8b    # val = d[val]
        decb    %r11b                   # sets ZF for the jnz below
        rorq    $8, %r8                 # (ror does not change ZF)
        jnz     .Lgen_byte

        # xor 8 bytes
        xorq    -8(%rsi), %r8
        cmpq    %r9, %rsi               # cmp in+len-8 with in (signed)
        movq    %r8, -8(%rdi)           # mov leaves the flags intact
        jle     .Lstart                 # jump if (in <= in+len-8)

        # ---- tail: remaining 0..7 bytes, one at a time ---------------
.Lend:
        addq    $8, %r9                 # tmp = in+len

.Ltail:
        cmpq    %rsi, %r9               # cmp in with in+len (signed)
        jle     .Lfinished              # jump if (in+len <= in)
        addb    %al, %dl                # y += tx
        movl    0(%rbp,%rdx,8), %ebx    # ty = d[y]
        movl    %ebx, 0(%rbp,%rcx,8)    # d[x] = ty
        addb    %al, %bl                # val = ty + tx
        movl    %eax, 0(%rbp,%rdx,8)    # d[y] = tx
        incb    %cl                     # x++ (NEXT ROUND)
        movl    0(%rbp,%rcx,8), %eax    # tx = d[x] (NEXT ROUND)
        movb    0(%rbp,%rbx,8), %r8b    # val = d[val]
        xorb    (%rsi), %r8b            # xor 1 byte
        movb    %r8b, (%rdi)
        incq    %rsi                    # in++
        incq    %rdi                    # out++
        jmp     .Ltail

.Lfinished:
        # x was already advanced for a round that never ran; undo that
        # before saving.  %rbp points at key+16 here, hence the
        # negative offsets.
        decq    %rcx                    # x--
        movb    %dl, -8(%rbp)           # key->y = y (low byte only)
        movb    %cl, -16(%rbp)          # key->x = x (low byte only)
        popq    %rbx
        popq    %rbp
        ret
.L_ARCFOUR_end:
.size ARCFOUR,.L_ARCFOUR_end-ARCFOUR

# Magic indicating no need for an executable stack
.section .note.GNU-stack,"",@progbits
.previous