summary refs log tree commit diff
path: root/mozilla/security/nss/lib/freebl/arcfour-amd64-gas.s
diff options
context:
space:
mode:
Diffstat (limited to 'mozilla/security/nss/lib/freebl/arcfour-amd64-gas.s')
-rw-r--r-- mozilla/security/nss/lib/freebl/arcfour-amd64-gas.s 120
1 files changed, 120 insertions, 0 deletions
diff --git a/mozilla/security/nss/lib/freebl/arcfour-amd64-gas.s b/mozilla/security/nss/lib/freebl/arcfour-amd64-gas.s
new file mode 100644
index 0000000..e131fd1
--- /dev/null
+++ b/mozilla/security/nss/lib/freebl/arcfour-amd64-gas.s
@@ -0,0 +1,120 @@
+# ***** BEGIN LICENSE BLOCK *****
+# Version: MPL 1.1/GPL 2.0/LGPL 2.1
+#
+# The contents of this file are subject to the Mozilla Public License Version
+# 1.1 (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+# http://www.mozilla.org/MPL/
+#
+# Software distributed under the License is distributed on an "AS IS" basis,
+# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+# for the specific language governing rights and limitations under the
+# License.
+#
+# The Original Code is "Marc Bevand's fast AMD64 ARCFOUR source"
+#
+# The Initial Developer of the Original Code is
+# Marc Bevand <bevand_m@epita.fr> .
+# Portions created by the Initial Developer are
+# Copyright (C) 2004 the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+#
+# Alternatively, the contents of this file may be used under the terms of
+# either the GNU General Public License Version 2 or later (the "GPL"), or
+# the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+# in which case the provisions of the GPL or the LGPL are applicable instead
+# of those above. If you wish to allow use of your version of this file only
+# under the terms of either the GPL or the LGPL, and not to allow others to
+# use your version of this file under the terms of the MPL, indicate your
+# decision by deleting the provisions above and replace them with the notice
+# and other provisions required by the GPL or the LGPL. If you do not delete
+# the provisions above, a recipient may use your version of this file under
+# the terms of any one of the MPL, the GPL or the LGPL.
+#
+# ***** END LICENSE BLOCK *****
+
+# ** ARCFOUR implementation optimized for AMD64 (GAS, AT&T syntax).
+# ** In: %rdi = key, %rsi = len, %rdx = in, %rcx = out; XORs len bytes of in with the rc4 stream and stores them to out.
+# ** key as accessed here: x at +0, y at +8, d[] at +16 (8 bytes per entry); only the low bytes of x and y are written back.
+# ** Clobbers %rax %rcx %rdx %rsi %rdi %r8 %r9 %r11 and flags; %rbx and %rbp are saved and restored; no return value is set up.
+# ** The throughput achieved by this code is about 320 MBytes/sec, on a 1.8 GHz AMD Opteron (rev C0) processor.
+.text
+.align 16 # 16-byte align the entry point
+.globl ARCFOUR # export the symbol
+.type ARCFOUR,@function
+ARCFOUR:
+ pushq %rbp # save callee-saved rbp (used below as key / table pointer)
+ pushq %rbx # save callee-saved rbx (used below as end pointer, then ty/val scratch)
+ movq %rdi, %rbp # key = ARG(key)
+ movq %rsi, %rbx # rbx = ARG(len)
+ movq %rdx, %rsi # in = ARG(in)
+ movq %rcx, %rdi # out = ARG(out)
+ movq (%rbp), %rcx # x = key->x (64-bit load)
+ movq 8(%rbp), %rdx # y = key->y (64-bit load, not masked: assumes stored y < 256 -- TODO confirm)
+ addq $16, %rbp # d = key->data
+ incq %rcx # x++
+ andq $255, %rcx # x &= 0xff
+ leaq -8(%rbx,%rsi), %rbx # rbx = in+len-8
+ movq %rbx, %r9 # tmp = in+len-8 (kept in r9 because rbx is overwritten as scratch below)
+ movq 0(%rbp,%rcx,8), %rax # tx = d[x] (preloaded for the first round)
+ cmpq %rsi, %rbx # cmp in with in+len-8 (signed compare)
+ jl .Lend # jump if (in+len-8 < in): no full 8-byte chunk, go to the byte loop
+# 8-byte loop; on entry to each pass: rcx = x (already advanced), rax = tx = d[x], rdx = y, r9 = in+len-8
+.Lstart:
+ addq $8, %rsi # increment in (current chunk is now at -8(%rsi))
+ addq $8, %rdi # increment out (current chunk is now at -8(%rdi))
+
+ # generate the next 8 bytes of the rc4 stream into %r8 (after 8 rotates the first byte sits in the low byte)
+ movq $8, %r11 # byte counter
+1: addb %al, %dl # y += tx (byte add, wraps mod 256)
+ movl 0(%rbp,%rdx,8), %ebx # ty = d[y] (32-bit load also clears the upper half of rbx)
+ movl %ebx, 0(%rbp,%rcx,8) # d[x] = ty
+ addb %al, %bl # val = ty + tx (byte add, wraps mod 256)
+ movl %eax, 0(%rbp,%rdx,8) # d[y] = tx
+ incb %cl # x++ (NEXT ROUND), byte increment wraps mod 256
+ movl 0(%rbp,%rcx,8), %eax # tx = d[x] (NEXT ROUND), read after both stores above
+ movb 0(%rbp,%rbx,8), %r8b # val = d[val] (index is all of rbx: assumes table entries fit in a byte -- TODO confirm)
+ decb %r11b # counter--; sets ZF for the jnz below
+ rorq $8, %r8 # move the new byte out of the low position (ror does not change ZF)
+ jnz 1b # repeat until 8 stream bytes are collected
+
+ # xor 8 input bytes with the stream bytes and store the result
+ xorq -8(%rsi), %r8 # r8 ^= 8 bytes of in
+ cmpq %r9, %rsi # cmp in+len-8 with in
+ movq %r8, -8(%rdi) # store 8 bytes to out (mov leaves the flags from cmp intact)
+ jle .Lstart # jump if (in <= in+len-8): another full chunk remains
+
+.Lend:
+ addq $8, %r9 # tmp = in+len
+
+ # handle the last bytes, one by one (same round as above, without the rotate)
+1: cmpq %rsi, %r9 # cmp in with in+len
+ jle .Lfinished # jump if (in+len <= in)
+ addb %al, %dl # y += tx
+ movl 0(%rbp,%rdx,8), %ebx # ty = d[y]
+ movl %ebx, 0(%rbp,%rcx,8) # d[x] = ty
+ addb %al, %bl # val = ty + tx
+ movl %eax, 0(%rbp,%rdx,8) # d[y] = tx
+ incb %cl # x++ (NEXT ROUND)
+ movl 0(%rbp,%rcx,8), %eax # tx = d[x] (NEXT ROUND)
+ movb 0(%rbp,%rbx,8), %r8b # val = d[val]
+ xorb (%rsi), %r8b # xor 1 byte
+ movb %r8b, (%rdi) # store 1 byte to out
+ incq %rsi # in++
+ incq %rdi # out++
+ jmp 1b # back to the end-of-input check
+
+.Lfinished:
+ decq %rcx # x-- (undo the look-ahead increment done for the NEXT ROUND)
+ movb %dl, -8(%rbp) # key->y = y (low byte only; rbp was advanced by 16 above)
+ movb %cl, -16(%rbp) # key->x = x (low byte only)
+ popq %rbx # restore the registers saved on entry
+ popq %rbp
+ ret
+.L_ARCFOUR_end:
+.size ARCFOUR,.L_ARCFOUR_end-ARCFOUR # symbol size = end label minus entry label
+
+# Empty .note.GNU-stack section: marker indicating this object has no need for an executable stack
+.section .note.GNU-stack,"",@progbits
+.previous # switch back to the section that was active before the .section above