summaryrefslogtreecommitdiff
path: root/x86
diff options
context:
space:
mode:
authorNiels Möller <nisse@lysator.liu.se>2010-07-25 20:23:56 +0200
committerNiels Möller <nisse@lysator.liu.se>2010-07-25 20:23:56 +0200
commitb878b6d47b3f8cc7feacb6beb5351e6598d8a398 (patch)
treeb6fe4cd1275b54bfbfdc21c87cfb3c8204306647 /x86
parent15f754b5034fd01b22c5492c4d2bc80727a446a1 (diff)
downloadnettle-b878b6d47b3f8cc7feacb6beb5351e6598d8a398.tar.gz
New file.
Rev: nettle/x86/camellia-crypt-internal.asm:1.1
Diffstat (limited to 'x86')
-rw-r--r--x86/camellia-crypt-internal.asm213
1 files changed, 213 insertions, 0 deletions
diff --git a/x86/camellia-crypt-internal.asm b/x86/camellia-crypt-internal.asm
new file mode 100644
index 00000000..b5c491c8
--- /dev/null
+++ b/x86/camellia-crypt-internal.asm
@@ -0,0 +1,213 @@
+C -*- mode: asm; asm-comment-char: ?C; -*-
+C nettle, low-level cryptographics library
+C
+C Copyright (C) 2010, Niels Möller
+C
+C The nettle library is free software; you can redistribute it and/or modify
+C it under the terms of the GNU Lesser General Public License as published by
+C the Free Software Foundation; either version 2.1 of the License, or (at your
+C option) any later version.
+C
+C The nettle library is distributed in the hope that it will be useful, but
+C WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+C or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+C License for more details.
+C
+C You should have received a copy of the GNU Lesser General Public License
+C along with the nettle library; see the file COPYING.LIB. If not, write to
+C the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+C MA 02111-1307, USA.
+
+C Register usage:
+
+C Camellia state, 128-bit value in little endian order.
+C L0, H0 correspond to D1 in the spec and i0 in the C implementation,
+C while L1, H1 correspond to D2/i1.
+C State words. The H registers receive the first and third 32-bit words
+C of an input block, the L registers the second and fourth.
+define(<L0>,  <%eax>)
+define(<H0>,  <%ebx>)
+define(<L1>,  <%ecx>)
+define(<H1>,  <%edx>)
+
+C Scratch register: table index in ROUND, temporary in FL/FLINV, and
+C pointer register when loading and storing blocks.
+define(<TMP>, <%ebp>)
+C Pointer into the subkey array; all subkey offsets are relative to it.
+define(<KEY>, <%esi>)
+C Base address of the lookup tables indexed through the SP macros.
+define(<T>,   <%edi>)
+
+C Locals on the stack
+
+C Layout relative to %esp once the prologue of _nettle_camellia_crypt
+C has pushed four registers and reserved 20 bytes:
+C    0 .. 19   the locals below
+C   20 .. 35   saved %edi, %esi, %ebp, %ebx
+C   36         return address
+C   40 ..      arguments
+
+C Spill slots for the four state words, plus the round-group counter.
+define(<FRAME_L0>,     <(%esp)>)
+define(<FRAME_H0>,     <4(%esp)>)
+define(<FRAME_L1>,     <8(%esp)>)
+define(<FRAME_H1>,     <12(%esp)>)
+define(<FRAME_CNT>,    <16(%esp)>)
+
+C Arguments on stack, in the order of the C prototype.
+define(<FRAME_CTX>,    <40(%esp)>)
+define(<FRAME_TABLE>,  <44(%esp)>)
+define(<FRAME_LENGTH>, <48(%esp)>)
+define(<FRAME_DST>,    <52(%esp)>)
+define(<FRAME_SRC>,    <56(%esp)>)
+
+C Table selectors. Each takes an index register and yields the memory
+C operand for one 32-bit entry. The four tables follow each other at
+C 1024-byte intervals from T and are indexed with a scale of 4.
+define(<SP1110>, <(T,$1,4)>)
+define(<SP0222>, <1024(T,$1,4)>)
+define(<SP3033>, <2048(T,$1,4)>)
+define(<SP4404>, <3072(T,$1,4)>)
+
+C ROUND(xl, xh, yl, yh, key-offset)
+C xl and xh are rotated 16 bits at the end
+C yl and yh are read from stack, and left in registers
+C
+C One round: eight table lookups indexed by bytes of xl/xh are combined
+C with the 64-bit subkey at key-offset(KEY), mixed, and xored with the
+C copy of y saved in FRAME_yl / FRAME_yh. The result is left in the
+C registers yl/yh; their incoming contents are overwritten, not read.
+C
+C yl and yh must be passed as quoted names (as ROUND6 does), since they
+C are also pasted onto FRAME_ to name the stack slots.
+C TMP is clobbered. T must hold the table base and KEY the subkey pointer.
+C
+C NOTE(review): LREG and HREG are defined outside this file; presumably
+C they name the low byte and the second byte of a register (%al / %ah
+C style), so that after the 16-bit rotate the same pair reaches the two
+C upper bytes. Confirm against the machine-specific m4 definitions.
+define(<ROUND>, <
+ C yh = SP1110[LREG byte of xl] ^ SP4404[HREG byte of xl]
+ movzbl LREG($1), TMP
+ movl SP1110(TMP), $4
+ movzbl HREG($1), TMP
+ xorl SP4404(TMP), $4
+ C Bring the other half of xl into the byte registers
+ roll <$>16, $1
+
+ C yl = SP4404[LREG byte of xh] ^ SP3033[HREG byte of xh]
+ movzbl LREG($2), TMP
+ movl SP4404(TMP), $3
+ movzbl HREG($2), TMP
+ xorl SP3033(TMP), $3
+ C Bring the other half of xh into the byte registers
+ roll <$>16, $2
+
+ C yh ^= SP3033[LREG byte] ^ SP0222[HREG byte] of the rotated xl
+ movzbl LREG($1), TMP
+ xorl SP3033(TMP), $4
+ movzbl HREG($1), TMP
+ xorl SP0222(TMP), $4
+
+ C yl ^= SP0222[LREG byte] ^ SP1110[HREG byte] of the rotated xh
+ movzbl LREG($2), TMP
+ xorl SP0222(TMP), $3
+ movzbl HREG($2), TMP
+ xorl SP1110(TMP), $3
+
+ C Add the subkey: word at key-offset into yh, word at key-offset + 4 into yl
+ xorl $5(KEY), $4
+ xorl $5 + 4(KEY), $3
+
+ C Mix the two words: yh ^= yl, yl = ror(yl, 8) ^ yh
+ xorl $3, $4
+ rorl <$>8, $3
+ xorl $4, $3
+
+ C Fold in the saved value of y from the stack
+ xorl FRAME_$3, $3
+ xorl FRAME_$4, $4
+>)
+
+C SPILL_PAIR(lo, hi)
+C Copies a register pair to the FRAME_ slots of the same name. Both
+C arguments must be quoted register names, because each one is also
+C pasted onto FRAME_ to select the slot.
+define(<SPILL_PAIR>, <
+        movl    $1, FRAME_$1
+        movl    $2, FRAME_$2
+>)
+
+C ROUND6
+C Six consecutive rounds using the subkeys at 0, 8, ..., 40 bytes from
+C KEY. The state is taken from and returned in L0, H0, L1, H1.
+C FRAME_L0 .. FRAME_H1 and TMP are overwritten.
+define(<ROUND6>, <
+        SPILL_PAIR(<L0>, <H0>)
+        SPILL_PAIR(<L1>, <H1>)
+
+        ROUND(L0, H0, <L1>, <H1>, 0)
+        SPILL_PAIR(<L1>, <H1>)
+        ROUND(L1, H1, <L0>, <H0>, 8)
+        SPILL_PAIR(<L0>, <H0>)
+        ROUND(L0, H0, <L1>, <H1>, 16)
+        SPILL_PAIR(<L1>, <H1>)
+        ROUND(L1, H1, <L0>, <H0>, 24)
+        SPILL_PAIR(<L0>, <H0>)
+        ROUND(L0, H0, <L1>, <H1>, 32)
+        C No spill needed here: the stack copy of L0, H0 is still current
+        ROUND(L1, H1, <L0>, <H0>, 40)
+        C The last ROUND left L1, H1 rotated by 16 bits; rotate them back
+        roll    <$>16, L1
+        roll    <$>16, H1
+>)
+
+C FL and FLINV consist of the same two steps, applied in opposite order.
+C In both, x0 and x1 are registers that are updated in place, key-offset
+C is the byte offset from KEY of a 64-bit subkey, and TMP is clobbered.
+
+C FL_AND_STEP(x0, x1, key-offset)
+C x0 ^= rotate-left-by-1 of (x1 AND the subkey word at key-offset + 4)
+define(<FL_AND_STEP>, <
+        movl    $3 + 4(KEY), TMP
+        andl    $2, TMP
+        roll    <$>1, TMP
+        xorl    TMP, $1
+>)
+
+C FL_OR_STEP(x0, x1, key-offset)
+C x1 ^= (x0 OR the subkey word at key-offset)
+define(<FL_OR_STEP>, <
+        movl    $3(KEY), TMP
+        orl     $1, TMP
+        xorl    TMP, $2
+>)
+
+C FL(x0, x1, key-offset)
+define(<FL>, <
+        FL_AND_STEP($1, $2, $3)
+        FL_OR_STEP($1, $2, $3)
+>)
+
+C FLINV(x0, x1, key-offset)
+define(<FLINV>, <
+        FL_OR_STEP($1, $2, $3)
+        FL_AND_STEP($1, $2, $3)
+>)
+
+C Logical file name passed to the assembler; it must match this file's name.
+.file "camellia-crypt-internal.asm"
+
+        C _camellia_crypt(struct camellia_context *ctx,
+        C                 const struct camellia_table *T,
+        C                 unsigned length, uint8_t *dst,
+        C                 uint8_t *src)
+        C
+        C The symbol defined here is _nettle_camellia_crypt.
+        C
+        C Processes the input in 16-byte blocks, reading from src and
+        C writing to dst. All arguments are taken from the stack (see the
+        C FRAME_ macros); %ebx, %ebp, %esi and %edi are saved and restored.
+        C
+        C A zero length returns at once. Otherwise blocks are processed
+        C until the remaining length reaches zero or wraps around, so a
+        C trailing partial block is treated as a full one.
+        C NOTE(review): callers presumably pass a multiple of 16 - confirm.
+        C
+        C Use of ctx, as read by this code:
+        C   offset 0    32-bit count; the round loop below runs
+        C               (count - 8) / 8 times
+        C   offset 4    first 64-bit subkey, used for input whitening
+        C   offset 12   remaining subkeys, consumed 64 bytes per loop pass
+        .text
+        ALIGN(4)
+PROLOGUE(_nettle_camellia_crypt)
+        C save all registers that need to be saved
+        pushl   %ebx            C  32(%esp)
+        pushl   %ebp            C  28(%esp)
+        pushl   %esi            C  24(%esp)
+        pushl   %edi            C  20(%esp)
+
+        C Room for FRAME_L0 .. FRAME_CNT
+        subl    $20, %esp
+
+        movl    FRAME_LENGTH, %ebp
+        testl   %ebp,%ebp
+        jz      .Lend
+
+.Lblock_loop:
+        C Load data, note that we'll happily do unaligned loads
+        C Each 32-bit word of the block is byte swapped after loading.
+        movl    FRAME_SRC, TMP
+        movl    (TMP), H0
+        bswap   H0
+        movl    4(TMP), L0
+        bswap   L0
+        movl    8(TMP), H1
+        bswap   H1
+        movl    12(TMP), L1
+        bswap   L1
+        addl    $16, FRAME_SRC
+
+        C FRAME_CNT = (count word at the start of ctx) - 8
+        movl    FRAME_CTX, KEY
+        movl    (KEY), TMP
+        subl    $8, TMP
+        movl    TMP, FRAME_CNT
+        C Whitening using first subkey
+        xorl    4(KEY), L0
+        xorl    8(KEY), H0
+        C Step past the count word and the whitening subkey
+        addl    $12, KEY
+
+        movl    FRAME_TABLE, T
+
+        ROUND6
+.Lround_loop:
+        C Each pass uses eight subkeys (64 bytes): the six consumed by the
+        C previous ROUND6 are skipped, the next two feed FL and FLINV
+        C (at -16 and -8 after the advance), and ROUND6 uses six more.
+        addl    $64, KEY
+        FL(L0, H0, -16)
+        FLINV(L1, H1, -8)
+        ROUND6
+        C Explicit l suffix: an immediate-to-memory operation has no
+        C register operand from which the operand size could be inferred.
+        subl    $8, FRAME_CNT
+        ja      .Lround_loop
+
+        C Store the result with the two 64-bit halves swapped: L1, H1 go
+        C to the first eight bytes, after an xor with the subkey at
+        C 48(KEY), and L0, H0 go to the last eight.
+        movl    FRAME_DST, TMP
+        bswap   H0
+        movl    H0, 8(TMP)
+        bswap   L0
+        movl    L0, 12(TMP)
+        xorl    52(KEY), H1
+        bswap   H1
+        movl    H1, 0(TMP)
+        xorl    48(KEY), L1
+        bswap   L1
+        movl    L1, 4(TMP)
+        addl    $16, FRAME_DST
+        subl    $16, FRAME_LENGTH
+        ja      .Lblock_loop
+
+.Lend:
+        addl    $20, %esp
+        popl    %edi
+        popl    %esi
+        popl    %ebp
+        popl    %ebx
+        ret
+EPILOGUE(_nettle_camellia_crypt)