summary refs log tree commit diff
diff options
context:
space:
mode:
author Niels Möller <nisse@lysator.liu.se> 2021-08-10 22:23:14 +0200
committer Niels Möller <nisse@lysator.liu.se> 2021-08-10 22:23:14 +0200
commit 463553ae61f1844e62d26107accacba21b1ddef1 (patch)
tree 863db63e9dce9cfcbfb50aec660d992fdab72460
parent c7391e5cdb8a0afc05186d484bc9f752b8f0c074 (diff)
download nettle-x86_64-aes-refactor.tar.gz
x86_64: New 2-way aesni loop also for aes256 [x86_64-aes-refactor]
-rw-r--r-- ChangeLog 2
-rw-r--r-- x86_64/aesni/aes256-decrypt.asm 76
-rw-r--r-- x86_64/aesni/aes256-encrypt.asm 75
3 files changed, 122 insertions, 31 deletions
diff --git a/ChangeLog b/ChangeLog
index 14609c8a..5b082988 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -4,7 +4,7 @@
* x86_64/aesni/aes128-decrypt.asm: Likewise.
* x86_64/aesni/aes192-encrypt.asm: Likewise.
* x86_64/aesni/aes192-decrypt.asm: Likewise.
- * x86_64/aesni/aes256-encrypt.asm: New file, but 1-way loop.
+ * x86_64/aesni/aes256-encrypt.asm: Likewise.
* x86_64/aesni/aes256-decrypt.asm: Likewise.
* x86_64/aesni/aes-encrypt-internal.asm: Deleted.
* x86_64/aesni/aes-decrypt-internal.asm: Deleted.
diff --git a/x86_64/aesni/aes256-decrypt.asm b/x86_64/aesni/aes256-decrypt.asm
index 122f1db6..0fc5ad2a 100644
--- a/x86_64/aesni/aes256-decrypt.asm
+++ b/x86_64/aesni/aes256-decrypt.asm
@@ -36,22 +36,22 @@ define(`LENGTH',`%rsi')
define(`DST', `%rdx')
define(`SRC', `%rcx')
-define(`KEY0', `%xmm0')
+define(`KEY0_7', `%xmm0')
define(`KEY1', `%xmm1')
define(`KEY2', `%xmm2')
define(`KEY3', `%xmm3')
define(`KEY4', `%xmm4')
define(`KEY5', `%xmm5')
define(`KEY6', `%xmm6')
-define(`KEY7', `%xmm7')
-define(`KEY8', `%xmm8')
-define(`KEY9', `%xmm9')
-define(`KEY10', `%xmm10')
-define(`KEY11', `%xmm11')
-define(`KEY12', `%xmm12')
-define(`KEY13', `%xmm13')
-define(`KEY14', `%xmm14')
-define(`X', `%xmm15')
+define(`KEY8', `%xmm7')
+define(`KEY9', `%xmm8')
+define(`KEY10', `%xmm9')
+define(`KEY11', `%xmm10')
+define(`KEY12', `%xmm11')
+define(`KEY13', `%xmm12')
+define(`KEY14', `%xmm13')
+define(`X', `%xmm14')
+define(`Y', `%xmm15')
.file "aes256-decrypt.asm"
@@ -67,14 +67,13 @@ PROLOGUE(nettle_aes256_decrypt)
test LENGTH, LENGTH
jz .Lend
- movups (CTX), KEY0
+ movups (CTX), KEY0_7
movups 16(CTX), KEY1
movups 32(CTX), KEY2
movups 48(CTX), KEY3
movups 64(CTX), KEY4
movups 80(CTX), KEY5
movups 96(CTX), KEY6
- movups 112(CTX), KEY7
movups 128(CTX), KEY8
movups 144(CTX), KEY9
movups 160(CTX), KEY10
@@ -83,16 +82,20 @@ PROLOGUE(nettle_aes256_decrypt)
movups 208(CTX), KEY13
movups 224(CTX), KEY14
-.Lblock_loop:
+ shr LENGTH
+ jnc .Lblock_loop
+
movups (SRC), X
- pxor KEY0, X
+ pxor KEY0_7, X
+ movups 112(CTX), KEY0_7
aesdec KEY1, X
aesdec KEY2, X
aesdec KEY3, X
aesdec KEY4, X
aesdec KEY5, X
aesdec KEY6, X
- aesdec KEY7, X
+ aesdec KEY0_7, X
+ movups (CTX), KEY0_7
aesdec KEY8, X
aesdec KEY9, X
aesdec KEY10, X
@@ -104,6 +107,49 @@ PROLOGUE(nettle_aes256_decrypt)
movups X, (DST)
add $16, SRC
add $16, DST
+ test LENGTH, LENGTH
+ jz .Lend
+
+.Lblock_loop:
+ movups (SRC), X
+ movups 16(SRC), Y
+ pxor KEY0_7, X
+ pxor KEY0_7, Y
+ movups 112(CTX), KEY0_7
+ aesdec KEY1, X
+ aesdec KEY1, Y
+ aesdec KEY2, X
+ aesdec KEY2, Y
+ aesdec KEY3, X
+ aesdec KEY3, Y
+ aesdec KEY4, X
+ aesdec KEY4, Y
+ aesdec KEY5, X
+ aesdec KEY5, Y
+ aesdec KEY6, X
+ aesdec KEY6, Y
+ aesdec KEY0_7, X
+ aesdec KEY0_7, Y
+ movups (CTX), KEY0_7
+ aesdec KEY8, X
+ aesdec KEY8, Y
+ aesdec KEY9, X
+ aesdec KEY9, Y
+ aesdec KEY10, X
+ aesdec KEY10, Y
+ aesdec KEY11, X
+ aesdec KEY11, Y
+ aesdec KEY12, X
+ aesdec KEY12, Y
+ aesdec KEY13, X
+ aesdec KEY13, Y
+ aesdeclast KEY14, X
+ aesdeclast KEY14, Y
+
+ movups X, (DST)
+ movups Y, 16(DST)
+ add $32, SRC
+ add $32, DST
dec LENGTH
jnz .Lblock_loop
diff --git a/x86_64/aesni/aes256-encrypt.asm b/x86_64/aesni/aes256-encrypt.asm
index b261a237..57cfc4d2 100644
--- a/x86_64/aesni/aes256-encrypt.asm
+++ b/x86_64/aesni/aes256-encrypt.asm
@@ -36,22 +36,22 @@ define(`LENGTH',`%rsi')
define(`DST', `%rdx')
define(`SRC', `%rcx')
-define(`KEY0', `%xmm0')
+define(`KEY0_7', `%xmm0')
define(`KEY1', `%xmm1')
define(`KEY2', `%xmm2')
define(`KEY3', `%xmm3')
define(`KEY4', `%xmm4')
define(`KEY5', `%xmm5')
define(`KEY6', `%xmm6')
-define(`KEY7', `%xmm7')
-define(`KEY8', `%xmm8')
-define(`KEY9', `%xmm9')
-define(`KEY10', `%xmm10')
-define(`KEY11', `%xmm11')
-define(`KEY12', `%xmm12')
-define(`KEY13', `%xmm13')
-define(`KEY14', `%xmm14')
-define(`X', `%xmm15')
+define(`KEY8', `%xmm7')
+define(`KEY9', `%xmm8')
+define(`KEY10', `%xmm9')
+define(`KEY11', `%xmm10')
+define(`KEY12', `%xmm11')
+define(`KEY13', `%xmm12')
+define(`KEY14', `%xmm13')
+define(`X', `%xmm14')
+define(`Y', `%xmm15')
.file "aes256-encrypt.asm"
@@ -67,14 +67,13 @@ PROLOGUE(nettle_aes256_encrypt)
test LENGTH, LENGTH
jz .Lend
- movups (CTX), KEY0
+ movups (CTX), KEY0_7
movups 16(CTX), KEY1
movups 32(CTX), KEY2
movups 48(CTX), KEY3
movups 64(CTX), KEY4
movups 80(CTX), KEY5
movups 96(CTX), KEY6
- movups 112(CTX), KEY7
movups 128(CTX), KEY8
movups 144(CTX), KEY9
movups 160(CTX), KEY10
@@ -82,17 +81,20 @@ PROLOGUE(nettle_aes256_encrypt)
movups 192(CTX), KEY12
movups 208(CTX), KEY13
movups 224(CTX), KEY14
+ shr LENGTH
+ jnc .Lblock_loop
-.Lblock_loop:
movups (SRC), X
- pxor KEY0, X
+ pxor KEY0_7, X
+ movups 112(CTX), KEY0_7
aesenc KEY1, X
aesenc KEY2, X
aesenc KEY3, X
aesenc KEY4, X
aesenc KEY5, X
aesenc KEY6, X
- aesenc KEY7, X
+ aesenc KEY0_7, X
+ movups (CTX), KEY0_7
aesenc KEY8, X
aesenc KEY9, X
aesenc KEY10, X
@@ -104,6 +106,49 @@ PROLOGUE(nettle_aes256_encrypt)
movups X, (DST)
add $16, SRC
add $16, DST
+ test LENGTH, LENGTH
+ jz .Lend
+
+.Lblock_loop:
+ movups (SRC), X
+ movups 16(SRC), Y
+ pxor KEY0_7, X
+ pxor KEY0_7, Y
+ movups 112(CTX), KEY0_7
+ aesenc KEY1, X
+ aesenc KEY1, Y
+ aesenc KEY2, X
+ aesenc KEY2, Y
+ aesenc KEY3, X
+ aesenc KEY3, Y
+ aesenc KEY4, X
+ aesenc KEY4, Y
+ aesenc KEY5, X
+ aesenc KEY5, Y
+ aesenc KEY6, X
+ aesenc KEY6, Y
+ aesenc KEY0_7, X
+ aesenc KEY0_7, Y
+ movups (CTX), KEY0_7
+ aesenc KEY8, X
+ aesenc KEY8, Y
+ aesenc KEY9, X
+ aesenc KEY9, Y
+ aesenc KEY10, X
+ aesenc KEY10, Y
+ aesenc KEY11, X
+ aesenc KEY11, Y
+ aesenc KEY12, X
+ aesenc KEY12, Y
+ aesenc KEY13, X
+ aesenc KEY13, Y
+ aesenclast KEY14, X
+ aesenclast KEY14, Y
+
+ movups X, (DST)
+ movups Y, 16(DST)
+ add $32, SRC
+ add $32, DST
dec LENGTH
jnz .Lblock_loop