summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Niels Möller <nisse@lysator.liu.se> 2020-12-01 18:47:38 +0100
committer: Niels Möller <nisse@lysator.liu.se> 2020-12-01 18:47:38 +0100
commit: c4ac0953c151dfca6716332247f6aa716ed17b62 (patch)
tree: f2405edeb658ecf9e8adde4e5857de2cad64d033
parent: 1d2cb8ba6fb85fc0c2653f8a9972c17c6990e23b (diff)
download: nettle-ppc-chacha-4core.tar.gz
ppc: Save registers below stack pointer, without modifying it. (branch: ppc-chacha-4core)
-rw-r--r-- ChangeLog 6
-rw-r--r-- powerpc64/p7/chacha-4core.asm 21
2 files changed, 15 insertions, 12 deletions
diff --git a/ChangeLog b/ChangeLog
index b0e9e199..1f2e2d40 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,7 +1,9 @@
2020-12-01 Niels Möller <nisse@lysator.liu.se>
- * powerpc64/p7/chacha-4core.asm (QR): Instruction level
- interleaving in the main loop, written by Torbjörn Granlund.
+ * powerpc64/p7/chacha-4core.asm: Use protected zone below stack
+ pointer to save registers, without modifying the stack pointer.
+ (QR): Instruction level interleaving in the main loop, written by
+ Torbjörn Granlund.
2020-11-30 Niels Möller <nisse@lysator.liu.se>
diff --git a/powerpc64/p7/chacha-4core.asm b/powerpc64/p7/chacha-4core.asm
index 0cd5c877..b2330247 100644
--- a/powerpc64/p7/chacha-4core.asm
+++ b/powerpc64/p7/chacha-4core.asm
@@ -132,11 +132,13 @@ PROLOGUE(_nettle_chacha_4core)
li r7, 0x20 C ...useful...
li r8, 0x30 C ...offsets
- addi SP, SP, -0x40 C Save callee-save registers
- stvx v20, 0, SP
- stvx v21, r6, SP
- stvx v22, r7, SP
- stvx v23, r8, SP
+ C Save callee-save registers. Use the "protected zone", max
+ C 288 bytes, below the stack pointer, accessed via r10.
+ addi r10, SP, -0x40
+ stvx v20, 0, r10
+ stvx v21, r6, r10
+ stvx v22, r7, r10
+ stvx v23, r8, r10
vspltisw ROT16, -16 C -16 instead of 16 actually works!
vspltisw ROT12, 12
@@ -257,11 +259,10 @@ IF_BE(`
stxvw4x VSR(v15), r8, DST
C Restore callee-save registers
- lvx v20, 0, SP
- lvx v21, r6, SP
- lvx v22, r7, SP
- lvx v23, r8, SP
- addi SP, SP, 0x40
+ lvx v20, 0, r10
+ lvx v21, r6, r10
+ lvx v22, r7, r10
+ lvx v23, r8, r10
blr
EPILOGUE(_nettle_chacha_4core)