From abfaf8be2fa06786da94fa6e3b90773d6f178739 Mon Sep 17 00:00:00 2001 From: Dmitry Eremin-Solenikov Date: Tue, 7 Jan 2020 01:16:43 +0300 Subject: ecc: rename source files with curves data In preparation to adding GOST curves support, rename source files and use curve name as eccdata parameter. Signed-off-by: Dmitry Eremin-Solenikov --- .gitignore | 14 +- Makefile.in | 54 ++++--- arm/ecc-192-modp.asm | 106 ------------ arm/ecc-224-modp.asm | 124 -------------- arm/ecc-256-redc.asm | 173 -------------------- arm/ecc-384-modp.asm | 270 ------------------------------- arm/ecc-521-modp.asm | 127 --------------- arm/ecc-secp192r1-modp.asm | 106 ++++++++++++ arm/ecc-secp224r1-modp.asm | 124 ++++++++++++++ arm/ecc-secp256r1-redc.asm | 173 ++++++++++++++++++++ arm/ecc-secp384r1-modp.asm | 270 +++++++++++++++++++++++++++++++ arm/ecc-secp521r1-modp.asm | 127 +++++++++++++++ configure.ac | 6 +- ecc-192.c | 181 --------------------- ecc-224.c | 133 --------------- ecc-25519.c | 356 ----------------------------------------- ecc-256.c | 310 ----------------------------------- ecc-384.c | 218 ------------------------- ecc-448.c | 334 -------------------------------------- ecc-521.c | 146 ----------------- ecc-curve25519.c | 356 +++++++++++++++++++++++++++++++++++++++++ ecc-curve448.c | 334 ++++++++++++++++++++++++++++++++++++++ ecc-secp192r1.c | 181 +++++++++++++++++++++ ecc-secp224r1.c | 133 +++++++++++++++ ecc-secp256r1.c | 310 +++++++++++++++++++++++++++++++++++ ecc-secp384r1.c | 218 +++++++++++++++++++++++++ ecc-secp521r1.c | 146 +++++++++++++++++ eccdata.c | 58 ++++--- x86_64/ecc-192-modp.asm | 88 ---------- x86_64/ecc-224-modp.asm | 131 --------------- x86_64/ecc-25519-modp.asm | 94 ----------- x86_64/ecc-256-redc.asm | 129 --------------- x86_64/ecc-384-modp.asm | 234 --------------------------- x86_64/ecc-521-modp.asm | 158 ------------------ x86_64/ecc-curve25519-modp.asm | 94 +++++++++++ x86_64/ecc-secp192r1-modp.asm | 88 ++++++++++ x86_64/ecc-secp224r1-modp.asm | 131 
+++++++++++++++ x86_64/ecc-secp256r1-redc.asm | 129 +++++++++++++++ x86_64/ecc-secp384r1-modp.asm | 234 +++++++++++++++++++++++++++ x86_64/ecc-secp521r1-modp.asm | 158 ++++++++++++++++++ 40 files changed, 3383 insertions(+), 3373 deletions(-) delete mode 100644 arm/ecc-192-modp.asm delete mode 100644 arm/ecc-224-modp.asm delete mode 100644 arm/ecc-256-redc.asm delete mode 100644 arm/ecc-384-modp.asm delete mode 100644 arm/ecc-521-modp.asm create mode 100644 arm/ecc-secp192r1-modp.asm create mode 100644 arm/ecc-secp224r1-modp.asm create mode 100644 arm/ecc-secp256r1-redc.asm create mode 100644 arm/ecc-secp384r1-modp.asm create mode 100644 arm/ecc-secp521r1-modp.asm delete mode 100644 ecc-192.c delete mode 100644 ecc-224.c delete mode 100644 ecc-25519.c delete mode 100644 ecc-256.c delete mode 100644 ecc-384.c delete mode 100644 ecc-448.c delete mode 100644 ecc-521.c create mode 100644 ecc-curve25519.c create mode 100644 ecc-curve448.c create mode 100644 ecc-secp192r1.c create mode 100644 ecc-secp224r1.c create mode 100644 ecc-secp256r1.c create mode 100644 ecc-secp384r1.c create mode 100644 ecc-secp521r1.c delete mode 100644 x86_64/ecc-192-modp.asm delete mode 100644 x86_64/ecc-224-modp.asm delete mode 100644 x86_64/ecc-25519-modp.asm delete mode 100644 x86_64/ecc-256-redc.asm delete mode 100644 x86_64/ecc-384-modp.asm delete mode 100644 x86_64/ecc-521-modp.asm create mode 100644 x86_64/ecc-curve25519-modp.asm create mode 100644 x86_64/ecc-secp192r1-modp.asm create mode 100644 x86_64/ecc-secp224r1-modp.asm create mode 100644 x86_64/ecc-secp256r1-redc.asm create mode 100644 x86_64/ecc-secp384r1-modp.asm create mode 100644 x86_64/ecc-secp521r1-modp.asm diff --git a/.gitignore b/.gitignore index 0afe61de..ea264107 100644 --- a/.gitignore +++ b/.gitignore @@ -43,13 +43,13 @@ core /keymap.h /parity.h /rotors.h -/ecc-192.h -/ecc-224.h -/ecc-256.h -/ecc-384.h -/ecc-521.h -/ecc-25519.h -/ecc-448.h +/ecc-curve25519.h +/ecc-curve448.h +/ecc-secp192r1.h +/ecc-secp224r1.h 
+/ecc-secp256r1.h +/ecc-secp384r1.h +/ecc-secp521r1.h /version.h /nettle.aux /nettle.cp diff --git a/Makefile.in b/Makefile.in index e0c9f5f7..38160bb4 100644 --- a/Makefile.in +++ b/Makefile.in @@ -175,8 +175,9 @@ hogweed_SOURCES = sexp.c sexp-format.c \ gmp-glue.c cnd-copy.c \ ecc-mod.c ecc-mod-inv.c \ ecc-mod-arith.c ecc-pp1-redc.c ecc-pm1-redc.c \ - ecc-192.c ecc-224.c ecc-256.c ecc-384.c ecc-521.c \ - ecc-25519.c ecc-448.c \ + ecc-curve25519.c ecc-curve448.c \ + ecc-secp192r1.c ecc-secp224r1.c ecc-secp256r1.c \ + ecc-secp384r1.c ecc-secp521r1.c \ ecc-size.c ecc-j-to-a.c ecc-a-to-j.c \ ecc-dup-jj.c ecc-add-jja.c ecc-add-jjj.c \ ecc-eh-to-a.c \ @@ -350,24 +351,24 @@ des.$(OBJEXT): des.c des.h $(des_headers) # k = 14, c = 7, S = 256, T = 42 ( 28 A + 14 D) 12 KB # k = 11, c = 6, S = 192, T = 44 ( 33 A + 11 D) 9 KB # k = 16, c = 6, S = 128, T = 48 ( 32 A + 16 D) 6 KB -ecc-192.h: eccdata.stamp - ./eccdata$(EXEEXT_FOR_BUILD) 192 8 6 $(NUMB_BITS) > $@T && mv $@T $@ +ecc-secp192r1.h: eccdata.stamp + ./eccdata$(EXEEXT_FOR_BUILD) secp192r1 8 6 $(NUMB_BITS) > $@T && mv $@T $@ # Some reasonable choices for 224: # k = 16, c = 7, S = 256, T = 48 ( 32 A + 16 D) ~16 KB # k = 10, c = 6, S = 256, T = 50 ( 40 A + 10 D) ~16 KB # k = 13, c = 6, S = 192, T = 52 ( 39 A + 13 D) ~12 KB # k = 9, c = 5, S = 160, T = 54 ( 45 A + 9 D) ~10 KB -ecc-224.h: eccdata.stamp - ./eccdata$(EXEEXT_FOR_BUILD) 224 16 7 $(NUMB_BITS) > $@T && mv $@T $@ +ecc-secp224r1.h: eccdata.stamp + ./eccdata$(EXEEXT_FOR_BUILD) secp224r1 16 7 $(NUMB_BITS) > $@T && mv $@T $@ # Some reasonable choices for 256: # k = 9, c = 6, S = 320, T = 54 ( 45 A + 9 D) 20 KB # k = 11, c = 6, S = 256, T = 55 ( 44 A + 11 D) 16 KB # k = 19, c = 7, S = 256, T = 57 ( 38 A + 19 D) 16 KB # k = 15, c = 6, S = 192, T = 60 ( 45 A + 15 D) 12 KB -ecc-256.h: eccdata.stamp - ./eccdata$(EXEEXT_FOR_BUILD) 256 11 6 $(NUMB_BITS) > $@T && mv $@T $@ +ecc-secp256r1.h: eccdata.stamp + ./eccdata$(EXEEXT_FOR_BUILD) secp256r1 11 6 $(NUMB_BITS) > $@T && mv 
$@T $@ # Some reasonable choices for 384: # k = 16, c = 6, S = 256, T = 80 ( 64 A + 16 D) 24 KB @@ -377,35 +378,35 @@ ecc-256.h: eccdata.stamp # k = 13, c = 5, S = 192, T = 91 ( 78 A + 13 D) 18 KB # k = 16, c = 5, S = 160, T = 96 ( 80 A + 16 D) 15 KB # k = 32, c = 6, S = 128, T = 96 ( 64 A + 32 D) 12 KB -ecc-384.h: eccdata.stamp - ./eccdata$(EXEEXT_FOR_BUILD) 384 32 6 $(NUMB_BITS) > $@T && mv $@T $@ +ecc-secp384r1.h: eccdata.stamp + ./eccdata$(EXEEXT_FOR_BUILD) secp384r1 32 6 $(NUMB_BITS) > $@T && mv $@T $@ # Some reasonable choices for 521: # k = 29, c = 6, S = 192, T = 116 ( 87 A + 29 D) ~27 KB # k = 21, c = 5, S = 160, T = 126 (105 A + 21 D) ~23 KB # k = 44, c = 6, S = 128, T = 132 ( 88 A + 44 D) ~18 KB # k = 35, c = 5, S = 96, T = 140 (105 A + 35 D) ~14 KB -ecc-521.h: eccdata.stamp - ./eccdata$(EXEEXT_FOR_BUILD) 521 44 6 $(NUMB_BITS) > $@T && mv $@T $@ +ecc-secp521r1.h: eccdata.stamp + ./eccdata$(EXEEXT_FOR_BUILD) secp521r1 44 6 $(NUMB_BITS) > $@T && mv $@T $@ -# Parameter choices mostly the same as for ecc-256.h. -ecc-25519.h: eccdata.stamp - ./eccdata$(EXEEXT_FOR_BUILD) 255 11 6 $(NUMB_BITS) > $@T && mv $@T $@ +# Parameter choices mostly the same as for ecc-secp256r1.h. 
+ecc-curve25519.h: eccdata.stamp + ./eccdata$(EXEEXT_FOR_BUILD) curve25519 11 6 $(NUMB_BITS) > $@T && mv $@T $@ -ecc-448.h: eccdata.stamp - ./eccdata$(EXEEXT_FOR_BUILD) 448 38 6 $(NUMB_BITS) > $@T && mv $@T $@ +ecc-curve448.h: eccdata.stamp + ./eccdata$(EXEEXT_FOR_BUILD) curve448 38 6 $(NUMB_BITS) > $@T && mv $@T $@ eccdata.stamp: eccdata.c $(MAKE) eccdata$(EXEEXT_FOR_BUILD) echo stamp > eccdata.stamp -ecc-192.$(OBJEXT): ecc-192.h -ecc-224.$(OBJEXT): ecc-224.h -ecc-256.$(OBJEXT): ecc-256.h -ecc-384.$(OBJEXT): ecc-384.h -ecc-521.$(OBJEXT): ecc-521.h -ecc-25519.$(OBJEXT): ecc-25519.h -ecc-448.$(OBJEXT): ecc-448.h +ecc-curve25519.$(OBJEXT): ecc-curve25519.h +ecc-curve448.$(OBJEXT): ecc-curve448.h +ecc-secp192r1.$(OBJEXT): ecc-secp192r1.h +ecc-secp224r1.$(OBJEXT): ecc-secp224r1.h +ecc-secp256r1.$(OBJEXT): ecc-secp256r1.h +ecc-secp384r1.$(OBJEXT): ecc-secp384r1.h +ecc-secp521r1.$(OBJEXT): ecc-secp521r1.h .asm.$(OBJEXT): $(srcdir)/asm.m4 machine.m4 config.m4 $(M4) $(srcdir)/asm.m4 machine.m4 config.m4 $< >$*.s @@ -658,8 +659,9 @@ distcheck: dist clean-here: -rm -f $(TARGETS) *.$(OBJEXT) *.s *.so *.dll *.a \ - ecc-192.h ecc-224.h ecc-256.h ecc-384.h ecc-521.h ecc-25519.h \ - ecc-448.h \ + ecc-curve25519.h ecc-curve448.h \ + ecc-secp192r1.h ecc-secp224r1.h ecc-secp256r1.h \ + ecc-secp384r1.h ecc-secp521r1.h \ aesdata$(EXEEXT_FOR_BUILD) \ desdata$(EXEEXT_FOR_BUILD) \ twofishdata$(EXEEXT_FOR_BUILD) \ diff --git a/arm/ecc-192-modp.asm b/arm/ecc-192-modp.asm deleted file mode 100644 index b6074a2e..00000000 --- a/arm/ecc-192-modp.asm +++ /dev/null @@ -1,106 +0,0 @@ -C arm/ecc-192-modp.asm - -ifelse(< - Copyright (C) 2013 Niels Möller - - This file is part of GNU Nettle. - - GNU Nettle is free software: you can redistribute it and/or - modify it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. 
- - or - - * the GNU General Public License as published by the Free - Software Foundation; either version 2 of the License, or (at your - option) any later version. - - or both in parallel, as here. - - GNU Nettle is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received copies of the GNU General Public License and - the GNU Lesser General Public License along with this program. If - not, see http://www.gnu.org/licenses/. ->) - - .file "ecc-192-modp.asm" - .arm - -define(, ) C Overlaps unused modulo argument -define(, ) - -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) C Overlaps T0 and T1 -define(

, ) -define(, ) -define(, ) - - C ecc_192_modp (const struct ecc_modulo *m, mp_limb_t *rp) - .text - .align 2 - -PROLOGUE(nettle_ecc_192_modp) - push {r4,r5,r6,r7,r8,r10} - C Reduce two words at a time - add HP, RP, #48 - add RP, RP, #8 - ldmdb HP!, {H0,H1} - ldm RP, {T2,T3,T4,T5,T6,T7} - mov C4, #0 - adds T4, T4, H0 - adcs T5, T5, H1 - adcs T6, T6, H0 - adcs T7, T7, H1 - C Need to add carry to T2 and T4, do T4 later. - adc C4, C4, #0 - - ldmdb HP!, {H0,H1} - mov C2, #0 - adcs T2, T2, H0 - adcs T3, T3, H1 - adcs T4, T4, H0 - adcs T5, T5, H1 - C Need to add carry to T0 and T2, do T2 later - adc C2, C2, #0 - - ldmdb RP!, {T0, T1} - adcs T0, T0, T6 - adcs T1, T1, T7 - adcs T2, T2, T6 - adcs T3, T3, T7 - adc C4, C4, #0 - - adds T2, T2, C2 - adcs T3, T3, #0 - adcs T4, T4, C4 - adcs T5, T5, #0 - mov C2, #0 - adc C2, C2, #0 - - C Add in final carry - adcs T0, T0, #0 - adcs T1, T1, #0 - adcs T2, T2, C2 - adcs T3, T3, #0 - adcs T4, T4, #0 - adc T5, T5, #0 - - stm RP, {T0,T1,T2,T3,T4,T5} - - pop {r4,r5,r6,r7,r8,r10} - bx lr -EPILOGUE(nettle_ecc_192_modp) diff --git a/arm/ecc-224-modp.asm b/arm/ecc-224-modp.asm deleted file mode 100644 index 15cc0c1b..00000000 --- a/arm/ecc-224-modp.asm +++ /dev/null @@ -1,124 +0,0 @@ -C arm/ecc-224-modp.asm - -ifelse(< - Copyright (C) 2013 Niels Möller - - This file is part of GNU Nettle. - - GNU Nettle is free software: you can redistribute it and/or - modify it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - - or - - * the GNU General Public License as published by the Free - Software Foundation; either version 2 of the License, or (at your - option) any later version. - - or both in parallel, as here. - - GNU Nettle is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU - General Public License for more details. - - You should have received copies of the GNU General Public License and - the GNU Lesser General Public License along with this program. If - not, see http://www.gnu.org/licenses/. ->) - - .file "ecc-224-modp.asm" - .arm - -define(, ) -define(, ) C Overlaps unused modulo argument - -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) - - C ecc_224_modp (const struct ecc_modulo *m, mp_limb_t *rp) - .text - .align 2 - -PROLOGUE(nettle_ecc_224_modp) - push {r4,r5,r6,r7,r8,r10,r11,lr} - - add L2, RP, #28 - ldm L2, {T0,T1,T2,T3,T4,T5,T6} - mov H, #0 - - adds T0, T0, T4 - adcs T1, T1, T5 - adcs T2, T2, T6 - adc H, H, #0 - - C This switch from adcs to sbcs takes carry into account with - C correct sign, but it always subtracts 1 too much. We arrange - C to also add B^7 + 1 below, so the effect is adding p. This - C addition of p also ensures that the result never is - C negative. - - sbcs N3, T3, T0 - sbcs T4, T4, T1 - sbcs T5, T5, T2 - sbcs T6, T6, H - mov H, #1 C This is the B^7 - sbc H, #0 - subs T6, T6, T3 - sbc H, #0 - - C Now subtract from low half - ldm RP!, {L0,L1,L2} - - C Clear carry, with the sbcs, this is the 1. 
- adds RP, #0 - - sbcs T0, L0, T0 - sbcs T1, L1, T1 - sbcs T2, L2, T2 - ldm RP!, {T3,L0,L1,L2} - sbcs T3, T3, N3 - sbcs T4, L0, T4 - sbcs T5, L1, T5 - sbcs T6, L2, T6 - rsc H, H, #0 - - C Now -2 <= H <= 0 is the borrow, so subtract (B^3 - 1) |H| - C Use (B^3 - 1) H = if -1 <=H <= 0, and - C (B^3 - 1) H = <1,B-1, B-1, B-2> if H = -2 - subs T0, T0, H - asr L1, H, #1 - sbcs T1, T1, L1 - eor H, H, L1 - sbcs T2, T2, L1 - sbcs T3, T3, H - sbcs T4, T4, #0 - sbcs T5, T5, #0 - sbcs T6, T6, #0 - sbcs H, H, H - - C Final borrow, subtract (B^3 - 1) |H| - subs T0, T0, H - sbcs T1, T1, H - sbcs T2, T2, H - sbcs T3, T3, #0 - sbcs T4, T4, #0 - sbcs T5, T5, #0 - sbcs T6, T6, #0 - - stmdb RP, {T0,T1,T2,T3,T4,T5,T6} - - pop {r4,r5,r6,r7,r8,r10,r11,pc} -EPILOGUE(nettle_ecc_224_modp) diff --git a/arm/ecc-256-redc.asm b/arm/ecc-256-redc.asm deleted file mode 100644 index 0c5e846d..00000000 --- a/arm/ecc-256-redc.asm +++ /dev/null @@ -1,173 +0,0 @@ -C arm/ecc-256-redc.asm - -ifelse(< - Copyright (C) 2013 Niels Möller - - This file is part of GNU Nettle. - - GNU Nettle is free software: you can redistribute it and/or - modify it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - - or - - * the GNU General Public License as published by the Free - Software Foundation; either version 2 of the License, or (at your - option) any later version. - - or both in parallel, as here. - - GNU Nettle is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received copies of the GNU General Public License and - the GNU Lesser General Public License along with this program. If - not, see http://www.gnu.org/licenses/. 
->) - - .file "ecc-256-redc.asm" - .arm - -define(, ) - -define(, ) C Overlaps unused modulo argument -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) - - C ecc_256_redc (const struct ecc_modulo *m, mp_limb_t *rp) - .text - .align 2 - -PROLOGUE(nettle_ecc_256_redc) - push {r4,r5,r6,r7,r8,r10,r11,lr} - - ldm RP!, {T0,T1,T2,T3,T4,T5,T6,T7} - - C Set to the high 4 limbs of (B^2-B+1) - C T2 T1 - C T2 T1 T0 - C - T2 T1 T0 - C ------------- - C F3 F2 F1 F0 - - - adds F1, T0, T2 - adcs F2, T1, #0 - adc F3, T2, #0 - - subs F0, T1, T0 - sbcs F1, F1, T1 C Could also be rsc ? - sbcs F2, F2, T2 - sbc F3, F3, #0 - - C Add: - C T10 T9 T8 T7 T6 T5 T4 T3 - C + F3 F2 F1 F0 T0 T2 T1 T0 - C -------------------------- - C T7 T6 T5 T4 T3 T2 T1 T0 - - adds T3, T3, T0 - adcs T1, T4, T1 - adcs T2, T5, T2 - adcs T6, T6, T0 - mov T0, T3 C FIXME: Be more clever? - mov T3, T6 - adcs T4, T7, F0 - - ldm RP!, {T5,T6,T7} - adcs T5, T5, F1 - adcs T6, T6, F2 - adcs T7, T7, F3 - - C New F3, F2, F1, F0, also adding in carry - adcs F1, T0, T2 - adcs F2, T1, #0 - adc F3, T2, #0 - - subs F0, T1, T0 - sbcs F1, F1, T1 C Could also be rsc ? - sbcs F2, F2, T2 - sbc F3, F3, #0 - - C Start adding - adds T3, T3, T0 - adcs T1, T4, T1 - adcs T2, T5, T2 - adcs T6, T6, T0 - mov T0, T3 C FIXME: Be more clever? 
- mov T3, T6 - adcs T4, T7, F0 - - ldm RP!, {T5,T6,T7} - adcs T5, T5, F1 - adcs T6, T6, F2 - adcs T7, T7, F3 - - C Final iteration, eliminate only T0, T1 - C Set to the high 3 limbs of (B^2-B+1) - - C T1 T0 T1 - C - T1 T0 - C ------------- - C F2 F1 F0 - - C First add in carry - adcs F1, T0, #0 - adcs F2, T1, #0 - subs F0, T1, T0 - sbcs F1, F1, T1 - sbc F2, F2, #0 - - C Add: - C T9 T8 T7 T6 T5 T4 T3 T2 - C + F2 F1 F0 T0 0 T1 T0 0 - C -------------------------- - C F2 F1 T7 T6 T5 T4 T3 T2 - - adds T3, T3, T0 - adcs T4, T4, T1 - adcs T5, T5, #0 - adcs T6, T6, T0 - adcs T7, T7, F0 - ldm RP!, {T0, T1} - mov F3, #0 - adcs F1, F1, T0 - adcs F2, F2, T1 - - C Sum is < B^8 + p, so it's enough to fold carry once, - C If carry, add in - C B^7 - B^6 - B^3 + 1 = <0, B-2, B-1, B-1, B-1, 0, 0, 1> - - C Mask from carry flag, leaving carry intact - adc F3, F3, #0 - rsb F3, F3, #0 - - adcs T0, T2, #0 - adcs T1, T3, #0 - adcs T2, T4, #0 - adcs T3, T5, F3 - adcs T4, T6, F3 - adcs T5, T7, F3 - and F3, F3, #-2 - adcs T6, F1, F3 - adcs T7, F2, #0 - - sub RP, RP, #64 - stm RP, {T0,T1,T2,T3,T4,T5,T6,T7} - - pop {r4,r5,r6,r7,r8,r10,r11,pc} -EPILOGUE(nettle_ecc_256_redc) diff --git a/arm/ecc-384-modp.asm b/arm/ecc-384-modp.asm deleted file mode 100644 index 1d36319d..00000000 --- a/arm/ecc-384-modp.asm +++ /dev/null @@ -1,270 +0,0 @@ -C arm/ecc-384-modp.asm - -ifelse(< - Copyright (C) 2013 Niels Möller - - This file is part of GNU Nettle. - - GNU Nettle is free software: you can redistribute it and/or - modify it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - - or - - * the GNU General Public License as published by the Free - Software Foundation; either version 2 of the License, or (at your - option) any later version. - - or both in parallel, as here. 
- - GNU Nettle is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received copies of the GNU General Public License and - the GNU Lesser General Public License along with this program. If - not, see http://www.gnu.org/licenses/. ->) - - .file "ecc-384-modp.asm" - .arm - -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) - - C ecc_384_modp (const struct ecc_modulo *m, mp_limb_t *rp) - .text - .align 2 - -PROLOGUE(nettle_ecc_384_modp) - push {r4,r5,r6,r7,r8,r10,lr} - - add RP, RP, #80 - ldm RP, {T0, T1, T2, T3} C 20-23 - - C First get top 4 limbs, which need folding twice, as - C - C T3 T2 T1 T0 - C T3 T2 T1 - C -T3 - C ---------------- - C F4 F3 F2 F1 F0 - C - C Start with - C - C T3 T1 T0 - C T1 - C -T3 - C ----------- - C F2 F1 F0 Always fits - - adds F0, T0, T1 - adcs F1, T1, #0 - adcs F2, T3, #0 - subs F0, F0, T3 - sbcs F1, F1, #0 - sbcs F2, F2, #0 - - C T3 T2 T2 0 - C F2 F1 F0 - C ---------------- - C F4 F3 F2 F1 F0 - - mov F4, #0 - adds F1, F1, T2 - adcs F2, F2, T2 - adcs F3, T3, #0 - adcs F4, F4, #0 - - C Add in to high part - sub RP, RP, #32 - ldm RP, {T0, T1, T2, T3} C 12-15 - mov H, #0 - adds F0, T0, F0 - adcs F1, T1, F1 - adcs F2, T2, F2 - adcs F3, T3, F3 - adcs F4, F4, #0 C Do F4 later - - C Add to low part, keeping carry (positive or negative) in H - sub RP, RP, #48 - ldm RP, {T0, T1, T2, T3} C 0-3 - mov H, #0 - adds T0, T0, F0 - adcs T1, T1, F1 - adcs T2, T2, F2 - adcs T3, T3, F3 - adc H, H, #0 - subs T1, T1, F0 - sbcs T2, T2, F1 - sbcs T3, T3, F2 - sbc H, H, #0 - adds T3, T3, F0 - adc H, H, #0 - - stm RP!, {T0,T1,T2,T3} C 0-3 - mov N, #2 -.Loop: - ldm RP, {T0,T1,T2,T3} C 4-7 - - C First, propagate carry - adds T0, T0, H - asr H, #31 C Sign extend - adcs T1, T1, H - 
adcs T2, T2, H - adcs T3, T3, H - adc H, H, #0 - - C +B^4 term - adds T0, T0, F0 - adcs T1, T1, F1 - adcs T2, T2, F2 - adcs T3, T3, F3 - adc H, H, #0 - - C +B^3 terms - ldr F0, [RP, #+48] C 16 - adds T0, T0, F1 - adcs T1, T1, F2 - adcs T2, T2, F3 - adcs T3, T3, F0 - adc H, H, #0 - - C -B - ldr F1, [RP, #+52] C 17-18 - ldr F2, [RP, #+56] - subs T0, T0, F3 - sbcs T1, T1, F0 - sbcs T2, T2, F1 - sbcs T3, T3, F2 - sbcs H, H, #0 - - C +1 - ldr F3, [RP, #+60] C 19 - adds T0, T0, F0 - adcs T1, T1, F1 - adcs T2, T2, F2 - adcs T3, T3, F3 - adc H, H, #0 - subs N, N, #1 - stm RP!, {T0,T1,T2,T3} - bne .Loop - - C Fold high limbs, we need to add in - C - C F4 F4 0 -F4 F4 H H 0 -H H - C - C We always have F4 >= 0, but we can have H < 0. - C Sign extension gets tricky when F4 = 0 and H < 0. - sub RP, RP, #48 - - ldm RP, {T0,T1,T2,T3} C 0-3 - - C H H 0 -H H - C ---------------- - C S H F3 F2 F1 F0 - C - C Define S = H >> 31 (asr), we then have - C - C F0 = H - C F1 = S - H - C F2 = - [H > 0] - C F3 = H - [H > 0] - C H = H + S - C - C And we get underflow in S - H iff H > 0 - - C H = 0 H > 0 H = -1 - mov F0, H C 0 H -1 - asr H, #31 - subs F1, H, F0 C 0,C=1 -H,C=0 0,C=1 - sbc F2, F2, F2 C 0 -1 0 - sbc F3, F0, #0 C 0 H-1 -1 - - adds T0, T0, F0 - adcs T1, T1, F1 - adcs T2, T2, F2 - adcs T3, T3, F3 - adc H, H, F0 C 0+cy H+cy -2+cy - - stm RP!, {T0,T1,T2,T3} C 0-3 - ldm RP, {T0,T1,T2,T3} C 4-7 - - C F4 0 -F4 - C --------- - C F3 F2 F1 - - rsbs F1, F4, #0 - sbc F2, F2, F2 - sbc F3, F4, #0 - - C Sign extend H - adds F0, F4, H - asr H, H, #31 - adcs F1, F1, H - adcs F2, F2, H - adcs F3, F3, H - adcs F4, F4, H - adc H, H, #0 - - adds T0, T0, F0 - adcs T1, T1, F1 - adcs T2, T2, F2 - adcs T3, T3, F3 - - stm RP!, {T0,T1,T2,T3} C 4-7 - ldm RP, {T0,T1,T2,T3} C 8-11 - - adcs T0, T0, F4 - adcs T1, T1, H - adcs T2, T2, H - adcs T3, T3, H - adc H, H, #0 - - stm RP, {T0,T1,T2,T3} C 8-11 - - C Final (unlikely) carry - sub RP, RP, #32 - ldm RP, {T0,T1,T2,T3} C 0-3 - C Fold H into F0-F4 - mov F0, H - asr 
H, #31 - subs F1, H, F0 - sbc F2, F2, F2 - sbc F3, F0, #0 - add F4, F0, H - - adds T0, T0, F0 - adcs T1, T1, F1 - adcs T2, T2, F2 - adcs T3, T3, F3 - - stm RP!, {T0,T1,T2,T3} C 0-3 - ldm RP, {T0,T1,T2,T3} C 4-7 - adcs T0, T0, F4 - adcs T1, T1, H - adcs T2, T2, H - adcs T3, T3, H - stm RP!, {T0,T1,T2,T3} C 4-7 - ldm RP, {T0,T1,T2,T3} C 8-11 - adcs T0, T0, H - adcs T1, T1, H - adcs T2, T2, H - adcs T3, T3, H - stm RP!, {T0,T1,T2,T3} C 8-11 - pop {r4,r5,r6,r7,r8,r10,pc} -EPILOGUE(nettle_ecc_384_modp) diff --git a/arm/ecc-521-modp.asm b/arm/ecc-521-modp.asm deleted file mode 100644 index 3fba2396..00000000 --- a/arm/ecc-521-modp.asm +++ /dev/null @@ -1,127 +0,0 @@ -C arm/ecc-521-modp.asm - -ifelse(< - Copyright (C) 2013 Niels Möller - - This file is part of GNU Nettle. - - GNU Nettle is free software: you can redistribute it and/or - modify it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - - or - - * the GNU General Public License as published by the Free - Software Foundation; either version 2 of the License, or (at your - option) any later version. - - or both in parallel, as here. - - GNU Nettle is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received copies of the GNU General Public License and - the GNU Lesser General Public License along with this program. If - not, see http://www.gnu.org/licenses/. 
->) - - .file "ecc-521-modp.asm" - .arm - -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) -define(, ) - - C ecc_521_modp (const struct ecc_modulo *m, mp_limb_t *rp) - .text -.Lc511: - .int 511 - - .align 2 - -PROLOGUE(nettle_ecc_521_modp) - push {r4,r5,r6,r7,r8,lr} - - C Use that B^17 = 2^23 (mod p) - ldr F3, [RP, #+68] C 17 - add HP, RP, #72 C 18 - ldr T0, [RP] C 0 - adds T0, T0, F3, lsl #23 - str T0, [RP], #+4 - mov N, #5 - - C 5 iterations, reading limbs 18-20, 21-23, 24-26, 27-29, 30-32 - C and adding to limbs 1-3, 4-6, 7-9, 19-12, 13-15 -.Loop: - ldm RP, {T0,T1,T2} C 1+3*k -- 3+3*k - lsr F0, F3, #9 - ldm HP!, {F1,F2,F3} C 18+3*k -- 20+3*k - orr F0, F0, F1, lsl #23 - lsr F1, F1, #9 - orr F1, F1, F2, lsl #23 - lsr F2, F2, #9 - orr F2, F2, F3, lsl #23 - adcs T0, T0, F0 - adcs T1, T1, F1 - adcs T2, T2, F2 - sub N, N, #1 - stm RP!,{T0,T1,T2} - teq N, #0 - bne .Loop - - ldr F0, [RP], #-64 C 16 - ldr F1, [HP] C 33 - ldr T0, .Lc511 - - C Handling of high limbs - C F0 = rp[16] + carry in + F3 >> 9 - adcs F0, F0, F3, lsr #9 - C Copy low 9 bits to H, then shift right including carry - and H, F0, T0 - mov F0, F0, rrx - lsr F0, F0, #8 - C Add in F1 = rp[33], with weight 2^1056 = 2^14 - adds F0, F0, F1, lsl #14 - lsr F1, F1, #18 - adc F1, F1, #0 - - ldm RP, {T0, T1} C 0-1 - adds T0, T0, F0 - adcs T1, T1, F1 - stm RP!, {T0, T1} - - ldm RP, {T0,T1,T2,F0,F1,F2,F3} C 2-8 - adcs T0, T0, #0 - adcs T1, T1, #0 - adcs T2, T2, #0 - adcs F0, F0, #0 - adcs F1, F1, #0 - adcs F2, F2, #0 - adcs F3, F3, #0 - stm RP!, {T0,T1,T2,F0,F1,F2,F3} C 2-8 - ldm RP, {T0,T1,T2,F0,F1,F2,F3} C 9-15 - adcs T0, T0, #0 - adcs T1, T1, #0 - adcs T2, T2, #0 - adcs F0, F0, #0 - adcs F1, F1, #0 - adcs F2, F2, #0 - adcs F3, F3, #0 - adcs H, H, #0 - stm RP, {T0,T1,T2,F0,F1,F2,F3,H} C 9-16 - - pop {r4,r5,r6,r7,r8,pc} -EPILOGUE(nettle_ecc_521_modp) diff --git a/arm/ecc-secp192r1-modp.asm b/arm/ecc-secp192r1-modp.asm new file mode 100644 index 
00000000..dbaae2e3 --- /dev/null +++ b/arm/ecc-secp192r1-modp.asm @@ -0,0 +1,106 @@ +C arm/ecc-secp192r1-modp.asm + +ifelse(< + Copyright (C) 2013 Niels Möller + + This file is part of GNU Nettle. + + GNU Nettle is free software: you can redistribute it and/or + modify it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + + or + + * the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your + option) any later version. + + or both in parallel, as here. + + GNU Nettle is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received copies of the GNU General Public License and + the GNU Lesser General Public License along with this program. If + not, see http://www.gnu.org/licenses/. +>) + + .file "ecc-secp192r1-modp.asm" + .arm + +define(, ) C Overlaps unused modulo argument +define(, ) + +define(, ) +define(, ) +define(, ) +define(, ) +define(, ) +define(, ) +define(, ) +define(, ) +define(, ) C Overlaps T0 and T1 +define(

, ) +define(, ) +define(, ) + + C ecc_192_modp (const struct ecc_modulo *m, mp_limb_t *rp) + .text + .align 2 + +PROLOGUE(nettle_ecc_192_modp) + push {r4,r5,r6,r7,r8,r10} + C Reduce two words at a time + add HP, RP, #48 + add RP, RP, #8 + ldmdb HP!, {H0,H1} + ldm RP, {T2,T3,T4,T5,T6,T7} + mov C4, #0 + adds T4, T4, H0 + adcs T5, T5, H1 + adcs T6, T6, H0 + adcs T7, T7, H1 + C Need to add carry to T2 and T4, do T4 later. + adc C4, C4, #0 + + ldmdb HP!, {H0,H1} + mov C2, #0 + adcs T2, T2, H0 + adcs T3, T3, H1 + adcs T4, T4, H0 + adcs T5, T5, H1 + C Need to add carry to T0 and T2, do T2 later + adc C2, C2, #0 + + ldmdb RP!, {T0, T1} + adcs T0, T0, T6 + adcs T1, T1, T7 + adcs T2, T2, T6 + adcs T3, T3, T7 + adc C4, C4, #0 + + adds T2, T2, C2 + adcs T3, T3, #0 + adcs T4, T4, C4 + adcs T5, T5, #0 + mov C2, #0 + adc C2, C2, #0 + + C Add in final carry + adcs T0, T0, #0 + adcs T1, T1, #0 + adcs T2, T2, C2 + adcs T3, T3, #0 + adcs T4, T4, #0 + adc T5, T5, #0 + + stm RP, {T0,T1,T2,T3,T4,T5} + + pop {r4,r5,r6,r7,r8,r10} + bx lr +EPILOGUE(nettle_ecc_192_modp) diff --git a/arm/ecc-secp224r1-modp.asm b/arm/ecc-secp224r1-modp.asm new file mode 100644 index 00000000..2c86755a --- /dev/null +++ b/arm/ecc-secp224r1-modp.asm @@ -0,0 +1,124 @@ +C arm/ecc-secp224r1-modp.asm + +ifelse(< + Copyright (C) 2013 Niels Möller + + This file is part of GNU Nettle. + + GNU Nettle is free software: you can redistribute it and/or + modify it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + + or + + * the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your + option) any later version. + + or both in parallel, as here. 
+ + GNU Nettle is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received copies of the GNU General Public License and + the GNU Lesser General Public License along with this program. If + not, see http://www.gnu.org/licenses/. +>) + + .file "ecc-secp224r1-modp.asm" + .arm + +define(, ) +define(, ) C Overlaps unused modulo argument + +define(, ) +define(, ) +define(, ) +define(, ) +define(, ) +define(, ) +define(, ) +define(, ) +define(, ) +define(, ) +define(, ) + + C ecc_224_modp (const struct ecc_modulo *m, mp_limb_t *rp) + .text + .align 2 + +PROLOGUE(nettle_ecc_224_modp) + push {r4,r5,r6,r7,r8,r10,r11,lr} + + add L2, RP, #28 + ldm L2, {T0,T1,T2,T3,T4,T5,T6} + mov H, #0 + + adds T0, T0, T4 + adcs T1, T1, T5 + adcs T2, T2, T6 + adc H, H, #0 + + C This switch from adcs to sbcs takes carry into account with + C correct sign, but it always subtracts 1 too much. We arrange + C to also add B^7 + 1 below, so the effect is adding p. This + C addition of p also ensures that the result never is + C negative. + + sbcs N3, T3, T0 + sbcs T4, T4, T1 + sbcs T5, T5, T2 + sbcs T6, T6, H + mov H, #1 C This is the B^7 + sbc H, #0 + subs T6, T6, T3 + sbc H, #0 + + C Now subtract from low half + ldm RP!, {L0,L1,L2} + + C Clear carry, with the sbcs, this is the 1. 
+ adds RP, #0 + + sbcs T0, L0, T0 + sbcs T1, L1, T1 + sbcs T2, L2, T2 + ldm RP!, {T3,L0,L1,L2} + sbcs T3, T3, N3 + sbcs T4, L0, T4 + sbcs T5, L1, T5 + sbcs T6, L2, T6 + rsc H, H, #0 + + C Now -2 <= H <= 0 is the borrow, so subtract (B^3 - 1) |H| + C Use (B^3 - 1) H = if -1 <=H <= 0, and + C (B^3 - 1) H = <1,B-1, B-1, B-2> if H = -2 + subs T0, T0, H + asr L1, H, #1 + sbcs T1, T1, L1 + eor H, H, L1 + sbcs T2, T2, L1 + sbcs T3, T3, H + sbcs T4, T4, #0 + sbcs T5, T5, #0 + sbcs T6, T6, #0 + sbcs H, H, H + + C Final borrow, subtract (B^3 - 1) |H| + subs T0, T0, H + sbcs T1, T1, H + sbcs T2, T2, H + sbcs T3, T3, #0 + sbcs T4, T4, #0 + sbcs T5, T5, #0 + sbcs T6, T6, #0 + + stmdb RP, {T0,T1,T2,T3,T4,T5,T6} + + pop {r4,r5,r6,r7,r8,r10,r11,pc} +EPILOGUE(nettle_ecc_224_modp) diff --git a/arm/ecc-secp256r1-redc.asm b/arm/ecc-secp256r1-redc.asm new file mode 100644 index 00000000..9c20062a --- /dev/null +++ b/arm/ecc-secp256r1-redc.asm @@ -0,0 +1,173 @@ +C arm/ecc-secp256r1-redc.asm + +ifelse(< + Copyright (C) 2013 Niels Möller + + This file is part of GNU Nettle. + + GNU Nettle is free software: you can redistribute it and/or + modify it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + + or + + * the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your + option) any later version. + + or both in parallel, as here. + + GNU Nettle is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received copies of the GNU General Public License and + the GNU Lesser General Public License along with this program. If + not, see http://www.gnu.org/licenses/. 
+>) + + .file "ecc-secp256r1-redc.asm" + .arm + +define(, ) + +define(, ) C Overlaps unused modulo argument +define(, ) +define(, ) +define(, ) +define(, ) +define(, ) +define(, ) +define(, ) +define(, ) +define(, ) +define(, ) +define(, ) + + C ecc_256_redc (const struct ecc_modulo *m, mp_limb_t *rp) + .text + .align 2 + +PROLOGUE(nettle_ecc_256_redc) + push {r4,r5,r6,r7,r8,r10,r11,lr} + + ldm RP!, {T0,T1,T2,T3,T4,T5,T6,T7} + + C Set to the high 4 limbs of (B^2-B+1) + C T2 T1 + C T2 T1 T0 + C - T2 T1 T0 + C ------------- + C F3 F2 F1 F0 + + + adds F1, T0, T2 + adcs F2, T1, #0 + adc F3, T2, #0 + + subs F0, T1, T0 + sbcs F1, F1, T1 C Could also be rsc ? + sbcs F2, F2, T2 + sbc F3, F3, #0 + + C Add: + C T10 T9 T8 T7 T6 T5 T4 T3 + C + F3 F2 F1 F0 T0 T2 T1 T0 + C -------------------------- + C T7 T6 T5 T4 T3 T2 T1 T0 + + adds T3, T3, T0 + adcs T1, T4, T1 + adcs T2, T5, T2 + adcs T6, T6, T0 + mov T0, T3 C FIXME: Be more clever? + mov T3, T6 + adcs T4, T7, F0 + + ldm RP!, {T5,T6,T7} + adcs T5, T5, F1 + adcs T6, T6, F2 + adcs T7, T7, F3 + + C New F3, F2, F1, F0, also adding in carry + adcs F1, T0, T2 + adcs F2, T1, #0 + adc F3, T2, #0 + + subs F0, T1, T0 + sbcs F1, F1, T1 C Could also be rsc ? + sbcs F2, F2, T2 + sbc F3, F3, #0 + + C Start adding + adds T3, T3, T0 + adcs T1, T4, T1 + adcs T2, T5, T2 + adcs T6, T6, T0 + mov T0, T3 C FIXME: Be more clever? 
+ mov T3, T6 + adcs T4, T7, F0 + + ldm RP!, {T5,T6,T7} + adcs T5, T5, F1 + adcs T6, T6, F2 + adcs T7, T7, F3 + + C Final iteration, eliminate only T0, T1 + C Set to the high 3 limbs of (B^2-B+1) + + C T1 T0 T1 + C - T1 T0 + C ------------- + C F2 F1 F0 + + C First add in carry + adcs F1, T0, #0 + adcs F2, T1, #0 + subs F0, T1, T0 + sbcs F1, F1, T1 + sbc F2, F2, #0 + + C Add: + C T9 T8 T7 T6 T5 T4 T3 T2 + C + F2 F1 F0 T0 0 T1 T0 0 + C -------------------------- + C F2 F1 T7 T6 T5 T4 T3 T2 + + adds T3, T3, T0 + adcs T4, T4, T1 + adcs T5, T5, #0 + adcs T6, T6, T0 + adcs T7, T7, F0 + ldm RP!, {T0, T1} + mov F3, #0 + adcs F1, F1, T0 + adcs F2, F2, T1 + + C Sum is < B^8 + p, so it's enough to fold carry once, + C If carry, add in + C B^7 - B^6 - B^3 + 1 = <0, B-2, B-1, B-1, B-1, 0, 0, 1> + + C Mask from carry flag, leaving carry intact + adc F3, F3, #0 + rsb F3, F3, #0 + + adcs T0, T2, #0 + adcs T1, T3, #0 + adcs T2, T4, #0 + adcs T3, T5, F3 + adcs T4, T6, F3 + adcs T5, T7, F3 + and F3, F3, #-2 + adcs T6, F1, F3 + adcs T7, F2, #0 + + sub RP, RP, #64 + stm RP, {T0,T1,T2,T3,T4,T5,T6,T7} + + pop {r4,r5,r6,r7,r8,r10,r11,pc} +EPILOGUE(nettle_ecc_256_redc) diff --git a/arm/ecc-secp384r1-modp.asm b/arm/ecc-secp384r1-modp.asm new file mode 100644 index 00000000..dbedbdf8 --- /dev/null +++ b/arm/ecc-secp384r1-modp.asm @@ -0,0 +1,270 @@ +C arm/ecc-secp384r1-modp.asm + +ifelse(< + Copyright (C) 2013 Niels Möller + + This file is part of GNU Nettle. + + GNU Nettle is free software: you can redistribute it and/or + modify it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + + or + + * the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your + option) any later version. + + or both in parallel, as here. 
+ + GNU Nettle is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received copies of the GNU General Public License and + the GNU Lesser General Public License along with this program. If + not, see http://www.gnu.org/licenses/. +>) + + .file "ecc-secp384r1-modp.asm" + .arm + +define(, ) +define(, ) +define(, ) +define(, ) +define(, ) +define(, ) +define(, ) +define(, ) +define(, ) +define(, ) +define(, ) +define(, ) + + C ecc_384_modp (const struct ecc_modulo *m, mp_limb_t *rp) + .text + .align 2 + +PROLOGUE(nettle_ecc_384_modp) + push {r4,r5,r6,r7,r8,r10,lr} + + add RP, RP, #80 + ldm RP, {T0, T1, T2, T3} C 20-23 + + C First get top 4 limbs, which need folding twice, as + C + C T3 T2 T1 T0 + C T3 T2 T1 + C -T3 + C ---------------- + C F4 F3 F2 F1 F0 + C + C Start with + C + C T3 T1 T0 + C T1 + C -T3 + C ----------- + C F2 F1 F0 Always fits + + adds F0, T0, T1 + adcs F1, T1, #0 + adcs F2, T3, #0 + subs F0, F0, T3 + sbcs F1, F1, #0 + sbcs F2, F2, #0 + + C T3 T2 T2 0 + C F2 F1 F0 + C ---------------- + C F4 F3 F2 F1 F0 + + mov F4, #0 + adds F1, F1, T2 + adcs F2, F2, T2 + adcs F3, T3, #0 + adcs F4, F4, #0 + + C Add in to high part + sub RP, RP, #32 + ldm RP, {T0, T1, T2, T3} C 12-15 + mov H, #0 + adds F0, T0, F0 + adcs F1, T1, F1 + adcs F2, T2, F2 + adcs F3, T3, F3 + adcs F4, F4, #0 C Do F4 later + + C Add to low part, keeping carry (positive or negative) in H + sub RP, RP, #48 + ldm RP, {T0, T1, T2, T3} C 0-3 + mov H, #0 + adds T0, T0, F0 + adcs T1, T1, F1 + adcs T2, T2, F2 + adcs T3, T3, F3 + adc H, H, #0 + subs T1, T1, F0 + sbcs T2, T2, F1 + sbcs T3, T3, F2 + sbc H, H, #0 + adds T3, T3, F0 + adc H, H, #0 + + stm RP!, {T0,T1,T2,T3} C 0-3 + mov N, #2 +.Loop: + ldm RP, {T0,T1,T2,T3} C 4-7 + + C First, propagate carry + adds T0, T0, H + asr H, #31 C Sign extend + adcs T1, T1, H 
+ adcs T2, T2, H + adcs T3, T3, H + adc H, H, #0 + + C +B^4 term + adds T0, T0, F0 + adcs T1, T1, F1 + adcs T2, T2, F2 + adcs T3, T3, F3 + adc H, H, #0 + + C +B^3 terms + ldr F0, [RP, #+48] C 16 + adds T0, T0, F1 + adcs T1, T1, F2 + adcs T2, T2, F3 + adcs T3, T3, F0 + adc H, H, #0 + + C -B + ldr F1, [RP, #+52] C 17-18 + ldr F2, [RP, #+56] + subs T0, T0, F3 + sbcs T1, T1, F0 + sbcs T2, T2, F1 + sbcs T3, T3, F2 + sbcs H, H, #0 + + C +1 + ldr F3, [RP, #+60] C 19 + adds T0, T0, F0 + adcs T1, T1, F1 + adcs T2, T2, F2 + adcs T3, T3, F3 + adc H, H, #0 + subs N, N, #1 + stm RP!, {T0,T1,T2,T3} + bne .Loop + + C Fold high limbs, we need to add in + C + C F4 F4 0 -F4 F4 H H 0 -H H + C + C We always have F4 >= 0, but we can have H < 0. + C Sign extension gets tricky when F4 = 0 and H < 0. + sub RP, RP, #48 + + ldm RP, {T0,T1,T2,T3} C 0-3 + + C H H 0 -H H + C ---------------- + C S H F3 F2 F1 F0 + C + C Define S = H >> 31 (asr), we then have + C + C F0 = H + C F1 = S - H + C F2 = - [H > 0] + C F3 = H - [H > 0] + C H = H + S + C + C And we get underflow in S - H iff H > 0 + + C H = 0 H > 0 H = -1 + mov F0, H C 0 H -1 + asr H, #31 + subs F1, H, F0 C 0,C=1 -H,C=0 0,C=1 + sbc F2, F2, F2 C 0 -1 0 + sbc F3, F0, #0 C 0 H-1 -1 + + adds T0, T0, F0 + adcs T1, T1, F1 + adcs T2, T2, F2 + adcs T3, T3, F3 + adc H, H, F0 C 0+cy H+cy -2+cy + + stm RP!, {T0,T1,T2,T3} C 0-3 + ldm RP, {T0,T1,T2,T3} C 4-7 + + C F4 0 -F4 + C --------- + C F3 F2 F1 + + rsbs F1, F4, #0 + sbc F2, F2, F2 + sbc F3, F4, #0 + + C Sign extend H + adds F0, F4, H + asr H, H, #31 + adcs F1, F1, H + adcs F2, F2, H + adcs F3, F3, H + adcs F4, F4, H + adc H, H, #0 + + adds T0, T0, F0 + adcs T1, T1, F1 + adcs T2, T2, F2 + adcs T3, T3, F3 + + stm RP!, {T0,T1,T2,T3} C 4-7 + ldm RP, {T0,T1,T2,T3} C 8-11 + + adcs T0, T0, F4 + adcs T1, T1, H + adcs T2, T2, H + adcs T3, T3, H + adc H, H, #0 + + stm RP, {T0,T1,T2,T3} C 8-11 + + C Final (unlikely) carry + sub RP, RP, #32 + ldm RP, {T0,T1,T2,T3} C 0-3 + C Fold H into F0-F4 + mov F0, H + 
asr H, #31 + subs F1, H, F0 + sbc F2, F2, F2 + sbc F3, F0, #0 + add F4, F0, H + + adds T0, T0, F0 + adcs T1, T1, F1 + adcs T2, T2, F2 + adcs T3, T3, F3 + + stm RP!, {T0,T1,T2,T3} C 0-3 + ldm RP, {T0,T1,T2,T3} C 4-7 + adcs T0, T0, F4 + adcs T1, T1, H + adcs T2, T2, H + adcs T3, T3, H + stm RP!, {T0,T1,T2,T3} C 4-7 + ldm RP, {T0,T1,T2,T3} C 8-11 + adcs T0, T0, H + adcs T1, T1, H + adcs T2, T2, H + adcs T3, T3, H + stm RP!, {T0,T1,T2,T3} C 8-11 + pop {r4,r5,r6,r7,r8,r10,pc} +EPILOGUE(nettle_ecc_384_modp) diff --git a/arm/ecc-secp521r1-modp.asm b/arm/ecc-secp521r1-modp.asm new file mode 100644 index 00000000..2b4f7919 --- /dev/null +++ b/arm/ecc-secp521r1-modp.asm @@ -0,0 +1,127 @@ +C arm/ecc-secp521r1-modp.asm + +ifelse(< + Copyright (C) 2013 Niels Möller + + This file is part of GNU Nettle. + + GNU Nettle is free software: you can redistribute it and/or + modify it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + + or + + * the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your + option) any later version. + + or both in parallel, as here. + + GNU Nettle is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received copies of the GNU General Public License and + the GNU Lesser General Public License along with this program. If + not, see http://www.gnu.org/licenses/. 
+>) + + .file "ecc-secp521r1-modp.asm" + .arm + +define(, ) +define(, ) +define(, ) +define(, ) +define(, ) +define(, ) +define(, ) +define(, ) +define(, ) +define(, ) +define(, ) + + C ecc_521_modp (const struct ecc_modulo *m, mp_limb_t *rp) + .text +.Lc511: + .int 511 + + .align 2 + +PROLOGUE(nettle_ecc_521_modp) + push {r4,r5,r6,r7,r8,lr} + + C Use that B^17 = 2^23 (mod p) + ldr F3, [RP, #+68] C 17 + add HP, RP, #72 C 18 + ldr T0, [RP] C 0 + adds T0, T0, F3, lsl #23 + str T0, [RP], #+4 + mov N, #5 + + C 5 iterations, reading limbs 18-20, 21-23, 24-26, 27-29, 30-32 + C and adding to limbs 1-3, 4-6, 7-9, 19-12, 13-15 +.Loop: + ldm RP, {T0,T1,T2} C 1+3*k -- 3+3*k + lsr F0, F3, #9 + ldm HP!, {F1,F2,F3} C 18+3*k -- 20+3*k + orr F0, F0, F1, lsl #23 + lsr F1, F1, #9 + orr F1, F1, F2, lsl #23 + lsr F2, F2, #9 + orr F2, F2, F3, lsl #23 + adcs T0, T0, F0 + adcs T1, T1, F1 + adcs T2, T2, F2 + sub N, N, #1 + stm RP!,{T0,T1,T2} + teq N, #0 + bne .Loop + + ldr F0, [RP], #-64 C 16 + ldr F1, [HP] C 33 + ldr T0, .Lc511 + + C Handling of high limbs + C F0 = rp[16] + carry in + F3 >> 9 + adcs F0, F0, F3, lsr #9 + C Copy low 9 bits to H, then shift right including carry + and H, F0, T0 + mov F0, F0, rrx + lsr F0, F0, #8 + C Add in F1 = rp[33], with weight 2^1056 = 2^14 + adds F0, F0, F1, lsl #14 + lsr F1, F1, #18 + adc F1, F1, #0 + + ldm RP, {T0, T1} C 0-1 + adds T0, T0, F0 + adcs T1, T1, F1 + stm RP!, {T0, T1} + + ldm RP, {T0,T1,T2,F0,F1,F2,F3} C 2-8 + adcs T0, T0, #0 + adcs T1, T1, #0 + adcs T2, T2, #0 + adcs F0, F0, #0 + adcs F1, F1, #0 + adcs F2, F2, #0 + adcs F3, F3, #0 + stm RP!, {T0,T1,T2,F0,F1,F2,F3} C 2-8 + ldm RP, {T0,T1,T2,F0,F1,F2,F3} C 9-15 + adcs T0, T0, #0 + adcs T1, T1, #0 + adcs T2, T2, #0 + adcs F0, F0, #0 + adcs F1, F1, #0 + adcs F2, F2, #0 + adcs F3, F3, #0 + adcs H, H, #0 + stm RP, {T0,T1,T2,F0,F1,F2,F3,H} C 9-16 + + pop {r4,r5,r6,r7,r8,pc} +EPILOGUE(nettle_ecc_521_modp) diff --git a/configure.ac b/configure.ac index ef0c819f..745cc2c5 100644 --- a/configure.ac 
+++ b/configure.ac @@ -475,9 +475,9 @@ asm_nettle_optional_list="gcm-hash8.asm cpuid.asm \ asm_hogweed_optional_list="" if test "x$enable_public_key" = "xyes" ; then - asm_hogweed_optional_list="ecc-192-modp.asm ecc-224-modp.asm \ - ecc-256-redc.asm ecc-384-modp.asm ecc-521-modp.asm \ - ecc-25519-modp.asm ecc-curve448-modp.asm" + asm_hogweed_optional_list="ecc-secp192r1-modp.asm ecc-secp224r1-modp.asm \ + ecc-secp256r1-redc.asm ecc-secp384r1-modp.asm ecc-secp521r1-modp.asm \ + ecc-curve25519-modp.asm ecc-curve448-modp.asm" fi OPT_NETTLE_OBJS="" diff --git a/ecc-192.c b/ecc-192.c deleted file mode 100644 index 4b756ffd..00000000 --- a/ecc-192.c +++ /dev/null @@ -1,181 +0,0 @@ -/* ecc-192.c - - Compile time constant (but machine dependent) tables. - - Copyright (C) 2013, 2014 Niels Möller - - This file is part of GNU Nettle. - - GNU Nettle is free software: you can redistribute it and/or - modify it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - - or - - * the GNU General Public License as published by the Free - Software Foundation; either version 2 of the License, or (at your - option) any later version. - - or both in parallel, as here. - - GNU Nettle is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received copies of the GNU General Public License and - the GNU Lesser General Public License along with this program. If - not, see http://www.gnu.org/licenses/. -*/ - -/* Development of Nettle's ECC support was funded by the .SE Internet Fund. 
*/ - -#if HAVE_CONFIG_H -# include "config.h" -#endif - -#include - -/* FIXME: Remove ecc.h include, once prototypes of more internal - functions are moved to ecc-internal.h */ -#include "ecc.h" -#include "ecc-internal.h" - -#define USE_REDC 0 - -#include "ecc-192.h" - -#if HAVE_NATIVE_ecc_192_modp - -#define ecc_192_modp nettle_ecc_192_modp -void -ecc_192_modp (const struct ecc_modulo *m, mp_limb_t *rp); - -/* Use that p = 2^{192} - 2^64 - 1, to eliminate 128 bits at a time. */ - -#elif GMP_NUMB_BITS == 32 -/* p is 6 limbs, p = B^6 - B^2 - 1 */ -static void -ecc_192_modp (const struct ecc_modulo *m UNUSED, mp_limb_t *rp) -{ - mp_limb_t cy; - - /* Reduce from 12 to 9 limbs (top limb small)*/ - cy = mpn_add_n (rp + 2, rp + 2, rp + 8, 4); - cy = sec_add_1 (rp + 6, rp + 6, 2, cy); - cy += mpn_add_n (rp + 4, rp + 4, rp + 8, 4); - assert (cy <= 2); - - rp[8] = cy; - - /* Reduce from 9 to 6 limbs */ - cy = mpn_add_n (rp, rp, rp + 6, 3); - cy = sec_add_1 (rp + 3, rp + 3, 2, cy); - cy += mpn_add_n (rp + 2, rp + 2, rp + 6, 3); - cy = sec_add_1 (rp + 5, rp + 5, 1, cy); - - assert (cy <= 1); - cy = cnd_add_n (cy, rp, ecc_Bmodp, 6); - assert (cy == 0); -} -#elif GMP_NUMB_BITS == 64 -/* p is 3 limbs, p = B^3 - B - 1 */ -static void -ecc_192_modp (const struct ecc_modulo *m UNUSED, mp_limb_t *rp) -{ - mp_limb_t cy; - - /* Reduce from 6 to 5 limbs (top limb small)*/ - cy = mpn_add_n (rp + 1, rp + 1, rp + 4, 2); - cy = sec_add_1 (rp + 3, rp + 3, 1, cy); - cy += mpn_add_n (rp + 2, rp + 2, rp + 4, 2); - assert (cy <= 2); - - rp[4] = cy; - - /* Reduce from 5 to 4 limbs (high limb small) */ - cy = mpn_add_n (rp, rp, rp + 3, 2); - cy = sec_add_1 (rp + 2, rp + 2, 1, cy); - cy += mpn_add_n (rp + 1, rp + 1, rp + 3, 2); - - assert (cy <= 1); - cy = cnd_add_n (cy, rp, ecc_Bmodp, 3); - assert (cy == 0); -} - -#else -#define ecc_192_modp ecc_mod -#endif - -const struct ecc_curve _nettle_secp_192r1 = -{ - { - 192, - ECC_LIMB_SIZE, - ECC_BMODP_SIZE, - ECC_REDC_SIZE, - ECC_MOD_INV_ITCH 
(ECC_LIMB_SIZE), - 0, - - ecc_p, - ecc_Bmodp, - ecc_Bmodp_shifted, - ecc_redc_ppm1, - ecc_pp1h, - - ecc_192_modp, - ecc_192_modp, - ecc_mod_inv, - NULL, - }, - { - 192, - ECC_LIMB_SIZE, - ECC_BMODQ_SIZE, - 0, - ECC_MOD_INV_ITCH (ECC_LIMB_SIZE), - 0, - - ecc_q, - ecc_Bmodq, - ecc_Bmodq_shifted, - NULL, - ecc_qp1h, - - ecc_mod, - ecc_mod, - ecc_mod_inv, - NULL, - }, - - USE_REDC, - ECC_PIPPENGER_K, - ECC_PIPPENGER_C, - - ECC_ADD_JJA_ITCH (ECC_LIMB_SIZE), - ECC_ADD_JJJ_ITCH (ECC_LIMB_SIZE), - ECC_DUP_JJ_ITCH (ECC_LIMB_SIZE), - ECC_MUL_A_ITCH (ECC_LIMB_SIZE), - ECC_MUL_G_ITCH (ECC_LIMB_SIZE), - ECC_J_TO_A_ITCH (ECC_LIMB_SIZE), - - ecc_add_jja, - ecc_add_jjj, - ecc_dup_jj, - ecc_mul_a, - ecc_mul_g, - ecc_j_to_a, - - ecc_b, - ecc_g, - ecc_unit, - ecc_table -}; - -const struct ecc_curve *nettle_get_secp_192r1(void) -{ - return &_nettle_secp_192r1; -} diff --git a/ecc-224.c b/ecc-224.c deleted file mode 100644 index bf90f848..00000000 --- a/ecc-224.c +++ /dev/null @@ -1,133 +0,0 @@ -/* ecc-224.c - - Compile time constant (but machine dependent) tables. - - Copyright (C) 2013, 2014 Niels Möller - - This file is part of GNU Nettle. - - GNU Nettle is free software: you can redistribute it and/or - modify it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - - or - - * the GNU General Public License as published by the Free - Software Foundation; either version 2 of the License, or (at your - option) any later version. - - or both in parallel, as here. - - GNU Nettle is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received copies of the GNU General Public License and - the GNU Lesser General Public License along with this program. 
If - not, see http://www.gnu.org/licenses/. -*/ - -/* Development of Nettle's ECC support was funded by the .SE Internet Fund. */ - -#if HAVE_CONFIG_H -# include "config.h" -#endif - -#include "ecc.h" -#include "ecc-internal.h" - -#if HAVE_NATIVE_ecc_224_modp - -#define USE_REDC 0 -#define ecc_224_modp nettle_ecc_224_modp -void -ecc_224_modp (const struct ecc_modulo *m, mp_limb_t *rp); - -#else -#define USE_REDC (ECC_REDC_SIZE != 0) -#define ecc_224_modp ecc_mod -#endif - -#include "ecc-224.h" - -#if ECC_REDC_SIZE < 0 -# define ecc_224_redc ecc_pm1_redc -#elif ECC_REDC_SIZE == 0 -# define ecc_224_redc NULL -#else -# error Configuration error -#endif - -const struct ecc_curve _nettle_secp_224r1 = -{ - { - 224, - ECC_LIMB_SIZE, - ECC_BMODP_SIZE, - -ECC_REDC_SIZE, - ECC_MOD_INV_ITCH (ECC_LIMB_SIZE), - 0, - - ecc_p, - ecc_Bmodp, - ecc_Bmodp_shifted, - ecc_redc_ppm1, - ecc_pp1h, - - ecc_224_modp, - USE_REDC ? ecc_224_redc : ecc_224_modp, - ecc_mod_inv, - NULL, - }, - { - 224, - ECC_LIMB_SIZE, - ECC_BMODQ_SIZE, - 0, - ECC_MOD_INV_ITCH (ECC_LIMB_SIZE), - 0, - - ecc_q, - ecc_Bmodq, - ecc_Bmodq_shifted, - NULL, - ecc_qp1h, - - ecc_mod, - ecc_mod, - ecc_mod_inv, - NULL, - }, - - USE_REDC, - ECC_PIPPENGER_K, - ECC_PIPPENGER_C, - - ECC_ADD_JJA_ITCH (ECC_LIMB_SIZE), - ECC_ADD_JJJ_ITCH (ECC_LIMB_SIZE), - ECC_DUP_JJ_ITCH (ECC_LIMB_SIZE), - ECC_MUL_A_ITCH (ECC_LIMB_SIZE), - ECC_MUL_G_ITCH (ECC_LIMB_SIZE), - ECC_J_TO_A_ITCH (ECC_LIMB_SIZE), - - ecc_add_jja, - ecc_add_jjj, - ecc_dup_jj, - ecc_mul_a, - ecc_mul_g, - ecc_j_to_a, - - ecc_b, - ecc_g, - ecc_unit, - ecc_table -}; - -const struct ecc_curve *nettle_get_secp_224r1(void) -{ - return &_nettle_secp_224r1; -} diff --git a/ecc-25519.c b/ecc-25519.c deleted file mode 100644 index 7eacc780..00000000 --- a/ecc-25519.c +++ /dev/null @@ -1,356 +0,0 @@ -/* ecc-25519.c - - Arithmetic and tables for curve25519, - - Copyright (C) 2014 Niels Möller - - This file is part of GNU Nettle. 
- - GNU Nettle is free software: you can redistribute it and/or - modify it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - - or - - * the GNU General Public License as published by the Free - Software Foundation; either version 2 of the License, or (at your - option) any later version. - - or both in parallel, as here. - - GNU Nettle is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received copies of the GNU General Public License and - the GNU Lesser General Public License along with this program. If - not, see http://www.gnu.org/licenses/. -*/ - -#if HAVE_CONFIG_H -# include "config.h" -#endif - -#include - -#include "ecc.h" -#include "ecc-internal.h" - -#define USE_REDC 0 - -#include "ecc-25519.h" - -#define PHIGH_BITS (GMP_NUMB_BITS * ECC_LIMB_SIZE - 255) - -#if HAVE_NATIVE_ecc_25519_modp - -#define ecc_25519_modp nettle_ecc_25519_modp -void -ecc_25519_modp (const struct ecc_modulo *m, mp_limb_t *rp); -#else - -#if PHIGH_BITS == 0 -#error Unsupported limb size */ -#endif - -static void -ecc_25519_modp(const struct ecc_modulo *m UNUSED, mp_limb_t *rp) -{ - mp_limb_t hi, cy; - - cy = mpn_addmul_1 (rp, rp + ECC_LIMB_SIZE, ECC_LIMB_SIZE, - (mp_limb_t) 19 << PHIGH_BITS); - hi = rp[ECC_LIMB_SIZE-1]; - cy = (cy << PHIGH_BITS) + (hi >> (GMP_NUMB_BITS - PHIGH_BITS)); - rp[ECC_LIMB_SIZE-1] = (hi & (GMP_NUMB_MASK >> PHIGH_BITS)) - + sec_add_1 (rp, rp, ECC_LIMB_SIZE - 1, 19 * cy); -} -#endif /* HAVE_NATIVE_ecc_25519_modp */ - -#define QHIGH_BITS (GMP_NUMB_BITS * ECC_LIMB_SIZE - 252) - -#if QHIGH_BITS == 0 -#error Unsupported limb size */ -#endif - -static void -ecc_25519_modq (const struct ecc_modulo *q, mp_limb_t *rp) -{ - mp_size_t 
n; - mp_limb_t cy; - - /* n is the offset where we add in the next term */ - for (n = ECC_LIMB_SIZE; n-- > 0;) - { - cy = mpn_submul_1 (rp + n, - q->B_shifted, ECC_LIMB_SIZE, - rp[n + ECC_LIMB_SIZE]); - /* Top limb of mBmodq_shifted is zero, so we get cy == 0 or 1 */ - assert (cy < 2); - cnd_add_n (cy, rp+n, q->m, ECC_LIMB_SIZE); - } - - cy = mpn_submul_1 (rp, q->m, ECC_LIMB_SIZE, - rp[ECC_LIMB_SIZE-1] >> (GMP_NUMB_BITS - QHIGH_BITS)); - assert (cy < 2); - cnd_add_n (cy, rp, q->m, ECC_LIMB_SIZE); -} - -/* Needs 2*ecc->size limbs at rp, and 2*ecc->size additional limbs of - scratch space. No overlap allowed. */ -static void -ecc_mod_pow_2kp1 (const struct ecc_modulo *m, - mp_limb_t *rp, const mp_limb_t *xp, - unsigned k, mp_limb_t *tp) -{ - if (k & 1) - { - ecc_mod_sqr (m, tp, xp); - k--; - } - else - { - ecc_mod_sqr (m, rp, xp); - ecc_mod_sqr (m, tp, rp); - k -= 2; - } - while (k > 0) - { - ecc_mod_sqr (m, rp, tp); - ecc_mod_sqr (m, tp, rp); - k -= 2; - } - ecc_mod_mul (m, rp, tp, xp); -} - -/* Computes a^{(p-5)/8} = a^{2^{252}-3} mod m. Needs 5 * n scratch - space. 
*/ -static void -ecc_mod_pow_252m3 (const struct ecc_modulo *m, - mp_limb_t *rp, const mp_limb_t *ap, mp_limb_t *scratch) -{ -#define a7 scratch -#define t0 (scratch + ECC_LIMB_SIZE) -#define t1 (scratch + 3*ECC_LIMB_SIZE) - - /* a^{2^252 - 3} = a^{(p-5)/8}, using the addition chain - 2^252 - 3 - = 1 + (2^252-4) - = 1 + 4 (2^250-1) - = 1 + 4 (2^125+1)(2^125-1) - = 1 + 4 (2^125+1)(1+2(2^124-1)) - = 1 + 4 (2^125+1)(1+2(2^62+1)(2^62-1)) - = 1 + 4 (2^125+1)(1+2(2^62+1)(2^31+1)(2^31-1)) - = 1 + 4 (2^125+1)(1+2(2^62+1)(2^31+1)(7+8(2^28-1))) - = 1 + 4 (2^125+1)(1+2(2^62+1)(2^31+1)(7+8(2^14+1)(2^14-1))) - = 1 + 4 (2^125+1)(1+2(2^62+1)(2^31+1)(7+8(2^14+1)(2^7+1)(2^7-1))) - = 1 + 4 (2^125+1)(1+2(2^62+1)(2^31+1)(7+8(2^14+1)(2^7+1)(1+2(2^6-1)))) - = 1 + 4 (2^125+1)(1+2(2^62+1)(2^31+1)(7+8(2^14+1)(2^7+1)(1+2(2^3+1)*7))) - */ - - ecc_mod_pow_2kp1 (m, t0, ap, 1, t1); /* a^3 */ - ecc_mod_sqr (m, rp, t0); /* a^6 */ - ecc_mod_mul (m, a7, rp, ap); /* a^7 */ - ecc_mod_pow_2kp1 (m, rp, a7, 3, t0); /* a^63 = a^{2^6-1} */ - ecc_mod_sqr (m, t0, rp); /* a^{2^7-2} */ - ecc_mod_mul (m, rp, t0, ap); /* a^{2^7-1} */ - ecc_mod_pow_2kp1 (m, t0, rp, 7, t1); /* a^{2^14-1}*/ - ecc_mod_pow_2kp1 (m, rp, t0, 14, t1); /* a^{2^28-1} */ - ecc_mod_sqr (m, t0, rp); /* a^{2^29-2} */ - ecc_mod_sqr (m, t1, t0); /* a^{2^30-4} */ - ecc_mod_sqr (m, t0, t1); /* a^{2^31-8} */ - ecc_mod_mul (m, rp, t0, a7); /* a^{2^31-1} */ - ecc_mod_pow_2kp1 (m, t0, rp, 31, t1); /* a^{2^62-1} */ - ecc_mod_pow_2kp1 (m, rp, t0, 62, t1); /* a^{2^124-1}*/ - ecc_mod_sqr (m, t0, rp); /* a^{2^125-2} */ - ecc_mod_mul (m, rp, t0, ap); /* a^{2^125-1} */ - ecc_mod_pow_2kp1 (m, t0, rp, 125, t1);/* a^{2^250-1} */ - ecc_mod_sqr (m, rp, t0); /* a^{2^251-2} */ - ecc_mod_sqr (m, t0, rp); /* a^{2^252-4} */ - ecc_mod_mul (m, rp, t0, ap); /* a^{2^252-3} */ -#undef t0 -#undef t1 -#undef a7 -} - -/* Needs 5*ECC_LIMB_SIZE scratch space. 
*/ -#define ECC_25519_INV_ITCH (5*ECC_LIMB_SIZE) - -static void ecc_25519_inv (const struct ecc_modulo *p, - mp_limb_t *rp, const mp_limb_t *ap, - mp_limb_t *scratch) -{ -#define t0 scratch - - /* Addition chain - - p - 2 = 2^{255} - 21 - = 1 + 2 (1 + 4 (2^{252}-3)) - */ - ecc_mod_pow_252m3 (p, rp, ap, t0); - ecc_mod_sqr (p, t0, rp); - ecc_mod_sqr (p, rp, t0); - ecc_mod_mul (p, t0, ap, rp); - ecc_mod_sqr (p, rp, t0); - ecc_mod_mul (p, t0, ap, rp); - mpn_copyi (rp, t0, ECC_LIMB_SIZE); /* FIXME: Eliminate copy? */ -#undef t0 -} - -/* First, do a canonical reduction, then check if zero */ -static int -ecc_25519_zero_p (const struct ecc_modulo *p, mp_limb_t *xp) -{ - mp_limb_t cy; - mp_limb_t w; - mp_size_t i; -#if PHIGH_BITS > 0 - mp_limb_t hi = xp[ECC_LIMB_SIZE-1]; - xp[ECC_LIMB_SIZE-1] = (hi & (GMP_NUMB_MASK >> PHIGH_BITS)) - + sec_add_1 (xp, xp, ECC_LIMB_SIZE - 1, 19 * (hi >> (GMP_NUMB_BITS - PHIGH_BITS))); -#endif - cy = mpn_sub_n (xp, xp, p->m, ECC_LIMB_SIZE); - cnd_add_n (cy, xp, p->m, ECC_LIMB_SIZE); - - for (i = 0, w = 0; i < ECC_LIMB_SIZE; i++) - w |= xp[i]; - return w == 0; -} - -/* Compute x such that x^2 = u/v (mod p). Returns one on success, zero - on failure. We use the e = 2 special case of the Shanks-Tonelli - algorithm (see http://www.math.vt.edu/people/brown/doc/sqrts.pdf, - or Henri Cohen, Computational Algebraic Number Theory, 1.5.1). - - To avoid a separate inversion, we also use a trick of djb's, to - compute the candidate root as - - x = (u/v)^{(p+3)/8} = u v^3 (u v^7)^{(p-5)/8}. -*/ -#if ECC_SQRT_E != 2 -#error Broken curve25519 parameters -#endif - -/* Needs 4*n space + scratch for ecc_mod_pow_252m3. 
*/ -#define ECC_25519_SQRT_ITCH (9*ECC_LIMB_SIZE) - -static int -ecc_25519_sqrt(const struct ecc_modulo *p, mp_limb_t *rp, - const mp_limb_t *up, const mp_limb_t *vp, - mp_limb_t *scratch) -{ - int pos, neg; - -#define uv3 scratch -#define uv7 (scratch + ECC_LIMB_SIZE) -#define uv7p (scratch + 2*ECC_LIMB_SIZE) -#define v2 (scratch + 2*ECC_LIMB_SIZE) -#define uv (scratch + 3*ECC_LIMB_SIZE) -#define v4 (scratch + 3*ECC_LIMB_SIZE) - -#define scratch_out (scratch + 4 * ECC_LIMB_SIZE) - -#define x2 scratch -#define vx2 (scratch + ECC_LIMB_SIZE) -#define t0 (scratch + 2*ECC_LIMB_SIZE) - - /* Live values */ - ecc_mod_sqr (p, v2, vp); /* v2 */ - ecc_mod_mul (p, uv, up, vp); /* uv, v2 */ - ecc_mod_mul (p, uv3, uv, v2); /* uv3, v2 */ - ecc_mod_sqr (p, v4, v2); /* uv3, v4 */ - ecc_mod_mul (p, uv7, uv3, v4); /* uv3, uv7 */ - ecc_mod_pow_252m3 (p, uv7p, uv7, scratch_out); /* uv3, uv7p */ - ecc_mod_mul (p, rp, uv7p, uv3); /* none */ - - /* Check sign. If square root exists, have v x^2 = ±u */ - ecc_mod_sqr (p, x2, rp); - ecc_mod_mul (p, vx2, x2, vp); - ecc_mod_add (p, t0, vx2, up); - neg = ecc_25519_zero_p (p, t0); - ecc_mod_sub (p, t0, up, vx2); - pos = ecc_25519_zero_p (p, t0); - - ecc_mod_mul (p, t0, rp, ecc_sqrt_z); - cnd_copy (neg, rp, t0, ECC_LIMB_SIZE); - return pos | neg; - -#undef uv3 -#undef uv7 -#undef uv7p -#undef v2 -#undef v4 -#undef scratch_out -#undef x2 -#undef vx2 -#undef t0 -} - -const struct ecc_curve _nettle_curve25519 = -{ - { - 255, - ECC_LIMB_SIZE, - ECC_BMODP_SIZE, - 0, - ECC_25519_INV_ITCH, - ECC_25519_SQRT_ITCH, - - ecc_p, - ecc_Bmodp, - ecc_Bmodp_shifted, - NULL, - ecc_pp1h, - - ecc_25519_modp, - ecc_25519_modp, - ecc_25519_inv, - ecc_25519_sqrt, - }, - { - 253, - ECC_LIMB_SIZE, - ECC_BMODQ_SIZE, - 0, - ECC_MOD_INV_ITCH (ECC_LIMB_SIZE), - 0, - - ecc_q, - ecc_Bmodq, - ecc_mBmodq_shifted, /* Use q - 2^{252} instead. 
*/ - NULL, - ecc_qp1h, - - ecc_25519_modq, - ecc_25519_modq, - ecc_mod_inv, - NULL, - }, - - 0, /* No redc */ - ECC_PIPPENGER_K, - ECC_PIPPENGER_C, - - ECC_ADD_TH_ITCH (ECC_LIMB_SIZE), - ECC_ADD_THH_ITCH (ECC_LIMB_SIZE), - ECC_DUP_TH_ITCH (ECC_LIMB_SIZE), - ECC_MUL_A_EH_ITCH (ECC_LIMB_SIZE), - ECC_MUL_G_EH_ITCH (ECC_LIMB_SIZE), - ECC_EH_TO_A_ITCH (ECC_LIMB_SIZE, ECC_25519_INV_ITCH), - - ecc_add_th, - ecc_add_thh, - ecc_dup_th, - ecc_mul_a_eh, - ecc_mul_g_eh, - ecc_eh_to_a, - - ecc_b, /* Edwards curve constant. */ - ecc_g, - ecc_unit, - ecc_table -}; diff --git a/ecc-256.c b/ecc-256.c deleted file mode 100644 index 0990cb3b..00000000 --- a/ecc-256.c +++ /dev/null @@ -1,310 +0,0 @@ -/* ecc-256.c - - Compile time constant (but machine dependent) tables. - - Copyright (C) 2013, 2014 Niels Möller - - This file is part of GNU Nettle. - - GNU Nettle is free software: you can redistribute it and/or - modify it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - - or - - * the GNU General Public License as published by the Free - Software Foundation; either version 2 of the License, or (at your - option) any later version. - - or both in parallel, as here. - - GNU Nettle is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received copies of the GNU General Public License and - the GNU Lesser General Public License along with this program. If - not, see http://www.gnu.org/licenses/. -*/ - -/* Development of Nettle's ECC support was funded by the .SE Internet Fund. 
*/ - -#if HAVE_CONFIG_H -# include "config.h" -#endif - -#include - -#include "ecc.h" -#include "ecc-internal.h" - -#if HAVE_NATIVE_ecc_256_redc -# define USE_REDC 1 -#else -# define USE_REDC (ECC_REDC_SIZE != 0) -#endif - -#include "ecc-256.h" - -#if HAVE_NATIVE_ecc_256_redc -# define ecc_256_redc nettle_ecc_256_redc -void -ecc_256_redc (const struct ecc_modulo *p, mp_limb_t *rp); -#else /* !HAVE_NATIVE_ecc_256_redc */ -# if ECC_REDC_SIZE > 0 -# define ecc_256_redc ecc_pp1_redc -# elif ECC_REDC_SIZE == 0 -# define ecc_256_redc NULL -# else -# error Configuration error -# endif -#endif /* !HAVE_NATIVE_ecc_256_redc */ - -#if ECC_BMODP_SIZE < ECC_LIMB_SIZE -#define ecc_256_modp ecc_mod -#define ecc_256_modq ecc_mod -#elif GMP_NUMB_BITS == 64 - -static void -ecc_256_modp (const struct ecc_modulo *p, mp_limb_t *rp) -{ - mp_limb_t u1, u0; - mp_size_t n; - - n = 2*p->size; - u1 = rp[--n]; - u0 = rp[n-1]; - - /* This is not particularly fast, but should work well with assembly implementation. */ - for (; n >= p->size; n--) - { - mp_limb_t q2, q1, q0, t, cy; - - /* = v * u1 + , with v = 2^32 - 1: - - +---+---+ - | u1| u0| - +---+---+ - |-u1| - +-+-+-+ - | u1| - +---+-+-+-+-+ - | q2| q1| q0| - +---+---+---+ - */ - q1 = u1 - (u1 > u0); - q0 = u0 - u1; - t = u1 << 32; - q0 += t; - t = (u1 >> 32) + (q0 < t) + 1; - q1 += t; - q2 = q1 < t; - - /* Compute candidate remainder */ - u1 = u0 + (q1 << 32) - q1; - t = -(mp_limb_t) (u1 > q0); - u1 -= t & 0xffffffff; - q1 += t; - q2 += t + (q1 < t); - - assert (q2 < 2); - - /* - n-1 n-2 n-3 n-4 - +---+---+---+---+ - | u1| u0| u low | - +---+---+---+---+ - - | q1(2^96-1)| - +-------+---+ - |q2(2^.)| - +-------+ - - We multiply by two low limbs of p, 2^96 - 1, so we could use - shifts rather than mul. 
- */ - t = mpn_submul_1 (rp + n - 4, p->m, 2, q1); - t += cnd_sub_n (q2, rp + n - 3, p->m, 1); - t += (-q2) & 0xffffffff; - - u0 = rp[n-2]; - cy = (u0 < t); - u0 -= t; - t = (u1 < cy); - u1 -= cy; - - cy = cnd_add_n (t, rp + n - 4, p->m, 2); - u0 += cy; - u1 += (u0 < cy); - u1 -= (-t) & 0xffffffff; - } - rp[2] = u0; - rp[3] = u1; -} - -static void -ecc_256_modq (const struct ecc_modulo *q, mp_limb_t *rp) -{ - mp_limb_t u2, u1, u0; - mp_size_t n; - - n = 2*q->size; - u2 = rp[--n]; - u1 = rp[n-1]; - - /* This is not particularly fast, but should work well with assembly implementation. */ - for (; n >= q->size; n--) - { - mp_limb_t q2, q1, q0, t, c1, c0; - - u0 = rp[n-2]; - - /* = v * u2 + , same method as above. - - +---+---+ - | u2| u1| - +---+---+ - |-u2| - +-+-+-+ - | u2| - +---+-+-+-+-+ - | q2| q1| q0| - +---+---+---+ - */ - q1 = u2 - (u2 > u1); - q0 = u1 - u2; - t = u2 << 32; - q0 += t; - t = (u2 >> 32) + (q0 < t) + 1; - q1 += t; - q2 = q1 < t; - - /* Compute candidate remainder, - * (2^128 - 2^96 + 2^64 - 1) - + 2^64 q2 + (2^96 - 2^64 + 1) q1 (mod 2^128) - - +---+---+ - | u1| u0| - +---+---+ - | q2| q1| - +---+---+ - |-q1| - +-+-+-+ - | q1| - --+-+-+-+---+ - | u2| u1| - +---+---+ - */ - u2 = u1 + q2 - q1; - u1 = u0 + q1; - u2 += (u1 < q1); - u2 += (q1 << 32); - - t = -(mp_limb_t) (u2 >= q0); - q1 += t; - q2 += t + (q1 < t); - u1 += t; - u2 += (t << 32) + (u1 < t); - - assert (q2 < 2); - - c0 = cnd_sub_n (q2, rp + n - 3, q->m, 1); - c0 += (-q2) & q->m[1]; - t = mpn_submul_1 (rp + n - 4, q->m, 2, q1); - c0 += t; - c1 = c0 < t; - - /* Construct underflow condition. 
*/ - c1 += (u1 < c0); - t = - (mp_limb_t) (u2 < c1); - - u1 -= c0; - u2 -= c1; - - /* Conditional add of p */ - u1 += t; - u2 += (t<<32) + (u1 < t); - - t = cnd_add_n (t, rp + n - 4, q->m, 2); - u1 += t; - u2 += (u1 < t); - } - rp[2] = u1; - rp[3] = u2; -} - -#else -#error Unsupported parameters -#endif - -const struct ecc_curve _nettle_secp_256r1 = -{ - { - 256, - ECC_LIMB_SIZE, - ECC_BMODP_SIZE, - ECC_REDC_SIZE, - ECC_MOD_INV_ITCH (ECC_LIMB_SIZE), - 0, - - ecc_p, - ecc_Bmodp, - ecc_Bmodp_shifted, - ecc_redc_ppm1, - ecc_pp1h, - - ecc_256_modp, - USE_REDC ? ecc_256_redc : ecc_256_modp, - ecc_mod_inv, - NULL, - }, - { - 256, - ECC_LIMB_SIZE, - ECC_BMODQ_SIZE, - 0, - ECC_MOD_INV_ITCH (ECC_LIMB_SIZE), - 0, - - ecc_q, - ecc_Bmodq, - ecc_Bmodq_shifted, - NULL, - ecc_qp1h, - - ecc_256_modq, - ecc_256_modq, - ecc_mod_inv, - NULL, - }, - - USE_REDC, - ECC_PIPPENGER_K, - ECC_PIPPENGER_C, - - ECC_ADD_JJA_ITCH (ECC_LIMB_SIZE), - ECC_ADD_JJJ_ITCH (ECC_LIMB_SIZE), - ECC_DUP_JJ_ITCH (ECC_LIMB_SIZE), - ECC_MUL_A_ITCH (ECC_LIMB_SIZE), - ECC_MUL_G_ITCH (ECC_LIMB_SIZE), - ECC_J_TO_A_ITCH (ECC_LIMB_SIZE), - - ecc_add_jja, - ecc_add_jjj, - ecc_dup_jj, - ecc_mul_a, - ecc_mul_g, - ecc_j_to_a, - - ecc_b, - ecc_g, - ecc_unit, - ecc_table -}; - -const struct ecc_curve *nettle_get_secp_256r1(void) -{ - return &_nettle_secp_256r1; -} diff --git a/ecc-384.c b/ecc-384.c deleted file mode 100644 index 5bb2a247..00000000 --- a/ecc-384.c +++ /dev/null @@ -1,218 +0,0 @@ -/* ecc-384.c - - Compile time constant (but machine dependent) tables. - - Copyright (C) 2013, 2014 Niels Möller - - This file is part of GNU Nettle. - - GNU Nettle is free software: you can redistribute it and/or - modify it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. 
- - or - - * the GNU General Public License as published by the Free - Software Foundation; either version 2 of the License, or (at your - option) any later version. - - or both in parallel, as here. - - GNU Nettle is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received copies of the GNU General Public License and - the GNU Lesser General Public License along with this program. If - not, see http://www.gnu.org/licenses/. -*/ - -/* Development of Nettle's ECC support was funded by the .SE Internet Fund. */ - -#if HAVE_CONFIG_H -# include "config.h" -#endif - -#include - -#include "ecc.h" -#include "ecc-internal.h" - -#define USE_REDC 0 - -#include "ecc-384.h" - -#if HAVE_NATIVE_ecc_384_modp -#define ecc_384_modp nettle_ecc_384_modp -void -ecc_384_modp (const struct ecc_modulo *m, mp_limb_t *rp); -#elif GMP_NUMB_BITS == 32 - -/* Use that 2^{384} = 2^{128} + 2^{96} - 2^{32} + 1, and eliminate 256 - bits at a time. - - We can get carry == 2 in the first iteration, and I think *only* in - the first iteration. */ - -/* p is 12 limbs, and B^12 - p = B^4 + B^3 - B + 1. We can eliminate - almost 8 at a time. Do only 7, to avoid additional carry - propagation, followed by 5. */ -static void -ecc_384_modp (const struct ecc_modulo *p, mp_limb_t *rp) -{ - mp_limb_t cy, bw; - - /* Reduce from 24 to 17 limbs. 
*/ - cy = mpn_add_n (rp + 4, rp + 4, rp + 16, 8); - cy = sec_add_1 (rp + 12, rp + 12, 3, cy); - - bw = mpn_sub_n (rp + 5, rp + 5, rp + 16, 8); - bw = sec_sub_1 (rp + 13, rp + 13, 3, bw); - - cy += mpn_add_n (rp + 7, rp + 7, rp + 16, 8); - cy = sec_add_1 (rp + 15, rp + 15, 1, cy); - - cy += mpn_add_n (rp + 8, rp + 8, rp + 16, 8); - assert (bw <= cy); - cy -= bw; - - assert (cy <= 2); - rp[16] = cy; - - /* Reduce from 17 to 12 limbs */ - cy = mpn_add_n (rp, rp, rp + 12, 5); - cy = sec_add_1 (rp + 5, rp + 5, 3, cy); - - bw = mpn_sub_n (rp + 1, rp + 1, rp + 12, 5); - bw = sec_sub_1 (rp + 6, rp + 6, 6, bw); - - cy += mpn_add_n (rp + 3, rp + 3, rp + 12, 5); - cy = sec_add_1 (rp + 8, rp + 8, 1, cy); - - cy += mpn_add_n (rp + 4, rp + 4, rp + 12, 5); - cy = sec_add_1 (rp + 9, rp + 9, 3, cy); - - assert (cy >= bw); - cy -= bw; - assert (cy <= 1); - cy = cnd_add_n (cy, rp, p->B, ECC_LIMB_SIZE); - assert (cy == 0); -} -#elif GMP_NUMB_BITS == 64 -/* p is 6 limbs, and B^6 - p = B^2 + 2^32 (B - 1) + 1. Eliminate 3 - (almost 4) limbs at a time. 
*/ -static void -ecc_384_modp (const struct ecc_modulo *p, mp_limb_t *rp) -{ - mp_limb_t tp[6]; - mp_limb_t cy; - - /* Reduce from 12 to 9 limbs */ - tp[0] = 0; /* FIXME: Could use mpn_sub_nc */ - mpn_copyi (tp + 1, rp + 8, 3); - tp[4] = rp[11] - mpn_sub_n (tp, tp, rp + 8, 4); - tp[5] = mpn_lshift (tp, tp, 5, 32); - - cy = mpn_add_n (rp + 2, rp + 2, rp + 8, 4); - cy = sec_add_1 (rp + 6, rp + 6, 2, cy); - - cy += mpn_add_n (rp + 2, rp + 2, tp, 6); - cy += mpn_add_n (rp + 4, rp + 4, rp + 8, 4); - - assert (cy <= 2); - rp[8] = cy; - - /* Reduce from 9 to 6 limbs */ - tp[0] = 0; - mpn_copyi (tp + 1, rp + 6, 2); - tp[3] = rp[8] - mpn_sub_n (tp, tp, rp + 6, 3); - tp[4] = mpn_lshift (tp, tp, 4, 32); - - cy = mpn_add_n (rp, rp, rp + 6, 3); - cy = sec_add_1 (rp + 3, rp + 3, 2, cy); - cy += mpn_add_n (rp, rp, tp, 5); - cy += mpn_add_n (rp + 2, rp + 2, rp + 6, 3); - - cy = sec_add_1 (rp + 5, rp + 5, 1, cy); - assert (cy <= 1); - - cy = cnd_add_n (cy, rp, p->B, ECC_LIMB_SIZE); - assert (cy == 0); -} -#else -#define ecc_384_modp ecc_mod -#endif - -const struct ecc_curve _nettle_secp_384r1 = -{ - { - 384, - ECC_LIMB_SIZE, - ECC_BMODP_SIZE, - ECC_REDC_SIZE, - ECC_MOD_INV_ITCH (ECC_LIMB_SIZE), - 0, - - ecc_p, - ecc_Bmodp, - ecc_Bmodp_shifted, - ecc_redc_ppm1, - ecc_pp1h, - - ecc_384_modp, - ecc_384_modp, - ecc_mod_inv, - NULL, - }, - { - 384, - ECC_LIMB_SIZE, - ECC_BMODQ_SIZE, - 0, - ECC_MOD_INV_ITCH (ECC_LIMB_SIZE), - 0, - - ecc_q, - ecc_Bmodq, - ecc_Bmodq_shifted, - NULL, - ecc_qp1h, - - ecc_mod, - ecc_mod, - ecc_mod_inv, - NULL, - }, - - USE_REDC, - ECC_PIPPENGER_K, - ECC_PIPPENGER_C, - - ECC_ADD_JJA_ITCH (ECC_LIMB_SIZE), - ECC_ADD_JJJ_ITCH (ECC_LIMB_SIZE), - ECC_DUP_JJ_ITCH (ECC_LIMB_SIZE), - ECC_MUL_A_ITCH (ECC_LIMB_SIZE), - ECC_MUL_G_ITCH (ECC_LIMB_SIZE), - ECC_J_TO_A_ITCH (ECC_LIMB_SIZE), - - ecc_add_jja, - ecc_add_jjj, - ecc_dup_jj, - ecc_mul_a, - ecc_mul_g, - ecc_j_to_a, - - ecc_b, - ecc_g, - ecc_unit, - ecc_table -}; - -const struct ecc_curve *nettle_get_secp_384r1(void) 
-{ - return &_nettle_secp_384r1; -} diff --git a/ecc-448.c b/ecc-448.c deleted file mode 100644 index b32ad463..00000000 --- a/ecc-448.c +++ /dev/null @@ -1,334 +0,0 @@ -/* ecc-448.c - - Arithmetic and tables for curve448, - - Copyright (C) 2017 Daiki Ueno - Copyright (C) 2017 Red Hat, Inc. - - This file is part of GNU Nettle. - - GNU Nettle is free software: you can redistribute it and/or - modify it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - - or - - * the GNU General Public License as published by the Free - Software Foundation; either version 2 of the License, or (at your - option) any later version. - - or both in parallel, as here. - - GNU Nettle is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received copies of the GNU General Public License and - the GNU Lesser General Public License along with this program. If - not, see http://www.gnu.org/licenses/. -*/ - -#if HAVE_CONFIG_H -# include "config.h" -#endif - -#include - -#include "ecc.h" -#include "ecc-internal.h" - -#define USE_REDC 0 - -#include "ecc-448.h" - -#if HAVE_NATIVE_ecc_curve448_modp -#define ecc_448_modp nettle_ecc_curve448_modp -void -ecc_448_modp (const struct ecc_modulo *m, mp_limb_t *rp); -#elif GMP_NUMB_BITS == 64 -static void -ecc_448_modp(const struct ecc_modulo *m, mp_limb_t *rp) -{ - /* Let B = 2^64, b = 2^32 = sqrt(B). 
- p = B^7 - b B^3 - 1 ==> B^7 = b B^3 + 1 - - We use this to reduce - - {r_{13}, ..., r_0} = - {r_6,...,r_0} - + {r_{10},...,r_7} - + 2 {r_{13},r_{12}, r_{11}} B^4 - + b {r_{10},...,r_7,r_{13},r_{12},r_{11} (mod p) - - or - - +----+----+----+----+----+----+----+ - |r_6 |r_5 |r_4 |r_3 |r_2 |r_1 |r_0 | - +----+----+----+----+----+----+----+ - |r_10|r_9 |r_8 |r_7 | - +----+----+----+----+----+----+----+ - 2 * |r_13|r_12|r_11| - +----+----+----+----+----+----+----+ - + b * |r_10|r_9 |r_8 |r_7 |r_13|r_12|r_11| - -------+----+----+----+----+----+----+----+ - c_7 |r_6 |r_5 |r_4 |r_3 |r_2 |r_1 |r_0 | - +----+----+----+----+----+----+----+ - */ - mp_limb_t c3, c4, c7; - mp_limb_t *tp = rp + 7; - - c4 = mpn_add_n (rp, rp, rp + 7, 4); - c7 = mpn_addmul_1 (rp + 4, rp + 11, 3, 2); - c3 = mpn_addmul_1 (rp, rp + 11, 3, (mp_limb_t) 1 << 32); - c7 += mpn_addmul_1 (rp + 3, rp + 7, 4, (mp_limb_t) 1 << 32); - tp[0] = c7; - tp[1] = tp[2] = 0; - tp[3] = c3 + (c7 << 32); - tp[4] = c4 + (c7 >> 32) + (tp[3] < c3); - tp[5] = tp[6] = 0; - c7 = mpn_add_n (rp, rp, tp, 7); - c7 = cnd_add_n (c7, rp, m->B, 7); - assert (c7 == 0); -} -#else -#define ecc_448_modp ecc_mod -#endif - -/* Needs 2*ecc->size limbs at rp, and 2*ecc->size additional limbs of - scratch space. No overlap allowed. */ -static void -ecc_mod_pow_2k (const struct ecc_modulo *m, - mp_limb_t *rp, const mp_limb_t *xp, - unsigned k, mp_limb_t *tp) -{ - if (k & 1) - { - ecc_mod_sqr (m, rp, xp); - k--; - } - else - { - ecc_mod_sqr (m, tp, xp); - ecc_mod_sqr (m, rp, tp); - k -= 2; - } - while (k > 0) - { - ecc_mod_sqr (m, tp, rp); - ecc_mod_sqr (m, rp, tp); - k -= 2; - } -} - -static void -ecc_mod_pow_2kp1 (const struct ecc_modulo *m, - mp_limb_t *rp, const mp_limb_t *xp, - unsigned k, mp_limb_t *tp) -{ - ecc_mod_pow_2k (m, tp, xp, k, rp); - ecc_mod_mul (m, rp, tp, xp); -} - -/* Computes a^{(p-3)/4} = a^{2^446-2^222-1} mod m. Needs 5 * n scratch - space. 
*/ -static void -ecc_mod_pow_446m224m1 (const struct ecc_modulo *p, - mp_limb_t *rp, const mp_limb_t *ap, - mp_limb_t *scratch) -{ -/* Note overlap: operations writing to t0 clobber t1. */ -#define t0 scratch -#define t1 (scratch + 1*ECC_LIMB_SIZE) -#define t2 (scratch + 3*ECC_LIMB_SIZE) - - ecc_mod_sqr (p, rp, ap); /* a^2 */ - ecc_mod_mul (p, t0, ap, rp); /* a^3 */ - ecc_mod_sqr (p, rp, t0); /* a^6 */ - ecc_mod_mul (p, t0, ap, rp); /* a^{2^3-1} */ - - ecc_mod_pow_2kp1 (p, t1, t0, 3, rp); /* a^{2^6-1} */ - ecc_mod_pow_2k (p, rp, t1, 3, t2); /* a^{2^9-2^3} */ - ecc_mod_mul (p, t2, t0, rp); /* a^{2^9-1} */ - ecc_mod_pow_2kp1 (p, t0, t2, 9, rp); /* a^{2^18-1} */ - - ecc_mod_sqr (p, t1, t0); /* a^{2^19-2} */ - ecc_mod_mul (p, rp, ap, t1); /* a^{2^19-1} */ - ecc_mod_pow_2k (p, t1, rp, 18, t2); /* a^{2^37-2^18} */ - ecc_mod_mul (p, rp, t0, t1); /* a^{2^37-1} */ - mpn_copyi (t0, rp, p->size); - - ecc_mod_pow_2kp1 (p, rp, t0, 37, t2); /* a^{2^74-1} */ - ecc_mod_pow_2k (p, t1, rp, 37, t2); /* a^{2^111-2^37} */ - ecc_mod_mul (p, rp, t0, t1); /* a^{2^111-1} */ - ecc_mod_pow_2kp1 (p, t0, rp, 111, t2);/* a^{2^222-1} */ - - ecc_mod_sqr (p, t1, t0); /* a^{2^223-2} */ - ecc_mod_mul (p, rp, ap, t1); /* a^{2^223-1} */ - ecc_mod_pow_2k (p, t1, rp, 223, t2); /* a^{2^446-2^223} */ - ecc_mod_mul (p, rp, t0, t1); /* a^{2^446-2^222-1} */ -#undef t0 -#undef t1 -#undef t2 -} - -#define ECC_448_INV_ITCH (5*ECC_LIMB_SIZE) - -static void ecc_448_inv (const struct ecc_modulo *p, - mp_limb_t *rp, const mp_limb_t *ap, - mp_limb_t *scratch) -{ -#define t0 scratch - - ecc_mod_pow_446m224m1 (p, rp, ap, scratch); /* a^{2^446-2^222-1} */ - ecc_mod_sqr (p, t0, rp); /* a^{2^447-2^223-2} */ - ecc_mod_sqr (p, rp, t0); /* a^{2^448-2^224-4} */ - ecc_mod_mul (p, t0, ap, rp); /* a^{2^448-2^224-3} */ - - mpn_copyi (rp, t0, ECC_LIMB_SIZE); /* FIXME: Eliminate copy? 
*/ -#undef t0 -} - -/* First, do a canonical reduction, then check if zero */ -static int -ecc_448_zero_p (const struct ecc_modulo *p, mp_limb_t *xp) -{ - mp_limb_t cy; - mp_limb_t w; - mp_size_t i; - cy = mpn_sub_n (xp, xp, p->m, ECC_LIMB_SIZE); - cnd_add_n (cy, xp, p->m, ECC_LIMB_SIZE); - - for (i = 0, w = 0; i < ECC_LIMB_SIZE; i++) - w |= xp[i]; - return w == 0; -} - -/* Compute x such that x^2 = u/v (mod p). Returns one on success, zero - on failure. - - To avoid a separate inversion, we use a trick of djb's, to - compute the candidate root as - - x = (u/v)^{(p+1)/4} = u^3 v (u^5 v^3)^{(p-3)/4}. -*/ - -/* Needs 4*n space + scratch for ecc_mod_pow_446m224m1. */ -#define ECC_448_SQRT_ITCH (9*ECC_LIMB_SIZE) - -static int -ecc_448_sqrt(const struct ecc_modulo *p, mp_limb_t *rp, - const mp_limb_t *up, const mp_limb_t *vp, - mp_limb_t *scratch) -{ -#define u3v scratch -#define u5v3 (scratch + ECC_LIMB_SIZE) -#define u5v3p (scratch + 2*ECC_LIMB_SIZE) -#define u2 (scratch + 2*ECC_LIMB_SIZE) -#define u3 (scratch + 3*ECC_LIMB_SIZE) -#define uv (scratch + 2*ECC_LIMB_SIZE) -#define u2v2 (scratch + 3*ECC_LIMB_SIZE) - -#define scratch_out (scratch + 4 * ECC_LIMB_SIZE) - -#define x2 scratch -#define vx2 (scratch + ECC_LIMB_SIZE) -#define t0 (scratch + 2*ECC_LIMB_SIZE) - - /* Live values */ - ecc_mod_sqr (p, u2, up); /* u2 */ - ecc_mod_mul (p, u3, u2, up); /* u3 */ - ecc_mod_mul (p, u3v, u3, vp); /* u3v */ - ecc_mod_mul (p, uv, up, vp); /* u3v, uv */ - ecc_mod_sqr (p, u2v2, uv); /* u3v, u2v2 */ - ecc_mod_mul (p, u5v3, u3v, u2v2); /* u3v, u5v3 */ - ecc_mod_pow_446m224m1 (p, u5v3p, u5v3, scratch_out); /* u3v, u5v3p */ - ecc_mod_mul (p, rp, u5v3p, u3v); /* none */ - - /* If square root exists, have v x^2 = u */ - ecc_mod_sqr (p, x2, rp); - ecc_mod_mul (p, vx2, x2, vp); - ecc_mod_sub (p, t0, vx2, up); - - return ecc_448_zero_p (p, t0); - -#undef u3v -#undef u5v3 -#undef u5v3p -#undef u2 -#undef u3 -#undef uv -#undef u2v2 -#undef scratch_out -#undef x2 -#undef vx2 -#undef t0 -} - 
-const struct ecc_curve _nettle_curve448 = -{ - { - 448, - ECC_LIMB_SIZE, - ECC_BMODP_SIZE, - 0, - ECC_448_INV_ITCH, - ECC_448_SQRT_ITCH, - - ecc_p, - ecc_Bmodp, - ecc_Bmodp_shifted, - NULL, - ecc_pp1h, - - ecc_448_modp, - ecc_448_modp, - ecc_448_inv, - ecc_448_sqrt, - }, - { - 446, - ECC_LIMB_SIZE, - ECC_BMODQ_SIZE, - 0, - ECC_MOD_INV_ITCH (ECC_LIMB_SIZE), - 0, - - ecc_q, - ecc_Bmodq, - ecc_Bmodq_shifted, - NULL, - ecc_qp1h, - - ecc_mod, /* FIXME: Implement optimized mod function */ - ecc_mod, /* FIXME: Implement optimized reduce function */ - ecc_mod_inv, - NULL, - }, - - 0, /* No redc */ - ECC_PIPPENGER_K, - ECC_PIPPENGER_C, - - ECC_ADD_EH_ITCH (ECC_LIMB_SIZE), - ECC_ADD_EHH_ITCH (ECC_LIMB_SIZE), - ECC_DUP_EH_ITCH (ECC_LIMB_SIZE), - ECC_MUL_A_EH_ITCH (ECC_LIMB_SIZE), - ECC_MUL_G_EH_ITCH (ECC_LIMB_SIZE), - ECC_EH_TO_A_ITCH (ECC_LIMB_SIZE, ECC_448_INV_ITCH), - - ecc_add_eh, - ecc_add_ehh, - ecc_dup_eh, - ecc_mul_a_eh, - ecc_mul_g_eh, - ecc_eh_to_a, - - ecc_b, - ecc_g, - ecc_unit, - ecc_table -}; diff --git a/ecc-521.c b/ecc-521.c deleted file mode 100644 index 8ca0e6d2..00000000 --- a/ecc-521.c +++ /dev/null @@ -1,146 +0,0 @@ -/* ecc-521.c - - Compile time constant (but machine dependent) tables. - - Copyright (C) 2013, 2014 Niels Möller - - This file is part of GNU Nettle. - - GNU Nettle is free software: you can redistribute it and/or - modify it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - - or - - * the GNU General Public License as published by the Free - Software Foundation; either version 2 of the License, or (at your - option) any later version. - - or both in parallel, as here. - - GNU Nettle is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU - General Public License for more details. - - You should have received copies of the GNU General Public License and - the GNU Lesser General Public License along with this program. If - not, see http://www.gnu.org/licenses/. -*/ - -/* Development of Nettle's ECC support was funded by the .SE Internet Fund. */ - -#if HAVE_CONFIG_H -# include "config.h" -#endif - -#include "ecc.h" -#include "ecc-internal.h" - -#define USE_REDC 0 - -#include "ecc-521.h" - -#if HAVE_NATIVE_ecc_521_modp -#define ecc_521_modp nettle_ecc_521_modp -void -ecc_521_modp (const struct ecc_modulo *m, mp_limb_t *rp); - -#else - -#define B_SHIFT (521 % GMP_NUMB_BITS) -#define BMODP_SHIFT (GMP_NUMB_BITS - B_SHIFT) -#define BMODP ((mp_limb_t) 1 << BMODP_SHIFT) - -/* Result may be *slightly* larger than 2^521 */ -static void -ecc_521_modp (const struct ecc_modulo *m UNUSED, mp_limb_t *rp) -{ - /* FIXME: Should use mpn_addlsh_n_ip1 */ - mp_limb_t hi; - /* Reduce from 2*ECC_LIMB_SIZE to ECC_LIMB_SIZE + 1 */ - rp[ECC_LIMB_SIZE] - = mpn_addmul_1 (rp, rp + ECC_LIMB_SIZE, ECC_LIMB_SIZE, BMODP); - hi = mpn_addmul_1 (rp, rp + ECC_LIMB_SIZE, 1, BMODP); - hi = sec_add_1 (rp + 1, rp + 1, ECC_LIMB_SIZE - 1, hi); - - /* Combine hi with top bits, and add in. 
*/ - hi = (hi << BMODP_SHIFT) | (rp[ECC_LIMB_SIZE-1] >> B_SHIFT); - rp[ECC_LIMB_SIZE-1] = (rp[ECC_LIMB_SIZE-1] - & (((mp_limb_t) 1 << B_SHIFT)-1)) - + sec_add_1 (rp, rp, ECC_LIMB_SIZE - 1, hi); -} -#endif - -const struct ecc_curve _nettle_secp_521r1 = -{ - { - 521, - ECC_LIMB_SIZE, - ECC_BMODP_SIZE, - ECC_REDC_SIZE, - ECC_MOD_INV_ITCH (ECC_LIMB_SIZE), - 0, - - ecc_p, - ecc_Bmodp, - ecc_Bmodp_shifted, - ecc_redc_ppm1, - ecc_pp1h, - - ecc_521_modp, - ecc_521_modp, - ecc_mod_inv, - NULL, - }, - { - 521, - ECC_LIMB_SIZE, - ECC_BMODQ_SIZE, - 0, - ECC_MOD_INV_ITCH (ECC_LIMB_SIZE), - 0, - - ecc_q, - ecc_Bmodq, - ecc_Bmodq_shifted, - NULL, - ecc_qp1h, - - ecc_mod, - ecc_mod, - ecc_mod_inv, - NULL, - }, - - USE_REDC, - ECC_PIPPENGER_K, - ECC_PIPPENGER_C, - - ECC_ADD_JJA_ITCH (ECC_LIMB_SIZE), - ECC_ADD_JJJ_ITCH (ECC_LIMB_SIZE), - ECC_DUP_JJ_ITCH (ECC_LIMB_SIZE), - ECC_MUL_A_ITCH (ECC_LIMB_SIZE), - ECC_MUL_G_ITCH (ECC_LIMB_SIZE), - ECC_J_TO_A_ITCH (ECC_LIMB_SIZE), - - ecc_add_jja, - ecc_add_jjj, - ecc_dup_jj, - ecc_mul_a, - ecc_mul_g, - ecc_j_to_a, - - ecc_b, - ecc_g, - ecc_unit, - ecc_table -}; - -const struct ecc_curve *nettle_get_secp_521r1(void) -{ - return &_nettle_secp_521r1; -} diff --git a/ecc-curve25519.c b/ecc-curve25519.c new file mode 100644 index 00000000..73d72765 --- /dev/null +++ b/ecc-curve25519.c @@ -0,0 +1,356 @@ +/* ecc-curve25519.c + + Arithmetic and tables for curve25519, + + Copyright (C) 2014 Niels Möller + + This file is part of GNU Nettle. + + GNU Nettle is free software: you can redistribute it and/or + modify it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + + or + + * the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your + option) any later version. + + or both in parallel, as here. 
+ + GNU Nettle is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received copies of the GNU General Public License and + the GNU Lesser General Public License along with this program. If + not, see http://www.gnu.org/licenses/. +*/ + +#if HAVE_CONFIG_H +# include "config.h" +#endif + +#include + +#include "ecc.h" +#include "ecc-internal.h" + +#define USE_REDC 0 + +#include "ecc-curve25519.h" + +#define PHIGH_BITS (GMP_NUMB_BITS * ECC_LIMB_SIZE - 255) + +#if HAVE_NATIVE_ecc_25519_modp + +#define ecc_25519_modp nettle_ecc_25519_modp +void +ecc_25519_modp (const struct ecc_modulo *m, mp_limb_t *rp); +#else + +#if PHIGH_BITS == 0 +#error Unsupported limb size */ +#endif + +static void +ecc_25519_modp(const struct ecc_modulo *m UNUSED, mp_limb_t *rp) +{ + mp_limb_t hi, cy; + + cy = mpn_addmul_1 (rp, rp + ECC_LIMB_SIZE, ECC_LIMB_SIZE, + (mp_limb_t) 19 << PHIGH_BITS); + hi = rp[ECC_LIMB_SIZE-1]; + cy = (cy << PHIGH_BITS) + (hi >> (GMP_NUMB_BITS - PHIGH_BITS)); + rp[ECC_LIMB_SIZE-1] = (hi & (GMP_NUMB_MASK >> PHIGH_BITS)) + + sec_add_1 (rp, rp, ECC_LIMB_SIZE - 1, 19 * cy); +} +#endif /* HAVE_NATIVE_ecc_25519_modp */ + +#define QHIGH_BITS (GMP_NUMB_BITS * ECC_LIMB_SIZE - 252) + +#if QHIGH_BITS == 0 +#error Unsupported limb size */ +#endif + +static void +ecc_25519_modq (const struct ecc_modulo *q, mp_limb_t *rp) +{ + mp_size_t n; + mp_limb_t cy; + + /* n is the offset where we add in the next term */ + for (n = ECC_LIMB_SIZE; n-- > 0;) + { + cy = mpn_submul_1 (rp + n, + q->B_shifted, ECC_LIMB_SIZE, + rp[n + ECC_LIMB_SIZE]); + /* Top limb of mBmodq_shifted is zero, so we get cy == 0 or 1 */ + assert (cy < 2); + cnd_add_n (cy, rp+n, q->m, ECC_LIMB_SIZE); + } + + cy = mpn_submul_1 (rp, q->m, ECC_LIMB_SIZE, + rp[ECC_LIMB_SIZE-1] >> (GMP_NUMB_BITS - QHIGH_BITS)); + assert (cy < 2); + 
cnd_add_n (cy, rp, q->m, ECC_LIMB_SIZE); +} + +/* Needs 2*ecc->size limbs at rp, and 2*ecc->size additional limbs of + scratch space. No overlap allowed. */ +static void +ecc_mod_pow_2kp1 (const struct ecc_modulo *m, + mp_limb_t *rp, const mp_limb_t *xp, + unsigned k, mp_limb_t *tp) +{ + if (k & 1) + { + ecc_mod_sqr (m, tp, xp); + k--; + } + else + { + ecc_mod_sqr (m, rp, xp); + ecc_mod_sqr (m, tp, rp); + k -= 2; + } + while (k > 0) + { + ecc_mod_sqr (m, rp, tp); + ecc_mod_sqr (m, tp, rp); + k -= 2; + } + ecc_mod_mul (m, rp, tp, xp); +} + +/* Computes a^{(p-5)/8} = a^{2^{252}-3} mod m. Needs 5 * n scratch + space. */ +static void +ecc_mod_pow_252m3 (const struct ecc_modulo *m, + mp_limb_t *rp, const mp_limb_t *ap, mp_limb_t *scratch) +{ +#define a7 scratch +#define t0 (scratch + ECC_LIMB_SIZE) +#define t1 (scratch + 3*ECC_LIMB_SIZE) + + /* a^{2^252 - 3} = a^{(p-5)/8}, using the addition chain + 2^252 - 3 + = 1 + (2^252-4) + = 1 + 4 (2^250-1) + = 1 + 4 (2^125+1)(2^125-1) + = 1 + 4 (2^125+1)(1+2(2^124-1)) + = 1 + 4 (2^125+1)(1+2(2^62+1)(2^62-1)) + = 1 + 4 (2^125+1)(1+2(2^62+1)(2^31+1)(2^31-1)) + = 1 + 4 (2^125+1)(1+2(2^62+1)(2^31+1)(7+8(2^28-1))) + = 1 + 4 (2^125+1)(1+2(2^62+1)(2^31+1)(7+8(2^14+1)(2^14-1))) + = 1 + 4 (2^125+1)(1+2(2^62+1)(2^31+1)(7+8(2^14+1)(2^7+1)(2^7-1))) + = 1 + 4 (2^125+1)(1+2(2^62+1)(2^31+1)(7+8(2^14+1)(2^7+1)(1+2(2^6-1)))) + = 1 + 4 (2^125+1)(1+2(2^62+1)(2^31+1)(7+8(2^14+1)(2^7+1)(1+2(2^3+1)*7))) + */ + + ecc_mod_pow_2kp1 (m, t0, ap, 1, t1); /* a^3 */ + ecc_mod_sqr (m, rp, t0); /* a^6 */ + ecc_mod_mul (m, a7, rp, ap); /* a^7 */ + ecc_mod_pow_2kp1 (m, rp, a7, 3, t0); /* a^63 = a^{2^6-1} */ + ecc_mod_sqr (m, t0, rp); /* a^{2^7-2} */ + ecc_mod_mul (m, rp, t0, ap); /* a^{2^7-1} */ + ecc_mod_pow_2kp1 (m, t0, rp, 7, t1); /* a^{2^14-1}*/ + ecc_mod_pow_2kp1 (m, rp, t0, 14, t1); /* a^{2^28-1} */ + ecc_mod_sqr (m, t0, rp); /* a^{2^29-2} */ + ecc_mod_sqr (m, t1, t0); /* a^{2^30-4} */ + ecc_mod_sqr (m, t0, t1); /* a^{2^31-8} */ + ecc_mod_mul (m, rp, t0, 
a7); /* a^{2^31-1} */ + ecc_mod_pow_2kp1 (m, t0, rp, 31, t1); /* a^{2^62-1} */ + ecc_mod_pow_2kp1 (m, rp, t0, 62, t1); /* a^{2^124-1}*/ + ecc_mod_sqr (m, t0, rp); /* a^{2^125-2} */ + ecc_mod_mul (m, rp, t0, ap); /* a^{2^125-1} */ + ecc_mod_pow_2kp1 (m, t0, rp, 125, t1);/* a^{2^250-1} */ + ecc_mod_sqr (m, rp, t0); /* a^{2^251-2} */ + ecc_mod_sqr (m, t0, rp); /* a^{2^252-4} */ + ecc_mod_mul (m, rp, t0, ap); /* a^{2^252-3} */ +#undef t0 +#undef t1 +#undef a7 +} + +/* Needs 5*ECC_LIMB_SIZE scratch space. */ +#define ECC_25519_INV_ITCH (5*ECC_LIMB_SIZE) + +static void ecc_25519_inv (const struct ecc_modulo *p, + mp_limb_t *rp, const mp_limb_t *ap, + mp_limb_t *scratch) +{ +#define t0 scratch + + /* Addition chain + + p - 2 = 2^{255} - 21 + = 1 + 2 (1 + 4 (2^{252}-3)) + */ + ecc_mod_pow_252m3 (p, rp, ap, t0); + ecc_mod_sqr (p, t0, rp); + ecc_mod_sqr (p, rp, t0); + ecc_mod_mul (p, t0, ap, rp); + ecc_mod_sqr (p, rp, t0); + ecc_mod_mul (p, t0, ap, rp); + mpn_copyi (rp, t0, ECC_LIMB_SIZE); /* FIXME: Eliminate copy? */ +#undef t0 +} + +/* First, do a canonical reduction, then check if zero */ +static int +ecc_25519_zero_p (const struct ecc_modulo *p, mp_limb_t *xp) +{ + mp_limb_t cy; + mp_limb_t w; + mp_size_t i; +#if PHIGH_BITS > 0 + mp_limb_t hi = xp[ECC_LIMB_SIZE-1]; + xp[ECC_LIMB_SIZE-1] = (hi & (GMP_NUMB_MASK >> PHIGH_BITS)) + + sec_add_1 (xp, xp, ECC_LIMB_SIZE - 1, 19 * (hi >> (GMP_NUMB_BITS - PHIGH_BITS))); +#endif + cy = mpn_sub_n (xp, xp, p->m, ECC_LIMB_SIZE); + cnd_add_n (cy, xp, p->m, ECC_LIMB_SIZE); + + for (i = 0, w = 0; i < ECC_LIMB_SIZE; i++) + w |= xp[i]; + return w == 0; +} + +/* Compute x such that x^2 = u/v (mod p). Returns one on success, zero + on failure. We use the e = 2 special case of the Shanks-Tonelli + algorithm (see http://www.math.vt.edu/people/brown/doc/sqrts.pdf, + or Henri Cohen, Computational Algebraic Number Theory, 1.5.1). 
+ + To avoid a separate inversion, we also use a trick of djb's, to + compute the candidate root as + + x = (u/v)^{(p+3)/8} = u v^3 (u v^7)^{(p-5)/8}. +*/ +#if ECC_SQRT_E != 2 +#error Broken curve25519 parameters +#endif + +/* Needs 4*n space + scratch for ecc_mod_pow_252m3. */ +#define ECC_25519_SQRT_ITCH (9*ECC_LIMB_SIZE) + +static int +ecc_25519_sqrt(const struct ecc_modulo *p, mp_limb_t *rp, + const mp_limb_t *up, const mp_limb_t *vp, + mp_limb_t *scratch) +{ + int pos, neg; + +#define uv3 scratch +#define uv7 (scratch + ECC_LIMB_SIZE) +#define uv7p (scratch + 2*ECC_LIMB_SIZE) +#define v2 (scratch + 2*ECC_LIMB_SIZE) +#define uv (scratch + 3*ECC_LIMB_SIZE) +#define v4 (scratch + 3*ECC_LIMB_SIZE) + +#define scratch_out (scratch + 4 * ECC_LIMB_SIZE) + +#define x2 scratch +#define vx2 (scratch + ECC_LIMB_SIZE) +#define t0 (scratch + 2*ECC_LIMB_SIZE) + + /* Live values */ + ecc_mod_sqr (p, v2, vp); /* v2 */ + ecc_mod_mul (p, uv, up, vp); /* uv, v2 */ + ecc_mod_mul (p, uv3, uv, v2); /* uv3, v2 */ + ecc_mod_sqr (p, v4, v2); /* uv3, v4 */ + ecc_mod_mul (p, uv7, uv3, v4); /* uv3, uv7 */ + ecc_mod_pow_252m3 (p, uv7p, uv7, scratch_out); /* uv3, uv7p */ + ecc_mod_mul (p, rp, uv7p, uv3); /* none */ + + /* Check sign. 
If square root exists, have v x^2 = ±u */ + ecc_mod_sqr (p, x2, rp); + ecc_mod_mul (p, vx2, x2, vp); + ecc_mod_add (p, t0, vx2, up); + neg = ecc_25519_zero_p (p, t0); + ecc_mod_sub (p, t0, up, vx2); + pos = ecc_25519_zero_p (p, t0); + + ecc_mod_mul (p, t0, rp, ecc_sqrt_z); + cnd_copy (neg, rp, t0, ECC_LIMB_SIZE); + return pos | neg; + +#undef uv3 +#undef uv7 +#undef uv7p +#undef v2 +#undef v4 +#undef scratch_out +#undef x2 +#undef vx2 +#undef t0 +} + +const struct ecc_curve _nettle_curve25519 = +{ + { + 255, + ECC_LIMB_SIZE, + ECC_BMODP_SIZE, + 0, + ECC_25519_INV_ITCH, + ECC_25519_SQRT_ITCH, + + ecc_p, + ecc_Bmodp, + ecc_Bmodp_shifted, + NULL, + ecc_pp1h, + + ecc_25519_modp, + ecc_25519_modp, + ecc_25519_inv, + ecc_25519_sqrt, + }, + { + 253, + ECC_LIMB_SIZE, + ECC_BMODQ_SIZE, + 0, + ECC_MOD_INV_ITCH (ECC_LIMB_SIZE), + 0, + + ecc_q, + ecc_Bmodq, + ecc_mBmodq_shifted, /* Use q - 2^{252} instead. */ + NULL, + ecc_qp1h, + + ecc_25519_modq, + ecc_25519_modq, + ecc_mod_inv, + NULL, + }, + + 0, /* No redc */ + ECC_PIPPENGER_K, + ECC_PIPPENGER_C, + + ECC_ADD_TH_ITCH (ECC_LIMB_SIZE), + ECC_ADD_THH_ITCH (ECC_LIMB_SIZE), + ECC_DUP_TH_ITCH (ECC_LIMB_SIZE), + ECC_MUL_A_EH_ITCH (ECC_LIMB_SIZE), + ECC_MUL_G_EH_ITCH (ECC_LIMB_SIZE), + ECC_EH_TO_A_ITCH (ECC_LIMB_SIZE, ECC_25519_INV_ITCH), + + ecc_add_th, + ecc_add_thh, + ecc_dup_th, + ecc_mul_a_eh, + ecc_mul_g_eh, + ecc_eh_to_a, + + ecc_b, /* Edwards curve constant. */ + ecc_g, + ecc_unit, + ecc_table +}; diff --git a/ecc-curve448.c b/ecc-curve448.c new file mode 100644 index 00000000..7020e3e8 --- /dev/null +++ b/ecc-curve448.c @@ -0,0 +1,334 @@ +/* ecc-curve448.c + + Arithmetic and tables for curve448, + + Copyright (C) 2017 Daiki Ueno + Copyright (C) 2017 Red Hat, Inc. + + This file is part of GNU Nettle. 
+ + GNU Nettle is free software: you can redistribute it and/or + modify it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + + or + + * the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your + option) any later version. + + or both in parallel, as here. + + GNU Nettle is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received copies of the GNU General Public License and + the GNU Lesser General Public License along with this program. If + not, see http://www.gnu.org/licenses/. +*/ + +#if HAVE_CONFIG_H +# include "config.h" +#endif + +#include + +#include "ecc.h" +#include "ecc-internal.h" + +#define USE_REDC 0 + +#include "ecc-curve448.h" + +#if HAVE_NATIVE_ecc_curve448_modp +#define ecc_448_modp nettle_ecc_curve448_modp +void +ecc_448_modp (const struct ecc_modulo *m, mp_limb_t *rp); +#elif GMP_NUMB_BITS == 64 +static void +ecc_448_modp(const struct ecc_modulo *m, mp_limb_t *rp) +{ + /* Let B = 2^64, b = 2^32 = sqrt(B). 
+ p = B^7 - b B^3 - 1 ==> B^7 = b B^3 + 1 + + We use this to reduce + + {r_{13}, ..., r_0} = + {r_6,...,r_0} + + {r_{10},...,r_7} + + 2 {r_{13},r_{12}, r_{11}} B^4 + + b {r_{10},...,r_7,r_{13},r_{12},r_{11} (mod p) + + or + + +----+----+----+----+----+----+----+ + |r_6 |r_5 |r_4 |r_3 |r_2 |r_1 |r_0 | + +----+----+----+----+----+----+----+ + |r_10|r_9 |r_8 |r_7 | + +----+----+----+----+----+----+----+ + 2 * |r_13|r_12|r_11| + +----+----+----+----+----+----+----+ + + b * |r_10|r_9 |r_8 |r_7 |r_13|r_12|r_11| + -------+----+----+----+----+----+----+----+ + c_7 |r_6 |r_5 |r_4 |r_3 |r_2 |r_1 |r_0 | + +----+----+----+----+----+----+----+ + */ + mp_limb_t c3, c4, c7; + mp_limb_t *tp = rp + 7; + + c4 = mpn_add_n (rp, rp, rp + 7, 4); + c7 = mpn_addmul_1 (rp + 4, rp + 11, 3, 2); + c3 = mpn_addmul_1 (rp, rp + 11, 3, (mp_limb_t) 1 << 32); + c7 += mpn_addmul_1 (rp + 3, rp + 7, 4, (mp_limb_t) 1 << 32); + tp[0] = c7; + tp[1] = tp[2] = 0; + tp[3] = c3 + (c7 << 32); + tp[4] = c4 + (c7 >> 32) + (tp[3] < c3); + tp[5] = tp[6] = 0; + c7 = mpn_add_n (rp, rp, tp, 7); + c7 = cnd_add_n (c7, rp, m->B, 7); + assert (c7 == 0); +} +#else +#define ecc_448_modp ecc_mod +#endif + +/* Computes rp = xp^{2^k} mod m by k successive squarings, alternating between rp and tp. Needs 2*ecc->size limbs at rp, and 2*ecc->size additional limbs of + scratch space. No overlap allowed. k must be >= 1: for k == 0 the even branch still squares twice and the unsigned counter wraps around. */ +static void +ecc_mod_pow_2k (const struct ecc_modulo *m, + mp_limb_t *rp, const mp_limb_t *xp, + unsigned k, mp_limb_t *tp) +{ + if (k & 1) + { + ecc_mod_sqr (m, rp, xp); + k--; + } + else + { + ecc_mod_sqr (m, tp, xp); + ecc_mod_sqr (m, rp, tp); + k -= 2; + } + while (k > 0) + { + ecc_mod_sqr (m, tp, rp); + ecc_mod_sqr (m, rp, tp); + k -= 2; + } +} +/* Computes rp = xp^{2^k + 1} mod m: tp receives xp^{2^k} (with rp used as scratch), which is then multiplied by xp. */ +static void +ecc_mod_pow_2kp1 (const struct ecc_modulo *m, + mp_limb_t *rp, const mp_limb_t *xp, + unsigned k, mp_limb_t *tp) +{ + ecc_mod_pow_2k (m, tp, xp, k, rp); + ecc_mod_mul (m, rp, tp, xp); +} + +/* Computes a^{(p-3)/4} = a^{2^446-2^222-1} mod m. Needs 5 * n scratch + space.
*/ +static void +ecc_mod_pow_446m224m1 (const struct ecc_modulo *p, + mp_limb_t *rp, const mp_limb_t *ap, + mp_limb_t *scratch) +{ +/* Note overlap: operations writing to t0 clobber t1. */ +#define t0 scratch +#define t1 (scratch + 1*ECC_LIMB_SIZE) +#define t2 (scratch + 3*ECC_LIMB_SIZE) + + ecc_mod_sqr (p, rp, ap); /* a^2 */ + ecc_mod_mul (p, t0, ap, rp); /* a^3 */ + ecc_mod_sqr (p, rp, t0); /* a^6 */ + ecc_mod_mul (p, t0, ap, rp); /* a^{2^3-1} */ + + ecc_mod_pow_2kp1 (p, t1, t0, 3, rp); /* a^{2^6-1} */ + ecc_mod_pow_2k (p, rp, t1, 3, t2); /* a^{2^9-2^3} */ + ecc_mod_mul (p, t2, t0, rp); /* a^{2^9-1} */ + ecc_mod_pow_2kp1 (p, t0, t2, 9, rp); /* a^{2^18-1} */ + + ecc_mod_sqr (p, t1, t0); /* a^{2^19-2} */ + ecc_mod_mul (p, rp, ap, t1); /* a^{2^19-1} */ + ecc_mod_pow_2k (p, t1, rp, 18, t2); /* a^{2^37-2^18} */ + ecc_mod_mul (p, rp, t0, t1); /* a^{2^37-1} */ + mpn_copyi (t0, rp, p->size); + + ecc_mod_pow_2kp1 (p, rp, t0, 37, t2); /* a^{2^74-1} */ + ecc_mod_pow_2k (p, t1, rp, 37, t2); /* a^{2^111-2^37} */ + ecc_mod_mul (p, rp, t0, t1); /* a^{2^111-1} */ + ecc_mod_pow_2kp1 (p, t0, rp, 111, t2);/* a^{2^222-1} */ + + ecc_mod_sqr (p, t1, t0); /* a^{2^223-2} */ + ecc_mod_mul (p, rp, ap, t1); /* a^{2^223-1} */ + ecc_mod_pow_2k (p, t1, rp, 223, t2); /* a^{2^446-2^223} */ + ecc_mod_mul (p, rp, t0, t1); /* a^{2^446-2^222-1} */ +#undef t0 +#undef t1 +#undef t2 +} + +#define ECC_448_INV_ITCH (5*ECC_LIMB_SIZE) + +static void ecc_448_inv (const struct ecc_modulo *p, + mp_limb_t *rp, const mp_limb_t *ap, + mp_limb_t *scratch) +{ +#define t0 scratch + + ecc_mod_pow_446m224m1 (p, rp, ap, scratch); /* a^{2^446-2^222-1} */ + ecc_mod_sqr (p, t0, rp); /* a^{2^447-2^223-2} */ + ecc_mod_sqr (p, rp, t0); /* a^{2^448-2^224-4} */ + ecc_mod_mul (p, t0, ap, rp); /* a^{2^448-2^224-3} = a^{p-2}, the inverse of a mod p */ + + mpn_copyi (rp, t0, ECC_LIMB_SIZE); /* FIXME: Eliminate copy?
*/ +#undef t0 +} + +/* First, do a canonical reduction, then check if zero. The reduction is done in place, so the limbs at xp are modified. */ +static int +ecc_448_zero_p (const struct ecc_modulo *p, mp_limb_t *xp) +{ + mp_limb_t cy; + mp_limb_t w; + mp_size_t i; + cy = mpn_sub_n (xp, xp, p->m, ECC_LIMB_SIZE); + cnd_add_n (cy, xp, p->m, ECC_LIMB_SIZE); + + for (i = 0, w = 0; i < ECC_LIMB_SIZE; i++) + w |= xp[i]; + return w == 0; +} + +/* Compute x such that x^2 = u/v (mod p). Returns one on success, zero + on failure. + + To avoid a separate inversion, we use a trick of djb's, to + compute the candidate root as + + x = (u/v)^{(p+1)/4} = u^3 v (u^5 v^3)^{(p-3)/4}. +*/ + +/* Needs 4*n space + scratch for ecc_mod_pow_446m224m1 (5*n), hence 9*n in total. */ +#define ECC_448_SQRT_ITCH (9*ECC_LIMB_SIZE) + +static int +ecc_448_sqrt(const struct ecc_modulo *p, mp_limb_t *rp, + const mp_limb_t *up, const mp_limb_t *vp, + mp_limb_t *scratch) +{ +#define u3v scratch +#define u5v3 (scratch + ECC_LIMB_SIZE) +#define u5v3p (scratch + 2*ECC_LIMB_SIZE) +#define u2 (scratch + 2*ECC_LIMB_SIZE) +#define u3 (scratch + 3*ECC_LIMB_SIZE) +#define uv (scratch + 2*ECC_LIMB_SIZE) +#define u2v2 (scratch + 3*ECC_LIMB_SIZE) + +#define scratch_out (scratch + 4 * ECC_LIMB_SIZE) + +#define x2 scratch +#define vx2 (scratch + ECC_LIMB_SIZE) +#define t0 (scratch + 2*ECC_LIMB_SIZE) + + /* Live values (several names above alias the same scratch area, so only the values listed are valid after each step) */ + ecc_mod_sqr (p, u2, up); /* u2 */ + ecc_mod_mul (p, u3, u2, up); /* u3 */ + ecc_mod_mul (p, u3v, u3, vp); /* u3v */ + ecc_mod_mul (p, uv, up, vp); /* u3v, uv */ + ecc_mod_sqr (p, u2v2, uv); /* u3v, u2v2 */ + ecc_mod_mul (p, u5v3, u3v, u2v2); /* u3v, u5v3 */ + ecc_mod_pow_446m224m1 (p, u5v3p, u5v3, scratch_out); /* u3v, u5v3p */ + ecc_mod_mul (p, rp, u5v3p, u3v); /* none */ + + /* If square root exists, have v x^2 = u */ + ecc_mod_sqr (p, x2, rp); + ecc_mod_mul (p, vx2, x2, vp); + ecc_mod_sub (p, t0, vx2, up); + + return ecc_448_zero_p (p, t0); + +#undef u3v +#undef u5v3 +#undef u5v3p +#undef u2 +#undef u3 +#undef uv +#undef u2v2 +#undef scratch_out +#undef x2 +#undef vx2 +#undef t0 +} +
+const struct ecc_curve _nettle_curve448 = +{ + { /* Modulus p (ecc_p): curve-specific reduction, inverse and square root. */ + 448, + ECC_LIMB_SIZE, + ECC_BMODP_SIZE, + 0, + ECC_448_INV_ITCH, + ECC_448_SQRT_ITCH, + + ecc_p, + ecc_Bmodp, + ecc_Bmodp_shifted, + NULL, + ecc_pp1h, + + ecc_448_modp, + ecc_448_modp, + ecc_448_inv, + ecc_448_sqrt, + }, + { /* Modulus q (ecc_q), presumably the group order: generic ecc_mod and ecc_mod_inv, last slot NULL. */ + 446, + ECC_LIMB_SIZE, + ECC_BMODQ_SIZE, + 0, + ECC_MOD_INV_ITCH (ECC_LIMB_SIZE), + 0, + + ecc_q, + ecc_Bmodq, + ecc_Bmodq_shifted, + NULL, + ecc_qp1h, + + ecc_mod, /* FIXME: Implement optimized mod function */ + ecc_mod, /* FIXME: Implement optimized reduce function */ + ecc_mod_inv, + NULL, + }, + + 0, /* No redc */ + ECC_PIPPENGER_K, + ECC_PIPPENGER_C, + + ECC_ADD_EH_ITCH (ECC_LIMB_SIZE), + ECC_ADD_EHH_ITCH (ECC_LIMB_SIZE), + ECC_DUP_EH_ITCH (ECC_LIMB_SIZE), + ECC_MUL_A_EH_ITCH (ECC_LIMB_SIZE), + ECC_MUL_G_EH_ITCH (ECC_LIMB_SIZE), + ECC_EH_TO_A_ITCH (ECC_LIMB_SIZE, ECC_448_INV_ITCH), + + ecc_add_eh, + ecc_add_ehh, + ecc_dup_eh, + ecc_mul_a_eh, + ecc_mul_g_eh, + ecc_eh_to_a, + + ecc_b, + ecc_g, + ecc_unit, + ecc_table +}; diff --git a/ecc-secp192r1.c b/ecc-secp192r1.c new file mode 100644 index 00000000..858a1b75 --- /dev/null +++ b/ecc-secp192r1.c @@ -0,0 +1,181 @@ +/* ecc-secp192r1.c + + Compile time constant (but machine dependent) tables. + + Copyright (C) 2013, 2014 Niels Möller + + This file is part of GNU Nettle. + + GNU Nettle is free software: you can redistribute it and/or + modify it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + + or + + * the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your + option) any later version. + + or both in parallel, as here. + + GNU Nettle is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU + General Public License for more details. + + You should have received copies of the GNU General Public License and + the GNU Lesser General Public License along with this program. If + not, see http://www.gnu.org/licenses/. +*/ + +/* Development of Nettle's ECC support was funded by the .SE Internet Fund. */ + +#if HAVE_CONFIG_H +# include "config.h" +#endif + +#include <assert.h> + +/* FIXME: Remove ecc.h include, once prototypes of more internal + functions are moved to ecc-internal.h */ +#include "ecc.h" +#include "ecc-internal.h" + +#define USE_REDC 0 + +#include "ecc-secp192r1.h" + +#if HAVE_NATIVE_ecc_192_modp + +#define ecc_192_modp nettle_ecc_192_modp +void +ecc_192_modp (const struct ecc_modulo *m, mp_limb_t *rp); + +/* Use that p = 2^{192} - 2^64 - 1, to eliminate 128 bits at a time. */ + +#elif GMP_NUMB_BITS == 32 +/* p is 6 limbs, p = B^6 - B^2 - 1. Reduces the 12-limb value at rp in place, leaving the result in the low 6 limbs. */ +static void +ecc_192_modp (const struct ecc_modulo *m UNUSED, mp_limb_t *rp) +{ + mp_limb_t cy; + + /* Reduce from 12 to 9 limbs (top limb small)*/ + cy = mpn_add_n (rp + 2, rp + 2, rp + 8, 4); + cy = sec_add_1 (rp + 6, rp + 6, 2, cy); + cy += mpn_add_n (rp + 4, rp + 4, rp + 8, 4); + assert (cy <= 2); + + rp[8] = cy; + + /* Reduce from 9 to 6 limbs */ + cy = mpn_add_n (rp, rp, rp + 6, 3); + cy = sec_add_1 (rp + 3, rp + 3, 2, cy); + cy += mpn_add_n (rp + 2, rp + 2, rp + 6, 3); + cy = sec_add_1 (rp + 5, rp + 5, 1, cy); + + assert (cy <= 1); + cy = cnd_add_n (cy, rp, ecc_Bmodp, 6); + assert (cy == 0); +} +#elif GMP_NUMB_BITS == 64 +/* p is 3 limbs, p = B^3 - B - 1. Reduces the 6-limb value at rp in place, leaving the result in the low 3 limbs. */ +static void +ecc_192_modp (const struct ecc_modulo *m UNUSED, mp_limb_t *rp) +{ + mp_limb_t cy; + + /* Reduce from 6 to 5 limbs (top limb small)*/ + cy = mpn_add_n (rp + 1, rp + 1, rp + 4, 2); + cy = sec_add_1 (rp + 3, rp + 3, 1, cy); + cy += mpn_add_n (rp + 2, rp + 2, rp + 4, 2); + assert (cy <= 2); + + rp[4] = cy; + + /* Reduce from 5 to 4 limbs (high limb small) */ + cy = mpn_add_n (rp, rp, rp + 3, 2); + cy = sec_add_1 (rp + 2, rp + 2, 1,
cy); + cy += mpn_add_n (rp + 1, rp + 1, rp + 3, 2); + + assert (cy <= 1); + cy = cnd_add_n (cy, rp, ecc_Bmodp, 3); + assert (cy == 0); +} + +#else +#define ecc_192_modp ecc_mod +#endif + +const struct ecc_curve _nettle_secp_192r1 = +{ + { + 192, + ECC_LIMB_SIZE, + ECC_BMODP_SIZE, + ECC_REDC_SIZE, + ECC_MOD_INV_ITCH (ECC_LIMB_SIZE), + 0, + + ecc_p, + ecc_Bmodp, + ecc_Bmodp_shifted, + ecc_redc_ppm1, + ecc_pp1h, + + ecc_192_modp, + ecc_192_modp, + ecc_mod_inv, + NULL, + }, + { + 192, + ECC_LIMB_SIZE, + ECC_BMODQ_SIZE, + 0, + ECC_MOD_INV_ITCH (ECC_LIMB_SIZE), + 0, + + ecc_q, + ecc_Bmodq, + ecc_Bmodq_shifted, + NULL, + ecc_qp1h, + + ecc_mod, + ecc_mod, + ecc_mod_inv, + NULL, + }, + + USE_REDC, + ECC_PIPPENGER_K, + ECC_PIPPENGER_C, + + ECC_ADD_JJA_ITCH (ECC_LIMB_SIZE), + ECC_ADD_JJJ_ITCH (ECC_LIMB_SIZE), + ECC_DUP_JJ_ITCH (ECC_LIMB_SIZE), + ECC_MUL_A_ITCH (ECC_LIMB_SIZE), + ECC_MUL_G_ITCH (ECC_LIMB_SIZE), + ECC_J_TO_A_ITCH (ECC_LIMB_SIZE), + + ecc_add_jja, + ecc_add_jjj, + ecc_dup_jj, + ecc_mul_a, + ecc_mul_g, + ecc_j_to_a, + + ecc_b, + ecc_g, + ecc_unit, + ecc_table +}; +/* Returns a pointer to the secp192r1 curve table defined above. */ +const struct ecc_curve *nettle_get_secp_192r1(void) +{ + return &_nettle_secp_192r1; +} diff --git a/ecc-secp224r1.c b/ecc-secp224r1.c new file mode 100644 index 00000000..4d82f54b --- /dev/null +++ b/ecc-secp224r1.c @@ -0,0 +1,133 @@ +/* ecc-secp224r1.c + + Compile time constant (but machine dependent) tables. + + Copyright (C) 2013, 2014 Niels Möller + + This file is part of GNU Nettle. + + GNU Nettle is free software: you can redistribute it and/or + modify it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + + or + + * the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your + option) any later version. + + or both in parallel, as here.
+ + GNU Nettle is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received copies of the GNU General Public License and + the GNU Lesser General Public License along with this program. If + not, see http://www.gnu.org/licenses/. +*/ + +/* Development of Nettle's ECC support was funded by the .SE Internet Fund. */ + +#if HAVE_CONFIG_H +# include "config.h" +#endif + +#include "ecc.h" +#include "ecc-internal.h" + +#if HAVE_NATIVE_ecc_224_modp + +#define USE_REDC 0 +#define ecc_224_modp nettle_ecc_224_modp +void +ecc_224_modp (const struct ecc_modulo *m, mp_limb_t *rp); + +#else +#define USE_REDC (ECC_REDC_SIZE != 0) +#define ecc_224_modp ecc_mod +#endif + +#include "ecc-secp224r1.h" + +#if ECC_REDC_SIZE < 0 +# define ecc_224_redc ecc_pm1_redc +#elif ECC_REDC_SIZE == 0 +# define ecc_224_redc NULL +#else +# error Configuration error +#endif + +const struct ecc_curve _nettle_secp_224r1 = +{ + { + 224, + ECC_LIMB_SIZE, + ECC_BMODP_SIZE, + -ECC_REDC_SIZE, /* negated: the #if chain above only accepts ECC_REDC_SIZE <= 0 */ + ECC_MOD_INV_ITCH (ECC_LIMB_SIZE), + 0, + + ecc_p, + ecc_Bmodp, + ecc_Bmodp_shifted, + ecc_redc_ppm1, + ecc_pp1h, + + ecc_224_modp, + USE_REDC ?
ecc_224_redc : ecc_224_modp, /* falls back to ecc_224_modp when USE_REDC is 0 */ + ecc_mod_inv, + NULL, + }, + { + 224, + ECC_LIMB_SIZE, + ECC_BMODQ_SIZE, + 0, + ECC_MOD_INV_ITCH (ECC_LIMB_SIZE), + 0, + + ecc_q, + ecc_Bmodq, + ecc_Bmodq_shifted, + NULL, + ecc_qp1h, + + ecc_mod, + ecc_mod, + ecc_mod_inv, + NULL, + }, + + USE_REDC, + ECC_PIPPENGER_K, + ECC_PIPPENGER_C, + + ECC_ADD_JJA_ITCH (ECC_LIMB_SIZE), + ECC_ADD_JJJ_ITCH (ECC_LIMB_SIZE), + ECC_DUP_JJ_ITCH (ECC_LIMB_SIZE), + ECC_MUL_A_ITCH (ECC_LIMB_SIZE), + ECC_MUL_G_ITCH (ECC_LIMB_SIZE), + ECC_J_TO_A_ITCH (ECC_LIMB_SIZE), + + ecc_add_jja, + ecc_add_jjj, + ecc_dup_jj, + ecc_mul_a, + ecc_mul_g, + ecc_j_to_a, + + ecc_b, + ecc_g, + ecc_unit, + ecc_table +}; +/* Returns a pointer to the secp224r1 curve table defined above. */ +const struct ecc_curve *nettle_get_secp_224r1(void) +{ + return &_nettle_secp_224r1; +} diff --git a/ecc-secp256r1.c b/ecc-secp256r1.c new file mode 100644 index 00000000..835c91d3 --- /dev/null +++ b/ecc-secp256r1.c @@ -0,0 +1,310 @@ +/* ecc-secp256r1.c + + Compile time constant (but machine dependent) tables. + + Copyright (C) 2013, 2014 Niels Möller + + This file is part of GNU Nettle. + + GNU Nettle is free software: you can redistribute it and/or + modify it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + + or + + * the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your + option) any later version. + + or both in parallel, as here. + + GNU Nettle is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received copies of the GNU General Public License and + the GNU Lesser General Public License along with this program. If + not, see http://www.gnu.org/licenses/.
+*/ + +/* Development of Nettle's ECC support was funded by the .SE Internet Fund. */ + +#if HAVE_CONFIG_H +# include "config.h" +#endif + +#include <assert.h> + +#include "ecc.h" +#include "ecc-internal.h" + +#if HAVE_NATIVE_ecc_256_redc +# define USE_REDC 1 +#else +# define USE_REDC (ECC_REDC_SIZE != 0) +#endif + +#include "ecc-secp256r1.h" + +#if HAVE_NATIVE_ecc_256_redc +# define ecc_256_redc nettle_ecc_256_redc +void +ecc_256_redc (const struct ecc_modulo *p, mp_limb_t *rp); +#else /* !HAVE_NATIVE_ecc_256_redc */ +# if ECC_REDC_SIZE > 0 +# define ecc_256_redc ecc_pp1_redc +# elif ECC_REDC_SIZE == 0 +# define ecc_256_redc NULL +# else +# error Configuration error +# endif +#endif /* !HAVE_NATIVE_ecc_256_redc */ + +#if ECC_BMODP_SIZE < ECC_LIMB_SIZE +#define ecc_256_modp ecc_mod +#define ecc_256_modq ecc_mod +#elif GMP_NUMB_BITS == 64 +/* Reduces the 2*p->size limb value at rp in place, working down from the most significant limb with a quotient estimate per loop iteration. */ +static void +ecc_256_modp (const struct ecc_modulo *p, mp_limb_t *rp) +{ + mp_limb_t u1, u0; + mp_size_t n; + + n = 2*p->size; + u1 = rp[--n]; + u0 = rp[n-1]; + + /* This is not particularly fast, but should work well with assembly implementation. */ + for (; n >= p->size; n--) + { + mp_limb_t q2, q1, q0, t, cy; + + /* <q2, q1, q0> = v * u1 + <u1,u0>, with v = 2^32 - 1: + + +---+---+ + | u1| u0| + +---+---+ + |-u1| + +-+-+-+ + | u1| + +---+-+-+-+-+ + | q2| q1| q0| + +---+---+---+ + */ + q1 = u1 - (u1 > u0); + q0 = u0 - u1; + t = u1 << 32; + q0 += t; + t = (u1 >> 32) + (q0 < t) + 1; + q1 += t; + q2 = q1 < t; + + /* Compute candidate remainder */ + u1 = u0 + (q1 << 32) - q1; + t = -(mp_limb_t) (u1 > q0); + u1 -= t & 0xffffffff; + q1 += t; + q2 += t + (q1 < t); + + assert (q2 < 2); + + /* + n-1 n-2 n-3 n-4 + +---+---+---+---+ + | u1| u0| u low | + +---+---+---+---+ + - | q1(2^96-1)| + +-------+---+ + |q2(2^.)| + +-------+ + + We multiply by two low limbs of p, 2^96 - 1, so we could use + shifts rather than mul.
+ */ + t = mpn_submul_1 (rp + n - 4, p->m, 2, q1); + t += cnd_sub_n (q2, rp + n - 3, p->m, 1); + t += (-q2) & 0xffffffff; + + u0 = rp[n-2]; + cy = (u0 < t); + u0 -= t; + t = (u1 < cy); + u1 -= cy; + + cy = cnd_add_n (t, rp + n - 4, p->m, 2); + u0 += cy; + u1 += (u0 < cy); + u1 -= (-t) & 0xffffffff; + } + rp[2] = u0; + rp[3] = u1; +} +/* Same scheme as ecc_256_modp, but for the modulus q and with three live high limbs (u2, u1, u0). */ +static void +ecc_256_modq (const struct ecc_modulo *q, mp_limb_t *rp) +{ + mp_limb_t u2, u1, u0; + mp_size_t n; + + n = 2*q->size; + u2 = rp[--n]; + u1 = rp[n-1]; + + /* This is not particularly fast, but should work well with assembly implementation. */ + for (; n >= q->size; n--) + { + mp_limb_t q2, q1, q0, t, c1, c0; + + u0 = rp[n-2]; + + /* <q2, q1, q0> = v * u2 + <u2,u1>, same method as above. + + +---+---+ + | u2| u1| + +---+---+ + |-u2| + +-+-+-+ + | u2| + +---+-+-+-+-+ + | q2| q1| q0| + +---+---+---+ + */ + q1 = u2 - (u2 > u1); + q0 = u1 - u2; + t = u2 << 32; + q0 += t; + t = (u2 >> 32) + (q0 < t) + 1; + q1 += t; + q2 = q1 < t; + + /* Compute candidate remainder, <u1, u0> - <q2, q1> * (2^128 - 2^96 + 2^64 - 1) + <u1, u0> + 2^64 q2 + (2^96 - 2^64 + 1) q1 (mod 2^128) + + +---+---+ + | u1| u0| + +---+---+ + | q2| q1| + +---+---+ + |-q1| + +-+-+-+ + | q1| + --+-+-+-+---+ + | u2| u1| + +---+---+ + */ + u2 = u1 + q2 - q1; + u1 = u0 + q1; + u2 += (u1 < q1); + u2 += (q1 << 32); + + t = -(mp_limb_t) (u2 >= q0); + q1 += t; + q2 += t + (q1 < t); + u1 += t; + u2 += (t << 32) + (u1 < t); + + assert (q2 < 2); + + c0 = cnd_sub_n (q2, rp + n - 3, q->m, 1); + c0 += (-q2) & q->m[1]; + t = mpn_submul_1 (rp + n - 4, q->m, 2, q1); + c0 += t; + c1 = c0 < t; + + /* Construct underflow condition.
*/ + c1 += (u1 < c0); + t = - (mp_limb_t) (u2 < c1); + + u1 -= c0; + u2 -= c1; + + /* Conditional add of q */ + u1 += t; + u2 += (t<<32) + (u1 < t); + + t = cnd_add_n (t, rp + n - 4, q->m, 2); + u1 += t; + u2 += (u1 < t); + } + rp[2] = u1; + rp[3] = u2; +} + +#else +#error Unsupported parameters +#endif + +const struct ecc_curve _nettle_secp_256r1 = +{ + { + 256, + ECC_LIMB_SIZE, + ECC_BMODP_SIZE, + ECC_REDC_SIZE, + ECC_MOD_INV_ITCH (ECC_LIMB_SIZE), + 0, + + ecc_p, + ecc_Bmodp, + ecc_Bmodp_shifted, + ecc_redc_ppm1, + ecc_pp1h, + + ecc_256_modp, + USE_REDC ? ecc_256_redc : ecc_256_modp, + ecc_mod_inv, + NULL, + }, + { + 256, + ECC_LIMB_SIZE, + ECC_BMODQ_SIZE, + 0, + ECC_MOD_INV_ITCH (ECC_LIMB_SIZE), + 0, + + ecc_q, + ecc_Bmodq, + ecc_Bmodq_shifted, + NULL, + ecc_qp1h, + + ecc_256_modq, + ecc_256_modq, + ecc_mod_inv, + NULL, + }, + + USE_REDC, + ECC_PIPPENGER_K, + ECC_PIPPENGER_C, + + ECC_ADD_JJA_ITCH (ECC_LIMB_SIZE), + ECC_ADD_JJJ_ITCH (ECC_LIMB_SIZE), + ECC_DUP_JJ_ITCH (ECC_LIMB_SIZE), + ECC_MUL_A_ITCH (ECC_LIMB_SIZE), + ECC_MUL_G_ITCH (ECC_LIMB_SIZE), + ECC_J_TO_A_ITCH (ECC_LIMB_SIZE), + + ecc_add_jja, + ecc_add_jjj, + ecc_dup_jj, + ecc_mul_a, + ecc_mul_g, + ecc_j_to_a, + + ecc_b, + ecc_g, + ecc_unit, + ecc_table +}; +/* Returns a pointer to the secp256r1 curve table defined above. */ +const struct ecc_curve *nettle_get_secp_256r1(void) +{ + return &_nettle_secp_256r1; +} diff --git a/ecc-secp384r1.c b/ecc-secp384r1.c new file mode 100644 index 00000000..248b1cf3 --- /dev/null +++ b/ecc-secp384r1.c @@ -0,0 +1,218 @@ +/* ecc-secp384r1.c + + Compile time constant (but machine dependent) tables. + + Copyright (C) 2013, 2014 Niels Möller + + This file is part of GNU Nettle. + + GNU Nettle is free software: you can redistribute it and/or + modify it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version.
+ + or + + * the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your + option) any later version. + + or both in parallel, as here. + + GNU Nettle is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received copies of the GNU General Public License and + the GNU Lesser General Public License along with this program. If + not, see http://www.gnu.org/licenses/. +*/ + +/* Development of Nettle's ECC support was funded by the .SE Internet Fund. */ + +#if HAVE_CONFIG_H +# include "config.h" +#endif + +#include <assert.h> + +#include "ecc.h" +#include "ecc-internal.h" + +#define USE_REDC 0 + +#include "ecc-secp384r1.h" + +#if HAVE_NATIVE_ecc_384_modp +#define ecc_384_modp nettle_ecc_384_modp +void +ecc_384_modp (const struct ecc_modulo *m, mp_limb_t *rp); +#elif GMP_NUMB_BITS == 32 + +/* Use that 2^{384} = 2^{128} + 2^{96} - 2^{32} + 1 (mod p), and eliminate 256 + bits at a time. + + We can get carry == 2 in the first iteration, and I think *only* in + the first iteration. */ + +/* p is 12 limbs, and B^12 - p = B^4 + B^3 - B + 1. We can eliminate + almost 8 at a time. Do only 7, to avoid additional carry + propagation, followed by 5. The 24-limb value at rp is reduced in place. */ +static void +ecc_384_modp (const struct ecc_modulo *p, mp_limb_t *rp) +{ + mp_limb_t cy, bw; + + /* Reduce from 24 to 17 limbs.
*/ + cy = mpn_add_n (rp + 4, rp + 4, rp + 16, 8); + cy = sec_add_1 (rp + 12, rp + 12, 3, cy); + + bw = mpn_sub_n (rp + 5, rp + 5, rp + 16, 8); + bw = sec_sub_1 (rp + 13, rp + 13, 3, bw); + + cy += mpn_add_n (rp + 7, rp + 7, rp + 16, 8); + cy = sec_add_1 (rp + 15, rp + 15, 1, cy); + + cy += mpn_add_n (rp + 8, rp + 8, rp + 16, 8); + assert (bw <= cy); + cy -= bw; + + assert (cy <= 2); + rp[16] = cy; + + /* Reduce from 17 to 12 limbs */ + cy = mpn_add_n (rp, rp, rp + 12, 5); + cy = sec_add_1 (rp + 5, rp + 5, 3, cy); + + bw = mpn_sub_n (rp + 1, rp + 1, rp + 12, 5); + bw = sec_sub_1 (rp + 6, rp + 6, 6, bw); + + cy += mpn_add_n (rp + 3, rp + 3, rp + 12, 5); + cy = sec_add_1 (rp + 8, rp + 8, 1, cy); + + cy += mpn_add_n (rp + 4, rp + 4, rp + 12, 5); + cy = sec_add_1 (rp + 9, rp + 9, 3, cy); + + assert (cy >= bw); + cy -= bw; + assert (cy <= 1); + cy = cnd_add_n (cy, rp, p->B, ECC_LIMB_SIZE); + assert (cy == 0); +} +#elif GMP_NUMB_BITS == 64 +/* p is 6 limbs, and B^6 - p = B^2 + 2^32 (B - 1) + 1. Eliminate 3 + (almost 4) limbs at a time.
The 12-limb value at rp is reduced in place; tp holds the 2^32 (B - 1) multiple of the high limbs. */ +static void +ecc_384_modp (const struct ecc_modulo *p, mp_limb_t *rp) +{ + mp_limb_t tp[6]; + mp_limb_t cy; + + /* Reduce from 12 to 9 limbs */ + tp[0] = 0; /* FIXME: Could use mpn_sub_nc */ + mpn_copyi (tp + 1, rp + 8, 3); + tp[4] = rp[11] - mpn_sub_n (tp, tp, rp + 8, 4); + tp[5] = mpn_lshift (tp, tp, 5, 32); + + cy = mpn_add_n (rp + 2, rp + 2, rp + 8, 4); + cy = sec_add_1 (rp + 6, rp + 6, 2, cy); + + cy += mpn_add_n (rp + 2, rp + 2, tp, 6); + cy += mpn_add_n (rp + 4, rp + 4, rp + 8, 4); + + assert (cy <= 2); + rp[8] = cy; + + /* Reduce from 9 to 6 limbs */ + tp[0] = 0; + mpn_copyi (tp + 1, rp + 6, 2); + tp[3] = rp[8] - mpn_sub_n (tp, tp, rp + 6, 3); + tp[4] = mpn_lshift (tp, tp, 4, 32); + + cy = mpn_add_n (rp, rp, rp + 6, 3); + cy = sec_add_1 (rp + 3, rp + 3, 2, cy); + cy += mpn_add_n (rp, rp, tp, 5); + cy += mpn_add_n (rp + 2, rp + 2, rp + 6, 3); + + cy = sec_add_1 (rp + 5, rp + 5, 1, cy); + assert (cy <= 1); + + cy = cnd_add_n (cy, rp, p->B, ECC_LIMB_SIZE); + assert (cy == 0); +} +#else +#define ecc_384_modp ecc_mod +#endif + +const struct ecc_curve _nettle_secp_384r1 = +{ + { + 384, + ECC_LIMB_SIZE, + ECC_BMODP_SIZE, + ECC_REDC_SIZE, + ECC_MOD_INV_ITCH (ECC_LIMB_SIZE), + 0, + + ecc_p, + ecc_Bmodp, + ecc_Bmodp_shifted, + ecc_redc_ppm1, + ecc_pp1h, + + ecc_384_modp, + ecc_384_modp, + ecc_mod_inv, + NULL, + }, + { + 384, + ECC_LIMB_SIZE, + ECC_BMODQ_SIZE, + 0, + ECC_MOD_INV_ITCH (ECC_LIMB_SIZE), + 0, + + ecc_q, + ecc_Bmodq, + ecc_Bmodq_shifted, + NULL, + ecc_qp1h, + + ecc_mod, + ecc_mod, + ecc_mod_inv, + NULL, + }, + + USE_REDC, + ECC_PIPPENGER_K, + ECC_PIPPENGER_C, + + ECC_ADD_JJA_ITCH (ECC_LIMB_SIZE), + ECC_ADD_JJJ_ITCH (ECC_LIMB_SIZE), + ECC_DUP_JJ_ITCH (ECC_LIMB_SIZE), + ECC_MUL_A_ITCH (ECC_LIMB_SIZE), + ECC_MUL_G_ITCH (ECC_LIMB_SIZE), + ECC_J_TO_A_ITCH (ECC_LIMB_SIZE), + + ecc_add_jja, + ecc_add_jjj, + ecc_dup_jj, + ecc_mul_a, + ecc_mul_g, + ecc_j_to_a, + + ecc_b, + ecc_g, + ecc_unit, + ecc_table +}; +/* Returns a pointer to the secp384r1 curve table defined above. */ +const struct ecc_curve *nettle_get_secp_384r1(void)
+{ + return &_nettle_secp_384r1; +} diff --git a/ecc-secp521r1.c b/ecc-secp521r1.c new file mode 100644 index 00000000..cc747303 --- /dev/null +++ b/ecc-secp521r1.c @@ -0,0 +1,146 @@ +/* ecc-secp521r1.c + + Compile time constant (but machine dependent) tables. + + Copyright (C) 2013, 2014 Niels Möller + + This file is part of GNU Nettle. + + GNU Nettle is free software: you can redistribute it and/or + modify it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + + or + + * the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your + option) any later version. + + or both in parallel, as here. + + GNU Nettle is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received copies of the GNU General Public License and + the GNU Lesser General Public License along with this program. If + not, see http://www.gnu.org/licenses/. +*/ + +/* Development of Nettle's ECC support was funded by the .SE Internet Fund.
*/ + +#if HAVE_CONFIG_H +# include "config.h" +#endif + +#include "ecc.h" +#include "ecc-internal.h" + +#define USE_REDC 0 + +#include "ecc-secp521r1.h" + +#if HAVE_NATIVE_ecc_521_modp +#define ecc_521_modp nettle_ecc_521_modp +void +ecc_521_modp (const struct ecc_modulo *m, mp_limb_t *rp); + +#else +/* B_SHIFT: number of modulus bits in the top limb (521 mod limb size). BMODP = 2^BMODP_SHIFT; presumably B^ECC_LIMB_SIZE mod p for p = 2^521 - 1 -- TODO confirm against ECC_LIMB_SIZE. */ +#define B_SHIFT (521 % GMP_NUMB_BITS) +#define BMODP_SHIFT (GMP_NUMB_BITS - B_SHIFT) +#define BMODP ((mp_limb_t) 1 << BMODP_SHIFT) + +/* Reduces the 2*ECC_LIMB_SIZE limb value at rp in place. Result may be *slightly* larger than 2^521 */ +static void +ecc_521_modp (const struct ecc_modulo *m UNUSED, mp_limb_t *rp) +{ + /* FIXME: Should use mpn_addlsh_n_ip1 */ + mp_limb_t hi; + /* Reduce from 2*ECC_LIMB_SIZE to ECC_LIMB_SIZE + 1 */ + rp[ECC_LIMB_SIZE] + = mpn_addmul_1 (rp, rp + ECC_LIMB_SIZE, ECC_LIMB_SIZE, BMODP); + hi = mpn_addmul_1 (rp, rp + ECC_LIMB_SIZE, 1, BMODP); + hi = sec_add_1 (rp + 1, rp + 1, ECC_LIMB_SIZE - 1, hi); + + /* Combine hi with top bits, and add in. The top limb is masked down to its low B_SHIFT bits before the final carry is added. */ + hi = (hi << BMODP_SHIFT) | (rp[ECC_LIMB_SIZE-1] >> B_SHIFT); + rp[ECC_LIMB_SIZE-1] = (rp[ECC_LIMB_SIZE-1] + & (((mp_limb_t) 1 << B_SHIFT)-1)) + + sec_add_1 (rp, rp, ECC_LIMB_SIZE - 1, hi); +} +#endif + +const struct ecc_curve _nettle_secp_521r1 = +{ + { + 521, + ECC_LIMB_SIZE, + ECC_BMODP_SIZE, + ECC_REDC_SIZE, + ECC_MOD_INV_ITCH (ECC_LIMB_SIZE), + 0, + + ecc_p, + ecc_Bmodp, + ecc_Bmodp_shifted, + ecc_redc_ppm1, + ecc_pp1h, + + ecc_521_modp, + ecc_521_modp, + ecc_mod_inv, + NULL, + }, + { + 521, + ECC_LIMB_SIZE, + ECC_BMODQ_SIZE, + 0, + ECC_MOD_INV_ITCH (ECC_LIMB_SIZE), + 0, + + ecc_q, + ecc_Bmodq, + ecc_Bmodq_shifted, + NULL, + ecc_qp1h, + + ecc_mod, + ecc_mod, + ecc_mod_inv, + NULL, + }, + + USE_REDC, + ECC_PIPPENGER_K, + ECC_PIPPENGER_C, + + ECC_ADD_JJA_ITCH (ECC_LIMB_SIZE), + ECC_ADD_JJJ_ITCH (ECC_LIMB_SIZE), + ECC_DUP_JJ_ITCH (ECC_LIMB_SIZE), + ECC_MUL_A_ITCH (ECC_LIMB_SIZE), + ECC_MUL_G_ITCH (ECC_LIMB_SIZE), + ECC_J_TO_A_ITCH (ECC_LIMB_SIZE), + + ecc_add_jja, + ecc_add_jjj, + ecc_dup_jj, + ecc_mul_a, + ecc_mul_g, + ecc_j_to_a, + + ecc_b, +
ecc_g, + ecc_unit, + ecc_table +}; + +const struct ecc_curve *nettle_get_secp_521r1(void) +{ + return &_nettle_secp_521r1; +} diff --git a/eccdata.c b/eccdata.c index 74002c1f..d76a42bc 100644 --- a/eccdata.c +++ b/eccdata.c @@ -432,11 +432,10 @@ ecc_curve_init_str (struct ecc_curve *ecc, enum ecc_type type, } static void -ecc_curve_init (struct ecc_curve *ecc, unsigned bit_size) +ecc_curve_init (struct ecc_curve *ecc, const char *curve) { - switch (bit_size) + if (!strcmp (curve, "secp192r1")) { - case 192: ecc_curve_init_str (ecc, ECC_TYPE_WEIERSTRASS, /* p = 2^{192} - 2^{64} - 1 */ "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFE" @@ -466,8 +465,9 @@ ecc_curve_init (struct ecc_curve *ecc, unsigned bit_size) "35433907297cc378b0015703374729d7a4fe46647084e4ba", "a2649984f2135c301ea3acb0776cd4f125389b311db3be32"); - break; - case 224: + } + else if (!strcmp (curve, "secp224r1")) + { ecc_curve_init_str (ecc, ECC_TYPE_WEIERSTRASS, /* p = 2^{224} - 2^{96} + 1 */ "ffffffffffffffffffffffffffffffff" @@ -498,8 +498,9 @@ ecc_curve_init (struct ecc_curve *ecc, unsigned bit_size) "ae99feebb5d26945b54892092a8aee02912930fa41cd114e40447301", "482580a0ec5bc47e88bc8c378632cd196cb3fa058a7114eb03054c9"); - break; - case 256: + } + else if (!strcmp (curve, "secp256r1")) + { ecc_curve_init_str (ecc, ECC_TYPE_WEIERSTRASS, /* p = 2^{256} - 2^{224} + 2^{192} + 2^{96} - 1 */ "FFFFFFFF000000010000000000000000" @@ -530,8 +531,9 @@ ecc_curve_init (struct ecc_curve *ecc, unsigned bit_size) "e2534a3532d08fbba02dde659ee62bd0031fe2db785596ef509302446b030852", "e0f1575a4c633cc719dfee5fda862d764efc96c3f30ee0055c42c23f184ed8c6"); - break; - case 384: + } + else if (!strcmp (curve, "secp384r1")) + { ecc_curve_init_str (ecc, ECC_TYPE_WEIERSTRASS, /* p = 2^{384} - 2^{128} - 2^{96} + 2^{32} - 1 */ "ffffffffffffffffffffffffffffffff" @@ -567,8 +569,9 @@ ecc_curve_init (struct ecc_curve *ecc, unsigned bit_size) "138251cd52ac9298c1c8aad977321deb97e709bd0b4ca0aca55dc8ad51dcfc9d1589a1597e3a5120e1efd631c63e1835", 
"cacae29869a62e1631e8a28181ab56616dc45d918abc09f3ab0e63cf792aa4dced7387be37bba569549f1c02b270ed67"); - break; - case 521: + } + else if (!strcmp (curve, "secp521r1")) + { ecc_curve_init_str (ecc, ECC_TYPE_WEIERSTRASS, "1ff" /* p = 2^{521} - 1 */ "ffffffffffffffffffffffffffffffff" @@ -613,9 +616,15 @@ ecc_curve_init (struct ecc_curve *ecc, unsigned bit_size) "35b5df64ae2ac204c354b483487c9070cdc61c891c5ff39afc06c5d55541d3ceac8659e24afe3d0750e8b88e9f078af066a1d5025b08e5a5e2fbc87412871902f3", "82096f84261279d2b673e0178eb0b4abb65521aef6e6e32e1b5ae63fe2f19907f279f283e54ba385405224f750a95b85eebb7faef04699d1d9e21f47fc346e4d0d"); - break; - case 255: - /* Edwards curve used for eddsa25519 and curve25519, + } + else if (!strcmp (curve, "curve25519")) + { + /* curve25519, y^2 = x^3 + 486662 x^2 + x (mod p), with p = 2^{255} - 19. + + According to http://cr.yp.to/papers.html#newelliptic, this + is birationally equivalent to the Edwards curve + + x^2 + y^2 = 1 + (121665/121666) x^2 y^2 (mod p). -x^2 + y^2 = 1 - (121665/121666) x^2 y^2, with p = 2^{255} - 19. @@ -664,9 +673,9 @@ ecc_curve_init (struct ecc_curve *ecc, unsigned bit_size) "1a739ec193ce1547493aa657c4c9f870", "47d0e827cb1595e1470eb88580d5716c" "4cf22832ea2f0ff0df38ab61ca32112f"); - break; - - case 448: + } + else if (!strcmp (curve, "curve448")) + { /* curve448, y^2 = x^3 + 156326 x^2 + x (mod p), with p = 2^{448} - 2^{224} - 1. 
According to RFC 7748, this is 4-isogenious to the Edwards @@ -745,14 +754,13 @@ ecc_curve_init (struct ecc_curve *ecc, unsigned bit_size) "9cb7c02f0457d845c90dc3227b8a5bc1" "c0d8f97ea1ca9472b5d444285d0d4f5b" "32e236f86de51839"); - - break; - - default: - fprintf (stderr, "No known curve for size %d\n", bit_size); - exit(EXIT_FAILURE); } - ecc->bit_size = bit_size; + else + { + fprintf (stderr, "No known curve with name %s\n", curve); + exit(EXIT_FAILURE); + } + ecc->bit_size = mpz_sizeinbase (ecc->p, 2); } static void @@ -1312,7 +1320,7 @@ main (int argc, char **argv) return EXIT_FAILURE; } - ecc_curve_init (&ecc, atoi(argv[1])); + ecc_curve_init (&ecc, argv[1]); ecc_pippenger_precompute (&ecc, atoi(argv[2]), atoi(argv[3])); diff --git a/x86_64/ecc-192-modp.asm b/x86_64/ecc-192-modp.asm deleted file mode 100644 index f0660525..00000000 --- a/x86_64/ecc-192-modp.asm +++ /dev/null @@ -1,88 +0,0 @@ -C x86_64/ecc-192-modp.asm - -ifelse(< - Copyright (C) 2013 Niels Möller - - This file is part of GNU Nettle. - - GNU Nettle is free software: you can redistribute it and/or - modify it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - - or - - * the GNU General Public License as published by the Free - Software Foundation; either version 2 of the License, or (at your - option) any later version. - - or both in parallel, as here. - - GNU Nettle is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received copies of the GNU General Public License and - the GNU Lesser General Public License along with this program. If - not, see http://www.gnu.org/licenses/. 
->) - - .file "ecc-192-modp.asm" - -define(, <%rsi>) -define(, <%rdi>) C Overlaps unused modulo input -define(, <%rcx>) -define(, <%rdx>) -define(, <%r8>) -define(, <%r9>) -define(, <%r10>) -define(, <%r11>) - - C ecc_192_modp (const struct ecc_modulo *m, mp_limb_t *rp) - .text - ALIGN(16) -PROLOGUE(nettle_ecc_192_modp) - W64_ENTRY(2, 0) - mov 16(RP), T2 - mov 24(RP), T3 - mov 40(RP), H - xor C1, C1 - xor C2, C2 - - add H, T2 - adc H, T3 - C Carry to be added in at T1 and T2 - setc LREG(C2) - - mov 8(RP), T1 - mov 32(RP), H - adc H, T1 - adc H, T2 - C Carry to be added in at T0 and T1 - setc LREG(C1) - - mov (RP), T0 - adc T3, T0 - adc T3, T1 - adc $0, C2 - - C Add in C1 and C2 - add C1, T1 - adc C2, T2 - setc LREG(C1) - - C Fold final carry. - adc $0, T0 - adc C1, T1 - adc $0, T2 - - mov T0, (RP) - mov T1, 8(RP) - mov T2, 16(RP) - - W64_EXIT(2, 0) - ret -EPILOGUE(nettle_ecc_192_modp) diff --git a/x86_64/ecc-224-modp.asm b/x86_64/ecc-224-modp.asm deleted file mode 100644 index 07bd4003..00000000 --- a/x86_64/ecc-224-modp.asm +++ /dev/null @@ -1,131 +0,0 @@ -C x86_64/ecc-224-modp.asm - -ifelse(< - Copyright (C) 2013 Niels Möller - - This file is part of GNU Nettle. - - GNU Nettle is free software: you can redistribute it and/or - modify it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - - or - - * the GNU General Public License as published by the Free - Software Foundation; either version 2 of the License, or (at your - option) any later version. - - or both in parallel, as here. - - GNU Nettle is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. 
- - You should have received copies of the GNU General Public License and - the GNU Lesser General Public License along with this program. If - not, see http://www.gnu.org/licenses/. ->) - - .file "ecc-224-modp.asm" - -GMP_NUMB_BITS(64) - -define(, <%rsi>) -define(, <%rdi>) C Overlaps unused modulo input -define(, <%rcx>) -define(, <%rax>) -define(

, <%rdx>) -define(

, <%r8>) -define(, <%r9>) -define(, <%r10>) -define(, <%r11>) - - C ecc_224_modp (const struct ecc_modulo *m, mp_limb_t *rp) -PROLOGUE(nettle_ecc_224_modp) - W64_ENTRY(2, 0) - mov 48(RP), H0 - mov 56(RP), H1 - C Set (F2,F1,F0) <-- (H1,H0) << 32 - mov H0, F0 - mov H0, F1 - shl $32, F0 - shr $32, F1 - mov H1, F2 - mov H1, T0 - shl $32, T0 - shr $32, F2 - or T0, F1 - - xor H2, H2 - mov 16(RP), T0 - mov 24(RP), T1 - sub F0, T0 - sbb F1, T1 - sbb F2, H0 - sbb $0, H1 C No further borrow - - adc 32(RP), H0 - adc 40(RP), H1 - adc $0, H2 - - C Set (F2,F1,F0) <-- (H2,H1,H0) << 32 - C To free registers, add in T1, T0 as soon as H0, H1 have been copied - mov H0, F0 - mov H0, F1 - add T0, H0 - mov H1, F2 - mov H1, T0 - adc T1, H1 - mov H2, T1 - adc $0, H2 - - C Shift 32 bits - shl $32, F0 - shr $32, F1 - shl $32, T0 - shr $32, F2 - shl $32, T1 - or T0, F1 - or T1, F2 - - mov (RP), T0 - mov 8(RP), T1 - sub F0, T0 - sbb F1, T1 - sbb F2, H0 - sbb $0, H1 - sbb $0, H2 - - C We now have H2, H1, H0, T1, T0, with 33 bits left to reduce - C Set F0 <-- (H2, H1) >> 32 - C Set (F2,F1) <-- (H2, H1 & 0xffffffff00000000) - C H1 <-- H1 & 0xffffffff - - mov H1, F0 - mov H1, F1 - mov H2, F2 - movl XREG(H1), XREG(H1) C Clears high 32 bits - sub H1, F1 C Clears low 32 bits - shr $32, F0 - shl $32, H2 - or H2, F0 - - sub F0, T0 - sbb $0, F1 - sbb $0, F2 - add F1, T1 - adc F2, H0 - adc $0, H1 - - mov T0, (RP) - mov T1, 8(RP) - mov H0, 16(RP) - mov H1, 24(RP) - - W64_EXIT(2, 0) - ret -EPILOGUE(nettle_ecc_224_modp) diff --git a/x86_64/ecc-25519-modp.asm b/x86_64/ecc-25519-modp.asm deleted file mode 100644 index 58c14fe0..00000000 --- a/x86_64/ecc-25519-modp.asm +++ /dev/null @@ -1,94 +0,0 @@ -C x86_64/ecc-25519-modp.asm - -ifelse(< - Copyright (C) 2014 Niels Möller - - This file is part of GNU Nettle. 
- - GNU Nettle is free software: you can redistribute it and/or - modify it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - - or - - * the GNU General Public License as published by the Free - Software Foundation; either version 2 of the License, or (at your - option) any later version. - - or both in parallel, as here. - - GNU Nettle is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received copies of the GNU General Public License and - the GNU Lesser General Public License along with this program. If - not, see http://www.gnu.org/licenses/. ->) - - .file "ecc-25519-modp.asm" - -define(, <%rsi>) -define(, <%rdi>) C Overlaps unused modulo input -define(, <%rcx>) -define(, <%r8>) -define(, <%r9>) -define(, <%r10>) -define(, <%r11>) -define(, <%rbx>) - -PROLOGUE(nettle_ecc_25519_modp) - W64_ENTRY(2, 0) - push %rbx - - C First fold the limbs affecting bit 255 - mov 56(RP), %rax - mov $38, M - mul M - mov 24(RP), U3 - xor T0, T0 - add %rax, U3 - adc %rdx, T0 - - mov 40(RP), %rax C Do this early as possible - mul M - - add U3, U3 - adc T0, T0 - shr U3 C Undo shift, clear high bit - - C Fold the high limb again, together with RP[5] - imul $19, T0 - - mov (RP), U0 - mov 8(RP), U1 - mov 16(RP), U2 - add T0, U0 - adc %rax, U1 - mov 32(RP), %rax - adc %rdx, U2 - adc $0, U3 - - C Fold final two limbs, RP[4] and RP[6] - mul M - mov %rax, T0 - mov 48(RP), %rax - mov %rdx, T1 - mul M - add T0, U0 - mov U0, (RP) - adc T1, U1 - mov U1, 8(RP) - adc %rax, U2 - mov U2, 16(RP) - adc %rdx, U3 - mov U3, 24(RP) - - pop %rbx - W64_EXIT(2, 0) - ret -EPILOGUE(nettle_ecc_25519_modp) diff --git a/x86_64/ecc-256-redc.asm b/x86_64/ecc-256-redc.asm deleted 
file mode 100644 index fb163354..00000000 --- a/x86_64/ecc-256-redc.asm +++ /dev/null @@ -1,129 +0,0 @@ -C x86_64/ecc-256-redc.asm - -ifelse(< - Copyright (C) 2013 Niels Möller - - This file is part of GNU Nettle. - - GNU Nettle is free software: you can redistribute it and/or - modify it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - - or - - * the GNU General Public License as published by the Free - Software Foundation; either version 2 of the License, or (at your - option) any later version. - - or both in parallel, as here. - - GNU Nettle is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received copies of the GNU General Public License and - the GNU Lesser General Public License along with this program. If - not, see http://www.gnu.org/licenses/. 
->) - - .file "ecc-256-redc.asm" - -define(, <%rsi>) -define(, <%rdi>) C Overlaps unused modulo input -define(, <%rcx>) -define(, <%rax>) -define(, <%rdx>) -define(, <%r8>) -define(, <%r9>) -define(, <%r10>) -define(, <%r11>) -define(, <%r12>) -define(, <%rbx>) -define(, <%rbp>) - -C FOLD(x), sets (F3,F2,F1,F0) <-- (x << 224) - (x << 128) - (x<<32) -define(, < - mov $1, F2 - mov $1, F3 - shl <$>32, F2 - shr <$>32, F3 - xor F0,F0 - xor F1,F1 - sub F2, F0 - sbb F3, F1 - sbb $1, F2 - sbb <$>0, F3 ->) -PROLOGUE(nettle_ecc_256_redc) - W64_ENTRY(2, 0) - C save all registers that need to be saved - push %rbx - push %rbp - push %r12 - - mov (RP), U0 - FOLD(U0) - mov 8(RP), U1 - mov 16(RP), U2 - mov 24(RP), U3 - sub F0, U1 - sbb F1, U2 - sbb F2, U3 - sbb F3, U0 C Add in later - - FOLD(U1) - mov 32(RP), U4 - sub F0, U2 - sbb F1, U3 - sbb F2, U4 - sbb F3, U1 - - FOLD(U2) - mov 40(RP), U5 - sub F0, U3 - sbb F1, U4 - sbb F2, U5 - sbb F3, U2 - - FOLD(U3) - mov 48(RP), U6 - sub F0, U4 - sbb F1, U5 - sbb F2, U6 - sbb F3, U3 - - add U4, U0 - adc U5, U1 - adc U6, U2 - adc 56(RP), U3 - - C If carry, we need to add in - C 2^256 - p = <0xfffffffe, 0xff..ff, 0xffffffff00000000, 1> - sbb F2, F2 - mov F2, F0 - mov F2, F1 - mov XREG(F2), XREG(F3) - neg F0 - shl $32, F1 - and $-2, XREG(F3) - - add F0, U0 - mov U0, (RP) - adc F1, U1 - mov U1, 8(RP) - adc F2, U2 - mov U2, 16(RP) - adc F3, U3 - - mov U3, 24(RP) - - pop %r12 - pop %rbp - pop %rbx - W64_EXIT(2, 0) - ret -EPILOGUE(nettle_ecc_256_redc) diff --git a/x86_64/ecc-384-modp.asm b/x86_64/ecc-384-modp.asm deleted file mode 100644 index 8e55393f..00000000 --- a/x86_64/ecc-384-modp.asm +++ /dev/null @@ -1,234 +0,0 @@ -C x86_64/ecc-384-modp.asm - -ifelse(< - Copyright (C) 2013, 2015 Niels Möller - - This file is part of GNU Nettle. 
- - GNU Nettle is free software: you can redistribute it and/or - modify it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - - or - - * the GNU General Public License as published by the Free - Software Foundation; either version 2 of the License, or (at your - option) any later version. - - or both in parallel, as here. - - GNU Nettle is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received copies of the GNU General Public License and - the GNU Lesser General Public License along with this program. If - not, see http://www.gnu.org/licenses/. ->) - - .file "ecc-384-modp.asm" - -define(, <%rsi>) -define(, <%rax>) -define(, <%rbx>) -define(, <%rcx>) -define(, <%rdx>) -define(, <%rbp>) -define(, <%rdi>) -define(, <%r8>) -define(, <%r9>) -define(

, <%r10>) -define(

, <%r11>) -define(

, <%r12>) -define(

, <%r13>) -define(

, <%r14>) -define(, <%r15>) -define(, H5) C Overlap -define(, RP) C Overlap - - -PROLOGUE(nettle_ecc_384_modp) - W64_ENTRY(2, 0) - - push %rbx - push %rbp - push %r12 - push %r13 - push %r14 - push %r15 - - C First get top 2 limbs, which need folding twice. - C B^10 = B^6 + B^4 + 2^32 (B-1)B^4. - C We handle the terms as follow: - C - C B^6: Folded immediatly. - C - C B^4: Delayed, added in in the next folding. - C - C 2^32(B-1) B^4: Low half limb delayed until the next - C folding. Top 1.5 limbs subtracted and shifter now, resulting - C in 2.5 limbs. The low limb saved in D5, high 1.5 limbs added - C in. - - mov 80(RP), H4 - mov 88(RP), H5 - C Shift right 32 bits, into H1, H0 - mov H4, H0 - mov H5, H1 - mov H5, D5 - shr $32, H1 - shl $32, D5 - shr $32, H0 - or D5, H0 - - C H1 H0 - C - H1 H0 - C -------- - C H1 H0 D5 - mov H0, D5 - neg D5 - sbb H1, H0 - sbb $0, H1 - - xor C2, C2 - add H4, H0 - adc H5, H1 - adc $0, C2 - - C Add in to high part - add 48(RP), H0 - adc 56(RP), H1 - adc $0, C2 C Do C2 later - - C +1 term - mov (RP), T0 - add H0, T0 - mov 8(RP), T1 - adc H1, T1 - mov 16(RP), T2 - mov 64(RP), H2 - adc H2, T2 - mov 24(RP), T3 - mov 72(RP), H3 - adc H3, T3 - mov 32(RP), T4 - adc H4, T4 - mov 40(RP), T5 - adc H5, T5 - sbb C0, C0 - neg C0 C FIXME: Switch sign of C0? 
- - push RP - - C +B^2 term - add H0, T2 - adc H1, T3 - adc H2, T4 - adc H3, T5 - adc $0, C0 - - C Shift left, including low half of H4 - mov H3, TMP - shl $32, H4 - shr $32, TMP - or TMP, H4 - - mov H2, TMP - shl $32, H3 - shr $32, TMP - or TMP, H3 - - mov H1, TMP - shl $32, H2 - shr $32, TMP - or TMP, H2 - - mov H0, TMP - shl $32, H1 - shr $32, TMP - or TMP, H1 - - shl $32, H0 - - C H4 H3 H2 H1 H0 0 - C - H4 H3 H2 H1 H0 - C --------------- - C H4 H3 H2 H1 H0 TMP - - mov H0, TMP - neg TMP - sbb H1, H0 - sbb H2, H1 - sbb H3, H2 - sbb H4, H3 - sbb $0, H4 - - add TMP, T0 - adc H0, T1 - adc H1, T2 - adc H2, T3 - adc H3, T4 - adc H4, T5 - adc $0, C0 - - C Remains to add in C2 and C0 - C Set H1, H0 = (2^96 - 2^32 + 1) C0 - mov C0, H0 - mov C0, H1 - shl $32, H1 - sub H1, H0 - sbb $0, H1 - - C Set H3, H2 = (2^96 - 2^32 + 1) C2 - mov C2, H2 - mov C2, H3 - shl $32, H3 - sub H3, H2 - sbb $0, H3 - add C0, H2 C No carry. Could use lea trick - - xor C0, C0 - add H0, T0 - adc H1, T1 - adc H2, T2 - adc H3, T3 - adc C2, T4 - adc D5, T5 C Value delayed from initial folding - adc $0, C0 C Use sbb and switch sign? - - C Final unlikely carry - mov C0, H0 - mov C0, H1 - shl $32, H1 - sub H1, H0 - sbb $0, H1 - - pop RP - - add H0, T0 - mov T0, (RP) - adc H1, T1 - mov T1, 8(RP) - adc C0, T2 - mov T2, 16(RP) - adc $0, T3 - mov T3, 24(RP) - adc $0, T4 - mov T4, 32(RP) - adc $0, T5 - mov T5, 40(RP) - - pop %r15 - pop %r14 - pop %r13 - pop %r12 - pop %rbp - pop %rbx - - W64_EXIT(2, 0) - ret -EPILOGUE(nettle_ecc_384_modp) diff --git a/x86_64/ecc-521-modp.asm b/x86_64/ecc-521-modp.asm deleted file mode 100644 index 6e818ad8..00000000 --- a/x86_64/ecc-521-modp.asm +++ /dev/null @@ -1,158 +0,0 @@ -C x86_64/ecc-521-modp.asm - -ifelse(< - Copyright (C) 2013 Niels Möller - - This file is part of GNU Nettle. 
- - GNU Nettle is free software: you can redistribute it and/or - modify it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - - or - - * the GNU General Public License as published by the Free - Software Foundation; either version 2 of the License, or (at your - option) any later version. - - or both in parallel, as here. - - GNU Nettle is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received copies of the GNU General Public License and - the GNU Lesser General Public License along with this program. If - not, see http://www.gnu.org/licenses/. ->) - - .file "ecc-521-modp.asm" - -GMP_NUMB_BITS(64) - -define(, <%rsi>) -define(, <%rax>) -define(, <%rbx>) -define(, <%rcx>) -define(, <%rdx>) -define(, <%rbp>) -define(, <%rdi>) -define(, <%r8>) -define(, <%r9>) -define(, <%r10>) -define(, <%r11>) -define(, <%r12>) -define(, <%r13>) - -PROLOGUE(nettle_ecc_521_modp) - W64_ENTRY(2, 0) - push %rbx - push %rbp - push %r12 - push %r13 - - C Read top 17 limbs, shift left 55 bits - mov 72(RP), U1 - mov U1, U0 - shl $55, U0 - shr $9, U1 - - mov 80(RP), U2 - mov U2, T0 - shr $9, U2 - shl $55, T0 - or T0, U1 - - mov 88(RP), U3 - mov U3, T0 - shr $9, U3 - shl $55, T0 - or T0, U2 - - mov 96(RP), U4 - mov U4, T0 - shr $9, U4 - shl $55, T0 - or T0, U3 - - mov 104(RP), U5 - mov U5, T0 - shr $9, U5 - shl $55, T0 - or T0, U4 - - mov 112(RP), U6 - mov U6, T0 - shr $9, U6 - shl $55, T0 - or T0, U5 - - mov 120(RP), U7 - mov U7, T0 - shr $9, U7 - shl $55, T0 - or T0, U6 - - mov 128(RP), U8 - mov U8, T0 - shr $9, U8 - shl $55, T0 - or T0, U7 - - mov 136(RP), U9 - mov U9, T0 - shr $9, U9 - shl $55, T0 - or T0, U8 - - add (RP), U0 - adc 8(RP), U1 - adc 
16(RP), U2 - adc 24(RP), U3 - adc 32(RP), U4 - adc 40(RP), U5 - adc 48(RP), U6 - adc 56(RP), U7 - adc 64(RP), U8 - adc $0, U9 - - C Top limbs are . Keep low 9 bits of 8, and fold the - C top bits (at most 65 bits). - mov U8, T0 - shr $9, T0 - and $0x1ff, U8 - mov U9, T1 - shl $55, U9 - shr $9, T1 - or U9, T0 - - add T0, U0 - mov U0, (RP) - adc T1, U1 - mov U1, 8(RP) - adc $0, U2 - mov U2, 16(RP) - adc $0, U3 - mov U3, 24(RP) - adc $0, U4 - mov U4, 32(RP) - adc $0, U5 - mov U5, 40(RP) - adc $0, U6 - mov U6, 48(RP) - adc $0, U7 - mov U7, 56(RP) - adc $0, U8 - mov U8, 64(RP) - - pop %r13 - pop %r12 - pop %rbp - pop %rbx - W64_EXIT(2, 0) - ret -EPILOGUE(nettle_ecc_521_modp) diff --git a/x86_64/ecc-curve25519-modp.asm b/x86_64/ecc-curve25519-modp.asm new file mode 100644 index 00000000..58c14fe0 --- /dev/null +++ b/x86_64/ecc-curve25519-modp.asm @@ -0,0 +1,94 @@ +C x86_64/ecc-25519-modp.asm + +ifelse(< + Copyright (C) 2014 Niels Möller + + This file is part of GNU Nettle. + + GNU Nettle is free software: you can redistribute it and/or + modify it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + + or + + * the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your + option) any later version. + + or both in parallel, as here. + + GNU Nettle is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received copies of the GNU General Public License and + the GNU Lesser General Public License along with this program. If + not, see http://www.gnu.org/licenses/. 
+>) + + .file "ecc-25519-modp.asm" + +define(, <%rsi>) +define(, <%rdi>) C Overlaps unused modulo input +define(, <%rcx>) +define(, <%r8>) +define(, <%r9>) +define(, <%r10>) +define(, <%r11>) +define(, <%rbx>) + +PROLOGUE(nettle_ecc_25519_modp) + W64_ENTRY(2, 0) + push %rbx + + C First fold the limbs affecting bit 255 + mov 56(RP), %rax + mov $38, M + mul M + mov 24(RP), U3 + xor T0, T0 + add %rax, U3 + adc %rdx, T0 + + mov 40(RP), %rax C Do this early as possible + mul M + + add U3, U3 + adc T0, T0 + shr U3 C Undo shift, clear high bit + + C Fold the high limb again, together with RP[5] + imul $19, T0 + + mov (RP), U0 + mov 8(RP), U1 + mov 16(RP), U2 + add T0, U0 + adc %rax, U1 + mov 32(RP), %rax + adc %rdx, U2 + adc $0, U3 + + C Fold final two limbs, RP[4] and RP[6] + mul M + mov %rax, T0 + mov 48(RP), %rax + mov %rdx, T1 + mul M + add T0, U0 + mov U0, (RP) + adc T1, U1 + mov U1, 8(RP) + adc %rax, U2 + mov U2, 16(RP) + adc %rdx, U3 + mov U3, 24(RP) + + pop %rbx + W64_EXIT(2, 0) + ret +EPILOGUE(nettle_ecc_25519_modp) diff --git a/x86_64/ecc-secp192r1-modp.asm b/x86_64/ecc-secp192r1-modp.asm new file mode 100644 index 00000000..644ed60c --- /dev/null +++ b/x86_64/ecc-secp192r1-modp.asm @@ -0,0 +1,88 @@ +C x86_64/ecc-secp192r1-modp.asm + +ifelse(< + Copyright (C) 2013 Niels Möller + + This file is part of GNU Nettle. + + GNU Nettle is free software: you can redistribute it and/or + modify it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + + or + + * the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your + option) any later version. + + or both in parallel, as here. + + GNU Nettle is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + General Public License for more details. + + You should have received copies of the GNU General Public License and + the GNU Lesser General Public License along with this program. If + not, see http://www.gnu.org/licenses/. +>) + + .file "ecc-secp192r1-modp.asm" + +define(, <%rsi>) +define(, <%rdi>) C Overlaps unused modulo input +define(, <%rcx>) +define(, <%rdx>) +define(, <%r8>) +define(, <%r9>) +define(, <%r10>) +define(, <%r11>) + + C ecc_192_modp (const struct ecc_modulo *m, mp_limb_t *rp) + .text + ALIGN(16) +PROLOGUE(nettle_ecc_192_modp) + W64_ENTRY(2, 0) + mov 16(RP), T2 + mov 24(RP), T3 + mov 40(RP), H + xor C1, C1 + xor C2, C2 + + add H, T2 + adc H, T3 + C Carry to be added in at T1 and T2 + setc LREG(C2) + + mov 8(RP), T1 + mov 32(RP), H + adc H, T1 + adc H, T2 + C Carry to be added in at T0 and T1 + setc LREG(C1) + + mov (RP), T0 + adc T3, T0 + adc T3, T1 + adc $0, C2 + + C Add in C1 and C2 + add C1, T1 + adc C2, T2 + setc LREG(C1) + + C Fold final carry. + adc $0, T0 + adc C1, T1 + adc $0, T2 + + mov T0, (RP) + mov T1, 8(RP) + mov T2, 16(RP) + + W64_EXIT(2, 0) + ret +EPILOGUE(nettle_ecc_192_modp) diff --git a/x86_64/ecc-secp224r1-modp.asm b/x86_64/ecc-secp224r1-modp.asm new file mode 100644 index 00000000..ca164ac7 --- /dev/null +++ b/x86_64/ecc-secp224r1-modp.asm @@ -0,0 +1,131 @@ +C x86_64/ecc-secp224r1-modp.asm + +ifelse(< + Copyright (C) 2013 Niels Möller + + This file is part of GNU Nettle. + + GNU Nettle is free software: you can redistribute it and/or + modify it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + + or + + * the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your + option) any later version. + + or both in parallel, as here. 
+ + GNU Nettle is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received copies of the GNU General Public License and + the GNU Lesser General Public License along with this program. If + not, see http://www.gnu.org/licenses/. +>) + + .file "ecc-secp224r1-modp.asm" + +GMP_NUMB_BITS(64) + +define(, <%rsi>) +define(, <%rdi>) C Overlaps unused modulo input +define(, <%rcx>) +define(, <%rax>) +define(

, <%rdx>) +define(

, <%r8>) +define(, <%r9>) +define(, <%r10>) +define(, <%r11>) + + C ecc_224_modp (const struct ecc_modulo *m, mp_limb_t *rp) +PROLOGUE(nettle_ecc_224_modp) + W64_ENTRY(2, 0) + mov 48(RP), H0 + mov 56(RP), H1 + C Set (F2,F1,F0) <-- (H1,H0) << 32 + mov H0, F0 + mov H0, F1 + shl $32, F0 + shr $32, F1 + mov H1, F2 + mov H1, T0 + shl $32, T0 + shr $32, F2 + or T0, F1 + + xor H2, H2 + mov 16(RP), T0 + mov 24(RP), T1 + sub F0, T0 + sbb F1, T1 + sbb F2, H0 + sbb $0, H1 C No further borrow + + adc 32(RP), H0 + adc 40(RP), H1 + adc $0, H2 + + C Set (F2,F1,F0) <-- (H2,H1,H0) << 32 + C To free registers, add in T1, T0 as soon as H0, H1 have been copied + mov H0, F0 + mov H0, F1 + add T0, H0 + mov H1, F2 + mov H1, T0 + adc T1, H1 + mov H2, T1 + adc $0, H2 + + C Shift 32 bits + shl $32, F0 + shr $32, F1 + shl $32, T0 + shr $32, F2 + shl $32, T1 + or T0, F1 + or T1, F2 + + mov (RP), T0 + mov 8(RP), T1 + sub F0, T0 + sbb F1, T1 + sbb F2, H0 + sbb $0, H1 + sbb $0, H2 + + C We now have H2, H1, H0, T1, T0, with 33 bits left to reduce + C Set F0 <-- (H2, H1) >> 32 + C Set (F2,F1) <-- (H2, H1 & 0xffffffff00000000) + C H1 <-- H1 & 0xffffffff + + mov H1, F0 + mov H1, F1 + mov H2, F2 + movl XREG(H1), XREG(H1) C Clears high 32 bits + sub H1, F1 C Clears low 32 bits + shr $32, F0 + shl $32, H2 + or H2, F0 + + sub F0, T0 + sbb $0, F1 + sbb $0, F2 + add F1, T1 + adc F2, H0 + adc $0, H1 + + mov T0, (RP) + mov T1, 8(RP) + mov H0, 16(RP) + mov H1, 24(RP) + + W64_EXIT(2, 0) + ret +EPILOGUE(nettle_ecc_224_modp) diff --git a/x86_64/ecc-secp256r1-redc.asm b/x86_64/ecc-secp256r1-redc.asm new file mode 100644 index 00000000..ee689cd6 --- /dev/null +++ b/x86_64/ecc-secp256r1-redc.asm @@ -0,0 +1,129 @@ +C x86_64/ecc-secp256r1-redc.asm + +ifelse(< + Copyright (C) 2013 Niels Möller + + This file is part of GNU Nettle. 
+ + GNU Nettle is free software: you can redistribute it and/or + modify it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + + or + + * the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your + option) any later version. + + or both in parallel, as here. + + GNU Nettle is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received copies of the GNU General Public License and + the GNU Lesser General Public License along with this program. If + not, see http://www.gnu.org/licenses/. +>) + + .file "ecc-secp256r1-redc.asm" + +define(, <%rsi>) +define(, <%rdi>) C Overlaps unused modulo input +define(, <%rcx>) +define(, <%rax>) +define(, <%rdx>) +define(, <%r8>) +define(, <%r9>) +define(, <%r10>) +define(, <%r11>) +define(, <%r12>) +define(, <%rbx>) +define(, <%rbp>) + +C FOLD(x), sets (F3,F2,F1,F0) <-- (x << 224) - (x << 128) - (x<<32) +define(, < + mov $1, F2 + mov $1, F3 + shl <$>32, F2 + shr <$>32, F3 + xor F0,F0 + xor F1,F1 + sub F2, F0 + sbb F3, F1 + sbb $1, F2 + sbb <$>0, F3 +>) +PROLOGUE(nettle_ecc_256_redc) + W64_ENTRY(2, 0) + C save all registers that need to be saved + push %rbx + push %rbp + push %r12 + + mov (RP), U0 + FOLD(U0) + mov 8(RP), U1 + mov 16(RP), U2 + mov 24(RP), U3 + sub F0, U1 + sbb F1, U2 + sbb F2, U3 + sbb F3, U0 C Add in later + + FOLD(U1) + mov 32(RP), U4 + sub F0, U2 + sbb F1, U3 + sbb F2, U4 + sbb F3, U1 + + FOLD(U2) + mov 40(RP), U5 + sub F0, U3 + sbb F1, U4 + sbb F2, U5 + sbb F3, U2 + + FOLD(U3) + mov 48(RP), U6 + sub F0, U4 + sbb F1, U5 + sbb F2, U6 + sbb F3, U3 + + add U4, U0 + adc U5, U1 + adc U6, U2 + adc 56(RP), U3 + + C If carry, we 
need to add in + C 2^256 - p = <0xfffffffe, 0xff..ff, 0xffffffff00000000, 1> + sbb F2, F2 + mov F2, F0 + mov F2, F1 + mov XREG(F2), XREG(F3) + neg F0 + shl $32, F1 + and $-2, XREG(F3) + + add F0, U0 + mov U0, (RP) + adc F1, U1 + mov U1, 8(RP) + adc F2, U2 + mov U2, 16(RP) + adc F3, U3 + + mov U3, 24(RP) + + pop %r12 + pop %rbp + pop %rbx + W64_EXIT(2, 0) + ret +EPILOGUE(nettle_ecc_256_redc) diff --git a/x86_64/ecc-secp384r1-modp.asm b/x86_64/ecc-secp384r1-modp.asm new file mode 100644 index 00000000..3c8ec3f4 --- /dev/null +++ b/x86_64/ecc-secp384r1-modp.asm @@ -0,0 +1,234 @@ +C x86_64/ecc-secp384r1-modp.asm + +ifelse(< + Copyright (C) 2013, 2015 Niels Möller + + This file is part of GNU Nettle. + + GNU Nettle is free software: you can redistribute it and/or + modify it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + + or + + * the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your + option) any later version. + + or both in parallel, as here. + + GNU Nettle is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received copies of the GNU General Public License and + the GNU Lesser General Public License along with this program. If + not, see http://www.gnu.org/licenses/. +>) + + .file "ecc-secp384r1-modp.asm" + +define(, <%rsi>) +define(, <%rax>) +define(, <%rbx>) +define(, <%rcx>) +define(, <%rdx>) +define(, <%rbp>) +define(, <%rdi>) +define(, <%r8>) +define(, <%r9>) +define(

, <%r10>) +define(

, <%r11>) +define(

, <%r12>) +define(

, <%r13>) +define(

, <%r14>) +define(, <%r15>) +define(, H5) C Overlap +define(, RP) C Overlap + + +PROLOGUE(nettle_ecc_384_modp) + W64_ENTRY(2, 0) + + push %rbx + push %rbp + push %r12 + push %r13 + push %r14 + push %r15 + + C First get top 2 limbs, which need folding twice. + C B^10 = B^6 + B^4 + 2^32 (B-1)B^4. + C We handle the terms as follow: + C + C B^6: Folded immediatly. + C + C B^4: Delayed, added in in the next folding. + C + C 2^32(B-1) B^4: Low half limb delayed until the next + C folding. Top 1.5 limbs subtracted and shifter now, resulting + C in 2.5 limbs. The low limb saved in D5, high 1.5 limbs added + C in. + + mov 80(RP), H4 + mov 88(RP), H5 + C Shift right 32 bits, into H1, H0 + mov H4, H0 + mov H5, H1 + mov H5, D5 + shr $32, H1 + shl $32, D5 + shr $32, H0 + or D5, H0 + + C H1 H0 + C - H1 H0 + C -------- + C H1 H0 D5 + mov H0, D5 + neg D5 + sbb H1, H0 + sbb $0, H1 + + xor C2, C2 + add H4, H0 + adc H5, H1 + adc $0, C2 + + C Add in to high part + add 48(RP), H0 + adc 56(RP), H1 + adc $0, C2 C Do C2 later + + C +1 term + mov (RP), T0 + add H0, T0 + mov 8(RP), T1 + adc H1, T1 + mov 16(RP), T2 + mov 64(RP), H2 + adc H2, T2 + mov 24(RP), T3 + mov 72(RP), H3 + adc H3, T3 + mov 32(RP), T4 + adc H4, T4 + mov 40(RP), T5 + adc H5, T5 + sbb C0, C0 + neg C0 C FIXME: Switch sign of C0? 
+ + push RP + + C +B^2 term + add H0, T2 + adc H1, T3 + adc H2, T4 + adc H3, T5 + adc $0, C0 + + C Shift left, including low half of H4 + mov H3, TMP + shl $32, H4 + shr $32, TMP + or TMP, H4 + + mov H2, TMP + shl $32, H3 + shr $32, TMP + or TMP, H3 + + mov H1, TMP + shl $32, H2 + shr $32, TMP + or TMP, H2 + + mov H0, TMP + shl $32, H1 + shr $32, TMP + or TMP, H1 + + shl $32, H0 + + C H4 H3 H2 H1 H0 0 + C - H4 H3 H2 H1 H0 + C --------------- + C H4 H3 H2 H1 H0 TMP + + mov H0, TMP + neg TMP + sbb H1, H0 + sbb H2, H1 + sbb H3, H2 + sbb H4, H3 + sbb $0, H4 + + add TMP, T0 + adc H0, T1 + adc H1, T2 + adc H2, T3 + adc H3, T4 + adc H4, T5 + adc $0, C0 + + C Remains to add in C2 and C0 + C Set H1, H0 = (2^96 - 2^32 + 1) C0 + mov C0, H0 + mov C0, H1 + shl $32, H1 + sub H1, H0 + sbb $0, H1 + + C Set H3, H2 = (2^96 - 2^32 + 1) C2 + mov C2, H2 + mov C2, H3 + shl $32, H3 + sub H3, H2 + sbb $0, H3 + add C0, H2 C No carry. Could use lea trick + + xor C0, C0 + add H0, T0 + adc H1, T1 + adc H2, T2 + adc H3, T3 + adc C2, T4 + adc D5, T5 C Value delayed from initial folding + adc $0, C0 C Use sbb and switch sign? + + C Final unlikely carry + mov C0, H0 + mov C0, H1 + shl $32, H1 + sub H1, H0 + sbb $0, H1 + + pop RP + + add H0, T0 + mov T0, (RP) + adc H1, T1 + mov T1, 8(RP) + adc C0, T2 + mov T2, 16(RP) + adc $0, T3 + mov T3, 24(RP) + adc $0, T4 + mov T4, 32(RP) + adc $0, T5 + mov T5, 40(RP) + + pop %r15 + pop %r14 + pop %r13 + pop %r12 + pop %rbp + pop %rbx + + W64_EXIT(2, 0) + ret +EPILOGUE(nettle_ecc_384_modp) diff --git a/x86_64/ecc-secp521r1-modp.asm b/x86_64/ecc-secp521r1-modp.asm new file mode 100644 index 00000000..43a8cb8c --- /dev/null +++ b/x86_64/ecc-secp521r1-modp.asm @@ -0,0 +1,158 @@ +C x86_64/ecc-secp521r1-modp.asm + +ifelse(< + Copyright (C) 2013 Niels Möller + + This file is part of GNU Nettle. 
+ + GNU Nettle is free software: you can redistribute it and/or + modify it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + + or + + * the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your + option) any later version. + + or both in parallel, as here. + + GNU Nettle is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received copies of the GNU General Public License and + the GNU Lesser General Public License along with this program. If + not, see http://www.gnu.org/licenses/. +>) + + .file "ecc-secp521r1-modp.asm" + +GMP_NUMB_BITS(64) + +define(, <%rsi>) +define(, <%rax>) +define(, <%rbx>) +define(, <%rcx>) +define(, <%rdx>) +define(, <%rbp>) +define(, <%rdi>) +define(, <%r8>) +define(, <%r9>) +define(, <%r10>) +define(, <%r11>) +define(, <%r12>) +define(, <%r13>) + +PROLOGUE(nettle_ecc_521_modp) + W64_ENTRY(2, 0) + push %rbx + push %rbp + push %r12 + push %r13 + + C Read top 17 limbs, shift left 55 bits + mov 72(RP), U1 + mov U1, U0 + shl $55, U0 + shr $9, U1 + + mov 80(RP), U2 + mov U2, T0 + shr $9, U2 + shl $55, T0 + or T0, U1 + + mov 88(RP), U3 + mov U3, T0 + shr $9, U3 + shl $55, T0 + or T0, U2 + + mov 96(RP), U4 + mov U4, T0 + shr $9, U4 + shl $55, T0 + or T0, U3 + + mov 104(RP), U5 + mov U5, T0 + shr $9, U5 + shl $55, T0 + or T0, U4 + + mov 112(RP), U6 + mov U6, T0 + shr $9, U6 + shl $55, T0 + or T0, U5 + + mov 120(RP), U7 + mov U7, T0 + shr $9, U7 + shl $55, T0 + or T0, U6 + + mov 128(RP), U8 + mov U8, T0 + shr $9, U8 + shl $55, T0 + or T0, U7 + + mov 136(RP), U9 + mov U9, T0 + shr $9, U9 + shl $55, T0 + or T0, U8 + + add (RP), U0 + adc 8(RP), U1 + adc 
16(RP), U2 + adc 24(RP), U3 + adc 32(RP), U4 + adc 40(RP), U5 + adc 48(RP), U6 + adc 56(RP), U7 + adc 64(RP), U8 + adc $0, U9 + + C Top limbs are . Keep low 9 bits of 8, and fold the + C top bits (at most 65 bits). + mov U8, T0 + shr $9, T0 + and $0x1ff, U8 + mov U9, T1 + shl $55, U9 + shr $9, T1 + or U9, T0 + + add T0, U0 + mov U0, (RP) + adc T1, U1 + mov U1, 8(RP) + adc $0, U2 + mov U2, 16(RP) + adc $0, U3 + mov U3, 24(RP) + adc $0, U4 + mov U4, 32(RP) + adc $0, U5 + mov U5, 40(RP) + adc $0, U6 + mov U6, 48(RP) + adc $0, U7 + mov U7, 56(RP) + adc $0, U8 + mov U8, 64(RP) + + pop %r13 + pop %r12 + pop %rbp + pop %rbx + W64_EXIT(2, 0) + ret +EPILOGUE(nettle_ecc_521_modp) -- cgit v1.2.1