1 files changed, 170 insertions, 0 deletions
diff --git a/sec-modinv.c b/sec-modinv.c
new file mode 100644
index 00000000..f674a8fb
--- /dev/null
+++ b/sec-modinv.c
@@ -0,0 +1,170 @@
+/* sec-modinv.c */
+
+/* nettle, low-level cryptographics library
+ *
+ * Copyright (C) 2013 Niels Möller
+ *  
+ * The nettle library is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at your
+ * option) any later version.
+ * 
+ * The nettle library is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ * License for more details.
+ * 
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with the nettle library; see the file COPYING.LIB.  If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02111-1301, USA.
+ */
+
+/* Development of Nettle's ECC support was funded by Internetfonden. */
+
+#if HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <assert.h>
+
+#include "ecc-internal.h"
+
+static void
+cnd_neg (int cnd, mp_limb_t *rp, const mp_limb_t *ap, mp_size_t n)
+{
+  mp_limb_t cy = (cnd != 0);
+  mp_limb_t mask = -cy;
+  mp_size_t i;
+
+  for (i = 0; i < n; i++)
+    {
+      mp_limb_t r = (ap[i] ^ mask) + cy;
+      cy = r < cy;
+      rp[i] = r;
+    }
+}
+
+static void
+cnd_swap (int cnd, mp_limb_t *ap, mp_limb_t *bp, mp_size_t n)
+{
+  mp_limb_t mask = - (mp_limb_t) (cnd != 0);
+  mp_size_t i;
+  for (i = 0; i < n; i++)
+    {
+      mp_limb_t a, b, t;
+      a = ap[i];
+      b = bp[i];
+      t = (a ^ b) & mask;
+      ap[i] = a ^ t;
+      bp[i] = b ^ t;
+    }
+}
+
+/* Compute a^{-1} mod m, with running time depending only on the size.
+   Also needs (m+1)/2, and m must be odd. */
+void
+sec_modinv (mp_limb_t *vp, mp_limb_t *ap, mp_size_t n,
+	    const mp_limb_t *mp, const mp_limb_t *mp1h, mp_size_t bit_size,
+	    mp_limb_t *scratch)
+{
+#define bp scratch
+#define dp (scratch + n)
+#define up (scratch + 2*n)
+
+  mp_bitcnt_t i;
+
+  /* Maintain
+
+       a = u * orig_a (mod m)
+       b = v * orig_a (mod m)
+
+     and b odd at all times. Initially,
+
+       a = a_orig, u = 1
+       b = m,      v = 0
+     */
+
+  assert (ap != vp);
+
+  up[0] = 1;
+  mpn_zero (up+1, n - 1);
+  mpn_copyi (bp, mp, n);
+  mpn_zero (vp, n);
+
+  for (i = bit_size + GMP_NUMB_BITS * n; i-- > 0; )
+    {
+      mp_limb_t odd, swap, cy;
+      
+      /* Always maintain b odd. The logic of the iteration is as
+	 follows. For a, b:
+
+	   odd = a & 1
+	   a -= odd * b
+	   if (underflow from a-b)
+	     {
+	       b += a, assigns old a
+	       a = B^n-a
+	     }
+	   
+	   a /= 2
+
+	 For u, v:
+
+	   if (underflow from a - b)
+	     swap u, v
+	   u -= odd * v
+	   if (underflow from u - v)
+	     u += m
+
+	   u /= 2
+	   if (a one bit was shifted out)
+	     u += (m+1)/2
+
+	 As long as a > 0, the quantity
+
+	   (bitsize of a) + (bitsize of b)
+
+	 is reduced by at least one bit per iteration, hence after
+         (bit_size of orig_a) + (bit_size of m) - 1 iterations we
+         surely have a = 0. Then b = gcd(orig_a, m) and if b = 1 then
+         also v = orig_a^{-1} (mod m)
+      */
+
+      assert (bp[0] & 1);
+      odd = ap[0] & 1;
+
+      /* Which variant is fastest depends on the speed of the various
+	 cnd_* functions. Assembly implementation would help. */
+#if 1
+      swap = cnd_sub_n (odd, ap, bp, n);
+      cnd_add_n (swap, bp, ap, n);
+      cnd_neg (swap, ap, ap, n);
+#else
+      swap = odd & mpn_sub_n (dp, ap, bp, n);
+      cnd_copy (swap, bp, ap, n);
+      cnd_neg (swap, dp, dp, n);
+      cnd_copy (odd, ap, dp, n);
+#endif
+
+#if 1
+      cnd_swap (swap, up, vp, n);
+      cy = cnd_sub_n (odd, up, vp, n);
+      cy -= cnd_add_n (cy, up, mp, n);
+#else
+      cy = cnd_sub_n (odd, up, vp, n);
+      cnd_add_n (swap, vp, up, n);
+      cnd_neg (swap, up, up, n);
+      cnd_add_n (cy ^ swap, up, mp, n);
+#endif
+      cy = mpn_rshift (ap, ap, n, 1);
+      assert (cy == 0);
+      cy = mpn_rshift (up, up, n, 1);
+      cy = cnd_add_n (cy, up, mp1h, n);
+      assert (cy == 0);
+    }
+  assert ( (ap[0] | ap[n-1]) == 0);
+#undef bp
+#undef dp
+#undef up
+}