diff options
-rw-r--r-- | ChangeLog | 6 | ||||
-rw-r--r-- | configure.in | 6 | ||||
-rw-r--r-- | doc/gmp.texi | 10 | ||||
-rw-r--r-- | doc/tasks.html | 4 | ||||
-rw-r--r-- | gmp-h.in | 17 | ||||
-rw-r--r-- | gmp-impl.h | 8 | ||||
-rw-r--r-- | mpn/Makefile.am | 4 | ||||
-rw-r--r-- | mpn/alpha/com.asm (renamed from mpn/alpha/com_n.asm) | 6 | ||||
-rw-r--r-- | mpn/asm-defs.m4 | 4 | ||||
-rw-r--r-- | mpn/generic/binvert.c | 2 | ||||
-rw-r--r-- | mpn/generic/com.c | 34 | ||||
-rw-r--r-- | mpn/generic/invert.c | 2 | ||||
-rw-r--r-- | mpn/generic/invertappr.c | 4 | ||||
-rw-r--r-- | mpn/generic/mul_fft.c | 4 | ||||
-rw-r--r-- | mpn/generic/neg.c (renamed from mpn/generic/neg_n.c) | 4 | ||||
-rw-r--r-- | mpn/powerpc32/750/com.asm (renamed from mpn/powerpc32/750/com_n.asm) | 6 | ||||
-rw-r--r-- | mpn/powerpc32/vmx/copyd.asm | 2 | ||||
-rw-r--r-- | mpn/powerpc32/vmx/copyi.asm | 2 | ||||
-rw-r--r-- | mpn/powerpc64/com.asm (renamed from mpn/powerpc64/com_n.asm) | 4 | ||||
-rw-r--r-- | mpn/x86/k6/mmx/com.asm (renamed from mpn/x86/k6/mmx/com_n.asm) | 6 | ||||
-rw-r--r-- | mpn/x86/k7/mmx/com.asm (renamed from mpn/x86/k7/mmx/com_n.asm) | 6 | ||||
-rw-r--r-- | mpn/x86/pentium/com.asm (renamed from mpn/x86/pentium/com_n.asm) | 6 | ||||
-rw-r--r-- | mpn/x86_64/com.asm (renamed from mpn/x86_64/com_n.asm) | 4 | ||||
-rw-r--r-- | mpz/aorsmul_i.c | 6 | ||||
-rw-r--r-- | mpz/cfdiv_r_2exp.c | 2 | ||||
-rw-r--r-- | tests/devel/try.c | 14 | ||||
-rw-r--r-- | tests/mpn/t-instrument.c | 6 | ||||
-rw-r--r-- | tests/mpz/bit.c | 2 | ||||
-rw-r--r-- | tests/refmpn.c | 16 | ||||
-rw-r--r-- | tests/refmpz.c | 4 | ||||
-rw-r--r-- | tests/tests.h | 4 | ||||
-rw-r--r-- | tune/common.c | 4 | ||||
-rw-r--r-- | tune/many.pl | 2 | ||||
-rw-r--r-- | tune/speed.c | 2 | ||||
-rw-r--r-- | tune/speed.h | 2 |
35 files changed, 127 insertions, 88 deletions
@@ -1,5 +1,11 @@ 2009-12-28 Torbjorn Granlund <tege@gmplib.org> + * configure.in (gmp_mpn_functions_optional) Move "com" from here... + (gmp_mpn_functions): ...to here. + * mpn/generic/com.c: New file. + * (mpn_com): New name for mpn_com_n. Make public. + * (mpn_neg): Analogous changes. + * tune/tuneup.c (tune_mu_div, tune_mu_bdiv): Set step_factor. * tune/common.c, tune/speed.c, tune/speed.h: Support measuring diff --git a/configure.in b/configure.in index b2b44eada..02a6e7ca5 100644 --- a/configure.in +++ b/configure.in @@ -2481,7 +2481,7 @@ fi # can optionally provide the latter as an extra entrypoint. Likewise # divrem_1 and pre_divrem_1. -gmp_mpn_functions_optional="umul udiv com_n \ +gmp_mpn_functions_optional="umul udiv \ invert_limb sqr_diagonal \ mul_2 mul_3 mul_4 \ addmul_2 addmul_3 addmul_4 addmul_5 addmul_6 addmul_7 addmul_8 \ @@ -2491,7 +2491,7 @@ gmp_mpn_functions_optional="umul udiv com_n \ add_n_sub_n addaddmul_1msb0" gmp_mpn_functions="$extra_functions \ - add add_1 add_n sub sub_1 sub_n neg_n mul_1 addmul_1 \ + add add_1 add_n sub sub_1 sub_n neg com mul_1 addmul_1 \ submul_1 lshift rshift dive_1 diveby3 divis divrem divrem_1 divrem_2 \ fib2_ui mod_1 mod_34lsub1 mode1o pre_divrem_1 pre_mod_1 dump \ mod_1_1 mod_1_2 mod_1_3 mod_1_4 lshiftc \ @@ -2997,7 +2997,7 @@ AH_VERBATIM([HAVE_NATIVE], #undef HAVE_NATIVE_mpn_bdiv_dbm1c #undef HAVE_NATIVE_mpn_bdiv_q_1 #undef HAVE_NATIVE_mpn_pi1_bdiv_q_1 -#undef HAVE_NATIVE_mpn_com_n +#undef HAVE_NATIVE_mpn_com #undef HAVE_NATIVE_mpn_copyd #undef HAVE_NATIVE_mpn_copyi #undef HAVE_NATIVE_mpn_divexact_1 diff --git a/doc/gmp.texi b/doc/gmp.texi index e77e2289a..bb3fc0343 100644 --- a/doc/gmp.texi +++ b/doc/gmp.texi @@ -5167,6 +5167,11 @@ This function requires that @var{s1n} is greater than or equal to @var{s2n}. @end deftypefun +@deftypefun void mpn_neg_n (mp_limb_t *@var{rp}, const mp_limb_t *@var{sp}, mp_size_t @var{n}) +Perform the negation of @{@var{sp}, @var{n}@}, and write the result to +@{@var{rp}, @var{n}@}. 
Return carry-out. +@end deftypefun + @deftypefun void mpn_mul_n (mp_limb_t *@var{rp}, const mp_limb_t *@var{s1p}, const mp_limb_t *@var{s2p}, mp_size_t @var{n}) Multiply @{@var{s1p}, @var{n}@} and @{@var{s2p}, @var{n}@}, and write the 2*@var{n}-limb result to @var{rp}. @@ -5525,6 +5530,11 @@ Perform the bitwise logical exclusive or of @{@var{s1p}, @var{n}@} and @{@var{rp}, @var{n}@}. @end deftypefun +@deftypefun void mpn_com_n (mp_limb_t *@var{rp}, const mp_limb_t *@var{sp}, mp_size_t @var{n}) +Perform the bitwise complement of @{@var{sp}, @var{n}@}, and write the result +to @{@var{rp}, @var{n}@}. +@end deftypefun + @deftypefun void mpn_copyi (mp_limb_t *@var{rp}, const mp_limb_t *@var{s1p}, mp_size_t @var{n}) Copy from @{@var{s1p}, @var{n}@} to @{@var{rp}, @var{n}@}, increasingly. @end deftypefun diff --git a/doc/tasks.html b/doc/tasks.html index f8fd6aaf6..d86e79428 100644 --- a/doc/tasks.html +++ b/doc/tasks.html @@ -37,7 +37,7 @@ along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. <hr> <!-- NB. timestamp updated automatically by emacs --> - This file current as of 28 Nov 2009. An up-to-date version is available at + This file current as of 28 Dec 2009. An up-to-date version is available at <a href="http://gmplib.org/tasks.html">http://gmplib.org/tasks.html</a>. Please send comments about this page to gmp-devel<font>@</font>gmplib.org. @@ -436,7 +436,7 @@ either already been taken care of, or have become irrelevant. <code>mpn_rshift</code> already provided. <li> Cray T3E: Experiment with optimization options. In particular, -hpipeline3 seems promising. We should at least up -O to -O2 or -O3. -<li> Cray: <code>mpn_com_n</code> and <code>mpn_and_n</code> etc very probably +<li> Cray: <code>mpn_com</code> and <code>mpn_and_n</code> etc very probably wants a pragma like <code>MPN_COPY_INCR</code>. 
<li> Cray vector systems: <code>mpn_lshift</code>, <code>mpn_rshift</code>, <code>mpn_popcount</code> and <code>mpn_hamdist</code> are nice and small @@ -1571,9 +1571,14 @@ __GMP_DECLSPEC void mpn_mul_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_siz #define mpn_sqr __MPN(sqr) __GMP_DECLSPEC void mpn_sqr __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t)); -#define mpn_neg_n __MPN(neg_n) -#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpn_neg_n) -__GMP_DECLSPEC mp_limb_t mpn_neg_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t)); +#define mpn_neg __MPN(neg) +#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpn_neg) +__GMP_DECLSPEC mp_limb_t mpn_neg __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t)); +#endif + +#define mpn_com __MPN(com) +#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpn_com) +__GMP_DECLSPEC void mpn_com __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t)); #endif #define mpn_perfect_square_p __MPN(perfect_square_p) @@ -2158,12 +2163,12 @@ mpn_sub_1 (mp_ptr __gmp_dst, mp_srcptr __gmp_src, mp_size_t __gmp_size, mp_limb_ } #endif -#if defined (__GMP_EXTERN_INLINE) || defined (__GMP_FORCE_mpn_neg_n) -#if ! defined (__GMP_FORCE_mpn_neg_n) +#if defined (__GMP_EXTERN_INLINE) || defined (__GMP_FORCE_mpn_neg) +#if ! defined (__GMP_FORCE_mpn_neg) __GMP_EXTERN_INLINE #endif mp_limb_t -mpn_neg_n (mp_ptr __gmp_rp, mp_srcptr __gmp_up, mp_size_t __gmp_n) +mpn_neg (mp_ptr __gmp_rp, mp_srcptr __gmp_up, mp_size_t __gmp_n) { mp_limb_t __gmp_ul, __gmp_cy; __gmp_cy = 0; diff --git a/gmp-impl.h b/gmp-impl.h index eb0c90cf1..959064b9e 100644 --- a/gmp-impl.h +++ b/gmp-impl.h @@ -2084,11 +2084,9 @@ __GMP_DECLSPEC void __gmp_assert_fail __GMP_PROTO ((const char *, int, const cha #endif -#if HAVE_NATIVE_mpn_com_n -#define mpn_com_n __MPN(com_n) -__GMP_DECLSPEC void mpn_com_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t)); -#else -#define mpn_com_n(d,s,n) \ +#if ! 
HAVE_NATIVE_mpn_com +#undef mpn_com +#define mpn_com(d,s,n) \ do { \ mp_ptr __d = (d); \ mp_srcptr __s = (s); \ diff --git a/mpn/Makefile.am b/mpn/Makefile.am index 0ed15d46b..a504d40d4 100644 --- a/mpn/Makefile.am +++ b/mpn/Makefile.am @@ -35,7 +35,7 @@ nodist_EXTRA_libmpn_la_SOURCES = \ addmul_1.c addmul_2.c addmul_3.c addmul_4.c addmul_5.c addmul_6.c \ addmul_7.c addmul_8.c \ and_n.c andn_n.c \ - cmp.c com_n.c copyd.c copyi.c \ + cmp.c com.c copyd.c copyi.c \ dive_1.c diveby3.c divis.c divrem.c divrem_1.c divrem_2.c \ sbpi1_bdiv_qr.c sbpi1_bdiv_q.c \ sbpi1_div_qr.c sbpi1_div_q.c sbpi1_divappr_q.c \ @@ -60,7 +60,7 @@ nodist_EXTRA_libmpn_la_SOURCES = \ toom_interpolate_5pts.c toom_interpolate_6pts.c toom_interpolate_7pts.c \ toom_interpolate_8pts.c toom_interpolate_12pts.c toom_interpolate_16pts.c \ invertappr.c invert.c binvert.c mulmod_bnm1.c sqrmod_bnm1.c \ - mullo_n.c mullo_basecase.c nand_n.c neg_n.c nior_n.c perfsqr.c \ + mullo_n.c mullo_basecase.c nand_n.c neg.c nior_n.c perfsqr.c \ popcount.c pre_divrem_1.c pre_mod_1.c pow_1.c random.c random2.c rshift.c \ rootrem.c scan0.c scan1.c set_str.c \ sqr_basecase.c sqr_diagonal.c \ diff --git a/mpn/alpha/com_n.asm b/mpn/alpha/com.asm index fe898a4d3..6f6c39af6 100644 --- a/mpn/alpha/com_n.asm +++ b/mpn/alpha/com.asm @@ -1,4 +1,4 @@ -dnl Alpha mpn_com_n -- mpn one's complement. +dnl Alpha mpn_com -- mpn one's complement. dnl Copyright 2003 Free Software Foundation, Inc. dnl @@ -26,7 +26,7 @@ C EV5: 2.0 C EV6: 1.5 -C mp_limb_t mpn_com_n (mp_ptr dst, mp_srcptr src, mp_size_t size); +C mp_limb_t mpn_com (mp_ptr dst, mp_srcptr src, mp_size_t size); C C For ev5 the main loop is 7 cycles plus 1 taken branch bubble, for a total C 2.0 c/l. 
In general, a pattern like this unrolled to N limbs per loop @@ -60,7 +60,7 @@ FLOAT64(L(dat), 2.0) ALIGN(16) -PROLOGUE(mpn_com_n,gp) +PROLOGUE(mpn_com,gp) C r16 dst C r17 src diff --git a/mpn/asm-defs.m4 b/mpn/asm-defs.m4 index 9ad92c182..a0382d00f 100644 --- a/mpn/asm-defs.m4 +++ b/mpn/asm-defs.m4 @@ -1325,7 +1325,7 @@ define_mpn(bdiv_q_1) define_mpn(pi1_bdiv_q_1) define_mpn(bdiv_dbm1c) define_mpn(cmp) -define_mpn(com_n) +define_mpn(com) define_mpn(copyd) define_mpn(copyi) define_mpn(count_leading_zeros) @@ -1376,7 +1376,7 @@ define_mpn(popcount) define_mpn(preinv_divrem_1) define_mpn(preinv_mod_1) define_mpn(nand_n) -define_mpn(neg_n) +define_mpn(neg) define_mpn(nior_n) define_mpn(powm) define_mpn(powlo) diff --git a/mpn/generic/binvert.c b/mpn/generic/binvert.c index bf42c48f7..6570bdfd2 100644 --- a/mpn/generic/binvert.c +++ b/mpn/generic/binvert.c @@ -94,6 +94,6 @@ mpn_binvert (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_ptr scratch) /* R = R(X/B^rn) */ mpn_mullo_n (rp + rn, rp, xp + rn, newrn - rn); - mpn_neg_n (rp + rn, rp + rn, newrn - rn); + mpn_neg (rp + rn, rp + rn, newrn - rn); } } diff --git a/mpn/generic/com.c b/mpn/generic/com.c new file mode 100644 index 000000000..ed817e6aa --- /dev/null +++ b/mpn/generic/com.c @@ -0,0 +1,34 @@ +/* mpn_com - complement an mpn. + +Copyright 2009 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. 
+ +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ + +#include "gmp.h" +#include "gmp-impl.h" + +#undef mpn_com +#define mpn_com __MPN(com) + +void +mpn_com (mp_ptr rp, mp_srcptr up, mp_size_t n) +{ + mp_limb_t ul; + do { + ul = *up++; + *rp++ = ~ul & GMP_NUMB_MASK; + } while (--n != 0); +} diff --git a/mpn/generic/invert.c b/mpn/generic/invert.c index 53c391c22..b3f557ed2 100644 --- a/mpn/generic/invert.c +++ b/mpn/generic/invert.c @@ -64,7 +64,7 @@ mpn_invert (mp_ptr ip, mp_srcptr dp, mp_size_t n, mp_ptr scratch) xp = scratch; /* 2 * n limbs */ for (i = n - 1; i >= 0; i--) xp[i] = GMP_NUMB_MAX; - mpn_com_n (xp + n, dp, n); + mpn_com (xp + n, dp, n); if (n == 2) { mpn_divrem_2 (ip, 0, xp, 4, dp); } else { diff --git a/mpn/generic/invertappr.c b/mpn/generic/invertappr.c index 6a05ba34a..b23d89f32 100644 --- a/mpn/generic/invertappr.c +++ b/mpn/generic/invertappr.c @@ -105,7 +105,7 @@ mpn_bc_invertappr (mp_ptr ip, mp_srcptr dp, mp_size_t n, mp_ptr tp) for (i = n - 1; i >= 0; i--) xp[i] = GMP_NUMB_MAX; - mpn_com_n (xp + n, dp, n); + mpn_com (xp + n, dp, n); /* Now xp contains B^2n - {dp,n}*B^n - 1 */ @@ -245,7 +245,7 @@ mpn_ni_invertappr (mp_ptr ip, mp_srcptr dp, mp_size_t n, mp_ptr scratch) ASSERT_NOCARRY (mpn_sub_n (xp, dp - n, xp, n)); ASSERT (xp[n] == 0); } else { /* "negative" residue class */ - mpn_com_n (xp, xp, n + 1); + mpn_com (xp, xp, n + 1); MPN_INCR_U(xp, n + 1, method); ASSERT (xp[n] <= 1); #if USE_MUL_N diff --git a/mpn/generic/mul_fft.c b/mpn/generic/mul_fft.c index 23f390500..cdf8cc133 100644 --- a/mpn/generic/mul_fft.c +++ b/mpn/generic/mul_fft.c @@ -212,7 +212,7 @@ mpn_fft_mul_2exp_modF (mp_ptr r, mp_srcptr a, unsigned int d, mp_size_t n) { MPN_COPY (r, a + n - d, d); rd = a[n]; - mpn_com_n (r + d, a, n - d); + mpn_com (r + d, a, n - d); cc = 0; } @@ -249,7 +249,7 @@ mpn_fft_mul_2exp_modF (mp_ptr r, mp_srcptr a, unsigned int d, mp_size_t n) else { 
/* r[d] is not used below, but we save a test for d=0 */ - mpn_com_n (r, a + n - d, d + 1); + mpn_com (r, a + n - d, d + 1); rd = a[n]; MPN_COPY (r + d, a, n - d); cc = 0; diff --git a/mpn/generic/neg_n.c b/mpn/generic/neg.c index 1609204c9..980e59e67 100644 --- a/mpn/generic/neg_n.c +++ b/mpn/generic/neg.c @@ -1,4 +1,4 @@ -/* mpn_neg_n - negate an mpn. +/* mpn_neg - negate an mpn. Copyright 2001, 2009 Free Software Foundation, Inc. @@ -17,7 +17,7 @@ License for more details. You should have received a copy of the GNU Lesser General Public License along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ -#define __GMP_FORCE_mpn_neg_n 1 +#define __GMP_FORCE_mpn_neg 1 #include "gmp.h" #include "gmp-impl.h" diff --git a/mpn/powerpc32/750/com_n.asm b/mpn/powerpc32/750/com.asm index 02fc4b658..c6b4b101b 100644 --- a/mpn/powerpc32/750/com_n.asm +++ b/mpn/powerpc32/750/com.asm @@ -1,4 +1,4 @@ -dnl PowerPC 750 mpn_com_n -- mpn bitwise one's complement +dnl PowerPC 750 mpn_com -- mpn bitwise one's complement dnl Copyright 2002, 2003 Free Software Foundation, Inc. dnl @@ -27,12 +27,12 @@ C 75x (G3): 2.0 C 7400,7410 (G4): 2.0 C 744x,745x (G4+): 3.0 -C void mpn_com_n (mp_ptr dst, mp_srcptr src, mp_size_t size); +C void mpn_com (mp_ptr dst, mp_srcptr src, mp_size_t size); C C This loop form is necessary for the claimed speed. ASM_START() -PROLOGUE(mpn_com_n) +PROLOGUE(mpn_com) C r3 dst C r4 src diff --git a/mpn/powerpc32/vmx/copyd.asm b/mpn/powerpc32/vmx/copyd.asm index e345eef01..e56f21c05 100644 --- a/mpn/powerpc32/vmx/copyd.asm +++ b/mpn/powerpc32/vmx/copyd.asm @@ -37,7 +37,7 @@ C read-modify-write tricks. C * The VMX code is used from the smallest sizes it handles, but measurements C show a large speed bump at the cutoff points. Small copying (perhaps C using some read-modify-write technique) should be optimized. -C * Make a mpn_com_n based on this code. +C * Make a mpn_com based on this code. 
define(`GMP_LIMB_BYTES', eval(GMP_LIMB_BITS/8)) define(`LIMBS_PER_VR', eval(16/GMP_LIMB_BYTES)) diff --git a/mpn/powerpc32/vmx/copyi.asm b/mpn/powerpc32/vmx/copyi.asm index b6b2e7ea8..6c8303ac2 100644 --- a/mpn/powerpc32/vmx/copyi.asm +++ b/mpn/powerpc32/vmx/copyi.asm @@ -37,7 +37,7 @@ C read-modify-write tricks. C * The VMX code is used from the smallest sizes it handles, but measurements C show a large speed bump at the cutoff points. Small copying (perhaps C using some read-modify-write technique) should be optimized. -C * Make a mpn_com_n based on this code. +C * Make a mpn_com based on this code. define(`GMP_LIMB_BYTES', eval(GMP_LIMB_BITS/8)) define(`LIMBS_PER_VR', eval(16/GMP_LIMB_BYTES)) diff --git a/mpn/powerpc64/com_n.asm b/mpn/powerpc64/com.asm index 0c43d06cf..4fb2e65d7 100644 --- a/mpn/powerpc64/com_n.asm +++ b/mpn/powerpc64/com.asm @@ -1,4 +1,4 @@ -dnl PowerPC-64 mpn_com_n. +dnl PowerPC-64 mpn_com. dnl Copyright 2003, 2004, 2005 Free Software Foundation, Inc. @@ -32,7 +32,7 @@ C up r4 C n r5 ASM_START() -PROLOGUE(mpn_com_n) +PROLOGUE(mpn_com) rldic. r0, r5, 3, 59 C r0 = (r5 & 3) << 3; cr0 = (n == 4t)? cmpldi cr6, r0, 16 C cr6 = (n cmp 4t + 2)? diff --git a/mpn/x86/k6/mmx/com_n.asm b/mpn/x86/k6/mmx/com.asm index 42e6ab392..3dcf539a5 100644 --- a/mpn/x86/k6/mmx/com_n.asm +++ b/mpn/x86/k6/mmx/com.asm @@ -1,4 +1,4 @@ -dnl AMD K6-2 mpn_com_n -- mpn bitwise one's complement. +dnl AMD K6-2 mpn_com -- mpn bitwise one's complement. dnl Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc. dnl @@ -28,7 +28,7 @@ C K6-2 1.0 1.18 1.18 1.18 cycles/limb C K6 1.5 1.85 1.75 1.85 -C void mpn_com_n (mp_ptr dst, mp_srcptr src, mp_size_t size); +C void mpn_com (mp_ptr dst, mp_srcptr src, mp_size_t size); C C Take the bitwise ones-complement of src,size and write it to dst,size. 
@@ -38,7 +38,7 @@ defframe(PARAM_DST, 4) TEXT ALIGN(16) -PROLOGUE(mpn_com_n) +PROLOGUE(mpn_com) deflit(`FRAME',0) movl PARAM_SIZE, %ecx diff --git a/mpn/x86/k7/mmx/com_n.asm b/mpn/x86/k7/mmx/com.asm index 068c01f07..3c6704bc1 100644 --- a/mpn/x86/k7/mmx/com_n.asm +++ b/mpn/x86/k7/mmx/com.asm @@ -1,4 +1,4 @@ -dnl AMD Athlon mpn_com_n -- mpn bitwise one's complement. +dnl AMD Athlon mpn_com -- mpn bitwise one's complement. dnl Copyright 2002 Free Software Foundation, Inc. dnl @@ -23,7 +23,7 @@ include(`../config.m4') C K7: 1.0 cycles/limb -C void mpn_com_n (mp_ptr dst, mp_srcptr src, mp_size_t size); +C void mpn_com (mp_ptr dst, mp_srcptr src, mp_size_t size); C C The loop form below is necessary for the claimed speed. It needs to be C aligned to a 16 byte boundary and only 16 bytes long. Maybe that's so it @@ -51,7 +51,7 @@ defframe(PARAM_DST, 4) TEXT ALIGN(16) -PROLOGUE(mpn_com_n) +PROLOGUE(mpn_com) deflit(`FRAME',0) movl PARAM_DST, %edx diff --git a/mpn/x86/pentium/com_n.asm b/mpn/x86/pentium/com.asm index c6d2d72e5..fbb4ffd3f 100644 --- a/mpn/x86/pentium/com_n.asm +++ b/mpn/x86/pentium/com.asm @@ -1,4 +1,4 @@ -dnl Intel Pentium mpn_com_n -- mpn ones complement. +dnl Intel Pentium mpn_com -- mpn ones complement. dnl Copyright 1996, 2001, 2002, 2006 Free Software Foundation, Inc. dnl @@ -26,7 +26,7 @@ C P5: 1.75 cycles/limb NAILS_SUPPORT(0-31) -C void mpn_com_n (mp_ptr dst, mp_srcptr src, mp_size_t size); +C void mpn_com (mp_ptr dst, mp_srcptr src, mp_size_t size); C C This code is similar to mpn_copyi, basically there's just some "xorl C $GMP_NUMB_MASK"s inserted. @@ -44,7 +44,7 @@ defframe(PARAM_DST, 4) TEXT ALIGN(8) -PROLOGUE(mpn_com_n) +PROLOGUE(mpn_com) deflit(`FRAME',0) movl PARAM_SRC, %eax diff --git a/mpn/x86_64/com_n.asm b/mpn/x86_64/com.asm index fba938464..699da11b6 100644 --- a/mpn/x86_64/com_n.asm +++ b/mpn/x86_64/com.asm @@ -1,4 +1,4 @@ -dnl AMD64 mpn_com_n. +dnl AMD64 mpn_com. dnl Copyright 2004, 2005, 2006 Free Software Foundation, Inc. 
@@ -35,7 +35,7 @@ define(`n',`%rdx') ASM_START() TEXT ALIGN(32) -PROLOGUE(mpn_com_n) +PROLOGUE(mpn_com) movq (up), %r8 movl %edx, %eax leaq (up,n,8), up diff --git a/mpz/aorsmul_i.c b/mpz/aorsmul_i.c index 1bc6f4768..b3c2efae4 100644 --- a/mpz/aorsmul_i.c +++ b/mpz/aorsmul_i.c @@ -47,7 +47,7 @@ along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ The final w will retain its sign, unless an underflow occurs in a submul of absolute values, in which case it's flipped. - If x has more limbs than w, then mpn_submul_1 followed by mpn_com_n is + If x has more limbs than w, then mpn_submul_1 followed by mpn_com is used. The alternative would be mpn_mul_1 into temporary space followed by mpn_sub_n. Avoiding temporary space seem good, and submul+com stands a chance of being faster since it involves only one set of carry @@ -144,7 +144,7 @@ mpz_aorsmul_1 (mpz_ptr w, mpz_srcptr x, mp_limb_t y, mp_size_t sub) /* Borrow out of w, take twos complement negative to get absolute value, flip sign of w. 
*/ wp[new_wsize] = ~-cy; /* extra limb is 0-cy */ - mpn_com_n (wp, wp, new_wsize); + mpn_com (wp, wp, new_wsize); new_wsize++; MPN_INCR_U (wp, new_wsize, CNST_LIMB(1)); wsize_signed = -wsize_signed; @@ -158,7 +158,7 @@ mpz_aorsmul_1 (mpz_ptr w, mpz_srcptr x, mp_limb_t y, mp_size_t sub) mp_limb_t cy2; /* -(-cy*b^n + w-x*y) = (cy-1)*b^n + ~(w-x*y) + 1 */ - mpn_com_n (wp, wp, wsize); + mpn_com (wp, wp, wsize); cy += mpn_add_1 (wp, wp, wsize, CNST_LIMB(1)); cy -= 1; diff --git a/mpz/cfdiv_r_2exp.c b/mpz/cfdiv_r_2exp.c index 64bb7eeb4..5611ad675 100644 --- a/mpz/cfdiv_r_2exp.c +++ b/mpz/cfdiv_r_2exp.c @@ -109,7 +109,7 @@ cfdiv_r_2exp (mpz_ptr w, mpz_srcptr u, mp_bitcnt_t cnt, int dir) /* Ones complement */ i = MIN (abs_usize, limb_cnt+1); - mpn_com_n (wp, up, i); + mpn_com (wp, up, i); for ( ; i <= limb_cnt; i++) wp[i] = GMP_NUMB_MAX; diff --git a/tests/devel/try.c b/tests/devel/try.c index 755653351..f8d1b0d3c 100644 --- a/tests/devel/try.c +++ b/tests/devel/try.c @@ -590,7 +590,7 @@ validate_sqrtrem (void) #define TYPE_COPY 26 #define TYPE_COPYI 27 #define TYPE_COPYD 28 -#define TYPE_COM_N 29 +#define TYPE_COM 29 #define TYPE_ADDLSH1_N 30 #define TYPE_ADDLSH2_N 48 @@ -888,10 +888,10 @@ param_init (void) p->size = SIZE_ALLOW_ZERO; REFERENCE (refmpn_copyd); - p = &param[TYPE_COM_N]; + p = &param[TYPE_COM]; p->dst[0] = 1; p->src[0] = 1; - REFERENCE (refmpn_com_n); + REFERENCE (refmpn_com); p = &param[TYPE_ADDLSH1_N]; @@ -1251,8 +1251,8 @@ __GMPN_COPY_INCR_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size) #endif void -mpn_com_n_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size) -{ mpn_com_n (rp, sp, size); } +mpn_com_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size) +{ mpn_com (rp, sp, size); } void mpn_and_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size) @@ -1447,7 +1447,7 @@ const struct choice_t choice_array[] = { { TRY(mpn_addmul_8), TYPE_ADDMUL_8, 8 }, #endif - { TRY_FUNFUN(mpn_com_n), TYPE_COM_N }, + { TRY_FUNFUN(mpn_com), TYPE_COM }, { TRY_FUNFUN(MPN_COPY), TYPE_COPY }, { 
TRY_FUNFUN(MPN_COPY_INCR), TYPE_COPYI }, @@ -2131,7 +2131,7 @@ call (struct each_t *e, tryfun_t function) case TYPE_COPY: case TYPE_COPYI: case TYPE_COPYD: - case TYPE_COM_N: + case TYPE_COM: CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size); break; diff --git a/tests/mpn/t-instrument.c b/tests/mpn/t-instrument.c index 98e35fdc1..a3d296958 100644 --- a/tests/mpn/t-instrument.c +++ b/tests/mpn/t-instrument.c @@ -176,9 +176,9 @@ check (void) post (); #endif -#if HAVE_NATIVE_mpn_com_n - pre ("mpn_com_n"); - mpn_com_n (wp, xp, size); +#if HAVE_NATIVE_mpn_com + pre ("mpn_com"); + mpn_com (wp, xp, size); post (); #endif diff --git a/tests/mpz/bit.c b/tests/mpz/bit.c index 9dc551d3c..9f63e039e 100644 --- a/tests/mpz/bit.c +++ b/tests/mpz/bit.c @@ -165,7 +165,7 @@ check_tstbit (void) if (low1) pos[0] |= 1; - refmpn_neg_n (neg, pos, (mp_size_t) numberof(neg)); + refmpn_neg (neg, pos, (mp_size_t) numberof(neg)); mpz_set_n (z, neg, (mp_size_t) numberof(neg)); mpz_neg (z, z); diff --git a/tests/refmpn.c b/tests/refmpn.c index 04566b3f3..7bc2e2165 100644 --- a/tests/refmpn.c +++ b/tests/refmpn.c @@ -337,20 +337,6 @@ refmpn_copy_extend (mp_ptr wp, mp_size_t wsize, mp_srcptr xp, mp_size_t xsize) refmpn_zero (wp, wsize-xsize); } -void -refmpn_com_n (mp_ptr rp, mp_srcptr sp, mp_size_t size) -{ - mp_size_t i; - - ASSERT (refmpn_overlap_fullonly_p (rp, sp, size)); - ASSERT (size >= 1); - ASSERT_MPN (sp, size); - - for (i = 0; i < size; i++) - rp[i] = sp[i] ^ GMP_NUMB_MASK; -} - - int refmpn_cmp (mp_srcptr xp, mp_srcptr yp, mp_size_t size) { @@ -728,7 +714,7 @@ refmpn_rsh1sub_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n) /* Twos complement, return borrow. 
*/ mp_limb_t -refmpn_neg_n (mp_ptr dst, mp_srcptr src, mp_size_t size) +refmpn_neg (mp_ptr dst, mp_srcptr src, mp_size_t size) { mp_ptr zeros; mp_limb_t ret; diff --git a/tests/refmpz.c b/tests/refmpz.c index b21b5b9fe..29e0a16f8 100644 --- a/tests/refmpz.c +++ b/tests/refmpz.c @@ -67,10 +67,10 @@ refmpz_hamdist (mpz_srcptr x, mpz_srcptr y) refmpn_copy (yp, PTR(y), ysize); if (SIZ(x) < 0) - refmpn_neg_n (xp, xp, tsize); + refmpn_neg (xp, xp, tsize); if (SIZ(x) < 0) - refmpn_neg_n (yp, yp, tsize); + refmpn_neg (yp, yp, tsize); ret = refmpn_hamdist (xp, yp, tsize); diff --git a/tests/tests.h b/tests/tests.h index b505b2a26..60a229820 100644 --- a/tests/tests.h +++ b/tests/tests.h @@ -227,7 +227,7 @@ int refmpn_cmp_allowzero __GMP_PROTO ((mp_srcptr, mp_srcptr, mp_size_t)); int refmpn_cmp_twosizes __GMP_PROTO ((mp_srcptr xp, mp_size_t xsize, mp_srcptr yp, mp_size_t ysize)); -void refmpn_com_n __GMP_PROTO ((mp_ptr rp, mp_srcptr sp, mp_size_t size)); +void refmpn_com __GMP_PROTO ((mp_ptr rp, mp_srcptr sp, mp_size_t size)); void refmpn_copy __GMP_PROTO ((mp_ptr rp, mp_srcptr sp, mp_size_t size)); void refmpn_copyi __GMP_PROTO ((mp_ptr rp, mp_srcptr sp, mp_size_t size)); void refmpn_copyd __GMP_PROTO ((mp_ptr rp, mp_srcptr sp, mp_size_t size)); @@ -319,7 +319,7 @@ void refmpn_nand_n __GMP_PROTO ((mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size)); void refmpn_nior_n __GMP_PROTO ((mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size)); -mp_limb_t refmpn_neg_n __GMP_PROTO ((mp_ptr dst, mp_srcptr src, mp_size_t size)); +mp_limb_t refmpn_neg __GMP_PROTO ((mp_ptr dst, mp_srcptr src, mp_size_t size)); mp_size_t refmpn_normalize __GMP_PROTO ((mp_srcptr, mp_size_t)); unsigned long refmpn_popcount __GMP_PROTO ((mp_srcptr sp, mp_size_t size)); diff --git a/tune/common.c b/tune/common.c index c89d972cf..c041c176e 100644 --- a/tune/common.c +++ b/tune/common.c @@ -452,9 +452,9 @@ speed_memcpy (struct speed_params *s) SPEED_ROUTINE_MPN_COPY_BYTES (memcpy); } double 
-speed_mpn_com_n (struct speed_params *s) +speed_mpn_com (struct speed_params *s) { - SPEED_ROUTINE_MPN_COPY (mpn_com_n); + SPEED_ROUTINE_MPN_COPY (mpn_com); } diff --git a/tune/many.pl b/tune/many.pl index 18130aea1..5c5648a10 100644 --- a/tune/many.pl +++ b/tune/many.pl @@ -376,7 +376,7 @@ my @table = }, { - 'regexp'=> 'com_n|copyi|copyd', + 'regexp'=> 'com|copyi|copyd', 'ret' => 'void', 'args' => 'mp_ptr wp, mp_srcptr xp, mp_size_t size', 'speed' => 'SPEED_ROUTINE_MPN_COPY', diff --git a/tune/speed.c b/tune/speed.c index c3b47fb79..08f77390c 100644 --- a/tune/speed.c +++ b/tune/speed.c @@ -253,7 +253,7 @@ const struct routine_t { { "mpn_nior_n", speed_mpn_nior_n, FLAG_R_OPTIONAL }, { "mpn_xor_n", speed_mpn_xor_n, FLAG_R_OPTIONAL }, { "mpn_xnor_n", speed_mpn_xnor_n, FLAG_R_OPTIONAL }, - { "mpn_com_n", speed_mpn_com_n }, + { "mpn_com", speed_mpn_com }, { "mpn_popcount", speed_mpn_popcount }, { "mpn_hamdist", speed_mpn_hamdist }, diff --git a/tune/speed.h b/tune/speed.h index 385b5f88b..e75e87a89 100644 --- a/tune/speed.h +++ b/tune/speed.h @@ -158,7 +158,7 @@ double speed_mpn_addmul_5 __GMP_PROTO ((struct speed_params *s)); double speed_mpn_addmul_6 __GMP_PROTO ((struct speed_params *s)); double speed_mpn_addmul_7 __GMP_PROTO ((struct speed_params *s)); double speed_mpn_addmul_8 __GMP_PROTO ((struct speed_params *s)); -double speed_mpn_com_n __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_com __GMP_PROTO ((struct speed_params *s)); double speed_mpn_copyd __GMP_PROTO ((struct speed_params *s)); double speed_mpn_copyi __GMP_PROTO ((struct speed_params *s)); double speed_mpn_dcpi1_div_qr_n __GMP_PROTO ((struct speed_params *s)); |