diff options
Diffstat (limited to 'gmp/mpn/x86')
214 files changed, 4630 insertions, 14803 deletions
diff --git a/gmp/mpn/x86/README b/gmp/mpn/x86/README index 8d7ac9080d..883db227d2 100644 --- a/gmp/mpn/x86/README +++ b/gmp/mpn/x86/README @@ -1,30 +1,19 @@ -Copyright 1999-2002 Free Software Foundation, Inc. +Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc. This file is part of the GNU MP Library. The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - -or - - * the GNU General Public License as published by the Free Software - Foundation; either version 2 of the License, or (at your option) any - later version. - -or both in parallel, as here. +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 3 of the License, or (at your +option) any later version. The GNU MP Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. -You should have received copies of the GNU General Public License and the -GNU Lesser General Public License along with the GNU MP Library. If not, -see https://www.gnu.org/licenses/. +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. diff --git a/gmp/mpn/x86/aors_n.asm b/gmp/mpn/x86/aors_n.asm index 5d359f59b6..c8969995c8 100644 --- a/gmp/mpn/x86/aors_n.asm +++ b/gmp/mpn/x86/aors_n.asm @@ -1,42 +1,32 @@ dnl x86 mpn_add_n/mpn_sub_n -- mpn addition and subtraction. -dnl Copyright 1992, 1994-1996, 1999-2002 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. 
-dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: +dnl Copyright 1992, 1994, 1995, 1996, 1999, 2000, 2001, 2002 Free Software +dnl Foundation, Inc. dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. 
include(`../config.m4') C cycles/limb -C P5 3.375 -C P6 3.125 -C K6 3.5 -C K7 2.25 -C P4 8.75 +C P5: 3.375 +C P6: 3.125 +C K6: 3.5 +C K7: 2.25 +C P4: 8.75 ifdef(`OPERATION_add_n',` @@ -109,7 +99,7 @@ L(0a): leal (%eax,%eax,8),%eax C possible to simplify. pushl %ebp FRAME_pushl() movl PARAM_CARRY,%ebp - shrl %ebp C shift bit 0 into carry + shrl $1,%ebp C shift bit 0 into carry popl %ebp FRAME_popl() jmp *%eax C jump into loop @@ -158,7 +148,7 @@ L(0b): leal (%eax,%eax,8),%eax L(oopgo): pushl %ebp FRAME_pushl() movl PARAM_CARRY,%ebp - shrl %ebp C shift bit 0 into carry + shrl $1,%ebp C shift bit 0 into carry popl %ebp FRAME_popl() ALIGN(16) diff --git a/gmp/mpn/x86/aorsmul_1.asm b/gmp/mpn/x86/aorsmul_1.asm index 54a8905441..b4db427657 100644 --- a/gmp/mpn/x86/aorsmul_1.asm +++ b/gmp/mpn/x86/aorsmul_1.asm @@ -1,51 +1,40 @@ dnl x86 __gmpn_addmul_1 (for 386 and 486) -- Multiply a limb vector with a dnl limb and add the result to a second limb vector. -dnl Copyright 1992, 1994, 1997, 1999-2002, 2005 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. +dnl Copyright 1992, 1994, 1997, 1999, 2000, 2001, 2002, 2005 Free Software +dnl Foundation, Inc. dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. 
+dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') -C cycles/limb -C P5 14.75 -C P6 model 0-8,10-12 7.5 -C P6 model 9 (Banias) 6.7 -C P6 model 13 (Dothan) 6.75 -C P4 model 0 (Willamette) 24.0 -C P4 model 1 (?) 24.0 -C P4 model 2 (Northwood) 24.0 + +C cycles/limb +C P5: 14.75 +C P6 model 0-8,10-12) 7.5 +C P6 model 9 (Banias) +C P6 model 13 (Dothan) 6.75 +C P4 model 0 (Willamette) 24.0 +C P4 model 1 (?) 
24.0 +C P4 model 2 (Northwood) 24.0 C P4 model 3 (Prescott) C P4 model 4 (Nocona) -C Intel Atom -C AMD K6 12.5 -C AMD K7 5.25 -C AMD K8 -C AMD K10 +C K6: 12.5 +C K7: 5.25 +C K8: ifdef(`OPERATION_addmul_1',` diff --git a/gmp/mpn/x86/atom/aorrlsh1_n.asm b/gmp/mpn/x86/atom/aorrlsh1_n.asm deleted file mode 100644 index cd1a650022..0000000000 --- a/gmp/mpn/x86/atom/aorrlsh1_n.asm +++ /dev/null @@ -1,53 +0,0 @@ -dnl Intel Atom mpn_rsblsh1_n -- rp[] = (vp[] << 1) - up[] - -dnl Contributed to the GNU project by Marco Bodrato. - -dnl Copyright 2011 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. 
- -include(`../config.m4') - -define(LSH, 1) -define(RSH, 31) - -ifdef(`OPERATION_addlsh1_n', ` - define(M4_inst, adc) - define(M4_opp, sub) - define(M4_function, mpn_addlsh1_n) - define(M4_function_c, mpn_addlsh1_nc) -',`ifdef(`OPERATION_rsblsh1_n', ` - define(M4_inst, sbb) - define(M4_opp, add) - define(M4_function, mpn_rsblsh1_n) - define(M4_function_c, mpn_rsblsh1_nc) -',`m4_error(`Need OPERATION_addlsh1_n or OPERATION_rsblsh1_n -')')') - -MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_addlsh1_nc mpn_rsblsh1_n mpn_rsblsh1_nc) - -include_mpn(`x86/atom/aorrlshC_n.asm') diff --git a/gmp/mpn/x86/atom/aorrlsh2_n.asm b/gmp/mpn/x86/atom/aorrlsh2_n.asm deleted file mode 100644 index 10f4419de9..0000000000 --- a/gmp/mpn/x86/atom/aorrlsh2_n.asm +++ /dev/null @@ -1,53 +0,0 @@ -dnl Intel Atom mpn_addlsh2_n/mpn_rsblsh2_n -- rp[] = (vp[] << 2) +- up[] - -dnl Contributed to the GNU project by Marco Bodrato. - -dnl Copyright 2011 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. 
- -include(`../config.m4') - -define(LSH, 2) -define(RSH, 30) - -ifdef(`OPERATION_addlsh2_n', ` - define(M4_inst, adcl) - define(M4_opp, subl) - define(M4_function, mpn_addlsh2_n) - define(M4_function_c, mpn_addlsh2_nc) -',`ifdef(`OPERATION_rsblsh2_n', ` - define(M4_inst, sbbl) - define(M4_opp, addl) - define(M4_function, mpn_rsblsh2_n) - define(M4_function_c, mpn_rsblsh2_nc) -',`m4_error(`Need OPERATION_addlsh2_n or OPERATION_rsblsh2_n -')')') - -MULFUNC_PROLOGUE(mpn_addlsh2_n mpn_addlsh2_nc mpn_rsblsh2_n mpn_rsblsh2_nc) - -include_mpn(`x86/atom/aorrlshC_n.asm') diff --git a/gmp/mpn/x86/atom/aorrlshC_n.asm b/gmp/mpn/x86/atom/aorrlshC_n.asm deleted file mode 100644 index 71cfe490d6..0000000000 --- a/gmp/mpn/x86/atom/aorrlshC_n.asm +++ /dev/null @@ -1,156 +0,0 @@ -dnl Intel Atom mpn_addlshC_n/mpn_rsblshC_n -- rp[] = (vp[] << C) +- up[] - -dnl Contributed to the GNU project by Marco Bodrato. - -dnl Copyright 2011 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. 
- -include(`../config.m4') - -C mp_limb_t mpn_addlshC_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, -C mp_size_t size); -C mp_limb_t mpn_addlshC_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, -C mp_size_t size, mp_limb_t carry); -C mp_limb_t mpn_rsblshC_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, -C mp_size_t size); -C mp_limb_t mpn_rsblshC_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, -C mp_size_t size, mp_signed_limb_t carry); - -C cycles/limb -C P5 -C P6 model 0-8,10-12 -C P6 model 9 (Banias) -C P6 model 13 (Dothan) -C P4 model 0 (Willamette) -C P4 model 1 (?) -C P4 model 2 (Northwood) -C P4 model 3 (Prescott) -C P4 model 4 (Nocona) -C Intel Atom 6 -C AMD K6 -C AMD K7 -C AMD K8 -C AMD K10 - -defframe(PARAM_CORB, 20) -defframe(PARAM_SIZE, 16) -defframe(PARAM_DBLD, 12) -defframe(PARAM_SRC, 8) -defframe(PARAM_DST, 4) - -dnl re-use parameter space -define(VAR_COUNT,`PARAM_SIZE') -define(SAVE_EBP,`PARAM_DBLD') -define(SAVE_VP,`PARAM_SRC') -define(SAVE_UP,`PARAM_DST') - -define(M, eval(m4_lshift(1,LSH))) -define(`rp', `%edi') -define(`up', `%esi') -define(`vp', `%ebx') - -ASM_START() - TEXT - ALIGN(8) - -PROLOGUE(M4_function_c) -deflit(`FRAME',0) - movl PARAM_CORB, %eax - movl %eax, %edx - shr $LSH, %edx - andl $1, %edx - M4_opp %edx, %eax - jmp L(start_nc) -EPILOGUE() - -PROLOGUE(M4_function) -deflit(`FRAME',0) - - xor %eax, %eax - xor %edx, %edx -L(start_nc): - push rp FRAME_pushl() - - mov PARAM_SIZE, %ecx C size - mov PARAM_DST, rp - mov up, SAVE_UP - incl %ecx C size + 1 - mov PARAM_SRC, up - mov vp, SAVE_VP - shr %ecx C (size+1)\2 - mov PARAM_DBLD, vp - mov %ebp, SAVE_EBP - mov %ecx, VAR_COUNT - jnc L(entry) C size odd - - shr %edx C size even - mov (vp), %ecx - lea 4(vp), vp - lea (%eax,%ecx,M), %edx - mov %ecx, %eax - lea -4(up), up - lea -4(rp), rp - jmp L(enteven) - - ALIGN(16) -L(oop): - lea (%eax,%ecx,M), %ebp - shr $RSH, %ecx - mov 4(vp), %eax - shr %edx - lea 8(vp), vp - M4_inst (up), %ebp - lea (%ecx,%eax,M), %edx - mov %ebp, (rp) -L(enteven): - 
M4_inst 4(up), %edx - lea 8(up), up - mov %edx, 4(rp) - adc %edx, %edx - shr $RSH, %eax - lea 8(rp), rp -L(entry): - mov (vp), %ecx - decl VAR_COUNT - jnz L(oop) - - lea (%eax,%ecx,M), %ebp - shr $RSH, %ecx - shr %edx - mov SAVE_VP, vp - M4_inst (up), %ebp - mov %ecx, %eax - mov SAVE_UP, up - M4_inst $0, %eax - mov %ebp, (rp) - mov SAVE_EBP, %ebp - pop rp FRAME_popl() - ret -EPILOGUE() - -ASM_END() diff --git a/gmp/mpn/x86/atom/aors_n.asm b/gmp/mpn/x86/atom/aors_n.asm deleted file mode 100644 index 45ec287c3a..0000000000 --- a/gmp/mpn/x86/atom/aors_n.asm +++ /dev/null @@ -1,159 +0,0 @@ -dnl Intel Atom mpn_add_n/mpn_sub_n -- rp[] = up[] +- vp[]. - -dnl Copyright 2011 Free Software Foundation, Inc. - -dnl Contributed to the GNU project by Marco Bodrato. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. - -include(`../config.m4') - -C cycles/limb -C P5 -C P6 model 0-8,10-12 -C P6 model 9 (Banias) -C P6 model 13 (Dothan) -C P4 model 0 (Willamette) -C P4 model 1 (?) 
-C P4 model 2 (Northwood) -C P4 model 3 (Prescott) -C P4 model 4 (Nocona) -C Intel Atom 3 -C AMD K6 -C AMD K7 -C AMD K8 -C AMD K10 - -ifdef(`OPERATION_add_n', ` - define(M4_inst, adcl) - define(M4_function_n, mpn_add_n) - define(M4_function_nc, mpn_add_nc) - define(M4_description, add) -',`ifdef(`OPERATION_sub_n', ` - define(M4_inst, sbbl) - define(M4_function_n, mpn_sub_n) - define(M4_function_nc, mpn_sub_nc) - define(M4_description, subtract) -',`m4_error(`Need OPERATION_add_n or OPERATION_sub_n -')')') - -MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc) - -C mp_limb_t M4_function_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, -C mp_size_t size); -C mp_limb_t M4_function_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, -C mp_size_t size, mp_limb_t carry); -C -C Calculate src1,size M4_description src2,size, and store the result in -C dst,size. The return value is the carry bit from the top of the result (1 -C or 0). -C -C The _nc version accepts 1 or 0 for an initial carry into the low limb of -C the calculation. Note values other than 1 or 0 here will lead to garbage -C results. 
- -defframe(PARAM_CARRY,20) -defframe(PARAM_SIZE, 16) -defframe(PARAM_SRC2, 12) -defframe(PARAM_SRC1, 8) -defframe(PARAM_DST, 4) - -dnl re-use parameter space -define(SAVE_RP,`PARAM_SIZE') -define(SAVE_VP,`PARAM_SRC1') -define(SAVE_UP,`PARAM_DST') - -define(`rp', `%edi') -define(`up', `%esi') -define(`vp', `%ebx') -define(`cy', `%ecx') -define(`r1', `%ecx') -define(`r2', `%edx') - -ASM_START() - TEXT - ALIGN(16) -deflit(`FRAME',0) - -PROLOGUE(M4_function_n) - xor cy, cy C carry -L(start): - mov PARAM_SIZE, %eax C size - mov rp, SAVE_RP - mov PARAM_DST, rp - mov up, SAVE_UP - mov PARAM_SRC1, up - shr %eax C size >> 1 - mov vp, SAVE_VP - mov PARAM_SRC2, vp - jz L(one) C size == 1 - jc L(three) C size % 2 == 1 - - shr cy - mov (up), r2 - lea 4(up), up - lea 4(vp), vp - lea -4(rp), rp - jmp L(entry) -L(one): - shr cy - mov (up), r1 - jmp L(end) -L(three): - shr cy - mov (up), r1 - - ALIGN(16) -L(oop): - M4_inst (vp), r1 - lea 8(up), up - mov -4(up), r2 - lea 8(vp), vp - mov r1, (rp) -L(entry): - M4_inst -4(vp), r2 - lea 8(rp), rp - dec %eax - mov (up), r1 - mov r2, -4(rp) - jnz L(oop) - -L(end): C %eax is zero here - mov SAVE_UP, up - M4_inst (vp), r1 - mov SAVE_VP, vp - mov r1, (rp) - adc %eax, %eax - mov SAVE_RP, rp - ret -EPILOGUE() - -PROLOGUE(M4_function_nc) - mov PARAM_CARRY, cy C carry - jmp L(start) -EPILOGUE() -ASM_END() diff --git a/gmp/mpn/x86/atom/aorslshC_n.asm b/gmp/mpn/x86/atom/aorslshC_n.asm deleted file mode 100644 index 75ace65e51..0000000000 --- a/gmp/mpn/x86/atom/aorslshC_n.asm +++ /dev/null @@ -1,247 +0,0 @@ -dnl Intel Atom mpn_addlshC_n/mpn_sublshC_n -- rp[] = up[] +- (vp[] << C) - -dnl Contributed to the GNU project by Marco Bodrato. - -dnl Copyright 2011 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. 
-dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. - -include(`../config.m4') - -C mp_limb_t mpn_addlshC_n_ip1 (mp_ptr dst, mp_srcptr src, mp_size_t size); -C mp_limb_t mpn_addlshC_nc_ip1 (mp_ptr dst, mp_srcptr src, mp_size_t size, -C mp_limb_t carry); -C mp_limb_t mpn_sublshC_n_ip1 (mp_ptr dst, mp_srcptr src, mp_size_t size,); -C mp_limb_t mpn_sublshC_nc_ip1 (mp_ptr dst, mp_srcptr src, mp_size_t size, -C mp_signed_limb_t borrow); - -defframe(PARAM_CORB, 16) -defframe(PARAM_SIZE, 12) -defframe(PARAM_SRC, 8) -defframe(PARAM_DST, 4) - -C mp_limb_t mpn_addlshC_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, -C mp_size_t size,); -C mp_limb_t mpn_addlshC_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, -C mp_size_t size, mp_limb_t carry); -C mp_limb_t mpn_sublshC_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, -C mp_size_t size,); -C mp_limb_t mpn_sublshC_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, -C mp_size_t size, mp_limb_t borrow); - -C if src1 == dst, _ip1 is used - -C cycles/limb -C dst!=src1,src2 dst==src1 -C P5 -C P6 model 0-8,10-12 -C P6 
model 9 (Banias) -C P6 model 13 (Dothan) -C P4 model 0 (Willamette) -C P4 model 1 (?) -C P4 model 2 (Northwood) -C P4 model 3 (Prescott) -C P4 model 4 (Nocona) -C Intel Atom 7 6 -C AMD K6 -C AMD K7 -C AMD K8 -C AMD K10 - -defframe(GPARAM_CORB, 20) -defframe(GPARAM_SIZE, 16) -defframe(GPARAM_SRC2, 12) - -dnl re-use parameter space -define(SAVE_EBP,`PARAM_SIZE') -define(SAVE_EBX,`PARAM_SRC') -define(SAVE_UP,`PARAM_DST') - -define(M, eval(m4_lshift(1,LSH))) -define(`rp', `%edi') -define(`up', `%esi') - -ASM_START() - TEXT - ALIGN(8) - -PROLOGUE(M4_ip_function_c) -deflit(`FRAME',0) - movl PARAM_CORB, %ecx - movl %ecx, %edx - shr $LSH, %edx - andl $1, %edx - M4_opp %edx, %ecx - jmp L(start_nc) -EPILOGUE() - -PROLOGUE(M4_ip_function) -deflit(`FRAME',0) - - xor %ecx, %ecx - xor %edx, %edx -L(start_nc): - push rp FRAME_pushl() - mov PARAM_DST, rp - mov up, SAVE_UP - mov PARAM_SRC, up - mov %ebx, SAVE_EBX - mov PARAM_SIZE, %ebx C size -L(inplace): - incl %ebx C size + 1 - shr %ebx C (size+1)\2 - mov %ebp, SAVE_EBP - jnc L(entry) C size odd - - add %edx, %edx C size even - mov %ecx, %ebp - mov (up), %ecx - lea -4(rp), rp - lea (%ebp,%ecx,M), %eax - lea 4(up), up - jmp L(enteven) - - ALIGN(16) -L(oop): - lea (%ecx,%eax,M), %ebp - shr $RSH, %eax - mov 4(up), %ecx - add %edx, %edx - lea 8(up), up - M4_inst %ebp, (rp) - lea (%eax,%ecx,M), %eax - -L(enteven): - M4_inst %eax, 4(rp) - lea 8(rp), rp - - sbb %edx, %edx - shr $RSH, %ecx - -L(entry): - mov (up), %eax - decl %ebx - jnz L(oop) - - lea (%ecx,%eax,M), %ebp - shr $RSH, %eax - shr %edx - M4_inst %ebp, (rp) - mov SAVE_UP, up - adc $0, %eax - mov SAVE_EBP, %ebp - mov SAVE_EBX, %ebx - pop rp FRAME_popl() - ret -EPILOGUE() - -PROLOGUE(M4_function_c) -deflit(`FRAME',0) - movl GPARAM_CORB, %ecx - movl %ecx, %edx - shr $LSH, %edx - andl $1, %edx - M4_opp %edx, %ecx - jmp L(generic_nc) -EPILOGUE() - -PROLOGUE(M4_function) -deflit(`FRAME',0) - - xor %ecx, %ecx - xor %edx, %edx -L(generic_nc): - push rp FRAME_pushl() - mov PARAM_DST, 
rp - mov up, SAVE_UP - mov PARAM_SRC, up - cmp rp, up - mov %ebx, SAVE_EBX - jne L(general) - mov GPARAM_SIZE, %ebx C size - mov GPARAM_SRC2, up - jmp L(inplace) - -L(general): - mov GPARAM_SIZE, %eax C size - mov %ebx, SAVE_EBX - incl %eax C size + 1 - mov up, %ebx C vp - mov GPARAM_SRC2, up C up - shr %eax C (size+1)\2 - mov %ebp, SAVE_EBP - mov %eax, GPARAM_SIZE - jnc L(entry2) C size odd - - add %edx, %edx C size even - mov %ecx, %ebp - mov (up), %ecx - lea -4(rp), rp - lea -4(%ebx), %ebx - lea (%ebp,%ecx,M), %eax - lea 4(up), up - jmp L(enteven2) - - ALIGN(16) -L(oop2): - lea (%ecx,%eax,M), %ebp - shr $RSH, %eax - mov 4(up), %ecx - add %edx, %edx - lea 8(up), up - mov (%ebx), %edx - M4_inst %ebp, %edx - lea (%eax,%ecx,M), %eax - mov %edx, (rp) -L(enteven2): - mov 4(%ebx), %edx - lea 8(%ebx), %ebx - M4_inst %eax, %edx - mov %edx, 4(rp) - sbb %edx, %edx - shr $RSH, %ecx - lea 8(rp), rp -L(entry2): - mov (up), %eax - decl GPARAM_SIZE - jnz L(oop2) - - lea (%ecx,%eax,M), %ebp - shr $RSH, %eax - shr %edx - mov (%ebx), %edx - M4_inst %ebp, %edx - mov %edx, (rp) - mov SAVE_UP, up - adc $0, %eax - mov SAVE_EBP, %ebp - mov SAVE_EBX, %ebx - pop rp FRAME_popl() - ret -EPILOGUE() - -ASM_END() diff --git a/gmp/mpn/x86/atom/bdiv_q_1.asm b/gmp/mpn/x86/atom/bdiv_q_1.asm deleted file mode 100644 index 31e908ec44..0000000000 --- a/gmp/mpn/x86/atom/bdiv_q_1.asm +++ /dev/null @@ -1,35 +0,0 @@ -dnl Intel Atom mpn_bdiv_q_1, mpn_pi1_bdiv_q_1 -- schoolbook Hensel -dnl division by 1-limb divisor, returning quotient only. - -dnl Copyright 2011 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. 
-dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. - -include(`../config.m4') - -MULFUNC_PROLOGUE(mpn_bdiv_q_1 mpn_pi1_bdiv_q_1) -include_mpn(`x86/pentium/bdiv_q_1.asm') diff --git a/gmp/mpn/x86/atom/cnd_add_n.asm b/gmp/mpn/x86/atom/cnd_add_n.asm deleted file mode 100644 index 50bf2ad64b..0000000000 --- a/gmp/mpn/x86/atom/cnd_add_n.asm +++ /dev/null @@ -1,113 +0,0 @@ -dnl X86 mpn_cnd_add_n optimised for Intel Atom. - -dnl Copyright 2013 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. 
-dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. - -include(`../config.m4') - -C cycles/limb -C P5 ? -C P6 model 0-8,10-12 ? -C P6 model 9 (Banias) ? -C P6 model 13 (Dothan) ? -C P4 model 0-1 (Willamette) ? -C P4 model 2 (Northwood) ? -C P4 model 3-4 (Prescott) ? -C Intel atom 4.67 -C AMD K6 ? -C AMD K7 ? -C AMD K8 ? - - -define(`rp', `%edi') -define(`up', `%esi') -define(`vp', `%ebp') -define(`n', `%ecx') -define(`cnd', `20(%esp)') - -ASM_START() - TEXT - ALIGN(16) -PROLOGUE(mpn_cnd_add_n) - push %edi - push %esi - push %ebx - push %ebp - - mov cnd, %eax C make cnd into a mask (1) - mov 24(%esp), rp - neg %eax C make cnd into a mask (1) - mov 28(%esp), up - sbb %eax, %eax C make cnd into a mask (1) - mov 32(%esp), vp - mov %eax, cnd C make cnd into a mask (1) - mov 36(%esp), n - - xor %edx, %edx - - shr $1, n - jnc L(top) - - mov 0(vp), %eax - and cnd, %eax - lea 4(vp), vp - add 0(up), %eax - lea 4(rp), rp - lea 4(up), up - sbb %edx, %edx - mov %eax, -4(rp) - inc n - dec n - je L(end) - -L(top): sbb %edx, %edx - mov 0(vp), %eax - and cnd, %eax - lea 8(vp), vp - lea 8(rp), rp - mov -4(vp), %ebx - and cnd, %ebx - add %edx, %edx - adc 0(up), %eax - lea 8(up), up - mov %eax, -8(rp) - adc -4(up), %ebx - dec n - mov %ebx, -4(rp) - jne L(top) - -L(end): mov $0, %eax - adc %eax, %eax - - pop %ebp - pop %ebx - pop %esi - pop %edi - ret -EPILOGUE() -ASM_END() diff --git a/gmp/mpn/x86/atom/cnd_sub_n.asm b/gmp/mpn/x86/atom/cnd_sub_n.asm deleted file mode 100644 index 221bedca37..0000000000 --- a/gmp/mpn/x86/atom/cnd_sub_n.asm +++ /dev/null @@ -1,124 +0,0 @@ -dnl X86 mpn_cnd_sub_n optimised for Intel Atom. - -dnl Copyright 2013 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. 
-dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. - -include(`../config.m4') - -C cycles/limb -C P5 ? -C P6 model 0-8,10-12 ? -C P6 model 9 (Banias) ? -C P6 model 13 (Dothan) ? -C P4 model 0-1 (Willamette) ? -C P4 model 2 (Northwood) ? -C P4 model 3-4 (Prescott) ? -C Intel atom 5.67 -C AMD K6 ? -C AMD K7 ? -C AMD K8 ? 
- - -define(`rp', `%edi') -define(`up', `%esi') -define(`vp', `%ebp') -define(`n', `%ecx') -define(`cnd', `20(%esp)') - -ASM_START() - TEXT - ALIGN(16) -PROLOGUE(mpn_cnd_sub_n) - push %edi - push %esi - push %ebx - push %ebp - - mov cnd, %eax C make cnd into a mask (1) - mov 24(%esp), rp - neg %eax C make cnd into a mask (1) - mov 28(%esp), up - sbb %eax, %eax C make cnd into a mask (1) - mov 32(%esp), vp - mov %eax, cnd C make cnd into a mask (1) - mov 36(%esp), n - - xor %edx, %edx - - inc n - shr n - jnc L(ent) - - mov 0(vp), %eax - and cnd, %eax - lea 4(vp), vp - mov 0(up), %edx - sub %eax, %edx - lea 4(rp), rp - lea 4(up), up - mov %edx, -4(rp) - sbb %edx, %edx C save cy - -L(ent): mov 0(vp), %ebx - and cnd, %ebx - add %edx, %edx C restore cy - mov 0(up), %edx - dec n - je L(end) - -L(top): sbb %ebx, %edx - mov 4(vp), %eax - mov %edx, 0(rp) - sbb %edx, %edx C save cy - mov 8(vp), %ebx - lea 8(up), up - and cnd, %ebx - and cnd, %eax - add %edx, %edx C restore cy - mov -4(up), %edx - lea 8(rp), rp - sbb %eax, %edx - mov %edx, -4(rp) - dec n - mov 0(up), %edx - lea 8(vp), vp - jne L(top) - -L(end): sbb %ebx, %edx - mov %edx, 0(rp) - - mov $0, %eax - adc %eax, %eax - - pop %ebp - pop %ebx - pop %esi - pop %edi - ret -EPILOGUE() -ASM_END() diff --git a/gmp/mpn/x86/atom/dive_1.asm b/gmp/mpn/x86/atom/dive_1.asm deleted file mode 100644 index 71036a15a4..0000000000 --- a/gmp/mpn/x86/atom/dive_1.asm +++ /dev/null @@ -1,34 +0,0 @@ -dnl Intel Atom mpn_divexact_1 -- mpn by limb exact division. - -dnl Copyright 2011 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. 
-dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. - -include(`../config.m4') - -MULFUNC_PROLOGUE(mpn_divexact_1) -include_mpn(`x86/pentium/dive_1.asm') diff --git a/gmp/mpn/x86/atom/gmp-mparam.h b/gmp/mpn/x86/atom/gmp-mparam.h deleted file mode 100644 index 45df12806c..0000000000 --- a/gmp/mpn/x86/atom/gmp-mparam.h +++ /dev/null @@ -1,201 +0,0 @@ -/* Intel Atom/32 gmp-mparam.h -- Compiler/machine parameter header file. - -Copyright 1991, 1993, 1994, 2000-2011, 2014 Free Software Foundation, Inc. - -This file is part of the GNU MP Library. - -The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - -or - - * the GNU General Public License as published by the Free Software - Foundation; either version 2 of the License, or (at your option) any - later version. - -or both in parallel, as here. - -The GNU MP Library is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. 
- -You should have received copies of the GNU General Public License and the -GNU Lesser General Public License along with the GNU MP Library. If not, -see https://www.gnu.org/licenses/. */ - -#define GMP_LIMB_BITS 32 -#define GMP_LIMB_BYTES 4 - -/* 1667 MHz Pineview (Atom D510) */ -/* FFT tuning limit = 25000000 */ -/* Generated by tuneup.c, 2014-03-14, gcc 4.5 */ - -#define MOD_1_NORM_THRESHOLD 3 -#define MOD_1_UNNORM_THRESHOLD 5 -#define MOD_1N_TO_MOD_1_1_THRESHOLD 11 -#define MOD_1U_TO_MOD_1_1_THRESHOLD 5 -#define MOD_1_1_TO_MOD_1_2_THRESHOLD 10 -#define MOD_1_2_TO_MOD_1_4_THRESHOLD 0 /* never mpn_mod_1s_2p */ -#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 13 -#define USE_PREINV_DIVREM_1 1 /* native */ -#define DIV_QR_1N_PI1_METHOD 1 -#define DIV_QR_1_NORM_THRESHOLD 4 -#define DIV_QR_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* never */ -#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */ -#define DIVEXACT_1_THRESHOLD 0 /* always (native) */ -#define BMOD_1_TO_MOD_1_THRESHOLD 31 - -#define MUL_TOOM22_THRESHOLD 20 -#define MUL_TOOM33_THRESHOLD 74 -#define MUL_TOOM44_THRESHOLD 178 -#define MUL_TOOM6H_THRESHOLD 270 -#define MUL_TOOM8H_THRESHOLD 399 - -#define MUL_TOOM32_TO_TOOM43_THRESHOLD 73 -#define MUL_TOOM32_TO_TOOM53_THRESHOLD 122 -#define MUL_TOOM42_TO_TOOM53_THRESHOLD 115 -#define MUL_TOOM42_TO_TOOM63_THRESHOLD 127 -#define MUL_TOOM43_TO_TOOM54_THRESHOLD 106 - -#define SQR_BASECASE_THRESHOLD 0 /* always (native) */ -#define SQR_TOOM2_THRESHOLD 30 -#define SQR_TOOM3_THRESHOLD 105 -#define SQR_TOOM4_THRESHOLD 178 -#define SQR_TOOM6_THRESHOLD 303 -#define SQR_TOOM8_THRESHOLD 527 - -#define MULMID_TOOM42_THRESHOLD 54 - -#define MULMOD_BNM1_THRESHOLD 13 -#define SQRMOD_BNM1_THRESHOLD 18 - -#define MUL_FFT_MODF_THRESHOLD 380 /* k = 5 */ -#define MUL_FFT_TABLE3 \ - { { 380, 5}, { 19, 6}, { 10, 5}, { 21, 6}, \ - { 11, 5}, { 23, 6}, { 21, 7}, { 11, 6}, \ - { 25, 7}, { 13, 6}, { 27, 7}, { 15, 6}, \ - { 31, 7}, { 21, 8}, { 11, 7}, { 27, 8}, \ - { 15, 7}, { 35, 8}, { 19, 7}, { 
39, 8}, \ - { 23, 7}, { 47, 8}, { 27, 9}, { 15, 8}, \ - { 39, 9}, { 23, 8}, { 51,10}, { 15, 9}, \ - { 31, 8}, { 67, 9}, { 39, 8}, { 79, 9}, \ - { 47, 8}, { 95,10}, { 31, 9}, { 79,10}, \ - { 47, 9}, { 95,11}, { 31,10}, { 63, 9}, \ - { 127, 8}, { 255, 9}, { 135,10}, { 79, 9}, \ - { 159,10}, { 95, 9}, { 191,11}, { 63,10}, \ - { 127, 9}, { 255, 8}, { 511, 9}, { 271,10}, \ - { 143, 9}, { 287, 8}, { 575,10}, { 159,11}, \ - { 95,10}, { 191, 9}, { 383,12}, { 63,11}, \ - { 127,10}, { 255, 9}, { 511,10}, { 271, 9}, \ - { 543,10}, { 287, 9}, { 575,10}, { 303,11}, \ - { 159,10}, { 319, 9}, { 639,10}, { 335, 9}, \ - { 671,10}, { 351, 9}, { 703,11}, { 191,10}, \ - { 383, 9}, { 767,10}, { 415, 9}, { 831,11}, \ - { 223,10}, { 447,12}, { 127,11}, { 255,10}, \ - { 543,11}, { 287,10}, { 607, 9}, { 1215,11}, \ - { 319,10}, { 671,11}, { 351,10}, { 703,12}, \ - { 191,11}, { 383,10}, { 767,11}, { 415,10}, \ - { 831,11}, { 447,13}, { 127,12}, { 255,11}, \ - { 543,10}, { 1087,11}, { 607,10}, { 1215,12}, \ - { 319,11}, { 735,12}, { 383,11}, { 831,12}, \ - { 447,11}, { 959,13}, { 255,12}, { 511,11}, \ - { 1087,12}, { 575,11}, { 1151,12}, { 703,11}, \ - { 1471,13}, { 383,12}, { 831,11}, { 1663,12}, \ - { 959,14}, { 255,13}, { 511,12}, { 1215,13}, \ - { 639,12}, { 1471,11}, { 2943,13}, { 767,12}, \ - { 1663,13}, { 895,12}, { 1919,14}, { 511,13}, \ - { 1023,12}, { 2111,13}, { 1151,12}, { 2431,13}, \ - { 1407,12}, { 2943,14}, { 767,13}, { 1663,12}, \ - { 3455,13}, { 1919,15}, { 511,14}, { 1023,13}, \ - { 2431,14}, { 1279,13}, { 2943,12}, { 5887,14}, \ - { 16384,15}, { 32768,16} } -#define MUL_FFT_TABLE3_SIZE 150 -#define MUL_FFT_THRESHOLD 4544 - -#define SQR_FFT_MODF_THRESHOLD 340 /* k = 5 */ -#define SQR_FFT_TABLE3 \ - { { 340, 5}, { 21, 6}, { 11, 5}, { 23, 6}, \ - { 12, 5}, { 25, 6}, { 21, 7}, { 11, 6}, \ - { 25, 7}, { 13, 6}, { 27, 7}, { 15, 6}, \ - { 31, 7}, { 21, 8}, { 11, 7}, { 27, 8}, \ - { 15, 7}, { 35, 8}, { 19, 7}, { 41, 8}, \ - { 23, 7}, { 47, 8}, { 27, 9}, { 15, 8}, \ - { 39, 9}, { 
23, 8}, { 51,10}, { 15, 9}, \ - { 31, 8}, { 63, 9}, { 39, 8}, { 79, 9}, \ - { 47,10}, { 31, 9}, { 79,10}, { 47, 9}, \ - { 95,11}, { 31,10}, { 63, 9}, { 127, 8}, \ - { 255,10}, { 79, 9}, { 159, 8}, { 319,10}, \ - { 95, 9}, { 191,11}, { 63,10}, { 127, 9}, \ - { 255, 8}, { 511, 9}, { 271,10}, { 143, 9}, \ - { 287, 8}, { 575, 9}, { 303, 8}, { 607,10}, \ - { 159, 9}, { 319,11}, { 95,10}, { 191, 9}, \ - { 383,12}, { 63,11}, { 127,10}, { 255, 9}, \ - { 511,10}, { 271, 9}, { 543,10}, { 287, 9}, \ - { 575,10}, { 303, 9}, { 607,10}, { 319, 9}, \ - { 639,10}, { 335, 9}, { 671,10}, { 351, 9}, \ - { 703,11}, { 191,10}, { 383, 9}, { 767,10}, \ - { 415,11}, { 223,10}, { 447,12}, { 127,11}, \ - { 255,10}, { 543,11}, { 287,10}, { 607,11}, \ - { 319,10}, { 671,11}, { 351,10}, { 703,12}, \ - { 191,11}, { 383,10}, { 767,11}, { 415,10}, \ - { 831,11}, { 479,13}, { 127,12}, { 255,11}, \ - { 543,10}, { 1087,11}, { 607,12}, { 319,11}, \ - { 671,10}, { 1343,11}, { 735,12}, { 383,11}, \ - { 831,12}, { 447,11}, { 959,13}, { 255,12}, \ - { 511,11}, { 1087,12}, { 575,11}, { 1215,12}, \ - { 639,11}, { 1343,12}, { 703,11}, { 1407,13}, \ - { 383,12}, { 831,11}, { 1663,12}, { 959,14}, \ - { 255,13}, { 511,12}, { 1215,13}, { 639,12}, \ - { 1471,13}, { 767,12}, { 1663,13}, { 895,12}, \ - { 1791,14}, { 511,13}, { 1023,12}, { 2111,13}, \ - { 1151,12}, { 2431,13}, { 1407,14}, { 767,13}, \ - { 1663,12}, { 3455,13}, { 1791,15}, { 511,14}, \ - { 1023,13}, { 2431,14}, { 1279,13}, { 2943,12}, \ - { 5887,14}, { 16384,15}, { 32768,16} } -#define SQR_FFT_TABLE3_SIZE 151 -#define SQR_FFT_THRESHOLD 2880 - -#define MULLO_BASECASE_THRESHOLD 6 -#define MULLO_DC_THRESHOLD 48 -#define MULLO_MUL_N_THRESHOLD 8907 - -#define DC_DIV_QR_THRESHOLD 59 -#define DC_DIVAPPR_Q_THRESHOLD 250 -#define DC_BDIV_QR_THRESHOLD 59 -#define DC_BDIV_Q_THRESHOLD 169 - -#define INV_MULMOD_BNM1_THRESHOLD 38 -#define INV_NEWTON_THRESHOLD 246 -#define INV_APPR_THRESHOLD 246 - -#define BINV_NEWTON_THRESHOLD 276 -#define 
REDC_1_TO_REDC_N_THRESHOLD 67 - -#define MU_DIV_QR_THRESHOLD 1334 -#define MU_DIVAPPR_Q_THRESHOLD 1442 -#define MUPI_DIV_QR_THRESHOLD 114 -#define MU_BDIV_QR_THRESHOLD 1142 -#define MU_BDIV_Q_THRESHOLD 1334 - -#define POWM_SEC_TABLE 1,22,98,416,1378 - -#define MATRIX22_STRASSEN_THRESHOLD 13 -#define HGCD_THRESHOLD 133 -#define HGCD_APPR_THRESHOLD 169 -#define HGCD_REDUCE_THRESHOLD 2479 -#define GCD_DC_THRESHOLD 460 -#define GCDEXT_DC_THRESHOLD 342 -#define JACOBI_BASE_METHOD 3 - -#define GET_STR_DC_THRESHOLD 12 -#define GET_STR_PRECOMPUTE_THRESHOLD 23 -#define SET_STR_DC_THRESHOLD 321 -#define SET_STR_PRECOMPUTE_THRESHOLD 1099 - -#define FAC_DSC_THRESHOLD 198 -#define FAC_ODD_THRESHOLD 34 diff --git a/gmp/mpn/x86/atom/logops_n.asm b/gmp/mpn/x86/atom/logops_n.asm deleted file mode 100644 index 3cb6d7310c..0000000000 --- a/gmp/mpn/x86/atom/logops_n.asm +++ /dev/null @@ -1,151 +0,0 @@ -dnl Intel Atom mpn_and_n,...,mpn_xnor_n -- bitwise logical operations. - -dnl Copyright 2011 Free Software Foundation, Inc. - -dnl Contributed to the GNU project by Marco Bodrato. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. 
-dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. - -include(`../config.m4') - -C cycles/limb -C op nop opn -C P5 -C P6 model 0-8,10-12 -C P6 model 9 (Banias) -C P6 model 13 (Dothan) -C P4 model 0 (Willamette) -C P4 model 1 (?) -C P4 model 2 (Northwood) -C P4 model 3 (Prescott) -C P4 model 4 (Nocona) -C Intel Atom 3 3.5 3.5 -C AMD K6 -C AMD K7 -C AMD K8 -C AMD K10 - -define(M4_choose_op, -`ifdef(`OPERATION_$1',` -define(`M4_function', `mpn_$1') -define(`M4_want_pre', `$4') -define(`M4_inst', `$3') -define(`M4_want_post',`$2') -')') -define(M4pre, `ifelse(M4_want_pre, yes,`$1')') -define(M4post,`ifelse(M4_want_post,yes,`$1')') - -M4_choose_op( and_n, , andl, ) -M4_choose_op( andn_n, , andl, yes) -M4_choose_op( nand_n, yes, andl, ) -M4_choose_op( ior_n, , orl, ) -M4_choose_op( iorn_n, , orl, yes) -M4_choose_op( nior_n, yes, orl, ) -M4_choose_op( xor_n, , xorl, ) -M4_choose_op( xnor_n, yes, xorl, ) - -ifdef(`M4_function',, -`m4_error(`Unrecognised or undefined OPERATION symbol -')') - -MULFUNC_PROLOGUE(mpn_and_n mpn_andn_n mpn_nand_n mpn_ior_n mpn_iorn_n mpn_nior_n mpn_xor_n mpn_xnor_n) - -C void M4_function (mp_ptr dst, mp_srcptr src2, mp_srcptr src1, mp_size_t size); -C - -defframe(PARAM_SIZE, 16) -defframe(PARAM_SRC1, 12) -defframe(PARAM_SRC2, 8) -defframe(PARAM_DST, 4) - -dnl re-use parameter space -define(SAVE_RP,`PARAM_SIZE') -define(SAVE_VP,`PARAM_SRC1') -define(SAVE_UP,`PARAM_DST') - -define(`rp', `%edi') -define(`up', `%esi') -define(`vp', `%ebx') -define(`cnt', `%eax') -define(`r1', `%ecx') -define(`r2', `%edx') - -ASM_START() - TEXT - ALIGN(16) -deflit(`FRAME',0) - -PROLOGUE(M4_function) - mov PARAM_SIZE, cnt C size - mov rp, SAVE_RP - mov PARAM_DST, rp - mov up, SAVE_UP - mov PARAM_SRC1, up - shr cnt C size >> 1 - mov vp, SAVE_VP - mov PARAM_SRC2, vp - mov (up), r1 - jz L(end) C size == 1 - jnc L(even) C 
size % 2 == 0 - - ALIGN(16) -L(oop): -M4pre(` notl_or_xorl_GMP_NUMB_MASK(r1)') - M4_inst (vp), r1 - lea 8(up), up - mov -4(up), r2 -M4post(` notl_or_xorl_GMP_NUMB_MASK(r1)') - lea 8(vp), vp - mov r1, (rp) -L(entry): -M4pre(` notl_or_xorl_GMP_NUMB_MASK(r2)') - M4_inst -4(vp), r2 - lea 8(rp), rp -M4post(` notl_or_xorl_GMP_NUMB_MASK(r2)') - dec cnt - mov (up), r1 - mov r2, -4(rp) - jnz L(oop) - -L(end): -M4pre(` notl_or_xorl_GMP_NUMB_MASK(r1)') - mov SAVE_UP, up - M4_inst (vp), r1 -M4post(`notl_or_xorl_GMP_NUMB_MASK(r1)') - mov SAVE_VP, vp - mov r1, (rp) - mov SAVE_RP, rp - ret - -L(even): - mov r1, r2 - lea 4(up), up - lea 4(vp), vp - lea -4(rp), rp - jmp L(entry) -EPILOGUE() -ASM_END() diff --git a/gmp/mpn/x86/atom/lshift.asm b/gmp/mpn/x86/atom/lshift.asm deleted file mode 100644 index f2c70dd3e8..0000000000 --- a/gmp/mpn/x86/atom/lshift.asm +++ /dev/null @@ -1,218 +0,0 @@ -dnl Intel Atom mpn_lshift -- mpn left shift. - -dnl Copyright 2011 Free Software Foundation, Inc. - -dnl Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. 
-dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. - -include(`../config.m4') - -C mp_limb_t mpn_lshift (mp_ptr dst, mp_srcptr src, mp_size_t size, -C unsigned cnt); - -C cycles/limb -C cnt!=1 cnt==1 -C P5 -C P6 model 0-8,10-12 -C P6 model 9 (Banias) -C P6 model 13 (Dothan) -C P4 model 0 (Willamette) -C P4 model 1 (?) -C P4 model 2 (Northwood) -C P4 model 3 (Prescott) -C P4 model 4 (Nocona) -C Intel Atom 5 2.5 -C AMD K6 -C AMD K7 -C AMD K8 -C AMD K10 - -defframe(PARAM_CNT, 16) -defframe(PARAM_SIZE,12) -defframe(PARAM_SRC, 8) -defframe(PARAM_DST, 4) - -dnl re-use parameter space -define(SAVE_UP,`PARAM_CNT') -define(VAR_COUNT,`PARAM_SIZE') -define(SAVE_EBX,`PARAM_SRC') -define(SAVE_EBP,`PARAM_DST') - -define(`rp', `%edi') -define(`up', `%esi') -define(`cnt', `%ecx') - -ASM_START() - TEXT - ALIGN(8) -deflit(`FRAME',0) -PROLOGUE(mpn_lshift) - mov PARAM_CNT, cnt - mov PARAM_SIZE, %edx - mov up, SAVE_UP - mov PARAM_SRC, up - push rp FRAME_pushl() - mov PARAM_DST, rp - -C We can use faster code for shift-by-1 under certain conditions. 
- cmp $1,cnt - jne L(normal) - cmpl rp, up - jnc L(special) C jump if s_ptr + 1 >= res_ptr - leal (up,%edx,4),%eax - cmpl %eax,rp - jnc L(special) C jump if res_ptr >= s_ptr + size - -L(normal): - lea -4(up,%edx,4), up - mov %ebx, SAVE_EBX - lea -4(rp,%edx,4), rp - - shr %edx - mov (up), %eax - mov %edx, VAR_COUNT - jnc L(evn) - - mov %eax, %ebx - shl %cl, %ebx - neg cnt - shr %cl, %eax - test %edx, %edx - jnz L(gt1) - mov %ebx, (rp) - jmp L(quit) - -L(gt1): mov %ebp, SAVE_EBP - push %eax - mov -4(up), %eax - mov %eax, %ebp - shr %cl, %eax - jmp L(lo1) - -L(evn): mov %ebp, SAVE_EBP - neg cnt - mov %eax, %ebp - mov -4(up), %edx - shr %cl, %eax - mov %edx, %ebx - shr %cl, %edx - neg cnt - decl VAR_COUNT - lea 4(rp), rp - lea -4(up), up - jz L(end) - push %eax FRAME_pushl() - - ALIGN(8) -L(top): shl %cl, %ebp - or %ebp, %edx - shl %cl, %ebx - neg cnt - mov -4(up), %eax - mov %eax, %ebp - mov %edx, -4(rp) - shr %cl, %eax - lea -8(rp), rp -L(lo1): mov -8(up), %edx - or %ebx, %eax - mov %edx, %ebx - shr %cl, %edx - lea -8(up), up - neg cnt - mov %eax, (rp) - decl VAR_COUNT - jg L(top) - - pop %eax FRAME_popl() -L(end): - shl %cl, %ebp - shl %cl, %ebx - or %ebp, %edx - mov SAVE_EBP, %ebp - mov %edx, -4(rp) - mov %ebx, -8(rp) - -L(quit): - mov SAVE_UP, up - mov SAVE_EBX, %ebx - pop rp FRAME_popl() - ret - -L(special): -deflit(`FRAME',4) - lea 3(%edx), %eax C size + 3 - dec %edx C size - 1 - mov (up), %ecx - shr $2, %eax C (size + 3) / 4 - and $3, %edx C (size - 1) % 4 - jz L(goloop) C jmp if size == 1 (mod 4) - shr %edx - jnc L(odd) C jum if size == 3 (mod 4) - - add %ecx, %ecx - lea 4(up), up - mov %ecx, (rp) - mov (up), %ecx - lea 4(rp), rp - - dec %edx - jnz L(goloop) C jump if size == 0 (mod 4) -L(odd): lea -8(up), up - lea -8(rp), rp - jmp L(sentry) C reached if size == 2 or 3 (mod 4) - -L(sloop): - adc %ecx, %ecx - mov 4(up), %edx - mov %ecx, (rp) - adc %edx, %edx - mov 8(up), %ecx - mov %edx, 4(rp) -L(sentry): - adc %ecx, %ecx - mov 12(up), %edx - mov %ecx, 8(rp) - 
adc %edx, %edx - lea 16(up), up - mov %edx, 12(rp) - lea 16(rp), rp - mov (up), %ecx -L(goloop): - decl %eax - jnz L(sloop) - -L(squit): - adc %ecx, %ecx - mov %ecx, (rp) - adc %eax, %eax - - mov SAVE_UP, up - pop rp FRAME_popl() - ret -EPILOGUE() -ASM_END() diff --git a/gmp/mpn/x86/atom/lshiftc.asm b/gmp/mpn/x86/atom/lshiftc.asm deleted file mode 100644 index 5be53ed19d..0000000000 --- a/gmp/mpn/x86/atom/lshiftc.asm +++ /dev/null @@ -1,159 +0,0 @@ -dnl Intel Atom mpn_lshiftc -- mpn left shift with complement. - -dnl Copyright 2011 Free Software Foundation, Inc. - -dnl Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. - -include(`../config.m4') - -C mp_limb_t mpn_lshiftc (mp_ptr dst, mp_srcptr src, mp_size_t size, -C unsigned cnt); - -C cycles/limb -C P5 -C P6 model 0-8,10-12 -C P6 model 9 (Banias) -C P6 model 13 (Dothan) -C P4 model 0 (Willamette) -C P4 model 1 (?) 
-C P4 model 2 (Northwood) -C P4 model 3 (Prescott) -C P4 model 4 (Nocona) -C Intel Atom 5.5 -C AMD K6 -C AMD K7 -C AMD K8 -C AMD K10 - -defframe(PARAM_CNT, 16) -defframe(PARAM_SIZE,12) -defframe(PARAM_SRC, 8) -defframe(PARAM_DST, 4) - -dnl re-use parameter space -define(SAVE_UP,`PARAM_CNT') -define(VAR_COUNT,`PARAM_SIZE') -define(SAVE_EBX,`PARAM_SRC') -define(SAVE_EBP,`PARAM_DST') - -define(`rp', `%edi') -define(`up', `%esi') -define(`cnt', `%ecx') - -ASM_START() - TEXT - -PROLOGUE(mpn_lshiftc) -deflit(`FRAME',0) - mov PARAM_CNT, cnt - mov PARAM_SIZE, %edx - mov up, SAVE_UP - mov PARAM_SRC, up - push rp FRAME_pushl() - mov PARAM_DST, rp - - lea -4(up,%edx,4), up - mov %ebx, SAVE_EBX - lea -4(rp,%edx,4), rp - - shr %edx - mov (up), %eax - mov %edx, VAR_COUNT - jnc L(evn) - - mov %eax, %ebx - shl %cl, %ebx - neg cnt - shr %cl, %eax - test %edx, %edx - jnz L(gt1) - not %ebx - mov %ebx, (rp) - jmp L(quit) - -L(gt1): mov %ebp, SAVE_EBP - push %eax - mov -4(up), %eax - mov %eax, %ebp - shr %cl, %eax - jmp L(lo1) - -L(evn): mov %ebp, SAVE_EBP - neg cnt - mov %eax, %ebp - mov -4(up), %edx - shr %cl, %eax - mov %edx, %ebx - shr %cl, %edx - neg cnt - decl VAR_COUNT - lea 4(rp), rp - lea -4(up), up - jz L(end) - push %eax FRAME_pushl() - -L(top): shl %cl, %ebp - or %ebp, %edx - shl %cl, %ebx - neg cnt - not %edx - mov -4(up), %eax - mov %eax, %ebp - mov %edx, -4(rp) - shr %cl, %eax - lea -8(rp), rp -L(lo1): mov -8(up), %edx - or %ebx, %eax - mov %edx, %ebx - shr %cl, %edx - not %eax - lea -8(up), up - neg cnt - mov %eax, (rp) - decl VAR_COUNT - jg L(top) - - pop %eax FRAME_popl() -L(end): - shl %cl, %ebp - shl %cl, %ebx - or %ebp, %edx - mov SAVE_EBP, %ebp - not %edx - not %ebx - mov %edx, -4(rp) - mov %ebx, -8(rp) - -L(quit): - mov SAVE_UP, up - mov SAVE_EBX, %ebx - pop rp FRAME_popl() - ret -EPILOGUE() -ASM_END() diff --git a/gmp/mpn/x86/atom/mmx/copyd.asm b/gmp/mpn/x86/atom/mmx/copyd.asm deleted file mode 100644 index b80fb033fe..0000000000 --- 
a/gmp/mpn/x86/atom/mmx/copyd.asm +++ /dev/null @@ -1,34 +0,0 @@ -dnl Intel Atom mpn_copyd -- copy limb vector, decrementing. - -dnl Copyright 2011 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. - -include(`../config.m4') - -MULFUNC_PROLOGUE(mpn_copyd) -include_mpn(`x86/k7/mmx/copyd.asm') diff --git a/gmp/mpn/x86/atom/mmx/copyi.asm b/gmp/mpn/x86/atom/mmx/copyi.asm deleted file mode 100644 index 49b6b8d662..0000000000 --- a/gmp/mpn/x86/atom/mmx/copyi.asm +++ /dev/null @@ -1,34 +0,0 @@ -dnl Intel Atom mpn_copyi -- copy limb vector, incrementing. - -dnl Copyright 2011 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. 
-dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. - -include(`../config.m4') - -MULFUNC_PROLOGUE(mpn_copyi) -include_mpn(`x86/k7/mmx/copyi.asm') diff --git a/gmp/mpn/x86/atom/mmx/hamdist.asm b/gmp/mpn/x86/atom/mmx/hamdist.asm deleted file mode 100644 index 3fe8253240..0000000000 --- a/gmp/mpn/x86/atom/mmx/hamdist.asm +++ /dev/null @@ -1,34 +0,0 @@ -dnl Intel Atom mpn_hamdist -- hamming distance. - -dnl Copyright 2011 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. 
-dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. - -include(`../config.m4') - -MULFUNC_PROLOGUE(mpn_hamdist) -include_mpn(`x86/k7/mmx/popham.asm') diff --git a/gmp/mpn/x86/atom/mod_34lsub1.asm b/gmp/mpn/x86/atom/mod_34lsub1.asm deleted file mode 100644 index 6d57ba385d..0000000000 --- a/gmp/mpn/x86/atom/mod_34lsub1.asm +++ /dev/null @@ -1,34 +0,0 @@ -dnl Intel Atom mpn_mod_34lsub1 -- remainder modulo 2^24-1. - -dnl Copyright 2011 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. 
- -include(`../config.m4') - -MULFUNC_PROLOGUE(mpn_mod_34lsub1) -include_mpn(`x86/p6/mod_34lsub1.asm') diff --git a/gmp/mpn/x86/atom/mode1o.asm b/gmp/mpn/x86/atom/mode1o.asm deleted file mode 100644 index c9ee6bd2db..0000000000 --- a/gmp/mpn/x86/atom/mode1o.asm +++ /dev/null @@ -1,34 +0,0 @@ -dnl Intel Atom mpn_modexact_1_odd -- exact division style remainder. - -dnl Copyright 2011 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. - -include(`../config.m4') - -MULFUNC_PROLOGUE(mpn_modexact_1_odd mpn_modexact_1c_odd) -include_mpn(`x86/pentium/mode1o.asm') diff --git a/gmp/mpn/x86/atom/rshift.asm b/gmp/mpn/x86/atom/rshift.asm deleted file mode 100644 index 1cb5dbefe9..0000000000 --- a/gmp/mpn/x86/atom/rshift.asm +++ /dev/null @@ -1,152 +0,0 @@ -dnl Intel Atom mpn_rshift -- mpn right shift. - -dnl Copyright 2011 Free Software Foundation, Inc. - -dnl Converted from AMD64 by Marco Bodrato. - -dnl This file is part of the GNU MP Library. 
-dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. - -include(`../config.m4') - -C mp_limb_t mpn_rshift (mp_ptr dst, mp_srcptr src, mp_size_t size, -C unsigned cnt); - -C cycles/limb -C P5 -C P6 model 0-8,10-12 -C P6 model 9 (Banias) -C P6 model 13 (Dothan) -C P4 model 0 (Willamette) -C P4 model 1 (?) 
-C P4 model 2 (Northwood) -C P4 model 3 (Prescott) -C P4 model 4 (Nocona) -C Intel Atom 5 -C AMD K6 -C AMD K7 -C AMD K8 -C AMD K10 - -defframe(PARAM_CNT, 16) -defframe(PARAM_SIZE,12) -defframe(PARAM_SRC, 8) -defframe(PARAM_DST, 4) - -dnl re-use parameter space -define(SAVE_UP,`PARAM_CNT') -define(VAR_COUNT,`PARAM_SIZE') -define(SAVE_EBX,`PARAM_SRC') -define(SAVE_EBP,`PARAM_DST') - -define(`rp', `%edi') -define(`up', `%esi') -define(`cnt', `%ecx') - -ASM_START() - TEXT - ALIGN(8) -deflit(`FRAME',0) -PROLOGUE(mpn_rshift) - mov PARAM_CNT, cnt - mov PARAM_SIZE, %edx - mov up, SAVE_UP - mov PARAM_SRC, up - push rp FRAME_pushl() - mov PARAM_DST, rp - mov %ebx, SAVE_EBX - - shr %edx - mov (up), %eax - mov %edx, VAR_COUNT - jnc L(evn) - - mov %eax, %ebx - shr %cl, %ebx - neg cnt - shl %cl, %eax - test %edx, %edx - jnz L(gt1) - mov %ebx, (rp) - jmp L(quit) - -L(gt1): mov %ebp, SAVE_EBP - push %eax - mov 4(up), %eax - mov %eax, %ebp - shl %cl, %eax - jmp L(lo1) - -L(evn): mov %ebp, SAVE_EBP - neg cnt - mov %eax, %ebp - mov 4(up), %edx - shl %cl, %eax - mov %edx, %ebx - shl %cl, %edx - neg cnt - decl VAR_COUNT - lea -4(rp), rp - lea 4(up), up - jz L(end) - push %eax FRAME_pushl() - - ALIGN(8) -L(top): shr %cl, %ebp - or %ebp, %edx - shr %cl, %ebx - neg cnt - mov 4(up), %eax - mov %eax, %ebp - mov %edx, 4(rp) - shl %cl, %eax - lea 8(rp), rp -L(lo1): mov 8(up), %edx - or %ebx, %eax - mov %edx, %ebx - shl %cl, %edx - lea 8(up), up - neg cnt - mov %eax, (rp) - decl VAR_COUNT - jg L(top) - - pop %eax FRAME_popl() -L(end): - shr %cl, %ebp - shr %cl, %ebx - or %ebp, %edx - mov SAVE_EBP, %ebp - mov %edx, 4(rp) - mov %ebx, 8(rp) - -L(quit): - mov SAVE_UP, up - mov SAVE_EBX, %ebx - pop rp FRAME_popl() - ret -EPILOGUE() -ASM_END() diff --git a/gmp/mpn/x86/atom/sse2/aorsmul_1.asm b/gmp/mpn/x86/atom/sse2/aorsmul_1.asm deleted file mode 100644 index 969a14a919..0000000000 --- a/gmp/mpn/x86/atom/sse2/aorsmul_1.asm +++ /dev/null @@ -1,174 +0,0 @@ -dnl x86-32 mpn_addmul_1 and mpn_submul_1 
optimised for Intel Atom. - -dnl Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato. - -dnl Copyright 2011 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. - -include(`../config.m4') - -C cycles/limb -C cycles/limb -C P5 - -C P6 model 0-8,10-12 - -C P6 model 9 (Banias) -C P6 model 13 (Dothan) -C P4 model 0 (Willamette) -C P4 model 1 (?) 
-C P4 model 2 (Northwood) -C P4 model 3 (Prescott) -C P4 model 4 (Nocona) -C Intel Atom 8 -C AMD K6 -C AMD K7 - -C AMD K8 -C AMD K10 - -define(`rp', `%edi') -define(`up', `%esi') -define(`n', `%ecx') - -ifdef(`OPERATION_addmul_1',` - define(ADDSUB, add) - define(func_1, mpn_addmul_1) - define(func_1c, mpn_addmul_1c)') -ifdef(`OPERATION_submul_1',` - define(ADDSUB, sub) - define(func_1, mpn_submul_1) - define(func_1c, mpn_submul_1c)') - -MULFUNC_PROLOGUE(mpn_addmul_1 mpn_addmul_1c mpn_submul_1 mpn_submul_1c) - - TEXT - ALIGN(16) -PROLOGUE(func_1) - xor %edx, %edx -L(ent): push %edi - push %esi - push %ebx - mov 16(%esp), rp - mov 20(%esp), up - mov 24(%esp), n - movd 28(%esp), %mm7 - test $1, n - jz L(fi0or2) - movd (up), %mm0 - pmuludq %mm7, %mm0 - shr $2, n - jnc L(fi1) - -L(fi3): lea -8(up), up - lea -8(rp), rp - movd 12(up), %mm1 - movd %mm0, %ebx - pmuludq %mm7, %mm1 - add $1, n C increment and clear carry - jmp L(lo3) - -L(fi1): movd %mm0, %ebx - jz L(wd1) - movd 4(up), %mm1 - pmuludq %mm7, %mm1 - jmp L(lo1) - -L(fi0or2): - movd (up), %mm1 - pmuludq %mm7, %mm1 - shr $2, n - movd 4(up), %mm0 - jc L(fi2) - lea -4(up), up - lea -4(rp), rp - movd %mm1, %eax - pmuludq %mm7, %mm0 - jmp L(lo0) - -L(fi2): lea 4(up), up - add $1, n C increment and clear carry - movd %mm1, %eax - lea -12(rp), rp - jmp L(lo2) - -C ALIGN(16) C alignment seems irrelevant -L(top): movd 4(up), %mm1 - adc $0, %edx - ADDSUB %eax, 12(rp) - movd %mm0, %ebx - pmuludq %mm7, %mm1 - lea 16(rp), rp -L(lo1): psrlq $32, %mm0 - adc %edx, %ebx - movd %mm0, %edx - movd %mm1, %eax - movd 8(up), %mm0 - pmuludq %mm7, %mm0 - adc $0, %edx - ADDSUB %ebx, (rp) -L(lo0): psrlq $32, %mm1 - adc %edx, %eax - movd %mm1, %edx - movd %mm0, %ebx - movd 12(up), %mm1 - pmuludq %mm7, %mm1 - adc $0, %edx - ADDSUB %eax, 4(rp) -L(lo3): psrlq $32, %mm0 - adc %edx, %ebx - movd %mm0, %edx - movd %mm1, %eax - lea 16(up), up - movd (up), %mm0 - adc $0, %edx - ADDSUB %ebx, 8(rp) -L(lo2): psrlq $32, %mm1 - adc %edx, %eax - movd %mm1, 
%edx - pmuludq %mm7, %mm0 - dec n - jnz L(top) - -L(end): adc n, %edx C n is zero here - ADDSUB %eax, 12(rp) - movd %mm0, %ebx - lea 16(rp), rp -L(wd1): psrlq $32, %mm0 - adc %edx, %ebx - movd %mm0, %eax - adc n, %eax - ADDSUB %ebx, (rp) - emms - adc n, %eax - pop %ebx - pop %esi - pop %edi - ret -EPILOGUE() -PROLOGUE(func_1c) - mov 20(%esp), %edx C carry - jmp L(ent) -EPILOGUE() diff --git a/gmp/mpn/x86/atom/sse2/bdiv_dbm1c.asm b/gmp/mpn/x86/atom/sse2/bdiv_dbm1c.asm deleted file mode 100644 index 782e914019..0000000000 --- a/gmp/mpn/x86/atom/sse2/bdiv_dbm1c.asm +++ /dev/null @@ -1,34 +0,0 @@ -dnl Intel Atom mpn_bdiv_dbm1. - -dnl Copyright 2011 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. 
- -include(`../config.m4') - -MULFUNC_PROLOGUE(mpn_bdiv_dbm1c) -include_mpn(`x86/pentium4/sse2/bdiv_dbm1c.asm') diff --git a/gmp/mpn/x86/atom/sse2/divrem_1.asm b/gmp/mpn/x86/atom/sse2/divrem_1.asm deleted file mode 100644 index f84709a22e..0000000000 --- a/gmp/mpn/x86/atom/sse2/divrem_1.asm +++ /dev/null @@ -1,34 +0,0 @@ -dnl Intel Atom mpn_divrem_1 -- mpn by limb division. - -dnl Copyright 2011 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. - -include(`../config.m4') - -MULFUNC_PROLOGUE(mpn_preinv_divrem_1 mpn_divrem_1c mpn_divrem_1) -include_mpn(`x86/pentium4/sse2/divrem_1.asm') diff --git a/gmp/mpn/x86/atom/sse2/mod_1_1.asm b/gmp/mpn/x86/atom/sse2/mod_1_1.asm deleted file mode 100644 index ae6581d9b6..0000000000 --- a/gmp/mpn/x86/atom/sse2/mod_1_1.asm +++ /dev/null @@ -1,34 +0,0 @@ -dnl Intel Atom/SSE2 mpn_mod_1_1. - -dnl Copyright 2009, 2011 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. 
-dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. - -include(`../config.m4') - -MULFUNC_PROLOGUE(mpn_mod_1_1p) -include_mpn(`x86/pentium4/sse2/mod_1_1.asm') diff --git a/gmp/mpn/x86/atom/sse2/mod_1_4.asm b/gmp/mpn/x86/atom/sse2/mod_1_4.asm deleted file mode 100644 index 31faa3f0a3..0000000000 --- a/gmp/mpn/x86/atom/sse2/mod_1_4.asm +++ /dev/null @@ -1,34 +0,0 @@ -dnl Intel Atom/SSE2 mpn_mod_1_4. - -dnl Copyright 2009, 2011 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. 
-dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. - -include(`../config.m4') - -MULFUNC_PROLOGUE(mpn_mod_1s_4p) -include_mpn(`x86/pentium4/sse2/mod_1_4.asm') diff --git a/gmp/mpn/x86/atom/sse2/mul_1.asm b/gmp/mpn/x86/atom/sse2/mul_1.asm deleted file mode 100644 index aa3bb974bb..0000000000 --- a/gmp/mpn/x86/atom/sse2/mul_1.asm +++ /dev/null @@ -1,124 +0,0 @@ -dnl Intel Atom mpn_mul_1. - -dnl Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato. - -dnl Copyright 2011 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. 
- -include(`../config.m4') - -C cycles/limb -C cycles/limb -C P5 - -C P6 model 0-8,10-12 - -C P6 model 9 (Banias) -C P6 model 13 (Dothan) -C P4 model 0 (Willamette) -C P4 model 1 (?) -C P4 model 2 (Northwood) -C P4 model 3 (Prescott) -C P4 model 4 (Nocona) -C Intel Atom 7.5 -C AMD K6 - -C AMD K7 - -C AMD K8 -C AMD K10 - -defframe(PARAM_CARRY,20) -defframe(PARAM_MUL, 16) -defframe(PARAM_SIZE, 12) -defframe(PARAM_SRC, 8) -defframe(PARAM_DST, 4) - -define(`rp', `%edx') -define(`up', `%esi') -define(`n', `%ecx') - -ASM_START() - TEXT - ALIGN(16) -deflit(`FRAME',0) - -PROLOGUE(mpn_mul_1c) - movd PARAM_CARRY, %mm6 C carry - jmp L(ent) -EPILOGUE() - - ALIGN(8) C for compact code -PROLOGUE(mpn_mul_1) - pxor %mm6, %mm6 -L(ent): push %esi FRAME_pushl() - mov PARAM_SRC, up - mov PARAM_SIZE, %eax C size - movd PARAM_MUL, %mm7 - movd (up), %mm0 - mov %eax, n - and $3, %eax - pmuludq %mm7, %mm0 - mov PARAM_DST, rp - jz L(lo0) - cmp $2, %eax - lea -16(up,%eax,4),up - lea -16(rp,%eax,4),rp - jc L(lo1) - jz L(lo2) - jmp L(lo3) - - ALIGN(16) -L(top): movd (up), %mm0 - pmuludq %mm7, %mm0 - psrlq $32, %mm6 - lea 16(rp), rp -L(lo0): paddq %mm0, %mm6 - movd 4(up), %mm0 - pmuludq %mm7, %mm0 - movd %mm6, (rp) - psrlq $32, %mm6 -L(lo3): paddq %mm0, %mm6 - movd 8(up), %mm0 - pmuludq %mm7, %mm0 - movd %mm6, 4(rp) - psrlq $32, %mm6 -L(lo2): paddq %mm0, %mm6 - movd 12(up), %mm0 - pmuludq %mm7, %mm0 - movd %mm6, 8(rp) - psrlq $32, %mm6 -L(lo1): paddq %mm0, %mm6 - sub $4, n - movd %mm6, 12(rp) - lea 16(up), up - ja L(top) - - psrlq $32, %mm6 - movd %mm6, %eax - emms - pop %esi FRAME_popl() - ret -EPILOGUE() -ASM_END() diff --git a/gmp/mpn/x86/atom/sse2/mul_basecase.asm b/gmp/mpn/x86/atom/sse2/mul_basecase.asm deleted file mode 100644 index 97d3aeb5ad..0000000000 --- a/gmp/mpn/x86/atom/sse2/mul_basecase.asm +++ /dev/null @@ -1,501 +0,0 @@ -dnl x86 mpn_mul_basecase -- Multiply two limb vectors and store the result in -dnl a third limb vector. 
- -dnl Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato. - -dnl Copyright 2011 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. - -include(`../config.m4') - -C TODO -C * Check if 'jmp N(%esp)' is well-predicted enough to allow us to combine the -C 4 large loops into one; we could use it for the outer loop branch. -C * Optimise code outside of inner loops. -C * Write combined addmul_1 feed-in a wind-down code, and use when iterating -C outer each loop. ("Overlapping software pipelining") -C * Postpone push of ebx until we know vn > 1. Perhaps use caller-saves regs -C for inlined mul_1, allowing us to postpone all pushes. -C * Perhaps write special code for vn <= un < M, for some small M. 
- -C void mpn_mul_basecase (mp_ptr wp, -C mp_srcptr xp, mp_size_t xn, -C mp_srcptr yp, mp_size_t yn); -C - -define(`rp', `%edi') -define(`up', `%esi') -define(`un', `%ecx') -define(`vp', `%ebp') -define(`vn', `36(%esp)') - - TEXT - ALIGN(16) -PROLOGUE(mpn_mul_basecase) - push %edi - push %esi - push %ebx - push %ebp - mov 20(%esp), rp - mov 24(%esp), up - mov 28(%esp), un - mov 32(%esp), vp - - movd (up), %mm0 - movd (vp), %mm7 - pmuludq %mm7, %mm0 - pxor %mm6, %mm6 - - mov un, %eax - and $3, %eax - jz L(of0) - cmp $2, %eax - jc L(of1) - jz L(of2) - -C ================================================================ - jmp L(m3) - ALIGN(16) -L(lm3): movd -4(up), %mm0 - pmuludq %mm7, %mm0 - psrlq $32, %mm6 - lea 16(rp), rp - paddq %mm0, %mm6 - movd (up), %mm0 - pmuludq %mm7, %mm0 - movd %mm6, -4(rp) - psrlq $32, %mm6 -L(m3): paddq %mm0, %mm6 - movd 4(up), %mm0 - pmuludq %mm7, %mm0 - movd %mm6, (rp) - psrlq $32, %mm6 - paddq %mm0, %mm6 - movd 8(up), %mm0 - pmuludq %mm7, %mm0 - movd %mm6, 4(rp) - psrlq $32, %mm6 - paddq %mm0, %mm6 - sub $4, un - movd %mm6, 8(rp) - lea 16(up), up - ja L(lm3) - - psrlq $32, %mm6 - movd %mm6, 12(rp) - - decl vn - jz L(done) - lea -8(rp), rp - -L(ol3): mov 28(%esp), un - neg un - lea 4(vp), vp - movd (vp), %mm7 C read next V limb - mov 24(%esp), up - lea 16(rp,un,4), rp - - movd (up), %mm0 - pmuludq %mm7, %mm0 - sar $2, un - movd 4(up), %mm1 - movd %mm0, %ebx - pmuludq %mm7, %mm1 - lea -8(up), up - xor %edx, %edx C zero edx and CF - jmp L(a3) - -L(la3): movd 4(up), %mm1 - adc $0, %edx - add %eax, 12(rp) - movd %mm0, %ebx - pmuludq %mm7, %mm1 - lea 16(rp), rp - psrlq $32, %mm0 - adc %edx, %ebx - movd %mm0, %edx - movd %mm1, %eax - movd 8(up), %mm0 - pmuludq %mm7, %mm0 - adc $0, %edx - add %ebx, (rp) - psrlq $32, %mm1 - adc %edx, %eax - movd %mm1, %edx - movd %mm0, %ebx - movd 12(up), %mm1 - pmuludq %mm7, %mm1 - adc $0, %edx - add %eax, 4(rp) -L(a3): psrlq $32, %mm0 - adc %edx, %ebx - movd %mm0, %edx - movd %mm1, %eax - lea 16(up), up - movd 
(up), %mm0 - adc $0, %edx - add %ebx, 8(rp) - psrlq $32, %mm1 - adc %edx, %eax - movd %mm1, %edx - pmuludq %mm7, %mm0 - inc un - jnz L(la3) - - adc un, %edx C un is zero here - add %eax, 12(rp) - movd %mm0, %ebx - psrlq $32, %mm0 - adc %edx, %ebx - movd %mm0, %eax - adc un, %eax - add %ebx, 16(rp) - adc un, %eax - mov %eax, 20(rp) - - decl vn - jnz L(ol3) - jmp L(done) - -C ================================================================ - ALIGN(16) -L(lm0): movd (up), %mm0 - pmuludq %mm7, %mm0 - psrlq $32, %mm6 - lea 16(rp), rp -L(of0): paddq %mm0, %mm6 - movd 4(up), %mm0 - pmuludq %mm7, %mm0 - movd %mm6, (rp) - psrlq $32, %mm6 - paddq %mm0, %mm6 - movd 8(up), %mm0 - pmuludq %mm7, %mm0 - movd %mm6, 4(rp) - psrlq $32, %mm6 - paddq %mm0, %mm6 - movd 12(up), %mm0 - pmuludq %mm7, %mm0 - movd %mm6, 8(rp) - psrlq $32, %mm6 - paddq %mm0, %mm6 - sub $4, un - movd %mm6, 12(rp) - lea 16(up), up - ja L(lm0) - - psrlq $32, %mm6 - movd %mm6, 16(rp) - - decl vn - jz L(done) - lea -4(rp), rp - -L(ol0): mov 28(%esp), un - neg un - lea 4(vp), vp - movd (vp), %mm7 C read next V limb - mov 24(%esp), up - lea 20(rp,un,4), rp - - movd (up), %mm1 - pmuludq %mm7, %mm1 - sar $2, un - movd 4(up), %mm0 - lea -4(up), up - movd %mm1, %eax - pmuludq %mm7, %mm0 - xor %edx, %edx C zero edx and CF - jmp L(a0) - -L(la0): movd 4(up), %mm1 - adc $0, %edx - add %eax, 12(rp) - movd %mm0, %ebx - pmuludq %mm7, %mm1 - lea 16(rp), rp - psrlq $32, %mm0 - adc %edx, %ebx - movd %mm0, %edx - movd %mm1, %eax - movd 8(up), %mm0 - pmuludq %mm7, %mm0 - adc $0, %edx - add %ebx, (rp) -L(a0): psrlq $32, %mm1 - adc %edx, %eax - movd %mm1, %edx - movd %mm0, %ebx - movd 12(up), %mm1 - pmuludq %mm7, %mm1 - adc $0, %edx - add %eax, 4(rp) - psrlq $32, %mm0 - adc %edx, %ebx - movd %mm0, %edx - movd %mm1, %eax - lea 16(up), up - movd (up), %mm0 - adc $0, %edx - add %ebx, 8(rp) - psrlq $32, %mm1 - adc %edx, %eax - movd %mm1, %edx - pmuludq %mm7, %mm0 - inc un - jnz L(la0) - - adc un, %edx C un is zero here - add %eax, 
12(rp) - movd %mm0, %ebx - psrlq $32, %mm0 - adc %edx, %ebx - movd %mm0, %eax - adc un, %eax - add %ebx, 16(rp) - adc un, %eax - mov %eax, 20(rp) - - decl vn - jnz L(ol0) - jmp L(done) - -C ================================================================ - ALIGN(16) -L(lm1): movd -12(up), %mm0 - pmuludq %mm7, %mm0 - psrlq $32, %mm6 - lea 16(rp), rp - paddq %mm0, %mm6 - movd -8(up), %mm0 - pmuludq %mm7, %mm0 - movd %mm6, -12(rp) - psrlq $32, %mm6 - paddq %mm0, %mm6 - movd -4(up), %mm0 - pmuludq %mm7, %mm0 - movd %mm6, -8(rp) - psrlq $32, %mm6 - paddq %mm0, %mm6 - movd (up), %mm0 - pmuludq %mm7, %mm0 - movd %mm6, -4(rp) - psrlq $32, %mm6 -L(of1): paddq %mm0, %mm6 - sub $4, un - movd %mm6, (rp) - lea 16(up), up - ja L(lm1) - - psrlq $32, %mm6 - movd %mm6, 4(rp) - - decl vn - jz L(done) - lea -16(rp), rp - -L(ol1): mov 28(%esp), un - neg un - lea 4(vp), vp - movd (vp), %mm7 C read next V limb - mov 24(%esp), up - lea 24(rp,un,4), rp - - movd (up), %mm0 - pmuludq %mm7, %mm0 - sar $2, un - movd %mm0, %ebx - movd 4(up), %mm1 - pmuludq %mm7, %mm1 - xor %edx, %edx C zero edx and CF - inc un - jmp L(a1) - -L(la1): movd 4(up), %mm1 - adc $0, %edx - add %eax, 12(rp) - movd %mm0, %ebx - pmuludq %mm7, %mm1 - lea 16(rp), rp -L(a1): psrlq $32, %mm0 - adc %edx, %ebx - movd %mm0, %edx - movd %mm1, %eax - movd 8(up), %mm0 - pmuludq %mm7, %mm0 - adc $0, %edx - add %ebx, (rp) - psrlq $32, %mm1 - adc %edx, %eax - movd %mm1, %edx - movd %mm0, %ebx - movd 12(up), %mm1 - pmuludq %mm7, %mm1 - adc $0, %edx - add %eax, 4(rp) - psrlq $32, %mm0 - adc %edx, %ebx - movd %mm0, %edx - movd %mm1, %eax - lea 16(up), up - movd (up), %mm0 - adc $0, %edx - add %ebx, 8(rp) - psrlq $32, %mm1 - adc %edx, %eax - movd %mm1, %edx - pmuludq %mm7, %mm0 - inc un - jnz L(la1) - - adc un, %edx C un is zero here - add %eax, 12(rp) - movd %mm0, %ebx - psrlq $32, %mm0 - adc %edx, %ebx - movd %mm0, %eax - adc un, %eax - add %ebx, 16(rp) - adc un, %eax - mov %eax, 20(rp) - - decl vn - jnz L(ol1) - jmp L(done) - -C 
================================================================ - ALIGN(16) -L(lm2): movd -8(up), %mm0 - pmuludq %mm7, %mm0 - psrlq $32, %mm6 - lea 16(rp), rp - paddq %mm0, %mm6 - movd -4(up), %mm0 - pmuludq %mm7, %mm0 - movd %mm6, -8(rp) - psrlq $32, %mm6 - paddq %mm0, %mm6 - movd (up), %mm0 - pmuludq %mm7, %mm0 - movd %mm6, -4(rp) - psrlq $32, %mm6 -L(of2): paddq %mm0, %mm6 - movd 4(up), %mm0 - pmuludq %mm7, %mm0 - movd %mm6, (rp) - psrlq $32, %mm6 - paddq %mm0, %mm6 - sub $4, un - movd %mm6, 4(rp) - lea 16(up), up - ja L(lm2) - - psrlq $32, %mm6 - movd %mm6, 8(rp) - - decl vn - jz L(done) - lea -12(rp), rp - -L(ol2): mov 28(%esp), un - neg un - lea 4(vp), vp - movd (vp), %mm7 C read next V limb - mov 24(%esp), up - lea 12(rp,un,4), rp - - movd (up), %mm1 - pmuludq %mm7, %mm1 - sar $2, un - movd 4(up), %mm0 - lea 4(up), up - movd %mm1, %eax - xor %edx, %edx C zero edx and CF - jmp L(lo2) - -L(la2): movd 4(up), %mm1 - adc $0, %edx - add %eax, 12(rp) - movd %mm0, %ebx - pmuludq %mm7, %mm1 - lea 16(rp), rp - psrlq $32, %mm0 - adc %edx, %ebx - movd %mm0, %edx - movd %mm1, %eax - movd 8(up), %mm0 - pmuludq %mm7, %mm0 - adc $0, %edx - add %ebx, (rp) - psrlq $32, %mm1 - adc %edx, %eax - movd %mm1, %edx - movd %mm0, %ebx - movd 12(up), %mm1 - pmuludq %mm7, %mm1 - adc $0, %edx - add %eax, 4(rp) - psrlq $32, %mm0 - adc %edx, %ebx - movd %mm0, %edx - movd %mm1, %eax - lea 16(up), up - movd (up), %mm0 - adc $0, %edx - add %ebx, 8(rp) -L(lo2): psrlq $32, %mm1 - adc %edx, %eax - movd %mm1, %edx - pmuludq %mm7, %mm0 - inc un - jnz L(la2) - - adc un, %edx C un is zero here - add %eax, 12(rp) - movd %mm0, %ebx - psrlq $32, %mm0 - adc %edx, %ebx - movd %mm0, %eax - adc un, %eax - add %ebx, 16(rp) - adc un, %eax - mov %eax, 20(rp) - - decl vn - jnz L(ol2) -C jmp L(done) - -C ================================================================ -L(done): - emms - pop %ebp - pop %ebx - pop %esi - pop %edi - ret -EPILOGUE() diff --git a/gmp/mpn/x86/atom/sse2/popcount.asm 
b/gmp/mpn/x86/atom/sse2/popcount.asm deleted file mode 100644 index 7847aec8e6..0000000000 --- a/gmp/mpn/x86/atom/sse2/popcount.asm +++ /dev/null @@ -1,35 +0,0 @@ -dnl Intel Atom mpn_popcount -- population count. - -dnl Copyright 2011 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. - -include(`../config.m4') - - -MULFUNC_PROLOGUE(mpn_popcount) -include_mpn(`x86/pentium4/sse2/popcount.asm') diff --git a/gmp/mpn/x86/atom/sse2/sqr_basecase.asm b/gmp/mpn/x86/atom/sse2/sqr_basecase.asm deleted file mode 100644 index af19ed854d..0000000000 --- a/gmp/mpn/x86/atom/sse2/sqr_basecase.asm +++ /dev/null @@ -1,634 +0,0 @@ -dnl x86 mpn_sqr_basecase -- square an mpn number, optimised for atom. - -dnl Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato. - -dnl Copyright 2011 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. 
-dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. - -include(`../config.m4') - -C TODO -C * Check if 'jmp N(%esp)' is well-predicted enough to allow us to combine the -C 4 large loops into one; we could use it for the outer loop branch. -C * Optimise code outside of inner loops. -C * Write combined addmul_1 feed-in a wind-down code, and use when iterating -C outer each loop. ("Overlapping software pipelining") -C * Perhaps use caller-saves regs for inlined mul_1, allowing us to postpone -C all pushes. -C * Perhaps write special code for n < M, for some small M. -C * Replace inlined addmul_1 with smaller code from aorsmul_1.asm, or perhaps -C with even less pipelined code. -C * We run the outer loop until we have a 2-limb by 1-limb addmul_1 left. -C Consider breaking out earlier, saving high the cost of short loops. 
- -C void mpn_sqr_basecase (mp_ptr wp, -C mp_srcptr xp, mp_size_t xn); - -define(`rp', `%edi') -define(`up', `%esi') -define(`n', `%ecx') - -define(`un', `%ebp') - - TEXT - ALIGN(16) -PROLOGUE(mpn_sqr_basecase) - push %edi - push %esi - mov 12(%esp), rp - mov 16(%esp), up - mov 20(%esp), n - - lea 4(rp), rp C write triangular product starting at rp[1] - dec n - movd (up), %mm7 - - jz L(one) - lea 4(up), up - push %ebx - push %ebp - mov n, %eax - - movd (up), %mm0 - neg n - pmuludq %mm7, %mm0 - pxor %mm6, %mm6 - mov n, un - - and $3, %eax - jz L(of0) - cmp $2, %eax - jc L(of1) - jz L(of2) - -C ================================================================ - jmp L(m3) - ALIGN(16) -L(lm3): movd -4(up), %mm0 - pmuludq %mm7, %mm0 - psrlq $32, %mm6 - lea 16(rp), rp - paddq %mm0, %mm6 - movd (up), %mm0 - pmuludq %mm7, %mm0 - movd %mm6, -4(rp) - psrlq $32, %mm6 -L(m3): paddq %mm0, %mm6 - movd 4(up), %mm0 - pmuludq %mm7, %mm0 - movd %mm6, (rp) - psrlq $32, %mm6 - paddq %mm0, %mm6 - movd 8(up), %mm0 - pmuludq %mm7, %mm0 - movd %mm6, 4(rp) - psrlq $32, %mm6 - paddq %mm0, %mm6 - add $4, un - movd %mm6, 8(rp) - lea 16(up), up - js L(lm3) - - psrlq $32, %mm6 - movd %mm6, 12(rp) - - inc n -C jz L(done) - lea -12(up), up - lea 4(rp), rp - jmp L(ol2) - -C ================================================================ - ALIGN(16) -L(lm0): movd (up), %mm0 - pmuludq %mm7, %mm0 - psrlq $32, %mm6 - lea 16(rp), rp -L(of0): paddq %mm0, %mm6 - movd 4(up), %mm0 - pmuludq %mm7, %mm0 - movd %mm6, (rp) - psrlq $32, %mm6 - paddq %mm0, %mm6 - movd 8(up), %mm0 - pmuludq %mm7, %mm0 - movd %mm6, 4(rp) - psrlq $32, %mm6 - paddq %mm0, %mm6 - movd 12(up), %mm0 - pmuludq %mm7, %mm0 - movd %mm6, 8(rp) - psrlq $32, %mm6 - paddq %mm0, %mm6 - add $4, un - movd %mm6, 12(rp) - lea 16(up), up - js L(lm0) - - psrlq $32, %mm6 - movd %mm6, 16(rp) - - inc n -C jz L(done) - lea -8(up), up - lea 8(rp), rp - jmp L(ol3) - -C ================================================================ - ALIGN(16) -L(lm1): 
movd -12(up), %mm0 - pmuludq %mm7, %mm0 - psrlq $32, %mm6 - lea 16(rp), rp - paddq %mm0, %mm6 - movd -8(up), %mm0 - pmuludq %mm7, %mm0 - movd %mm6, -12(rp) - psrlq $32, %mm6 - paddq %mm0, %mm6 - movd -4(up), %mm0 - pmuludq %mm7, %mm0 - movd %mm6, -8(rp) - psrlq $32, %mm6 - paddq %mm0, %mm6 - movd (up), %mm0 - pmuludq %mm7, %mm0 - movd %mm6, -4(rp) - psrlq $32, %mm6 -L(of1): paddq %mm0, %mm6 - add $4, un - movd %mm6, (rp) - lea 16(up), up - js L(lm1) - - psrlq $32, %mm6 - movd %mm6, 4(rp) - - inc n - jz L(done) C goes away when we add special n=2 code - lea -20(up), up - lea -4(rp), rp - jmp L(ol0) - -C ================================================================ - ALIGN(16) -L(lm2): movd -8(up), %mm0 - pmuludq %mm7, %mm0 - psrlq $32, %mm6 - lea 16(rp), rp - paddq %mm0, %mm6 - movd -4(up), %mm0 - pmuludq %mm7, %mm0 - movd %mm6, -8(rp) - psrlq $32, %mm6 - paddq %mm0, %mm6 - movd (up), %mm0 - pmuludq %mm7, %mm0 - movd %mm6, -4(rp) - psrlq $32, %mm6 -L(of2): paddq %mm0, %mm6 - movd 4(up), %mm0 - pmuludq %mm7, %mm0 - movd %mm6, (rp) - psrlq $32, %mm6 - paddq %mm0, %mm6 - add $4, un - movd %mm6, 4(rp) - lea 16(up), up - js L(lm2) - - psrlq $32, %mm6 - movd %mm6, 8(rp) - - inc n -C jz L(done) - lea -16(up), up -C lea (rp), rp -C jmp L(ol1) - -C ================================================================ - -L(ol1): lea 4(up,n,4), up - movd (up), %mm7 C read next U invariant limb - lea 8(rp,n,4), rp - mov n, un - - movd 4(up), %mm1 - pmuludq %mm7, %mm1 - sar $2, un - movd %mm1, %ebx - inc un - jz L(re1) - - movd 8(up), %mm0 - pmuludq %mm7, %mm0 - xor %edx, %edx C zero edx and CF - jmp L(a1) - -L(la1): adc $0, %edx - add %ebx, 12(rp) - movd %mm0, %eax - pmuludq %mm7, %mm1 - lea 16(rp), rp - psrlq $32, %mm0 - adc %edx, %eax - movd %mm0, %edx - movd %mm1, %ebx - movd 8(up), %mm0 - pmuludq %mm7, %mm0 - adc $0, %edx - add %eax, (rp) -L(a1): psrlq $32, %mm1 - adc %edx, %ebx - movd %mm1, %edx - movd %mm0, %eax - movd 12(up), %mm1 - pmuludq %mm7, %mm1 - adc $0, %edx - add 
%ebx, 4(rp) - psrlq $32, %mm0 - adc %edx, %eax - movd %mm0, %edx - movd %mm1, %ebx - lea 16(up), up - movd (up), %mm0 - adc $0, %edx - add %eax, 8(rp) - psrlq $32, %mm1 - adc %edx, %ebx - movd %mm1, %edx - pmuludq %mm7, %mm0 - inc un - movd 4(up), %mm1 - jnz L(la1) - - adc un, %edx C un is zero here - add %ebx, 12(rp) - movd %mm0, %eax - pmuludq %mm7, %mm1 - lea 16(rp), rp - psrlq $32, %mm0 - adc %edx, %eax - movd %mm0, %edx - movd %mm1, %ebx - adc un, %edx - add %eax, (rp) - psrlq $32, %mm1 - adc %edx, %ebx - movd %mm1, %eax - adc un, %eax - add %ebx, 4(rp) - adc un, %eax - mov %eax, 8(rp) - - inc n - -C ================================================================ - -L(ol0): lea (up,n,4), up - movd 4(up), %mm7 C read next U invariant limb - lea 4(rp,n,4), rp - mov n, un - - movd 8(up), %mm0 - pmuludq %mm7, %mm0 - sar $2, un - movd 12(up), %mm1 - movd %mm0, %eax - pmuludq %mm7, %mm1 - xor %edx, %edx C zero edx and CF - jmp L(a0) - -L(la0): adc $0, %edx - add %ebx, 12(rp) - movd %mm0, %eax - pmuludq %mm7, %mm1 - lea 16(rp), rp - psrlq $32, %mm0 - adc %edx, %eax - movd %mm0, %edx - movd %mm1, %ebx - movd 8(up), %mm0 - pmuludq %mm7, %mm0 - adc $0, %edx - add %eax, (rp) - psrlq $32, %mm1 - adc %edx, %ebx - movd %mm1, %edx - movd %mm0, %eax - movd 12(up), %mm1 - pmuludq %mm7, %mm1 - adc $0, %edx - add %ebx, 4(rp) -L(a0): psrlq $32, %mm0 - adc %edx, %eax - movd %mm0, %edx - movd %mm1, %ebx - lea 16(up), up - movd (up), %mm0 - adc $0, %edx - add %eax, 8(rp) - psrlq $32, %mm1 - adc %edx, %ebx - movd %mm1, %edx - pmuludq %mm7, %mm0 - inc un - movd 4(up), %mm1 - jnz L(la0) - - adc un, %edx C un is zero here - add %ebx, 12(rp) - movd %mm0, %eax - pmuludq %mm7, %mm1 - lea 16(rp), rp - psrlq $32, %mm0 - adc %edx, %eax - movd %mm0, %edx - movd %mm1, %ebx - adc un, %edx - add %eax, (rp) - psrlq $32, %mm1 - adc %edx, %ebx - movd %mm1, %eax - adc un, %eax - add %ebx, 4(rp) - adc un, %eax - mov %eax, 8(rp) - - inc n - -C 
================================================================ - -L(ol3): lea 12(up,n,4), up - movd -8(up), %mm7 C read next U invariant limb - lea (rp,n,4), rp C put rp back - mov n, un - - movd -4(up), %mm1 - pmuludq %mm7, %mm1 - sar $2, un - movd %mm1, %ebx - movd (up), %mm0 - xor %edx, %edx C zero edx and CF - jmp L(a3) - -L(la3): adc $0, %edx - add %ebx, 12(rp) - movd %mm0, %eax - pmuludq %mm7, %mm1 - lea 16(rp), rp - psrlq $32, %mm0 - adc %edx, %eax - movd %mm0, %edx - movd %mm1, %ebx - movd 8(up), %mm0 - pmuludq %mm7, %mm0 - adc $0, %edx - add %eax, (rp) - psrlq $32, %mm1 - adc %edx, %ebx - movd %mm1, %edx - movd %mm0, %eax - movd 12(up), %mm1 - pmuludq %mm7, %mm1 - adc $0, %edx - add %ebx, 4(rp) - psrlq $32, %mm0 - adc %edx, %eax - movd %mm0, %edx - movd %mm1, %ebx - lea 16(up), up - movd (up), %mm0 - adc $0, %edx - add %eax, 8(rp) -L(a3): psrlq $32, %mm1 - adc %edx, %ebx - movd %mm1, %edx - pmuludq %mm7, %mm0 - inc un - movd 4(up), %mm1 - jnz L(la3) - - adc un, %edx C un is zero here - add %ebx, 12(rp) - movd %mm0, %eax - pmuludq %mm7, %mm1 - lea 16(rp), rp - psrlq $32, %mm0 - adc %edx, %eax - movd %mm0, %edx - movd %mm1, %ebx - adc un, %edx - add %eax, (rp) - psrlq $32, %mm1 - adc %edx, %ebx - movd %mm1, %eax - adc un, %eax - add %ebx, 4(rp) - adc un, %eax - mov %eax, 8(rp) - - inc n - -C ================================================================ - -L(ol2): lea 8(up,n,4), up - movd -4(up), %mm7 C read next U invariant limb - lea 12(rp,n,4), rp - mov n, un - - movd (up), %mm0 - pmuludq %mm7, %mm0 - xor %edx, %edx - sar $2, un - movd 4(up), %mm1 - test un, un C clear carry - movd %mm0, %eax - pmuludq %mm7, %mm1 - inc un - jnz L(a2) - jmp L(re2) - -L(la2): adc $0, %edx - add %ebx, 12(rp) - movd %mm0, %eax - pmuludq %mm7, %mm1 - lea 16(rp), rp -L(a2): psrlq $32, %mm0 - adc %edx, %eax - movd %mm0, %edx - movd %mm1, %ebx - movd 8(up), %mm0 - pmuludq %mm7, %mm0 - adc $0, %edx - add %eax, (rp) - psrlq $32, %mm1 - adc %edx, %ebx - movd %mm1, %edx - movd 
%mm0, %eax - movd 12(up), %mm1 - pmuludq %mm7, %mm1 - adc $0, %edx - add %ebx, 4(rp) - psrlq $32, %mm0 - adc %edx, %eax - movd %mm0, %edx - movd %mm1, %ebx - lea 16(up), up - movd (up), %mm0 - adc $0, %edx - add %eax, 8(rp) - psrlq $32, %mm1 - adc %edx, %ebx - movd %mm1, %edx - pmuludq %mm7, %mm0 - inc un - movd 4(up), %mm1 - jnz L(la2) - - adc un, %edx C un is zero here - add %ebx, 12(rp) - movd %mm0, %eax - pmuludq %mm7, %mm1 - lea 16(rp), rp - psrlq $32, %mm0 - adc %edx, %eax - movd %mm0, %edx - movd %mm1, %ebx - adc un, %edx - add %eax, (rp) - psrlq $32, %mm1 - adc %edx, %ebx - movd %mm1, %eax - adc un, %eax - add %ebx, 4(rp) - adc un, %eax - mov %eax, 8(rp) - - inc n - jmp L(ol1) - -C ================================================================ -L(re2): psrlq $32, %mm0 - movd (up), %mm7 C read next U invariant limb - adc %edx, %eax - movd %mm0, %edx - movd %mm1, %ebx - adc un, %edx - add %eax, (rp) - lea 4(rp), rp - psrlq $32, %mm1 - adc %edx, %ebx - movd %mm1, %eax - movd 4(up), %mm1 - adc un, %eax - add %ebx, (rp) - pmuludq %mm7, %mm1 - adc un, %eax - mov %eax, 4(rp) - movd %mm1, %ebx - -L(re1): psrlq $32, %mm1 - add %ebx, 4(rp) - movd %mm1, %eax - adc un, %eax - xor n, n C make n zeroness assumption below true - mov %eax, 8(rp) - -L(done): C n is zero here - mov 24(%esp), up - mov 28(%esp), %eax - - movd (up), %mm0 - inc %eax - pmuludq %mm0, %mm0 - lea 4(up), up - mov 20(%esp), rp - shr %eax - movd %mm0, (rp) - psrlq $32, %mm0 - lea -12(rp), rp - mov %eax, 28(%esp) - jnc L(odd) - - movd %mm0, %ebp - movd (up), %mm0 - lea 8(rp), rp - pmuludq %mm0, %mm0 - lea -4(up), up - add 8(rp), %ebp - movd %mm0, %edx - adc 12(rp), %edx - rcr n - jmp L(ent) - -C ALIGN(16) C alignment seems irrelevant -L(top): movd (up), %mm1 - adc n, n - movd %mm0, %eax - pmuludq %mm1, %mm1 - movd 4(up), %mm0 - adc (rp), %eax - movd %mm1, %ebx - pmuludq %mm0, %mm0 - psrlq $32, %mm1 - adc 4(rp), %ebx - movd %mm1, %ebp - movd %mm0, %edx - adc 8(rp), %ebp - adc 12(rp), %edx - rcr n C 
FIXME: isn't this awfully slow on atom??? - adc %eax, (rp) - adc %ebx, 4(rp) -L(ent): lea 8(up), up - adc %ebp, 8(rp) - psrlq $32, %mm0 - adc %edx, 12(rp) -L(odd): decl 28(%esp) - lea 16(rp), rp - jnz L(top) - -L(end): adc n, n - movd %mm0, %eax - adc n, %eax - mov %eax, (rp) - -L(rtn): emms - pop %ebp - pop %ebx - pop %esi - pop %edi - ret - -L(one): pmuludq %mm7, %mm7 - movq %mm7, -4(rp) - emms - pop %esi - pop %edi - ret -EPILOGUE() diff --git a/gmp/mpn/x86/atom/sublsh1_n.asm b/gmp/mpn/x86/atom/sublsh1_n.asm deleted file mode 100644 index d3e7e5b5cb..0000000000 --- a/gmp/mpn/x86/atom/sublsh1_n.asm +++ /dev/null @@ -1,34 +0,0 @@ -dnl Intel Atom mpn_sublsh1_n -- rp[] = up[] - (vp[] << 1) - -dnl Copyright 2011 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. 
- -include(`../config.m4') - -MULFUNC_PROLOGUE(mpn_sublsh1_n_ip1) -include_mpn(`x86/k7/sublsh1_n.asm') diff --git a/gmp/mpn/x86/atom/sublsh2_n.asm b/gmp/mpn/x86/atom/sublsh2_n.asm deleted file mode 100644 index 79405cf9f4..0000000000 --- a/gmp/mpn/x86/atom/sublsh2_n.asm +++ /dev/null @@ -1,57 +0,0 @@ -dnl Intel Atom mpn_addlsh2_n/mpn_sublsh2_n -- rp[] = up[] +- (vp[] << 2). - -dnl Contributed to the GNU project by Marco Bodrato. - -dnl Copyright 2011 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. 
- -include(`../config.m4') - -define(LSH, 2) -define(RSH, 30) - -ifdef(`OPERATION_addlsh2_n', ` - define(M4_inst, adcl) - define(M4_opp, subl) - define(M4_function, mpn_addlsh2_n) - define(M4_function_c, mpn_addlsh2_nc) - define(M4_ip_function_c, mpn_addlsh2_nc_ip1) - define(M4_ip_function, mpn_addlsh2_n_ip1) -',`ifdef(`OPERATION_sublsh2_n', ` - define(M4_inst, sbbl) - define(M4_opp, addl) - define(M4_function, mpn_sublsh2_n) - define(M4_function_c, mpn_sublsh2_nc) - define(M4_ip_function_c, mpn_sublsh2_nc_ip1) - define(M4_ip_function, mpn_sublsh2_n_ip1) -',`m4_error(`Need OPERATION_addlsh2_n or OPERATION_sublsh2_n -')')') - -MULFUNC_PROLOGUE(mpn_sublsh2_n mpn_sublsh2_nc mpn_sublsh2_n_ip1 mpn_sublsh2_nc_ip1) - -include_mpn(`x86/atom/aorslshC_n.asm') diff --git a/gmp/mpn/x86/bd1/gmp-mparam.h b/gmp/mpn/x86/bd1/gmp-mparam.h deleted file mode 100644 index 7d80a1cb4c..0000000000 --- a/gmp/mpn/x86/bd1/gmp-mparam.h +++ /dev/null @@ -1,208 +0,0 @@ -/* AMD bd2 gmp-mparam.h -- Compiler/machine parameter header file. - -Copyright 1991, 1993, 1994, 2000-2005, 2008-2010, 2014 Free Software -Foundation, Inc. - -This file is part of the GNU MP Library. - -The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - -or - - * the GNU General Public License as published by the Free Software - Foundation; either version 2 of the License, or (at your option) any - later version. - -or both in parallel, as here. - -The GNU MP Library is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. 
- -You should have received copies of the GNU General Public License and the -GNU Lesser General Public License along with the GNU MP Library. If not, -see https://www.gnu.org/licenses/. */ - -#define GMP_LIMB_BITS 32 -#define GMP_LIMB_BYTES 4 - -/* 3600 MHz Bulldozer Zambezi */ -/* FFT tuning limit = 25000000 */ -/* Generated by tuneup.c, 2014-03-13, gcc 4.5 */ - -#define MOD_1_NORM_THRESHOLD 0 /* always */ -#define MOD_1_UNNORM_THRESHOLD 3 -#define MOD_1N_TO_MOD_1_1_THRESHOLD 7 -#define MOD_1U_TO_MOD_1_1_THRESHOLD 4 -#define MOD_1_1_TO_MOD_1_2_THRESHOLD 16 -#define MOD_1_2_TO_MOD_1_4_THRESHOLD 0 /* never mpn_mod_1s_2p */ -#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 11 -#define USE_PREINV_DIVREM_1 1 /* native */ -#define DIV_QR_1N_PI1_METHOD 1 -#define DIV_QR_1_NORM_THRESHOLD 3 -#define DIV_QR_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* never */ -#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */ -#define DIVEXACT_1_THRESHOLD 0 /* always (native) */ -#define BMOD_1_TO_MOD_1_THRESHOLD 27 - -#define MUL_TOOM22_THRESHOLD 32 -#define MUL_TOOM33_THRESHOLD 65 -#define MUL_TOOM44_THRESHOLD 154 -#define MUL_TOOM6H_THRESHOLD 230 -#define MUL_TOOM8H_THRESHOLD 354 - -#define MUL_TOOM32_TO_TOOM43_THRESHOLD 89 -#define MUL_TOOM32_TO_TOOM53_THRESHOLD 110 -#define MUL_TOOM42_TO_TOOM53_THRESHOLD 93 -#define MUL_TOOM42_TO_TOOM63_THRESHOLD 102 -#define MUL_TOOM43_TO_TOOM54_THRESHOLD 130 - -#define SQR_BASECASE_THRESHOLD 0 /* always (native) */ -#define SQR_TOOM2_THRESHOLD 48 -#define SQR_TOOM3_THRESHOLD 87 -#define SQR_TOOM4_THRESHOLD 204 -#define SQR_TOOM6_THRESHOLD 315 -#define SQR_TOOM8_THRESHOLD 430 - -#define MULMID_TOOM42_THRESHOLD 48 - -#define MULMOD_BNM1_THRESHOLD 21 -#define SQRMOD_BNM1_THRESHOLD 23 - -#define MUL_FFT_MODF_THRESHOLD 840 /* k = 5 */ -#define MUL_FFT_TABLE3 \ - { { 840, 5}, { 28, 6}, { 15, 5}, { 33, 6}, \ - { 28, 7}, { 15, 6}, { 32, 7}, { 17, 6}, \ - { 35, 7}, { 19, 6}, { 39, 7}, { 23, 6}, \ - { 47, 7}, { 29, 8}, { 15, 7}, { 31, 6}, \ - { 63, 7}, { 35, 8}, { 19, 
7}, { 41, 8}, \ - { 23, 7}, { 51, 8}, { 27, 7}, { 55, 8}, \ - { 31, 7}, { 63, 8}, { 39, 7}, { 79, 9}, \ - { 23, 8}, { 55, 9}, { 31, 8}, { 67, 9}, \ - { 39, 8}, { 79, 9}, { 47, 8}, { 95, 9}, \ - { 55,10}, { 31, 9}, { 63, 8}, { 127, 9}, \ - { 79,10}, { 47, 9}, { 103,11}, { 31,10}, \ - { 63, 9}, { 135,10}, { 79, 9}, { 159,10}, \ - { 95, 9}, { 191,10}, { 111,11}, { 63,10}, \ - { 127, 9}, { 255,10}, { 159,11}, { 95,10}, \ - { 191,12}, { 63,11}, { 127,10}, { 271,11}, \ - { 159,10}, { 319, 9}, { 639,11}, { 191,10}, \ - { 383, 9}, { 767,11}, { 223,12}, { 127,11}, \ - { 255,10}, { 511,11}, { 287,10}, { 607,11}, \ - { 319,10}, { 639,12}, { 191,11}, { 383,10}, \ - { 799,13}, { 127,12}, { 255,11}, { 543,10}, \ - { 1087,11}, { 607,12}, { 319,11}, { 671,10}, \ - { 1343,11}, { 735,10}, { 1471, 9}, { 2943,12}, \ - { 383,11}, { 799,10}, { 1599,11}, { 863,10}, \ - { 1727,12}, { 447,13}, { 255,12}, { 511,11}, \ - { 1087,12}, { 575,11}, { 1215,10}, { 2431,12}, \ - { 639,11}, { 1343,12}, { 703,11}, { 1471,10}, \ - { 2943,13}, { 383,12}, { 767,11}, { 1599,12}, \ - { 831,11}, { 1727,10}, { 3455,14}, { 255,13}, \ - { 511,12}, { 1087,11}, { 2239,12}, { 1215,11}, \ - { 2431,13}, { 639,12}, { 1471,11}, { 2943,13}, \ - { 767,12}, { 1727,11}, { 3455,13}, { 895,12}, \ - { 1919,11}, { 3839,12}, { 1983,11}, { 3967,10}, \ - { 7935,14}, { 511,13}, { 1023,12}, { 2239,13}, \ - { 1151,12}, { 2495,11}, { 4991,13}, { 1279,12}, \ - { 2623,13}, { 1407,12}, { 2943,14}, { 767,13}, \ - { 1535,12}, { 3071,13}, { 1663,12}, { 3455,13}, \ - { 1791,12}, { 3583,13}, { 1919,12}, { 3967,11}, \ - { 7935,15}, { 511,14}, { 1023,13}, { 2175,12}, \ - { 4479,13}, { 2431,12}, { 4991,14}, { 1279,13}, \ - { 2943,12}, { 6015,14}, { 16384,15}, { 32768,16} } -#define MUL_FFT_TABLE3_SIZE 160 -#define MUL_FFT_THRESHOLD 7808 - -#define SQR_FFT_MODF_THRESHOLD 690 /* k = 5 */ -#define SQR_FFT_TABLE3 \ - { { 690, 5}, { 28, 6}, { 15, 5}, { 32, 6}, \ - { 29, 7}, { 15, 6}, { 33, 7}, { 17, 6}, \ - { 35, 7}, { 19, 6}, { 40, 7}, { 21, 6}, 
\ - { 43, 7}, { 23, 6}, { 47, 7}, { 35, 8}, \ - { 19, 7}, { 43, 8}, { 23, 7}, { 49, 8}, \ - { 27, 7}, { 55, 8}, { 31, 7}, { 63, 8}, \ - { 39, 7}, { 79, 8}, { 43, 9}, { 23, 8}, \ - { 55, 9}, { 31, 8}, { 67, 9}, { 39, 8}, \ - { 79, 9}, { 47, 8}, { 95, 9}, { 55,10}, \ - { 31, 9}, { 63, 8}, { 127, 9}, { 79,10}, \ - { 47, 9}, { 95,11}, { 31,10}, { 63, 9}, \ - { 127,10}, { 79, 9}, { 167,10}, { 95, 9}, \ - { 191,10}, { 111,11}, { 63,10}, { 159,11}, \ - { 95,10}, { 191,12}, { 63,11}, { 127,10}, \ - { 271,11}, { 159,10}, { 319, 9}, { 639,11}, \ - { 191,10}, { 383,11}, { 223,12}, { 127,11}, \ - { 255,10}, { 511, 9}, { 1023,10}, { 543,11}, \ - { 287,10}, { 607, 9}, { 1215,11}, { 319,10}, \ - { 639,12}, { 191,11}, { 383,10}, { 799,11}, \ - { 415,13}, { 127,12}, { 255,11}, { 511,10}, \ - { 1023,11}, { 543,10}, { 1087,11}, { 607,10}, \ - { 1215,12}, { 319,11}, { 671,10}, { 1343,11}, \ - { 735,10}, { 1471,12}, { 383,11}, { 799,10}, \ - { 1599,11}, { 863,12}, { 447,11}, { 927,13}, \ - { 255,12}, { 511,11}, { 1055,10}, { 2111,11}, \ - { 1087,12}, { 575,11}, { 1215,10}, { 2431,12}, \ - { 639,11}, { 1343,12}, { 703,11}, { 1471,13}, \ - { 383,12}, { 767,11}, { 1599,12}, { 831,11}, \ - { 1727,10}, { 3455,12}, { 895,14}, { 255,13}, \ - { 511,12}, { 1023,11}, { 2111,12}, { 1087,11}, \ - { 2239,10}, { 4479,12}, { 1215,11}, { 2431,13}, \ - { 639,12}, { 1471,11}, { 2943,13}, { 767,12}, \ - { 1727,11}, { 3455,13}, { 895,12}, { 1983,11}, \ - { 3967,14}, { 511,13}, { 1023,12}, { 2239,11}, \ - { 4479,13}, { 1151,12}, { 2495,11}, { 4991,10}, \ - { 9983,13}, { 1279,12}, { 2623,13}, { 1407,12}, \ - { 2943,14}, { 767,13}, { 1663,12}, { 3455,13}, \ - { 1791,12}, { 3583,13}, { 1919,12}, { 3967,15}, \ - { 511,14}, { 1023,13}, { 2175,12}, { 4479,13}, \ - { 2431,12}, { 4991,11}, { 9983,14}, { 1279,13}, \ - { 2687,12}, { 5375,13}, { 2943,12}, { 5887,14}, \ - { 16384,15}, { 32768,16} } -#define SQR_FFT_TABLE3_SIZE 166 -#define SQR_FFT_THRESHOLD 6784 - -#define MULLO_BASECASE_THRESHOLD 5 -#define 
MULLO_DC_THRESHOLD 31 -#define MULLO_MUL_N_THRESHOLD 14709 - -#define DC_DIV_QR_THRESHOLD 53 -#define DC_DIVAPPR_Q_THRESHOLD 230 -#define DC_BDIV_QR_THRESHOLD 50 -#define DC_BDIV_Q_THRESHOLD 136 - -#define INV_MULMOD_BNM1_THRESHOLD 78 -#define INV_NEWTON_THRESHOLD 202 -#define INV_APPR_THRESHOLD 202 - -#define BINV_NEWTON_THRESHOLD 236 -#define REDC_1_TO_REDC_N_THRESHOLD 55 - -#define MU_DIV_QR_THRESHOLD 1442 -#define MU_DIVAPPR_Q_THRESHOLD 1652 -#define MUPI_DIV_QR_THRESHOLD 81 -#define MU_BDIV_QR_THRESHOLD 1787 -#define MU_BDIV_Q_THRESHOLD 1685 - -#define POWM_SEC_TABLE 1,22,194,376,692,2657 - -#define MATRIX22_STRASSEN_THRESHOLD 21 -#define HGCD_THRESHOLD 85 -#define HGCD_APPR_THRESHOLD 50 -#define HGCD_REDUCE_THRESHOLD 4455 -#define GCD_DC_THRESHOLD 456 -#define GCDEXT_DC_THRESHOLD 345 -#define JACOBI_BASE_METHOD 4 - -#define GET_STR_DC_THRESHOLD 17 -#define GET_STR_PRECOMPUTE_THRESHOLD 27 -#define SET_STR_DC_THRESHOLD 100 -#define SET_STR_PRECOMPUTE_THRESHOLD 960 - -#define FAC_DSC_THRESHOLD 208 -#define FAC_ODD_THRESHOLD 26 diff --git a/gmp/mpn/x86/bd2/gmp-mparam.h b/gmp/mpn/x86/bd2/gmp-mparam.h deleted file mode 100644 index c5a53f2f9f..0000000000 --- a/gmp/mpn/x86/bd2/gmp-mparam.h +++ /dev/null @@ -1,209 +0,0 @@ -/* AMD bd2 gmp-mparam.h -- Compiler/machine parameter header file. - -Copyright 1991, 1993, 1994, 2000-2005, 2008-2010, 2014 Free Software -Foundation, Inc. - -This file is part of the GNU MP Library. - -The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - -or - - * the GNU General Public License as published by the Free Software - Foundation; either version 2 of the License, or (at your option) any - later version. - -or both in parallel, as here. 
- -The GNU MP Library is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. - -You should have received copies of the GNU General Public License and the -GNU Lesser General Public License along with the GNU MP Library. If not, -see https://www.gnu.org/licenses/. */ - -#define GMP_LIMB_BITS 32 -#define GMP_LIMB_BYTES 4 - -/* 4000 MHz Piledriver Vishera */ -/* FFT tuning limit = 40000000 */ -/* Generated by tuneup.c, 2014-03-12, gcc 4.8 */ - -#define MOD_1_NORM_THRESHOLD 0 /* always */ -#define MOD_1_UNNORM_THRESHOLD 3 -#define MOD_1N_TO_MOD_1_1_THRESHOLD 7 -#define MOD_1U_TO_MOD_1_1_THRESHOLD 5 -#define MOD_1_1_TO_MOD_1_2_THRESHOLD 19 -#define MOD_1_2_TO_MOD_1_4_THRESHOLD 0 /* never mpn_mod_1s_2p */ -#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 10 -#define USE_PREINV_DIVREM_1 1 /* native */ -#define DIV_QR_1N_PI1_METHOD 1 -#define DIV_QR_1_NORM_THRESHOLD 3 -#define DIV_QR_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* never */ -#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */ -#define DIVEXACT_1_THRESHOLD 0 /* always (native) */ -#define BMOD_1_TO_MOD_1_THRESHOLD 24 - -#define MUL_TOOM22_THRESHOLD 30 -#define MUL_TOOM33_THRESHOLD 81 -#define MUL_TOOM44_THRESHOLD 153 -#define MUL_TOOM6H_THRESHOLD 222 -#define MUL_TOOM8H_THRESHOLD 357 - -#define MUL_TOOM32_TO_TOOM43_THRESHOLD 89 -#define MUL_TOOM32_TO_TOOM53_THRESHOLD 114 -#define MUL_TOOM42_TO_TOOM53_THRESHOLD 99 -#define MUL_TOOM42_TO_TOOM63_THRESHOLD 96 -#define MUL_TOOM43_TO_TOOM54_THRESHOLD 130 - -#define SQR_BASECASE_THRESHOLD 0 /* always (native) */ -#define SQR_TOOM2_THRESHOLD 38 -#define SQR_TOOM3_THRESHOLD 89 -#define SQR_TOOM4_THRESHOLD 196 -#define SQR_TOOM6_THRESHOLD 290 -#define SQR_TOOM8_THRESHOLD 454 - -#define MULMID_TOOM42_THRESHOLD 68 - -#define MULMOD_BNM1_THRESHOLD 19 -#define SQRMOD_BNM1_THRESHOLD 22 - -#define MUL_FFT_MODF_THRESHOLD 636 
/* k = 5 */ -#define MUL_FFT_TABLE3 \ - { { 636, 5}, { 27, 6}, { 27, 7}, { 15, 6}, \ - { 33, 7}, { 17, 6}, { 35, 7}, { 19, 6}, \ - { 39, 7}, { 23, 6}, { 47, 7}, { 29, 8}, \ - { 15, 7}, { 35, 8}, { 19, 7}, { 41, 8}, \ - { 23, 7}, { 49, 8}, { 27, 7}, { 55, 9}, \ - { 15, 8}, { 31, 7}, { 63, 8}, { 43, 9}, \ - { 23, 8}, { 55, 9}, { 31, 8}, { 67, 9}, \ - { 39, 8}, { 79, 9}, { 47, 8}, { 95, 9}, \ - { 55,10}, { 31, 9}, { 63, 8}, { 127, 9}, \ - { 79,10}, { 47, 9}, { 95,11}, { 31,10}, \ - { 63, 9}, { 135,10}, { 79, 9}, { 159,10}, \ - { 95, 9}, { 191,11}, { 63,10}, { 127, 6}, \ - { 2111, 5}, { 4351, 6}, { 2239, 7}, { 1215, 9}, \ - { 311, 8}, { 639,10}, { 175, 8}, { 703,10}, \ - { 191,12}, { 63,11}, { 127,10}, { 255, 9}, \ - { 511,10}, { 271, 9}, { 543,10}, { 287,11}, \ - { 159, 9}, { 671,11}, { 191,10}, { 383, 9}, \ - { 799,11}, { 223,12}, { 127,11}, { 255,10}, \ - { 543, 9}, { 1087,11}, { 287,10}, { 607,11}, \ - { 319,10}, { 671,12}, { 191,11}, { 383,10}, \ - { 799,11}, { 415,13}, { 127,12}, { 255,11}, \ - { 543,10}, { 1087,11}, { 607,10}, { 1215,12}, \ - { 319,11}, { 671,10}, { 1343,11}, { 735,10}, \ - { 1471,12}, { 383,11}, { 799,10}, { 1599,11}, \ - { 863,12}, { 447,11}, { 895,13}, { 255,12}, \ - { 511,11}, { 1087,12}, { 575,11}, { 1215,10}, \ - { 2431,12}, { 639,11}, { 1343,12}, { 703,11}, \ - { 1471,13}, { 383,12}, { 767,11}, { 1599,12}, \ - { 831,11}, { 1727,10}, { 3455,12}, { 895,14}, \ - { 255,13}, { 511,12}, { 1023,11}, { 2047,12}, \ - { 1087,11}, { 2239,10}, { 4479,12}, { 1215,11}, \ - { 2431,13}, { 639,12}, { 1471,11}, { 2943,13}, \ - { 767,12}, { 1727,11}, { 3455,13}, { 895,12}, \ - { 1919,14}, { 511,13}, { 1023,12}, { 2239,11}, \ - { 4479,13}, { 1151,12}, { 2495,11}, { 4991,13}, \ - { 1279,12}, { 2623,13}, { 1407,12}, { 2943,14}, \ - { 767,13}, { 1535,12}, { 3071,13}, { 1663,12}, \ - { 3455,13}, { 1919,15}, { 511,14}, { 1023,13}, \ - { 2175,12}, { 4479,13}, { 2431,12}, { 4991,14}, \ - { 1279,13}, { 2943,12}, { 5887,14}, { 1535,13}, \ - { 3455,14}, { 1791,13}, { 
3967,12}, { 7935,11}, \ - { 15871,15}, { 1023,14}, { 2047,13}, { 4479,14}, \ - { 2303,13}, { 8192,14}, { 16384,15}, { 32768,16} } -#define MUL_FFT_TABLE3_SIZE 172 -#define MUL_FFT_THRESHOLD 6784 - -#define SQR_FFT_MODF_THRESHOLD 606 /* k = 5 */ -#define SQR_FFT_TABLE3 \ - { { 606, 5}, { 28, 6}, { 15, 5}, { 31, 6}, \ - { 29, 7}, { 15, 6}, { 32, 7}, { 17, 6}, \ - { 35, 7}, { 19, 6}, { 39, 7}, { 23, 6}, \ - { 47, 7}, { 29, 8}, { 15, 7}, { 35, 8}, \ - { 19, 7}, { 41, 8}, { 23, 7}, { 49, 8}, \ - { 31, 7}, { 63, 8}, { 43, 9}, { 23, 8}, \ - { 51, 9}, { 31, 8}, { 67, 9}, { 39, 8}, \ - { 79, 9}, { 47, 8}, { 95,10}, { 31, 9}, \ - { 79,10}, { 47, 9}, { 95,11}, { 31,10}, \ - { 63, 9}, { 135,10}, { 79, 9}, { 159,10}, \ - { 95, 9}, { 191,11}, { 63,10}, { 159,11}, \ - { 95,10}, { 191, 6}, { 3135, 5}, { 6399, 6}, \ - { 3455, 8}, { 895, 9}, { 479, 8}, { 991,10}, \ - { 255, 9}, { 575,11}, { 159, 9}, { 639,10}, \ - { 335, 8}, { 1343,10}, { 351,11}, { 191, 9}, \ - { 799,11}, { 223,12}, { 127,11}, { 255,10}, \ - { 543,11}, { 287,10}, { 607, 9}, { 1215,10}, \ - { 671,12}, { 191,11}, { 383,10}, { 767, 9}, \ - { 1535,10}, { 799,11}, { 415,10}, { 863,13}, \ - { 127,12}, { 255,11}, { 511,10}, { 1023,11}, \ - { 543,10}, { 1087,11}, { 607,12}, { 319,11}, \ - { 671,10}, { 1343,11}, { 735,12}, { 383,11}, \ - { 799,10}, { 1599,11}, { 863,12}, { 447,11}, \ - { 927,13}, { 255,12}, { 511,11}, { 1087,12}, \ - { 575,11}, { 1215,12}, { 639,11}, { 1343,12}, \ - { 703,11}, { 1471,13}, { 383,12}, { 767,11}, \ - { 1599,12}, { 831,11}, { 1727,12}, { 895,11}, \ - { 1791,12}, { 959,14}, { 255,13}, { 511,12}, \ - { 1087,11}, { 2239,10}, { 4479,12}, { 1215,13}, \ - { 639,12}, { 1471,11}, { 2943,13}, { 767,12}, \ - { 1727,13}, { 895,12}, { 1919,14}, { 511,13}, \ - { 1023,12}, { 2239,11}, { 4479,13}, { 1151,12}, \ - { 2495,11}, { 4991,13}, { 1279,12}, { 2623,13}, \ - { 1407,12}, { 2943,14}, { 767,13}, { 1663,12}, \ - { 3455,13}, { 1791,12}, { 3583,13}, { 1919,15}, \ - { 511,14}, { 1023,13}, { 2175,12}, { 
4479,13}, \ - { 2431,12}, { 4991,14}, { 1279,13}, { 2943,12}, \ - { 5887,14}, { 1535,13}, { 3455,14}, { 1791,13}, \ - { 3967,15}, { 1023,14}, { 2047,13}, { 4479,14}, \ - { 2303,13}, { 8192,14}, { 16384,15}, { 32768,16} } -#define SQR_FFT_TABLE3_SIZE 160 -#define SQR_FFT_THRESHOLD 5760 - -#define MULLO_BASECASE_THRESHOLD 3 -#define MULLO_DC_THRESHOLD 34 -#define MULLO_MUL_N_THRESHOLD 13463 - -#define DC_DIV_QR_THRESHOLD 67 -#define DC_DIVAPPR_Q_THRESHOLD 196 -#define DC_BDIV_QR_THRESHOLD 67 -#define DC_BDIV_Q_THRESHOLD 112 - -#define INV_MULMOD_BNM1_THRESHOLD 70 -#define INV_NEWTON_THRESHOLD 262 -#define INV_APPR_THRESHOLD 222 - -#define BINV_NEWTON_THRESHOLD 288 -#define REDC_1_TO_REDC_N_THRESHOLD 67 - -#define MU_DIV_QR_THRESHOLD 1718 -#define MU_DIVAPPR_Q_THRESHOLD 1652 -#define MUPI_DIV_QR_THRESHOLD 122 -#define MU_BDIV_QR_THRESHOLD 1387 -#define MU_BDIV_Q_THRESHOLD 1528 - -#define POWM_SEC_TABLE 1,16,69,508,1378,2657,2825 - -#define MATRIX22_STRASSEN_THRESHOLD 19 -#define HGCD_THRESHOLD 61 -#define HGCD_APPR_THRESHOLD 50 -#define HGCD_REDUCE_THRESHOLD 3389 -#define GCD_DC_THRESHOLD 492 -#define GCDEXT_DC_THRESHOLD 345 -#define JACOBI_BASE_METHOD 4 - -#define GET_STR_DC_THRESHOLD 9 -#define GET_STR_PRECOMPUTE_THRESHOLD 21 -#define SET_STR_DC_THRESHOLD 189 -#define SET_STR_PRECOMPUTE_THRESHOLD 541 - -#define FAC_DSC_THRESHOLD 141 -#define FAC_ODD_THRESHOLD 29 diff --git a/gmp/mpn/x86/bdiv_dbm1c.asm b/gmp/mpn/x86/bdiv_dbm1c.asm index 0288c475cd..dbee28fd94 100644 --- a/gmp/mpn/x86/bdiv_dbm1c.asm +++ b/gmp/mpn/x86/bdiv_dbm1c.asm @@ -1,51 +1,32 @@ dnl x86 mpn_bdiv_dbm1. -dnl Copyright 2008, 2011 Free Software Foundation, Inc. +dnl Copyright 2008 Free Software Foundation, Inc. dnl This file is part of the GNU MP Library. 
-dnl + dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl +dnl it under the terms of the GNU Lesser General Public License as published +dnl by the Free Software Foundation; either version 3 of the License, or (at +dnl your option) any later version. + dnl The GNU MP Library is distributed in the hope that it will be useful, but dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. -include(`../config.m4') +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. -C cycles/limb -C P5 -C P6 model 0-8,10-12) -C P6 model 9 (Banias) -C P6 model 13 (Dothan) 5.1 -C P4 model 0 (Willamette) -C P4 model 1 (?) -C P4 model 2 (Northwood) 13.67 -C P4 model 3 (Prescott) -C P4 model 4 (Nocona) -C Intel Atom -C AMD K6 -C AMD K7 3.5 -C AMD K8 -C AMD K10 +include(`../config.m4') +C cycles/limb +C K7: 3.5 +C P4 m0: ? +C P4 m1: ? +C P4 m2: 13.67 +C P4 m3: ? +C P4 m4: ? 
+C P6-13: 5.1 C TODO C * Optimize for more x86 processors @@ -76,17 +57,18 @@ PROLOGUE(mpn_bdiv_dbm1c) cmp $2, %eax jc L(b1) jz L(b2) - -L(b3): lea -8(%esi), %esi - lea 8(%edi), %edi - add $-3, %ebp - jmp L(3) + jmp L(b3) L(b0): mov 4(%esi), %eax lea -4(%esi), %esi lea 12(%edi), %edi add $-4, %ebp jmp L(0) +L(b3): + lea -8(%esi), %esi + lea 8(%edi), %edi + add $-3, %ebp + jmp L(3) L(b2): mov 4(%esi), %eax lea 4(%esi), %esi @@ -95,7 +77,8 @@ L(b2): mov 4(%esi), %eax jmp L(2) ALIGN(8) -L(top): mov 4(%esi), %eax +L(top): + mov 4(%esi), %eax mul %ecx lea 16(%edi), %edi sub %eax, %ebx diff --git a/gmp/mpn/x86/bdiv_q_1.asm b/gmp/mpn/x86/bdiv_q_1.asm deleted file mode 100644 index 825cd296a1..0000000000 --- a/gmp/mpn/x86/bdiv_q_1.asm +++ /dev/null @@ -1,208 +0,0 @@ -dnl x86 mpn_bdiv_q_1 -- mpn by limb exact division. - -dnl Rearranged from mpn/x86/dive_1.asm by Marco Bodrato. - -dnl Copyright 2001, 2002, 2007, 2011 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. 
- -include(`../config.m4') - - -C cycles/limb -C P54 30.0 -C P55 29.0 -C P6 13.0 odd divisor, 12.0 even (strangely) -C K6 14.0 -C K7 12.0 -C P4 42.0 - -MULFUNC_PROLOGUE(mpn_bdiv_q_1 mpn_pi1_bdiv_q_1) - -defframe(PARAM_SHIFT, 24) -defframe(PARAM_INVERSE,20) -defframe(PARAM_DIVISOR,16) -defframe(PARAM_SIZE, 12) -defframe(PARAM_SRC, 8) -defframe(PARAM_DST, 4) - -dnl re-use parameter space -define(VAR_INVERSE,`PARAM_SRC') - - TEXT - -C mp_limb_t -C mpn_pi1_bdiv_q_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, mp_limb_t divisor, -C mp_limb_t inverse, int shift) - - ALIGN(16) -PROLOGUE(mpn_pi1_bdiv_q_1) -deflit(`FRAME',0) - - movl PARAM_SHIFT, %ecx - pushl %ebp FRAME_pushl() - - movl PARAM_INVERSE, %eax - movl PARAM_SIZE, %ebp - pushl %ebx FRAME_pushl() -L(common): - pushl %edi FRAME_pushl() - pushl %esi FRAME_pushl() - - movl PARAM_SRC, %esi - movl PARAM_DST, %edi - - leal (%esi,%ebp,4), %esi C src end - leal (%edi,%ebp,4), %edi C dst end - negl %ebp C -size - - movl %eax, VAR_INVERSE - movl (%esi,%ebp,4), %eax C src[0] - - xorl %ebx, %ebx - xorl %edx, %edx - - incl %ebp - jz L(one) - - movl (%esi,%ebp,4), %edx C src[1] - - shrdl( %cl, %edx, %eax) - - movl VAR_INVERSE, %edx - jmp L(entry) - - - ALIGN(8) - nop C k6 code alignment - nop -L(top): - C eax q - C ebx carry bit, 0 or -1 - C ecx shift - C edx carry limb - C esi src end - C edi dst end - C ebp counter, limbs, negative - - movl -4(%esi,%ebp,4), %eax - subl %ebx, %edx C accumulate carry bit - - movl (%esi,%ebp,4), %ebx - - shrdl( %cl, %ebx, %eax) - - subl %edx, %eax C apply carry limb - movl VAR_INVERSE, %edx - - sbbl %ebx, %ebx - -L(entry): - imull %edx, %eax - - movl %eax, -4(%edi,%ebp,4) - movl PARAM_DIVISOR, %edx - - mull %edx - - incl %ebp - jnz L(top) - - - movl -4(%esi), %eax C src high limb -L(one): - shrl %cl, %eax - popl %esi FRAME_popl() - - addl %ebx, %eax C apply carry bit - - subl %edx, %eax C apply carry limb - - imull VAR_INVERSE, %eax - - movl %eax, -4(%edi) - - popl %edi - popl %ebx - popl %ebp - 
- ret - -EPILOGUE() - -C mp_limb_t mpn_bdiv_q_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, -C mp_limb_t divisor); -C - - ALIGN(16) -PROLOGUE(mpn_bdiv_q_1) -deflit(`FRAME',0) - - movl PARAM_DIVISOR, %eax - pushl %ebp FRAME_pushl() - - movl $-1, %ecx C shift count - movl PARAM_SIZE, %ebp - - pushl %ebx FRAME_pushl() - -L(strip_twos): - incl %ecx - - shrl %eax - jnc L(strip_twos) - - leal 1(%eax,%eax), %ebx C d without twos - andl $127, %eax C d/2, 7 bits - -ifdef(`PIC',` - LEA( binvert_limb_table, %edx) - movzbl (%eax,%edx), %eax C inv 8 bits -',` - movzbl binvert_limb_table(%eax), %eax C inv 8 bits -') - - leal (%eax,%eax), %edx C 2*inv - movl %ebx, PARAM_DIVISOR C d without twos - imull %eax, %eax C inv*inv - imull %ebx, %eax C inv*inv*d - subl %eax, %edx C inv = 2*inv - inv*inv*d - - leal (%edx,%edx), %eax C 2*inv - imull %edx, %edx C inv*inv - imull %ebx, %edx C inv*inv*d - subl %edx, %eax C inv = 2*inv - inv*inv*d - - ASSERT(e,` C expect d*inv == 1 mod 2^GMP_LIMB_BITS - pushl %eax FRAME_pushl() - imull PARAM_DIVISOR, %eax - cmpl $1, %eax - popl %eax FRAME_popl()') - - jmp L(common) -EPILOGUE() - diff --git a/gmp/mpn/x86/bobcat/gmp-mparam.h b/gmp/mpn/x86/bobcat/gmp-mparam.h deleted file mode 100644 index 198081f9fd..0000000000 --- a/gmp/mpn/x86/bobcat/gmp-mparam.h +++ /dev/null @@ -1,197 +0,0 @@ -/* x86/bobcat gmp-mparam.h -- Compiler/machine parameter header file. - -Copyright 1991, 1993, 1994, 2000-2011, 2014 Free Software Foundation, Inc. - -This file is part of the GNU MP Library. - -The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - -or - - * the GNU General Public License as published by the Free Software - Foundation; either version 2 of the License, or (at your option) any - later version. - -or both in parallel, as here. 
- -The GNU MP Library is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. - -You should have received copies of the GNU General Public License and the -GNU Lesser General Public License along with the GNU MP Library. If not, -see https://www.gnu.org/licenses/. */ - -#define GMP_LIMB_BITS 32 -#define GMP_LIMB_BYTES 4 - -/* 1600 MHz AMD Bobcat Zacate E-350 */ -/* FFT tuning limit = 25000000 */ -/* Generated by tuneup.c, 2014-03-12, gcc 4.5 */ - -#define MOD_1_NORM_THRESHOLD 0 /* always */ -#define MOD_1_UNNORM_THRESHOLD 0 /* always */ -#define MOD_1N_TO_MOD_1_1_THRESHOLD 12 -#define MOD_1U_TO_MOD_1_1_THRESHOLD 5 -#define MOD_1_1_TO_MOD_1_2_THRESHOLD 16 -#define MOD_1_2_TO_MOD_1_4_THRESHOLD 0 /* never mpn_mod_1s_2p */ -#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 16 -#define USE_PREINV_DIVREM_1 1 /* native */ -#define DIV_QR_1N_PI1_METHOD 1 -#define DIV_QR_1_NORM_THRESHOLD 2 -#define DIV_QR_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* never */ -#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */ -#define DIVEXACT_1_THRESHOLD 0 /* always (native) */ -#define BMOD_1_TO_MOD_1_THRESHOLD 40 - -#define MUL_TOOM22_THRESHOLD 28 -#define MUL_TOOM33_THRESHOLD 90 -#define MUL_TOOM44_THRESHOLD 154 -#define MUL_TOOM6H_THRESHOLD 270 -#define MUL_TOOM8H_THRESHOLD 490 - -#define MUL_TOOM32_TO_TOOM43_THRESHOLD 89 -#define MUL_TOOM32_TO_TOOM53_THRESHOLD 107 -#define MUL_TOOM42_TO_TOOM53_THRESHOLD 95 -#define MUL_TOOM42_TO_TOOM63_THRESHOLD 110 -#define MUL_TOOM43_TO_TOOM54_THRESHOLD 130 - -#define SQR_BASECASE_THRESHOLD 0 /* always (native) */ -#define SQR_TOOM2_THRESHOLD 38 -#define SQR_TOOM3_THRESHOLD 121 -#define SQR_TOOM4_THRESHOLD 212 -#define SQR_TOOM6_THRESHOLD 303 -#define SQR_TOOM8_THRESHOLD 454 - -#define MULMID_TOOM42_THRESHOLD 74 - -#define MULMOD_BNM1_THRESHOLD 18 -#define SQRMOD_BNM1_THRESHOLD 23 - -#define 
MUL_FFT_MODF_THRESHOLD 660 /* k = 5 */ -#define MUL_FFT_TABLE3 \ - { { 660, 5}, { 25, 6}, { 13, 5}, { 27, 6}, \ - { 27, 7}, { 15, 6}, { 33, 7}, { 17, 6}, \ - { 35, 7}, { 19, 6}, { 39, 7}, { 23, 6}, \ - { 47, 7}, { 27, 8}, { 15, 7}, { 35, 8}, \ - { 19, 7}, { 41, 8}, { 23, 7}, { 49, 8}, \ - { 31, 7}, { 63, 8}, { 39, 9}, { 23, 8}, \ - { 55, 9}, { 31, 8}, { 67, 9}, { 39, 8}, \ - { 79, 9}, { 47, 8}, { 95, 9}, { 55,10}, \ - { 31, 9}, { 63, 8}, { 127, 9}, { 79,10}, \ - { 47, 9}, { 103,11}, { 31,10}, { 63, 9}, \ - { 135,10}, { 79, 9}, { 159,10}, { 95, 9}, \ - { 191,10}, { 111,11}, { 63,10}, { 159,11}, \ - { 95,10}, { 191,12}, { 63,11}, { 127,10}, \ - { 255, 9}, { 511,10}, { 271, 9}, { 543,11}, \ - { 159, 9}, { 639,10}, { 335, 9}, { 671,11}, \ - { 191,10}, { 383, 9}, { 767,10}, { 399, 9}, \ - { 799,11}, { 223,12}, { 127,11}, { 255,10}, \ - { 543, 9}, { 1087,11}, { 287,10}, { 607, 9}, \ - { 1215,10}, { 671,12}, { 191,11}, { 383,10}, \ - { 799, 9}, { 1599,11}, { 415,13}, { 127,12}, \ - { 255,11}, { 543,10}, { 1087,11}, { 607,10}, \ - { 1215,11}, { 671,10}, { 1343,11}, { 735,10}, \ - { 1471,12}, { 383,11}, { 799,10}, { 1599,11}, \ - { 863,12}, { 447,11}, { 991,13}, { 255,12}, \ - { 511,11}, { 1087,12}, { 575,11}, { 1215,12}, \ - { 639,11}, { 1343,12}, { 703,11}, { 1471,13}, \ - { 383,12}, { 767,11}, { 1599,12}, { 831,11}, \ - { 1727,12}, { 959,14}, { 255,13}, { 511,12}, \ - { 1215,13}, { 639,12}, { 1471,13}, { 767,12}, \ - { 1727,13}, { 895,12}, { 1919,14}, { 511,13}, \ - { 1023,12}, { 2111,13}, { 1151,12}, { 2431,13}, \ - { 1407,14}, { 767,13}, { 1663,12}, { 3455,13}, \ - { 1919,15}, { 511,14}, { 1023,13}, { 2175,12}, \ - { 4479,13}, { 2431,14}, { 1279,13}, { 2943,12}, \ - { 5887,14}, { 16384,15}, { 32768,16} } -#define MUL_FFT_TABLE3_SIZE 139 -#define MUL_FFT_THRESHOLD 7552 - -#define SQR_FFT_MODF_THRESHOLD 606 /* k = 5 */ -#define SQR_FFT_TABLE3 \ - { { 606, 5}, { 28, 6}, { 15, 5}, { 31, 6}, \ - { 28, 7}, { 15, 6}, { 33, 7}, { 17, 6}, \ - { 35, 7}, { 19, 6}, { 39, 7}, { 23, 
6}, \ - { 47, 7}, { 29, 8}, { 15, 7}, { 35, 8}, \ - { 19, 7}, { 41, 8}, { 23, 7}, { 49, 8}, \ - { 31, 7}, { 63, 8}, { 43, 9}, { 23, 8}, \ - { 55, 9}, { 31, 8}, { 67, 9}, { 39, 8}, \ - { 79, 9}, { 47, 8}, { 95, 9}, { 55,10}, \ - { 31, 9}, { 79,10}, { 47, 9}, { 103,11}, \ - { 31,10}, { 63, 9}, { 135,10}, { 79, 9}, \ - { 159,10}, { 95, 9}, { 191,11}, { 63,10}, \ - { 159,11}, { 95,10}, { 191,12}, { 63,11}, \ - { 127,10}, { 255, 9}, { 511,10}, { 271, 9}, \ - { 543, 8}, { 1087,11}, { 159,10}, { 319, 9}, \ - { 639,10}, { 335, 9}, { 671, 8}, { 1343,10}, \ - { 351,11}, { 191,10}, { 383, 9}, { 767,10}, \ - { 399, 9}, { 799,10}, { 415, 9}, { 831,12}, \ - { 127,11}, { 255,10}, { 511, 9}, { 1023,10}, \ - { 543, 9}, { 1087,11}, { 287,10}, { 607, 9}, \ - { 1215,11}, { 319,10}, { 671, 9}, { 1343,12}, \ - { 191,11}, { 383,10}, { 799,11}, { 415,10}, \ - { 831,13}, { 127,12}, { 255,11}, { 511,10}, \ - { 1023,11}, { 543,10}, { 1087,11}, { 607,10}, \ - { 1215,12}, { 319,11}, { 671,10}, { 1343,11}, \ - { 735,10}, { 1471,12}, { 383,11}, { 799,10}, \ - { 1599,11}, { 863,12}, { 447,11}, { 991,13}, \ - { 255,12}, { 511,11}, { 1087,12}, { 575,11}, \ - { 1215,12}, { 639,11}, { 1343,12}, { 703,11}, \ - { 1471,13}, { 383,12}, { 767,11}, { 1599,12}, \ - { 831,11}, { 1727,12}, { 959,14}, { 255,13}, \ - { 511,12}, { 1215,13}, { 639,12}, { 1471,13}, \ - { 767,12}, { 1727,13}, { 895,12}, { 1983,14}, \ - { 511,13}, { 1023,12}, { 2111,13}, { 1151,12}, \ - { 2431,13}, { 1407,14}, { 767,13}, { 1663,12}, \ - { 3455,13}, { 1919,15}, { 511,14}, { 1023,13}, \ - { 2175,12}, { 4479,13}, { 2431,14}, { 1279,13}, \ - { 2943,14}, { 16384,15}, { 32768,16} } -#define SQR_FFT_TABLE3_SIZE 147 -#define SQR_FFT_THRESHOLD 5760 - -#define MULLO_BASECASE_THRESHOLD 5 -#define MULLO_DC_THRESHOLD 45 -#define MULLO_MUL_N_THRESHOLD 14281 - -#define DC_DIV_QR_THRESHOLD 71 -#define DC_DIVAPPR_Q_THRESHOLD 238 -#define DC_BDIV_QR_THRESHOLD 67 -#define DC_BDIV_Q_THRESHOLD 151 - -#define INV_MULMOD_BNM1_THRESHOLD 66 -#define 
INV_NEWTON_THRESHOLD 228 -#define INV_APPR_THRESHOLD 222 - -#define BINV_NEWTON_THRESHOLD 270 -#define REDC_1_TO_REDC_N_THRESHOLD 71 - -#define MU_DIV_QR_THRESHOLD 1718 -#define MU_DIVAPPR_Q_THRESHOLD 1718 -#define MUPI_DIV_QR_THRESHOLD 91 -#define MU_BDIV_QR_THRESHOLD 1589 -#define MU_BDIV_Q_THRESHOLD 1718 - -#define POWM_SEC_TABLE 1,16,96,416,1185 - -#define MATRIX22_STRASSEN_THRESHOLD 17 -#define HGCD_THRESHOLD 88 -#define HGCD_APPR_THRESHOLD 137 -#define HGCD_REDUCE_THRESHOLD 3664 -#define GCD_DC_THRESHOLD 465 -#define GCDEXT_DC_THRESHOLD 345 -#define JACOBI_BASE_METHOD 4 - -#define GET_STR_DC_THRESHOLD 18 -#define GET_STR_PRECOMPUTE_THRESHOLD 34 -#define SET_STR_DC_THRESHOLD 270 -#define SET_STR_PRECOMPUTE_THRESHOLD 828 - -#define FAC_DSC_THRESHOLD 256 -#define FAC_ODD_THRESHOLD 34 diff --git a/gmp/mpn/x86/cnd_aors_n.asm b/gmp/mpn/x86/cnd_aors_n.asm deleted file mode 100644 index 74f4917ecc..0000000000 --- a/gmp/mpn/x86/cnd_aors_n.asm +++ /dev/null @@ -1,124 +0,0 @@ -dnl X86 mpn_cnd_add_n, mpn_cnd_sub_n - -dnl Copyright 2013 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. 
-dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. - -include(`../config.m4') - -C cycles/limb -C P5 ? -C P6 model 0-8,10-12 ? -C P6 model 9 (Banias) ? -C P6 model 13 (Dothan) 5.4 -C P4 model 0-1 (Willamette) ? -C P4 model 2 (Northwood) 14.5 -C P4 model 3-4 (Prescott) 21 -C Intel atom 11 -C AMD K6 ? -C AMD K7 3.4 -C AMD K8 ? - - -define(`rp', `%edi') -define(`up', `%esi') -define(`vp', `%ebp') -define(`n', `%ecx') -define(`cnd', `20(%esp)') -define(`cy', `%edx') - -ifdef(`OPERATION_cnd_add_n', ` - define(ADDSUB, add) - define(ADCSBB, adc) - define(func, mpn_cnd_add_n)') -ifdef(`OPERATION_cnd_sub_n', ` - define(ADDSUB, sub) - define(ADCSBB, sbb) - define(func, mpn_cnd_sub_n)') - -MULFUNC_PROLOGUE(mpn_cnd_add_n mpn_cnd_sub_n) - -ASM_START() - TEXT - ALIGN(16) -PROLOGUE(func) - add $-16, %esp - mov %ebp, (%esp) - mov %ebx, 4(%esp) - mov %esi, 8(%esp) - mov %edi, 12(%esp) - - C make cnd into a full mask - mov cnd, %eax - neg %eax - sbb %eax, %eax - mov %eax, cnd - - C load parameters into registers - mov 24(%esp), rp - mov 28(%esp), up - mov 32(%esp), vp - mov 36(%esp), n - - mov (vp), %eax - mov (up), %ebx - - C put operand pointers just beyond their last limb - lea (vp,n,4), vp - lea (up,n,4), up - lea -4(rp,n,4), rp - neg n - - and cnd, %eax - ADDSUB %eax, %ebx - sbb cy, cy - inc n - je L(end) - - ALIGN(16) -L(top): mov (vp,n,4), %eax - and cnd, %eax - mov %ebx, (rp,n,4) - mov (up,n,4), %ebx - add cy, cy - ADCSBB %eax, %ebx - sbb cy, cy - inc n - jne L(top) - -L(end): mov %ebx, (rp) - xor %eax, %eax - sub cy, %eax - - mov (%esp), %ebp - mov 4(%esp), %ebx - mov 8(%esp), %esi - mov 12(%esp), %edi - add $16, %esp - ret -EPILOGUE() -ASM_END() diff --git a/gmp/mpn/x86/copyd.asm b/gmp/mpn/x86/copyd.asm index 51fa19568b..4ce3bbbc69 100644 --- a/gmp/mpn/x86/copyd.asm +++ b/gmp/mpn/x86/copyd.asm @@ -1,42 +1,31 @@ dnl x86 
mpn_copyd -- copy limb vector, decrementing. -dnl Copyright 1999-2002 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: +dnl Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc. dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. 
include(`../config.m4') C cycles/limb startup (approx) -C P5 1.0 40 -C P6 2.4 70 -C K6 1.0 55 -C K7 1.3 75 -C P4 2.6 175 +C P5: 1.0 40 +C P6 2.4 70 +C K6 1.0 55 +C K7: 1.3 75 +C P4: 2.6 175 C C (Startup time includes some function call overheads.) diff --git a/gmp/mpn/x86/copyi.asm b/gmp/mpn/x86/copyi.asm index f6b0354b4f..c6bbaeee65 100644 --- a/gmp/mpn/x86/copyi.asm +++ b/gmp/mpn/x86/copyi.asm @@ -1,42 +1,31 @@ dnl x86 mpn_copyi -- copy limb vector, incrementing. -dnl Copyright 1999-2002 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: +dnl Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc. dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') C cycles/limb startup (approx) -C P5 1.0 35 -C P6 0.75 45 -C K6 1.0 30 -C K7 1.3 65 -C P4 1.0 120 +C P5: 1.0 35 +C P6 0.75 45 +C K6 1.0 30 +C K7: 1.3 65 +C P4: 1.0 120 C C (Startup time includes some function call overheads.) diff --git a/gmp/mpn/x86/core2/gmp-mparam.h b/gmp/mpn/x86/core2/gmp-mparam.h deleted file mode 100644 index b370eb5877..0000000000 --- a/gmp/mpn/x86/core2/gmp-mparam.h +++ /dev/null @@ -1,200 +0,0 @@ -/* x86/core2 gmp-mparam.h -- Compiler/machine parameter header file. - -Copyright 1991, 1993, 1994, 2000-2011, 2014 Free Software Foundation, Inc. - -This file is part of the GNU MP Library. - -The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - -or - - * the GNU General Public License as published by the Free Software - Foundation; either version 2 of the License, or (at your option) any - later version. - -or both in parallel, as here. - -The GNU MP Library is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. - -You should have received copies of the GNU General Public License and the -GNU Lesser General Public License along with the GNU MP Library. If not, -see https://www.gnu.org/licenses/. 
*/ - -#define GMP_LIMB_BITS 32 -#define GMP_LIMB_BYTES 4 - -/* 2133 MHz Core 2 (65nm) */ -/* FFT tuning limit = 25000000 */ -/* Generated by tuneup.c, 2014-03-14, gcc 4.5 */ - -#define MOD_1_NORM_THRESHOLD 4 -#define MOD_1_UNNORM_THRESHOLD 4 -#define MOD_1N_TO_MOD_1_1_THRESHOLD 5 -#define MOD_1U_TO_MOD_1_1_THRESHOLD 4 -#define MOD_1_1_TO_MOD_1_2_THRESHOLD 10 -#define MOD_1_2_TO_MOD_1_4_THRESHOLD 0 /* never mpn_mod_1s_2p */ -#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 10 -#define USE_PREINV_DIVREM_1 1 /* native */ -#define DIV_QR_1N_PI1_METHOD 1 -#define DIV_QR_1_NORM_THRESHOLD 3 -#define DIV_QR_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* never */ -#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */ -#define DIVEXACT_1_THRESHOLD 0 /* always (native) */ -#define BMOD_1_TO_MOD_1_THRESHOLD 19 - -#define MUL_TOOM22_THRESHOLD 26 -#define MUL_TOOM33_THRESHOLD 90 -#define MUL_TOOM44_THRESHOLD 144 -#define MUL_TOOM6H_THRESHOLD 286 -#define MUL_TOOM8H_THRESHOLD 430 - -#define MUL_TOOM32_TO_TOOM43_THRESHOLD 93 -#define MUL_TOOM32_TO_TOOM53_THRESHOLD 140 -#define MUL_TOOM42_TO_TOOM53_THRESHOLD 89 -#define MUL_TOOM42_TO_TOOM63_THRESHOLD 102 -#define MUL_TOOM43_TO_TOOM54_THRESHOLD 136 - -#define SQR_BASECASE_THRESHOLD 0 /* always (native) */ -#define SQR_TOOM2_THRESHOLD 34 -#define SQR_TOOM3_THRESHOLD 114 -#define SQR_TOOM4_THRESHOLD 178 -#define SQR_TOOM6_THRESHOLD 262 -#define SQR_TOOM8_THRESHOLD 357 - -#define MULMID_TOOM42_THRESHOLD 66 - -#define MULMOD_BNM1_THRESHOLD 15 -#define SQRMOD_BNM1_THRESHOLD 21 - -#define MUL_FFT_MODF_THRESHOLD 600 /* k = 5 */ -#define MUL_FFT_TABLE3 \ - { { 600, 5}, { 25, 6}, { 13, 5}, { 28, 6}, \ - { 25, 7}, { 13, 6}, { 29, 7}, { 15, 6}, \ - { 33, 7}, { 17, 6}, { 36, 7}, { 19, 6}, \ - { 39, 7}, { 29, 8}, { 15, 7}, { 35, 8}, \ - { 19, 7}, { 43, 8}, { 23, 7}, { 47, 8}, \ - { 27, 9}, { 15, 8}, { 31, 7}, { 63, 8}, \ - { 43, 9}, { 23, 8}, { 51,10}, { 15, 9}, \ - { 31, 8}, { 67, 9}, { 39, 8}, { 79, 9}, \ - { 47, 8}, { 95, 9}, { 55,10}, { 31, 9}, \ - { 79,10}, 
{ 47, 9}, { 95,11}, { 31,10}, \ - { 63, 9}, { 135,10}, { 79, 9}, { 159,10}, \ - { 95, 9}, { 191,11}, { 63,10}, { 159,11}, \ - { 95,10}, { 191,12}, { 63,11}, { 127,10}, \ - { 271, 9}, { 543, 8}, { 1087,11}, { 159,10}, \ - { 319, 9}, { 639,10}, { 335, 9}, { 671,10}, \ - { 351,11}, { 191,10}, { 383, 9}, { 767,10}, \ - { 399, 9}, { 799,11}, { 223,12}, { 127,11}, \ - { 255,10}, { 543, 9}, { 1087,11}, { 287,10}, \ - { 607, 9}, { 1215,11}, { 319,10}, { 671,11}, \ - { 351,12}, { 191,11}, { 383,10}, { 799, 9}, \ - { 1599,13}, { 127,12}, { 255,11}, { 543,10}, \ - { 1087,11}, { 607,10}, { 1215,12}, { 319,11}, \ - { 671,10}, { 1343,11}, { 735,12}, { 383,11}, \ - { 799,10}, { 1599,11}, { 863,10}, { 1727,12}, \ - { 447,11}, { 959,13}, { 255,12}, { 511,11}, \ - { 1087,12}, { 575,11}, { 1215,12}, { 639,11}, \ - { 1343,12}, { 703,13}, { 383,12}, { 767,11}, \ - { 1599,12}, { 831,11}, { 1727,12}, { 959,14}, \ - { 255,13}, { 511,12}, { 1087,11}, { 2239,10}, \ - { 4479,12}, { 1215,13}, { 639,12}, { 1471,11}, \ - { 2943,13}, { 767,12}, { 1727,13}, { 895,12}, \ - { 1983,14}, { 511,13}, { 1023,12}, { 2239,11}, \ - { 4479,13}, { 1151,12}, { 2495,13}, { 1279,12}, \ - { 2623,13}, { 1407,12}, { 2815,14}, { 767,13}, \ - { 1663,12}, { 3455,13}, { 1919,15}, { 511,14}, \ - { 1023,13}, { 2175,12}, { 4479,13}, { 2431,14}, \ - { 1279,13}, { 2943,12}, { 5887,14}, { 16384,15}, \ - { 32768,16} } -#define MUL_FFT_TABLE3_SIZE 149 -#define MUL_FFT_THRESHOLD 6784 - -#define SQR_FFT_MODF_THRESHOLD 500 /* k = 5 */ -#define SQR_FFT_TABLE3 \ - { { 500, 5}, { 28, 6}, { 15, 5}, { 31, 6}, \ - { 28, 7}, { 15, 6}, { 32, 7}, { 17, 6}, \ - { 35, 7}, { 19, 6}, { 39, 7}, { 29, 8}, \ - { 15, 7}, { 35, 8}, { 19, 7}, { 41, 8}, \ - { 23, 7}, { 49, 8}, { 27, 9}, { 15, 8}, \ - { 31, 7}, { 63, 8}, { 39, 9}, { 23, 8}, \ - { 51,10}, { 15, 9}, { 31, 8}, { 67, 9}, \ - { 39, 8}, { 79, 9}, { 55,10}, { 31, 9}, \ - { 79,10}, { 47, 9}, { 95,11}, { 31,10}, \ - { 63, 9}, { 127,10}, { 79, 9}, { 159,10}, \ - { 95,11}, { 63,10}, { 143, 9}, 
{ 287,10}, \ - { 159,11}, { 95,12}, { 63,11}, { 127,10}, \ - { 271, 9}, { 543,10}, { 287,11}, { 159,10}, \ - { 319, 9}, { 639,10}, { 335, 9}, { 671,10}, \ - { 351, 9}, { 703,11}, { 191,10}, { 383, 9}, \ - { 767,10}, { 399, 9}, { 799,10}, { 415, 9}, \ - { 831,10}, { 431,11}, { 223,12}, { 127,11}, \ - { 255,10}, { 543, 9}, { 1087,11}, { 287,10}, \ - { 607,11}, { 319,10}, { 671,11}, { 351,10}, \ - { 703,12}, { 191,11}, { 383,10}, { 799,11}, \ - { 415,10}, { 863,13}, { 127,12}, { 255,11}, \ - { 543,10}, { 1087,11}, { 607,10}, { 1215,12}, \ - { 319,11}, { 671,10}, { 1343,11}, { 735,10}, \ - { 1471,12}, { 383,11}, { 799,10}, { 1599,11}, \ - { 863,12}, { 447,11}, { 959,13}, { 255,12}, \ - { 511,11}, { 1087,12}, { 575,11}, { 1215,12}, \ - { 639,11}, { 1343,12}, { 703,11}, { 1471,13}, \ - { 383,12}, { 831,11}, { 1727,12}, { 959,14}, \ - { 255,13}, { 511,12}, { 1087,11}, { 2239,12}, \ - { 1215,13}, { 639,12}, { 1471,11}, { 2943,13}, \ - { 767,12}, { 1727,13}, { 895,12}, { 1983,14}, \ - { 511,13}, { 1023,12}, { 2239,13}, { 1151,12}, \ - { 2495,13}, { 1407,12}, { 2943,14}, { 767,13}, \ - { 1663,12}, { 3455,13}, { 1919,15}, { 511,14}, \ - { 1023,13}, { 2175,12}, { 4479,13}, { 2431,14}, \ - { 1279,13}, { 2943,12}, { 5887,14}, { 16384,15}, \ - { 32768,16} } -#define SQR_FFT_TABLE3_SIZE 145 -#define SQR_FFT_THRESHOLD 5312 - -#define MULLO_BASECASE_THRESHOLD 0 /* always */ -#define MULLO_DC_THRESHOLD 29 -#define MULLO_MUL_N_THRESHOLD 13463 - -#define DC_DIV_QR_THRESHOLD 21 -#define DC_DIVAPPR_Q_THRESHOLD 50 -#define DC_BDIV_QR_THRESHOLD 79 -#define DC_BDIV_Q_THRESHOLD 174 - -#define INV_MULMOD_BNM1_THRESHOLD 50 -#define INV_NEWTON_THRESHOLD 39 -#define INV_APPR_THRESHOLD 37 - -#define BINV_NEWTON_THRESHOLD 318 -#define REDC_1_TO_REDC_N_THRESHOLD 87 - -#define MU_DIV_QR_THRESHOLD 1099 -#define MU_DIVAPPR_Q_THRESHOLD 792 -#define MUPI_DIV_QR_THRESHOLD 0 /* always */ -#define MU_BDIV_QR_THRESHOLD 1442 -#define MU_BDIV_Q_THRESHOLD 1589 - -#define POWM_SEC_TABLE 3,32,95,480,597,2657 - 
-#define MATRIX22_STRASSEN_THRESHOLD 21 -#define HGCD_THRESHOLD 83 -#define HGCD_APPR_THRESHOLD 159 -#define HGCD_REDUCE_THRESHOLD 3389 -#define GCD_DC_THRESHOLD 379 -#define GCDEXT_DC_THRESHOLD 309 -#define JACOBI_BASE_METHOD 4 - -#define GET_STR_DC_THRESHOLD 10 -#define GET_STR_PRECOMPUTE_THRESHOLD 25 -#define SET_STR_DC_THRESHOLD 442 -#define SET_STR_PRECOMPUTE_THRESHOLD 1104 - -#define FAC_DSC_THRESHOLD 155 -#define FAC_ODD_THRESHOLD 34 diff --git a/gmp/mpn/x86/coreihwl/gmp-mparam.h b/gmp/mpn/x86/coreihwl/gmp-mparam.h deleted file mode 100644 index e2b289cc3c..0000000000 --- a/gmp/mpn/x86/coreihwl/gmp-mparam.h +++ /dev/null @@ -1,210 +0,0 @@ -/* x86/coreihwl gmp-mparam.h -- Compiler/machine parameter header file. - -Copyright 1991, 1993, 1994, 2000-2011, 2014 Free Software Foundation, Inc. - -This file is part of the GNU MP Library. - -The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - -or - - * the GNU General Public License as published by the Free Software - Foundation; either version 2 of the License, or (at your option) any - later version. - -or both in parallel, as here. - -The GNU MP Library is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. - -You should have received copies of the GNU General Public License and the -GNU Lesser General Public License along with the GNU MP Library. If not, -see https://www.gnu.org/licenses/. 
*/ - -#define GMP_LIMB_BITS 32 -#define GMP_LIMB_BYTES 4 - -/* 2900 MHz Core i5 Haswell */ -/* FFT tuning limit = 40000000 */ -/* Generated by tuneup.c, 2014-03-13, gcc 4.5 */ - -#define MOD_1_NORM_THRESHOLD 16 -#define MOD_1_UNNORM_THRESHOLD 13 -#define MOD_1N_TO_MOD_1_1_THRESHOLD 11 -#define MOD_1U_TO_MOD_1_1_THRESHOLD 9 -#define MOD_1_1_TO_MOD_1_2_THRESHOLD 10 -#define MOD_1_2_TO_MOD_1_4_THRESHOLD 0 /* never mpn_mod_1s_2p */ -#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 5 -#define USE_PREINV_DIVREM_1 1 /* native */ -#define DIV_QR_1N_PI1_METHOD 1 -#define DIV_QR_1_NORM_THRESHOLD 15 -#define DIV_QR_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* never */ -#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */ -#define DIVEXACT_1_THRESHOLD 0 /* always (native) */ -#define BMOD_1_TO_MOD_1_THRESHOLD 19 - -#define MUL_TOOM22_THRESHOLD 27 -#define MUL_TOOM33_THRESHOLD 90 -#define MUL_TOOM44_THRESHOLD 218 -#define MUL_TOOM6H_THRESHOLD 318 -#define MUL_TOOM8H_THRESHOLD 490 - -#define MUL_TOOM32_TO_TOOM43_THRESHOLD 89 -#define MUL_TOOM32_TO_TOOM53_THRESHOLD 153 -#define MUL_TOOM42_TO_TOOM53_THRESHOLD 105 -#define MUL_TOOM42_TO_TOOM63_THRESHOLD 101 -#define MUL_TOOM43_TO_TOOM54_THRESHOLD 130 - -#define SQR_BASECASE_THRESHOLD 0 /* always (native) */ -#define SQR_TOOM2_THRESHOLD 44 -#define SQR_TOOM3_THRESHOLD 137 -#define SQR_TOOM4_THRESHOLD 242 -#define SQR_TOOM6_THRESHOLD 351 -#define SQR_TOOM8_THRESHOLD 597 - -#define MULMID_TOOM42_THRESHOLD 98 - -#define MULMOD_BNM1_THRESHOLD 17 -#define SQRMOD_BNM1_THRESHOLD 21 - -#define MUL_FFT_MODF_THRESHOLD 630 /* k = 5 */ -#define MUL_FFT_TABLE3 \ - { { 630, 5}, { 25, 6}, { 13, 5}, { 27, 6}, \ - { 15, 5}, { 31, 6}, { 28, 7}, { 15, 6}, \ - { 33, 7}, { 17, 6}, { 35, 7}, { 19, 6}, \ - { 39, 7}, { 23, 6}, { 47, 7}, { 27, 8}, \ - { 15, 7}, { 35, 8}, { 19, 7}, { 41, 8}, \ - { 23, 7}, { 49, 8}, { 27, 9}, { 15, 8}, \ - { 31, 7}, { 63, 8}, { 39, 9}, { 23, 8}, \ - { 55,10}, { 15, 9}, { 31, 8}, { 67, 9}, \ - { 39, 8}, { 79, 9}, { 47, 8}, { 95, 9}, \ - { 
55,10}, { 31, 9}, { 79,10}, { 47, 9}, \ - { 103,11}, { 31,10}, { 63, 9}, { 135,10}, \ - { 79, 9}, { 159,10}, { 95, 9}, { 191,10}, \ - { 111,11}, { 63,10}, { 159,11}, { 95,10}, \ - { 191,12}, { 63,11}, { 127,10}, { 255, 9}, \ - { 511,10}, { 271, 9}, { 543,11}, { 159,10}, \ - { 319, 9}, { 639,10}, { 335, 9}, { 671,11}, \ - { 191,10}, { 383, 9}, { 767,10}, { 399, 9}, \ - { 799,11}, { 223,12}, { 127,11}, { 255,10}, \ - { 543,11}, { 287,10}, { 607, 9}, { 1215,11}, \ - { 319,10}, { 671,12}, { 191,11}, { 383,10}, \ - { 799,11}, { 415,13}, { 127,12}, { 255,11}, \ - { 543,10}, { 1087,11}, { 607,10}, { 1215,12}, \ - { 319,11}, { 671,10}, { 1343,11}, { 735,10}, \ - { 1471,12}, { 383,11}, { 799,10}, { 1599,11}, \ - { 863,10}, { 1727,12}, { 447,11}, { 959,13}, \ - { 255,12}, { 511,11}, { 1087,12}, { 575,11}, \ - { 1215,10}, { 2431,12}, { 639,11}, { 1343,12}, \ - { 703,11}, { 1471,13}, { 383,12}, { 767,11}, \ - { 1599,12}, { 831,11}, { 1727,10}, { 3455,12}, \ - { 959,14}, { 255,13}, { 511,12}, { 1087,11}, \ - { 2239,12}, { 1215,11}, { 2431,13}, { 639,12}, \ - { 1471,11}, { 2943,10}, { 5887,13}, { 767,12}, \ - { 1727,11}, { 3455,13}, { 895,12}, { 1983,14}, \ - { 511,13}, { 1023,12}, { 2239,13}, { 1151,12}, \ - { 2495,13}, { 1279,12}, { 2559,13}, { 1407,12}, \ - { 2943,11}, { 5887,14}, { 767,13}, { 1535,12}, \ - { 3071,13}, { 1663,12}, { 3455,13}, { 1919,15}, \ - { 511,14}, { 1023,13}, { 2175,12}, { 4479,13}, \ - { 2431,14}, { 1279,13}, { 2943,12}, { 5887,14}, \ - { 1535,13}, { 3455,14}, { 1791,13}, { 3967,12}, \ - { 7935,15}, { 1023,14}, { 2047,13}, { 4479,14}, \ - { 2303,13}, { 8192,14}, { 16384,15}, { 32768,16} } -#define MUL_FFT_TABLE3_SIZE 168 -#define MUL_FFT_THRESHOLD 7424 - -#define SQR_FFT_MODF_THRESHOLD 530 /* k = 5 */ -#define SQR_FFT_TABLE3 \ - { { 530, 5}, { 28, 6}, { 15, 5}, { 31, 6}, \ - { 28, 7}, { 15, 6}, { 33, 7}, { 17, 6}, \ - { 36, 7}, { 19, 6}, { 39, 7}, { 23, 6}, \ - { 47, 7}, { 29, 8}, { 15, 7}, { 35, 8}, \ - { 19, 7}, { 41, 8}, { 23, 7}, { 49, 8}, \ - { 27, 
7}, { 55, 9}, { 15, 8}, { 31, 7}, \ - { 63, 8}, { 39, 9}, { 23, 8}, { 55,10}, \ - { 15, 9}, { 31, 8}, { 67, 9}, { 39, 8}, \ - { 79, 9}, { 47, 8}, { 95, 9}, { 55,10}, \ - { 31, 9}, { 79,10}, { 47, 9}, { 95,11}, \ - { 31,10}, { 63, 9}, { 135,10}, { 79, 9}, \ - { 159,10}, { 95, 9}, { 191,10}, { 111,11}, \ - { 63,10}, { 159,11}, { 95,10}, { 191,12}, \ - { 63,11}, { 127,10}, { 255, 9}, { 511,10}, \ - { 271, 9}, { 543,11}, { 159,10}, { 319, 9}, \ - { 639,10}, { 335, 9}, { 671,10}, { 351,11}, \ - { 191,10}, { 383, 9}, { 767,10}, { 399, 9}, \ - { 799,12}, { 127,11}, { 255,10}, { 511, 9}, \ - { 1023,10}, { 543,11}, { 287,10}, { 607,11}, \ - { 319,10}, { 671,11}, { 351,12}, { 191,11}, \ - { 383,10}, { 799,11}, { 415,10}, { 831,13}, \ - { 127,12}, { 255,11}, { 511,10}, { 1023,11}, \ - { 543,10}, { 1087,11}, { 607,12}, { 319,11}, \ - { 671,10}, { 1343,11}, { 735,10}, { 1471,12}, \ - { 383,11}, { 799,10}, { 1599,11}, { 863,10}, \ - { 1727,12}, { 447,11}, { 991,13}, { 255,12}, \ - { 511,11}, { 1087,12}, { 575,11}, { 1215,12}, \ - { 639,11}, { 1343,12}, { 703,11}, { 1471,13}, \ - { 383,12}, { 767,11}, { 1599,12}, { 831,11}, \ - { 1727,12}, { 959,11}, { 1983,14}, { 255,13}, \ - { 511,12}, { 1023,11}, { 2047,12}, { 1087,11}, \ - { 2239,12}, { 1215,11}, { 2431,13}, { 639,12}, \ - { 1471,11}, { 2943,13}, { 767,12}, { 1727,13}, \ - { 895,12}, { 1983,14}, { 511,13}, { 1023,12}, \ - { 2239,13}, { 1151,12}, { 2495,13}, { 1279,12}, \ - { 2623,13}, { 1407,12}, { 2943,14}, { 767,13}, \ - { 1535,12}, { 3071,13}, { 1663,12}, { 3455,13}, \ - { 1919,12}, { 3839,15}, { 511,14}, { 1023,13}, \ - { 2175,12}, { 4479,13}, { 2431,12}, { 4863,14}, \ - { 1279,13}, { 2943,12}, { 5887,14}, { 1535,13}, \ - { 3455,14}, { 1791,13}, { 3967,15}, { 1023,14}, \ - { 2047,13}, { 4479,14}, { 2303,13}, { 8192,14}, \ - { 16384,15}, { 32768,16} } -#define SQR_FFT_TABLE3_SIZE 170 -#define SQR_FFT_THRESHOLD 5760 - -#define MULLO_BASECASE_THRESHOLD 0 /* always */ -#define MULLO_DC_THRESHOLD 57 -#define 
MULLO_MUL_N_THRESHOLD 14281 - -#define DC_DIV_QR_THRESHOLD 23 -#define DC_DIVAPPR_Q_THRESHOLD 63 -#define DC_BDIV_QR_THRESHOLD 87 -#define DC_BDIV_Q_THRESHOLD 204 - -#define INV_MULMOD_BNM1_THRESHOLD 54 -#define INV_NEWTON_THRESHOLD 75 -#define INV_APPR_THRESHOLD 67 - -#define BINV_NEWTON_THRESHOLD 296 -#define REDC_1_TO_REDC_N_THRESHOLD 79 - -#define MU_DIV_QR_THRESHOLD 872 -#define MU_DIVAPPR_Q_THRESHOLD 654 -#define MUPI_DIV_QR_THRESHOLD 0 /* always */ -#define MU_BDIV_QR_THRESHOLD 1858 -#define MU_BDIV_Q_THRESHOLD 2089 - -#define POWM_SEC_TABLE 1,17,127,508,1603 - -#define MATRIX22_STRASSEN_THRESHOLD 19 -#define HGCD_THRESHOLD 61 -#define HGCD_APPR_THRESHOLD 60 -#define HGCD_REDUCE_THRESHOLD 3810 -#define GCD_DC_THRESHOLD 263 -#define GCDEXT_DC_THRESHOLD 278 -#define JACOBI_BASE_METHOD 4 - -#define GET_STR_DC_THRESHOLD 11 -#define GET_STR_PRECOMPUTE_THRESHOLD 21 -#define SET_STR_DC_THRESHOLD 527 -#define SET_STR_PRECOMPUTE_THRESHOLD 1178 - -#define FAC_DSC_THRESHOLD 187 -#define FAC_ODD_THRESHOLD 34 diff --git a/gmp/mpn/x86/coreinhm/gmp-mparam.h b/gmp/mpn/x86/coreinhm/gmp-mparam.h deleted file mode 100644 index 13289c0c23..0000000000 --- a/gmp/mpn/x86/coreinhm/gmp-mparam.h +++ /dev/null @@ -1,224 +0,0 @@ -/* x86/coreinhm gmp-mparam.h -- Compiler/machine parameter header file. - -Copyright 1991, 1993, 1994, 2000-2011, 2014 Free Software Foundation, Inc. - -This file is part of the GNU MP Library. - -The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - -or - - * the GNU General Public License as published by the Free Software - Foundation; either version 2 of the License, or (at your option) any - later version. - -or both in parallel, as here. 
- -The GNU MP Library is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. - -You should have received copies of the GNU General Public License and the -GNU Lesser General Public License along with the GNU MP Library. If not, -see https://www.gnu.org/licenses/. */ - -#define GMP_LIMB_BITS 32 -#define GMP_LIMB_BYTES 4 - -/* 2667 MHz Core i7 Nehalem */ -/* FFT tuning limit = 100000000 */ -/* Generated by tuneup.c, 2014-03-19, gcc 4.5 */ - -#define MOD_1_NORM_THRESHOLD 25 -#define MOD_1_UNNORM_THRESHOLD 15 -#define MOD_1N_TO_MOD_1_1_THRESHOLD 8 -#define MOD_1U_TO_MOD_1_1_THRESHOLD 7 -#define MOD_1_1_TO_MOD_1_2_THRESHOLD 11 -#define MOD_1_2_TO_MOD_1_4_THRESHOLD 0 /* never mpn_mod_1s_2p */ -#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 3 -#define USE_PREINV_DIVREM_1 1 /* native */ -#define DIV_QR_1N_PI1_METHOD 1 -#define DIV_QR_1_NORM_THRESHOLD 18 -#define DIV_QR_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* never */ -#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */ -#define DIVEXACT_1_THRESHOLD 0 /* always (native) */ -#define BMOD_1_TO_MOD_1_THRESHOLD 15 - -#define MUL_TOOM22_THRESHOLD 26 -#define MUL_TOOM33_THRESHOLD 89 -#define MUL_TOOM44_THRESHOLD 214 -#define MUL_TOOM6H_THRESHOLD 327 -#define MUL_TOOM8H_THRESHOLD 466 - -#define MUL_TOOM32_TO_TOOM43_THRESHOLD 97 -#define MUL_TOOM32_TO_TOOM53_THRESHOLD 159 -#define MUL_TOOM42_TO_TOOM53_THRESHOLD 95 -#define MUL_TOOM42_TO_TOOM63_THRESHOLD 101 -#define MUL_TOOM43_TO_TOOM54_THRESHOLD 142 - -#define SQR_BASECASE_THRESHOLD 0 /* always (native) */ -#define SQR_TOOM2_THRESHOLD 44 -#define SQR_TOOM3_THRESHOLD 145 -#define SQR_TOOM4_THRESHOLD 232 -#define SQR_TOOM6_THRESHOLD 342 -#define SQR_TOOM8_THRESHOLD 502 - -#define MULMID_TOOM42_THRESHOLD 78 - -#define MULMOD_BNM1_THRESHOLD 17 -#define SQRMOD_BNM1_THRESHOLD 21 - -#define MUL_FFT_MODF_THRESHOLD 606 /* k = 5 */ 
-#define MUL_FFT_TABLE3 \ - { { 606, 5}, { 25, 6}, { 13, 5}, { 28, 6}, \ - { 15, 5}, { 33, 6}, { 29, 7}, { 15, 6}, \ - { 33, 7}, { 17, 6}, { 36, 7}, { 19, 6}, \ - { 39, 7}, { 23, 6}, { 47, 7}, { 35, 8}, \ - { 19, 7}, { 43, 8}, { 23, 7}, { 49, 8}, \ - { 31, 7}, { 63, 8}, { 43, 9}, { 23, 8}, \ - { 51, 9}, { 31, 8}, { 67, 9}, { 39, 8}, \ - { 79, 9}, { 47, 8}, { 95, 9}, { 55,10}, \ - { 31, 9}, { 79,10}, { 47, 9}, { 95,11}, \ - { 31,10}, { 63, 9}, { 135,10}, { 79, 9}, \ - { 159,10}, { 95, 9}, { 191,10}, { 111,11}, \ - { 63,10}, { 159,11}, { 95,10}, { 191,12}, \ - { 63,11}, { 127,10}, { 255, 9}, { 511,10}, \ - { 271, 9}, { 543,11}, { 159,10}, { 319, 9}, \ - { 639,10}, { 335,11}, { 191,10}, { 383, 9}, \ - { 767,10}, { 399,12}, { 127,11}, { 255,10}, \ - { 511, 9}, { 1023,10}, { 543,11}, { 287,10}, \ - { 607,11}, { 319,10}, { 639,12}, { 191,11}, \ - { 383,10}, { 767,13}, { 127,12}, { 255,11}, \ - { 511,10}, { 1023,11}, { 543,10}, { 1087,11}, \ - { 607,12}, { 319,11}, { 671,10}, { 1343,11}, \ - { 735,12}, { 383,11}, { 799,10}, { 1599,11}, \ - { 863,10}, { 1727,12}, { 447,11}, { 927,10}, \ - { 1855,11}, { 991,13}, { 255,12}, { 511,11}, \ - { 1119,12}, { 575,11}, { 1215,10}, { 2431,12}, \ - { 639,11}, { 1343,12}, { 703,11}, { 1471,13}, \ - { 383,12}, { 767,11}, { 1599,12}, { 831,11}, \ - { 1727,12}, { 895,11}, { 1855,12}, { 959,14}, \ - { 255,13}, { 511,12}, { 1023,11}, { 2111,12}, \ - { 1087,11}, { 2239,10}, { 4479,12}, { 1215,11}, \ - { 2431,13}, { 639,12}, { 1471,13}, { 767,12}, \ - { 1727,11}, { 3455,13}, { 895,12}, { 1983,11}, \ - { 3967,14}, { 511,13}, { 1023,12}, { 2239,11}, \ - { 4479,13}, { 1151,12}, { 2495,11}, { 4991,13}, \ - { 1279,12}, { 2623,13}, { 1407,12}, { 2943,14}, \ - { 767,13}, { 1535,12}, { 3071,13}, { 1663,12}, \ - { 3455,13}, { 1919,12}, { 3967,15}, { 511,14}, \ - { 1023,13}, { 2175,12}, { 4479,13}, { 2431,12}, \ - { 4991,14}, { 1279,13}, { 2687,12}, { 5503,13}, \ - { 2943,12}, { 6015,14}, { 1535,13}, { 3455,14}, \ - { 1791,13}, { 3967,12}, { 7935,15}, 
{ 1023,14}, \ - { 2047,13}, { 4479,14}, { 2303,13}, { 4991,12}, \ - { 9983,14}, { 2559,13}, { 5503,14}, { 2815,13}, \ - { 6015,15}, { 1535,14}, { 3839,13}, { 7935,16}, \ - { 1023,15}, { 2047,14}, { 4095,13}, { 8191,12}, \ - { 16383,11}, { 32767,10}, { 65535, 9}, { 131071, 8}, \ - { 256, 9}, { 512,10}, { 1024,11}, { 2048,12}, \ - { 4096,13}, { 8192,14}, { 16384,15}, { 32768,16} } -#define MUL_FFT_TABLE3_SIZE 192 -#define MUL_FFT_THRESHOLD 6784 - -#define SQR_FFT_MODF_THRESHOLD 555 /* k = 5 */ -#define SQR_FFT_TABLE3 \ - { { 555, 5}, { 28, 6}, { 15, 5}, { 31, 6}, \ - { 16, 5}, { 33, 6}, { 29, 7}, { 15, 6}, \ - { 32, 7}, { 17, 6}, { 35, 7}, { 19, 6}, \ - { 39, 7}, { 29, 8}, { 15, 7}, { 35, 8}, \ - { 19, 7}, { 41, 8}, { 23, 7}, { 49, 8}, \ - { 27, 7}, { 55, 8}, { 31, 7}, { 63, 8}, \ - { 43, 9}, { 23, 8}, { 55, 9}, { 31, 8}, \ - { 67, 9}, { 39, 8}, { 79, 9}, { 47, 8}, \ - { 95, 9}, { 55,10}, { 31, 9}, { 79,10}, \ - { 47, 9}, { 95,11}, { 31,10}, { 63, 9}, \ - { 135,10}, { 79, 9}, { 159,10}, { 95,11}, \ - { 63,10}, { 143, 9}, { 287,10}, { 159,11}, \ - { 95,12}, { 63,11}, { 127,10}, { 255, 9}, \ - { 511, 8}, { 1023,10}, { 271, 9}, { 543,10}, \ - { 287,11}, { 159,10}, { 319, 9}, { 639,10}, \ - { 335, 9}, { 671,10}, { 351,11}, { 191,10}, \ - { 383, 9}, { 767,10}, { 399, 9}, { 799,10}, \ - { 415,12}, { 127,11}, { 255,10}, { 511, 9}, \ - { 1023,10}, { 543,11}, { 287,10}, { 607,11}, \ - { 319,10}, { 671,11}, { 351,12}, { 191,11}, \ - { 383,10}, { 799,11}, { 415,13}, { 127,12}, \ - { 255,11}, { 511,10}, { 1023,11}, { 543,10}, \ - { 1087,11}, { 607,12}, { 319,11}, { 671,10}, \ - { 1343,11}, { 735,10}, { 1471,12}, { 383,11}, \ - { 799,10}, { 1599,11}, { 863,10}, { 1727,12}, \ - { 447,11}, { 991,10}, { 1983,13}, { 255,12}, \ - { 511,11}, { 1023,10}, { 2047,11}, { 1087,12}, \ - { 575,11}, { 1215,10}, { 2431,12}, { 639,11}, \ - { 1343,12}, { 703,11}, { 1471,13}, { 383,12}, \ - { 767,11}, { 1599,12}, { 831,11}, { 1727,10}, \ - { 3455,12}, { 895,11}, { 1791,12}, { 959,11}, \ - { 
1983,14}, { 255,13}, { 511,12}, { 1023,11}, \ - { 2111,12}, { 1087,11}, { 2239,10}, { 4479,12}, \ - { 1215,11}, { 2431,13}, { 639,12}, { 1471,11}, \ - { 2943,13}, { 767,12}, { 1727,11}, { 3455,13}, \ - { 895,12}, { 1983,11}, { 3967,14}, { 511,13}, \ - { 1023,12}, { 2239,11}, { 4479,13}, { 1151,12}, \ - { 2495,13}, { 1279,12}, { 2623,13}, { 1407,12}, \ - { 2943,14}, { 767,13}, { 1663,12}, { 3455,13}, \ - { 1919,12}, { 3967,15}, { 511,14}, { 1023,13}, \ - { 2175,12}, { 4479,13}, { 2431,12}, { 4863,14}, \ - { 1279,13}, { 2943,12}, { 5887,14}, { 1535,13}, \ - { 3455,14}, { 1791,13}, { 3967,12}, { 7935,15}, \ - { 1023,14}, { 2047,13}, { 4479,14}, { 2303,13}, \ - { 4991,12}, { 9983,14}, { 2815,13}, { 5887,15}, \ - { 1535,14}, { 3327,13}, { 6655,14}, { 3839,13}, \ - { 7935,16}, { 1023,15}, { 2047,14}, { 4095,13}, \ - { 8191,12}, { 16383,11}, { 32767,10}, { 65535, 9}, \ - { 131071, 8}, { 256, 9}, { 512,10}, { 1024,11}, \ - { 2048,12}, { 4096,13}, { 8192,14}, { 16384,15}, \ - { 32768,16} } -#define SQR_FFT_TABLE3_SIZE 201 -#define SQR_FFT_THRESHOLD 5312 - -#define MULLO_BASECASE_THRESHOLD 0 /* always */ -#define MULLO_DC_THRESHOLD 38 -#define MULLO_MUL_N_THRESHOLD 13463 - -#define DC_DIV_QR_THRESHOLD 22 -#define DC_DIVAPPR_Q_THRESHOLD 43 -#define DC_BDIV_QR_THRESHOLD 78 -#define DC_BDIV_Q_THRESHOLD 157 - -#define INV_MULMOD_BNM1_THRESHOLD 50 -#define INV_NEWTON_THRESHOLD 15 -#define INV_APPR_THRESHOLD 18 - -#define BINV_NEWTON_THRESHOLD 351 -#define REDC_1_TO_REDC_N_THRESHOLD 84 - -#define MU_DIV_QR_THRESHOLD 889 -#define MU_DIVAPPR_Q_THRESHOLD 483 -#define MUPI_DIV_QR_THRESHOLD 0 /* always */ -#define MU_BDIV_QR_THRESHOLD 1589 -#define MU_BDIV_Q_THRESHOLD 1787 - -#define POWM_SEC_TABLE 2,25,95,473,1357 - -#define MATRIX22_STRASSEN_THRESHOLD 20 -#define HGCD_THRESHOLD 52 -#define HGCD_APPR_THRESHOLD 51 -#define HGCD_REDUCE_THRESHOLD 3524 -#define GCD_DC_THRESHOLD 213 -#define GCDEXT_DC_THRESHOLD 249 -#define JACOBI_BASE_METHOD 4 - -#define GET_STR_DC_THRESHOLD 13 -#define 
GET_STR_PRECOMPUTE_THRESHOLD 24 -#define SET_STR_DC_THRESHOLD 145 -#define SET_STR_PRECOMPUTE_THRESHOLD 545 - -#define FAC_DSC_THRESHOLD 91 -#define FAC_ODD_THRESHOLD 29 diff --git a/gmp/mpn/x86/coreisbr/gmp-mparam.h b/gmp/mpn/x86/coreisbr/gmp-mparam.h deleted file mode 100644 index 9b227a71ba..0000000000 --- a/gmp/mpn/x86/coreisbr/gmp-mparam.h +++ /dev/null @@ -1,203 +0,0 @@ -/* x86/coreisbr gmp-mparam.h -- Compiler/machine parameter header file. - -Copyright 1991, 1993, 1994, 2000-2011, 2014 Free Software Foundation, Inc. - -This file is part of the GNU MP Library. - -The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - -or - - * the GNU General Public License as published by the Free Software - Foundation; either version 2 of the License, or (at your option) any - later version. - -or both in parallel, as here. - -The GNU MP Library is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. - -You should have received copies of the GNU General Public License and the -GNU Lesser General Public License along with the GNU MP Library. If not, -see https://www.gnu.org/licenses/. 
*/ - -#define GMP_LIMB_BITS 32 -#define GMP_LIMB_BYTES 4 - -/* 3300 MHz Core i5 Sandy Bridge */ -/* FFT tuning limit = 40000000 */ -/* Generated by tuneup.c, 2014-03-13, gcc 4.5 */ - -#define MOD_1_NORM_THRESHOLD 18 -#define MOD_1_UNNORM_THRESHOLD 11 -#define MOD_1N_TO_MOD_1_1_THRESHOLD 9 -#define MOD_1U_TO_MOD_1_1_THRESHOLD 7 -#define MOD_1_1_TO_MOD_1_2_THRESHOLD 11 -#define MOD_1_2_TO_MOD_1_4_THRESHOLD 0 /* never mpn_mod_1s_2p */ -#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 9 -#define USE_PREINV_DIVREM_1 1 /* native */ -#define DIV_QR_1N_PI1_METHOD 1 -#define DIV_QR_1_NORM_THRESHOLD 16 -#define DIV_QR_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* never */ -#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */ -#define DIVEXACT_1_THRESHOLD 0 /* always (native) */ -#define BMOD_1_TO_MOD_1_THRESHOLD 19 - -#define MUL_TOOM22_THRESHOLD 28 -#define MUL_TOOM33_THRESHOLD 99 -#define MUL_TOOM44_THRESHOLD 160 -#define MUL_TOOM6H_THRESHOLD 268 -#define MUL_TOOM8H_THRESHOLD 490 - -#define MUL_TOOM32_TO_TOOM43_THRESHOLD 106 -#define MUL_TOOM32_TO_TOOM53_THRESHOLD 140 -#define MUL_TOOM42_TO_TOOM53_THRESHOLD 109 -#define MUL_TOOM42_TO_TOOM63_THRESHOLD 108 -#define MUL_TOOM43_TO_TOOM54_THRESHOLD 137 - -#define SQR_BASECASE_THRESHOLD 0 /* always (native) */ -#define SQR_TOOM2_THRESHOLD 48 -#define SQR_TOOM3_THRESHOLD 105 -#define SQR_TOOM4_THRESHOLD 256 -#define SQR_TOOM6_THRESHOLD 366 -#define SQR_TOOM8_THRESHOLD 562 - -#define MULMID_TOOM42_THRESHOLD 98 - -#define MULMOD_BNM1_THRESHOLD 19 -#define SQRMOD_BNM1_THRESHOLD 23 - -#define MUL_FFT_MODF_THRESHOLD 636 /* k = 5 */ -#define MUL_FFT_TABLE3 \ - { { 636, 5}, { 27, 6}, { 28, 7}, { 15, 6}, \ - { 32, 7}, { 17, 6}, { 35, 7}, { 19, 6}, \ - { 39, 7}, { 23, 6}, { 47, 7}, { 27, 8}, \ - { 15, 7}, { 35, 8}, { 19, 7}, { 41, 8}, \ - { 23, 7}, { 49, 8}, { 27, 9}, { 15, 8}, \ - { 31, 7}, { 63, 8}, { 39, 9}, { 23, 8}, \ - { 55,10}, { 15, 9}, { 31, 8}, { 67, 9}, \ - { 39, 8}, { 79, 9}, { 47, 8}, { 95, 9}, \ - { 55,10}, { 31, 9}, { 79,10}, { 47, 9}, \ 
- { 103,11}, { 31,10}, { 63, 9}, { 135,10}, \ - { 79, 9}, { 159,10}, { 95, 9}, { 191,11}, \ - { 63,10}, { 159,11}, { 95,10}, { 191,12}, \ - { 63,11}, { 127,10}, { 255, 9}, { 511,10}, \ - { 271, 9}, { 543,11}, { 159,10}, { 319, 9}, \ - { 639,10}, { 335, 9}, { 671,11}, { 191,10}, \ - { 383, 9}, { 767,10}, { 399, 9}, { 799,11}, \ - { 223,12}, { 127,11}, { 255,10}, { 543, 9}, \ - { 1087,11}, { 287,10}, { 607, 9}, { 1215,11}, \ - { 319,10}, { 671,12}, { 191,11}, { 383,10}, \ - { 799,11}, { 415,13}, { 127,12}, { 255,11}, \ - { 543,10}, { 1087,11}, { 607,10}, { 1215,12}, \ - { 319,11}, { 671,10}, { 1343,11}, { 735,10}, \ - { 1471,12}, { 383,11}, { 799,10}, { 1599,11}, \ - { 863,12}, { 447,11}, { 959,13}, { 255,12}, \ - { 511,11}, { 1087,12}, { 575,11}, { 1215,10}, \ - { 2431,12}, { 639,11}, { 1343,12}, { 703,11}, \ - { 1471,13}, { 383,12}, { 767,11}, { 1599,12}, \ - { 831,11}, { 1727,12}, { 959,14}, { 255,13}, \ - { 511,12}, { 1087,11}, { 2239,12}, { 1215,11}, \ - { 2431,13}, { 639,12}, { 1471,11}, { 2943,13}, \ - { 767,12}, { 1727,13}, { 895,12}, { 1983,14}, \ - { 511,13}, { 1023,12}, { 2239,13}, { 1151,12}, \ - { 2431,13}, { 1279,12}, { 2559,13}, { 1407,12}, \ - { 2943,14}, { 767,13}, { 1535,12}, { 3071,13}, \ - { 1663,12}, { 3455,13}, { 1919,15}, { 511,14}, \ - { 1023,13}, { 2175,12}, { 4479,13}, { 2431,14}, \ - { 1279,13}, { 2943,12}, { 5887,14}, { 16384,15}, \ - { 32768,16} } -#define MUL_FFT_TABLE3_SIZE 149 -#define MUL_FFT_THRESHOLD 7424 - -#define SQR_FFT_MODF_THRESHOLD 555 /* k = 5 */ -#define SQR_FFT_TABLE3 \ - { { 555, 5}, { 28, 6}, { 15, 5}, { 31, 6}, \ - { 29, 7}, { 15, 6}, { 33, 7}, { 17, 6}, \ - { 36, 7}, { 19, 6}, { 39, 7}, { 23, 6}, \ - { 47, 7}, { 29, 8}, { 15, 7}, { 35, 8}, \ - { 19, 7}, { 43, 8}, { 23, 7}, { 47, 8}, \ - { 27, 9}, { 15, 8}, { 31, 7}, { 63, 8}, \ - { 43, 9}, { 23, 8}, { 51, 9}, { 31, 8}, \ - { 67, 9}, { 39, 8}, { 79, 9}, { 47, 8}, \ - { 95, 9}, { 55,10}, { 31, 9}, { 79,10}, \ - { 47, 9}, { 95,11}, { 31,10}, { 63, 9}, \ - { 135,10}, { 79, 
9}, { 159,10}, { 95, 9}, \ - { 191,10}, { 111,11}, { 63,10}, { 159,11}, \ - { 95,10}, { 191,12}, { 63,11}, { 127,10}, \ - { 255, 9}, { 543,11}, { 159,10}, { 319, 9}, \ - { 639,10}, { 335, 9}, { 671,10}, { 351,11}, \ - { 191,10}, { 383, 9}, { 767,10}, { 399, 9}, \ - { 799,10}, { 415,12}, { 127,11}, { 255,10}, \ - { 511, 9}, { 1023,10}, { 543,11}, { 287,10}, \ - { 607,11}, { 319,10}, { 671,11}, { 351,12}, \ - { 191,11}, { 383,10}, { 799,11}, { 415,13}, \ - { 127,12}, { 255,11}, { 511,10}, { 1023,11}, \ - { 543,10}, { 1087,11}, { 607,10}, { 1215,12}, \ - { 319,11}, { 671,10}, { 1343,11}, { 735,10}, \ - { 1471,12}, { 383,11}, { 799,10}, { 1599,11}, \ - { 863,10}, { 1727,12}, { 447,11}, { 959,10}, \ - { 1919,11}, { 991,13}, { 255,12}, { 511,11}, \ - { 1087,12}, { 575,11}, { 1215,10}, { 2431,12}, \ - { 639,11}, { 1343,12}, { 703,11}, { 1471,13}, \ - { 383,12}, { 767,11}, { 1599,12}, { 831,11}, \ - { 1727,12}, { 959,11}, { 1919,14}, { 255,13}, \ - { 511,12}, { 1023,11}, { 2047,12}, { 1087,11}, \ - { 2239,12}, { 1215,11}, { 2431,13}, { 639,12}, \ - { 1471,11}, { 2943,13}, { 767,12}, { 1727,13}, \ - { 895,12}, { 1983,14}, { 511,13}, { 1023,12}, \ - { 2239,13}, { 1151,12}, { 2495,13}, { 1279,12}, \ - { 2623,13}, { 1407,12}, { 2943,14}, { 767,13}, \ - { 1663,12}, { 3455,13}, { 1919,12}, { 3839,15}, \ - { 511,14}, { 1023,13}, { 2175,12}, { 4479,13}, \ - { 2431,12}, { 4863,14}, { 1279,13}, { 2943,12}, \ - { 5887,14}, { 16384,15}, { 32768,16} } -#define SQR_FFT_TABLE3_SIZE 159 -#define SQR_FFT_THRESHOLD 5760 - -#define MULLO_BASECASE_THRESHOLD 0 /* always */ -#define MULLO_DC_THRESHOLD 62 -#define MULLO_MUL_N_THRESHOLD 14281 - -#define DC_DIV_QR_THRESHOLD 25 -#define DC_DIVAPPR_Q_THRESHOLD 43 -#define DC_BDIV_QR_THRESHOLD 99 -#define DC_BDIV_Q_THRESHOLD 240 - -#define INV_MULMOD_BNM1_THRESHOLD 54 -#define INV_NEWTON_THRESHOLD 14 -#define INV_APPR_THRESHOLD 13 - -#define BINV_NEWTON_THRESHOLD 363 -#define REDC_1_TO_REDC_N_THRESHOLD 90 - -#define MU_DIV_QR_THRESHOLD 998 -#define 
MU_DIVAPPR_Q_THRESHOLD 667 -#define MUPI_DIV_QR_THRESHOLD 0 /* always */ -#define MU_BDIV_QR_THRESHOLD 1787 -#define MU_BDIV_Q_THRESHOLD 2130 - -#define POWM_SEC_TABLE 1,16,126,480,1317 - -#define MATRIX22_STRASSEN_THRESHOLD 21 -#define HGCD_THRESHOLD 61 -#define HGCD_APPR_THRESHOLD 56 -#define HGCD_REDUCE_THRESHOLD 3810 -#define GCD_DC_THRESHOLD 283 -#define GCDEXT_DC_THRESHOLD 309 -#define JACOBI_BASE_METHOD 4 - -#define GET_STR_DC_THRESHOLD 12 -#define GET_STR_PRECOMPUTE_THRESHOLD 21 -#define SET_STR_DC_THRESHOLD 399 -#define SET_STR_PRECOMPUTE_THRESHOLD 1183 - -#define FAC_DSC_THRESHOLD 194 -#define FAC_ODD_THRESHOLD 34 diff --git a/gmp/mpn/x86/darwin.m4 b/gmp/mpn/x86/darwin.m4 index f8363db3f7..7ef8dfc105 100644 --- a/gmp/mpn/x86/darwin.m4 +++ b/gmp/mpn/x86/darwin.m4 @@ -1,82 +1,40 @@ divert(-1) -dnl Copyright 2007, 2011, 2012 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. +dnl Copyright 2007 Free Software Foundation, Inc. dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. 
dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. define(`DARWIN') - dnl Usage LEA(symbol,reg) dnl -dnl We maintain lists of stuff to append in load_eip and darwin_bd. The -dnl `index' stuff is needed to suppress repeated definitions. To avoid -dnl getting fooled by "var" and "var1", we add 'bol ' (the end of -dnl 'indirect_symbol') at the beginning and and a newline at the end. This -dnl might be a bit fragile. +dnl FIXME: Only handles one symbol per assembly file because of the +dnl way EPILOGUE_cpu is handled. 
-define(`LEA', -m4_assert_numargs(2) -`ifdef(`PIC',` -ifelse(index(defn(`load_eip'), `$2'),-1, -`m4append(`load_eip', -`L(movl_eip_`'substr($2,1)): +define(`LEA',` +define(`EPILOGUE_cpu', +` L(movl_eip_`'substr($2,1)): movl (%esp), $2 ret_internal -')') -ifelse(index(defn(`darwin_bd'), `bol $1 -'),-1, -`m4append(`darwin_bd', -` .section __IMPORT,__pointers,non_lazy_symbol_pointers + .section __IMPORT,__pointers,non_lazy_symbol_pointers L($1`'$non_lazy_ptr): .indirect_symbol $1 .long 0 -')') +') call L(movl_eip_`'substr($2,1)) movl L($1`'$non_lazy_ptr)-.($2), $2 -',` - movl `$'$1, $2 -')') - - -dnl EPILOGUE_cpu - -define(`EPILOGUE_cpu',`load_eip`'darwin_bd') - -define(`load_eip', `') dnl updated in LEA -define(`darwin_bd', `') dnl updated in LEA - - -dnl Usage: CALL(funcname) -dnl - -define(`CALL', -m4_assert_numargs(1) -`call GSYM_PREFIX`'$1') - -undefine(`PIC_WITH_EBX') +') divert`'dnl diff --git a/gmp/mpn/x86/dive_1.asm b/gmp/mpn/x86/dive_1.asm index 9a6cbb7931..d2d02f9f72 100644 --- a/gmp/mpn/x86/dive_1.asm +++ b/gmp/mpn/x86/dive_1.asm @@ -1,32 +1,21 @@ dnl x86 mpn_divexact_1 -- mpn by limb exact division. dnl Copyright 2001, 2002, 2007 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. 
+dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') @@ -108,7 +97,7 @@ ifdef(`PIC',` subl %edx, %eax C inv = 2*inv - inv*inv*d - ASSERT(e,` C expect d*inv == 1 mod 2^GMP_LIMB_BITS + ASSERT(e,` C expect d*inv == 1 mod 2^BITS_PER_MP_LIMB pushl %eax FRAME_pushl() imull PARAM_DIVISOR, %eax cmpl $1, %eax diff --git a/gmp/mpn/x86/divrem_1.asm b/gmp/mpn/x86/divrem_1.asm index 255d4935c3..a5fb88071d 100644 --- a/gmp/mpn/x86/divrem_1.asm +++ b/gmp/mpn/x86/divrem_1.asm @@ -1,32 +1,22 @@ dnl x86 mpn_divrem_1 -- mpn by limb division extending to fractional quotient. -dnl Copyright 1999-2003, 2007 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. 
-dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: +dnl Copyright 1999, 2000, 2001, 2002, 2003, 2007 Free Software Foundation, +dnl Inc. dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. 
include(`../config.m4') diff --git a/gmp/mpn/x86/divrem_2.asm b/gmp/mpn/x86/divrem_2.asm index 4c38ad0acb..bbadda921c 100644 --- a/gmp/mpn/x86/divrem_2.asm +++ b/gmp/mpn/x86/divrem_2.asm @@ -3,30 +3,19 @@ dnl x86 mpn_divrem_2 -- Divide an mpn number by a normalized 2-limb number. dnl Copyright 2007, 2008 Free Software Foundation, Inc. dnl This file is part of the GNU MP Library. -dnl + dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl +dnl it under the terms of the GNU Lesser General Public License as published +dnl by the Free Software Foundation; either version 3 of the License, or (at +dnl your option) any later version. + dnl The GNU MP Library is distributed in the hope that it will be useful, but dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. + +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. 
include(`../config.m4') @@ -92,7 +81,7 @@ PROLOGUE(mpn_divrem_2) seta %dl cmp 20(%esp), %ebp setae %al - orb %dl, %al C "orb" form to placate Sun tools + or %dl, %al jne L(35) L(8): mov 60(%esp), %esi C fn @@ -185,7 +174,7 @@ L(9): mov 64(%esp), %esi C up L(fix): seta %dl cmp 20(%esp), %ebp setae %al - orb %dl, %al C "orb" form to placate Sun tools + or %dl, %al je L(bck) inc %edi sub 20(%esp), %ebp diff --git a/gmp/mpn/x86/fat/com.c b/gmp/mpn/x86/fat/com.c deleted file mode 100644 index d359d4ce73..0000000000 --- a/gmp/mpn/x86/fat/com.c +++ /dev/null @@ -1,32 +0,0 @@ -/* Fat binary fallback mpn_com. - -Copyright 2003, 2009, 2011 Free Software Foundation, Inc. - -This file is part of the GNU MP Library. - -The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - -or - - * the GNU General Public License as published by the Free Software - Foundation; either version 2 of the License, or (at your option) any - later version. - -or both in parallel, as here. - -The GNU MP Library is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. - -You should have received copies of the GNU General Public License and the -GNU Lesser General Public License along with the GNU MP Library. If not, -see https://www.gnu.org/licenses/. */ - - -#include "mpn/generic/com.c" diff --git a/gmp/mpn/x86/fat/diveby3.c b/gmp/mpn/x86/fat/diveby3.c new file mode 100644 index 0000000000..7ea0161b72 --- /dev/null +++ b/gmp/mpn/x86/fat/diveby3.c @@ -0,0 +1,21 @@ +/* Fat binary fallback mpn_divexact_by3c. + +Copyright 2003, 2009 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. 
+ +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ + + +#include "mpn/generic/diveby3.c" diff --git a/gmp/mpn/x86/fat/fat.c b/gmp/mpn/x86/fat/fat.c index 1740813886..c3d1866c69 100644 --- a/gmp/mpn/x86/fat/fat.c +++ b/gmp/mpn/x86/fat/fat.c @@ -4,33 +4,22 @@ THEY'RE ALMOST CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN FUTURE GNU MP RELEASES. -Copyright 2003, 2004, 2011, 2012 Free Software Foundation, Inc. +Copyright 2003, 2004 Free Software Foundation, Inc. This file is part of the GNU MP Library. The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - -or - - * the GNU General Public License as published by the Free Software - Foundation; either version 2 of the License, or (at your option) any - later version. - -or both in parallel, as here. +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 3 of the License, or (at your +option) any later version. The GNU MP Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License -for more details. +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. -You should have received copies of the GNU General Public License and the -GNU Lesser General Public License along with the GNU MP Library. If not, -see https://www.gnu.org/licenses/. */ +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ #include <stdio.h> /* for printf */ #include <stdlib.h> /* for getenv */ @@ -42,10 +31,14 @@ see https://www.gnu.org/licenses/. */ /* Change this to "#define TRACE(x) x" for some traces. */ #define TRACE(x) +/* Change this to 1 to take the cpuid from GMP_CPU_TYPE env var. */ +#define WANT_FAKE_CPUID 0 + /* fat_entry.asm */ -long __gmpn_cpuid (char [12], int); -int __gmpn_cpuid_available (void); +long __gmpn_cpuid __GMP_PROTO ((char dst[12], int id)); +int __gmpn_cpuid_available __GMP_PROTO ((void)); + #if WANT_FAKE_CPUID @@ -56,9 +49,8 @@ int __gmpn_cpuid_available (void); #define __gmpn_cpuid fake_cpuid #define __gmpn_cpuid_available fake_cpuid_available -#define MAKE_FMS(family, model) \ - ((((family) & 0xf) << 8) + (((family) & 0xff0) << 20) \ - + (((model) & 0xf) << 4) + (((model) & 0xf0) << 12)) +#define MAKE_FMS(family, model) \ + (((family) << 8) + ((model << 4))) static struct { const char *name; @@ -72,29 +64,17 @@ static struct { { "pentiumpro", "GenuineIntel", MAKE_FMS (6, 0) }, { "pentium2", "GenuineIntel", MAKE_FMS (6, 2) }, { "pentium3", "GenuineIntel", MAKE_FMS (6, 7) }, - { "pentium4", "GenuineIntel", MAKE_FMS (15, 2) }, - { "prescott", "GenuineIntel", MAKE_FMS (15, 3) }, - { "nocona", "GenuineIntel", MAKE_FMS (15, 4) }, - { "core2", "GenuineIntel", MAKE_FMS (6, 0xf) }, - { "coreinhm", "GenuineIntel", MAKE_FMS (6, 0x1a) }, - { "coreiwsm", "GenuineIntel", MAKE_FMS (6, 0x25) }, - { "coreisbr", "GenuineIntel", MAKE_FMS (6, 0x2a) }, - { "coreihwl", "GenuineIntel", 
MAKE_FMS (6, 0x3c) }, - { "atom", "GenuineIntel", MAKE_FMS (6, 0x1c) }, + { "pentium4", "GenuineIntel", MAKE_FMS (7, 0) }, { "k5", "AuthenticAMD", MAKE_FMS (5, 0) }, { "k6", "AuthenticAMD", MAKE_FMS (5, 3) }, { "k62", "AuthenticAMD", MAKE_FMS (5, 8) }, { "k63", "AuthenticAMD", MAKE_FMS (5, 9) }, { "athlon", "AuthenticAMD", MAKE_FMS (6, 0) }, - { "k8", "AuthenticAMD", MAKE_FMS (15, 0) }, - { "k10", "AuthenticAMD", MAKE_FMS (16, 0) }, - { "bobcat", "AuthenticAMD", MAKE_FMS (20, 1) }, - { "bulldozer", "AuthenticAMD", MAKE_FMS (21, 1) }, + { "x86_64", "AuthenticAMD", MAKE_FMS (15, 0) }, { "viac3", "CentaurHauls", MAKE_FMS (6, 0) }, { "viac32", "CentaurHauls", MAKE_FMS (6, 9) }, - { "nano", "CentaurHauls", MAKE_FMS (6, 15) }, }; static int @@ -148,46 +128,28 @@ typedef DECL_preinv_mod_1 ((*preinv_mod_1_t)); struct cpuvec_t __gmpn_cpuvec = { __MPN(add_n_init), - 0, - 0, __MPN(addmul_1_init), - 0, - __MPN(bdiv_dbm1c_init), - __MPN(cnd_add_n_init), - __MPN(cnd_sub_n_init), - __MPN(com_init), __MPN(copyd_init), __MPN(copyi_init), __MPN(divexact_1_init), + __MPN(divexact_by3c_init), __MPN(divrem_1_init), __MPN(gcd_1_init), __MPN(lshift_init), - __MPN(lshiftc_init), __MPN(mod_1_init), - __MPN(mod_1_1p_init), - __MPN(mod_1_1p_cps_init), - __MPN(mod_1s_2p_init), - __MPN(mod_1s_2p_cps_init), - __MPN(mod_1s_4p_init), - __MPN(mod_1s_4p_cps_init), __MPN(mod_34lsub1_init), __MPN(modexact_1c_odd_init), __MPN(mul_1_init), __MPN(mul_basecase_init), - __MPN(mullo_basecase_init), __MPN(preinv_divrem_1_init), __MPN(preinv_mod_1_init), - __MPN(redc_1_init), - __MPN(redc_2_init), __MPN(rshift_init), __MPN(sqr_basecase_init), __MPN(sub_n_init), - 0, __MPN(submul_1_init), 0 }; -int __gmpn_cpuvec_initialized = 0; /* The following setups start with generic x86, then overwrite with specifics for a chip, and higher versions of that chip. 
@@ -257,107 +219,21 @@ __gmpn_cpuvec_init (void) case 6: TRACE (printf (" p6\n")); CPUVEC_SETUP_p6; - switch (model) - { - case 0x00: - case 0x01: - TRACE (printf (" pentiumpro\n")); - break; - - case 0x02: - case 0x03: - case 0x04: - case 0x05: - case 0x06: - TRACE (printf (" pentium2\n")); - CPUVEC_SETUP_p6_mmx; - break; - - case 0x07: - case 0x08: - case 0x0a: - case 0x0b: - case 0x0c: - TRACE (printf (" pentium3\n")); - CPUVEC_SETUP_p6_mmx; - CPUVEC_SETUP_p6_p3mmx; - break; - - case 0x09: /* Banias */ - case 0x0d: /* Dothan */ - case 0x0e: /* Yonah */ - TRACE (printf (" Banias/Dothan/Yonah\n")); - CPUVEC_SETUP_p6_mmx; - CPUVEC_SETUP_p6_p3mmx; - CPUVEC_SETUP_p6_sse2; - break; - - case 0x0f: /* Conroe Merom Kentsfield Allendale */ - case 0x10: - case 0x11: - case 0x12: - case 0x13: - case 0x14: - case 0x15: - case 0x16: - case 0x17: /* PNR Wolfdale Yorkfield */ - case 0x18: - case 0x19: - case 0x1d: /* PNR Dunnington */ - TRACE (printf (" Conroe\n")); - CPUVEC_SETUP_p6_mmx; - CPUVEC_SETUP_p6_p3mmx; - CPUVEC_SETUP_p6_sse2; - CPUVEC_SETUP_core2; - break; - - case 0x1c: /* Atom Silverthorne */ - case 0x26: /* Atom Lincroft */ - case 0x27: /* Atom Saltwell */ - case 0x36: /* Atom Cedarview/Saltwell */ - TRACE (printf (" atom\n")); - CPUVEC_SETUP_atom; - CPUVEC_SETUP_atom_mmx; - CPUVEC_SETUP_atom_sse2; - break; - - case 0x1a: /* NHM Gainestown */ - case 0x1b: - case 0x1e: /* NHM Lynnfield/Jasper */ - case 0x1f: - case 0x20: - case 0x21: - case 0x22: - case 0x23: - case 0x24: - case 0x25: /* WSM Clarkdale/Arrandale */ - case 0x28: - case 0x29: - case 0x2b: - case 0x2c: /* WSM Gulftown */ - case 0x2e: /* NHM Beckton */ - case 0x2f: /* WSM Eagleton */ - TRACE (printf (" nehalem/westmere\n")); - CPUVEC_SETUP_p6_mmx; - CPUVEC_SETUP_p6_p3mmx; - CPUVEC_SETUP_p6_sse2; - CPUVEC_SETUP_core2; - CPUVEC_SETUP_coreinhm; - break; - - case 0x2a: /* SBR */ - case 0x2d: /* SBR-EP */ - case 0x3a: /* IBR */ - case 0x3c: /* Haswell */ - TRACE (printf (" sandybridge\n")); + if (model >= 2) 
+ { + TRACE (printf (" pentium2\n")); CPUVEC_SETUP_p6_mmx; + } + if (model >= 7) + { + TRACE (printf (" pentium3\n")); CPUVEC_SETUP_p6_p3mmx; + } + if (model >= 0xD || model == 9) + { + TRACE (printf (" p6 with sse2\n")); CPUVEC_SETUP_p6_sse2; - CPUVEC_SETUP_core2; - CPUVEC_SETUP_coreinhm; - CPUVEC_SETUP_coreisbr; - break; - } + } break; case 15: @@ -395,40 +271,13 @@ __gmpn_cpuvec_init (void) break; case 6: TRACE (printf (" athlon\n")); + athlon: CPUVEC_SETUP_k7; CPUVEC_SETUP_k7_mmx; break; - - case 0x0f: /* k8 */ - case 0x11: /* "fam 11h", mix of k8 and k10 */ - case 0x13: /* unknown, conservatively assume k8 */ - case 0x16: /* unknown, conservatively assume k8 */ - case 0x17: /* unknown, conservatively assume k8 */ - TRACE (printf (" k8\n")); - CPUVEC_SETUP_k7; - CPUVEC_SETUP_k7_mmx; - CPUVEC_SETUP_k8; - break; - - case 0x10: /* k10 */ - case 0x12: /* k10 (llano) */ - TRACE (printf (" k10\n")); - CPUVEC_SETUP_k7; - CPUVEC_SETUP_k7_mmx; - break; - - case 0x14: /* bobcat */ - TRACE (printf (" bobcat\n")); - CPUVEC_SETUP_k7; - CPUVEC_SETUP_k7_mmx; - CPUVEC_SETUP_bobcat; - break; - - case 0x15: /* bulldozer */ - TRACE (printf (" bulldozer\n")); - CPUVEC_SETUP_k7; - CPUVEC_SETUP_k7_mmx; - break; + case 15: + TRACE (printf (" x86_64\n")); + goto athlon; } } else if (strcmp (vendor_string, "CentaurHauls") == 0) @@ -441,11 +290,6 @@ __gmpn_cpuvec_init (void) { TRACE (printf (" viac32\n")); } - if (model >= 15) - { - TRACE (printf (" nano\n")); - CPUVEC_SETUP_nano; - } break; } } @@ -469,5 +313,5 @@ __gmpn_cpuvec_init (void) /* Set this once the threshold fields are ready. Use volatile to prevent it getting moved. */ - *((volatile int *) &__gmpn_cpuvec_initialized) = 1; + ((volatile struct cpuvec_t *) &__gmpn_cpuvec)->initialized = 1; } diff --git a/gmp/mpn/x86/fat/fat_entry.asm b/gmp/mpn/x86/fat/fat_entry.asm index 6e3cb44dd5..bd46e4e8bd 100644 --- a/gmp/mpn/x86/fat/fat_entry.asm +++ b/gmp/mpn/x86/fat/fat_entry.asm @@ -1,32 +1,21 @@ dnl x86 fat binary entrypoints. 
-dnl Copyright 2003, 2012 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: +dnl Copyright 2003 Free Software Foundation, Inc. dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. 
include(`../config.m4') @@ -129,7 +118,7 @@ EPILOGUE() L(fat_init): C al __gmpn_cpuvec byte offset - movzbl %al, %eax + movsbl %al, %eax pushl %eax ifdef(`PIC',` diff --git a/gmp/mpn/x86/fat/gcd_1.c b/gmp/mpn/x86/fat/gcd_1.c index f809bd8092..5bd000618c 100644 --- a/gmp/mpn/x86/fat/gcd_1.c +++ b/gmp/mpn/x86/fat/gcd_1.c @@ -5,28 +5,17 @@ Copyright 2003 Free Software Foundation, Inc. This file is part of the GNU MP Library. The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - -or - - * the GNU General Public License as published by the Free Software - Foundation; either version 2 of the License, or (at your option) any - later version. - -or both in parallel, as here. +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 3 of the License, or (at your +option) any later version. The GNU MP Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. -You should have received copies of the GNU General Public License and the -GNU Lesser General Public License along with the GNU MP Library. If not, -see https://www.gnu.org/licenses/. */ +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. 
*/ #include "mpn/generic/gcd_1.c" diff --git a/gmp/mpn/x86/fat/gmp-mparam.h b/gmp/mpn/x86/fat/gmp-mparam.h index 3641a6bafa..9127d1425f 100644 --- a/gmp/mpn/x86/fat/gmp-mparam.h +++ b/gmp/mpn/x86/fat/gmp-mparam.h @@ -1,35 +1,25 @@ /* Fat binary x86 gmp-mparam.h -- Compiler/machine parameter header file. -Copyright 1991, 1993, 1994, 2000-2003, 2011 Free Software Foundation, Inc. +Copyright 1991, 1993, 1994, 2000, 2001, 2002, 2003 Free Software Foundation, +Inc. This file is part of the GNU MP Library. The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - -or - - * the GNU General Public License as published by the Free Software - Foundation; either version 2 of the License, or (at your option) any - later version. - -or both in parallel, as here. +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 3 of the License, or (at your +option) any later version. The GNU MP Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. -You should have received copies of the GNU General Public License and the -GNU Lesser General Public License along with the GNU MP Library. If not, -see https://www.gnu.org/licenses/. */ +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. 
*/ -#define GMP_LIMB_BITS 32 -#define GMP_LIMB_BYTES 4 +#define BITS_PER_MP_LIMB 32 +#define BYTES_PER_MP_LIMB 4 /* mpn_divexact_1 is faster than mpn_divrem_1 at all sizes. The only time @@ -44,17 +34,15 @@ see https://www.gnu.org/licenses/. */ preinv. */ #define USE_PREINV_DIVREM_1 1 -#define BMOD_1_TO_MOD_1_THRESHOLD 20 - /* mpn_sqr_basecase is faster than mpn_mul_basecase at all sizes, no need - for mpn_sqr to call the latter. */ + for mpn_sqr_n to call the latter. */ #define SQR_BASECASE_THRESHOLD 0 /* Sensible fallbacks for these, when not taken from a cpu-specific gmp-mparam.h. */ -#define MUL_TOOM22_THRESHOLD 20 -#define MUL_TOOM33_THRESHOLD 130 -#define SQR_TOOM2_THRESHOLD 30 +#define MUL_KARATSUBA_THRESHOLD 20 +#define MUL_TOOM3_THRESHOLD 130 +#define SQR_KARATSUBA_THRESHOLD 30 #define SQR_TOOM3_THRESHOLD 200 /* These are values more or less in the middle of what the typical x86 chips diff --git a/gmp/mpn/x86/fat/lshiftc.c b/gmp/mpn/x86/fat/lshiftc.c deleted file mode 100644 index 9ecf48978f..0000000000 --- a/gmp/mpn/x86/fat/lshiftc.c +++ /dev/null @@ -1,32 +0,0 @@ -/* Fat binary fallback mpn_lshiftc. - -Copyright 2003, 2009, 2011 Free Software Foundation, Inc. - -This file is part of the GNU MP Library. - -The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - -or - - * the GNU General Public License as published by the Free Software - Foundation; either version 2 of the License, or (at your option) any - later version. - -or both in parallel, as here. - -The GNU MP Library is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. 
- -You should have received copies of the GNU General Public License and the -GNU Lesser General Public License along with the GNU MP Library. If not, -see https://www.gnu.org/licenses/. */ - - -#include "mpn/generic/lshiftc.c" diff --git a/gmp/mpn/x86/fat/mod_1.c b/gmp/mpn/x86/fat/mod_1.c deleted file mode 100644 index 4f149cc353..0000000000 --- a/gmp/mpn/x86/fat/mod_1.c +++ /dev/null @@ -1,32 +0,0 @@ -/* Fat binary fallback mpn_mod_1. - -Copyright 2003, 2009 Free Software Foundation, Inc. - -This file is part of the GNU MP Library. - -The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - -or - - * the GNU General Public License as published by the Free Software - Foundation; either version 2 of the License, or (at your option) any - later version. - -or both in parallel, as here. - -The GNU MP Library is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. - -You should have received copies of the GNU General Public License and the -GNU Lesser General Public License along with the GNU MP Library. If not, -see https://www.gnu.org/licenses/. */ - - -#include "mpn/generic/mod_1.c" diff --git a/gmp/mpn/x86/fat/mod_1_1.c b/gmp/mpn/x86/fat/mod_1_1.c deleted file mode 100644 index 92eaa7a87f..0000000000 --- a/gmp/mpn/x86/fat/mod_1_1.c +++ /dev/null @@ -1,36 +0,0 @@ -/* Fat binary fallback mpn_mod_1_1p. - -Copyright 2003, 2009, 2011 Free Software Foundation, Inc. - -This file is part of the GNU MP Library. 
- -The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - -or - - * the GNU General Public License as published by the Free Software - Foundation; either version 2 of the License, or (at your option) any - later version. - -or both in parallel, as here. - -The GNU MP Library is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. - -You should have received copies of the GNU General Public License and the -GNU Lesser General Public License along with the GNU MP Library. If not, -see https://www.gnu.org/licenses/. */ - -/* -PROLOGUE(mpn_mod_1_1p_cps) -*/ - -#define OPERATION_mod_1_1_cps 1 -#include "mpn/generic/mod_1_1.c" diff --git a/gmp/mpn/x86/fat/mod_1_2.c b/gmp/mpn/x86/fat/mod_1_2.c deleted file mode 100644 index 9095a61c93..0000000000 --- a/gmp/mpn/x86/fat/mod_1_2.c +++ /dev/null @@ -1,36 +0,0 @@ -/* Fat binary fallback mpn_mod_1s_2p. - -Copyright 2003, 2009, 2011 Free Software Foundation, Inc. - -This file is part of the GNU MP Library. - -The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - -or - - * the GNU General Public License as published by the Free Software - Foundation; either version 2 of the License, or (at your option) any - later version. - -or both in parallel, as here. 
- -The GNU MP Library is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. - -You should have received copies of the GNU General Public License and the -GNU Lesser General Public License along with the GNU MP Library. If not, -see https://www.gnu.org/licenses/. */ - -/* -PROLOGUE(mpn_mod_1s_2p_cps) -*/ - -#define OPERATION_mod_1_2_cps 1 -#include "mpn/generic/mod_1_2.c" diff --git a/gmp/mpn/x86/fat/mod_1_4.c b/gmp/mpn/x86/fat/mod_1_4.c deleted file mode 100644 index 51c0def443..0000000000 --- a/gmp/mpn/x86/fat/mod_1_4.c +++ /dev/null @@ -1,36 +0,0 @@ -/* Fat binary fallback mpn_mod_1s_4p. - -Copyright 2003, 2009, 2011 Free Software Foundation, Inc. - -This file is part of the GNU MP Library. - -The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - -or - - * the GNU General Public License as published by the Free Software - Foundation; either version 2 of the License, or (at your option) any - later version. - -or both in parallel, as here. - -The GNU MP Library is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. - -You should have received copies of the GNU General Public License and the -GNU Lesser General Public License along with the GNU MP Library. If not, -see https://www.gnu.org/licenses/. 
*/ - -/* -PROLOGUE(mpn_mod_1s_4p_cps) -*/ - -#define OPERATION_mod_1_4_cps 1 -#include "mpn/generic/mod_1_4.c" diff --git a/gmp/mpn/x86/fat/mode1o.c b/gmp/mpn/x86/fat/mode1o.c index 870ddb899b..a5244cae44 100644 --- a/gmp/mpn/x86/fat/mode1o.c +++ b/gmp/mpn/x86/fat/mode1o.c @@ -5,28 +5,17 @@ Copyright 2003 Free Software Foundation, Inc. This file is part of the GNU MP Library. The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - -or - - * the GNU General Public License as published by the Free Software - Foundation; either version 2 of the License, or (at your option) any - later version. - -or both in parallel, as here. +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 3 of the License, or (at your +option) any later version. The GNU MP Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. -You should have received copies of the GNU General Public License and the -GNU Lesser General Public License along with the GNU MP Library. If not, -see https://www.gnu.org/licenses/. */ +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ #include "mpn/generic/mode1o.c" diff --git a/gmp/mpn/x86/fat/mullo_basecase.c b/gmp/mpn/x86/fat/mullo_basecase.c deleted file mode 100644 index 7f86be64c5..0000000000 --- a/gmp/mpn/x86/fat/mullo_basecase.c +++ /dev/null @@ -1,32 +0,0 @@ -/* Fat binary fallback mpn_mullo_basecase. 
- -Copyright 2012 Free Software Foundation, Inc. - -This file is part of the GNU MP Library. - -The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - -or - - * the GNU General Public License as published by the Free Software - Foundation; either version 2 of the License, or (at your option) any - later version. - -or both in parallel, as here. - -The GNU MP Library is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. - -You should have received copies of the GNU General Public License and the -GNU Lesser General Public License along with the GNU MP Library. If not, -see https://www.gnu.org/licenses/. */ - - -#include "mpn/generic/mullo_basecase.c" diff --git a/gmp/mpn/x86/fat/redc_1.c b/gmp/mpn/x86/fat/redc_1.c deleted file mode 100644 index 0025403353..0000000000 --- a/gmp/mpn/x86/fat/redc_1.c +++ /dev/null @@ -1,32 +0,0 @@ -/* Fat binary fallback mpn_redc_1. - -Copyright 2012 Free Software Foundation, Inc. - -This file is part of the GNU MP Library. - -The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - -or - - * the GNU General Public License as published by the Free Software - Foundation; either version 2 of the License, or (at your option) any - later version. - -or both in parallel, as here. 
- -The GNU MP Library is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. - -You should have received copies of the GNU General Public License and the -GNU Lesser General Public License along with the GNU MP Library. If not, -see https://www.gnu.org/licenses/. */ - - -#include "mpn/generic/redc_1.c" diff --git a/gmp/mpn/x86/fat/redc_2.c b/gmp/mpn/x86/fat/redc_2.c deleted file mode 100644 index 1932d58323..0000000000 --- a/gmp/mpn/x86/fat/redc_2.c +++ /dev/null @@ -1,32 +0,0 @@ -/* Fat binary fallback mpn_redc_2. - -Copyright 2012 Free Software Foundation, Inc. - -This file is part of the GNU MP Library. - -The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - -or - - * the GNU General Public License as published by the Free Software - Foundation; either version 2 of the License, or (at your option) any - later version. - -or both in parallel, as here. - -The GNU MP Library is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. - -You should have received copies of the GNU General Public License and the -GNU Lesser General Public License along with the GNU MP Library. If not, -see https://www.gnu.org/licenses/. */ - - -#include "mpn/generic/redc_2.c" diff --git a/gmp/mpn/x86/geode/gmp-mparam.h b/gmp/mpn/x86/geode/gmp-mparam.h deleted file mode 100644 index cc9c9f1789..0000000000 --- a/gmp/mpn/x86/geode/gmp-mparam.h +++ /dev/null @@ -1,141 +0,0 @@ -/* Generic x86 gmp-mparam.h -- Compiler/machine parameter header file. 
- -Copyright 1991, 1993, 1994, 2000-2002, 2011 Free Software Foundation, Inc. - -This file is part of the GNU MP Library. - -The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - -or - - * the GNU General Public License as published by the Free Software - Foundation; either version 2 of the License, or (at your option) any - later version. - -or both in parallel, as here. - -The GNU MP Library is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. - -You should have received copies of the GNU General Public License and the -GNU Lesser General Public License along with the GNU MP Library. If not, -see https://www.gnu.org/licenses/. 
*/ - -#define GMP_LIMB_BITS 32 -#define GMP_LIMB_BYTES 4 - -/* Generated by tuneup.c, 2011-01-30, gcc 3.4 */ - -#define MOD_1_NORM_THRESHOLD 6 -#define MOD_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* never */ -#define MOD_1N_TO_MOD_1_1_THRESHOLD 17 -#define MOD_1U_TO_MOD_1_1_THRESHOLD 9 -#define MOD_1_1_TO_MOD_1_2_THRESHOLD 0 /* never mpn_mod_1_1p */ -#define MOD_1_2_TO_MOD_1_4_THRESHOLD 14 -#define PREINV_MOD_1_TO_MOD_1_THRESHOLD MP_SIZE_T_MAX /* never */ -#define USE_PREINV_DIVREM_1 0 -#define DIVEXACT_1_THRESHOLD 0 /* always (native) */ -#define BMOD_1_TO_MOD_1_THRESHOLD 42 - -#define MUL_TOOM22_THRESHOLD 18 -#define MUL_TOOM33_THRESHOLD 66 -#define MUL_TOOM44_THRESHOLD 105 -#define MUL_TOOM6H_THRESHOLD 141 -#define MUL_TOOM8H_THRESHOLD 212 - -#define MUL_TOOM32_TO_TOOM43_THRESHOLD 62 -#define MUL_TOOM32_TO_TOOM53_THRESHOLD 69 -#define MUL_TOOM42_TO_TOOM53_THRESHOLD 65 -#define MUL_TOOM42_TO_TOOM63_THRESHOLD 67 - -#define SQR_BASECASE_THRESHOLD 0 /* always (native) */ -#define SQR_TOOM2_THRESHOLD 33 -#define SQR_TOOM3_THRESHOLD 60 -#define SQR_TOOM4_THRESHOLD 136 -#define SQR_TOOM6_THRESHOLD 196 -#define SQR_TOOM8_THRESHOLD 292 - -#define MULMOD_BNM1_THRESHOLD 14 -#define SQRMOD_BNM1_THRESHOLD 16 - -#define MUL_FFT_MODF_THRESHOLD 468 /* k = 5 */ -#define MUL_FFT_TABLE3 \ - { { 468, 5}, { 17, 6}, { 9, 5}, { 19, 6}, \ - { 11, 5}, { 23, 6}, { 21, 7}, { 11, 6}, \ - { 25, 7}, { 13, 6}, { 27, 7}, { 15, 6}, \ - { 31, 7}, { 21, 8}, { 11, 7}, { 27, 8}, \ - { 15, 7}, { 33, 8}, { 19, 7}, { 39, 8}, \ - { 23, 7}, { 47, 8}, { 27, 9}, { 15, 8}, \ - { 39, 9}, { 23, 8}, { 47,10}, { 15, 9}, \ - { 31, 8}, { 67, 9}, { 39, 8}, { 79, 9}, \ - { 47, 8}, { 95, 9}, { 55,10}, { 31, 9}, \ - { 63, 8}, { 127, 9}, { 79,10}, { 47, 9}, \ - { 95,11}, { 31,10}, { 63, 9}, { 135,10}, \ - { 79, 9}, { 159,10}, { 95, 9}, { 191,11}, \ - { 63,10}, { 127, 9}, { 255,10}, { 143, 9}, \ - { 287,10}, { 159,11}, { 95,10}, { 191, 9}, \ - { 383,12}, { 4096,13}, { 8192,14}, { 16384,15}, \ - { 32768,16} } -#define 
MUL_FFT_TABLE3_SIZE 61 -#define MUL_FFT_THRESHOLD 5504 - -#define SQR_FFT_MODF_THRESHOLD 396 /* k = 5 */ -#define SQR_FFT_TABLE3 \ - { { 396, 5}, { 21, 6}, { 11, 5}, { 23, 6}, \ - { 21, 7}, { 11, 6}, { 24, 7}, { 13, 6}, \ - { 27, 7}, { 15, 6}, { 31, 7}, { 21, 8}, \ - { 11, 7}, { 27, 8}, { 15, 7}, { 33, 8}, \ - { 19, 7}, { 39, 8}, { 23, 7}, { 47, 8}, \ - { 27, 9}, { 15, 8}, { 39, 9}, { 23, 8}, \ - { 51,10}, { 15, 9}, { 31, 8}, { 67, 9}, \ - { 39, 8}, { 79, 9}, { 47, 8}, { 95, 9}, \ - { 55,10}, { 31, 9}, { 79,10}, { 47, 9}, \ - { 95,11}, { 31,10}, { 63, 9}, { 127, 8}, \ - { 255, 9}, { 135,10}, { 79, 9}, { 159, 8}, \ - { 319,10}, { 95, 9}, { 191,11}, { 63,10}, \ - { 127, 9}, { 255, 8}, { 511,10}, { 143, 9}, \ - { 287, 8}, { 575,10}, { 159,11}, { 95,10}, \ - { 191,12}, { 4096,13}, { 8192,14}, { 16384,15}, \ - { 32768,16} } -#define SQR_FFT_TABLE3_SIZE 61 -#define SQR_FFT_THRESHOLD 3712 - -#define MULLO_BASECASE_THRESHOLD 3 -#define MULLO_DC_THRESHOLD 37 -#define MULLO_MUL_N_THRESHOLD 10950 - -#define DC_DIV_QR_THRESHOLD 59 -#define DC_DIVAPPR_Q_THRESHOLD 189 -#define DC_BDIV_QR_THRESHOLD 55 -#define DC_BDIV_Q_THRESHOLD 136 - -#define INV_MULMOD_BNM1_THRESHOLD 50 -#define INV_NEWTON_THRESHOLD 183 -#define INV_APPR_THRESHOLD 181 - -#define BINV_NEWTON_THRESHOLD 204 -#define REDC_1_TO_REDC_N_THRESHOLD 54 - -#define MU_DIV_QR_THRESHOLD 1142 -#define MU_DIVAPPR_Q_THRESHOLD 1142 -#define MUPI_DIV_QR_THRESHOLD 81 -#define MU_BDIV_QR_THRESHOLD 889 -#define MU_BDIV_Q_THRESHOLD 998 - -#define MATRIX22_STRASSEN_THRESHOLD 13 -#define HGCD_THRESHOLD 133 -#define GCD_DC_THRESHOLD 451 -#define GCDEXT_DC_THRESHOLD 318 -#define JACOBI_BASE_METHOD 1 - -#define GET_STR_DC_THRESHOLD 15 -#define GET_STR_PRECOMPUTE_THRESHOLD 30 -#define SET_STR_DC_THRESHOLD 547 -#define SET_STR_PRECOMPUTE_THRESHOLD 1049 diff --git a/gmp/mpn/x86/gmp-mparam.h b/gmp/mpn/x86/gmp-mparam.h index 2cb1984889..22ee86f7e1 100644 --- a/gmp/mpn/x86/gmp-mparam.h +++ b/gmp/mpn/x86/gmp-mparam.h @@ -1,35 +1,24 @@ /* 
Generic x86 gmp-mparam.h -- Compiler/machine parameter header file. -Copyright 1991, 1993, 1994, 2000-2002 Free Software Foundation, Inc. +Copyright 1991, 1993, 1994, 2000, 2001, 2002 Free Software Foundation, Inc. This file is part of the GNU MP Library. The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - -or - - * the GNU General Public License as published by the Free Software - Foundation; either version 2 of the License, or (at your option) any - later version. - -or both in parallel, as here. +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 3 of the License, or (at your +option) any later version. The GNU MP Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. -You should have received copies of the GNU General Public License and the -GNU Lesser General Public License along with the GNU MP Library. If not, -see https://www.gnu.org/licenses/. */ +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. 
*/ -#define GMP_LIMB_BITS 32 -#define GMP_LIMB_BYTES 4 +#define BITS_PER_MP_LIMB 32 +#define BYTES_PER_MP_LIMB 4 /* Generic x86 mpn_divexact_1 is faster than generic x86 mpn_divrem_1 on all diff --git a/gmp/mpn/x86/i486/gmp-mparam.h b/gmp/mpn/x86/i486/gmp-mparam.h index aa7dbad45b..aaddea9f18 100644 --- a/gmp/mpn/x86/i486/gmp-mparam.h +++ b/gmp/mpn/x86/i486/gmp-mparam.h @@ -1,46 +1,35 @@ /* 80486 gmp-mparam.h -- Compiler/machine parameter header file. -Copyright 2001-2003 Free Software Foundation, Inc. +Copyright 2001, 2002, 2003 Free Software Foundation, Inc. This file is part of the GNU MP Library. The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - -or - - * the GNU General Public License as published by the Free Software - Foundation; either version 2 of the License, or (at your option) any - later version. - -or both in parallel, as here. +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 3 of the License, or (at your +option) any later version. The GNU MP Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. -You should have received copies of the GNU General Public License and the -GNU Lesser General Public License along with the GNU MP Library. If not, -see https://www.gnu.org/licenses/. */ +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. 
*/ -#define GMP_LIMB_BITS 32 -#define GMP_LIMB_BYTES 4 +#define BITS_PER_MP_LIMB 32 +#define BYTES_PER_MP_LIMB 4 /* 100MHz DX4 */ /* Generated by tuneup.c, 2003-02-13, gcc 2.95 */ -#define MUL_TOOM22_THRESHOLD 18 -#define MUL_TOOM33_THRESHOLD 228 +#define MUL_KARATSUBA_THRESHOLD 18 +#define MUL_TOOM3_THRESHOLD 228 #define SQR_BASECASE_THRESHOLD 13 -#define SQR_TOOM2_THRESHOLD 49 +#define SQR_KARATSUBA_THRESHOLD 49 #define SQR_TOOM3_THRESHOLD 238 #define DIV_SB_PREINV_THRESHOLD MP_SIZE_T_MAX /* never */ diff --git a/gmp/mpn/x86/k10/gmp-mparam.h b/gmp/mpn/x86/k10/gmp-mparam.h deleted file mode 100644 index 2a1ae5a6bb..0000000000 --- a/gmp/mpn/x86/k10/gmp-mparam.h +++ /dev/null @@ -1,211 +0,0 @@ -/* x86/k10 gmp-mparam.h -- Compiler/machine parameter header file. - -Copyright 1991, 1993, 1994, 2000-2011, 2014 Free Software Foundation, Inc. - -This file is part of the GNU MP Library. - -The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - -or - - * the GNU General Public License as published by the Free Software - Foundation; either version 2 of the License, or (at your option) any - later version. - -or both in parallel, as here. - -The GNU MP Library is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. - -You should have received copies of the GNU General Public License and the -GNU Lesser General Public License along with the GNU MP Library. If not, -see https://www.gnu.org/licenses/. 
*/ - -#define GMP_LIMB_BITS 32 -#define GMP_LIMB_BYTES 4 - -/* 2400 MHz K10 Barcelona */ -/* FFT tuning limit = 25000000 */ -/* Generated by tuneup.c, 2014-03-13, gcc 4.5 */ - -#define MOD_1_NORM_THRESHOLD 0 /* always */ -#define MOD_1_UNNORM_THRESHOLD 0 /* always */ -#define MOD_1N_TO_MOD_1_1_THRESHOLD 12 -#define MOD_1U_TO_MOD_1_1_THRESHOLD 7 -#define MOD_1_1_TO_MOD_1_2_THRESHOLD 9 -#define MOD_1_2_TO_MOD_1_4_THRESHOLD 12 -#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 15 -#define USE_PREINV_DIVREM_1 1 /* native */ -#define DIV_QR_1N_PI1_METHOD 1 -#define DIV_QR_1_NORM_THRESHOLD 1 -#define DIV_QR_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* never */ -#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */ -#define DIVEXACT_1_THRESHOLD 0 /* always (native) */ -#define BMOD_1_TO_MOD_1_THRESHOLD 32 - -#define MUL_TOOM22_THRESHOLD 24 -#define MUL_TOOM33_THRESHOLD 81 -#define MUL_TOOM44_THRESHOLD 130 -#define MUL_TOOM6H_THRESHOLD 189 -#define MUL_TOOM8H_THRESHOLD 430 - -#define MUL_TOOM32_TO_TOOM43_THRESHOLD 81 -#define MUL_TOOM32_TO_TOOM53_THRESHOLD 91 -#define MUL_TOOM42_TO_TOOM53_THRESHOLD 82 -#define MUL_TOOM42_TO_TOOM63_THRESHOLD 90 -#define MUL_TOOM43_TO_TOOM54_THRESHOLD 112 - -#define SQR_BASECASE_THRESHOLD 0 /* always (native) */ -#define SQR_TOOM2_THRESHOLD 38 -#define SQR_TOOM3_THRESHOLD 77 -#define SQR_TOOM4_THRESHOLD 184 -#define SQR_TOOM6_THRESHOLD 262 -#define SQR_TOOM8_THRESHOLD 369 - -#define MULMID_TOOM42_THRESHOLD 56 - -#define MULMOD_BNM1_THRESHOLD 17 -#define SQRMOD_BNM1_THRESHOLD 18 - -#define MUL_FFT_MODF_THRESHOLD 765 /* k = 5 */ -#define MUL_FFT_TABLE3 \ - { { 765, 5}, { 25, 6}, { 13, 5}, { 27, 6}, \ - { 25, 7}, { 13, 6}, { 27, 7}, { 15, 6}, \ - { 31, 7}, { 17, 6}, { 35, 7}, { 19, 6}, \ - { 39, 7}, { 23, 6}, { 47, 7}, { 27, 8}, \ - { 15, 7}, { 35, 8}, { 19, 7}, { 41, 8}, \ - { 23, 7}, { 47, 8}, { 27, 9}, { 15, 8}, \ - { 31, 7}, { 63, 8}, { 39, 9}, { 23, 8}, \ - { 51, 9}, { 31, 8}, { 67, 9}, { 39, 8}, \ - { 79, 9}, { 47, 8}, { 95,10}, { 31, 9}, \ - { 63, 8}, { 
127, 9}, { 79,10}, { 47, 9}, \ - { 103,11}, { 31,10}, { 63, 9}, { 135,10}, \ - { 79, 9}, { 159,10}, { 95, 9}, { 199,10}, \ - { 111,11}, { 63,10}, { 127, 9}, { 263,10}, \ - { 175,11}, { 95,10}, { 207,12}, { 63,11}, \ - { 127,10}, { 255, 9}, { 543, 8}, { 1087, 9}, \ - { 575,11}, { 159,10}, { 319, 9}, { 671, 8}, \ - { 1343, 9}, { 735,11}, { 191, 9}, { 799, 8}, \ - { 1599,10}, { 415, 9}, { 863,11}, { 223,12}, \ - { 127,11}, { 255,10}, { 543, 9}, { 1087,10}, \ - { 607, 9}, { 1215, 8}, { 2431,11}, { 319,10}, \ - { 671, 9}, { 1343,10}, { 735,12}, { 191,11}, \ - { 383,10}, { 799, 9}, { 1599,11}, { 415,10}, \ - { 863, 9}, { 1727,13}, { 127,12}, { 255,11}, \ - { 543,10}, { 1087,11}, { 607,10}, { 1215, 9}, \ - { 2431,12}, { 319,11}, { 671,10}, { 1343,11}, \ - { 735,10}, { 1471, 9}, { 2943, 8}, { 5887,12}, \ - { 383,11}, { 799,10}, { 1599,11}, { 863,10}, \ - { 1727,12}, { 447,11}, { 959,10}, { 1919,11}, \ - { 991,10}, { 1983,13}, { 255,12}, { 511,11}, \ - { 1087,12}, { 575,11}, { 1215,10}, { 2431,12}, \ - { 639,11}, { 1343,12}, { 703,11}, { 1471,10}, \ - { 2943, 9}, { 5887,13}, { 383,12}, { 767,11}, \ - { 1599,12}, { 831,11}, { 1727,10}, { 3455,12}, \ - { 959,11}, { 1983,14}, { 255,13}, { 511,12}, \ - { 1087,11}, { 2239,12}, { 1215,11}, { 2431,13}, \ - { 639,12}, { 1471,11}, { 2943,10}, { 5887,13}, \ - { 767,12}, { 1727,11}, { 3455,13}, { 895,12}, \ - { 1983,14}, { 511,13}, { 1023,12}, { 2239,13}, \ - { 1151,12}, { 2495,13}, { 1407,12}, { 2943,11}, \ - { 5887,14}, { 767,13}, { 1663,12}, { 3455,13}, \ - { 1919,12}, { 3839,15}, { 511,14}, { 1023,13}, \ - { 2175,12}, { 4351,13}, { 2431,14}, { 1279,13}, \ - { 2943,12}, { 5887,14}, { 16384,15}, { 32768,16} } -#define MUL_FFT_TABLE3_SIZE 172 -#define MUL_FFT_THRESHOLD 6784 - -#define SQR_FFT_MODF_THRESHOLD 555 /* k = 5 */ -#define SQR_FFT_TABLE3 \ - { { 555, 5}, { 21, 6}, { 11, 5}, { 25, 6}, \ - { 13, 5}, { 27, 6}, { 27, 7}, { 15, 6}, \ - { 32, 7}, { 17, 6}, { 35, 7}, { 19, 6}, \ - { 39, 7}, { 27, 8}, { 15, 7}, { 35, 8}, \ - { 19, 
7}, { 39, 8}, { 23, 7}, { 47, 8}, \ - { 27, 9}, { 15, 8}, { 31, 7}, { 63, 8}, \ - { 39, 9}, { 23, 8}, { 51,10}, { 15, 9}, \ - { 31, 8}, { 67, 9}, { 39, 8}, { 79, 9}, \ - { 47,10}, { 31, 9}, { 79,10}, { 47, 9}, \ - { 95,11}, { 31,10}, { 63, 9}, { 127,10}, \ - { 79, 9}, { 167,10}, { 95, 9}, { 191,10}, \ - { 111,11}, { 63,10}, { 143, 9}, { 287, 8}, \ - { 575,10}, { 159,11}, { 95,10}, { 191,12}, \ - { 63,11}, { 127,10}, { 255, 9}, { 543, 8}, \ - { 1087,10}, { 287, 9}, { 607,11}, { 159,10}, \ - { 319, 9}, { 671, 8}, { 1343,10}, { 351, 9}, \ - { 735, 8}, { 1471,11}, { 191,10}, { 383, 9}, \ - { 767,10}, { 399, 9}, { 799, 8}, { 1599,10}, \ - { 415, 9}, { 863,11}, { 223,10}, { 479,12}, \ - { 127,11}, { 255,10}, { 543, 9}, { 1087,11}, \ - { 287,10}, { 607, 9}, { 1215, 8}, { 2431,11}, \ - { 319,10}, { 671, 9}, { 1343,11}, { 351,10}, \ - { 735, 9}, { 1471,12}, { 191,11}, { 383,10}, \ - { 799, 9}, { 1599,11}, { 415,10}, { 863, 9}, \ - { 1727,11}, { 479,13}, { 127,12}, { 255,11}, \ - { 511,10}, { 1023,11}, { 543,10}, { 1087,11}, \ - { 607,10}, { 1215, 9}, { 2431,12}, { 319,11}, \ - { 671,10}, { 1343,11}, { 735,10}, { 1471, 9}, \ - { 2943,12}, { 383,11}, { 799,10}, { 1599,11}, \ - { 863,10}, { 1727,12}, { 447,11}, { 959,10}, \ - { 1919,11}, { 991,10}, { 1983,12}, { 511,11}, \ - { 1087,12}, { 575,11}, { 1215,10}, { 2431,12}, \ - { 639,11}, { 1343,12}, { 703,11}, { 1471,10}, \ - { 2943,13}, { 383,12}, { 767,11}, { 1599,12}, \ - { 831,11}, { 1727,10}, { 3455,12}, { 959,11}, \ - { 1983,13}, { 511,12}, { 1215,11}, { 2431,13}, \ - { 639,12}, { 1471,11}, { 2943,13}, { 767,12}, \ - { 1727,11}, { 3455,13}, { 895,12}, { 1983,14}, \ - { 511,13}, { 1023,12}, { 2111,13}, { 1151,12}, \ - { 2431,13}, { 1407,12}, { 2943,14}, { 767,13}, \ - { 1663,12}, { 3455,13}, { 1919,12}, { 3839,15}, \ - { 511,14}, { 1023,13}, { 2431,14}, { 1279,13}, \ - { 2943,12}, { 5887,14}, { 16384,15}, { 32768,16} } -#define SQR_FFT_TABLE3_SIZE 172 -#define SQR_FFT_THRESHOLD 5504 - -#define MULLO_BASECASE_THRESHOLD 7 
-#define MULLO_DC_THRESHOLD 40 -#define MULLO_MUL_N_THRESHOLD 13463 - -#define DC_DIV_QR_THRESHOLD 59 -#define DC_DIVAPPR_Q_THRESHOLD 270 -#define DC_BDIV_QR_THRESHOLD 55 -#define DC_BDIV_Q_THRESHOLD 206 - -#define INV_MULMOD_BNM1_THRESHOLD 62 -#define INV_NEWTON_THRESHOLD 254 -#define INV_APPR_THRESHOLD 252 - -#define BINV_NEWTON_THRESHOLD 274 -#define REDC_1_TO_REDC_N_THRESHOLD 74 - -#define MU_DIV_QR_THRESHOLD 1589 -#define MU_DIVAPPR_Q_THRESHOLD 1589 -#define MUPI_DIV_QR_THRESHOLD 106 -#define MU_BDIV_QR_THRESHOLD 1470 -#define MU_BDIV_Q_THRESHOLD 1558 - -#define POWM_SEC_TABLE 1,16,114,428,1240 - -#define MATRIX22_STRASSEN_THRESHOLD 19 -#define HGCD_THRESHOLD 136 -#define HGCD_APPR_THRESHOLD 175 -#define HGCD_REDUCE_THRESHOLD 3389 -#define GCD_DC_THRESHOLD 595 -#define GCDEXT_DC_THRESHOLD 424 -#define JACOBI_BASE_METHOD 4 - -#define GET_STR_DC_THRESHOLD 15 -#define GET_STR_PRECOMPUTE_THRESHOLD 28 -#define SET_STR_DC_THRESHOLD 100 -#define SET_STR_PRECOMPUTE_THRESHOLD 1360 - -#define FAC_DSC_THRESHOLD 224 -#define FAC_ODD_THRESHOLD 29 diff --git a/gmp/mpn/x86/k6/README b/gmp/mpn/x86/k6/README index 1d65af3851..f488cbd1d8 100644 --- a/gmp/mpn/x86/k6/README +++ b/gmp/mpn/x86/k6/README @@ -3,28 +3,17 @@ Copyright 2000, 2001 Free Software Foundation, Inc. This file is part of the GNU MP Library. The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - -or - - * the GNU General Public License as published by the Free Software - Foundation; either version 2 of the License, or (at your option) any - later version. - -or both in parallel, as here. +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 3 of the License, or (at your +option) any later version. 
The GNU MP Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. -You should have received copies of the GNU General Public License and the -GNU Lesser General Public License along with the GNU MP Library. If not, -see https://www.gnu.org/licenses/. +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. diff --git a/gmp/mpn/x86/k6/aors_n.asm b/gmp/mpn/x86/k6/aors_n.asm index 168f9b4ae4..09afd8f688 100644 --- a/gmp/mpn/x86/k6/aors_n.asm +++ b/gmp/mpn/x86/k6/aors_n.asm @@ -1,32 +1,21 @@ dnl AMD K6 mpn_add/sub_n -- mpn addition or subtraction. -dnl Copyright 1999-2002 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: +dnl Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc. dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. 
dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') diff --git a/gmp/mpn/x86/k6/aorsmul_1.asm b/gmp/mpn/x86/k6/aorsmul_1.asm index eaa92ebb24..c3795e3abb 100644 --- a/gmp/mpn/x86/k6/aorsmul_1.asm +++ b/gmp/mpn/x86/k6/aorsmul_1.asm @@ -1,52 +1,42 @@ dnl AMD K6 mpn_addmul_1/mpn_submul_1 -- add or subtract mpn multiple. -dnl Copyright 1999-2003, 2005 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: +dnl Copyright 1999, 2000, 2001, 2002, 2003, 2005 Free Software Foundation, +dnl Inc. dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. 
+dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') -C cycles/limb -C P5 -C P6 model 0-8,10-12 5.94 -C P6 model 9 (Banias) 5.51 -C P6 model 13 (Dothan) 5.57 +C cycles/limb +C P5: +C P6 model 0-8,10-12) 5.94 +C P6 model 9 (Banias) +C P6 model 13 (Dothan) 5.57 C P4 model 0 (Willamette) C P4 model 1 (?) C P4 model 2 (Northwood) C P4 model 3 (Prescott) C P4 model 4 (Nocona) -C AMD K6 7.65-8.5 (data dependent) -C AMD K7 -C AMD K8 +C K6: 7.65-8.5 (data dependent) +C K7: +C K8: -dnl K6: large multipliers small multipliers +dnl K6: large multpliers small multpliers dnl UNROLL_COUNT cycles/limb cycles/limb dnl 4 9.5 7.78 dnl 8 9.0 7.78 @@ -257,7 +247,7 @@ C registers at the point of doing the mul for the initial two carry limbs. C C The add/adc for the initial carry in %esi is necessary only for the C mpn_addmul/submul_1c entry points. 
Duplicating the startup code to -C eliminate this for the plain mpn_add/submul_1 doesn't seem like a good +C eliminiate this for the plain mpn_add/submul_1 doesn't seem like a good C idea. dnl overlapping with parameters already fetched diff --git a/gmp/mpn/x86/k6/cross.pl b/gmp/mpn/x86/k6/cross.pl index fc921a56b7..cf476d603b 100755 --- a/gmp/mpn/x86/k6/cross.pl +++ b/gmp/mpn/x86/k6/cross.pl @@ -2,31 +2,20 @@ # Copyright 2000, 2001 Free Software Foundation, Inc. # -# This file is part of the GNU MP Library. +# This file is part of the GNU MP Library. # -# The GNU MP Library is free software; you can redistribute it and/or modify -# it under the terms of either: +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published +# by the Free Software Foundation; either version 3 of the License, or (at +# your option) any later version. # -# * the GNU Lesser General Public License as published by the Free -# Software Foundation; either version 3 of the License, or (at your -# option) any later version. +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. # -# or -# -# * the GNU General Public License as published by the Free Software -# Foundation; either version 2 of the License, or (at your option) any -# later version. -# -# or both in parallel, as here. -# -# The GNU MP Library is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -# for more details. -# -# You should have received copies of the GNU General Public License and the -# GNU Lesser General Public License along with the GNU MP Library. 
If not, -# see https://www.gnu.org/licenses/. +# You should have received a copy of the GNU Lesser General Public License +# along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. # Usage: cross.pl [filename.o]... diff --git a/gmp/mpn/x86/k6/divrem_1.asm b/gmp/mpn/x86/k6/divrem_1.asm index b4cea4fa2a..1c86d9bd6c 100644 --- a/gmp/mpn/x86/k6/divrem_1.asm +++ b/gmp/mpn/x86/k6/divrem_1.asm @@ -1,32 +1,22 @@ dnl AMD K6 mpn_divrem_1 -- mpn by limb division. -dnl Copyright 1999-2003, 2007 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: +dnl Copyright 1999, 2000, 2001, 2002, 2003, 2007 Free Software Foundation, +dnl Inc. dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') diff --git a/gmp/mpn/x86/k6/gcd_1.asm b/gmp/mpn/x86/k6/gcd_1.asm index 0c233ff362..58aff08221 100644 --- a/gmp/mpn/x86/k6/gcd_1.asm +++ b/gmp/mpn/x86/k6/gcd_1.asm @@ -1,32 +1,21 @@ dnl AMD K6 mpn_gcd_1 -- mpn by 1 gcd. -dnl Copyright 2000-2002, 2004 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: +dnl Copyright 2000, 2001, 2002, 2004 Free Software Foundation, Inc. dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. 
+dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') diff --git a/gmp/mpn/x86/k6/gmp-mparam.h b/gmp/mpn/x86/k6/gmp-mparam.h index f03f1b2d91..c04446a573 100644 --- a/gmp/mpn/x86/k6/gmp-mparam.h +++ b/gmp/mpn/x86/k6/gmp-mparam.h @@ -1,166 +1,68 @@ /* AMD K6 gmp-mparam.h -- Compiler/machine parameter header file. -Copyright 1991, 1993, 1994, 2000-2004, 2009, 2010 Free Software Foundation, -Inc. +Copyright 1991, 1993, 1994, 2000, 2001, 2002, 2003, 2004, 2009 +Free Software Foundation, Inc. This file is part of the GNU MP Library. The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of either: +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 3 of the License, or (at your +option) any later version. - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. -or +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. 
*/ - * the GNU General Public License as published by the Free Software - Foundation; either version 2 of the License, or (at your option) any - later version. +#define BITS_PER_MP_LIMB 32 +#define BYTES_PER_MP_LIMB 4 -or both in parallel, as here. -The GNU MP Library is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. +/* 450MHz K6-2 */ -You should have received copies of the GNU General Public License and the -GNU Lesser General Public License along with the GNU MP Library. If not, -see https://www.gnu.org/licenses/. */ +/* Generated by tuneup.c, 2009-01-05, gcc 3.4 */ -#define GMP_LIMB_BITS 32 -#define GMP_LIMB_BYTES 4 +#define MUL_KARATSUBA_THRESHOLD 19 +#define MUL_TOOM3_THRESHOLD 73 +#define MUL_TOOM44_THRESHOLD 104 +#define SQR_BASECASE_THRESHOLD 0 /* always (native) */ +#define SQR_KARATSUBA_THRESHOLD 32 +#define SQR_TOOM3_THRESHOLD 105 +#define SQR_TOOM4_THRESHOLD 143 -/* 450MHz K6-2 */ +#define MULLOW_BASECASE_THRESHOLD 0 /* always */ +#define MULLOW_DC_THRESHOLD 64 +#define MULLOW_MUL_N_THRESHOLD 232 + +#define DIV_SB_PREINV_THRESHOLD 4 +#define DIV_DC_THRESHOLD 67 +#define POWM_THRESHOLD 110 + +#define MATRIX22_STRASSEN_THRESHOLD 21 +#define HGCD_THRESHOLD 195 +#define GCD_DC_THRESHOLD 602 +#define GCDEXT_DC_THRESHOLD 662 +#define JACOBI_BASE_METHOD 2 + +#define USE_PREINV_DIVREM_1 0 +#define USE_PREINV_MOD_1 1 /* native */ +#define DIVEXACT_1_THRESHOLD 0 /* always (native) */ +#define MODEXACT_1_ODD_THRESHOLD 0 /* always (native) */ + +#define GET_STR_DC_THRESHOLD 31 +#define GET_STR_PRECOMPUTE_THRESHOLD 52 +#define SET_STR_DC_THRESHOLD 1127 +#define SET_STR_PRECOMPUTE_THRESHOLD 1795 + +#define MUL_FFT_TABLE { 336, 672, 1152, 3584, 10240, 24576, 163840, 393216, 0 } +#define MUL_FFT_MODF_THRESHOLD 352 +#define MUL_FFT_THRESHOLD 7168 -#define MOD_1_NORM_THRESHOLD 12 -#define 
MOD_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* never */ -#define MOD_1N_TO_MOD_1_1_THRESHOLD 41 -#define MOD_1U_TO_MOD_1_1_THRESHOLD 32 -#define MOD_1_1_TO_MOD_1_2_THRESHOLD 3 -#define MOD_1_2_TO_MOD_1_4_THRESHOLD 0 -#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 128 -#define USE_PREINV_DIVREM_1 0 -#define DIVEXACT_1_THRESHOLD 0 /* always (native) */ -#define BMOD_1_TO_MOD_1_THRESHOLD MP_SIZE_T_MAX /* never */ - -#define MUL_TOOM22_THRESHOLD 20 -#define MUL_TOOM33_THRESHOLD 69 -#define MUL_TOOM44_THRESHOLD 106 -#define MUL_TOOM6H_THRESHOLD 157 -#define MUL_TOOM8H_THRESHOLD 199 - -#define MUL_TOOM32_TO_TOOM43_THRESHOLD 73 -#define MUL_TOOM32_TO_TOOM53_THRESHOLD 69 -#define MUL_TOOM42_TO_TOOM53_THRESHOLD 65 -#define MUL_TOOM42_TO_TOOM63_THRESHOLD 64 - -#define SQR_BASECASE_THRESHOLD 0 /* always (native) */ -#define SQR_TOOM2_THRESHOLD 32 -#define SQR_TOOM3_THRESHOLD 97 -#define SQR_TOOM4_THRESHOLD 143 -#define SQR_TOOM6_THRESHOLD 222 -#define SQR_TOOM8_THRESHOLD 272 - -#define MULMOD_BNM1_THRESHOLD 13 -#define SQRMOD_BNM1_THRESHOLD 17 - -#define MUL_FFT_MODF_THRESHOLD 476 /* k = 5 */ -#define MUL_FFT_TABLE3 \ - { { 476, 5}, { 17, 6}, { 9, 5}, { 19, 6}, \ - { 11, 5}, { 23, 6}, { 17, 7}, { 9, 6}, \ - { 19, 7}, { 11, 6}, { 23, 7}, { 13, 6}, \ - { 27, 7}, { 15, 6}, { 31, 7}, { 17, 6}, \ - { 35, 7}, { 21, 8}, { 11, 7}, { 27, 8}, \ - { 15, 7}, { 35, 8}, { 19, 7}, { 39, 8}, \ - { 23, 7}, { 47, 8}, { 27, 9}, { 15, 8}, \ - { 31, 7}, { 63, 8}, { 39, 9}, { 23, 8}, \ - { 51,10}, { 15, 9}, { 31, 8}, { 67, 9}, \ - { 47,10}, { 31, 9}, { 79,10}, { 47, 9}, \ - { 95,11}, { 31,10}, { 63, 9}, { 135,10}, \ - { 79, 9}, { 167,10}, { 95, 9}, { 191,10}, \ - { 111,11}, { 63,10}, { 127, 9}, { 255,10}, \ - { 143, 9}, { 287,10}, { 159,11}, { 95,10}, \ - { 191, 9}, { 383,12}, { 63,11}, { 127,10}, \ - { 255, 9}, { 511,10}, { 271, 9}, { 543,10}, \ - { 287,11}, { 159,10}, { 351,11}, { 191,10}, \ - { 415, 9}, { 831,11}, { 223,12}, { 127,11}, \ - { 255,10}, { 543,11}, { 287,10}, { 575,11}, \ - { 351,10}, { 703,12}, 
{ 191,11}, { 415,10}, \ - { 831,13}, { 127,12}, { 255,11}, { 543,10}, \ - { 1087,11}, { 575,12}, { 319,11}, { 703,12}, \ - { 383,11}, { 831,12}, { 447,11}, { 895,13}, \ - { 255,12}, { 511,11}, { 1087,12}, { 575,11}, \ - { 1151,12}, { 703,13}, { 383,12}, { 959,14}, \ - { 255,13}, { 511,12}, { 1215,13}, { 8192,14}, \ - { 16384,15}, { 32768,16} } -#define MUL_FFT_TABLE3_SIZE 106 -#define MUL_FFT_THRESHOLD 7424 - -#define SQR_FFT_MODF_THRESHOLD 432 /* k = 5 */ -#define SQR_FFT_TABLE3 \ - { { 432, 5}, { 17, 6}, { 9, 5}, { 19, 6}, \ - { 11, 5}, { 23, 6}, { 21, 7}, { 11, 6}, \ - { 24, 7}, { 13, 6}, { 27, 7}, { 15, 6}, \ - { 31, 7}, { 21, 8}, { 11, 7}, { 29, 8}, \ - { 15, 7}, { 35, 8}, { 19, 7}, { 39, 8}, \ - { 23, 7}, { 49, 8}, { 27, 9}, { 15, 8}, \ - { 39, 9}, { 23, 7}, { 93, 8}, { 47, 7}, \ - { 95, 8}, { 51,10}, { 15, 9}, { 31, 8}, \ - { 67, 9}, { 39, 8}, { 79, 9}, { 47, 8}, \ - { 95, 9}, { 55,10}, { 31, 9}, { 71, 8}, \ - { 143, 9}, { 79,10}, { 47, 9}, { 95,11}, \ - { 31,10}, { 63, 9}, { 135,10}, { 79, 9}, \ - { 167,10}, { 95, 9}, { 191,11}, { 63,10}, \ - { 127, 9}, { 255,10}, { 143, 9}, { 287, 8}, \ - { 575,10}, { 159, 9}, { 319,11}, { 95,10}, \ - { 191,12}, { 63,11}, { 127,10}, { 255, 9}, \ - { 511,10}, { 271, 9}, { 543,10}, { 287,11}, \ - { 159,10}, { 319, 9}, { 639,10}, { 351, 9}, \ - { 703,11}, { 191,10}, { 415,11}, { 223,12}, \ - { 127,11}, { 255,10}, { 543,11}, { 287,10}, \ - { 607,11}, { 319,10}, { 639,11}, { 351,10}, \ - { 703,12}, { 191,11}, { 415,10}, { 831,13}, \ - { 127,12}, { 255,11}, { 543,10}, { 1087,11}, \ - { 607,12}, { 319,11}, { 703,12}, { 383,11}, \ - { 831,12}, { 447,13}, { 255,12}, { 511,11}, \ - { 1087,12}, { 575,11}, { 1215,12}, { 703,13}, \ - { 383,12}, { 895,14}, { 255,13}, { 511,12}, \ - { 1215,13}, { 8192,14}, { 16384,15}, { 32768,16} } -#define SQR_FFT_TABLE3_SIZE 112 -#define SQR_FFT_THRESHOLD 7040 - -#define MULLO_BASECASE_THRESHOLD 3 -#define MULLO_DC_THRESHOLD 60 -#define MULLO_MUL_N_THRESHOLD 13463 - -#define DC_DIV_QR_THRESHOLD 78 
-#define DC_DIVAPPR_Q_THRESHOLD 252 -#define DC_BDIV_QR_THRESHOLD 84 -#define DC_BDIV_Q_THRESHOLD 171 - -#define INV_MULMOD_BNM1_THRESHOLD 55 -#define INV_NEWTON_THRESHOLD 234 -#define INV_APPR_THRESHOLD 236 - -#define BINV_NEWTON_THRESHOLD 268 -#define REDC_1_TO_REDC_N_THRESHOLD 67 - -#define MU_DIV_QR_THRESHOLD 1308 -#define MU_DIVAPPR_Q_THRESHOLD 1142 -#define MUPI_DIV_QR_THRESHOLD 134 -#define MU_BDIV_QR_THRESHOLD 1164 -#define MU_BDIV_Q_THRESHOLD 1164 - -#define MATRIX22_STRASSEN_THRESHOLD 15 -#define HGCD_THRESHOLD 182 -#define GCD_DC_THRESHOLD 591 -#define GCDEXT_DC_THRESHOLD 472 -#define JACOBI_BASE_METHOD 2 - -#define GET_STR_DC_THRESHOLD 24 -#define GET_STR_PRECOMPUTE_THRESHOLD 40 -#define SET_STR_DC_THRESHOLD 834 -#define SET_STR_PRECOMPUTE_THRESHOLD 2042 +#define SQR_FFT_TABLE { 272, 672, 1408, 4608, 10240, 24576, 163840, 393216, 0 } +#define SQR_FFT_MODF_THRESHOLD 336 +#define SQR_FFT_THRESHOLD 3840 diff --git a/gmp/mpn/x86/k6/k62mmx/copyd.asm b/gmp/mpn/x86/k6/k62mmx/copyd.asm index f80a5a1cdb..227ed78783 100644 --- a/gmp/mpn/x86/k6/k62mmx/copyd.asm +++ b/gmp/mpn/x86/k6/k62mmx/copyd.asm @@ -1,32 +1,21 @@ dnl AMD K6-2 mpn_copyd -- copy limb vector, decrementing. dnl Copyright 2001, 2002 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. 
+dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') diff --git a/gmp/mpn/x86/k6/k62mmx/lshift.asm b/gmp/mpn/x86/k6/k62mmx/lshift.asm index c86575feed..e48e73e19a 100644 --- a/gmp/mpn/x86/k6/k62mmx/lshift.asm +++ b/gmp/mpn/x86/k6/k62mmx/lshift.asm @@ -1,32 +1,21 @@ dnl AMD K6-2 mpn_lshift -- mpn left shift. dnl Copyright 1999, 2000, 2002 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. 
-dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') diff --git a/gmp/mpn/x86/k6/k62mmx/rshift.asm b/gmp/mpn/x86/k6/k62mmx/rshift.asm index f604a7bd52..b3114d0e6e 100644 --- a/gmp/mpn/x86/k6/k62mmx/rshift.asm +++ b/gmp/mpn/x86/k6/k62mmx/rshift.asm @@ -1,32 +1,21 @@ dnl AMD K6-2 mpn_rshift -- mpn right shift. dnl Copyright 1999, 2000, 2002 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. 
-dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') diff --git a/gmp/mpn/x86/k6/mmx/com.asm b/gmp/mpn/x86/k6/mmx/com_n.asm index b747454627..42e6ab392a 100644 --- a/gmp/mpn/x86/k6/mmx/com.asm +++ b/gmp/mpn/x86/k6/mmx/com_n.asm @@ -1,32 +1,21 @@ -dnl AMD K6-2 mpn_com -- mpn bitwise one's complement. 
+dnl AMD K6-2 mpn_com_n -- mpn bitwise one's complement. -dnl Copyright 1999-2002 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: +dnl Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc. dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. 
include(`../config.m4') @@ -39,7 +28,7 @@ C K6-2 1.0 1.18 1.18 1.18 cycles/limb C K6 1.5 1.85 1.75 1.85 -C void mpn_com (mp_ptr dst, mp_srcptr src, mp_size_t size); +C void mpn_com_n (mp_ptr dst, mp_srcptr src, mp_size_t size); C C Take the bitwise ones-complement of src,size and write it to dst,size. @@ -49,7 +38,7 @@ defframe(PARAM_DST, 4) TEXT ALIGN(16) -PROLOGUE(mpn_com) +PROLOGUE(mpn_com_n) deflit(`FRAME',0) movl PARAM_SIZE, %ecx diff --git a/gmp/mpn/x86/k6/mmx/dive_1.asm b/gmp/mpn/x86/k6/mmx/dive_1.asm index b644dca8cd..9cc90d88a5 100644 --- a/gmp/mpn/x86/k6/mmx/dive_1.asm +++ b/gmp/mpn/x86/k6/mmx/dive_1.asm @@ -1,32 +1,21 @@ dnl AMD K6 mpn_divexact_1 -- mpn by limb exact division. -dnl Copyright 2000-2002, 2007 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: +dnl Copyright 2000, 2001, 2002, 2007 Free Software Foundation, Inc. dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. 
+dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') @@ -128,7 +117,7 @@ Zdisp( movzbl, 0,(%eax,%ebp), %eax) subl %ebp, %eax C inv = 2*inv - inv*inv*d subl $1, %edx C shift amount, and clear carry - ASSERT(e,` C expect d*inv == 1 mod 2^GMP_LIMB_BITS + ASSERT(e,` C expect d*inv == 1 mod 2^BITS_PER_MP_LIMB pushl %eax FRAME_pushl() imull PARAM_DIVISOR, %eax cmpl $1, %eax diff --git a/gmp/mpn/x86/k6/mmx/logops_n.asm b/gmp/mpn/x86/k6/mmx/logops_n.asm index e17930bb2d..a6272131a2 100644 --- a/gmp/mpn/x86/k6/mmx/logops_n.asm +++ b/gmp/mpn/x86/k6/mmx/logops_n.asm @@ -1,33 +1,22 @@ dnl AMD K6-2 mpn_and_n, mpn_andn_n, mpn_nand_n, mpn_ior_n, mpn_iorn_n, dnl mpn_nior_n, mpn_xor_n, mpn_xnor_n -- mpn bitwise logical operations. -dnl Copyright 1999-2002 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: +dnl Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc. dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. 
+dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') diff --git a/gmp/mpn/x86/k6/mmx/lshift.asm b/gmp/mpn/x86/k6/mmx/lshift.asm index 45be582633..1492025171 100644 --- a/gmp/mpn/x86/k6/mmx/lshift.asm +++ b/gmp/mpn/x86/k6/mmx/lshift.asm @@ -1,32 +1,21 @@ dnl AMD K6 mpn_lshift -- mpn left shift. dnl Copyright 1999, 2000, 2002 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. 
-dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') diff --git a/gmp/mpn/x86/k6/mmx/popham.asm b/gmp/mpn/x86/k6/mmx/popham.asm index 2b19d0b5ee..a0a651d39c 100644 --- a/gmp/mpn/x86/k6/mmx/popham.asm +++ b/gmp/mpn/x86/k6/mmx/popham.asm @@ -1,33 +1,22 @@ dnl AMD K6-2 mpn_popcount, mpn_hamdist -- mpn bit population count and dnl hamming distance. -dnl Copyright 2000-2002 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. 
-dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: +dnl Copyright 2000, 2001, 2002 Free Software Foundation, Inc. dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. 
include(`../config.m4') diff --git a/gmp/mpn/x86/k6/mmx/rshift.asm b/gmp/mpn/x86/k6/mmx/rshift.asm index cd0382f322..80cd6fb05a 100644 --- a/gmp/mpn/x86/k6/mmx/rshift.asm +++ b/gmp/mpn/x86/k6/mmx/rshift.asm @@ -1,32 +1,21 @@ dnl AMD K6 mpn_rshift -- mpn right shift. dnl Copyright 1999, 2000, 2002 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. 
+dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') diff --git a/gmp/mpn/x86/k6/mod_34lsub1.asm b/gmp/mpn/x86/k6/mod_34lsub1.asm index 7e30503e54..a5b7ee1064 100644 --- a/gmp/mpn/x86/k6/mod_34lsub1.asm +++ b/gmp/mpn/x86/k6/mod_34lsub1.asm @@ -1,32 +1,21 @@ dnl AMD K6 mpn_mod_34lsub1 -- mpn remainder modulo 2**24-1. -dnl Copyright 2000-2002 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: +dnl Copyright 2000, 2001, 2002 Free Software Foundation, Inc. dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. 
dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') diff --git a/gmp/mpn/x86/k6/mode1o.asm b/gmp/mpn/x86/k6/mode1o.asm index a13f647b81..f299877911 100644 --- a/gmp/mpn/x86/k6/mode1o.asm +++ b/gmp/mpn/x86/k6/mode1o.asm @@ -1,32 +1,21 @@ dnl AMD K6 mpn_modexact_1_odd -- exact division style remainder. -dnl Copyright 2000-2003, 2007 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: +dnl Copyright 2000, 2001, 2002, 2003, 2007 Free Software Foundation, Inc. dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. 
+dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') @@ -114,7 +103,7 @@ Zdisp( movzbl, 0,(%ecx,%edi), %edi) C inv 8 bits subl %ecx, %edi C inv = 2*inv - inv*inv*d - ASSERT(e,` C d*inv == 1 mod 2^GMP_LIMB_BITS + ASSERT(e,` C d*inv == 1 mod 2^BITS_PER_MP_LIMB pushl %eax movl %esi, %eax imull %edi, %eax diff --git a/gmp/mpn/x86/k6/mul_1.asm b/gmp/mpn/x86/k6/mul_1.asm index 3ef7ec24fe..e1c468fe34 100644 --- a/gmp/mpn/x86/k6/mul_1.asm +++ b/gmp/mpn/x86/k6/mul_1.asm @@ -1,49 +1,38 @@ dnl AMD K6 mpn_mul_1 -- mpn by limb multiply. dnl Copyright 1999, 2000, 2002, 2005 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. 
+dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') -C cycles/limb -C P5 -C P6 model 0-8,10-12 5.5 +C cycles/limb +C P5: +C P6 model 0-8,10-12) 5.5 C P6 model 9 (Banias) -C P6 model 13 (Dothan) 4.87 +C P6 model 13 (Dothan) 4.87 C P4 model 0 (Willamette) C P4 model 1 (?) C P4 model 2 (Northwood) C P4 model 3 (Prescott) C P4 model 4 (Nocona) -C AMD K6 6.25 -C AMD K7 -C AMD K8 +C K6: 6.25 +C K7: +C K8: C mp_limb_t mpn_mul_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, diff --git a/gmp/mpn/x86/k6/mul_basecase.asm b/gmp/mpn/x86/k6/mul_basecase.asm index 7030001c3f..dcd4d70082 100644 --- a/gmp/mpn/x86/k6/mul_basecase.asm +++ b/gmp/mpn/x86/k6/mul_basecase.asm @@ -1,32 +1,21 @@ dnl AMD K6 mpn_mul_basecase -- multiply two mpn numbers. -dnl Copyright 1999-2003 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. 
-dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: +dnl Copyright 1999, 2000, 2001, 2002, 2003 Free Software Foundation, Inc. dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. 
include(`../config.m4') diff --git a/gmp/mpn/x86/k6/pre_mod_1.asm b/gmp/mpn/x86/k6/pre_mod_1.asm index 34db20d386..3231539bfd 100644 --- a/gmp/mpn/x86/k6/pre_mod_1.asm +++ b/gmp/mpn/x86/k6/pre_mod_1.asm @@ -1,32 +1,21 @@ dnl AMD K6 mpn_preinv_mod_1 -- mpn by 1 remainder, with pre-inverted divisor. dnl Copyright 2000, 2002, 2003 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. 
+dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') diff --git a/gmp/mpn/x86/k6/sqr_basecase.asm b/gmp/mpn/x86/k6/sqr_basecase.asm index b7ecb5cc8a..3392d38812 100644 --- a/gmp/mpn/x86/k6/sqr_basecase.asm +++ b/gmp/mpn/x86/k6/sqr_basecase.asm @@ -1,32 +1,21 @@ dnl AMD K6 mpn_sqr_basecase -- square an mpn number. -dnl Copyright 1999-2002 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: +dnl Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc. dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. 
dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') @@ -36,35 +25,35 @@ C product (measured on the speed difference between 17 and 33 limbs, C which is roughly the Karatsuba recursing range). -dnl SQR_TOOM2_THRESHOLD_MAX is the maximum SQR_TOOM2_THRESHOLD this +dnl SQR_KARATSUBA_THRESHOLD_MAX is the maximum SQR_KARATSUBA_THRESHOLD this dnl code supports. This value is used only by the tune program to know dnl what it can go up to. (An attempt to compile with a bigger value will dnl trigger some m4_assert()s in the code, making the build fail.) dnl dnl The value is determined by requiring the displacements in the unrolled dnl addmul to fit in single bytes. This means a maximum UNROLL_COUNT of -dnl 63, giving a maximum SQR_TOOM2_THRESHOLD of 66. +dnl 63, giving a maximum SQR_KARATSUBA_THRESHOLD of 66. -deflit(SQR_TOOM2_THRESHOLD_MAX, 66) +deflit(SQR_KARATSUBA_THRESHOLD_MAX, 66) dnl Allow a value from the tune program to override config.m4. -ifdef(`SQR_TOOM2_THRESHOLD_OVERRIDE', -`define(`SQR_TOOM2_THRESHOLD',SQR_TOOM2_THRESHOLD_OVERRIDE)') +ifdef(`SQR_KARATSUBA_THRESHOLD_OVERRIDE', +`define(`SQR_KARATSUBA_THRESHOLD',SQR_KARATSUBA_THRESHOLD_OVERRIDE)') dnl UNROLL_COUNT is the number of code chunks in the unrolled addmul. The -dnl number required is determined by SQR_TOOM2_THRESHOLD, since -dnl mpn_sqr_basecase only needs to handle sizes < SQR_TOOM2_THRESHOLD. +dnl number required is determined by SQR_KARATSUBA_THRESHOLD, since +dnl mpn_sqr_basecase only needs to handle sizes < SQR_KARATSUBA_THRESHOLD. 
dnl dnl The first addmul is the biggest, and this takes the second least dnl significant limb and multiplies it by the third least significant and -dnl up. Hence for a maximum operand size of SQR_TOOM2_THRESHOLD-1 -dnl limbs, UNROLL_COUNT needs to be SQR_TOOM2_THRESHOLD-3. +dnl up. Hence for a maximum operand size of SQR_KARATSUBA_THRESHOLD-1 +dnl limbs, UNROLL_COUNT needs to be SQR_KARATSUBA_THRESHOLD-3. -m4_config_gmp_mparam(`SQR_TOOM2_THRESHOLD') -deflit(UNROLL_COUNT, eval(SQR_TOOM2_THRESHOLD-3)) +m4_config_gmp_mparam(`SQR_KARATSUBA_THRESHOLD') +deflit(UNROLL_COUNT, eval(SQR_KARATSUBA_THRESHOLD-3)) C void mpn_sqr_basecase (mp_ptr dst, mp_srcptr src, mp_size_t size); diff --git a/gmp/mpn/x86/k7/README b/gmp/mpn/x86/k7/README index 5711b612c5..e2c5e0c18d 100644 --- a/gmp/mpn/x86/k7/README +++ b/gmp/mpn/x86/k7/README @@ -3,28 +3,17 @@ Copyright 2000, 2001 Free Software Foundation, Inc. This file is part of the GNU MP Library. The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - -or - - * the GNU General Public License as published by the Free Software - Foundation; either version 2 of the License, or (at your option) any - later version. - -or both in parallel, as here. +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 3 of the License, or (at your +option) any later version. The GNU MP Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. 
-You should have received copies of the GNU General Public License and the -GNU Lesser General Public License along with the GNU MP Library. If not, -see https://www.gnu.org/licenses/. +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. diff --git a/gmp/mpn/x86/k7/addlsh1_n.asm b/gmp/mpn/x86/k7/addlsh1_n.asm deleted file mode 100644 index a957b6f78e..0000000000 --- a/gmp/mpn/x86/k7/addlsh1_n.asm +++ /dev/null @@ -1,196 +0,0 @@ -dnl AMD K7 mpn_addlsh1_n -- rp[] = up[] + (vp[] << 1) - -dnl Copyright 2011 Free Software Foundation, Inc. - -dnl Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. - -include(`../config.m4') - -C This is an attempt at an addlsh1_n for x86-32, not relying on sse2 insns. -C The innerloop is 2*3-way unrolled, which is best we can do with the available -C registers. 
It seems tricky to use the same structure for rsblsh1_n, since we -C cannot feed carry between operations there. - -C cycles/limb -C P5 -C P6 model 0-8,10-12 -C P6 model 9 (Banias) -C P6 model 13 (Dothan) 5.4 (worse than add_n + lshift) -C P4 model 0 (Willamette) -C P4 model 1 (?) -C P4 model 2 (Northwood) -C P4 model 3 (Prescott) -C P4 model 4 (Nocona) -C Intel Atom 6 -C AMD K6 ? -C AMD K7 2.5 -C AMD K8 - -C This is a basic addlsh1_n for k7, atom, and perhaps some other x86-32 -C processors. It uses 2*3-way unrolling, for good reasons. Unfortunately, -C that means we need an initial magic multiply. -C -C It is not clear how to do sublsh1_n or rsblsh1_n using the same pattern. We -C cannot do rsblsh1_n since we feed carry from the shift blocks to the -C add/subtract blocks, which is right for addition but reversed for -C subtraction. We could perhaps do sublsh1_n, with some extra move insns, -C without losing any time, since we're not issue limited but carry recurrency -C latency. -C -C Breaking carry recurrency might be a good idea. We would then need separate -C registers for the shift carry and add/subtract carry, which in turn would -C force is to 2*2-way unrolling. 
- -defframe(PARAM_SIZE, 16) -defframe(PARAM_DBLD, 12) -defframe(PARAM_SRC, 8) -defframe(PARAM_DST, 4) - -dnl re-use parameter space -define(VAR_COUNT,`PARAM_DST') -define(VAR_TMP,`PARAM_DBLD') - -ASM_START() - TEXT - ALIGN(8) -PROLOGUE(mpn_addlsh1_n) -deflit(`FRAME',0) - -define(`rp', `%edi') -define(`up', `%esi') -define(`vp', `%ebp') - - mov $0x2aaaaaab, %eax - - push %ebx FRAME_pushl() - mov PARAM_SIZE, %ebx C size - - push rp FRAME_pushl() - mov PARAM_DST, rp - - mul %ebx - - push up FRAME_pushl() - mov PARAM_SRC, up - - not %edx C count = -(size\8)-1 - mov %edx, VAR_COUNT - - push vp FRAME_pushl() - mov PARAM_DBLD, vp - - lea 3(%edx,%edx,2), %ecx C count*3+3 = -(size\6)*3 - xor %edx, %edx - lea (%ebx,%ecx,2), %ebx C size + (count*3+3)*2 = size % 6 - or %ebx, %ebx - jz L(exact) - -L(oop): -ifdef(`CPU_P6',` - shr %edx ') C restore 2nd saved carry bit - mov (vp), %eax - adc %eax, %eax - rcr %edx C restore 1st saved carry bit - lea 4(vp), vp - adc (up), %eax - lea 4(up), up - adc %edx, %edx C save a carry bit in edx -ifdef(`CPU_P6',` - adc %edx, %edx ') C save another carry bit in edx - dec %ebx - mov %eax, (rp) - lea 4(rp), rp - jnz L(oop) - mov vp, VAR_TMP -L(exact): - incl VAR_COUNT - jz L(end) - - ALIGN(16) -L(top): -ifdef(`CPU_P6',` - shr %edx ') C restore 2nd saved carry bit - mov (vp), %eax - adc %eax, %eax - mov 4(vp), %ebx - adc %ebx, %ebx - mov 8(vp), %ecx - adc %ecx, %ecx - - rcr %edx C restore 1st saved carry bit - - adc (up), %eax - mov %eax, (rp) - adc 4(up), %ebx - mov %ebx, 4(rp) - adc 8(up), %ecx - mov %ecx, 8(rp) - - mov 12(vp), %eax - adc %eax, %eax - mov 16(vp), %ebx - adc %ebx, %ebx - mov 20(vp), %ecx - adc %ecx, %ecx - - lea 24(vp), vp - adc %edx, %edx C save a carry bit in edx - - adc 12(up), %eax - mov %eax, 12(rp) - adc 16(up), %ebx - mov %ebx, 16(rp) - adc 20(up), %ecx - - lea 24(up), up - -ifdef(`CPU_P6',` - adc %edx, %edx ') C save another carry bit in edx - mov %ecx, 20(rp) - incl VAR_COUNT - lea 24(rp), rp - jne L(top) - -L(end): - 
pop vp FRAME_popl() - pop up FRAME_popl() - -ifdef(`CPU_P6',` - xor %eax, %eax - shr $1, %edx - adc %edx, %eax -',` - adc $0, %edx - mov %edx, %eax -') - pop rp FRAME_popl() - pop %ebx FRAME_popl() - ret -EPILOGUE() -ASM_END() diff --git a/gmp/mpn/x86/k7/aors_n.asm b/gmp/mpn/x86/k7/aors_n.asm index 1a08072029..d84de3ee98 100644 --- a/gmp/mpn/x86/k7/aors_n.asm +++ b/gmp/mpn/x86/k7/aors_n.asm @@ -1,32 +1,21 @@ dnl AMD K7 mpn_add_n/mpn_sub_n -- mpn add or subtract. -dnl Copyright 1999-2003 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: +dnl Copyright 1999, 2000, 2001, 2002, 2003 Free Software Foundation, Inc. dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') diff --git a/gmp/mpn/x86/k7/aorsmul_1.asm b/gmp/mpn/x86/k7/aorsmul_1.asm index eec8df6de2..b247c29131 100644 --- a/gmp/mpn/x86/k7/aorsmul_1.asm +++ b/gmp/mpn/x86/k7/aorsmul_1.asm @@ -1,49 +1,39 @@ dnl AMD K7 mpn_addmul_1/mpn_submul_1 -- add or subtract mpn multiple. -dnl Copyright 1999-2002, 2005, 2008 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: +dnl Copyright 1999, 2000, 2001, 2002, 2005, 2008 Free Software Foundation, +dnl Inc. dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') -C cycles/limb -C P5 -C P6 model 0-8,10-12 -C P6 model 9 (Banias) 6.5 +C cycles/limb +C P5: +C P6 model 0-8,10-12) +C P6 model 9 (Banias) C P6 model 13 (Dothan) C P4 model 0 (Willamette) C P4 model 1 (?) C P4 model 2 (Northwood) C P4 model 3 (Prescott) C P4 model 4 (Nocona) -C AMD K6 -C AMD K7 3.75 -C AMD K8 +C K6: +C K7: 3.75 +C K8: C TODO C * Improve feed-in and wind-down code. We beat the old code for all n != 1, diff --git a/gmp/mpn/x86/k7/bdiv_q_1.asm b/gmp/mpn/x86/k7/bdiv_q_1.asm deleted file mode 100644 index df3477f539..0000000000 --- a/gmp/mpn/x86/k7/bdiv_q_1.asm +++ /dev/null @@ -1,244 +0,0 @@ -dnl AMD K7 mpn_bdiv_q_1 -- mpn by limb exact division. - -dnl Rearranged from mpn/x86/k7/dive_1.asm by Marco Bodrato. - -dnl Copyright 2001, 2002, 2004, 2007, 2011 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. 
-dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. - -include(`../config.m4') - - -C cycles/limb -C Athlon: 11.0 -C Hammer: 9.0 - - -C void mpn_divexact_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, -C mp_limb_t divisor); -C -C The dependent chain is mul+imul+sub for 11 cycles and that speed is -C achieved with no special effort. The load and shrld latencies are hidden -C by out of order execution. -C -C It's a touch faster on size==1 to use the mul-by-inverse than divl. 
- -defframe(PARAM_SHIFT, 24) -defframe(PARAM_INVERSE,20) -defframe(PARAM_DIVISOR,16) -defframe(PARAM_SIZE, 12) -defframe(PARAM_SRC, 8) -defframe(PARAM_DST, 4) - -defframe(SAVE_EBX, -4) -defframe(SAVE_ESI, -8) -defframe(SAVE_EDI, -12) -defframe(SAVE_EBP, -16) -defframe(VAR_INVERSE, -20) -defframe(VAR_DST_END, -24) - -deflit(STACK_SPACE, 24) - - TEXT - -C mp_limb_t -C mpn_pi1_bdiv_q_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, mp_limb_t divisor, -C mp_limb_t inverse, int shift) - ALIGN(16) -PROLOGUE(mpn_pi1_bdiv_q_1) -deflit(`FRAME',0) - - subl $STACK_SPACE, %esp deflit(`FRAME',STACK_SPACE) - movl PARAM_SHIFT, %ecx C shift count - - movl %ebp, SAVE_EBP - movl PARAM_SIZE, %ebp - - movl %esi, SAVE_ESI - movl PARAM_SRC, %esi - - movl %edi, SAVE_EDI - movl PARAM_DST, %edi - - movl %ebx, SAVE_EBX - - leal (%esi,%ebp,4), %esi C src end - leal (%edi,%ebp,4), %edi C dst end - negl %ebp C -size - - movl PARAM_INVERSE, %eax C inv - -L(common): - movl %eax, VAR_INVERSE - movl (%esi,%ebp,4), %eax C src[0] - - incl %ebp - jz L(one) - - movl (%esi,%ebp,4), %edx C src[1] - - shrdl( %cl, %edx, %eax) - - movl %edi, VAR_DST_END - xorl %ebx, %ebx - jmp L(entry) - - ALIGN(8) -L(top): - C eax q - C ebx carry bit, 0 or 1 - C ecx shift - C edx - C esi src end - C edi dst end - C ebp counter, limbs, negative - - mull PARAM_DIVISOR C carry limb in edx - - movl -4(%esi,%ebp,4), %eax - movl (%esi,%ebp,4), %edi - - shrdl( %cl, %edi, %eax) - - subl %ebx, %eax C apply carry bit - setc %bl - movl VAR_DST_END, %edi - - subl %edx, %eax C apply carry limb - adcl $0, %ebx - -L(entry): - imull VAR_INVERSE, %eax - - movl %eax, -4(%edi,%ebp,4) - incl %ebp - jnz L(top) - - - mull PARAM_DIVISOR C carry limb in edx - - movl -4(%esi), %eax C src high limb - shrl %cl, %eax - movl SAVE_ESI, %esi - - subl %ebx, %eax C apply carry bit - movl SAVE_EBX, %ebx - movl SAVE_EBP, %ebp - - subl %edx, %eax C apply carry limb - - imull VAR_INVERSE, %eax - - movl %eax, -4(%edi) - movl SAVE_EDI, %edi - addl $STACK_SPACE, 
%esp - - ret - -L(one): - shrl %cl, %eax - movl SAVE_ESI, %esi - movl SAVE_EBX, %ebx - - imull VAR_INVERSE, %eax - - movl SAVE_EBP, %ebp - - movl %eax, -4(%edi) - movl SAVE_EDI, %edi - addl $STACK_SPACE, %esp - - ret -EPILOGUE() - -C mp_limb_t mpn_bdiv_q_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, -C mp_limb_t divisor); -C - - ALIGN(16) -PROLOGUE(mpn_bdiv_q_1) -deflit(`FRAME',0) - - movl PARAM_DIVISOR, %eax - subl $STACK_SPACE, %esp deflit(`FRAME',STACK_SPACE) - movl $-1, %ecx C shift count - - movl %ebp, SAVE_EBP - movl PARAM_SIZE, %ebp - - movl %esi, SAVE_ESI - movl %edi, SAVE_EDI - - C If there's usually only one or two trailing zero bits then this - C should be faster than bsfl. -L(strip_twos): - incl %ecx - shrl %eax - jnc L(strip_twos) - - movl %ebx, SAVE_EBX - leal 1(%eax,%eax), %ebx C d without twos - andl $127, %eax C d/2, 7 bits - -ifdef(`PIC',` - LEA( binvert_limb_table, %edx) - movzbl (%eax,%edx), %eax C inv 8 bits -',` - movzbl binvert_limb_table(%eax), %eax C inv 8 bits -') - - leal (%eax,%eax), %edx C 2*inv - movl %ebx, PARAM_DIVISOR C d without twos - - imull %eax, %eax C inv*inv - - movl PARAM_SRC, %esi - movl PARAM_DST, %edi - - imull %ebx, %eax C inv*inv*d - - subl %eax, %edx C inv = 2*inv - inv*inv*d - leal (%edx,%edx), %eax C 2*inv - - imull %edx, %edx C inv*inv - - leal (%esi,%ebp,4), %esi C src end - leal (%edi,%ebp,4), %edi C dst end - negl %ebp C -size - - imull %ebx, %edx C inv*inv*d - - subl %edx, %eax C inv = 2*inv - inv*inv*d - - ASSERT(e,` C expect d*inv == 1 mod 2^GMP_LIMB_BITS - pushl %eax FRAME_pushl() - imull PARAM_DIVISOR, %eax - cmpl $1, %eax - popl %eax FRAME_popl()') - - jmp L(common) -EPILOGUE() diff --git a/gmp/mpn/x86/k7/dive_1.asm b/gmp/mpn/x86/k7/dive_1.asm index 8eb4f45ac0..c994e0fb06 100644 --- a/gmp/mpn/x86/k7/dive_1.asm +++ b/gmp/mpn/x86/k7/dive_1.asm @@ -1,32 +1,21 @@ dnl AMD K7 mpn_divexact_1 -- mpn by limb exact division. dnl Copyright 2001, 2002, 2004, 2007 Free Software Foundation, Inc. 
- -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. 
include(`../config.m4') @@ -116,7 +105,7 @@ ifdef(`PIC',` subl %edx, %eax C inv = 2*inv - inv*inv*d - ASSERT(e,` C expect d*inv == 1 mod 2^GMP_LIMB_BITS + ASSERT(e,` C expect d*inv == 1 mod 2^BITS_PER_MP_LIMB pushl %eax FRAME_pushl() imull PARAM_DIVISOR, %eax cmpl $1, %eax diff --git a/gmp/mpn/x86/k7/gcd_1.asm b/gmp/mpn/x86/k7/gcd_1.asm index c7d12c83c0..f912f43730 100644 --- a/gmp/mpn/x86/k7/gcd_1.asm +++ b/gmp/mpn/x86/k7/gcd_1.asm @@ -1,186 +1,369 @@ -dnl x86 mpn_gcd_1 optimised for AMD K7. +dnl AMD K7 mpn_gcd_1 -- mpn by 1 gcd. -dnl Contributed to the GNU project by by Kevin Ryde. Rehacked by Torbjorn -dnl Granlund. - -dnl Copyright 2000-2002, 2005, 2009, 2011, 2012 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or +dnl Copyright 2000, 2001, 2002 Free Software Foundation, Inc. dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. 
+dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') -C cycles/bit (approx) -C AMD K7 5.31 -C AMD K8,K9 5.33 -C AMD K10 5.30 -C AMD bd1 ? -C AMD bobcat 7.02 -C Intel P4-2 10.1 -C Intel P4-3/4 10.0 -C Intel P6/13 5.88 -C Intel core2 6.26 -C Intel NHM 6.83 -C Intel SBR 8.50 -C Intel atom 8.90 -C VIA nano ? -C Numbers measured with: speed -CD -s16-32 -t16 mpn_gcd_1 - -C TODO -C * Tune overhead, this takes 2-3 cycles more than old code when v0 is tiny. -C * Stream things better through registers, avoiding some copying. - -C ctz_table[n] is the number of trailing zeros on n, or MAXSHIFT if n==0. +C K7: 6.75 cycles/bit (approx) 1x1 gcd +C 11.0 cycles/limb Nx1 reduction (modexact_1_odd) + + +dnl Reduce using x%y if x is more than DIV_THRESHOLD bits bigger than y, +dnl where x is the larger of the two. See tune/README for more. +dnl +dnl divl at 40 cycles compared to the gcd at about 7 cycles/bitpair +dnl suggests 40/7*2=11.4 but 7 seems to be about right. + +deflit(DIV_THRESHOLD, 7) + +C table[n] is the number of trailing zeros on n, or MAXSHIFT if n==0. +C +C This is mixed in with the code, but as per the k7 optimization manual it's +C a full cache line and suitably aligned so it won't get swapped between +C code and data. Having it in TEXT rather than RODATA saves needing a GOT +C entry when PIC. 
+C +C Actually, there doesn't seem to be a measurable difference between this in +C it's own cache line or plonked in the middle of the code. Presumably +C since TEXT is read-only there's no worries about coherency. + +deflit(MASK, 63) deflit(MAXSHIFT, 6) -deflit(MASK, eval((m4_lshift(1,MAXSHIFT))-1)) -DEF_OBJECT(ctz_table,64) + TEXT + ALIGN(64) +L(table): .byte MAXSHIFT forloop(i,1,MASK, ` .byte m4_count_trailing_zeros(i) ') -END_OBJECT(ctz_table) -C Threshold of when to call bmod when U is one limb. Should be about -C (time_in_cycles(bmod_1,1) + call_overhead) / (cycles/bit). -define(`DIV_THRES_LOG2', 7) +C mp_limb_t mpn_gcd_1 (mp_srcptr src, mp_size_t size, mp_limb_t limb); +C + +defframe(PARAM_LIMB, 12) +defframe(PARAM_SIZE, 8) +defframe(PARAM_SRC, 4) -define(`up', `%edi') -define(`n', `%esi') -define(`v0', `%edx') +defframe(SAVE_EBX, -4) +defframe(SAVE_ESI, -8) +defframe(SAVE_EDI, -12) +defframe(SAVE_EBP, -16) +defframe(CALL_DIVISOR,-20) +defframe(CALL_SIZE, -24) +defframe(CALL_SRC, -28) +deflit(STACK_SPACE, 28) -ASM_START() TEXT ALIGN(16) + PROLOGUE(mpn_gcd_1) - push %edi - push %esi +deflit(`FRAME',0) + + ASSERT(ne, `cmpl $0, PARAM_LIMB') C y!=0 + ASSERT(ae, `cmpl $1, PARAM_SIZE') C size>=1 + + movl PARAM_SRC, %eax + movl PARAM_LIMB, %edx + subl $STACK_SPACE, %esp deflit(`FRAME',STACK_SPACE) - mov 12(%esp), up - mov 16(%esp), n - mov 20(%esp), v0 + movl %esi, SAVE_ESI + movl %ebx, SAVE_EBX - mov (up), %eax C U low limb - or v0, %eax C x | y - mov $-1, %ecx + movl (%eax), %esi C src low limb + +ifdef(`PIC',` + movl %edi, SAVE_EDI + call L(movl_eip_to_edi) +L(here): + addl $L(table)-L(here), %edi +') + + movl %esi, %ebx + orl %edx, %esi C x|y + movl $-1, %ecx L(twos): - inc %ecx - shr %eax - jnc L(twos) + incl %ecx + shrl %esi + jnc L(twos) C 3/4 chance of x or y odd already - shr %cl, v0 - mov %ecx, %eax C common twos + shrl %cl, %ebx + shrl %cl, %edx + movl %ecx, %esi C common twos -L(divide_strip_y): - shr v0 - jnc L(divide_strip_y) - adc v0, v0 - - push 
%eax - push v0 - - cmp $1, n - jnz L(reduce_nby1) - -C Both U and V are single limbs, reduce with bmod if u0 >> v0. - mov (up), %ecx - mov %ecx, %eax - shr $DIV_THRES_LOG2, %ecx - cmp %ecx, v0 - ja L(reduced) - - mov v0, %esi - xor %edx, %edx - div %esi - mov %edx, %eax - jmp L(reduced) - -L(reduce_nby1): -ifdef(`PIC_WITH_EBX',` - push %ebx - call L(movl_eip_to_ebx) - add $_GLOBAL_OFFSET_TABLE_, %ebx + movl PARAM_SIZE, %ecx + cmpl $1, %ecx + ja L(divide) + + + C eax + C ebx x + C ecx + C edx y + C esi common twos + C edi [PIC] L(table) + C ebp + + movl %edx, %eax + cmpl %ebx, %edx + + cmovb( %ebx, %eax) C swap to make x bigger than y + cmovb( %edx, %ebx) + + +L(strip_y): + C eax x + C ebx y + C ecx + C edx + C esi common twos + C edi [PIC] L(table) + C ebp + + ASSERT(nz,`orl %ebx,%ebx') + shrl %ebx + jnc L(strip_y) + rcll %ebx + + + C eax x + C ebx y (odd) + C ecx + C edx + C esi common twos + C edi [PIC] L(table) + C ebp + + movl %eax, %ecx + movl %ebx, %edx + shrl $DIV_THRESHOLD, %eax + + cmpl %eax, %ebx + movl %ecx, %eax + ja L(strip_x_entry) C do x%y if x much bigger than y + + + xorl %edx, %edx + + divl %ebx + + orl %edx, %edx + movl %edx, %eax C remainder -> x + movl %ebx, %edx C y + + jz L(done_ebx) + jmp L(strip_x) + + + C Offset 0x9D here for non-PIC. About 0.4 cycles/bit is saved by + C ensuring the end of the jnz at the end of this loop doesn't cross + C into the next cache line at 0xC0. + C + C PIC on the other hand is offset 0xAC here and extends to 0xC9, so + C it crosses but doesn't suffer any measurable slowdown. 
+ +L(top): + C eax x + C ebx y-x + C ecx x-y + C edx y + C esi twos, for use at end + C edi [PIC] L(table) + + cmovc( %ebx, %ecx) C if x-y gave carry, use x and y-x + cmovc( %eax, %edx) + +L(strip_x): + movl %ecx, %eax +L(strip_x_entry): + andl $MASK, %ecx + + ASSERT(nz, `orl %eax, %eax') + +ifdef(`PIC',` + movb (%ecx,%edi), %cl +',` + movb L(table) (%ecx), %cl ') - push v0 C param 3 - push n C param 2 - push up C param 1 - cmp $BMOD_1_TO_MOD_1_THRESHOLD, n - jl L(bmod) - CALL( mpn_mod_1) - jmp L(called) -L(bmod): - CALL( mpn_modexact_1_odd) - -L(called): - add $12, %esp C deallocate params -ifdef(`PIC_WITH_EBX',` - pop %ebx + + shrl %cl, %eax + cmpb $MAXSHIFT, %cl + + movl %eax, %ecx + movl %edx, %ebx + je L(strip_x) + + ASSERT(nz, `testl $1, %eax') C both odd + ASSERT(nz, `testl $1, %edx') + + subl %eax, %ebx + subl %edx, %ecx + jnz L(top) + + +L(done): + movl %esi, %ecx + movl SAVE_ESI, %esi +ifdef(`PIC',` + movl SAVE_EDI, %edi ') -L(reduced): - pop %edx - - LEA( ctz_table, %esi) - test %eax, %eax - mov %eax, %ecx - jnz L(mid) - jmp L(end) - - ALIGN(16) C K8 BC P4 NHM SBR -L(top): cmovc( %ecx, %eax) C if x-y < 0 0 - cmovc( %edi, %edx) C use x,y-x 0 -L(mid): and $MASK, %ecx C 0 - movzbl (%esi,%ecx), %ecx C 1 - jz L(shift_alot) C 1 - shr %cl, %eax C 3 - mov %eax, %edi C 4 - mov %edx, %ecx C 3 - sub %eax, %ecx C 4 - sub %edx, %eax C 4 - jnz L(top) C 5 - -L(end): pop %ecx - mov %edx, %eax - shl %cl, %eax - pop %esi - pop %edi - ret -L(shift_alot): - shr $MAXSHIFT, %eax - mov %eax, %ecx - jmp L(mid) + shll %cl, %eax + movl SAVE_EBX, %ebx + addl $FRAME, %esp -ifdef(`PIC_WITH_EBX',` -L(movl_eip_to_ebx): - mov (%esp), %ebx ret + + + +C ----------------------------------------------------------------------------- +C two or more limbs + +dnl MODEXACT_THRESHOLD is the size at which it's better to call +dnl mpn_modexact_1_odd than do an inline loop. 
+ +deflit(MODEXACT_THRESHOLD, ifdef(`PIC',6,5)) + +L(divide): + C eax src + C ebx + C ecx size + C edx y + C esi common twos + C edi [PIC] L(table) + C ebp + +L(divide_strip_y): + ASSERT(nz,`orl %edx,%edx') + shrl %edx + jnc L(divide_strip_y) + leal 1(%edx,%edx), %ebx C y now odd + + movl %ebp, SAVE_EBP + movl %eax, %ebp + movl -4(%eax,%ecx,4), %eax C src high limb + + cmp $MODEXACT_THRESHOLD, %ecx + jae L(modexact) + + cmpl %ebx, %eax C high cmp divisor + movl $0, %edx + + cmovc( %eax, %edx) C skip a div if high<divisor + sbbl $0, %ecx + + +L(divide_top): + C eax scratch (quotient) + C ebx y + C ecx counter (size to 1, inclusive) + C edx carry (remainder) + C esi common twos + C edi [PIC] L(table) + C ebp src + + movl -4(%ebp,%ecx,4), %eax + + divl %ebx + + decl %ecx + jnz L(divide_top) + + + C eax + C ebx y (odd) + C ecx + C edx x + C esi common twos + C edi [PIC] L(table) + C ebp + + orl %edx, %edx + movl SAVE_EBP, %ebp + movl %edx, %eax + + movl %edx, %ecx + movl %ebx, %edx + jnz L(strip_x_entry) + + +L(done_ebx): + movl %ebx, %eax + jmp L(done) + + + +L(modexact): + C eax + C ebx y + C ecx size + C edx + C esi common twos + C edi [PIC] L(table) + C ebp src + +ifdef(`PIC',` + movl %ebp, CALL_SRC + movl %ebx, %ebp C y + movl %edi, %ebx C L(table) + + addl $_GLOBAL_OFFSET_TABLE_+[.-L(table)], %ebx + movl %ebp, CALL_DIVISOR + movl %ecx, CALL_SIZE + + call GSYM_PREFIX`'mpn_modexact_1_odd@PLT +',` +dnl non-PIC + movl %ebx, CALL_DIVISOR + movl %ebp, CALL_SRC + movl %ecx, CALL_SIZE + + call GSYM_PREFIX`'mpn_modexact_1_odd ') + + C eax x + C ebx [non-PIC] y + C ecx + C edx + C esi common twos + C edi [PIC] L(table) + C ebp [PIC] y + + orl %eax, %eax + movl ifdef(`PIC',`%ebp',`%ebx'), %edx + movl SAVE_EBP, %ebp + + movl %eax, %ecx + jnz L(strip_x_entry) + + movl %edx, %eax + jmp L(done) + + +ifdef(`PIC', ` +L(movl_eip_to_edi): + movl (%esp), %edi + ret_internal +') + EPILOGUE() diff --git a/gmp/mpn/x86/k7/gmp-mparam.h b/gmp/mpn/x86/k7/gmp-mparam.h index 
9977a113e2..ced0c020f7 100644 --- a/gmp/mpn/x86/k7/gmp-mparam.h +++ b/gmp/mpn/x86/k7/gmp-mparam.h @@ -1,241 +1,73 @@ /* AMD K7 gmp-mparam.h -- Compiler/machine parameter header file. -Copyright 1991, 1993, 1994, 2000-2005, 2008-2010, 2014 Free Software -Foundation, Inc. +Copyright 1991, 1993, 1994, 2000, 2001, 2002, 2003, 2004, 2005, 2008 Free +Software Foundation, Inc. This file is part of the GNU MP Library. The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of either: +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 3 of the License, or (at your +option) any later version. - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. -or +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ - * the GNU General Public License as published by the Free Software - Foundation; either version 2 of the License, or (at your option) any - later version. +#define BITS_PER_MP_LIMB 32 +#define BYTES_PER_MP_LIMB 4 -or both in parallel, as here. -The GNU MP Library is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. - -You should have received copies of the GNU General Public License and the -GNU Lesser General Public License along with the GNU MP Library. If not, -see https://www.gnu.org/licenses/. 
*/ - -#define GMP_LIMB_BITS 32 -#define GMP_LIMB_BYTES 4 - -/* 2083 MHz K7 Barton */ -/* FFT tuning limit = 25000000 */ -/* Generated by tuneup.c, 2014-03-13, gcc 4.2 */ - -#define MOD_1_NORM_THRESHOLD 0 /* always */ -#define MOD_1_UNNORM_THRESHOLD 3 -#define MOD_1N_TO_MOD_1_1_THRESHOLD 7 -#define MOD_1U_TO_MOD_1_1_THRESHOLD 3 -#define MOD_1_1_TO_MOD_1_2_THRESHOLD 24 -#define MOD_1_2_TO_MOD_1_4_THRESHOLD 0 /* never mpn_mod_1s_2p */ -#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 10 -#define USE_PREINV_DIVREM_1 1 /* native */ -#define DIV_QR_1N_PI1_METHOD 1 -#define DIV_QR_1_NORM_THRESHOLD 3 -#define DIV_QR_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* never */ -#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */ -#define DIVEXACT_1_THRESHOLD 0 /* always (native) */ -#define BMOD_1_TO_MOD_1_THRESHOLD 24 - -#define MUL_TOOM22_THRESHOLD 28 -#define MUL_TOOM33_THRESHOLD 85 -#define MUL_TOOM44_THRESHOLD 147 -#define MUL_TOOM6H_THRESHOLD 216 -#define MUL_TOOM8H_THRESHOLD 309 - -#define MUL_TOOM32_TO_TOOM43_THRESHOLD 85 -#define MUL_TOOM32_TO_TOOM53_THRESHOLD 99 -#define MUL_TOOM42_TO_TOOM53_THRESHOLD 98 -#define MUL_TOOM42_TO_TOOM63_THRESHOLD 102 -#define MUL_TOOM43_TO_TOOM54_THRESHOLD 124 - -#define SQR_BASECASE_THRESHOLD 0 /* always (native) */ -#define SQR_TOOM2_THRESHOLD 50 -#define SQR_TOOM3_THRESHOLD 81 -#define SQR_TOOM4_THRESHOLD 216 -#define SQR_TOOM6_THRESHOLD 306 -#define SQR_TOOM8_THRESHOLD 446 - -#define MULMID_TOOM42_THRESHOLD 56 - -#define MULMOD_BNM1_THRESHOLD 17 -#define SQRMOD_BNM1_THRESHOLD 17 - -#define MUL_FFT_MODF_THRESHOLD 904 /* k = 6 */ -#define MUL_FFT_TABLE3 \ - { { 904, 6}, { 21, 7}, { 11, 6}, { 25, 7}, \ - { 13, 6}, { 27, 7}, { 15, 6}, { 31, 7}, \ - { 17, 6}, { 35, 7}, { 19, 6}, { 39, 7}, \ - { 23, 6}, { 47, 7}, { 27, 8}, { 15, 7}, \ - { 31, 6}, { 63, 7}, { 35, 8}, { 19, 7}, \ - { 39, 8}, { 23, 7}, { 47, 8}, { 31, 7}, \ - { 63, 8}, { 39, 7}, { 79, 9}, { 23, 8}, \ - { 47, 7}, { 95, 8}, { 51, 9}, { 31, 8}, \ - { 71, 9}, { 39, 8}, { 79, 9}, { 47, 8}, \ - { 
95, 9}, { 55,10}, { 31, 9}, { 63, 8}, \ - { 127, 9}, { 71, 8}, { 143, 9}, { 79, 8}, \ - { 159,10}, { 47, 9}, { 95, 8}, { 191, 9}, \ - { 103,11}, { 31,10}, { 63, 9}, { 127, 8}, \ - { 255, 9}, { 143,10}, { 79, 9}, { 167,10}, \ - { 95, 9}, { 199,10}, { 111,11}, { 63,10}, \ - { 127, 9}, { 255,10}, { 143, 9}, { 287,10}, \ - { 159, 9}, { 319,11}, { 95,10}, { 191, 9}, \ - { 383,10}, { 207,12}, { 63,11}, { 127,10}, \ - { 255, 9}, { 511,10}, { 271, 8}, { 1087,10}, \ - { 287,11}, { 159,10}, { 319, 9}, { 639,11}, \ - { 191,10}, { 383, 9}, { 767, 8}, { 1535, 9}, \ - { 799, 8}, { 1599,11}, { 223,12}, { 127,11}, \ - { 255,10}, { 511, 9}, { 1023,10}, { 543, 9}, \ - { 1087,11}, { 287,10}, { 575, 9}, { 1151,10}, \ - { 607, 9}, { 1215, 8}, { 2431,11}, { 319,10}, \ - { 639, 9}, { 1279,10}, { 671, 9}, { 1343,12}, \ - { 191,11}, { 383,10}, { 767, 9}, { 1535,10}, \ - { 799, 9}, { 1599,10}, { 831, 9}, { 1663,10}, \ - { 863,13}, { 127,12}, { 255,11}, { 511,10}, \ - { 1023,11}, { 543,10}, { 1087,11}, { 575,10}, \ - { 1151,11}, { 607,10}, { 1215, 9}, { 2431,12}, \ - { 319,11}, { 639,10}, { 1407,11}, { 735,10}, \ - { 1471, 9}, { 2943,12}, { 383,11}, { 767,10}, \ - { 1535,11}, { 799,10}, { 1599,11}, { 831,10}, \ - { 1663,11}, { 895,10}, { 1791,11}, { 959,10}, \ - { 1919,13}, { 255,12}, { 511,11}, { 1023,10}, \ - { 2047,11}, { 1087,12}, { 575,11}, { 1151,10}, \ - { 2303,11}, { 1215,10}, { 2431,12}, { 639,11}, \ - { 1279,10}, { 2559,11}, { 1407,10}, { 2815,11}, \ - { 1471,10}, { 2943,13}, { 383,12}, { 767,11}, \ - { 1599,12}, { 831,11}, { 1663,12}, { 895,11}, \ - { 1791,10}, { 3583,12}, { 959,11}, { 1919,10}, \ - { 3839,14}, { 255,13}, { 511,12}, { 1023,11}, \ - { 2047,12}, { 1087,11}, { 2175,12}, { 1151,11}, \ - { 2303,12}, { 1215,11}, { 2431,13}, { 639,12}, \ - { 1407,11}, { 2815,12}, { 1471,11}, { 2943,13}, \ - { 767,12}, { 1663,11}, { 3327,13}, { 895,12}, \ - { 1791,11}, { 3583,12}, { 1919,11}, { 3839,12}, \ - { 1983,11}, { 3967,14}, { 511,13}, { 1023,12}, \ - { 2239,13}, { 1151,12}, { 
2495,13}, { 1279,12}, \ - { 2559,13}, { 1407,12}, { 2943,11}, { 5887,14}, \ - { 767,13}, { 1535,12}, { 3071,13}, { 1663,12}, \ - { 3327,13}, { 1791,12}, { 3583,13}, { 1919,12}, \ - { 3967,15}, { 511,14}, { 1023,13}, { 2047,12}, \ - { 4095,13}, { 2175,12}, { 4351,13}, { 2431,12}, \ - { 4863,14}, { 1279,13}, { 2559,12}, { 5119,13}, \ - { 2943,12}, { 5887,14}, { 16384,15}, { 32768,16} } -#define MUL_FFT_TABLE3_SIZE 228 -#define MUL_FFT_THRESHOLD 7808 - -#define SQR_FFT_MODF_THRESHOLD 888 /* k = 6 */ -#define SQR_FFT_TABLE3 \ - { { 888, 6}, { 21, 7}, { 11, 6}, { 25, 7}, \ - { 13, 6}, { 27, 7}, { 15, 6}, { 31, 7}, \ - { 17, 6}, { 35, 7}, { 19, 6}, { 39, 7}, \ - { 23, 6}, { 47, 7}, { 27, 8}, { 15, 7}, \ - { 31, 6}, { 63, 7}, { 35, 8}, { 19, 7}, \ - { 39, 8}, { 23, 7}, { 47, 8}, { 31, 7}, \ - { 63, 8}, { 39, 9}, { 23, 8}, { 47, 7}, \ - { 95, 8}, { 51, 9}, { 31, 8}, { 67, 9}, \ - { 39, 8}, { 79, 9}, { 47, 8}, { 95, 9}, \ - { 55,10}, { 31, 9}, { 63, 8}, { 127, 9}, \ - { 79,10}, { 47, 9}, { 95, 8}, { 191,11}, \ - { 31,10}, { 63, 9}, { 127, 8}, { 255, 9}, \ - { 143,10}, { 79, 9}, { 167,10}, { 95, 9}, \ - { 191,10}, { 111,11}, { 63,10}, { 127, 9}, \ - { 255, 8}, { 511,10}, { 143, 9}, { 287, 8}, \ - { 575,10}, { 159,11}, { 95,10}, { 191, 9}, \ - { 383,12}, { 63,11}, { 127,10}, { 255, 9}, \ - { 511,10}, { 271, 9}, { 543, 8}, { 1087,10}, \ - { 287, 9}, { 575,11}, { 159,10}, { 319, 9}, \ - { 639, 8}, { 1279, 9}, { 671,11}, { 191,10}, \ - { 383, 9}, { 799, 8}, { 1599, 9}, { 831,11}, \ - { 223,12}, { 127,11}, { 255,10}, { 543, 9}, \ - { 1087,11}, { 287,10}, { 575, 9}, { 1215, 8}, \ - { 2431,11}, { 319,10}, { 639, 9}, { 1279,10}, \ - { 671, 9}, { 1407,12}, { 191,10}, { 799, 9}, \ - { 1599,10}, { 831, 9}, { 1663,10}, { 863, 9}, \ - { 1727,11}, { 447,13}, { 127,12}, { 255,11}, \ - { 511,10}, { 1023,11}, { 543,10}, { 1087, 9}, \ - { 2175,10}, { 1119,11}, { 575,10}, { 1151,11}, \ - { 607,10}, { 1215, 9}, { 2431,12}, { 319,11}, \ - { 639,10}, { 1279,11}, { 671,10}, { 1343, 9}, \ - { 
2687,11}, { 703,10}, { 1407,11}, { 735,10}, \ - { 1471, 9}, { 2943,10}, { 1503,12}, { 383,11}, \ - { 767,10}, { 1535,11}, { 799,10}, { 1599,11}, \ - { 863,10}, { 1727,12}, { 447,11}, { 895,10}, \ - { 1791,11}, { 959,10}, { 1919,13}, { 255,12}, \ - { 511,11}, { 1023,10}, { 2047,11}, { 1087,10}, \ - { 2175,11}, { 1119,12}, { 575,11}, { 1151,10}, \ - { 2303,11}, { 1215,10}, { 2431,12}, { 639,11}, \ - { 1407,10}, { 2815,11}, { 1471,10}, { 2943,12}, \ - { 767,11}, { 1599,12}, { 831,11}, { 1663,10}, \ - { 3327,12}, { 895,11}, { 1791,10}, { 3583,12}, \ - { 959,11}, { 1919,10}, { 3839,11}, { 1983,14}, \ - { 255,13}, { 511,12}, { 1023,11}, { 2047,12}, \ - { 1087,11}, { 2175,12}, { 1151,11}, { 2303,12}, \ - { 1215,11}, { 2431,13}, { 639,12}, { 1407,11}, \ - { 2815,12}, { 1471,11}, { 2943,13}, { 767,12}, \ - { 1663,11}, { 3327,12}, { 1727,13}, { 895,12}, \ - { 1791,11}, { 3583,12}, { 1919,11}, { 3839,12}, \ - { 1983,11}, { 3967,14}, { 511,13}, { 1023,12}, \ - { 2175,13}, { 1151,12}, { 2495,13}, { 1279,12}, \ - { 2559,13}, { 1407,12}, { 2943,11}, { 5887,14}, \ - { 767,13}, { 1535,12}, { 3071,13}, { 1663,12}, \ - { 3327,13}, { 1791,12}, { 3583,13}, { 1919,12}, \ - { 3967,15}, { 511,14}, { 1023,13}, { 2047,12}, \ - { 4095,13}, { 2175,12}, { 4351,13}, { 2431,14}, \ - { 1279,13}, { 2943,12}, { 5887,14}, { 16384,15}, \ - { 32768,16} } -#define SQR_FFT_TABLE3_SIZE 229 -#define SQR_FFT_THRESHOLD 7552 - -#define MULLO_BASECASE_THRESHOLD 8 -#define MULLO_DC_THRESHOLD 36 -#define MULLO_MUL_N_THRESHOLD 13463 - -#define DC_DIV_QR_THRESHOLD 45 -#define DC_DIVAPPR_Q_THRESHOLD 208 -#define DC_BDIV_QR_THRESHOLD 43 -#define DC_BDIV_Q_THRESHOLD 140 - -#define INV_MULMOD_BNM1_THRESHOLD 62 -#define INV_NEWTON_THRESHOLD 204 -#define INV_APPR_THRESHOLD 204 - -#define BINV_NEWTON_THRESHOLD 230 -#define REDC_1_TO_REDC_N_THRESHOLD 59 - -#define MU_DIV_QR_THRESHOLD 1752 -#define MU_DIVAPPR_Q_THRESHOLD 1528 -#define MUPI_DIV_QR_THRESHOLD 82 -#define MU_BDIV_QR_THRESHOLD 1360 -#define MU_BDIV_Q_THRESHOLD 
1470 - -#define POWM_SEC_TABLE 1,16,102,336,1221 - -#define MATRIX22_STRASSEN_THRESHOLD 16 -#define HGCD_THRESHOLD 120 -#define HGCD_APPR_THRESHOLD 143 -#define HGCD_REDUCE_THRESHOLD 4818 -#define GCD_DC_THRESHOLD 474 -#define GCDEXT_DC_THRESHOLD 345 -#define JACOBI_BASE_METHOD 4 - -#define GET_STR_DC_THRESHOLD 15 -#define GET_STR_PRECOMPUTE_THRESHOLD 33 -#define SET_STR_DC_THRESHOLD 298 -#define SET_STR_PRECOMPUTE_THRESHOLD 1187 - -#define FAC_DSC_THRESHOLD 602 -#define FAC_ODD_THRESHOLD 29 +/* 2083 MHz Athlon */ + +/* Generated by tuneup.c, 2008-12-23, gcc 3.4 */ + +#define MUL_KARATSUBA_THRESHOLD 28 +#define MUL_TOOM3_THRESHOLD 89 +#define MUL_TOOM44_THRESHOLD 130 + +#define SQR_BASECASE_THRESHOLD 0 /* always (native) */ +#define SQR_KARATSUBA_THRESHOLD 52 +#define SQR_TOOM3_THRESHOLD 89 +#define SQR_TOOM4_THRESHOLD 196 + +#define MULLOW_BASECASE_THRESHOLD 10 +#define MULLOW_DC_THRESHOLD 96 +#define MULLOW_MUL_N_THRESHOLD 234 + +#define DIV_SB_PREINV_THRESHOLD 0 /* always */ +#define DIV_DC_THRESHOLD 86 +#define POWM_THRESHOLD 134 +#define MATRIX22_STRASSEN_THRESHOLD 18 +#define HGCD_THRESHOLD 163 +#define GCD_DC_THRESHOLD 665 +#define GCDEXT_DC_THRESHOLD 605 +#define JACOBI_BASE_METHOD 1 + +#define USE_PREINV_DIVREM_1 1 /* native */ +#define USE_PREINV_MOD_1 1 /* native */ +#define DIVEXACT_1_THRESHOLD 0 /* always (native) */ +#define MODEXACT_1_ODD_THRESHOLD 0 /* always (native) */ + +#define GET_STR_DC_THRESHOLD 19 +#define GET_STR_PRECOMPUTE_THRESHOLD 35 +#define SET_STR_DC_THRESHOLD 826 +#define SET_STR_PRECOMPUTE_THRESHOLD 1691 + +#define MUL_FFT_TABLE { 432, 864, 1664, 4608, 10240, 40960, 163840, 655360, 0 } +#define MUL_FFT_MODF_THRESHOLD 496 +#define MUL_FFT_THRESHOLD 4864 + +#define SQR_FFT_TABLE { 432, 864, 1664, 4608, 10240, 40960, 98304, 655360, 0 } +#define SQR_FFT_MODF_THRESHOLD 432 +#define SQR_FFT_THRESHOLD 3840 + +/* These tables need to be updated. 
*/ + +#define MUL_FFT_TABLE2 {{1, 4}, {401, 5}, {801, 6}, {817, 5}, {865, 6}, {1025, 5}, {1057, 6}, {1601, 7}, {1633, 6}, {1729, 7}, {1921, 6}, {2113, 7}, {2177, 6}, {2241, 7}, {2433, 6}, {2497, 7}, {2945, 6}, {3009, 7}, {3457, 8}, {3521, 7}, {4481, 8}, {4865, 7}, {5249, 8}, {5889, 7}, {6017, 8}, {7553, 9}, {7681, 8}, {9985, 9}, {11777, 8}, {13057, 9}, {13825, 8}, {14081, 9}, {15873, 8}, {16641, 9}, {16897, 8}, {17153, 9}, {19969, 8}, {20225, 9}, {20737, 8}, {20993, 9}, {24065, 8}, {24577, 9}, {25089, 8}, {25345, 9}, {27393, 10}, {27649, 9}, {28161, 10}, {31745, 9}, {38913, 10}, {39425, 9}, {40449, 10}, {48129, 9}, {48641, 11}, {63489, 10}, {98305, 11}, {99329, 10}, {100353, 11}, {101377, 10}, {103425, 11}, {104449, 10}, {110593, 11}, {112641, 10}, {113665, 11}, {129025, 10}, {162817, 11}, {194561, 10}, {195585, 12}, {258049, 11}, {391169, 12}, {520193, 11}, {718849, 12}, {782337, 11}, {849921, 13}, {1040385, 12}, {2879489, 13}, {3137537, 12}, {3928065, 13}, {4186113, 12}, {4976641, 13}, {5234689, 12}, {6025217, 13}, {6283265, 12}, {MP_SIZE_T_MAX,0}} + +#define SQR_FFT_TABLE2 {{1, 4}, {401, 5}, {417, 4}, {433, 5}, {881, 6}, {961, 5}, {993, 6}, {1857, 7}, {1921, 6}, {2049, 7}, {2177, 6}, {2241, 7}, {2433, 6}, {2497, 7}, {3457, 8}, {3841, 7}, {4481, 8}, {4609, 7}, {4737, 8}, {4865, 7}, {5249, 8}, {5889, 7}, {6273, 8}, {7041, 9}, {7681, 8}, {9985, 9}, {10241, 8}, {10497, 9}, {11777, 8}, {13057, 9}, {15873, 8}, {16385, 9}, {16897, 8}, {17153, 9}, {19969, 8}, {20225, 9}, {20737, 8}, {20993, 9}, {24065, 8}, {24321, 9}, {24577, 10}, {24833, 9}, {25601, 10}, {27137, 9}, {27649, 10}, {31745, 9}, {38401, 10}, {38913, 9}, {40449, 10}, {48129, 9}, {48641, 11}, {63489, 10}, {99329, 11}, {101377, 10}, {103425, 11}, {104449, 10}, {107521, 11}, {110593, 10}, {113665, 11}, {129025, 10}, {154625, 11}, {155649, 10}, {162817, 11}, {194561, 12}, {258049, 11}, {391169, 12}, {520193, 11}, {718849, 12}, {727041, 11}, {729089, 12}, {782337, 11}, {849921, 13}, {1040385, 12}, {2879489, 13}, 
{3137537, 12}, {3928065, 13}, {4186113, 12}, {4714497, 13}, {5234689, 12}, {6025217, 13}, {6283265, 12}, {7073793, 13}, {7331841, 12}, {MP_SIZE_T_MAX,0}} diff --git a/gmp/mpn/x86/k7/invert_limb.asm b/gmp/mpn/x86/k7/invert_limb.asm deleted file mode 100644 index 6cce455a9d..0000000000 --- a/gmp/mpn/x86/k7/invert_limb.asm +++ /dev/null @@ -1,193 +0,0 @@ -dnl x86 mpn_invert_limb - -dnl Contributed to the GNU project by Niels Möller - -dnl Copyright 2009, 2011 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. - -include(`../config.m4') - -C cycles (approx) div -C P5 ? -C P6 model 0-8,10-12 ? -C P6 model 9 (Banias) ? -C P6 model 13 (Dothan) ? -C P4 model 0 (Willamette) ? -C P4 model 1 (?) ? -C P4 model 2 (Northwood) ? -C P4 model 3 (Prescott) ? -C P4 model 4 (Nocona) ? -C AMD K6 ? -C AMD K7 41 53 -C AMD K8 ? - -C TODO -C * These c/l numbers are for a non-PIC build. Consider falling back to using -C the 'div' instruction for PIC builds. 
-C * Perhaps use this file--or at least the algorithm--for more machines than k7. - -C Register usage: -C Input D in %edi -C Current approximation is in %eax and/or %ecx -C %ebx and %edx are temporaries -C %esi and %ebp are unused - -defframe(PARAM_DIVISOR,4) - -ASM_START() - -C Make approx_tab global to work around Apple relocation bug. -ifdef(`DARWIN',` - deflit(`approx_tab', MPN(invert_limb_tab)) - GLOBL approx_tab') - - TEXT - ALIGN(16) -PROLOGUE(mpn_invert_limb) -deflit(`FRAME', 0) - mov PARAM_DIVISOR, %eax - C Avoid push/pop on k7. - sub $8, %esp FRAME_subl_esp(8) - mov %ebx, (%esp) - mov %edi, 4(%esp) - - mov %eax, %edi - shr $22, %eax -ifdef(`PIC',` - LEA( approx_tab, %ebx) - movzwl -1024(%ebx, %eax, 2), %eax -',` - movzwl -1024+approx_tab(%eax, %eax), %eax C %eax = v0 -') - - C v1 = (v0 << 4) - ((v0*v0*d_21) >> 32) - 1 - mov %eax, %ecx - imul %eax, %eax - mov %edi, %ebx - shr $11, %ebx - inc %ebx - mul %ebx - mov %edi, %ebx C Prepare - shr %ebx - sbb %eax, %eax - sub %eax, %ebx C %ebx = d_31, %eax = mask - shl $4, %ecx - dec %ecx - sub %edx, %ecx C %ecx = v1 - - C v_2 = (v1 << 15) + ((v1 *(2^48 - v1 * d31 + (v1 >> 1) & mask)) >> 33) - imul %ecx, %ebx - and %ecx, %eax - shr %eax - sub %ebx, %eax - mul %ecx - mov %edi, %eax C Prepare for next mul - shl $15, %ecx - shr %edx - add %edx, %ecx C %ecx = v2 - - mul %ecx - add %edi, %eax - mov %ecx, %eax - adc %edi, %edx - sub %edx, %eax C %eax = v3 - - mov (%esp), %ebx - mov 4(%esp), %edi - add $8, %esp - - ret - -EPILOGUE() - -DEF_OBJECT(approx_tab,2) - .value 0x7fe1,0x7fa1,0x7f61,0x7f22,0x7ee3,0x7ea4,0x7e65,0x7e27 - .value 0x7de9,0x7dab,0x7d6d,0x7d30,0x7cf3,0x7cb6,0x7c79,0x7c3d - .value 0x7c00,0x7bc4,0x7b89,0x7b4d,0x7b12,0x7ad7,0x7a9c,0x7a61 - .value 0x7a27,0x79ec,0x79b2,0x7979,0x793f,0x7906,0x78cc,0x7894 - .value 0x785b,0x7822,0x77ea,0x77b2,0x777a,0x7742,0x770b,0x76d3 - .value 0x769c,0x7665,0x762f,0x75f8,0x75c2,0x758c,0x7556,0x7520 - .value 0x74ea,0x74b5,0x7480,0x744b,0x7416,0x73e2,0x73ad,0x7379 - .value 
0x7345,0x7311,0x72dd,0x72aa,0x7277,0x7243,0x7210,0x71de - .value 0x71ab,0x7179,0x7146,0x7114,0x70e2,0x70b1,0x707f,0x704e - .value 0x701c,0x6feb,0x6fba,0x6f8a,0x6f59,0x6f29,0x6ef9,0x6ec8 - .value 0x6e99,0x6e69,0x6e39,0x6e0a,0x6ddb,0x6dab,0x6d7d,0x6d4e - .value 0x6d1f,0x6cf1,0x6cc2,0x6c94,0x6c66,0x6c38,0x6c0a,0x6bdd - .value 0x6bb0,0x6b82,0x6b55,0x6b28,0x6afb,0x6acf,0x6aa2,0x6a76 - .value 0x6a49,0x6a1d,0x69f1,0x69c6,0x699a,0x696e,0x6943,0x6918 - .value 0x68ed,0x68c2,0x6897,0x686c,0x6842,0x6817,0x67ed,0x67c3 - .value 0x6799,0x676f,0x6745,0x671b,0x66f2,0x66c8,0x669f,0x6676 - .value 0x664d,0x6624,0x65fc,0x65d3,0x65aa,0x6582,0x655a,0x6532 - .value 0x650a,0x64e2,0x64ba,0x6493,0x646b,0x6444,0x641c,0x63f5 - .value 0x63ce,0x63a7,0x6381,0x635a,0x6333,0x630d,0x62e7,0x62c1 - .value 0x629a,0x6275,0x624f,0x6229,0x6203,0x61de,0x61b8,0x6193 - .value 0x616e,0x6149,0x6124,0x60ff,0x60da,0x60b6,0x6091,0x606d - .value 0x6049,0x6024,0x6000,0x5fdc,0x5fb8,0x5f95,0x5f71,0x5f4d - .value 0x5f2a,0x5f07,0x5ee3,0x5ec0,0x5e9d,0x5e7a,0x5e57,0x5e35 - .value 0x5e12,0x5def,0x5dcd,0x5dab,0x5d88,0x5d66,0x5d44,0x5d22 - .value 0x5d00,0x5cde,0x5cbd,0x5c9b,0x5c7a,0x5c58,0x5c37,0x5c16 - .value 0x5bf5,0x5bd4,0x5bb3,0x5b92,0x5b71,0x5b51,0x5b30,0x5b10 - .value 0x5aef,0x5acf,0x5aaf,0x5a8f,0x5a6f,0x5a4f,0x5a2f,0x5a0f - .value 0x59ef,0x59d0,0x59b0,0x5991,0x5972,0x5952,0x5933,0x5914 - .value 0x58f5,0x58d6,0x58b7,0x5899,0x587a,0x585b,0x583d,0x581f - .value 0x5800,0x57e2,0x57c4,0x57a6,0x5788,0x576a,0x574c,0x572e - .value 0x5711,0x56f3,0x56d5,0x56b8,0x569b,0x567d,0x5660,0x5643 - .value 0x5626,0x5609,0x55ec,0x55cf,0x55b2,0x5596,0x5579,0x555d - .value 0x5540,0x5524,0x5507,0x54eb,0x54cf,0x54b3,0x5497,0x547b - .value 0x545f,0x5443,0x5428,0x540c,0x53f0,0x53d5,0x53b9,0x539e - .value 0x5383,0x5368,0x534c,0x5331,0x5316,0x52fb,0x52e0,0x52c6 - .value 0x52ab,0x5290,0x5276,0x525b,0x5240,0x5226,0x520c,0x51f1 - .value 0x51d7,0x51bd,0x51a3,0x5189,0x516f,0x5155,0x513b,0x5121 - .value 
0x5108,0x50ee,0x50d5,0x50bb,0x50a2,0x5088,0x506f,0x5056 - .value 0x503c,0x5023,0x500a,0x4ff1,0x4fd8,0x4fbf,0x4fa6,0x4f8e - .value 0x4f75,0x4f5c,0x4f44,0x4f2b,0x4f13,0x4efa,0x4ee2,0x4eca - .value 0x4eb1,0x4e99,0x4e81,0x4e69,0x4e51,0x4e39,0x4e21,0x4e09 - .value 0x4df1,0x4dda,0x4dc2,0x4daa,0x4d93,0x4d7b,0x4d64,0x4d4d - .value 0x4d35,0x4d1e,0x4d07,0x4cf0,0x4cd8,0x4cc1,0x4caa,0x4c93 - .value 0x4c7d,0x4c66,0x4c4f,0x4c38,0x4c21,0x4c0b,0x4bf4,0x4bde - .value 0x4bc7,0x4bb1,0x4b9a,0x4b84,0x4b6e,0x4b58,0x4b41,0x4b2b - .value 0x4b15,0x4aff,0x4ae9,0x4ad3,0x4abd,0x4aa8,0x4a92,0x4a7c - .value 0x4a66,0x4a51,0x4a3b,0x4a26,0x4a10,0x49fb,0x49e5,0x49d0 - .value 0x49bb,0x49a6,0x4990,0x497b,0x4966,0x4951,0x493c,0x4927 - .value 0x4912,0x48fe,0x48e9,0x48d4,0x48bf,0x48ab,0x4896,0x4881 - .value 0x486d,0x4858,0x4844,0x482f,0x481b,0x4807,0x47f3,0x47de - .value 0x47ca,0x47b6,0x47a2,0x478e,0x477a,0x4766,0x4752,0x473e - .value 0x472a,0x4717,0x4703,0x46ef,0x46db,0x46c8,0x46b4,0x46a1 - .value 0x468d,0x467a,0x4666,0x4653,0x4640,0x462c,0x4619,0x4606 - .value 0x45f3,0x45e0,0x45cd,0x45ba,0x45a7,0x4594,0x4581,0x456e - .value 0x455b,0x4548,0x4536,0x4523,0x4510,0x44fe,0x44eb,0x44d8 - .value 0x44c6,0x44b3,0x44a1,0x448f,0x447c,0x446a,0x4458,0x4445 - .value 0x4433,0x4421,0x440f,0x43fd,0x43eb,0x43d9,0x43c7,0x43b5 - .value 0x43a3,0x4391,0x437f,0x436d,0x435c,0x434a,0x4338,0x4327 - .value 0x4315,0x4303,0x42f2,0x42e0,0x42cf,0x42bd,0x42ac,0x429b - .value 0x4289,0x4278,0x4267,0x4256,0x4244,0x4233,0x4222,0x4211 - .value 0x4200,0x41ef,0x41de,0x41cd,0x41bc,0x41ab,0x419a,0x418a - .value 0x4179,0x4168,0x4157,0x4147,0x4136,0x4125,0x4115,0x4104 - .value 0x40f4,0x40e3,0x40d3,0x40c2,0x40b2,0x40a2,0x4091,0x4081 - .value 0x4071,0x4061,0x4050,0x4040,0x4030,0x4020,0x4010,0x4000 -END_OBJECT(approx_tab) diff --git a/gmp/mpn/x86/k7/mmx/com.asm b/gmp/mpn/x86/k7/mmx/com_n.asm index a258c224f1..068c01f076 100644 --- a/gmp/mpn/x86/k7/mmx/com.asm +++ b/gmp/mpn/x86/k7/mmx/com_n.asm @@ -1,32 +1,21 @@ -dnl AMD Athlon mpn_com -- mpn 
bitwise one's complement. +dnl AMD Athlon mpn_com_n -- mpn bitwise one's complement. dnl Copyright 2002 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. 
include(`../config.m4') @@ -34,7 +23,7 @@ include(`../config.m4') C K7: 1.0 cycles/limb -C void mpn_com (mp_ptr dst, mp_srcptr src, mp_size_t size); +C void mpn_com_n (mp_ptr dst, mp_srcptr src, mp_size_t size); C C The loop form below is necessary for the claimed speed. It needs to be C aligned to a 16 byte boundary and only 16 bytes long. Maybe that's so it @@ -62,7 +51,7 @@ defframe(PARAM_DST, 4) TEXT ALIGN(16) -PROLOGUE(mpn_com) +PROLOGUE(mpn_com_n) deflit(`FRAME',0) movl PARAM_DST, %edx diff --git a/gmp/mpn/x86/k7/mmx/copyd.asm b/gmp/mpn/x86/k7/mmx/copyd.asm index 59ece40920..4601fcd75a 100644 --- a/gmp/mpn/x86/k7/mmx/copyd.asm +++ b/gmp/mpn/x86/k7/mmx/copyd.asm @@ -1,32 +1,21 @@ dnl AMD K7 mpn_copyd -- copy limb vector, decrementing. dnl Copyright 1999, 2000, 2002 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. 
+dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') diff --git a/gmp/mpn/x86/k7/mmx/copyi.asm b/gmp/mpn/x86/k7/mmx/copyi.asm index 9a28f927ec..a17d575ff4 100644 --- a/gmp/mpn/x86/k7/mmx/copyi.asm +++ b/gmp/mpn/x86/k7/mmx/copyi.asm @@ -1,32 +1,21 @@ dnl AMD K7 mpn_copyi -- copy limb vector, incrementing. dnl Copyright 1999, 2000, 2002, 2003 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. 
dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') diff --git a/gmp/mpn/x86/k7/mmx/divrem_1.asm b/gmp/mpn/x86/k7/mmx/divrem_1.asm index cf343280bb..fa5824c7b9 100644 --- a/gmp/mpn/x86/k7/mmx/divrem_1.asm +++ b/gmp/mpn/x86/k7/mmx/divrem_1.asm @@ -1,33 +1,22 @@ dnl AMD K7 mpn_divrem_1, mpn_divrem_1c, mpn_preinv_divrem_1 -- mpn by limb dnl division. -dnl Copyright 1999-2002, 2004 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: +dnl Copyright 1999, 2000, 2001, 2002, 2004 Free Software Foundation, Inc. dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. 
+dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') @@ -456,7 +445,7 @@ C chain, and nothing better than 18 cycles has been found when using it. C The jump is taken only when q1 is 0xFFFFFFFF, and on random data this will C be an extremely rare event. C -C Branch mispredictions will hit random occurrences of q1==0xFFFFFFFF, but +C Branch mispredictions will hit random occurrances of q1==0xFFFFFFFF, but C if some special data is coming out with this always, the q1_ff special C case actually runs at 15 c/l. 0x2FFF...FFFD divided by 3 is a good way to C induce the q1_ff case, for speed measurements or testing. Note that @@ -735,12 +724,12 @@ C q1 is the high word of m*n2+b*n2 and the following shows q1<=b-2 always. C rnd() means rounding down to a multiple of d. 
C C m*n2 + b*n2 <= m*(d-1) + b*(d-1) -C = m*d + b*d - m - b -C = floor((b(b-d)-1)/d)*d + b*d - m - b -C = rnd(b(b-d)-1) + b*d - m - b -C = rnd(b(b-d)-1 + b*d) - m - b -C = rnd(b*b-1) - m - b -C <= (b-2)*b +C = m*d + b*d - m - b +C = floor((b(b-d)-1)/d)*d + b*d - m - b +C = rnd(b(b-d)-1) + b*d - m - b +C = rnd(b(b-d)-1 + b*d) - m - b +C = rnd(b*b-1) - m - b +C <= (b-2)*b C C Unchanged from the general case is that the final quotient limb q can be C either q1 or q1+1, and the q1+1 case occurs often. This can be seen from diff --git a/gmp/mpn/x86/k7/mmx/lshift.asm b/gmp/mpn/x86/k7/mmx/lshift.asm index b3383cf2c3..b3bff8ffd1 100644 --- a/gmp/mpn/x86/k7/mmx/lshift.asm +++ b/gmp/mpn/x86/k7/mmx/lshift.asm @@ -1,32 +1,21 @@ dnl AMD K7 mpn_lshift -- mpn left shift. -dnl Copyright 1999-2002 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: +dnl Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc. dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') diff --git a/gmp/mpn/x86/k7/mmx/mod_1.asm b/gmp/mpn/x86/k7/mmx/mod_1.asm new file mode 100644 index 0000000000..2b42e55caf --- /dev/null +++ b/gmp/mpn/x86/k7/mmx/mod_1.asm @@ -0,0 +1,509 @@ +dnl AMD K7 mpn_mod_1 -- mpn by limb remainder. + +dnl Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc. +dnl +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. +dnl +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. + +include(`../config.m4') + + +C K7: 17.0 cycles/limb. 
+ + +C mp_limb_t mpn_mod_1 (mp_srcptr src, mp_size_t size, mp_limb_t divisor); +C mp_limb_t mpn_mod_1c (mp_srcptr src, mp_size_t size, mp_limb_t divisor, +C mp_limb_t carry); +C mp_limb_t mpn_preinv_mod_1 (mp_srcptr src, mp_size_t size, mp_limb_t divisor, +C mp_limb_t inverse); +C +C The code here is the same as mpn_divrem_1, but with the quotient +C discarded. See mpn/x86/k7/mmx/divrem_1.asm for some comments. + + +dnl MUL_THRESHOLD is the size at which the multiply by inverse method is +dnl used, rather than plain "divl"s. Minimum value 2. +dnl +dnl The inverse takes about 50 cycles to calculate, but after that the +dnl multiply is 17 c/l versus division at 41 c/l. +dnl +dnl Using mul or div is about the same speed at 3 limbs, so the threshold +dnl is set to 4 to get the smaller div code used at 3. + +deflit(MUL_THRESHOLD, 4) + + +defframe(PARAM_INVERSE,16) dnl mpn_preinv_mod_1 +defframe(PARAM_CARRY, 16) dnl mpn_mod_1c +defframe(PARAM_DIVISOR,12) +defframe(PARAM_SIZE, 8) +defframe(PARAM_SRC, 4) + +defframe(SAVE_EBX, -4) +defframe(SAVE_ESI, -8) +defframe(SAVE_EDI, -12) +defframe(SAVE_EBP, -16) + +defframe(VAR_NORM, -20) +defframe(VAR_INVERSE, -24) +defframe(VAR_SRC_STOP,-28) + +deflit(STACK_SPACE, 28) + + TEXT + + ALIGN(32) +PROLOGUE(mpn_preinv_mod_1) +deflit(`FRAME',0) + movl PARAM_SRC, %ecx + movl PARAM_SIZE, %eax + subl $STACK_SPACE, %esp FRAME_subl_esp(STACK_SPACE) + + movl %ebp, SAVE_EBP + movl PARAM_DIVISOR, %ebp + + movl %edi, SAVE_EDI + movl PARAM_INVERSE, %edx + + movl %esi, SAVE_ESI + movl -4(%ecx,%eax,4), %edi C src high limb + leal -16(%ecx,%eax,4), %ecx C &src[size-4] + + movl %ebx, SAVE_EBX + movl PARAM_INVERSE, %edx + + movl $0, VAR_NORM C l==0 + + movl %edi, %esi + subl %ebp, %edi C high-divisor + + cmovc( %esi, %edi) C restore if underflow + decl %eax + jz L(done_edi) C size==1, high-divisor only + + movl 8(%ecx), %esi C src second high limb + movl %edx, VAR_INVERSE + + movl $32, %ebx C 32-l + decl %eax + jz L(inverse_one_left) C size==2, one divide 
+ + movd %ebx, %mm7 C 32-l + decl %eax + jz L(inverse_two_left) C size==3, two divides + + jmp L(inverse_top) C size>=4 + + +L(done_edi): + movl SAVE_ESI, %esi + movl SAVE_EBP, %ebp + movl %edi, %eax + + movl SAVE_EDI, %edi + addl $STACK_SPACE, %esp + + ret + +EPILOGUE() + + + ALIGN(32) +PROLOGUE(mpn_mod_1c) +deflit(`FRAME',0) + movl PARAM_CARRY, %edx + movl PARAM_SIZE, %ecx + subl $STACK_SPACE, %esp +deflit(`FRAME',STACK_SPACE) + + movl %ebp, SAVE_EBP + movl PARAM_DIVISOR, %ebp + + movl %esi, SAVE_ESI + movl PARAM_SRC, %esi + jmp L(start_1c) + +EPILOGUE() + + + ALIGN(32) +PROLOGUE(mpn_mod_1) +deflit(`FRAME',0) + + movl PARAM_SIZE, %ecx + movl $0, %edx C initial carry (if can't skip a div) + subl $STACK_SPACE, %esp +deflit(`FRAME',STACK_SPACE) + + movl %esi, SAVE_ESI + movl PARAM_SRC, %esi + + movl %ebp, SAVE_EBP + movl PARAM_DIVISOR, %ebp + + orl %ecx, %ecx + jz L(divide_done) + + movl -4(%esi,%ecx,4), %eax C src high limb + + cmpl %ebp, %eax C carry flag if high<divisor + + cmovc( %eax, %edx) C src high limb as initial carry + sbbl $0, %ecx C size-1 to skip one div + jz L(divide_done) + + + ALIGN(16) +L(start_1c): + C eax + C ebx + C ecx size + C edx carry + C esi src + C edi + C ebp divisor + + cmpl $MUL_THRESHOLD, %ecx + jae L(mul_by_inverse) + + + +C With a MUL_THRESHOLD of 4, this "loop" only ever does 1 to 3 iterations, +C but it's already fast and compact, and there's nothing to gain by +C expanding it out. +C +C Using PARAM_DIVISOR in the divl is a couple of cycles faster than %ebp. 
+ + orl %ecx, %ecx + jz L(divide_done) + + +L(divide_top): + C eax scratch (quotient) + C ebx + C ecx counter, limbs, decrementing + C edx scratch (remainder) + C esi src + C edi + C ebp + + movl -4(%esi,%ecx,4), %eax + + divl PARAM_DIVISOR + + decl %ecx + jnz L(divide_top) + + +L(divide_done): + movl SAVE_ESI, %esi + movl SAVE_EBP, %ebp + addl $STACK_SPACE, %esp + + movl %edx, %eax + + ret + + + +C ----------------------------------------------------------------------------- + +L(mul_by_inverse): + C eax + C ebx + C ecx size + C edx carry + C esi src + C edi + C ebp divisor + + bsrl %ebp, %eax C 31-l + + movl %ebx, SAVE_EBX + movl %ecx, %ebx C size + + movl %edi, SAVE_EDI + movl $31, %ecx + + movl %edx, %edi C carry + movl $-1, %edx + + C + + xorl %eax, %ecx C l + incl %eax C 32-l + + shll %cl, %ebp C d normalized + movl %ecx, VAR_NORM + + movd %eax, %mm7 C 32-l + + movl $-1, %eax + subl %ebp, %edx C (b-d)-1 so edx:eax = b*(b-d)-1 + + divl %ebp C floor (b*(b-d)-1) / d + + C + + movl %eax, VAR_INVERSE + leal -12(%esi,%ebx,4), %eax C &src[size-3] + + movl 8(%eax), %esi C src high limb + movl 4(%eax), %edx C src second highest limb + + shldl( %cl, %esi, %edi) C n2 = carry,high << l + + shldl( %cl, %edx, %esi) C n10 = high,second << l + + movl %eax, %ecx C &src[size-3] + + +ifelse(MUL_THRESHOLD,2,` + cmpl $2, %ebx + je L(inverse_two_left) +') + + +C The dependent chain here is the same as in mpn_divrem_1, but a few +C instructions are saved by not needing to store the quotient limbs. +C Unfortunately this doesn't get the code down to the theoretical 16 c/l. +C +C There's four dummy instructions in the loop, all of which are necessary +C for the claimed 17 c/l. It's a 1 to 3 cycle slowdown if any are removed, +C or changed from load to store or vice versa. They're not completely +C random, since they correspond to what mpn_divrem_1 has, but there's no +C obvious reason why they're necessary. 
Presumably they induce something +C good in the out of order execution, perhaps through some load/store +C ordering and/or decoding effects. +C +C The q1==0xFFFFFFFF case is handled here the same as in mpn_divrem_1. On +C special data that comes out as q1==0xFFFFFFFF always, the loop runs at +C about 13.5 c/l. + + ALIGN(32) +L(inverse_top): + C eax scratch + C ebx scratch (nadj, q1) + C ecx src pointer, decrementing + C edx scratch + C esi n10 + C edi n2 + C ebp divisor + C + C mm0 scratch (src qword) + C mm7 rshift for normalization + + cmpl $0x80000000, %esi C n1 as 0=c, 1=nc + movl %edi, %eax C n2 + movl PARAM_SIZE, %ebx C dummy + + leal (%ebp,%esi), %ebx + cmovc( %esi, %ebx) C nadj = n10 + (-n1 & d), ignoring overflow + sbbl $-1, %eax C n2+n1 + + mull VAR_INVERSE C m*(n2+n1) + + movq (%ecx), %mm0 C next src limb and the one below it + subl $4, %ecx + + movl %ecx, PARAM_SIZE C dummy + + C + + addl %ebx, %eax C m*(n2+n1) + nadj, low giving carry flag + leal 1(%edi), %ebx C n2+1 + movl %ebp, %eax C d + + C + + adcl %edx, %ebx C 1 + high(n2<<32 + m*(n2+n1) + nadj) = q1+1 + jz L(q1_ff) + nop C dummy + + mull %ebx C (q1+1)*d + + psrlq %mm7, %mm0 + leal (%ecx), %ecx C dummy + + C + + C + + subl %eax, %esi C low n - (q1+1)*d + movl PARAM_SRC, %eax + + C + + sbbl %edx, %edi C high n - (q1+1)*d, 0 or -1 + movl %esi, %edi C remainder -> n2 + leal (%ebp,%esi), %edx + + movd %mm0, %esi + + cmovc( %edx, %edi) C n - q1*d if underflow from using q1+1 + cmpl %eax, %ecx + jae L(inverse_top) + + +L(inverse_loop_done): + + +C ----------------------------------------------------------------------------- + +L(inverse_two_left): + C eax scratch + C ebx scratch (nadj, q1) + C ecx &src[-1] + C edx scratch + C esi n10 + C edi n2 + C ebp divisor + C + C mm0 scratch (src dword) + C mm7 rshift + + cmpl $0x80000000, %esi C n1 as 0=c, 1=nc + movl %edi, %eax C n2 + + leal (%ebp,%esi), %ebx + cmovc( %esi, %ebx) C nadj = n10 + (-n1 & d), ignoring overflow + sbbl $-1, %eax C n2+n1 + + mull 
VAR_INVERSE C m*(n2+n1) + + movd 4(%ecx), %mm0 C src low limb + + C + + C + + addl %ebx, %eax C m*(n2+n1) + nadj, low giving carry flag + leal 1(%edi), %ebx C n2+1 + movl %ebp, %eax C d + + adcl %edx, %ebx C 1 + high(n2<<32 + m*(n2+n1) + nadj) = q1+1 + + sbbl $0, %ebx + + mull %ebx C (q1+1)*d + + psllq $32, %mm0 + + psrlq %mm7, %mm0 + + C + + subl %eax, %esi + + C + + sbbl %edx, %edi C n - (q1+1)*d + movl %esi, %edi C remainder -> n2 + leal (%ebp,%esi), %edx + + movd %mm0, %esi + + cmovc( %edx, %edi) C n - q1*d if underflow from using q1+1 + + +L(inverse_one_left): + C eax scratch + C ebx scratch (nadj, q1) + C ecx + C edx scratch + C esi n10 + C edi n2 + C ebp divisor + C + C mm0 src limb, shifted + C mm7 rshift + + cmpl $0x80000000, %esi C n1 as 0=c, 1=nc + movl %edi, %eax C n2 + + leal (%ebp,%esi), %ebx + cmovc( %esi, %ebx) C nadj = n10 + (-n1 & d), ignoring overflow + sbbl $-1, %eax C n2+n1 + + mull VAR_INVERSE C m*(n2+n1) + + movl VAR_NORM, %ecx C for final denorm + + C + + C + + addl %ebx, %eax C m*(n2+n1) + nadj, low giving carry flag + leal 1(%edi), %ebx C n2+1 + movl %ebp, %eax C d + + C + + adcl %edx, %ebx C 1 + high(n2<<32 + m*(n2+n1) + nadj) = q1+1 + + sbbl $0, %ebx + + mull %ebx C (q1+1)*d + + movl SAVE_EBX, %ebx + + C + + C + + subl %eax, %esi + + movl %esi, %eax C remainder + movl SAVE_ESI, %esi + + sbbl %edx, %edi C n - (q1+1)*d + leal (%ebp,%eax), %edx + movl SAVE_EBP, %ebp + + cmovc( %edx, %eax) C n - q1*d if underflow from using q1+1 + movl SAVE_EDI, %edi + + shrl %cl, %eax C denorm remainder + addl $STACK_SPACE, %esp + emms + + ret + + +C ----------------------------------------------------------------------------- +C +C Special case for q1=0xFFFFFFFF, giving q=0xFFFFFFFF meaning the low dword +C of q*d is simply -d and the remainder n-q*d = n10+d + +L(q1_ff): + C eax (divisor) + C ebx (q1+1 == 0) + C ecx src pointer + C edx + C esi n10 + C edi (n2) + C ebp divisor + + movl PARAM_SRC, %edx + leal (%ebp,%esi), %edi C n-q*d remainder -> next n2 + 
psrlq %mm7, %mm0 + + movd %mm0, %esi C next n10 + + cmpl %edx, %ecx + jae L(inverse_top) + jmp L(inverse_loop_done) + +EPILOGUE() diff --git a/gmp/mpn/x86/k7/mmx/popham.asm b/gmp/mpn/x86/k7/mmx/popham.asm index 95965b74d4..5dc0a78c42 100644 --- a/gmp/mpn/x86/k7/mmx/popham.asm +++ b/gmp/mpn/x86/k7/mmx/popham.asm @@ -1,40 +1,29 @@ dnl AMD K7 mpn_popcount, mpn_hamdist -- population count and hamming dnl distance. -dnl Copyright 2000-2002 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: +dnl Copyright 2000, 2001, 2002 Free Software Foundation, Inc. dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. 
dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') C popcount hamdist C P3 generic 6.5 7 -C P3 model 9 (Banias) 5.7 6.1 +C P3 model 9 (Banias) ? ? C P3 model 13 (Dothan) 5.75 6 C K7 5 6 diff --git a/gmp/mpn/x86/k7/mmx/rshift.asm b/gmp/mpn/x86/k7/mmx/rshift.asm index 345d23a25e..3566ce85d7 100644 --- a/gmp/mpn/x86/k7/mmx/rshift.asm +++ b/gmp/mpn/x86/k7/mmx/rshift.asm @@ -1,32 +1,21 @@ dnl AMD K7 mpn_rshift -- mpn right shift. -dnl Copyright 1999-2002 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: +dnl Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc. dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') diff --git a/gmp/mpn/x86/k7/mod_1_1.asm b/gmp/mpn/x86/k7/mod_1_1.asm deleted file mode 100644 index 1bbe6f92d7..0000000000 --- a/gmp/mpn/x86/k7/mod_1_1.asm +++ /dev/null @@ -1,221 +0,0 @@ -dnl x86-32 mpn_mod_1_1p, requiring cmov. - -dnl Contributed to the GNU project by Niels Möller and Torbjorn Granlund. - -dnl Copyright 2010, 2011 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. 
-dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. - -include(`../config.m4') - -C cycles/limb -C P5 ? -C P6 model 0-8,10-12 ? -C P6 model 9 (Banias) ? -C P6 model 13 (Dothan) ? -C P4 model 0 (Willamette) ? -C P4 model 1 (?) ? -C P4 model 2 (Northwood) ? -C P4 model 3 (Prescott) ? -C P4 model 4 (Nocona) ? -C AMD K6 ? -C AMD K7 7 -C AMD K8 ? - -define(`B2mb', `%ebx') -define(`r0', `%esi') -define(`r2', `%ebp') -define(`t0', `%edi') -define(`ap', `%ecx') C Also shift count - -C Stack frame -C pre 36(%esp) -C b 32(%esp) -C n 28(%esp) -C ap 24(%esp) -C return 20(%esp) -C %ebp 16(%esp) -C %edi 12(%esp) -C %esi 8(%esp) -C %ebx 4(%esp) -C B2mod (%esp) - -define(`B2modb', `(%esp)') -define(`n', `28(%esp)') -define(`b', `32(%esp)') -define(`pre', `36(%esp)') - -C mp_limb_t -C mpn_mod_1_1p (mp_srcptr ap, mp_size_t n, mp_limb_t b, mp_limb_t pre[4]) -C -C The pre array contains bi, cnt, B1modb, B2modb -C Note: This implementation needs B1modb only when cnt > 0 - -ASM_START() - TEXT - ALIGN(8) -PROLOGUE(mpn_mod_1_1p) - push %ebp - push %edi - push %esi - push %ebx - mov 32(%esp), %ebp C pre[] - - mov 12(%ebp), %eax C B2modb - push %eax C Put it on stack - - mov n, %edx - mov 24(%esp), ap - - lea (ap, %edx, 4), ap - mov -4(ap), %eax - cmp $3, %edx - jnc L(first) - mov -8(ap), r0 - jmp L(reduce_two) - -L(first): - C First iteration, no r2 - mull B2modb - mov -12(ap), r0 - add %eax, r0 - mov -8(ap), %eax - adc %edx, %eax - sbb r2, r2 - subl $3, n - lea -16(ap), ap - jz L(reduce_three) - - mov B2modb, B2mb - sub b, B2mb - lea (B2mb, r0), t0 - jmp L(mid) - - ALIGN(16) -L(top): C Loopmixed to 7 c/l on k7 - add %eax, r0 - lea (B2mb, r0), t0 - mov r2, %eax - adc %edx, %eax - sbb r2, r2 -L(mid): mull B2modb - and B2modb, r2 - add r0, r2 - decl n - mov (ap), r0 - cmovc( t0, r2) - lea -4(ap), ap - jnz L(top) - - add %eax, r0 - mov r2, %eax 
- adc %edx, %eax - sbb r2, r2 - -L(reduce_three): - C Eliminate r2 - and b, r2 - sub r2, %eax - -L(reduce_two): - mov pre, %ebp - movb 4(%ebp), %cl - test %cl, %cl - jz L(normalized) - - C Unnormalized, use B1modb to reduce to size < B b - mull 8(%ebp) - xor t0, t0 - add %eax, r0 - adc %edx, t0 - mov t0, %eax - - C Left-shift to normalize - shld %cl, r0, %eax C Always use shld? - - shl %cl, r0 - jmp L(udiv) - -L(normalized): - mov %eax, t0 - sub b, t0 - cmovnc( t0, %eax) - -L(udiv): - lea 1(%eax), t0 - mull (%ebp) - mov b, %ebx C Needed in register for lea - add r0, %eax - adc t0, %edx - imul %ebx, %edx - sub %edx, r0 - cmp r0, %eax - lea (%ebx, r0), %eax - cmovnc( r0, %eax) - cmp %ebx, %eax - jnc L(fix) -L(ok): shr %cl, %eax - - add $4, %esp - pop %ebx - pop %esi - pop %edi - pop %ebp - - ret -L(fix): sub %ebx, %eax - jmp L(ok) -EPILOGUE() - -PROLOGUE(mpn_mod_1_1p_cps) - push %ebp - mov 12(%esp), %ebp - push %esi - bsr %ebp, %ecx - push %ebx - xor $31, %ecx - mov 16(%esp), %esi - sal %cl, %ebp - mov %ebp, %edx - not %edx - mov $-1, %eax - div %ebp C On K7, invert_limb would be a few cycles faster. - mov %eax, (%esi) C store bi - mov %ecx, 4(%esi) C store cnt - neg %ebp - mov $1, %edx - shld %cl, %eax, %edx - imul %ebp, %edx - shr %cl, %edx - imul %ebp, %eax - mov %edx, 8(%esi) C store B1modb - mov %eax, 12(%esi) C store B2modb - pop %ebx - pop %esi - pop %ebp - ret -EPILOGUE() diff --git a/gmp/mpn/x86/k7/mod_1_4.asm b/gmp/mpn/x86/k7/mod_1_4.asm deleted file mode 100644 index bb7597edd2..0000000000 --- a/gmp/mpn/x86/k7/mod_1_4.asm +++ /dev/null @@ -1,260 +0,0 @@ -dnl x86-32 mpn_mod_1s_4p, requiring cmov. - -dnl Contributed to the GNU project by Torbjorn Granlund. - -dnl Copyright 2009, 2010 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. 
-dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. - -include(`../config.m4') - -C cycles/limb -C P5 ? -C P6 model 0-8,10-12 ? -C P6 model 9 (Banias) ? -C P6 model 13 (Dothan) 6 -C P4 model 0 (Willamette) ? -C P4 model 1 (?) ? -C P4 model 2 (Northwood) 15.5 -C P4 model 3 (Prescott) ? -C P4 model 4 (Nocona) ? -C AMD K6 ? -C AMD K7 4.75 -C AMD K8 ? 
- -ASM_START() - TEXT - ALIGN(16) -PROLOGUE(mpn_mod_1s_4p) - push %ebp - push %edi - push %esi - push %ebx - sub $28, %esp - mov 60(%esp), %edi C cps[] - mov 8(%edi), %eax - mov 12(%edi), %edx - mov 16(%edi), %ecx - mov 20(%edi), %esi - mov 24(%edi), %edi - mov %eax, 4(%esp) - mov %edx, 8(%esp) - mov %ecx, 12(%esp) - mov %esi, 16(%esp) - mov %edi, 20(%esp) - mov 52(%esp), %eax C n - xor %edi, %edi - mov 48(%esp), %esi C up - lea -12(%esi,%eax,4), %esi - and $3, %eax - je L(b0) - cmp $2, %eax - jc L(b1) - je L(b2) - -L(b3): mov 4(%esi), %eax - mull 4(%esp) - mov (%esi), %ebp - add %eax, %ebp - adc %edx, %edi - mov 8(%esi), %eax - mull 8(%esp) - lea -12(%esi), %esi - jmp L(m0) - -L(b0): mov (%esi), %eax - mull 4(%esp) - mov -4(%esi), %ebp - add %eax, %ebp - adc %edx, %edi - mov 4(%esi), %eax - mull 8(%esp) - add %eax, %ebp - adc %edx, %edi - mov 8(%esi), %eax - mull 12(%esp) - lea -16(%esi), %esi - jmp L(m0) - -L(b1): mov 8(%esi), %ebp - lea -4(%esi), %esi - jmp L(m1) - -L(b2): mov 8(%esi), %edi - mov 4(%esi), %ebp - lea -8(%esi), %esi - jmp L(m1) - - ALIGN(16) -L(top): mov (%esi), %eax - mull 4(%esp) - mov -4(%esi), %ebx - xor %ecx, %ecx - add %eax, %ebx - adc %edx, %ecx - mov 4(%esi), %eax - mull 8(%esp) - add %eax, %ebx - adc %edx, %ecx - mov 8(%esi), %eax - mull 12(%esp) - add %eax, %ebx - adc %edx, %ecx - lea -16(%esi), %esi - mov 16(%esp), %eax - mul %ebp - add %eax, %ebx - adc %edx, %ecx - mov 20(%esp), %eax - mul %edi - mov %ebx, %ebp - mov %ecx, %edi -L(m0): add %eax, %ebp - adc %edx, %edi -L(m1): subl $4, 52(%esp) - ja L(top) - -L(end): mov 4(%esp), %eax - mul %edi - mov 60(%esp), %edi - add %eax, %ebp - adc $0, %edx - mov 4(%edi), %ecx - mov %edx, %esi - mov %ebp, %eax - sal %cl, %esi - mov %ecx, %ebx - neg %ecx - shr %cl, %eax - or %esi, %eax - lea 1(%eax), %esi - mull (%edi) - mov %ebx, %ecx - mov %eax, %ebx - mov %ebp, %eax - mov 56(%esp), %ebp - sal %cl, %eax - add %eax, %ebx - adc %esi, %edx - imul %ebp, %edx - sub %edx, %eax - lea (%eax,%ebp), %edx - 
cmp %eax, %ebx - cmovc( %edx, %eax) - mov %eax, %edx - sub %ebp, %eax - cmovc( %edx, %eax) - add $28, %esp - pop %ebx - pop %esi - pop %edi - pop %ebp - shr %cl, %eax - ret -EPILOGUE() - - ALIGN(16) -PROLOGUE(mpn_mod_1s_4p_cps) -C CAUTION: This is the same code as in pentium4/sse2/mod_1_4.asm - push %ebp - push %edi - push %esi - push %ebx - mov 20(%esp), %ebp C FIXME: avoid bp for 0-idx - mov 24(%esp), %ebx - bsr %ebx, %ecx - xor $31, %ecx - sal %cl, %ebx C b << cnt - mov %ebx, %edx - not %edx - mov $-1, %eax - div %ebx - xor %edi, %edi - sub %ebx, %edi - mov $1, %esi - mov %eax, (%ebp) C store bi - mov %ecx, 4(%ebp) C store cnt - shld %cl, %eax, %esi - imul %edi, %esi - mov %eax, %edi - mul %esi - - add %esi, %edx - shr %cl, %esi - mov %esi, 8(%ebp) C store B1modb - - not %edx - imul %ebx, %edx - lea (%edx,%ebx), %esi - cmp %edx, %eax - cmovnc( %edx, %esi) - mov %edi, %eax - mul %esi - - add %esi, %edx - shr %cl, %esi - mov %esi, 12(%ebp) C store B2modb - - not %edx - imul %ebx, %edx - lea (%edx,%ebx), %esi - cmp %edx, %eax - cmovnc( %edx, %esi) - mov %edi, %eax - mul %esi - - add %esi, %edx - shr %cl, %esi - mov %esi, 16(%ebp) C store B3modb - - not %edx - imul %ebx, %edx - lea (%edx,%ebx), %esi - cmp %edx, %eax - cmovnc( %edx, %esi) - mov %edi, %eax - mul %esi - - add %esi, %edx - shr %cl, %esi - mov %esi, 20(%ebp) C store B4modb - - not %edx - imul %ebx, %edx - add %edx, %ebx - cmp %edx, %eax - cmovnc( %edx, %ebx) - - shr %cl, %ebx - mov %ebx, 24(%ebp) C store B5modb - - pop %ebx - pop %esi - pop %edi - pop %ebp - ret -EPILOGUE() diff --git a/gmp/mpn/x86/k7/mod_34lsub1.asm b/gmp/mpn/x86/k7/mod_34lsub1.asm index ee3ad04099..f00e84dc42 100644 --- a/gmp/mpn/x86/k7/mod_34lsub1.asm +++ b/gmp/mpn/x86/k7/mod_34lsub1.asm @@ -1,32 +1,22 @@ dnl AMD K7 mpn_mod_34lsub1 -- remainder modulo 2^24-1. -dnl Copyright 2000-2002, 2004, 2005, 2008 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. 
-dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: +dnl Copyright 2000, 2001, 2002, 2004, 2005, 2008 Free Software Foundation, +dnl Inc. dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. 
include(`../config.m4') diff --git a/gmp/mpn/x86/k7/mode1o.asm b/gmp/mpn/x86/k7/mode1o.asm index 6472ec5949..ef858049a6 100644 --- a/gmp/mpn/x86/k7/mode1o.asm +++ b/gmp/mpn/x86/k7/mode1o.asm @@ -1,32 +1,21 @@ dnl AMD K7 mpn_modexact_1_odd -- exact division style remainder. -dnl Copyright 2000-2002, 2004, 2007 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: +dnl Copyright 2000, 2001, 2002, 2004, 2007 Free Software Foundation, Inc. dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. 
If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') @@ -122,7 +111,7 @@ ifdef(`PIC',` subl %eax, %edi C inv = 2*inv - inv*inv*d - ASSERT(e,` C d*inv == 1 mod 2^GMP_LIMB_BITS + ASSERT(e,` C d*inv == 1 mod 2^BITS_PER_MP_LIMB movl %esi, %eax imull %edi, %eax cmpl $1, %eax') diff --git a/gmp/mpn/x86/k7/mul_1.asm b/gmp/mpn/x86/k7/mul_1.asm index 755cd2ed50..016262d594 100644 --- a/gmp/mpn/x86/k7/mul_1.asm +++ b/gmp/mpn/x86/k7/mul_1.asm @@ -1,38 +1,28 @@ dnl AMD K7 mpn_mul_1. -dnl Copyright 1999-2002, 2005, 2008 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: +dnl Copyright 1999, 2000, 2001, 2002, 2005, 2008 Free Software Foundation, +dnl Inc. dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. 
+dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') -C cycles/limb -C P5 +C cycles/limb +C P5: C P6 model 0-8,10-12) C P6 model 9 (Banias) C P6 model 13 (Dothan) @@ -41,9 +31,9 @@ C P4 model 1 (?) C P4 model 2 (Northwood) C P4 model 3 (Prescott) C P4 model 4 (Nocona) -C AMD K6 -C AMD K7 3.25 -C AMD K8 +C K6: +C K7: 3.25 +C K8: C TODO C * Improve feed-in and wind-down code. We beat the old code for all n != 1, diff --git a/gmp/mpn/x86/k7/mul_basecase.asm b/gmp/mpn/x86/k7/mul_basecase.asm index 4dfb500885..7f4c0002f7 100644 --- a/gmp/mpn/x86/k7/mul_basecase.asm +++ b/gmp/mpn/x86/k7/mul_basecase.asm @@ -1,32 +1,21 @@ dnl AMD K7 mpn_mul_basecase -- multiply two mpn numbers. -dnl Copyright 1999-2002 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: +dnl Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc. dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. 
dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') diff --git a/gmp/mpn/x86/k7/sqr_basecase.asm b/gmp/mpn/x86/k7/sqr_basecase.asm index 7b6a97e0df..408a13dc9b 100644 --- a/gmp/mpn/x86/k7/sqr_basecase.asm +++ b/gmp/mpn/x86/k7/sqr_basecase.asm @@ -1,32 +1,21 @@ dnl AMD K7 mpn_sqr_basecase -- square an mpn number. -dnl Copyright 1999-2002 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: +dnl Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc. dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. 
-dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') @@ -39,18 +28,18 @@ C roughly the Karatsuba recursing range). dnl These are the same as mpn/x86/k6/sqr_basecase.asm, see that code for dnl some comments. 
-deflit(SQR_TOOM2_THRESHOLD_MAX, 66) +deflit(SQR_KARATSUBA_THRESHOLD_MAX, 66) -ifdef(`SQR_TOOM2_THRESHOLD_OVERRIDE', -`define(`SQR_TOOM2_THRESHOLD',SQR_TOOM2_THRESHOLD_OVERRIDE)') +ifdef(`SQR_KARATSUBA_THRESHOLD_OVERRIDE', +`define(`SQR_KARATSUBA_THRESHOLD',SQR_KARATSUBA_THRESHOLD_OVERRIDE)') -m4_config_gmp_mparam(`SQR_TOOM2_THRESHOLD') -deflit(UNROLL_COUNT, eval(SQR_TOOM2_THRESHOLD-3)) +m4_config_gmp_mparam(`SQR_KARATSUBA_THRESHOLD') +deflit(UNROLL_COUNT, eval(SQR_KARATSUBA_THRESHOLD-3)) C void mpn_sqr_basecase (mp_ptr dst, mp_srcptr src, mp_size_t size); C -C With a SQR_TOOM2_THRESHOLD around 50 this code is about 1500 bytes, +C With a SQR_KARATSUBA_THRESHOLD around 50 this code is about 1500 bytes, C which is quite a bit, but is considered good value since squares big C enough to use most of the code will be spending quite a few cycles in it. diff --git a/gmp/mpn/x86/k7/sublsh1_n.asm b/gmp/mpn/x86/k7/sublsh1_n.asm deleted file mode 100644 index 523b01218d..0000000000 --- a/gmp/mpn/x86/k7/sublsh1_n.asm +++ /dev/null @@ -1,173 +0,0 @@ -dnl AMD K7 mpn_sublsh1_n_ip1 -- rp[] = rp[] - (up[] << 1) - -dnl Copyright 2011 Free Software Foundation, Inc. - -dnl Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. 
-dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. - -include(`../config.m4') - -C This is an attempt at a sublsh1_n for x86-32, not relying on sse2 insns. The -C innerloop is 2*3-way unrolled, which is best we can do with the available -C registers. It seems tricky to use the same structure for rsblsh1_n, since we -C cannot feed carry between operations there. - -C cycles/limb -C P5 -C P6 model 0-8,10-12 -C P6 model 9 (Banias) -C P6 model 13 (Dothan) -C P4 model 0 (Willamette) -C P4 model 1 (?) -C P4 model 2 (Northwood) -C P4 model 3 (Prescott) -C P4 model 4 (Nocona) -C Intel Atom 6.75 -C AMD K6 -C AMD K7 -C AMD K8 - -C This is a basic sublsh1_n for k7, atom, and perhaps some other x86-32 -C processors. It uses 2*4-way unrolling, for good reasons. -C -C Breaking carry recurrency might be a good idea. We would then need separate -C registers for the shift carry and add/subtract carry, which in turn would -C force is to 2*2-way unrolling. 
- -defframe(PARAM_SIZE, 12) -defframe(PARAM_SRC, 8) -defframe(PARAM_DST, 4) - -dnl re-use parameter space -define(VAR_COUNT,`PARAM_SIZE') -define(SAVE_EBX,`PARAM_SRC') -define(SAVE_EBP,`PARAM_DST') - -ASM_START() - TEXT - ALIGN(8) -PROLOGUE(mpn_sublsh1_n_ip1) -deflit(`FRAME',0) - -define(`rp', `%edi') -define(`up', `%esi') - - mov PARAM_SIZE, %eax C size - push up FRAME_pushl() - push rp FRAME_pushl() - xor %edx, %edx - mov PARAM_SRC, up - mov PARAM_DST, rp - mov %ebx, SAVE_EBX - mov %eax, %ebx - shr $3, %eax - - not %eax C count = -(size\8)-i - and $7, %ebx C size % 8 - jz L(exact) - -L(oop): -ifdef(`CPU_P6',` - shr %edx ') C restore 2nd saved carry bit - mov (up), %ecx - adc %ecx, %ecx - rcr %edx C restore 1st saved carry bit - lea 4(up), up - sbb %ecx, (rp) - lea 4(rp), rp - adc %edx, %edx C save a carry bit in edx -ifdef(`CPU_P6',` - adc %edx, %edx ') C save another carry bit in edx - dec %ebx - jnz L(oop) -L(exact): - inc %eax - jz L(end) - mov %eax, VAR_COUNT - mov %ebp, SAVE_EBP - - ALIGN(16) -L(top): -ifdef(`CPU_P6',` - shr %edx ') C restore 2nd saved carry bit - mov (up), %eax - adc %eax, %eax - mov 4(up), %ebx - adc %ebx, %ebx - mov 8(up), %ecx - adc %ecx, %ecx - mov 12(up), %ebp - adc %ebp, %ebp - - rcr %edx C restore 1st saved carry bit - - sbb %eax, (rp) - sbb %ebx, 4(rp) - sbb %ecx, 8(rp) - sbb %ebp, 12(rp) - - mov 16(up), %eax - adc %eax, %eax - mov 20(up), %ebx - adc %ebx, %ebx - mov 24(up), %ecx - adc %ecx, %ecx - mov 28(up), %ebp - adc %ebp, %ebp - - lea 32(up), up - adc %edx, %edx C save a carry bit in edx - - sbb %eax, 16(rp) - sbb %ebx, 20(rp) - sbb %ecx, 24(rp) - sbb %ebp, 28(rp) - -ifdef(`CPU_P6',` - adc %edx, %edx ') C save another carry bit in edx - incl VAR_COUNT - lea 32(rp), rp - jne L(top) - - mov SAVE_EBP, %ebp -L(end): - mov SAVE_EBX, %ebx - -ifdef(`CPU_P6',` - xor %eax, %eax - shr $1, %edx - adc %edx, %eax -',` - adc $0, %edx - mov %edx, %eax -') - pop rp FRAME_popl() - pop up FRAME_popl() - ret -EPILOGUE() -ASM_END() diff --git 
a/gmp/mpn/x86/k8/gmp-mparam.h b/gmp/mpn/x86/k8/gmp-mparam.h deleted file mode 100644 index 8d95fef80b..0000000000 --- a/gmp/mpn/x86/k8/gmp-mparam.h +++ /dev/null @@ -1,198 +0,0 @@ -/* x86/k8 gmp-mparam.h -- Compiler/machine parameter header file. - -Copyright 1991, 1993, 1994, 2000-2011, 2014 Free Software Foundation, Inc. - -This file is part of the GNU MP Library. - -The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - -or - - * the GNU General Public License as published by the Free Software - Foundation; either version 2 of the License, or (at your option) any - later version. - -or both in parallel, as here. - -The GNU MP Library is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. - -You should have received copies of the GNU General Public License and the -GNU Lesser General Public License along with the GNU MP Library. If not, -see https://www.gnu.org/licenses/. 
*/ - -#define GMP_LIMB_BITS 32 -#define GMP_LIMB_BYTES 4 - -/* 2500 MHz K8 Brisbane */ -/* FFT tuning limit = 10000000 */ -/* Generated by tuneup.c, 2014-03-12, gcc 4.5 */ - -#define MOD_1_NORM_THRESHOLD 0 /* always */ -#define MOD_1_UNNORM_THRESHOLD 0 /* always */ -#define MOD_1N_TO_MOD_1_1_THRESHOLD 11 -#define MOD_1U_TO_MOD_1_1_THRESHOLD 6 -#define MOD_1_1_TO_MOD_1_2_THRESHOLD 12 -#define MOD_1_2_TO_MOD_1_4_THRESHOLD 0 /* never mpn_mod_1s_2p */ -#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 16 -#define USE_PREINV_DIVREM_1 1 /* native */ -#define DIV_QR_1N_PI1_METHOD 1 -#define DIV_QR_1_NORM_THRESHOLD 2 -#define DIV_QR_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* never */ -#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */ -#define DIVEXACT_1_THRESHOLD 0 /* always (native) */ -#define BMOD_1_TO_MOD_1_THRESHOLD 40 - -#define MUL_TOOM22_THRESHOLD 24 -#define MUL_TOOM33_THRESHOLD 81 -#define MUL_TOOM44_THRESHOLD 130 -#define MUL_TOOM6H_THRESHOLD 303 -#define MUL_TOOM8H_THRESHOLD 430 - -#define MUL_TOOM32_TO_TOOM43_THRESHOLD 81 -#define MUL_TOOM32_TO_TOOM53_THRESHOLD 91 -#define MUL_TOOM42_TO_TOOM53_THRESHOLD 93 -#define MUL_TOOM42_TO_TOOM63_THRESHOLD 92 -#define MUL_TOOM43_TO_TOOM54_THRESHOLD 122 - -#define SQR_BASECASE_THRESHOLD 0 /* always (native) */ -#define SQR_TOOM2_THRESHOLD 46 -#define SQR_TOOM3_THRESHOLD 78 -#define SQR_TOOM4_THRESHOLD 202 -#define SQR_TOOM6_THRESHOLD 286 -#define SQR_TOOM8_THRESHOLD 422 - -#define MULMID_TOOM42_THRESHOLD 56 - -#define MULMOD_BNM1_THRESHOLD 17 -#define SQRMOD_BNM1_THRESHOLD 18 - -#define MUL_FFT_MODF_THRESHOLD 848 /* k = 5 */ -#define MUL_FFT_TABLE3 \ - { { 848, 5}, { 27, 6}, { 25, 7}, { 13, 6}, \ - { 27, 7}, { 15, 6}, { 32, 7}, { 17, 6}, \ - { 35, 7}, { 19, 6}, { 39, 7}, { 23, 6}, \ - { 47, 7}, { 27, 8}, { 15, 7}, { 35, 8}, \ - { 19, 7}, { 41, 8}, { 23, 7}, { 47, 8}, \ - { 31, 7}, { 63, 8}, { 39, 9}, { 23, 8}, \ - { 55, 9}, { 31, 8}, { 67, 9}, { 39, 8}, \ - { 79, 9}, { 47, 8}, { 95, 9}, { 55,10}, \ - { 31, 9}, { 79,10}, { 47, 9}, { 
103,11}, \ - { 31,10}, { 63, 9}, { 135,10}, { 79, 9}, \ - { 167,10}, { 95, 9}, { 199,10}, { 111,11}, \ - { 63,10}, { 127, 9}, { 255,10}, { 143, 9}, \ - { 287,10}, { 159,11}, { 95,10}, { 207,12}, \ - { 63,11}, { 127,10}, { 271, 9}, { 543,10}, \ - { 287,11}, { 159,10}, { 319, 9}, { 639,10}, \ - { 335, 9}, { 671,11}, { 191,10}, { 383, 9}, \ - { 799,11}, { 223,12}, { 127,11}, { 255,10}, \ - { 543,11}, { 287,10}, { 607, 9}, { 1215,11}, \ - { 319,10}, { 671, 9}, { 1343,12}, { 191,11}, \ - { 383,10}, { 799, 9}, { 1599,11}, { 415,10}, \ - { 863, 9}, { 1727,13}, { 127,12}, { 255,11}, \ - { 543,10}, { 1119,11}, { 607,10}, { 1215,12}, \ - { 319,11}, { 671,10}, { 1343,11}, { 735,10}, \ - { 1471, 9}, { 2943,12}, { 383,11}, { 799,10}, \ - { 1599,11}, { 863,10}, { 1727,12}, { 447,11}, \ - { 991,13}, { 255,12}, { 511,11}, { 1023,10}, \ - { 2111,11}, { 1119,12}, { 575,11}, { 1215,10}, \ - { 2431,12}, { 639,11}, { 1343,12}, { 703,11}, \ - { 1471,10}, { 2943,13}, { 383,12}, { 767,11}, \ - { 1599,12}, { 831,11}, { 1727,12}, { 959,11}, \ - { 1919,14}, { 255,13}, { 511,12}, { 1023,11}, \ - { 2047,12}, { 1087,11}, { 2239,12}, { 1215,11}, \ - { 2431,13}, { 639,12}, { 1471,11}, { 2943,13}, \ - { 767,12}, { 1727,13}, { 895,12}, { 1983,14}, \ - { 511,13}, { 1023,12}, { 2239,13}, { 1151,12}, \ - { 4096,13}, { 8192,14}, { 16384,15}, { 32768,16} } -#define MUL_FFT_TABLE3_SIZE 144 -#define MUL_FFT_THRESHOLD 7552 - -#define SQR_FFT_MODF_THRESHOLD 618 /* k = 5 */ -#define SQR_FFT_TABLE3 \ - { { 618, 5}, { 25, 6}, { 13, 5}, { 27, 6}, \ - { 28, 7}, { 15, 6}, { 33, 7}, { 17, 6}, \ - { 35, 7}, { 19, 6}, { 39, 7}, { 23, 6}, \ - { 47, 7}, { 27, 8}, { 15, 7}, { 35, 8}, \ - { 19, 7}, { 41, 8}, { 23, 7}, { 47, 8}, \ - { 31, 7}, { 63, 8}, { 39, 9}, { 23, 8}, \ - { 51, 9}, { 31, 8}, { 67, 9}, { 39, 8}, \ - { 79, 9}, { 47, 8}, { 95, 9}, { 55,10}, \ - { 31, 9}, { 79,10}, { 47, 9}, { 95,11}, \ - { 31,10}, { 63, 9}, { 135,10}, { 79, 9}, \ - { 167,10}, { 95, 9}, { 191,10}, { 111,11}, \ - { 63,10}, { 159,11}, { 
95,10}, { 191,12}, \ - { 63,11}, { 127,10}, { 255, 9}, { 511,10}, \ - { 271, 9}, { 543, 8}, { 1087,10}, { 287,11}, \ - { 159,10}, { 319, 9}, { 639,10}, { 335, 9}, \ - { 671, 8}, { 1343,10}, { 351,11}, { 191,10}, \ - { 383, 9}, { 767,10}, { 399, 9}, { 799,10}, \ - { 415,11}, { 223,12}, { 127,11}, { 255,10}, \ - { 543, 9}, { 1087,11}, { 287,10}, { 607, 9}, \ - { 1215,11}, { 319,10}, { 671, 9}, { 1343,11}, \ - { 351,12}, { 191,11}, { 383,10}, { 799, 9}, \ - { 1599,11}, { 415,10}, { 863, 9}, { 1727,13}, \ - { 127,12}, { 255,11}, { 543,10}, { 1087,11}, \ - { 607,10}, { 1215,12}, { 319,11}, { 671,10}, \ - { 1343,11}, { 735,10}, { 1471,12}, { 383,11}, \ - { 799,10}, { 1599,11}, { 863,10}, { 1727,12}, \ - { 447,11}, { 959,10}, { 1919,11}, { 991,13}, \ - { 255,12}, { 511,11}, { 1087,12}, { 575,11}, \ - { 1215,10}, { 2431,12}, { 639,11}, { 1343,12}, \ - { 703,11}, { 1471,13}, { 383,12}, { 767,11}, \ - { 1599,12}, { 831,11}, { 1727,12}, { 959,11}, \ - { 1919,14}, { 255,13}, { 511,12}, { 1087,11}, \ - { 2239,12}, { 1215,11}, { 2431,13}, { 639,12}, \ - { 1471,11}, { 2943,13}, { 767,12}, { 1727,11}, \ - { 3455,13}, { 895,12}, { 1983,14}, { 511,13}, \ - { 1023,12}, { 2239,13}, { 1151,12}, { 4096,13}, \ - { 8192,14}, { 16384,15}, { 32768,16} } -#define SQR_FFT_TABLE3_SIZE 147 -#define SQR_FFT_THRESHOLD 5760 - -#define MULLO_BASECASE_THRESHOLD 8 -#define MULLO_DC_THRESHOLD 31 -#define MULLO_MUL_N_THRESHOLD 14281 - -#define DC_DIV_QR_THRESHOLD 91 -#define DC_DIVAPPR_Q_THRESHOLD 280 -#define DC_BDIV_QR_THRESHOLD 87 -#define DC_BDIV_Q_THRESHOLD 222 - -#define INV_MULMOD_BNM1_THRESHOLD 62 -#define INV_NEWTON_THRESHOLD 268 -#define INV_APPR_THRESHOLD 270 - -#define BINV_NEWTON_THRESHOLD 260 -#define REDC_1_TO_REDC_N_THRESHOLD 79 - -#define MU_DIV_QR_THRESHOLD 1718 -#define MU_DIVAPPR_Q_THRESHOLD 1528 -#define MUPI_DIV_QR_THRESHOLD 97 -#define MU_BDIV_QR_THRESHOLD 1470 -#define MU_BDIV_Q_THRESHOLD 1470 - -#define POWM_SEC_TABLE 1,22,114,416,1464 - -#define MATRIX22_STRASSEN_THRESHOLD 16 
-#define HGCD_THRESHOLD 149 -#define HGCD_APPR_THRESHOLD 204 -#define HGCD_REDUCE_THRESHOLD 4455 -#define GCD_DC_THRESHOLD 599 -#define GCDEXT_DC_THRESHOLD 403 -#define JACOBI_BASE_METHOD 4 - -#define GET_STR_DC_THRESHOLD 13 -#define GET_STR_PRECOMPUTE_THRESHOLD 28 -#define SET_STR_DC_THRESHOLD 270 -#define SET_STR_PRECOMPUTE_THRESHOLD 1367 - -#define FAC_DSC_THRESHOLD 348 -#define FAC_ODD_THRESHOLD 24 diff --git a/gmp/mpn/x86/lshift.asm b/gmp/mpn/x86/lshift.asm index 6ee6153cc2..5598599f8b 100644 --- a/gmp/mpn/x86/lshift.asm +++ b/gmp/mpn/x86/lshift.asm @@ -1,43 +1,33 @@ dnl x86 mpn_lshift -- mpn left shift. -dnl Copyright 1992, 1994, 1996, 1999-2002 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: +dnl Copyright 1992, 1994, 1996, 1999, 2000, 2001, 2002 Free Software +dnl Foundation, Inc. dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. 
+dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') C cycles/limb -C P54 7.5 -C P55 7.0 -C P6 2.5 -C K6 4.5 -C K7 5.0 -C P4 14.5 +C P54: 7.5 +C P55: 7.0 +C P6: 2.5 +C K6: 4.5 +C K7: 5.0 +C P4: 14.5 C mp_limb_t mpn_lshift (mp_ptr dst, mp_srcptr src, mp_size_t size, diff --git a/gmp/mpn/x86/mmx/sec_tabselect.asm b/gmp/mpn/x86/mmx/sec_tabselect.asm deleted file mode 100644 index aae158abf7..0000000000 --- a/gmp/mpn/x86/mmx/sec_tabselect.asm +++ /dev/null @@ -1,163 +0,0 @@ -dnl X86 MMX mpn_sec_tabselect. - -dnl Contributed to the GNU project by Torbjörn Granlund. - -dnl Copyright 2011-2013 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. - -include(`../config.m4') - -C cycles/limb cycles/limb -C ali,evn n unal,evn n -C P5 -C P6 model 0-8,10-12 -C P6 model 9 (Banias) -C P6 model 13 (Dothan) 1.33 1.87 -C P4 model 0 (Willamette) -C P4 model 1 (?) -C P4 model 2 (Northwood) 2.1 2.63 -C P4 model 3 (Prescott) -C P4 model 4 (Nocona) 1.7 2.57 -C Intel Atom 1.85 2.7 -C AMD K6 -C AMD K7 1.33 1.33 -C AMD K8 -C AMD K10 - -define(`rp', `%edi') -define(`tp', `%esi') -define(`n', `%edx') -define(`nents', `%ecx') -define(`which', `') - -define(`i', `%ebp') -define(`j', `%ebx') - -ASM_START() - TEXT - ALIGN(16) -PROLOGUE(mpn_sec_tabselect) - push %ebx - push %esi - push %edi - push %ebp - - mov 20(%esp), rp - mov 24(%esp), tp - mov 28(%esp), n - mov 32(%esp), nents - - movd 36(%esp), %mm6 - punpckldq %mm6, %mm6 C 2 copies of `which' - - mov $1, %ebx - movd %ebx, %mm7 - punpckldq %mm7, %mm7 C 2 copies of 1 - - mov n, j - add $-4, j - js L(outer_end) - -L(outer_top): - mov nents, i - mov tp, %eax - pxor %mm1, %mm1 - pxor %mm4, %mm4 - pxor %mm5, %mm5 - ALIGN(16) -L(top): movq %mm6, %mm0 - pcmpeqd %mm1, %mm0 - paddd %mm7, %mm1 - movq (tp), %mm2 - movq 8(tp), %mm3 - pand %mm0, %mm2 - pand %mm0, %mm3 - por %mm2, %mm4 - por %mm3, %mm5 - lea (tp,n,4), tp - add $-1, i - jne L(top) - - movq %mm4, (rp) - movq %mm5, 8(rp) - - lea 16(%eax), tp - lea 16(rp), rp - add $-4, j - jns L(outer_top) -L(outer_end): - - test $2, %dl - jz L(b0x) - -L(b1x): mov nents, i - mov tp, %eax - pxor %mm1, %mm1 - pxor %mm4, %mm4 - ALIGN(16) -L(tp2): movq %mm6, %mm0 - pcmpeqd %mm1, %mm0 - paddd %mm7, %mm1 - movq (tp), %mm2 - pand %mm0, %mm2 - por %mm2, %mm4 - lea (tp,n,4), tp - add $-1, i - jne L(tp2) - - movq %mm4, (rp) - - lea 8(%eax), tp - lea 8(rp), rp - -L(b0x): test $1, %dl - jz L(b00) - 
-L(b01): mov nents, i - pxor %mm1, %mm1 - pxor %mm4, %mm4 - ALIGN(16) -L(tp1): movq %mm6, %mm0 - pcmpeqd %mm1, %mm0 - paddd %mm7, %mm1 - movd (tp), %mm2 - pand %mm0, %mm2 - por %mm2, %mm4 - lea (tp,n,4), tp - add $-1, i - jne L(tp1) - - movd %mm4, (rp) - -L(b00): pop %ebp - pop %edi - pop %esi - pop %ebx - emms - ret -EPILOGUE() diff --git a/gmp/mpn/x86/mod_1.asm b/gmp/mpn/x86/mod_1.asm new file mode 100644 index 0000000000..0fa3ce0def --- /dev/null +++ b/gmp/mpn/x86/mod_1.asm @@ -0,0 +1,163 @@ +dnl x86 mpn_mod_1 -- mpn by limb remainder. + +dnl Copyright 1999, 2000, 2001, 2002, 2003, 2007 Free Software Foundation, +dnl Inc. +dnl +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. +dnl +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. + +include(`../config.m4') + + +C cycles/limb +C 486 42 approx, maybe +C P5 44 +C P6 39 +C K6 20 +C K7 41 +C P4 58 + + +C mp_limb_t mpn_mod_1 (mp_srcptr src, mp_size_t size, mp_limb_t divisor); +C mp_limb_t mpn_mod_1c (mp_srcptr src, mp_size_t size, mp_limb_t divisor, +C mp_limb_t carry); +C +C Essentially this code is the same as the division based part of +C mpn/generic/mod_1.c, but has the advantage that we get the desired divl +C instruction even when gcc is not being used (where longlong.h only has the +C rather slow generic C udiv_qrnnd(). 
+C +C A test is done to see if the high limb is less than the divisor, and if so +C one less div is done. A div is between 20 and 40 cycles on the various +C x86s, so assuming high<divisor about half the time, then this test saves +C half that amount. The branch misprediction penalty on each chip is less +C than half a div. +C +C +C Notes for K6: +C +C Back-to-back div instructions take 20 cycles, the same as the loop here, +C so it seems there's nothing to gain by rearranging. Pairing the mov and +C loop instructions was found to gain nothing. Normally we use a loop +C instruction rather than decl/jnz, but it gains nothing here. +C +C A multiply-by-inverse is used in mpn/x86/k6/pre_mod_1.asm, but it saves +C only 2 c/l so currently we haven't bothered with the same for mpn_mod_1. +C If an inverse takes about 40 cycles for normalized or perhaps 60 for +C unnormalized (due to bsfl being slow on k6) then the threshold would be at +C least 20 or 30 limbs. +C + +defframe(PARAM_CARRY, 16) +defframe(PARAM_DIVISOR,12) +defframe(PARAM_SIZE, 8) +defframe(PARAM_SRC, 4) + + TEXT + + ALIGN(16) +PROLOGUE(mpn_mod_1) +deflit(`FRAME',0) + + movl PARAM_SIZE, %ecx + pushl %ebx FRAME_pushl() + + movl PARAM_SRC, %ebx + pushl %esi FRAME_pushl() + + orl %ecx, %ecx + jz L(done_zero) + + movl PARAM_DIVISOR, %esi + movl -4(%ebx,%ecx,4), %eax C src high limb + + cmpl %esi, %eax + + sbbl %edx, %edx C -1 if high<divisor + + addl %edx, %ecx C skip one division if high<divisor + jz L(done_eax) + + andl %eax, %edx C carry if high<divisor + + +L(top): + C eax scratch (quotient) + C ebx src + C ecx counter + C edx carry (remainder) + C esi divisor + C edi + C ebp + + movl -4(%ebx,%ecx,4), %eax + + divl %esi + + decl %ecx + jnz L(top) + + + movl %edx, %eax +L(done_eax): + popl %esi + + popl %ebx + + ret + +EPILOGUE() + + + C This code located after mpn_mod_1, so the jump to L(top) here is + C back and hence will be predicted as taken. (size==0 is considered + C unlikely.) 
+ + ALIGN(16) +PROLOGUE(mpn_mod_1c) +deflit(`FRAME',0) + + movl PARAM_SIZE, %ecx + pushl %ebx FRAME_pushl() + + movl PARAM_SRC, %ebx + pushl %esi FRAME_pushl() + + movl PARAM_DIVISOR, %esi + orl %ecx, %ecx + + movl PARAM_CARRY, %edx + jnz L(top) + + popl %esi + movl %edx, %eax + + popl %ebx + + ret + + + C This code is for mpn_mod_1, but is positioned here to save some + C space in the alignment padding. + C +L(done_zero): + popl %esi + xorl %eax, %eax + + popl %ebx + + ret + +EPILOGUE() diff --git a/gmp/mpn/x86/mod_34lsub1.asm b/gmp/mpn/x86/mod_34lsub1.asm index e09e702c6f..68b4a73dbc 100644 --- a/gmp/mpn/x86/mod_34lsub1.asm +++ b/gmp/mpn/x86/mod_34lsub1.asm @@ -1,42 +1,31 @@ dnl Generic x86 mpn_mod_34lsub1 -- mpn remainder modulo 2^24-1. -dnl Copyright 2000-2002, 2004 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: +dnl Copyright 2000, 2001, 2002, 2004 Free Software Foundation, Inc. dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') C cycles/limb -C P5 3.0 -C P6 3.66 -C K6 3.0 -C K7 1.3 -C P4 9 +C P5: 3.0 +C P6: 3.66 +C K6: 3.0 +C K7: 1.3 +C P4: 9 C mp_limb_t mpn_mod_34lsub1 (mp_srcptr src, mp_size_t size) diff --git a/gmp/mpn/x86/mul_1.asm b/gmp/mpn/x86/mul_1.asm index 421de62225..1d715ece7e 100644 --- a/gmp/mpn/x86/mul_1.asm +++ b/gmp/mpn/x86/mul_1.asm @@ -1,50 +1,40 @@ dnl x86 mpn_mul_1 (for 386, 486, and Pentium Pro) -- Multiply a limb vector dnl with a limb and store the result in a second limb vector. -dnl Copyright 1992, 1994, 1997-2002, 2005 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: +dnl Copyright 1992, 1994, 1997, 1998, 1999, 2000, 2001, 2002, 2005 Free +dnl Software Foundation, Inc. dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. 
+dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') -C cycles/limb -C P5 12.5 -C P6 model 0-8,10-12 5.5 +C cycles/limb +C P5: 12.5 +C P6 model 0-8,10-12) 5.5 C P6 model 9 (Banias) -C P6 model 13 (Dothan) 5.25 -C P4 model 0 (Willamette) 19.0 -C P4 model 1 (?) 19.0 -C P4 model 2 (Northwood) 19.0 +C P6 model 13 (Dothan) 5.25 +C P4 model 0 (Willamette) 19.0 +C P4 model 1 (?) 
19.0 +C P4 model 2 (Northwood) 19.0 C P4 model 3 (Prescott) C P4 model 4 (Nocona) -C AMD K6 10.5 -C AMD K7 4.5 -C AMD K8 +C K6: 10.5 +C K7: 4.5 +C K8: C mp_limb_t mpn_mul_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, diff --git a/gmp/mpn/x86/mul_basecase.asm b/gmp/mpn/x86/mul_basecase.asm index 8339732a80..7918ea07f3 100644 --- a/gmp/mpn/x86/mul_basecase.asm +++ b/gmp/mpn/x86/mul_basecase.asm @@ -1,43 +1,33 @@ dnl x86 mpn_mul_basecase -- Multiply two limb vectors and store the result dnl in a third limb vector. -dnl Copyright 1996-2002 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: +dnl Copyright 1996, 1997, 1998, 1999, 2000, 2001, 2002 Free Software +dnl Foundation, Inc. dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. 
+dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') C cycles/crossproduct -C P5 15 -C P6 7.5 -C K6 12.5 -C K7 5.5 -C P4 24 +C P5: 15 +C P6: 7.5 +C K6: 12.5 +C K7: 5.5 +C P4: 24 C void mpn_mul_basecase (mp_ptr wp, diff --git a/gmp/mpn/x86/nano/gmp-mparam.h b/gmp/mpn/x86/nano/gmp-mparam.h deleted file mode 100644 index cd8ac4e1d6..0000000000 --- a/gmp/mpn/x86/nano/gmp-mparam.h +++ /dev/null @@ -1,162 +0,0 @@ -/* x86/nano gmp-mparam.h -- Compiler/machine parameter header file. - -Copyright 1991, 1993, 1994, 2000-2011 Free Software Foundation, Inc. - -This file is part of the GNU MP Library. - -The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - -or - - * the GNU General Public License as published by the Free Software - Foundation; either version 2 of the License, or (at your option) any - later version. - -or both in parallel, as here. - -The GNU MP Library is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. 
- -You should have received copies of the GNU General Public License and the -GNU Lesser General Public License along with the GNU MP Library. If not, -see https://www.gnu.org/licenses/. */ - -#define GMP_LIMB_BITS 32 -#define GMP_LIMB_BYTES 4 - -/* Generated by tuneup.c, 2011-11-25, gcc 4.2 */ - -#define MOD_1_1P_METHOD 1 -#define MOD_1_NORM_THRESHOLD 3 -#define MOD_1_UNNORM_THRESHOLD 3 -#define MOD_1N_TO_MOD_1_1_THRESHOLD 10 -#define MOD_1U_TO_MOD_1_1_THRESHOLD 9 -#define MOD_1_1_TO_MOD_1_2_THRESHOLD 0 /* never mpn_mod_1_1p */ -#define MOD_1_2_TO_MOD_1_4_THRESHOLD 53 -#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 12 -#define USE_PREINV_DIVREM_1 1 -#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */ -#define DIVEXACT_1_THRESHOLD 0 /* always (native) */ -#define BMOD_1_TO_MOD_1_THRESHOLD 32 - -#define MUL_TOOM22_THRESHOLD 16 -#define MUL_TOOM33_THRESHOLD 132 -#define MUL_TOOM44_THRESHOLD 195 -#define MUL_TOOM6H_THRESHOLD 270 -#define MUL_TOOM8H_THRESHOLD 478 - -#define MUL_TOOM32_TO_TOOM43_THRESHOLD 129 -#define MUL_TOOM32_TO_TOOM53_THRESHOLD 138 -#define MUL_TOOM42_TO_TOOM53_THRESHOLD 130 -#define MUL_TOOM42_TO_TOOM63_THRESHOLD 135 - -#define SQR_BASECASE_THRESHOLD 0 /* always (native) */ -#define SQR_TOOM2_THRESHOLD 28 -#define SQR_TOOM3_THRESHOLD 194 -#define SQR_TOOM4_THRESHOLD 502 -#define SQR_TOOM6_THRESHOLD 746 -#define SQR_TOOM8_THRESHOLD 1005 - -#define MULMID_TOOM42_THRESHOLD 40 - -#define MULMOD_BNM1_THRESHOLD 14 -#define SQRMOD_BNM1_THRESHOLD 19 - -#define POWM_SEC_TABLE 4,23,258,828,2246 - -#define MUL_FFT_MODF_THRESHOLD 308 /* k = 5 */ -#define MUL_FFT_TABLE3 \ - { { 308, 5}, { 13, 6}, { 7, 5}, { 17, 6}, \ - { 9, 5}, { 19, 6}, { 11, 5}, { 23, 6}, \ - { 13, 7}, { 7, 6}, { 17, 7}, { 9, 6}, \ - { 19, 7}, { 11, 6}, { 24, 7}, { 15, 6}, \ - { 31, 7}, { 19, 8}, { 11, 7}, { 25, 8}, \ - { 15, 7}, { 33, 8}, { 19, 7}, { 39, 8}, \ - { 23, 7}, { 47, 9}, { 15, 8}, { 31, 7}, \ - { 63, 8}, { 39, 9}, { 23, 8}, { 47,10}, \ - { 15, 9}, { 31, 8}, { 63, 9}, { 47,10}, \ - { 
31, 9}, { 71,10}, { 47, 9}, { 95,11}, \ - { 31,10}, { 63, 9}, { 127, 8}, { 255,10}, \ - { 79, 9}, { 159,10}, { 95, 9}, { 191,11}, \ - { 63,10}, { 127, 9}, { 255, 8}, { 543, 9}, \ - { 287, 8}, { 575, 7}, { 1215,10}, { 159,11}, \ - { 95,10}, { 191,12}, { 63,11}, { 127,10}, \ - { 255, 9}, { 543, 8}, { 1087,10}, { 287, 9}, \ - { 607, 8}, { 1215,11}, { 159,10}, { 319, 9}, \ - { 639,10}, { 351, 9}, { 703, 8}, { 1407, 9}, \ - { 735, 8}, { 1471,11}, { 191,10}, { 383, 9}, \ - { 767,10}, { 415, 9}, { 831,11}, { 223,10}, \ - { 447, 9}, { 895,10}, { 479, 9}, { 959, 8}, \ - { 1919,12}, { 4096,13}, { 8192,14}, { 16384,15}, \ - { 32768,16} } -#define MUL_FFT_TABLE3_SIZE 89 -#define MUL_FFT_THRESHOLD 1856 - -#define SQR_FFT_MODF_THRESHOLD 396 /* k = 5 */ -#define SQR_FFT_TABLE3 \ - { { 396, 5}, { 13, 6}, { 7, 5}, { 21, 6}, \ - { 11, 5}, { 23, 6}, { 21, 7}, { 11, 6}, \ - { 25, 7}, { 15, 6}, { 31, 7}, { 19, 6}, \ - { 39, 7}, { 21, 8}, { 11, 7}, { 23, 6}, \ - { 47, 7}, { 27, 8}, { 15, 7}, { 33, 8}, \ - { 19, 7}, { 39, 8}, { 23, 7}, { 47, 8}, \ - { 27, 9}, { 15, 8}, { 31, 7}, { 63, 8}, \ - { 39, 9}, { 23, 8}, { 47,10}, { 15, 9}, \ - { 31, 8}, { 63, 9}, { 39, 8}, { 79, 9}, \ - { 47,10}, { 31, 9}, { 79,10}, { 47, 9}, \ - { 95,11}, { 31,10}, { 63, 9}, { 127,10}, \ - { 79, 9}, { 159,10}, { 95,11}, { 63,10}, \ - { 127, 9}, { 255, 8}, { 543,10}, { 143, 9}, \ - { 287, 8}, { 607, 7}, { 1215, 6}, { 2431,10}, \ - { 159, 8}, { 639,11}, { 95,10}, { 191,12}, \ - { 63,11}, { 127,10}, { 255, 9}, { 543, 8}, \ - { 1087,10}, { 287, 9}, { 607, 8}, { 1215,11}, \ - { 159,10}, { 319, 9}, { 671,10}, { 351, 9}, \ - { 703, 8}, { 1407, 9}, { 735, 8}, { 1471, 7}, \ - { 2943,11}, { 191,10}, { 383, 9}, { 799,10}, \ - { 415, 9}, { 895,10}, { 479,12}, { 4096,13}, \ - { 8192,14}, { 16384,15}, { 32768,16} } -#define SQR_FFT_TABLE3_SIZE 87 -#define SQR_FFT_THRESHOLD 2368 - -#define MULLO_BASECASE_THRESHOLD 0 /* always */ -#define MULLO_DC_THRESHOLD 51 -#define MULLO_MUL_N_THRESHOLD 3369 - -#define DC_DIV_QR_THRESHOLD 
56 -#define DC_DIVAPPR_Q_THRESHOLD 183 -#define DC_BDIV_QR_THRESHOLD 55 -#define DC_BDIV_Q_THRESHOLD 118 - -#define INV_MULMOD_BNM1_THRESHOLD 30 -#define INV_NEWTON_THRESHOLD 266 -#define INV_APPR_THRESHOLD 218 - -#define BINV_NEWTON_THRESHOLD 268 -#define REDC_1_TO_REDC_N_THRESHOLD 56 - -#define MU_DIV_QR_THRESHOLD 1308 -#define MU_DIVAPPR_Q_THRESHOLD 1528 -#define MUPI_DIV_QR_THRESHOLD 124 -#define MU_BDIV_QR_THRESHOLD 855 -#define MU_BDIV_Q_THRESHOLD 1334 - -#define MATRIX22_STRASSEN_THRESHOLD 14 -#define HGCD_THRESHOLD 104 -#define HGCD_APPR_THRESHOLD 139 -#define HGCD_REDUCE_THRESHOLD 2121 -#define GCD_DC_THRESHOLD 456 -#define GCDEXT_DC_THRESHOLD 321 -#define JACOBI_BASE_METHOD 4 - -#define GET_STR_DC_THRESHOLD 11 -#define GET_STR_PRECOMPUTE_THRESHOLD 25 -#define SET_STR_DC_THRESHOLD 542 -#define SET_STR_PRECOMPUTE_THRESHOLD 840 diff --git a/gmp/mpn/x86/p6/README b/gmp/mpn/x86/p6/README index f19d47b94f..1ded4e7177 100644 --- a/gmp/mpn/x86/p6/README +++ b/gmp/mpn/x86/p6/README @@ -3,28 +3,17 @@ Copyright 2000, 2001 Free Software Foundation, Inc. This file is part of the GNU MP Library. The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - -or - - * the GNU General Public License as published by the Free Software - Foundation; either version 2 of the License, or (at your option) any - later version. - -or both in parallel, as here. +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 3 of the License, or (at your +option) any later version. The GNU MP Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License -for more details. +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. -You should have received copies of the GNU General Public License and the -GNU Lesser General Public License along with the GNU MP Library. If not, -see https://www.gnu.org/licenses/. +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. @@ -63,7 +52,7 @@ Some of these might be able to be improved. mpn_mul_basecase 8.2 cycles/crossproduct (approx) mpn_sqr_basecase 4.0 cycles/crossproduct (approx) - or 7.75 cycles/triangleproduct (approx) + or 7.75 cycles/triangleproduct (approx) Pentium II and III have MMX and get the following improvements. diff --git a/gmp/mpn/x86/p6/aors_n.asm b/gmp/mpn/x86/p6/aors_n.asm index df51c2e6f7..f4652ec2cb 100644 --- a/gmp/mpn/x86/p6/aors_n.asm +++ b/gmp/mpn/x86/p6/aors_n.asm @@ -1,43 +1,32 @@ dnl Intel P6 mpn_add_n/mpn_sub_n -- mpn add or subtract. dnl Copyright 2006 Free Software Foundation, Inc. - +dnl dnl This file is part of the GNU MP Library. dnl dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. +dnl it under the terms of the GNU Lesser General Public License as published +dnl by the Free Software Foundation; either version 3 of the License, or (at +dnl your option) any later version. 
dnl dnl The GNU MP Library is distributed in the hope that it will be useful, but dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') C TODO: -C * Avoid indexed addressing, it makes us stall on the two-ported register +C * Avoid indexed adressing, it makes us stall on the two-ported register C file. -C cycles/limb -C P6 model 0-8,10-12 3.17 -C P6 model 9 (Banias) 2.15 -C P6 model 13 (Dothan) 2.25 +C cycles/limb +C P6 model 0-8,10-12) 3.17 +C P6 model 9 (Banias) ? +C P6 model 13 (Dothan) 2.25 define(`rp', `%edi') diff --git a/gmp/mpn/x86/p6/aorsmul_1.asm b/gmp/mpn/x86/p6/aorsmul_1.asm index bc8c49c62e..746bf05f12 100644 --- a/gmp/mpn/x86/p6/aorsmul_1.asm +++ b/gmp/mpn/x86/p6/aorsmul_1.asm @@ -1,49 +1,38 @@ dnl Intel P6 mpn_addmul_1/mpn_submul_1 -- add or subtract mpn multiple. -dnl Copyright 1999-2002, 2005 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: +dnl Copyright 1999, 2000, 2001, 2002, 2005 Free Software Foundation, Inc. dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. 
-dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') -C cycles/limb -C P5 -C P6 model 0-8,10-12 6.44 -C P6 model 9 (Banias) 6.15 -C P6 model 13 (Dothan) 6.11 +C cycles/limb +C P5: +C P6 model 0-8,10-12) 6.44 +C P6 model 9 (Banias) +C P6 model 13 (Dothan) 6.11 C P4 model 0 (Willamette) C P4 model 1 (?) C P4 model 2 (Northwood) C P4 model 3 (Prescott) C P4 model 4 (Nocona) -C AMD K6 -C AMD K7 -C AMD K8 +C K6: +C K7: +C K8: dnl P6 UNROLL_COUNT cycles/limb @@ -181,7 +170,7 @@ C registers when doing the mul for the initial two carry limbs. 
C C The add/adc for the initial carry in %ebx is necessary only for the C mpn_add/submul_1c entry points. Duplicating the startup code to -C eliminate this for the plain mpn_add/submul_1 doesn't seem like a good +C eliminiate this for the plain mpn_add/submul_1 doesn't seem like a good C idea. dnl overlapping with parameters already fetched diff --git a/gmp/mpn/x86/p6/bdiv_q_1.asm b/gmp/mpn/x86/p6/bdiv_q_1.asm deleted file mode 100644 index 2cc179c238..0000000000 --- a/gmp/mpn/x86/p6/bdiv_q_1.asm +++ /dev/null @@ -1,286 +0,0 @@ -dnl Intel P6 mpn_modexact_1_odd -- exact division style remainder. - -dnl Rearranged from mpn/x86/p6/dive_1.asm by Marco Bodrato. - -dnl Copyright 2001, 2002, 2007, 2011 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. 
- -include(`../config.m4') - - -C odd even divisor -C P6: 10.0 12.0 cycles/limb - -C MULFUNC_PROLOGUE(mpn_bdiv_q_1 mpn_pi1_bdiv_q_1) - -C The odd case is basically the same as mpn_modexact_1_odd, just with an -C extra store, and it runs at the same 10 cycles which is the dependent -C chain. -C -C The shifts for the even case aren't on the dependent chain so in principle -C it could run the same too, but nothing running at 10 has been found. -C Perhaps there's too many uops (an extra 4 over the odd case). - -defframe(PARAM_SHIFT, 24) -defframe(PARAM_INVERSE,20) -defframe(PARAM_DIVISOR,16) -defframe(PARAM_SIZE, 12) -defframe(PARAM_SRC, 8) -defframe(PARAM_DST, 4) - -defframe(SAVE_EBX, -4) -defframe(SAVE_ESI, -8) -defframe(SAVE_EDI, -12) -defframe(SAVE_EBP, -16) -deflit(STACK_SPACE, 16) - -dnl re-use parameter space -define(VAR_INVERSE,`PARAM_SRC') - - TEXT - -C mp_limb_t -C mpn_pi1_bdiv_q_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, mp_limb_t divisor, -C mp_limb_t inverse, int shift) - - ALIGN(16) -PROLOGUE(mpn_pi1_bdiv_q_1) -deflit(`FRAME',0) - - subl $STACK_SPACE, %esp FRAME_subl_esp(STACK_SPACE) - - movl %esi, SAVE_ESI - movl PARAM_SRC, %esi - - movl %ebx, SAVE_EBX - movl PARAM_SIZE, %ebx - - movl %ebp, SAVE_EBP - movl PARAM_INVERSE, %ebp - - movl PARAM_SHIFT, %ecx C trailing twos - -L(common): - movl %edi, SAVE_EDI - movl PARAM_DST, %edi - - leal (%esi,%ebx,4), %esi C src end - - leal (%edi,%ebx,4), %edi C dst end - negl %ebx C -size - - movl (%esi,%ebx,4), %eax C src[0] - - orl %ecx, %ecx - jz L(odd_entry) - - movl %edi, PARAM_DST - movl %ebp, VAR_INVERSE - -L(even): - C eax src[0] - C ebx counter, limbs, negative - C ecx shift - C edx - C esi - C edi - C ebp - - xorl %ebp, %ebp C initial carry bit - xorl %edx, %edx C initial carry limb (for size==1) - - incl %ebx - jz L(even_one) - - movl (%esi,%ebx,4), %edi C src[1] - - shrdl( %cl, %edi, %eax) - - jmp L(even_entry) - - -L(even_top): - C eax scratch - C ebx counter, limbs, negative - C ecx shift - C edx scratch 
- C esi &src[size] - C edi &dst[size] and scratch - C ebp carry bit - - movl (%esi,%ebx,4), %edi - - mull PARAM_DIVISOR - - movl -4(%esi,%ebx,4), %eax - shrdl( %cl, %edi, %eax) - - subl %ebp, %eax - - sbbl %ebp, %ebp - subl %edx, %eax - - sbbl $0, %ebp - -L(even_entry): - imull VAR_INVERSE, %eax - - movl PARAM_DST, %edi - negl %ebp - - movl %eax, -4(%edi,%ebx,4) - incl %ebx - jnz L(even_top) - - mull PARAM_DIVISOR - - movl -4(%esi), %eax - -L(even_one): - shrl %cl, %eax - movl SAVE_ESI, %esi - - subl %ebp, %eax - movl SAVE_EBP, %ebp - - subl %edx, %eax - movl SAVE_EBX, %ebx - - imull VAR_INVERSE, %eax - - movl %eax, -4(%edi) - movl SAVE_EDI, %edi - addl $STACK_SPACE, %esp - - ret - -C The dependent chain here is -C -C subl %edx, %eax 1 -C imull %ebp, %eax 4 -C mull PARAM_DIVISOR 5 -C ---- -C total 10 -C -C and this is the measured speed. No special scheduling is necessary, out -C of order execution hides the load latency. - -L(odd_top): - C eax scratch (src limb) - C ebx counter, limbs, negative - C ecx carry bit - C edx carry limb, high of last product - C esi &src[size] - C edi &dst[size] - C ebp inverse - - mull PARAM_DIVISOR - - movl (%esi,%ebx,4), %eax - subl %ecx, %eax - - sbbl %ecx, %ecx - subl %edx, %eax - - sbbl $0, %ecx - -L(odd_entry): - imull %ebp, %eax - - movl %eax, (%edi,%ebx,4) - negl %ecx - - incl %ebx - jnz L(odd_top) - - - movl SAVE_ESI, %esi - - movl SAVE_EDI, %edi - - movl SAVE_EBP, %ebp - - movl SAVE_EBX, %ebx - addl $STACK_SPACE, %esp - - ret - -EPILOGUE() - -C mp_limb_t mpn_bdiv_q_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, -C mp_limb_t divisor); -C - - ALIGN(16) -PROLOGUE(mpn_bdiv_q_1) -deflit(`FRAME',0) - - movl PARAM_DIVISOR, %eax - subl $STACK_SPACE, %esp FRAME_subl_esp(STACK_SPACE) - - movl %esi, SAVE_ESI - movl PARAM_SRC, %esi - - movl %ebx, SAVE_EBX - movl PARAM_SIZE, %ebx - - bsfl %eax, %ecx C trailing twos - - movl %ebp, SAVE_EBP - - shrl %cl, %eax C d without twos - - movl %eax, %edx - shrl %eax C d/2 without twos - - movl %edx, 
PARAM_DIVISOR - andl $127, %eax - -ifdef(`PIC',` - LEA( binvert_limb_table, %ebp) - movzbl (%eax,%ebp), %ebp C inv 8 bits -',` - movzbl binvert_limb_table(%eax), %ebp C inv 8 bits -') - - leal (%ebp,%ebp), %eax C 2*inv - - imull %ebp, %ebp C inv*inv - imull %edx, %ebp C inv*inv*d - - subl %ebp, %eax C inv = 2*inv - inv*inv*d - leal (%eax,%eax), %ebp C 2*inv - - imull %eax, %eax C inv*inv - imull %edx, %eax C inv*inv*d - - subl %eax, %ebp C inv = 2*inv - inv*inv*d - - jmp L(common) - -EPILOGUE() diff --git a/gmp/mpn/x86/p6/copyd.asm b/gmp/mpn/x86/p6/copyd.asm index 1be7636835..2946f51e7a 100644 --- a/gmp/mpn/x86/p6/copyd.asm +++ b/gmp/mpn/x86/p6/copyd.asm @@ -1,32 +1,21 @@ dnl Intel P6 mpn_copyd -- copy limb vector backwards. dnl Copyright 2001, 2002 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. 
+dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') diff --git a/gmp/mpn/x86/p6/dive_1.asm b/gmp/mpn/x86/p6/dive_1.asm index aa7ba880c9..e8efc28eac 100644 --- a/gmp/mpn/x86/p6/dive_1.asm +++ b/gmp/mpn/x86/p6/dive_1.asm @@ -1,32 +1,21 @@ dnl Intel P6 mpn_modexact_1_odd -- exact division style remainder. dnl Copyright 2001, 2002, 2007 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. 
dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') @@ -117,7 +106,7 @@ ifdef(`PIC',` subl %eax, %ebp C inv = 2*inv - inv*inv*d - ASSERT(e,` C d*inv == 1 mod 2^GMP_LIMB_BITS + ASSERT(e,` C d*inv == 1 mod 2^BITS_PER_MP_LIMB movl PARAM_DIVISOR, %eax imull %ebp, %eax cmpl $1, %eax') @@ -138,7 +127,7 @@ C subl %edx, %eax 1 C imull %ebp, %eax 4 C mull PARAM_DIVISOR 5 C ---- -C total 10 +C total 10 C C and this is the measured speed. No special scheduling is necessary, out C of order execution hides the load latency. diff --git a/gmp/mpn/x86/p6/gcd_1.asm b/gmp/mpn/x86/p6/gcd_1.asm deleted file mode 100644 index f6518f6e19..0000000000 --- a/gmp/mpn/x86/p6/gcd_1.asm +++ /dev/null @@ -1,156 +0,0 @@ -dnl x86 mpn_gcd_1 optimised for processors with fast BSF. - -dnl Based on the K7 gcd_1.asm, by Kevin Ryde. Rehacked by Torbjorn Granlund. - -dnl Copyright 2000-2002, 2005, 2009, 2011, 2012 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. 
-dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. - -include(`../config.m4') - - -C cycles/bit (approx) -C AMD K7 7.80 -C AMD K8,K9 7.79 -C AMD K10 4.08 -C AMD bd1 ? -C AMD bobcat 7.82 -C Intel P4-2 14.9 -C Intel P4-3/4 14.0 -C Intel P6/13 5.09 -C Intel core2 4.22 -C Intel NHM 5.00 -C Intel SBR 5.00 -C Intel atom 17.1 -C VIA nano ? -C Numbers measured with: speed -CD -s16-32 -t16 mpn_gcd_1 - -C Threshold of when to call bmod when U is one limb. Should be about -C (time_in_cycles(bmod_1,1) + call_overhead) / (cycles/bit). 
-define(`BMOD_THRES_LOG2', 6) - - -define(`up', `%edi') -define(`n', `%esi') -define(`v0', `%edx') - - -ASM_START() - TEXT - ALIGN(16) -PROLOGUE(mpn_gcd_1) - push %edi - push %esi - - mov 12(%esp), up - mov 16(%esp), n - mov 20(%esp), v0 - - mov (up), %eax C U low limb - or v0, %eax - bsf %eax, %eax C min(ctz(u0),ctz(v0)) - - bsf v0, %ecx - shr %cl, v0 - - push %eax C preserve common twos over call - push v0 C preserve v0 argument over call - - cmp $1, n - jnz L(reduce_nby1) - -C Both U and V are single limbs, reduce with bmod if u0 >> v0. - mov (up), %ecx - mov %ecx, %eax - shr $BMOD_THRES_LOG2, %ecx - cmp %ecx, v0 - ja L(reduced) - jmp L(bmod) - -L(reduce_nby1): - cmp $BMOD_1_TO_MOD_1_THRESHOLD, n - jl L(bmod) -ifdef(`PIC_WITH_EBX',` - push %ebx - call L(movl_eip_to_ebx) - add $_GLOBAL_OFFSET_TABLE_, %ebx -') - push v0 C param 3 - push n C param 2 - push up C param 1 - CALL( mpn_mod_1) - jmp L(called) - -L(bmod): -ifdef(`PIC_WITH_EBX',`dnl - push %ebx - call L(movl_eip_to_ebx) - add $_GLOBAL_OFFSET_TABLE_, %ebx -') - push v0 C param 3 - push n C param 2 - push up C param 1 - CALL( mpn_modexact_1_odd) - -L(called): - add $12, %esp C deallocate params -ifdef(`PIC_WITH_EBX',`dnl - pop %ebx -') -L(reduced): - pop %edx - - bsf %eax, %ecx -C test %eax, %eax C FIXME: does this lower latency? 
- jnz L(mid) - jmp L(end) - - ALIGN(16) C K10 BD C2 NHM SBR -L(top): cmovc( %esi, %eax) C if x-y < 0 0,3 0,3 0,6 0,5 0,5 - cmovc( %edi, %edx) C use x,y-x 0,3 0,3 2,8 1,7 1,7 -L(mid): shr %cl, %eax C 1,7 1,6 2,8 2,8 2,8 - mov %edx, %esi C 1 1 4 3 3 - sub %eax, %esi C 2 2 5 4 4 - bsf %esi, %ecx C 3 3 6 5 5 - mov %eax, %edi C 2 2 3 3 4 - sub %edx, %eax C 2 2 4 3 4 - jnz L(top) C - -L(end): pop %ecx - mov %edx, %eax - shl %cl, %eax - - pop %esi - pop %edi - ret - -ifdef(`PIC_WITH_EBX',`dnl -L(movl_eip_to_ebx): - mov (%esp), %ebx - ret -') -EPILOGUE() diff --git a/gmp/mpn/x86/p6/gmp-mparam.h b/gmp/mpn/x86/p6/gmp-mparam.h index 96c96fd558..a85c500275 100644 --- a/gmp/mpn/x86/p6/gmp-mparam.h +++ b/gmp/mpn/x86/p6/gmp-mparam.h @@ -1,194 +1,70 @@ /* Intel P6 gmp-mparam.h -- Compiler/machine parameter header file. -Copyright 1991, 1993, 1994, 1999-2003, 2008-2010, 2012 Free Software +Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2003 Free Software Foundation, Inc. This file is part of the GNU MP Library. The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of either: +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 3 of the License, or (at your +option) any later version. - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. -or +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. 
*/ - * the GNU General Public License as published by the Free Software - Foundation; either version 2 of the License, or (at your option) any - later version. -or both in parallel, as here. +#define BITS_PER_MP_LIMB 32 +#define BYTES_PER_MP_LIMB 4 -The GNU MP Library is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. - -You should have received copies of the GNU General Public License and the -GNU Lesser General Public License along with the GNU MP Library. If not, -see https://www.gnu.org/licenses/. */ - - -#define GMP_LIMB_BITS 32 -#define GMP_LIMB_BYTES 4 - - -/* NOTE: In a fat binary build SQR_TOOM2_THRESHOLD here cannot be more than the - value in mpn/x86/p6/gmp-mparam.h. The latter is used as a hard limit in - mpn/x86/p6/sqr_basecase.asm. */ - - -/* 1867 MHz P6 model 13 */ - -#define MOD_1_NORM_THRESHOLD 4 -#define MOD_1_UNNORM_THRESHOLD 4 -#define MOD_1N_TO_MOD_1_1_THRESHOLD 5 -#define MOD_1U_TO_MOD_1_1_THRESHOLD 4 -#define MOD_1_1_TO_MOD_1_2_THRESHOLD 11 -#define MOD_1_2_TO_MOD_1_4_THRESHOLD 0 /* never mpn_mod_1s_2p */ -#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 8 -#define USE_PREINV_DIVREM_1 1 /* native */ -#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */ -#define DIVEXACT_1_THRESHOLD 0 /* always (native) */ -#define BMOD_1_TO_MOD_1_THRESHOLD 21 - -#define MUL_TOOM22_THRESHOLD 20 -#define MUL_TOOM33_THRESHOLD 74 -#define MUL_TOOM44_THRESHOLD 181 -#define MUL_TOOM6H_THRESHOLD 252 -#define MUL_TOOM8H_THRESHOLD 363 - -#define MUL_TOOM32_TO_TOOM43_THRESHOLD 73 -#define MUL_TOOM32_TO_TOOM53_THRESHOLD 114 -#define MUL_TOOM42_TO_TOOM53_THRESHOLD 115 -#define MUL_TOOM42_TO_TOOM63_THRESHOLD 80 - -#define SQR_BASECASE_THRESHOLD 0 /* always (native) */ -#define SQR_TOOM2_THRESHOLD 30 -#define SQR_TOOM3_THRESHOLD 101 -#define SQR_TOOM4_THRESHOLD 154 -#define SQR_TOOM6_THRESHOLD 222 -#define 
SQR_TOOM8_THRESHOLD 527 - -#define MULMID_TOOM42_THRESHOLD 58 - -#define MULMOD_BNM1_THRESHOLD 13 -#define SQRMOD_BNM1_THRESHOLD 17 - -#define POWM_SEC_TABLE 4,23,258,768,2388 - -#define MUL_FFT_MODF_THRESHOLD 565 /* k = 5 */ -#define MUL_FFT_TABLE3 \ - { { 565, 5}, { 25, 6}, { 13, 5}, { 27, 6}, \ - { 25, 7}, { 13, 6}, { 28, 7}, { 15, 6}, \ - { 31, 7}, { 17, 6}, { 35, 7}, { 27, 8}, \ - { 15, 7}, { 35, 8}, { 19, 7}, { 41, 8}, \ - { 23, 7}, { 47, 8}, { 27, 9}, { 15, 8}, \ - { 31, 7}, { 63, 8}, { 39, 9}, { 23, 5}, \ - { 383, 4}, { 991, 5}, { 511, 6}, { 267, 7}, \ - { 157, 8}, { 91, 9}, { 47, 8}, { 111, 9}, \ - { 63, 8}, { 127, 9}, { 79,10}, { 47, 9}, \ - { 95,11}, { 31,10}, { 63, 9}, { 135,10}, \ - { 79, 9}, { 159,10}, { 95,11}, { 63,10}, \ - { 143, 9}, { 287,10}, { 159,11}, { 95,10}, \ - { 191,12}, { 63,11}, { 127,10}, { 255, 9}, \ - { 511,10}, { 271, 9}, { 543,10}, { 287,11}, \ - { 159,10}, { 335, 9}, { 671,11}, { 191,10}, \ - { 383, 9}, { 767,10}, { 399, 9}, { 799,10}, \ - { 415,11}, { 223,12}, { 127,11}, { 255,10}, \ - { 543, 9}, { 1087,11}, { 287,10}, { 607,11}, \ - { 319,10}, { 671,12}, { 191,11}, { 383,10}, \ - { 799,11}, { 415,10}, { 831,13}, { 127,12}, \ - { 255,11}, { 543,10}, { 1087,11}, { 607,10}, \ - { 1215,12}, { 319,11}, { 671,10}, { 1343,11}, \ - { 735,10}, { 1471,12}, { 383,11}, { 799,10}, \ - { 1599,11}, { 863,12}, { 447,11}, { 959,13}, \ - { 255,12}, { 511,11}, { 1087,12}, { 575,11}, \ - { 1215,12}, { 639,11}, { 1343,12}, { 703,11}, \ - { 1471,13}, { 383,12}, { 831,11}, { 1727,12}, \ - { 959,14}, { 255,13}, { 511,12}, { 1215,13}, \ - { 639,12}, { 1471,11}, { 2943,13}, { 767,12}, \ - { 1727,13}, { 895,12}, { 1919,14}, { 511,13}, \ - { 1023,12}, { 2111,13}, { 1151,12}, { 2431,13}, \ - { 1407,12}, { 2815,14}, { 767,13}, { 1663,12}, \ - { 3455,13}, { 8192,14}, { 16384,15}, { 32768,16} } -#define MUL_FFT_TABLE3_SIZE 132 -#define MUL_FFT_THRESHOLD 6784 - -#define SQR_FFT_MODF_THRESHOLD 472 /* k = 5 */ -#define SQR_FFT_TABLE3 \ - { { 472, 5}, { 25, 6}, { 
13, 5}, { 27, 6}, \ - { 25, 7}, { 13, 6}, { 27, 7}, { 15, 6}, \ - { 31, 7}, { 17, 6}, { 35, 7}, { 27, 8}, \ - { 15, 7}, { 35, 8}, { 19, 7}, { 41, 8}, \ - { 23, 7}, { 49, 8}, { 27, 9}, { 15, 8}, \ - { 39, 9}, { 23, 8}, { 51,10}, { 15, 9}, \ - { 31, 8}, { 63, 4}, { 1023, 8}, { 67, 9}, \ - { 39, 5}, { 639, 4}, { 1471, 6}, { 383, 7}, \ - { 209, 8}, { 119, 9}, { 63, 7}, { 255, 8}, \ - { 139, 9}, { 71, 8}, { 143, 9}, { 79,10}, \ - { 47, 9}, { 95,11}, { 31,10}, { 63, 9}, \ - { 135,10}, { 79, 9}, { 159, 8}, { 319, 9}, \ - { 167,10}, { 95,11}, { 63,10}, { 143, 9}, \ - { 287,10}, { 159,11}, { 95,10}, { 191,12}, \ - { 63,11}, { 127,10}, { 255, 9}, { 543, 8}, \ - { 1087,10}, { 287, 9}, { 575,11}, { 159,10}, \ - { 319, 9}, { 639,10}, { 335, 9}, { 671,10}, \ - { 351, 9}, { 703,11}, { 191,10}, { 383, 9}, \ - { 767,10}, { 399, 9}, { 799,10}, { 415, 9}, \ - { 831,11}, { 223,12}, { 127,11}, { 255,10}, \ - { 543, 9}, { 1087,11}, { 287,10}, { 607, 9}, \ - { 1215,11}, { 319,10}, { 671, 9}, { 1343,11}, \ - { 351,10}, { 703,12}, { 191,11}, { 383,10}, \ - { 799,11}, { 415,10}, { 831,13}, { 127,12}, \ - { 255,11}, { 543,10}, { 1087,11}, { 607,12}, \ - { 319,11}, { 671,10}, { 1343,11}, { 735,12}, \ - { 383,11}, { 799,10}, { 1599,11}, { 863,12}, \ - { 447,11}, { 959,13}, { 255,12}, { 511,11}, \ - { 1087,12}, { 575,11}, { 1215,12}, { 639,11}, \ - { 1343,12}, { 703,11}, { 1471,13}, { 383,12}, \ - { 767,11}, { 1599,12}, { 831,11}, { 1727,12}, \ - { 959,14}, { 255,13}, { 511,12}, { 1215,13}, \ - { 639,12}, { 1471,13}, { 767,12}, { 1727,13}, \ - { 895,12}, { 1919,14}, { 511,13}, { 1023,12}, \ - { 2111,13}, { 1151,12}, { 2431,13}, { 1407,14}, \ - { 767,13}, { 1663,12}, { 3455,13}, { 8192,14}, \ - { 16384,15}, { 32768,16} } -#define SQR_FFT_TABLE3_SIZE 146 -#define SQR_FFT_THRESHOLD 5760 - -#define MULLO_BASECASE_THRESHOLD 0 /* always */ -#define MULLO_DC_THRESHOLD 33 -#define MULLO_MUL_N_THRESHOLD 13463 - -#define DC_DIV_QR_THRESHOLD 20 -#define DC_DIVAPPR_Q_THRESHOLD 56 -#define 
DC_BDIV_QR_THRESHOLD 60 -#define DC_BDIV_Q_THRESHOLD 134 - -#define INV_MULMOD_BNM1_THRESHOLD 38 -#define INV_NEWTON_THRESHOLD 66 -#define INV_APPR_THRESHOLD 63 - -#define BINV_NEWTON_THRESHOLD 250 -#define REDC_1_TO_REDC_N_THRESHOLD 63 - -#define MU_DIV_QR_THRESHOLD 1164 -#define MU_DIVAPPR_Q_THRESHOLD 979 -#define MUPI_DIV_QR_THRESHOLD 38 -#define MU_BDIV_QR_THRESHOLD 1442 -#define MU_BDIV_Q_THRESHOLD 1470 - -#define MATRIX22_STRASSEN_THRESHOLD 17 -#define HGCD_THRESHOLD 64 -#define HGCD_APPR_THRESHOLD 105 -#define HGCD_REDUCE_THRESHOLD 3524 -#define GCD_DC_THRESHOLD 386 -#define GCDEXT_DC_THRESHOLD 309 -#define JACOBI_BASE_METHOD 1 - -#define GET_STR_DC_THRESHOLD 13 -#define GET_STR_PRECOMPUTE_THRESHOLD 26 -#define SET_STR_DC_THRESHOLD 587 -#define SET_STR_PRECOMPUTE_THRESHOLD 1104 + +/* NOTE: In a fat binary build SQR_KARATSUBA_THRESHOLD here cannot be + smaller than the value in mpn/x86/p6/mmx/gmp-mparam.h. The former is + used as a hard limit in mpn/x86/p6/sqr_basecase.asm, and that file will + be run by the p6/mmx cpus (pentium2, pentium3). 
*/ + + +/* 200MHz Pentium Pro */ + +/* Generated by tuneup.c, 2003-02-12, gcc 2.95 */ + +#define MUL_KARATSUBA_THRESHOLD 23 +#define MUL_TOOM3_THRESHOLD 140 + +#define SQR_BASECASE_THRESHOLD 0 /* always */ +#define SQR_KARATSUBA_THRESHOLD 52 +#define SQR_TOOM3_THRESHOLD 189 + +#define DIV_SB_PREINV_THRESHOLD 0 /* always */ +#define DIV_DC_THRESHOLD 116 +#define POWM_THRESHOLD 131 + +#define GCD_ACCEL_THRESHOLD 3 +#define JACOBI_BASE_METHOD 1 + +#define USE_PREINV_DIVREM_1 0 +#define USE_PREINV_MOD_1 1 /* native */ +#define DIVREM_2_THRESHOLD 0 /* always */ +#define DIVEXACT_1_THRESHOLD 0 /* always */ +#define MODEXACT_1_ODD_THRESHOLD 0 /* always */ + +#define GET_STR_DC_THRESHOLD 18 +#define GET_STR_PRECOMPUTE_THRESHOLD 23 +#define SET_STR_THRESHOLD 6093 + +#define MUL_FFT_TABLE { 464, 928, 1920, 3584, 10240, 40960, 0 } +#define MUL_FFT_MODF_THRESHOLD 360 +#define MUL_FFT_THRESHOLD 2816 + +#define SQR_FFT_TABLE { 528, 1184, 1920, 4608, 14336, 40960, 0 } +#define SQR_FFT_MODF_THRESHOLD 440 +#define SQR_FFT_THRESHOLD 2816 + +#define MUL_FFT_TABLE2 {{1,4}, {305,5}, {321,4}, {337,5}, {353,4}, {369,5}, {801,6}, {833,5}, {865,6}, {897,5}, {929,6}, {961,5}, {993,6}, {1345,7}, {1409,6}, {1537,7}, {1665,6}, {1729,7}, {2689,8}, {2817,7}, {3201,8}, {3329,7}, {3457,8}, {3841,7}, {3969,8}, {4097,7}, {4225,8}, {4353,7}, {4481,8}, {5889,7}, {6017,8}, {6401,7}, {6529,8}, {6913,9}, {7681,8}, {8961,9}, {9729,8}, {9985,9}, {10241,8}, {11009,9}, {11777,8}, {12289,9}, {13825,10}, {15361,9}, {15873,8}, {16129,9}, {19969,10}, {23553,9}, {24065,8}, {24321,9}, {26113,10}, {27649,11}, {28673,10}, {31745,9}, {34305,10}, {34817,9}, {35329,10}, {39937,9}, {40449,10}, {48129,11}, {55297,10}, {56321,11}, {63489,10}, {80897,11}, {96257,10}, {97281,12}, {126977,11}, {129025,10}, {130049,9}, {130561,10}, {131073,11}, {133121,10}, {134145,11}, {137217,10}, {138241,11}, {161793,10}, {162817,11}, {194561,12}, {258049,11}, {260097,10}, {261121,9}, {261633,10}, {266241,11}, {268289,10}, {277505,11}, 
{292865,10}, {293889,9}, {294401,10}, {310273,9}, {310785,11}, {325633,10}, {326657,12}, {389121,13}, {516097,12}, {520193,11}, {522241,10}, {523265,11}, {555009,10}, {556033,11}, {587777,10}, {588801,11}, {620545,10}, {621569,9}, {622081,11}, {622593,12}, {651265,11}, {653313,10}, {654337,11}, {655361,10}, {657409,11}, {663553,10}, {664577,11}, {686081,10}, {687105,11}, {718849,10}, {719873,11}, {720897,10}, {722945,11}, {737281,10}, {740353,11}, {745473,10}, {749569,11}, {751617,10}, {752641,9}, {753153,11}, {753665,12}, {770049,11}, {774145,12}, {782337,11}, {786433,10}, {787457,11}, {817153,10}, {818177,11}, {849921,10}, {850945,11}, {854017,10}, {855041,11}, {862209,10}, {863233,11}, {866305,10}, {867329,11}, {876545,10}, {877569,11}, {882689,10}, {883713,9}, {884225,11}, {884737,13}, {1040385,12}, {1044481,11}, {1112065,10}, {1113089,12}, {1175553,11}, {1243137,12}, {1306625,11}, {1374209,10}, {1375233,12}, {1437697,11}, {1505281,10}, {1506305,12}, {1515521,13}, {1523713,12}, {1527809,13}, {1540097,12}, {1544193,13}, {1548289,12}, {1568769,11}, {1636353,10}, {1637377,12}, {1699841,11}, {MP_SIZE_T_MAX,0}} + +#define SQR_FFT_TABLE2 {{1,4}, {273,5}, {289,4}, {305,5}, {673,6}, {705,5}, {737,6}, {769,5}, {801,6}, {1345,7}, {1409,6}, {1537,7}, {1665,6}, {1729,7}, {2689,8}, {2817,7}, {3201,8}, {3329,7}, {3713,8}, {3841,7}, {4225,8}, {4865,7}, {4993,9}, {5121,8}, {6657,9}, {7681,8}, {8961,9}, {11777,8}, {12033,10}, {12289,8}, {12545,9}, {13825,10}, {14337,9}, {14849,10}, {15361,9}, {19969,10}, {23553,9}, {24577,11}, {30721,10}, {31745,9}, {32257,10}, {37889,9}, {38401,10}, {39937,9}, {40449,10}, {48129,11}, {63489,10}, {80897,11}, {96257,12}, {126977,11}, {129025,10}, {130049,11}, {194561,12}, {208897,11}, {210945,12}, {258049,11}, {260097,9}, {269313,10}, {277505,9}, {278017,11}, {278529,10}, {280577,11}, {282625,10}, {283649,11}, {284673,10}, {285697,11}, {286721,10}, {289793,11}, {290817,10}, {293889,9}, {294401,10}, {310273,9}, {310785,8}, {311041,10}, 
{311297,11}, {315393,10}, {321537,12}, {323585,11}, {325633,10}, {326657,12}, {331777,10}, {332801,12}, {389121,10}, {392193,9}, {392705,10}, {413697,9}, {414209,10}, {418817,9}, {419841,10}, {424961,9}, {425473,10}, {441345,9}, {441857,10}, {449537,9}, {450561,10}, {452609,9}, {453121,10}, {454657,9}, {455169,10}, {490497,12}, {491521,13}, {516097,12}, {520193,10}, {523265,11}, {555009,10}, {556033,11}, {587777,10}, {588801,11}, {620545,10}, {621569,9}, {622081,11}, {624641,12}, {626689,11}, {653313,10}, {654337,11}, {686081,10}, {687105,11}, {718849,10}, {720897,11}, {722945,10}, {724993,11}, {729089,10}, {734209,11}, {737281,10}, {744449,11}, {745473,10}, {747521,11}, {749569,10}, {752641,11}, {784385,10}, {785409,11}, {808961,10}, {809985,11}, {817153,10}, {818177,11}, {849921,10}, {850945,11}, {851969,10}, {852993,11}, {858113,10}, {859137,11}, {860161,10}, {861185,11}, {882689,10}, {883713,11}, {980993,13}, {1040385,11}, {1112065,12}, {1175553,11}, {1243137,12}, {1306625,11}, {1374209,10}, {1375233,12}, {1437697,11}, {1505281,10}, {1506305,12}, {1568769,11}, {1636353,10}, {1637377,12}, {MP_SIZE_T_MAX,0}} diff --git a/gmp/mpn/x86/p6/lshsub_n.asm b/gmp/mpn/x86/p6/lshsub_n.asm index 7ada213644..a3086bdbc2 100644 --- a/gmp/mpn/x86/p6/lshsub_n.asm +++ b/gmp/mpn/x86/p6/lshsub_n.asm @@ -1,38 +1,27 @@ dnl Intel P6 mpn_lshsub_n -- mpn papillion support. dnl Copyright 2006 Free Software Foundation, Inc. - +dnl dnl This file is part of the GNU MP Library. dnl dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. 
+dnl it under the terms of the GNU Lesser General Public License as published +dnl by the Free Software Foundation; either version 3 of the License, or (at +dnl your option) any later version. dnl dnl The GNU MP Library is distributed in the hope that it will be useful, but dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') C P6/13: 3.35 cycles/limb (separate mpn_sub_n + mpn_lshift needs 4.12) -C (1) The loop is not scheduled in any way, and scheduling attempts have not +C (1) The loop is is not scheduled in any way, and scheduling attempts have not C improved speed on P6/13. Presumably, the K7 will want scheduling, if it C at all wants to use MMX. C (2) We could save a register by not alternatingly using eax and edx in the diff --git a/gmp/mpn/x86/p6/mmx/divrem_1.asm b/gmp/mpn/x86/p6/mmx/divrem_1.asm index 5300616c14..8891f3a843 100644 --- a/gmp/mpn/x86/p6/mmx/divrem_1.asm +++ b/gmp/mpn/x86/p6/mmx/divrem_1.asm @@ -1,32 +1,21 @@ dnl Intel Pentium-II mpn_divrem_1 -- mpn by limb division. -dnl Copyright 1999-2002 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: +dnl Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc. 
dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') diff --git a/gmp/mpn/x86/p6/mmx/gmp-mparam.h b/gmp/mpn/x86/p6/mmx/gmp-mparam.h index 35c3aadfc1..47602f562e 100644 --- a/gmp/mpn/x86/p6/mmx/gmp-mparam.h +++ b/gmp/mpn/x86/p6/mmx/gmp-mparam.h @@ -1,198 +1,79 @@ /* Intel P6/mmx gmp-mparam.h -- Compiler/machine parameter header file. -Copyright 1991, 1993, 1994, 1999-2005, 2009, 2010 Free Software Foundation, -Inc. 
+Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2009 +Free Software Foundation, Inc. This file is part of the GNU MP Library. The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of either: +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 3 of the License, or (at your +option) any later version. - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. -or +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ - * the GNU General Public License as published by the Free Software - Foundation; either version 2 of the License, or (at your option) any - later version. -or both in parallel, as here. +#define BITS_PER_MP_LIMB 32 +#define BYTES_PER_MP_LIMB 4 -The GNU MP Library is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. - -You should have received copies of the GNU General Public License and the -GNU Lesser General Public License along with the GNU MP Library. If not, -see https://www.gnu.org/licenses/. */ - - -#define GMP_LIMB_BITS 32 -#define GMP_LIMB_BYTES 4 - - -/* NOTE: In a fat binary build SQR_TOOM2_THRESHOLD here cannot be more than the - value in mpn/x86/p6/gmp-mparam.h. The latter is used as a hard limit in - mpn/x86/p6/sqr_basecase.asm. 
*/ - - -/* 800 MHz P6 model 8 */ - -#define MOD_1_NORM_THRESHOLD 4 -#define MOD_1_UNNORM_THRESHOLD 4 -#define MOD_1N_TO_MOD_1_1_THRESHOLD 9 -#define MOD_1U_TO_MOD_1_1_THRESHOLD 7 -#define MOD_1_1_TO_MOD_1_2_THRESHOLD 8 -#define MOD_1_2_TO_MOD_1_4_THRESHOLD 10 -#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 17 -#define USE_PREINV_DIVREM_1 1 /* native */ -#define DIVEXACT_1_THRESHOLD 0 /* always (native) */ -#define BMOD_1_TO_MOD_1_THRESHOLD 49 - -#define MUL_TOOM22_THRESHOLD 22 -#define MUL_TOOM33_THRESHOLD 73 -#define MUL_TOOM44_THRESHOLD 193 -#define MUL_TOOM6H_THRESHOLD 254 -#define MUL_TOOM8H_THRESHOLD 381 - -#define MUL_TOOM32_TO_TOOM43_THRESHOLD 73 -#define MUL_TOOM32_TO_TOOM53_THRESHOLD 122 -#define MUL_TOOM42_TO_TOOM53_THRESHOLD 73 -#define MUL_TOOM42_TO_TOOM63_THRESHOLD 80 - -#define SQR_BASECASE_THRESHOLD 0 /* always (native) */ -#define SQR_TOOM2_THRESHOLD 30 -#define SQR_TOOM3_THRESHOLD 81 -#define SQR_TOOM4_THRESHOLD 142 -#define SQR_TOOM6_THRESHOLD 258 -#define SQR_TOOM8_THRESHOLD 399 - -#define MULMOD_BNM1_THRESHOLD 15 -#define SQRMOD_BNM1_THRESHOLD 18 - -#define MUL_FFT_MODF_THRESHOLD 476 /* k = 5 */ -#define MUL_FFT_TABLE3 \ - { { 476, 5}, { 21, 6}, { 11, 5}, { 23, 6}, \ - { 21, 7}, { 11, 6}, { 25, 7}, { 13, 6}, \ - { 27, 7}, { 15, 6}, { 31, 7}, { 21, 8}, \ - { 11, 7}, { 27, 8}, { 15, 7}, { 35, 8}, \ - { 19, 7}, { 41, 8}, { 23, 7}, { 47, 8}, \ - { 27, 9}, { 15, 8}, { 31, 7}, { 63, 8}, \ - { 39, 9}, { 23, 8}, { 51,10}, { 15, 9}, \ - { 31, 8}, { 67, 9}, { 39, 8}, { 79, 9}, \ - { 47, 8}, { 95, 9}, { 55,10}, { 31, 9}, \ - { 63, 8}, { 127, 9}, { 79,10}, { 47, 9}, \ - { 95,11}, { 31,10}, { 63, 9}, { 135,10}, \ - { 79, 9}, { 167,10}, { 95, 9}, { 199,10}, \ - { 111,11}, { 63,10}, { 127, 9}, { 255, 8}, \ - { 511,10}, { 143, 9}, { 287, 8}, { 575,10}, \ - { 159,11}, { 95,10}, { 191, 9}, { 383,10}, \ - { 207,12}, { 63,11}, { 127,10}, { 255, 9}, \ - { 511,10}, { 271, 9}, { 543, 8}, { 1087,10}, \ - { 287, 9}, { 575,11}, { 159,10}, { 319, 9}, \ - { 639,10}, { 351, 9}, { 
703,11}, { 191,10}, \ - { 383, 9}, { 767,10}, { 415, 9}, { 831,11}, \ - { 223,10}, { 447,12}, { 127,11}, { 255,10}, \ - { 543, 9}, { 1087,11}, { 287,10}, { 607, 9}, \ - { 1215,11}, { 319,10}, { 671,11}, { 351,10}, \ - { 703,12}, { 191,11}, { 383,10}, { 767,11}, \ - { 415,10}, { 831,11}, { 447,13}, { 127,12}, \ - { 255,11}, { 543,10}, { 1087,11}, { 607,10}, \ - { 1215,12}, { 319,11}, { 671,10}, { 1343,11}, \ - { 703,10}, { 1407,11}, { 735,12}, { 383,11}, \ - { 831,12}, { 447,11}, { 959,10}, { 1919,13}, \ - { 255,12}, { 511,11}, { 1087,12}, { 575,11}, \ - { 1215,10}, { 2431,12}, { 639,11}, { 1343,12}, \ - { 703,11}, { 1471,13}, { 383,12}, { 767,11}, \ - { 1535,12}, { 831,11}, { 1727,12}, { 959,11}, \ - { 1919,14}, { 255,13}, { 511,12}, { 1215,11}, \ - { 2431,13}, { 639,12}, { 1471,11}, { 2943,13}, \ - { 767,12}, { 1727,13}, { 895,12}, { 1919,11}, \ - { 3839,14}, { 511,13}, { 1023,12}, { 2111,13}, \ - { 1151,12}, { 2431,13}, { 1279,12}, { 2559,13}, \ - { 1407,12}, { 2943,14}, { 767,13}, { 1663,12}, \ - { 3327,13}, { 1919,12}, { 3839,15}, { 32768,16} } -#define MUL_FFT_TABLE3_SIZE 160 -#define MUL_FFT_THRESHOLD 7040 - -#define SQR_FFT_MODF_THRESHOLD 376 /* k = 5 */ -#define SQR_FFT_TABLE3 \ - { { 376, 5}, { 21, 6}, { 11, 5}, { 23, 6}, \ - { 21, 7}, { 11, 6}, { 24, 7}, { 13, 6}, \ - { 27, 7}, { 15, 6}, { 31, 7}, { 21, 8}, \ - { 11, 7}, { 27, 8}, { 15, 7}, { 33, 8}, \ - { 19, 7}, { 39, 8}, { 23, 7}, { 47, 8}, \ - { 27, 9}, { 15, 8}, { 39, 9}, { 23, 8}, \ - { 51,10}, { 15, 9}, { 31, 8}, { 67, 9}, \ - { 39, 8}, { 79, 9}, { 47, 8}, { 95, 9}, \ - { 55,10}, { 31, 9}, { 79,10}, { 47, 9}, \ - { 95,11}, { 31,10}, { 63, 9}, { 127, 8}, \ - { 255, 9}, { 135,10}, { 79, 9}, { 167,10}, \ - { 95, 9}, { 191, 8}, { 383,10}, { 111,11}, \ - { 63,10}, { 127, 9}, { 255, 8}, { 511, 9}, \ - { 271,10}, { 143, 9}, { 287, 8}, { 575, 9}, \ - { 303, 8}, { 607,10}, { 159, 9}, { 319,11}, \ - { 95,10}, { 191, 9}, { 383,10}, { 207,12}, \ - { 63,11}, { 127,10}, { 255, 9}, { 511,10}, \ - { 271, 9}, { 
543,10}, { 287, 9}, { 575,10}, \ - { 303,11}, { 159,10}, { 319, 9}, { 639,10}, \ - { 351, 9}, { 703,11}, { 191,10}, { 383, 9}, \ - { 767,10}, { 415, 9}, { 831,11}, { 223,10}, \ - { 479,12}, { 127,11}, { 255,10}, { 543, 9}, \ - { 1087,11}, { 287,10}, { 607, 9}, { 1215,11}, \ - { 319,10}, { 671,11}, { 351,10}, { 703,12}, \ - { 191,11}, { 383,10}, { 767,11}, { 415,10}, \ - { 831,11}, { 479,13}, { 127,12}, { 255,11}, \ - { 543,10}, { 1087,11}, { 607,10}, { 1215,12}, \ - { 319,11}, { 671,10}, { 1343,11}, { 703,10}, \ - { 1407,11}, { 735,12}, { 383,11}, { 831,12}, \ - { 447,11}, { 959,10}, { 1919,13}, { 255,12}, \ - { 511,11}, { 1087,12}, { 575,11}, { 1215,10}, \ - { 2431,12}, { 639,11}, { 1343,12}, { 703,11}, \ - { 1407,13}, { 383,12}, { 831,11}, { 1727,12}, \ - { 959,11}, { 1919,14}, { 255,13}, { 511,12}, \ - { 1215,11}, { 2431,13}, { 639,12}, { 1471,11}, \ - { 2943,13}, { 767,12}, { 1727,13}, { 895,12}, \ - { 1919,11}, { 3839,14}, { 511,13}, { 1023,12}, \ - { 2111,13}, { 1151,12}, { 2431,13}, { 1407,12}, \ - { 2943,14}, { 767,13}, { 1535,12}, { 3071,13}, \ - { 1663,12}, { 3455,13}, { 1919,12}, { 3839,15}, \ - { 32768,16} } -#define SQR_FFT_TABLE3_SIZE 161 -#define SQR_FFT_THRESHOLD 3712 - -#define MULLO_BASECASE_THRESHOLD 8 -#define MULLO_DC_THRESHOLD 60 -#define MULLO_MUL_N_THRESHOLD 13765 - -#define DC_DIV_QR_THRESHOLD 83 -#define DC_DIVAPPR_Q_THRESHOLD 246 -#define DC_BDIV_QR_THRESHOLD 76 -#define DC_BDIV_Q_THRESHOLD 175 - -#define INV_MULMOD_BNM1_THRESHOLD 42 -#define INV_NEWTON_THRESHOLD 268 -#define INV_APPR_THRESHOLD 250 - -#define BINV_NEWTON_THRESHOLD 276 -#define REDC_1_TO_REDC_N_THRESHOLD 74 - -#define MU_DIV_QR_THRESHOLD 1442 -#define MU_DIVAPPR_Q_THRESHOLD 1442 -#define MUPI_DIV_QR_THRESHOLD 132 -#define MU_BDIV_QR_THRESHOLD 1142 -#define MU_BDIV_Q_THRESHOLD 1334 - -#define MATRIX22_STRASSEN_THRESHOLD 18 -#define HGCD_THRESHOLD 121 -#define GCD_DC_THRESHOLD 478 -#define GCDEXT_DC_THRESHOLD 361 -#define JACOBI_BASE_METHOD 4 - -#define GET_STR_DC_THRESHOLD 
13 -#define GET_STR_PRECOMPUTE_THRESHOLD 26 -#define SET_STR_DC_THRESHOLD 272 -#define SET_STR_PRECOMPUTE_THRESHOLD 1074 + +/* NOTE: In a fat binary build SQR_KARATSUBA_THRESHOLD here cannot be more + than the value in mpn/x86/p6/gmp-mparam.h. The latter is used as a hard + limit in mpn/x86/p6/sqr_basecase.asm. */ + + +/* 1867 MHz P6 model 13 */ + +/* Generated by tuneup.c, 2009-03-02, gcc 4.3 */ + +#define MUL_KARATSUBA_THRESHOLD 20 +#define MUL_TOOM3_THRESHOLD 74 +#define MUL_TOOM44_THRESHOLD 166 + +#define SQR_BASECASE_THRESHOLD 0 /* always (native) */ +#define SQR_KARATSUBA_THRESHOLD 30 +#define SQR_TOOM3_THRESHOLD 101 +#define SQR_TOOM4_THRESHOLD 154 + +#define MULLOW_BASECASE_THRESHOLD 7 +#define MULLOW_DC_THRESHOLD 39 +#define MULLOW_MUL_N_THRESHOLD 230 + +#define DIV_SB_PREINV_THRESHOLD 0 /* always */ +#define DIV_DC_THRESHOLD 21 +#define POWM_THRESHOLD 154 + +#define MATRIX22_STRASSEN_THRESHOLD 23 +#define HGCD_THRESHOLD 72 +#define GCD_DC_THRESHOLD 321 +#define GCDEXT_DC_THRESHOLD 416 +#define JACOBI_BASE_METHOD 1 + +#define USE_PREINV_DIVREM_1 1 /* native */ +#define USE_PREINV_MOD_1 1 /* native */ +#define DIVEXACT_1_THRESHOLD 0 /* always (native) */ +#define MODEXACT_1_ODD_THRESHOLD 0 /* always (native) */ + +#define GET_STR_DC_THRESHOLD 15 +#define GET_STR_PRECOMPUTE_THRESHOLD 24 +#define SET_STR_DC_THRESHOLD 587 +#define SET_STR_PRECOMPUTE_THRESHOLD 1083 + +#define MUL_FFT_TABLE { 400, 928, 1664, 4608, 10240, 57344, 163840, 393216, 0 } +#define MUL_FFT_MODF_THRESHOLD 496 +#define MUL_FFT_THRESHOLD 7168 + +#define SQR_FFT_TABLE { 432, 928, 1664, 3584, 10240, 40960, 98304, 393216, 0 } +#define SQR_FFT_MODF_THRESHOLD 448 +#define SQR_FFT_THRESHOLD 3840 + +/* These tables need updating */ +#define MUL_FFT_TABLE2 {{1,4}, {305,5}, {321,4}, {337,5}, {353,4}, {369,5}, {801,6}, {833,5}, {865,6}, {897,5}, {929,6}, {961,5}, {993,6}, {1345,7}, {1409,6}, {1537,7}, {1665,6}, {1729,7}, {2689,8}, {2817,7}, {3201,8}, {3329,7}, {3457,8}, {3841,7}, {3969,8}, {4097,7}, 
{4225,8}, {4353,7}, {4481,8}, {5889,7}, {6017,8}, {6401,7}, {6529,8}, {6913,9}, {7681,8}, {8961,9}, {9729,8}, {9985,9}, {10241,8}, {11009,9}, {11777,8}, {12289,9}, {13825,10}, {15361,9}, {15873,8}, {16129,9}, {19969,10}, {23553,9}, {24065,8}, {24321,9}, {26113,10}, {27649,11}, {28673,10}, {31745,9}, {34305,10}, {34817,9}, {35329,10}, {39937,9}, {40449,10}, {48129,11}, {55297,10}, {56321,11}, {63489,10}, {80897,11}, {96257,10}, {97281,12}, {126977,11}, {129025,10}, {130049,9}, {130561,10}, {131073,11}, {133121,10}, {134145,11}, {137217,10}, {138241,11}, {161793,10}, {162817,11}, {194561,12}, {258049,11}, {260097,10}, {261121,9}, {261633,10}, {266241,11}, {268289,10}, {277505,11}, {292865,10}, {293889,9}, {294401,10}, {310273,9}, {310785,11}, {325633,10}, {326657,12}, {389121,13}, {516097,12}, {520193,11}, {522241,10}, {523265,11}, {555009,10}, {556033,11}, {587777,10}, {588801,11}, {620545,10}, {621569,9}, {622081,11}, {622593,12}, {651265,11}, {653313,10}, {654337,11}, {655361,10}, {657409,11}, {663553,10}, {664577,11}, {686081,10}, {687105,11}, {718849,10}, {719873,11}, {720897,10}, {722945,11}, {737281,10}, {740353,11}, {745473,10}, {749569,11}, {751617,10}, {752641,9}, {753153,11}, {753665,12}, {770049,11}, {774145,12}, {782337,11}, {786433,10}, {787457,11}, {817153,10}, {818177,11}, {849921,10}, {850945,11}, {854017,10}, {855041,11}, {862209,10}, {863233,11}, {866305,10}, {867329,11}, {876545,10}, {877569,11}, {882689,10}, {883713,9}, {884225,11}, {884737,13}, {1040385,12}, {1044481,11}, {1112065,10}, {1113089,12}, {1175553,11}, {1243137,12}, {1306625,11}, {1374209,10}, {1375233,12}, {1437697,11}, {1505281,10}, {1506305,12}, {1515521,13}, {1523713,12}, {1527809,13}, {1540097,12}, {1544193,13}, {1548289,12}, {1568769,11}, {1636353,10}, {1637377,12}, {1699841,11}, {MP_SIZE_T_MAX,0}} + +#define SQR_FFT_TABLE2 {{1,4}, {273,5}, {289,4}, {305,5}, {673,6}, {705,5}, {737,6}, {769,5}, {801,6}, {1345,7}, {1409,6}, {1537,7}, {1665,6}, {1729,7}, {2689,8}, {2817,7}, 
{3201,8}, {3329,7}, {3713,8}, {3841,7}, {4225,8}, {4865,7}, {4993,9}, {5121,8}, {6657,9}, {7681,8}, {8961,9}, {11777,8}, {12033,10}, {12289,8}, {12545,9}, {13825,10}, {14337,9}, {14849,10}, {15361,9}, {19969,10}, {23553,9}, {24577,11}, {30721,10}, {31745,9}, {32257,10}, {37889,9}, {38401,10}, {39937,9}, {40449,10}, {48129,11}, {63489,10}, {80897,11}, {96257,12}, {126977,11}, {129025,10}, {130049,11}, {194561,12}, {208897,11}, {210945,12}, {258049,11}, {260097,9}, {269313,10}, {277505,9}, {278017,11}, {278529,10}, {280577,11}, {282625,10}, {283649,11}, {284673,10}, {285697,11}, {286721,10}, {289793,11}, {290817,10}, {293889,9}, {294401,10}, {310273,9}, {310785,8}, {311041,10}, {311297,11}, {315393,10}, {321537,12}, {323585,11}, {325633,10}, {326657,12}, {331777,10}, {332801,12}, {389121,10}, {392193,9}, {392705,10}, {413697,9}, {414209,10}, {418817,9}, {419841,10}, {424961,9}, {425473,10}, {441345,9}, {441857,10}, {449537,9}, {450561,10}, {452609,9}, {453121,10}, {454657,9}, {455169,10}, {490497,12}, {491521,13}, {516097,12}, {520193,10}, {523265,11}, {555009,10}, {556033,11}, {587777,10}, {588801,11}, {620545,10}, {621569,9}, {622081,11}, {624641,12}, {626689,11}, {653313,10}, {654337,11}, {686081,10}, {687105,11}, {718849,10}, {720897,11}, {722945,10}, {724993,11}, {729089,10}, {734209,11}, {737281,10}, {744449,11}, {745473,10}, {747521,11}, {749569,10}, {752641,11}, {784385,10}, {785409,11}, {808961,10}, {809985,11}, {817153,10}, {818177,11}, {849921,10}, {850945,11}, {851969,10}, {852993,11}, {858113,10}, {859137,11}, {860161,10}, {861185,11}, {882689,10}, {883713,11}, {980993,13}, {1040385,11}, {1112065,12}, {1175553,11}, {1243137,12}, {1306625,11}, {1374209,10}, {1375233,12}, {1437697,11}, {1505281,10}, {1506305,12}, {1568769,11}, {1636353,10}, {1637377,12}, {MP_SIZE_T_MAX,0}} diff --git a/gmp/mpn/x86/p6/mmx/lshift.asm b/gmp/mpn/x86/p6/mmx/lshift.asm index febd1c0e6c..e325b67d64 100644 --- a/gmp/mpn/x86/p6/mmx/lshift.asm +++ b/gmp/mpn/x86/p6/mmx/lshift.asm @@ 
-1,32 +1,21 @@ dnl Intel Pentium-II mpn_lshift -- mpn left shift. dnl Copyright 2001 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. 
dnl The P55 code runs well on P-II/III, but could stand some minor tweaks diff --git a/gmp/mpn/x86/p6/mmx/popham.asm b/gmp/mpn/x86/p6/mmx/popham.asm index fd340e4b45..421daa5308 100644 --- a/gmp/mpn/x86/p6/mmx/popham.asm +++ b/gmp/mpn/x86/p6/mmx/popham.asm @@ -2,32 +2,21 @@ dnl Intel Pentium-II mpn_popcount, mpn_hamdist -- population count and dnl hamming distance. dnl Copyright 2000, 2002 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. 
If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') diff --git a/gmp/mpn/x86/p6/mmx/rshift.asm b/gmp/mpn/x86/p6/mmx/rshift.asm index 77aa1909fa..b1543cdf52 100644 --- a/gmp/mpn/x86/p6/mmx/rshift.asm +++ b/gmp/mpn/x86/p6/mmx/rshift.asm @@ -1,32 +1,21 @@ dnl Intel Pentium-II mpn_rshift -- mpn left shift. dnl Copyright 2001 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. 
dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. dnl The P55 code runs well on P-II/III, but could stand some minor tweaks diff --git a/gmp/mpn/x86/p6/mod_1.asm b/gmp/mpn/x86/p6/mod_1.asm new file mode 100644 index 0000000000..b6eacf7e82 --- /dev/null +++ b/gmp/mpn/x86/p6/mod_1.asm @@ -0,0 +1,472 @@ +dnl Intel P6 mpn_mod_1 -- mpn by limb remainder. + +dnl Copyright 1999, 2000, 2002 Free Software Foundation, Inc. +dnl +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. +dnl +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. + +include(`../config.m4') + + +C P6: 21.5 cycles/limb + + +C mp_limb_t mpn_mod_1 (mp_srcptr src, mp_size_t size, mp_limb_t divisor); +C mp_limb_t mpn_mod_1c (mp_srcptr src, mp_size_t size, mp_limb_t divisor, +C mp_limb_t carry); +C mp_limb_t mpn_preinv_mod_1 (mp_srcptr src, mp_size_t size, mp_limb_t divisor, +C mp_limb_t inverse); +C +C The code here is in two parts, a simple divl loop and a mul-by-inverse. 
+C The divl is used by mod_1 and mod_1c for small sizes, until the savings in +C the mul-by-inverse can overcome the time to calculate an inverse. +C preinv_mod_1 goes straight to the mul-by-inverse. +C +C The mul-by-inverse normalizes the divisor (or for preinv_mod_1 it's +C already normalized). The calculation done is r=a%(d*2^n) followed by a +C final (r*2^n)%(d*2^n), where a is the dividend, d the divisor, and n is +C the number of leading zero bits on d. This means there are no bit shifts in +C the main loop, at the cost of an extra divide step at the end. +C +C The simple divl for mod_1 is able to skip one divide step if high<divisor. +C For mod_1c the carry parameter is the high of the first divide step, and +C no attempt is made to skip that step since carry==0 will be very rare. +C +C The mul-by-inverse always skips one divide step, but then needs an extra +C step at the end, unless the divisor was already normalized (n==0). This +C leads to different mul-by-inverse thresholds for normalized and +C unnormalized divisors, in mod_1 and mod_1c. +C +C Alternatives: +C +C If n is small then the extra divide step could be done by a few shift and +C trial subtract steps instead of a full divide. That would probably be 3 +C or 4 cycles/bit, so say up to n=8 might benefit from that over a 21 cycle +C divide. However it's considered that small divisors, meaning biggish n, +C are more likely than small n, and that it's not worth the branch +C mispredicts of a loop. +C +C Past: +C +C There used to be some MMX based code for P-II and P-III, roughly following +C the K7 form, but it was slower (about 24.0 c/l) than the code here. That +C code did have an advantage that mod_1 was able to do one less divide step +C when high<divisor and the divisor unnormalized, but the speed advantage of +C the current code soon overcomes that. +C +C Future: +C +C It's not clear whether what's here is optimal.
A rough count of micro-ops +C on the dependent chain would suggest a couple of cycles could be shaved, +C perhaps. + + +dnl The following thresholds are the sizes where the multiply by inverse +dnl method is used instead of plain divl's. Minimum value 2 each. +dnl +dnl MUL_NORM_THRESHOLD is for normalized divisors (high bit set), +dnl MUL_UNNORM_THRESHOLD for unnormalized divisors. +dnl +dnl With the divl loop at 39 c/l, and the inverse loop at 21.5 c/l but +dnl setups for the inverse of about 50, the threshold should be around +dnl 50/(39-21.5)==2.85. An unnormalized divisor gets an extra divide step +dnl at the end, so if that's about 25 cycles then that threshold might be +dnl around (50+25)/(39-21.5) == 4.3. + +deflit(MUL_NORM_THRESHOLD, 4) +deflit(MUL_UNNORM_THRESHOLD, 5) + +deflit(MUL_NORM_DELTA, eval(MUL_NORM_THRESHOLD - MUL_UNNORM_THRESHOLD)) + + +defframe(PARAM_INVERSE, 16) dnl mpn_preinv_mod_1 +defframe(PARAM_CARRY, 16) dnl mpn_mod_1c +defframe(PARAM_DIVISOR, 12) +defframe(PARAM_SIZE, 8) +defframe(PARAM_SRC, 4) + +defframe(SAVE_EBX, -4) +defframe(SAVE_ESI, -8) +defframe(SAVE_EDI, -12) +defframe(SAVE_EBP, -16) + +defframe(VAR_NORM, -20) +defframe(VAR_INVERSE, -24) + +deflit(STACK_SPACE, 24) + + TEXT + + ALIGN(16) +PROLOGUE(mpn_preinv_mod_1) +deflit(`FRAME',0) + + movl PARAM_SRC, %edx + subl $STACK_SPACE, %esp FRAME_subl_esp(STACK_SPACE) + + movl %ebx, SAVE_EBX + movl PARAM_SIZE, %ebx + + movl %ebp, SAVE_EBP + movl PARAM_DIVISOR, %ebp + + movl %esi, SAVE_ESI + movl PARAM_INVERSE, %eax + + movl %edi, SAVE_EDI + movl -4(%edx,%ebx,4), %edi C src high limb + + movl $0, VAR_NORM + leal -8(%edx,%ebx,4), %ecx C &src[size-2] + + C + + movl %edi, %esi + subl %ebp, %edi C high-divisor + + cmovc( %esi, %edi) C restore if underflow + decl %ebx + jnz L(preinv_entry) + + jmp L(done_edi) + +EPILOGUE() + + + ALIGN(16) +PROLOGUE(mpn_mod_1c) +deflit(`FRAME',0) + + movl PARAM_SIZE, %ecx + subl $STACK_SPACE, %esp FRAME_subl_esp(STACK_SPACE) + + movl %ebp, SAVE_EBP + movl 
PARAM_DIVISOR, %eax + + movl %esi, SAVE_ESI + movl PARAM_CARRY, %edx + + movl PARAM_SRC, %esi + orl %ecx, %ecx + jz L(done_edx) C result==carry if size==0 + + sarl $31, %eax + movl PARAM_DIVISOR, %ebp + + andl $MUL_NORM_DELTA, %eax + + addl $MUL_UNNORM_THRESHOLD, %eax + + cmpl %eax, %ecx + jb L(divide_top) + + + C The carry parameter pretends to be the src high limb. + + movl %ebx, SAVE_EBX + leal 1(%ecx), %ebx C size+1 + + movl %edx, %eax C carry + jmp L(mul_by_inverse_1c) + +EPILOGUE() + + + ALIGN(16) +PROLOGUE(mpn_mod_1) +deflit(`FRAME',0) + + movl PARAM_SIZE, %ecx + subl $STACK_SPACE, %esp FRAME_subl_esp(STACK_SPACE) + movl $0, %edx C initial carry (if can't skip a div) + + movl %esi, SAVE_ESI + movl PARAM_SRC, %eax + + movl %ebp, SAVE_EBP + movl PARAM_DIVISOR, %ebp + + movl PARAM_DIVISOR, %esi + orl %ecx, %ecx + jz L(done_edx) + + movl -4(%eax,%ecx,4), %eax C src high limb + + sarl $31, %ebp + + andl $MUL_NORM_DELTA, %ebp + + addl $MUL_UNNORM_THRESHOLD, %ebp + cmpl %esi, %eax C carry flag if high<divisor + + cmovc( %eax, %edx) C src high limb as initial carry + movl PARAM_SRC, %esi + + sbbl $0, %ecx C size-1 to skip one div + jz L(done_eax) C done if had size==1 + + cmpl %ebp, %ecx + movl PARAM_DIVISOR, %ebp + jae L(mul_by_inverse) + + +L(divide_top): + C eax scratch (quotient) + C ebx + C ecx counter, limbs, decrementing + C edx scratch (remainder) + C esi src + C edi + C ebp divisor + + movl -4(%esi,%ecx,4), %eax + + divl %ebp + + decl %ecx + jnz L(divide_top) + + +L(done_edx): + movl %edx, %eax +L(done_eax): + movl SAVE_ESI, %esi + + movl SAVE_EBP, %ebp + addl $STACK_SPACE, %esp + + ret + + +C ----------------------------------------------------------------------------- + +L(mul_by_inverse): + C eax src high limb + C ebx + C ecx + C edx + C esi src + C edi + C ebp divisor + + movl %ebx, SAVE_EBX + movl PARAM_SIZE, %ebx + +L(mul_by_inverse_1c): + bsrl %ebp, %ecx C 31-l + + movl %edi, SAVE_EDI + xorl $31, %ecx C l + + movl %ecx, VAR_NORM + shll %cl, %ebp C d 
normalized + + movl %eax, %edi C src high -> n2 + subl %ebp, %eax + + cmovnc( %eax, %edi) C n2-divisor if no underflow + + movl $-1, %eax + movl $-1, %edx + + subl %ebp, %edx C (b-d)-1 so edx:eax = b*(b-d)-1 + leal -8(%esi,%ebx,4), %ecx C &src[size-2] + + divl %ebp C floor (b*(b-d)-1) / d + +L(preinv_entry): + movl %eax, VAR_INVERSE + + + +C No special scheduling of loads is necessary in this loop, out of order +C execution hides the latencies already. +C +C The way q1+1 is generated in %ebx and d is moved to %eax for the multiply +C seems fastest. The obvious change to generate q1+1 in %eax and then just +C multiply by %ebp (as per mpn/x86/pentium/mod_1.asm in fact) runs 1 cycle +C slower, for no obvious reason. + + + ALIGN(16) +L(inverse_top): + C eax n10 (then scratch) + C ebx scratch (nadj, q1) + C ecx src pointer, decrementing + C edx scratch + C esi n10 + C edi n2 + C ebp divisor + + movl (%ecx), %eax C next src limb + movl %eax, %esi + + sarl $31, %eax C -n1 + movl %ebp, %ebx + + andl %eax, %ebx C -n1 & d + negl %eax C n1 + + addl %edi, %eax C n2+n1 + + mull VAR_INVERSE C m*(n2+n1) + + addl %esi, %ebx C nadj = n10 + (-n1 & d), ignoring overflow + subl $4, %ecx + + C + + addl %ebx, %eax C m*(n2+n1) + nadj, low giving carry flag + leal 1(%edi), %ebx C n2+1 + movl %ebp, %eax C d + + adcl %edx, %ebx C 1 + high(n2<<32 + m*(n2+n1) + nadj) = q1+1 + jz L(q1_ff) + + mull %ebx C (q1+1)*d + + C + + subl %eax, %esi C low n - (q1+1)*d + + sbbl %edx, %edi C high n - (q1+1)*d, 0 or -1 + + andl %ebp, %edi C d if underflow + + addl %esi, %edi C remainder with addback if necessary + + cmpl PARAM_SRC, %ecx + jae L(inverse_top) + + +C ----------------------------------------------------------------------------- +L(inverse_loop_done): + + C %edi is the remainder modulo d*2^n and now must be reduced to + C 0<=r<d by calculating r*2^n mod d*2^n and then right shifting by + C n. If d was already normalized on entry so that n==0 then nothing + C is needed here. 
The chance of n==0 is low, but it's true of say + C PP from gmp-impl.h. + C + C eax + C ebx + C ecx + C edx + C esi + C edi remainder + C ebp divisor (normalized) + + movl VAR_NORM, %ecx + movl $0, %esi + + orl %ecx, %ecx + jz L(done_edi) + + + C Here use %edi=n10 and %esi=n2, opposite to the loop above. + C + C The q1=0xFFFFFFFF case is handled with an sbbl to adjust q1+1 + C back, rather than q1_ff special case code. This is simpler and + C costs only 2 uops. + + shldl( %cl, %edi, %esi) + + shll %cl, %edi + + movl %edi, %eax C n10 + movl %ebp, %ebx C d + + sarl $31, %eax C -n1 + + andl %eax, %ebx C -n1 & d + negl %eax C n1 + + addl %edi, %ebx C nadj = n10 + (-n1 & d), ignoring overflow + addl %esi, %eax C n2+n1 + + mull VAR_INVERSE C m*(n2+n1) + + C + + addl %ebx, %eax C m*(n2+n1) + nadj, low giving carry flag + leal 1(%esi), %ebx C n2+1 + + adcl %edx, %ebx C 1 + high(n2<<32 + m*(n2+n1) + nadj) = q1+1 + + sbbl $0, %ebx + movl %ebp, %eax C d + + mull %ebx C (q1+1)*d + + movl SAVE_EBX, %ebx + + C + + subl %eax, %edi C low n - (q1+1)*d is remainder + + sbbl %edx, %esi C high n - (q1+1)*d, 0 or -1 + + andl %ebp, %esi + movl SAVE_EBP, %ebp + + leal (%esi,%edi), %eax C remainder + movl SAVE_ESI, %esi + + shrl %cl, %eax C denorm remainder + movl SAVE_EDI, %edi + addl $STACK_SPACE, %esp + + ret + + +L(done_edi): + movl SAVE_EBX, %ebx + movl %edi, %eax + + movl SAVE_ESI, %esi + + movl SAVE_EDI, %edi + + movl SAVE_EBP, %ebp + addl $STACK_SPACE, %esp + + ret + + +C ----------------------------------------------------------------------------- +C +C Special case for q1=0xFFFFFFFF, giving q=0xFFFFFFFF meaning the low dword +C of q*d is simply -d and the remainder n-q*d = n10+d. +C +C This is reached only very rarely. 
+ +L(q1_ff): + C eax (divisor) + C ebx (q1+1 == 0) + C ecx src pointer + C edx + C esi n10 + C edi (n2) + C ebp divisor + + leal (%ebp,%esi), %edi C n-q*d remainder -> next n2 + + cmpl PARAM_SRC, %ecx + jae L(inverse_top) + + jmp L(inverse_loop_done) + + +EPILOGUE() diff --git a/gmp/mpn/x86/p6/mod_34lsub1.asm b/gmp/mpn/x86/p6/mod_34lsub1.asm index b88ab5d17c..5e854b7274 100644 --- a/gmp/mpn/x86/p6/mod_34lsub1.asm +++ b/gmp/mpn/x86/p6/mod_34lsub1.asm @@ -1,32 +1,21 @@ dnl Intel P6 mpn_mod_34lsub1 -- remainder modulo 2^24-1. -dnl Copyright 2000-2002, 2004 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: +dnl Copyright 2000, 2001, 2002, 2004 Free Software Foundation, Inc. dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. 
+dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') diff --git a/gmp/mpn/x86/p6/mode1o.asm b/gmp/mpn/x86/p6/mode1o.asm index c62b676e5a..4aff48d7e6 100644 --- a/gmp/mpn/x86/p6/mode1o.asm +++ b/gmp/mpn/x86/p6/mode1o.asm @@ -1,32 +1,21 @@ dnl Intel P6 mpn_modexact_1_odd -- exact division style remainder. -dnl Copyright 2000-2002, 2007 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: +dnl Copyright 2000, 2001, 2002, 2007 Free Software Foundation, Inc. dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. 
dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') @@ -112,7 +101,7 @@ ifdef(`PIC',` subl %eax, %edi C inv = 2*inv - inv*inv*d - ASSERT(e,` C d*inv == 1 mod 2^GMP_LIMB_BITS + ASSERT(e,` C d*inv == 1 mod 2^BITS_PER_MP_LIMB movl PARAM_DIVISOR, %eax imull %edi, %eax cmpl $1, %eax') @@ -124,7 +113,7 @@ C subl %edx, %eax 1 C imull %edi, %eax 4 C mull PARAM_DIVISOR 5 C ---- -C total 10 +C total 10 C C and this is the measured speed. No special scheduling is necessary, out C of order execution hides the load latency. diff --git a/gmp/mpn/x86/p6/mul_basecase.asm b/gmp/mpn/x86/p6/mul_basecase.asm index d87bc12b60..fc1afbdf0e 100644 --- a/gmp/mpn/x86/p6/mul_basecase.asm +++ b/gmp/mpn/x86/p6/mul_basecase.asm @@ -1,32 +1,21 @@ dnl Intel P6 mpn_mul_basecase -- multiply two mpn numbers. -dnl Copyright 1999-2003 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: +dnl Copyright 1999, 2000, 2001, 2002, 2003 Free Software Foundation, Inc. 
dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') diff --git a/gmp/mpn/x86/p6/p3mmx/popham.asm b/gmp/mpn/x86/p6/p3mmx/popham.asm index db2f2601c9..2f58968a31 100644 --- a/gmp/mpn/x86/p6/p3mmx/popham.asm +++ b/gmp/mpn/x86/p6/p3mmx/popham.asm @@ -2,32 +2,21 @@ dnl Intel Pentium-III mpn_popcount, mpn_hamdist -- population count and dnl hamming distance. dnl Copyright 2000, 2002, 2004, 2007 Free Software Foundation, Inc. 
- -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. 
include(`../config.m4') diff --git a/gmp/mpn/x86/p6/sqr_basecase.asm b/gmp/mpn/x86/p6/sqr_basecase.asm index 8fc7fdf375..05a31f1a15 100644 --- a/gmp/mpn/x86/p6/sqr_basecase.asm +++ b/gmp/mpn/x86/p6/sqr_basecase.asm @@ -1,32 +1,21 @@ dnl Intel P6 mpn_sqr_basecase -- square an mpn number. dnl Copyright 1999, 2000, 2002 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. 
+dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') @@ -38,15 +27,15 @@ C which is the Karatsuba recursing range). dnl These are the same as in mpn/x86/k6/sqr_basecase.asm, see that file for dnl a description. The only difference here is that UNROLL_COUNT can go up -dnl to 64 (not 63) making SQR_TOOM2_THRESHOLD_MAX 67. +dnl to 64 (not 63) making SQR_KARATSUBA_THRESHOLD_MAX 67. -deflit(SQR_TOOM2_THRESHOLD_MAX, 67) +deflit(SQR_KARATSUBA_THRESHOLD_MAX, 67) -ifdef(`SQR_TOOM2_THRESHOLD_OVERRIDE', -`define(`SQR_TOOM2_THRESHOLD',SQR_TOOM2_THRESHOLD_OVERRIDE)') +ifdef(`SQR_KARATSUBA_THRESHOLD_OVERRIDE', +`define(`SQR_KARATSUBA_THRESHOLD',SQR_KARATSUBA_THRESHOLD_OVERRIDE)') -m4_config_gmp_mparam(`SQR_TOOM2_THRESHOLD') -deflit(UNROLL_COUNT, eval(SQR_TOOM2_THRESHOLD-3)) +m4_config_gmp_mparam(`SQR_KARATSUBA_THRESHOLD') +deflit(UNROLL_COUNT, eval(SQR_KARATSUBA_THRESHOLD-3)) C void mpn_sqr_basecase (mp_ptr dst, mp_srcptr src, mp_size_t size); diff --git a/gmp/mpn/x86/p6/sse2/addmul_1.asm b/gmp/mpn/x86/p6/sse2/addmul_1.asm index 144b627aa3..b601c54bcf 100644 --- a/gmp/mpn/x86/p6/sse2/addmul_1.asm +++ b/gmp/mpn/x86/p6/sse2/addmul_1.asm @@ -1,32 +1,21 @@ dnl Intel P6/SSE2 mpn_addmul_1. dnl Copyright 2008 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. 
+dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') diff --git a/gmp/mpn/x86/p6/sse2/gmp-mparam.h b/gmp/mpn/x86/p6/sse2/gmp-mparam.h index 69226289a7..843227b99a 100644 --- a/gmp/mpn/x86/p6/sse2/gmp-mparam.h +++ b/gmp/mpn/x86/p6/sse2/gmp-mparam.h @@ -1,197 +1,74 @@ /* Intel P6/sse2 gmp-mparam.h -- Compiler/machine parameter header file. -Copyright 1991, 1993, 1994, 1999-2003, 2008-2010 Free Software Foundation, Inc. +Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2003, 2008, 2009 +Free Software Foundation, Inc. This file is part of the GNU MP Library. The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of either: +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 3 of the License, or (at your +option) any later version. 
- * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. -or +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ - * the GNU General Public License as published by the Free Software - Foundation; either version 2 of the License, or (at your option) any - later version. -or both in parallel, as here. +#define BITS_PER_MP_LIMB 32 +#define BYTES_PER_MP_LIMB 4 -The GNU MP Library is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. -You should have received copies of the GNU General Public License and the -GNU Lesser General Public License along with the GNU MP Library. If not, -see https://www.gnu.org/licenses/. */ +/* NOTE: In a fat binary build SQR_KARATSUBA_THRESHOLD here cannot be more + than the value in mpn/x86/p6/gmp-mparam.h. The latter is used as a hard + limit in mpn/x86/p6/sqr_basecase.asm. */ -#define GMP_LIMB_BITS 32 -#define GMP_LIMB_BYTES 4 +/* 1867 MHz P6 model 13 */ +/* Generated by tuneupc, 2008-10-30, gcc 4.3 */ -/* NOTE: In a fat binary build SQR_TOOM2_THRESHOLD here cannot be more than the - value in mpn/x86/p6/gmp-mparam.h. The latter is used as a hard limit in - mpn/x86/p6/sqr_basecase.asm. 
*/ +#define MUL_KARATSUBA_THRESHOLD 20 +#define MUL_TOOM3_THRESHOLD 77 +#define MUL_TOOM44_THRESHOLD 142 +#define SQR_BASECASE_THRESHOLD 0 /* always (native) */ +#define SQR_KARATSUBA_THRESHOLD 30 +#define SQR_TOOM3_THRESHOLD 101 +#define SQR_TOOM4_THRESHOLD 154 -/* 1867 MHz P6 model 13 */ +#define MULLOW_BASECASE_THRESHOLD 4 +#define MULLOW_DC_THRESHOLD 38 +#define MULLOW_MUL_N_THRESHOLD 234 + +#define DIV_SB_PREINV_THRESHOLD 0 /* always */ +#define DIV_DC_THRESHOLD 24 +#define POWM_THRESHOLD 150 + +#define MATRIX22_STRASSEN_THRESHOLD 23 +#define HGCD_THRESHOLD 95 +#define GCD_DC_THRESHOLD 381 +#define GCDEXT_DC_THRESHOLD 419 +#define JACOBI_BASE_METHOD 1 + +#define USE_PREINV_DIVREM_1 1 /* native */ +#define USE_PREINV_MOD_1 1 /* native */ +#define DIVEXACT_1_THRESHOLD 0 /* always (native) */ +#define MODEXACT_1_ODD_THRESHOLD 0 /* always (native) */ + +#define GET_STR_DC_THRESHOLD 14 +#define GET_STR_PRECOMPUTE_THRESHOLD 24 +#define SET_STR_DC_THRESHOLD 276 +#define SET_STR_PRECOMPUTE_THRESHOLD 1078 + +#define MUL_FFT_TABLE { 400, 928, 1664, 3584, 10240, 40960, 98304, 393216, 1572864, 0 } +#define MUL_FFT_MODF_THRESHOLD 496 +#define MUL_FFT_THRESHOLD 7168 -#define MOD_1_NORM_THRESHOLD 4 -#define MOD_1_UNNORM_THRESHOLD 4 -#define MOD_1N_TO_MOD_1_1_THRESHOLD 5 -#define MOD_1U_TO_MOD_1_1_THRESHOLD 4 -#define MOD_1_1_TO_MOD_1_2_THRESHOLD 11 -#define MOD_1_2_TO_MOD_1_4_THRESHOLD 0 /* never mpn_mod_1s_2p */ -#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 8 -#define USE_PREINV_DIVREM_1 1 /* native */ -#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */ -#define DIVEXACT_1_THRESHOLD 0 /* always (native) */ -#define BMOD_1_TO_MOD_1_THRESHOLD 21 - -#define MUL_TOOM22_THRESHOLD 20 -#define MUL_TOOM33_THRESHOLD 77 -#define MUL_TOOM44_THRESHOLD 169 -#define MUL_TOOM6H_THRESHOLD 246 -#define MUL_TOOM8H_THRESHOLD 381 - -#define MUL_TOOM32_TO_TOOM43_THRESHOLD 73 -#define MUL_TOOM32_TO_TOOM53_THRESHOLD 114 -#define MUL_TOOM42_TO_TOOM53_THRESHOLD 97 -#define 
MUL_TOOM42_TO_TOOM63_THRESHOLD 80 -#define MUL_TOOM43_TO_TOOM54_THRESHOLD 106 - -#define SQR_BASECASE_THRESHOLD 0 /* always (native) */ -#define SQR_TOOM2_THRESHOLD 30 -#define SQR_TOOM3_THRESHOLD 101 -#define SQR_TOOM4_THRESHOLD 154 -#define SQR_TOOM6_THRESHOLD 222 -#define SQR_TOOM8_THRESHOLD 527 - -#define MULMID_TOOM42_THRESHOLD 58 - -#define MULMOD_BNM1_THRESHOLD 13 -#define SQRMOD_BNM1_THRESHOLD 17 - -#define MUL_FFT_MODF_THRESHOLD 690 /* k = 5 */ -#define MUL_FFT_TABLE3 \ - { { 565, 5}, { 25, 6}, { 13, 5}, { 27, 6}, \ - { 25, 7}, { 13, 6}, { 28, 7}, { 15, 6}, \ - { 31, 7}, { 17, 6}, { 35, 7}, { 27, 8}, \ - { 15, 7}, { 35, 8}, { 19, 7}, { 41, 8}, \ - { 23, 7}, { 47, 8}, { 27, 9}, { 15, 8}, \ - { 31, 7}, { 63, 8}, { 39, 9}, { 23, 5}, \ - { 383, 4}, { 991, 5}, { 511, 6}, { 267, 7}, \ - { 157, 8}, { 91, 9}, { 47, 8}, { 111, 9}, \ - { 63, 8}, { 127, 9}, { 79,10}, { 47, 9}, \ - { 95,11}, { 31,10}, { 63, 9}, { 135,10}, \ - { 79, 9}, { 159,10}, { 95,11}, { 63,10}, \ - { 143, 9}, { 287,10}, { 159,11}, { 95,10}, \ - { 191,12}, { 63,11}, { 127,10}, { 255, 9}, \ - { 511,10}, { 271, 9}, { 543,10}, { 287,11}, \ - { 159,10}, { 335, 9}, { 671,11}, { 191,10}, \ - { 383, 9}, { 767,10}, { 399, 9}, { 799,10}, \ - { 415,11}, { 223,12}, { 127,11}, { 255,10}, \ - { 543, 9}, { 1087,11}, { 287,10}, { 607,11}, \ - { 319,10}, { 671,12}, { 191,11}, { 383,10}, \ - { 799,11}, { 415,10}, { 831,13}, { 127,12}, \ - { 255,11}, { 543,10}, { 1087,11}, { 607,10}, \ - { 1215,12}, { 319,11}, { 671,10}, { 1343,11}, \ - { 735,10}, { 1471,12}, { 383,11}, { 799,10}, \ - { 1599,11}, { 863,12}, { 447,11}, { 959,13}, \ - { 255,12}, { 511,11}, { 1087,12}, { 575,11}, \ - { 1215,12}, { 639,11}, { 1343,12}, { 703,11}, \ - { 1471,13}, { 383,12}, { 831,11}, { 1727,12}, \ - { 959,14}, { 255,13}, { 511,12}, { 1215,13}, \ - { 639,12}, { 1471,11}, { 2943,13}, { 767,12}, \ - { 1727,13}, { 895,12}, { 1919,14}, { 511,13}, \ - { 1023,12}, { 2111,13}, { 1151,12}, { 2431,13}, \ - { 1407,12}, { 2815,14}, { 767,13}, { 
1663,12}, \ - { 3455,13}, { 8192,14}, { 16384,15}, { 32768,16} } -#define MUL_FFT_TABLE3_SIZE 132 -#define MUL_FFT_THRESHOLD 7424 - -#define SQR_FFT_MODF_THRESHOLD 565 /* k = 5 */ -#define SQR_FFT_TABLE3 \ - { { 472, 5}, { 25, 6}, { 13, 5}, { 27, 6}, \ - { 25, 7}, { 13, 6}, { 27, 7}, { 15, 6}, \ - { 31, 7}, { 17, 6}, { 35, 7}, { 27, 8}, \ - { 15, 7}, { 35, 8}, { 19, 7}, { 41, 8}, \ - { 23, 7}, { 49, 8}, { 27, 9}, { 15, 8}, \ - { 39, 9}, { 23, 8}, { 51,10}, { 15, 9}, \ - { 31, 8}, { 63, 4}, { 1023, 8}, { 67, 9}, \ - { 39, 5}, { 639, 4}, { 1471, 6}, { 383, 7}, \ - { 209, 8}, { 119, 9}, { 63, 7}, { 255, 8}, \ - { 139, 9}, { 71, 8}, { 143, 9}, { 79,10}, \ - { 47, 9}, { 95,11}, { 31,10}, { 63, 9}, \ - { 135,10}, { 79, 9}, { 159, 8}, { 319, 9}, \ - { 167,10}, { 95,11}, { 63,10}, { 143, 9}, \ - { 287,10}, { 159,11}, { 95,10}, { 191,12}, \ - { 63,11}, { 127,10}, { 255, 9}, { 543, 8}, \ - { 1087,10}, { 287, 9}, { 575,11}, { 159,10}, \ - { 319, 9}, { 639,10}, { 335, 9}, { 671,10}, \ - { 351, 9}, { 703,11}, { 191,10}, { 383, 9}, \ - { 767,10}, { 399, 9}, { 799,10}, { 415, 9}, \ - { 831,11}, { 223,12}, { 127,11}, { 255,10}, \ - { 543, 9}, { 1087,11}, { 287,10}, { 607, 9}, \ - { 1215,11}, { 319,10}, { 671, 9}, { 1343,11}, \ - { 351,10}, { 703,12}, { 191,11}, { 383,10}, \ - { 799,11}, { 415,10}, { 831,13}, { 127,12}, \ - { 255,11}, { 543,10}, { 1087,11}, { 607,12}, \ - { 319,11}, { 671,10}, { 1343,11}, { 735,12}, \ - { 383,11}, { 799,10}, { 1599,11}, { 863,12}, \ - { 447,11}, { 959,13}, { 255,12}, { 511,11}, \ - { 1087,12}, { 575,11}, { 1215,12}, { 639,11}, \ - { 1343,12}, { 703,11}, { 1471,13}, { 383,12}, \ - { 767,11}, { 1599,12}, { 831,11}, { 1727,12}, \ - { 959,14}, { 255,13}, { 511,12}, { 1215,13}, \ - { 639,12}, { 1471,13}, { 767,12}, { 1727,13}, \ - { 895,12}, { 1919,14}, { 511,13}, { 1023,12}, \ - { 2111,13}, { 1151,12}, { 2431,13}, { 1407,14}, \ - { 767,13}, { 1663,12}, { 3455,13}, { 8192,14}, \ - { 16384,15}, { 32768,16} } -#define SQR_FFT_TABLE3_SIZE 146 -#define 
SQR_FFT_THRESHOLD 5760 - -#define MULLO_BASECASE_THRESHOLD 0 /* always */ -#define MULLO_DC_THRESHOLD 31 -#define MULLO_MUL_N_THRESHOLD 13463 - -#define DC_DIV_QR_THRESHOLD 25 -#define DC_DIVAPPR_Q_THRESHOLD 55 -#define DC_BDIV_QR_THRESHOLD 60 -#define DC_BDIV_Q_THRESHOLD 132 - -#define INV_MULMOD_BNM1_THRESHOLD 38 -#define INV_NEWTON_THRESHOLD 65 -#define INV_APPR_THRESHOLD 65 - -#define BINV_NEWTON_THRESHOLD 252 -#define REDC_1_TO_REDC_N_THRESHOLD 62 - -#define MU_DIV_QR_THRESHOLD 1164 -#define MU_DIVAPPR_Q_THRESHOLD 748 -#define MUPI_DIV_QR_THRESHOLD 38 -#define MU_BDIV_QR_THRESHOLD 1360 -#define MU_BDIV_Q_THRESHOLD 1470 - -#define POWM_SEC_TABLE 2,23,258,879,2246 - -#define MATRIX22_STRASSEN_THRESHOLD 17 -#define HGCD_THRESHOLD 69 -#define HGCD_APPR_THRESHOLD 112 -#define HGCD_REDUCE_THRESHOLD 3389 -#define GCD_DC_THRESHOLD 386 -#define GCDEXT_DC_THRESHOLD 303 -#define JACOBI_BASE_METHOD 1 - -#define GET_STR_DC_THRESHOLD 13 -#define GET_STR_PRECOMPUTE_THRESHOLD 25 -#define SET_STR_DC_THRESHOLD 582 -#define SET_STR_PRECOMPUTE_THRESHOLD 1118 - -#define FAC_DSC_THRESHOLD 178 -#define FAC_ODD_THRESHOLD 34 +#define SQR_FFT_TABLE { 432, 928, 1664, 3584, 10240, 40960, 98304, 393216, 1572864, 0 } +#define SQR_FFT_MODF_THRESHOLD 448 +#define SQR_FFT_THRESHOLD 3840 diff --git a/gmp/mpn/x86/p6/sse2/mod_1_1.asm b/gmp/mpn/x86/p6/sse2/mod_1_1.asm deleted file mode 100644 index 8b7b7adaa5..0000000000 --- a/gmp/mpn/x86/p6/sse2/mod_1_1.asm +++ /dev/null @@ -1,34 +0,0 @@ -dnl Intel P6/SSE2 mpn_mod_1_1. - -dnl Copyright 2009, 2011 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. 
-dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. - -include(`../config.m4') - -MULFUNC_PROLOGUE(mpn_mod_1_1p) -include_mpn(`x86/pentium4/sse2/mod_1_1.asm') diff --git a/gmp/mpn/x86/p6/sse2/mod_1_4.asm b/gmp/mpn/x86/p6/sse2/mod_1_4.asm deleted file mode 100644 index 49c96c60b9..0000000000 --- a/gmp/mpn/x86/p6/sse2/mod_1_4.asm +++ /dev/null @@ -1,34 +0,0 @@ -dnl Intel P6/SSE2 mpn_mod_1_4. - -dnl Copyright 2009, 2011 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. 
-dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. - -include(`../config.m4') - -MULFUNC_PROLOGUE(mpn_mod_1s_4p) -include_mpn(`x86/pentium4/sse2/mod_1_4.asm') diff --git a/gmp/mpn/x86/p6/sse2/mul_1.asm b/gmp/mpn/x86/p6/sse2/mul_1.asm index 50e5b6983a..fc3d4e6414 100644 --- a/gmp/mpn/x86/p6/sse2/mul_1.asm +++ b/gmp/mpn/x86/p6/sse2/mul_1.asm @@ -1,32 +1,21 @@ dnl Intel P6/SSE2 mpn_mul_1. dnl Copyright 2008 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') diff --git a/gmp/mpn/x86/p6/sse2/mul_basecase.asm b/gmp/mpn/x86/p6/sse2/mul_basecase.asm index 4687625790..f52ece025f 100644 --- a/gmp/mpn/x86/p6/sse2/mul_basecase.asm +++ b/gmp/mpn/x86/p6/sse2/mul_basecase.asm @@ -1,32 +1,21 @@ dnl Intel P6/SSE2 mpn_mul_basecase. dnl Copyright 2008 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. 
+dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') diff --git a/gmp/mpn/x86/p6/sse2/popcount.asm b/gmp/mpn/x86/p6/sse2/popcount.asm index 4c02b93be2..f818d6e230 100644 --- a/gmp/mpn/x86/p6/sse2/popcount.asm +++ b/gmp/mpn/x86/p6/sse2/popcount.asm @@ -1,32 +1,21 @@ dnl Intel P6/SSE2 mpn_popcount -- population count. dnl Copyright 2008 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. 
dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') diff --git a/gmp/mpn/x86/p6/sse2/sqr_basecase.asm b/gmp/mpn/x86/p6/sse2/sqr_basecase.asm index 76b574b6c7..8a7f24974d 100644 --- a/gmp/mpn/x86/p6/sse2/sqr_basecase.asm +++ b/gmp/mpn/x86/p6/sse2/sqr_basecase.asm @@ -1,32 +1,21 @@ dnl Intel P6/SSE2 mpn_sqr_basecase. dnl Copyright 2008 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. 
+dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') diff --git a/gmp/mpn/x86/p6/sse2/submul_1.asm b/gmp/mpn/x86/p6/sse2/submul_1.asm index 98a603ce93..ae97fd6346 100644 --- a/gmp/mpn/x86/p6/sse2/submul_1.asm +++ b/gmp/mpn/x86/p6/sse2/submul_1.asm @@ -1,32 +1,21 @@ dnl Intel P6/SSE2 mpn_submul_1. dnl Copyright 2008 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. 
-dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') diff --git a/gmp/mpn/x86/pentium/README b/gmp/mpn/x86/pentium/README index 305936bbd9..6c4d872c47 100644 --- a/gmp/mpn/x86/pentium/README +++ b/gmp/mpn/x86/pentium/README @@ -1,30 +1,19 @@ -Copyright 1996, 1999-2001, 2003 Free Software Foundation, Inc. +Copyright 1996, 1999, 2000, 2001, 2003 Free Software Foundation, Inc. This file is part of the GNU MP Library. 
The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - -or - - * the GNU General Public License as published by the Free Software - Foundation; either version 2 of the License, or (at your option) any - later version. - -or both in parallel, as here. +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 3 of the License, or (at your +option) any later version. The GNU MP Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. -You should have received copies of the GNU General Public License and the -GNU Lesser General Public License along with the GNU MP Library. If not, -see https://www.gnu.org/licenses/. +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. diff --git a/gmp/mpn/x86/pentium/aors_n.asm b/gmp/mpn/x86/pentium/aors_n.asm index 01ebfb96ae..30d0df79b0 100644 --- a/gmp/mpn/x86/pentium/aors_n.asm +++ b/gmp/mpn/x86/pentium/aors_n.asm @@ -1,32 +1,22 @@ dnl Intel Pentium mpn_add_n/mpn_sub_n -- mpn addition and subtraction. -dnl Copyright 1992, 1994-1996, 1999, 2000, 2002 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: +dnl Copyright 1992, 1994, 1995, 1996, 1999, 2000, 2002 Free Software +dnl Foundation, Inc. 
dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. 
include(`../config.m4') @@ -88,13 +78,13 @@ deflit(`FRAME',16) pushl %edx FRAME_pushl() movl PARAM_CARRY,%eax - shrl %eax C shift bit 0 into carry + shrl $1,%eax C shift bit 0 into carry jmp L(oop) L(endgo): deflit(`FRAME',16) movl PARAM_CARRY,%eax - shrl %eax C shift bit 0 into carry + shrl $1,%eax C shift bit 0 into carry jmp L(end) EPILOGUE() diff --git a/gmp/mpn/x86/pentium/aorsmul_1.asm b/gmp/mpn/x86/pentium/aorsmul_1.asm index d83cc4513b..a50299b5cf 100644 --- a/gmp/mpn/x86/pentium/aorsmul_1.asm +++ b/gmp/mpn/x86/pentium/aorsmul_1.asm @@ -2,32 +2,21 @@ dnl Intel Pentium mpn_addmul_1 -- mpn by limb multiplication. dnl Copyright 1992, 1994, 1996, 1999, 2000, 2002 Free Software Foundation, dnl Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. 
+dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') diff --git a/gmp/mpn/x86/pentium/bdiv_q_1.asm b/gmp/mpn/x86/pentium/bdiv_q_1.asm deleted file mode 100644 index 9fee3cb87a..0000000000 --- a/gmp/mpn/x86/pentium/bdiv_q_1.asm +++ /dev/null @@ -1,260 +0,0 @@ -dnl Intel Pentium mpn_divexact_1 -- mpn by limb exact division. - -dnl Rearranged from mpn/x86/pentium/dive_1.asm by Marco Bodrato. - -dnl Copyright 2001, 2002, 2011 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. 
-dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. - -include(`../config.m4') - - -C divisor -C odd even -C P54: 24.5 30.5 cycles/limb -C P55: 23.0 28.0 - -MULFUNC_PROLOGUE(mpn_bdiv_q_1 mpn_pi1_bdiv_q_1) - -C The P55 speeds noted above, 23 cycles odd or 28 cycles even, are as -C expected. On P54 in the even case the shrdl pairing nonsense (see -C mpn/x86/pentium/README) costs 1 cycle, but it's not clear why there's a -C further 1.5 slowdown for both odd and even. - -defframe(PARAM_SHIFT, 24) -defframe(PARAM_INVERSE,20) -defframe(PARAM_DIVISOR,16) -defframe(PARAM_SIZE, 12) -defframe(PARAM_SRC, 8) -defframe(PARAM_DST, 4) - -dnl re-use parameter space -define(VAR_INVERSE,`PARAM_DST') - - TEXT - - ALIGN(32) -C mp_limb_t mpn_bdiv_q_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, -C mp_limb_t divisor); -C -PROLOGUE(mpn_bdiv_q_1) -deflit(`FRAME',0) - - movl $-1, %ecx - movl PARAM_DIVISOR, %eax - -L(strip_twos): - ASSERT(nz, `orl %eax, %eax') - shrl %eax - incl %ecx C shift count - - jnc L(strip_twos) - - leal 1(%eax,%eax), %edx C d - andl $127, %eax C d/2, 7 bits - - pushl %ebx FRAME_pushl() - pushl %ebp FRAME_pushl() - -ifdef(`PIC',` - call L(here) -L(here): - popl %ebp C eip - - addl $_GLOBAL_OFFSET_TABLE_+[.-L(here)], %ebp - C AGI - movl binvert_limb_table@GOT(%ebp), %ebp - C AGI - movzbl (%eax,%ebp), %eax -',` - -dnl non-PIC - movzbl binvert_limb_table(%eax), %eax C inv 8 bits -') - - movl %eax, %ebp C inv - addl %eax, %eax C 2*inv - - imull %ebp, %ebp C inv*inv - - imull %edx, %ebp C inv*inv*d - - subl %ebp, %eax C inv = 2*inv - inv*inv*d - movl PARAM_SIZE, %ebx - - movl %eax, %ebp - addl %eax, %eax C 2*inv - - imull %ebp, %ebp C inv*inv - - imull %edx, %ebp C inv*inv*d - - subl %ebp, %eax C inv = 2*inv - inv*inv*d - movl %edx, PARAM_DIVISOR C d without twos - - ASSERT(e,` C expect d*inv == 1 mod 2^GMP_LIMB_BITS - pushl 
%eax FRAME_pushl() - imull PARAM_DIVISOR, %eax - cmpl $1, %eax - popl %eax FRAME_popl()') - - jmp L(common) -EPILOGUE() - -C mp_limb_t -C mpn_pi1_bdiv_q_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, mp_limb_t divisor, -C mp_limb_t inverse, int shift) - ALIGN(32) -PROLOGUE(mpn_pi1_bdiv_q_1) -deflit(`FRAME',0) - - movl PARAM_SHIFT, %ecx - - pushl %ebx FRAME_pushl() - pushl %ebp FRAME_pushl() - - movl PARAM_SIZE, %ebx - movl PARAM_INVERSE, %eax - -L(common): - pushl %esi FRAME_pushl() - push %edi FRAME_pushl() - - movl PARAM_SRC, %esi - movl PARAM_DST, %edi - movl %eax, VAR_INVERSE - - leal (%esi,%ebx,4), %esi C src end - leal (%edi,%ebx,4), %edi C dst end - - negl %ebx C -size - - xorl %ebp, %ebp C initial carry bit - - orl %ecx, %ecx C shift - movl (%esi,%ebx,4), %eax C src low limb - jz L(odd_entry) - - xorl %edx, %edx C initial carry limb (for even, if one) - incl %ebx - jz L(one) - - movl (%esi,%ebx,4), %edx C src second limb (for even) - shrdl( %cl, %edx, %eax) - - jmp L(even_entry) - - - ALIGN(8) -L(odd_top): - C eax scratch - C ebx counter, limbs, negative - C ecx - C edx - C esi src end - C edi dst end - C ebp carry bit, 0 or -1 - - mull PARAM_DIVISOR - - movl (%esi,%ebx,4), %eax - subl %ebp, %edx - - subl %edx, %eax - - sbbl %ebp, %ebp - -L(odd_entry): - imull VAR_INVERSE, %eax - - movl %eax, (%edi,%ebx,4) - - incl %ebx - jnz L(odd_top) - - popl %edi - popl %esi - - popl %ebp - popl %ebx - - ret - -L(even_top): - C eax scratch - C ebx counter, limbs, negative - C ecx twos - C edx - C esi src end - C edi dst end - C ebp carry bit, 0 or -1 - - mull PARAM_DIVISOR - - subl %ebp, %edx C carry bit - movl -4(%esi,%ebx,4), %eax C src limb - - movl (%esi,%ebx,4), %ebp C and one above it - - shrdl( %cl, %ebp, %eax) - - subl %edx, %eax C carry limb - - sbbl %ebp, %ebp - -L(even_entry): - imull VAR_INVERSE, %eax - - movl %eax, -4(%edi,%ebx,4) - incl %ebx - - jnz L(even_top) - - mull PARAM_DIVISOR - - movl -4(%esi), %eax C src high limb - subl %ebp, %edx - -L(one): - 
shrl %cl, %eax - - subl %edx, %eax C no carry if division is exact - - imull VAR_INVERSE, %eax - - movl %eax, -4(%edi) C dst high limb - nop C protect against cache bank clash - - popl %edi - popl %esi - - popl %ebp - popl %ebx - - ret - -EPILOGUE() diff --git a/gmp/mpn/x86/pentium/com.asm b/gmp/mpn/x86/pentium/com_n.asm index b0805452a6..c6d2d72e5e 100644 --- a/gmp/mpn/x86/pentium/com.asm +++ b/gmp/mpn/x86/pentium/com_n.asm @@ -1,32 +1,21 @@ -dnl Intel Pentium mpn_com -- mpn ones complement. +dnl Intel Pentium mpn_com_n -- mpn ones complement. dnl Copyright 1996, 2001, 2002, 2006 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') @@ -37,7 +26,7 @@ C P5: 1.75 cycles/limb NAILS_SUPPORT(0-31) -C void mpn_com (mp_ptr dst, mp_srcptr src, mp_size_t size); +C void mpn_com_n (mp_ptr dst, mp_srcptr src, mp_size_t size); C C This code is similar to mpn_copyi, basically there's just some "xorl C $GMP_NUMB_MASK"s inserted. @@ -55,7 +44,7 @@ defframe(PARAM_DST, 4) TEXT ALIGN(8) -PROLOGUE(mpn_com) +PROLOGUE(mpn_com_n) deflit(`FRAME',0) movl PARAM_SRC, %eax diff --git a/gmp/mpn/x86/pentium/copyd.asm b/gmp/mpn/x86/pentium/copyd.asm index 72a543b2a3..2be8c765ac 100644 --- a/gmp/mpn/x86/pentium/copyd.asm +++ b/gmp/mpn/x86/pentium/copyd.asm @@ -1,32 +1,21 @@ dnl Intel Pentium mpn_copyd -- copy limb vector, decrementing. dnl Copyright 1996, 2001, 2002, 2006 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. 
+dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') diff --git a/gmp/mpn/x86/pentium/copyi.asm b/gmp/mpn/x86/pentium/copyi.asm index d983d6b46e..9da08e2c06 100644 --- a/gmp/mpn/x86/pentium/copyi.asm +++ b/gmp/mpn/x86/pentium/copyi.asm @@ -1,32 +1,21 @@ dnl Intel Pentium mpn_copyi -- copy limb vector, incrementing. dnl Copyright 1996, 2001, 2002, 2006 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. 
-dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') diff --git a/gmp/mpn/x86/pentium/dive_1.asm b/gmp/mpn/x86/pentium/dive_1.asm index f80632f479..79885244a5 100644 --- a/gmp/mpn/x86/pentium/dive_1.asm +++ b/gmp/mpn/x86/pentium/dive_1.asm @@ -1,32 +1,21 @@ dnl Intel Pentium mpn_divexact_1 -- mpn by limb exact division. dnl Copyright 2001, 2002 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. 
-dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. 
include(`../config.m4') @@ -157,7 +146,7 @@ dnl non-PIC negl %ebx C -size - ASSERT(e,` C expect d*inv == 1 mod 2^GMP_LIMB_BITS + ASSERT(e,` C expect d*inv == 1 mod 2^BITS_PER_MP_LIMB pushl %eax FRAME_pushl() imull PARAM_DIVISOR, %eax cmpl $1, %eax diff --git a/gmp/mpn/x86/pentium/gmp-mparam.h b/gmp/mpn/x86/pentium/gmp-mparam.h index befa6e27a9..5c49c4e3cb 100644 --- a/gmp/mpn/x86/pentium/gmp-mparam.h +++ b/gmp/mpn/x86/pentium/gmp-mparam.h @@ -1,36 +1,26 @@ /* Intel P54 gmp-mparam.h -- Compiler/machine parameter header file. -Copyright 1991, 1993, 1994, 1999-2002, 2004 Free Software Foundation, Inc. +Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2004 Free Software +Foundation, Inc. This file is part of the GNU MP Library. The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - -or - - * the GNU General Public License as published by the Free Software - Foundation; either version 2 of the License, or (at your option) any - later version. - -or both in parallel, as here. +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 3 of the License, or (at your +option) any later version. The GNU MP Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. -You should have received copies of the GNU General Public License and the -GNU Lesser General Public License along with the GNU MP Library. If not, -see https://www.gnu.org/licenses/. 
*/ +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ -#define GMP_LIMB_BITS 32 -#define GMP_LIMB_BYTES 4 +#define BITS_PER_MP_LIMB 32 +#define BYTES_PER_MP_LIMB 4 /* For mpn/x86/pentium/mod_1.asm */ @@ -41,11 +31,11 @@ see https://www.gnu.org/licenses/. */ /* Generated by tuneup.c, 2004-02-10, gcc 2.95 */ -#define MUL_TOOM22_THRESHOLD 16 -#define MUL_TOOM33_THRESHOLD 90 +#define MUL_KARATSUBA_THRESHOLD 16 +#define MUL_TOOM3_THRESHOLD 90 #define SQR_BASECASE_THRESHOLD 0 /* always */ -#define SQR_TOOM2_THRESHOLD 22 +#define SQR_KARATSUBA_THRESHOLD 22 #define SQR_TOOM3_THRESHOLD 122 #define DIV_SB_PREINV_THRESHOLD MP_SIZE_T_MAX /* never */ diff --git a/gmp/mpn/x86/pentium/hamdist.asm b/gmp/mpn/x86/pentium/hamdist.asm index 2d7bc99b12..a129030f74 100644 --- a/gmp/mpn/x86/pentium/hamdist.asm +++ b/gmp/mpn/x86/pentium/hamdist.asm @@ -1,32 +1,21 @@ dnl Intel P5 mpn_hamdist -- mpn hamming distance. dnl Copyright 2001, 2002 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. 
dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') diff --git a/gmp/mpn/x86/pentium/logops_n.asm b/gmp/mpn/x86/pentium/logops_n.asm index 18773172e9..0552e55809 100644 --- a/gmp/mpn/x86/pentium/logops_n.asm +++ b/gmp/mpn/x86/pentium/logops_n.asm @@ -1,32 +1,21 @@ dnl Intel Pentium mpn_and_n,...,mpn_xnor_n -- bitwise logical operations. dnl Copyright 2001, 2002 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. 
+dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') diff --git a/gmp/mpn/x86/pentium/lshift.asm b/gmp/mpn/x86/pentium/lshift.asm index 2a31f36c6e..ece51e06d3 100644 --- a/gmp/mpn/x86/pentium/lshift.asm +++ b/gmp/mpn/x86/pentium/lshift.asm @@ -1,32 +1,22 @@ dnl Intel Pentium mpn_lshift -- mpn left shift. -dnl Copyright 1992, 1994-1996, 1999, 2000, 2002 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: +dnl Copyright 1992, 1994, 1995, 1996, 1999, 2000, 2002 Free Software +dnl Foundation, Inc. dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. 
-dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') diff --git a/gmp/mpn/x86/pentium/mmx/gmp-mparam.h b/gmp/mpn/x86/pentium/mmx/gmp-mparam.h index 02a0def127..e443c8c300 100644 --- a/gmp/mpn/x86/pentium/mmx/gmp-mparam.h +++ b/gmp/mpn/x86/pentium/mmx/gmp-mparam.h @@ -1,37 +1,26 @@ /* Intel P55 gmp-mparam.h -- Compiler/machine parameter header file. -Copyright 1991, 1993, 1994, 1999-2002, 2004, 2009, 2010 Free Software +Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2004, 2009 Free Software Foundation, Inc. This file is part of the GNU MP Library. 
The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - -or - - * the GNU General Public License as published by the Free Software - Foundation; either version 2 of the License, or (at your option) any - later version. - -or both in parallel, as here. +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 3 of the License, or (at your +option) any later version. The GNU MP Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. -You should have received copies of the GNU General Public License and the -GNU Lesser General Public License along with the GNU MP Library. If not, -see https://www.gnu.org/licenses/. */ +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ -#define GMP_LIMB_BITS 32 -#define GMP_LIMB_BYTES 4 +#define BITS_PER_MP_LIMB 32 +#define BYTES_PER_MP_LIMB 4 /* For mpn/x86/pentium/mod_1.asm */ @@ -40,124 +29,45 @@ see https://www.gnu.org/licenses/. 
*/ /* 233MHz P55 */ -#define MOD_1_NORM_THRESHOLD 5 -#define MOD_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* never */ -#define MOD_1N_TO_MOD_1_1_THRESHOLD MP_SIZE_T_MAX /* never */ -#define MOD_1U_TO_MOD_1_1_THRESHOLD 12 -#define MOD_1_1_TO_MOD_1_2_THRESHOLD 0 -#define MOD_1_2_TO_MOD_1_4_THRESHOLD 11 -#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 63 -#define USE_PREINV_DIVREM_1 0 -#define DIVEXACT_1_THRESHOLD 0 /* always (native) */ -#define BMOD_1_TO_MOD_1_THRESHOLD 51 - -#define MUL_TOOM22_THRESHOLD 16 -#define MUL_TOOM33_THRESHOLD 53 -#define MUL_TOOM44_THRESHOLD 128 -#define MUL_TOOM6H_THRESHOLD 189 -#define MUL_TOOM8H_THRESHOLD 260 - -#define MUL_TOOM32_TO_TOOM43_THRESHOLD 89 -#define MUL_TOOM32_TO_TOOM53_THRESHOLD 91 -#define MUL_TOOM42_TO_TOOM53_THRESHOLD 90 -#define MUL_TOOM42_TO_TOOM63_THRESHOLD 88 - -#define SQR_BASECASE_THRESHOLD 0 /* always (native) */ -#define SQR_TOOM2_THRESHOLD 20 -#define SQR_TOOM3_THRESHOLD 73 -#define SQR_TOOM4_THRESHOLD 178 -#define SQR_TOOM6_THRESHOLD 210 -#define SQR_TOOM8_THRESHOLD 375 - -#define MULMOD_BNM1_THRESHOLD 11 -#define SQRMOD_BNM1_THRESHOLD 12 - -#define MUL_FFT_MODF_THRESHOLD 364 /* k = 5 */ -#define MUL_FFT_TABLE3 \ - { { 364, 5}, { 15, 6}, { 8, 5}, { 17, 6}, \ - { 9, 5}, { 19, 6}, { 17, 7}, { 9, 6}, \ - { 21, 7}, { 11, 6}, { 23, 7}, { 15, 6}, \ - { 31, 7}, { 21, 8}, { 11, 7}, { 27, 8}, \ - { 15, 7}, { 33, 8}, { 19, 7}, { 39, 8}, \ - { 23, 7}, { 47, 8}, { 27, 9}, { 15, 8}, \ - { 31, 7}, { 63, 8}, { 39, 9}, { 23, 8}, \ - { 47,10}, { 15, 9}, { 31, 8}, { 67, 9}, \ - { 39, 8}, { 79, 9}, { 47, 8}, { 95, 9}, \ - { 55,10}, { 31, 9}, { 79,10}, { 47, 9}, \ - { 95,11}, { 31,10}, { 63, 9}, { 135,10}, \ - { 79, 9}, { 159, 8}, { 319, 9}, { 167,10}, \ - { 95, 9}, { 191, 8}, { 383,11}, { 63,10}, \ - { 127, 9}, { 255,10}, { 143, 9}, { 287,10}, \ - { 159, 9}, { 319,11}, { 95,10}, { 191, 9}, \ - { 383,12}, { 63,11}, { 127,10}, { 271, 9}, \ - { 543,10}, { 287,11}, { 159,10}, { 351,11}, \ - { 191,10}, { 415,11}, { 223,12}, { 127,11}, \ - { 
255,10}, { 511,11}, { 287,10}, { 575,11}, \ - { 351,12}, { 191,11}, { 415,13}, { 127,12}, \ - { 255,11}, { 575,12}, { 319,11}, { 703,12}, \ - { 383,11}, { 831,12}, { 447,13}, { 8192,14}, \ - { 16384,15}, { 32768,16} } -#define MUL_FFT_TABLE3_SIZE 90 -#define MUL_FFT_THRESHOLD 3520 - -#define SQR_FFT_MODF_THRESHOLD 340 /* k = 5 */ -#define SQR_FFT_TABLE3 \ - { { 340, 5}, { 17, 6}, { 9, 5}, { 19, 6}, \ - { 17, 7}, { 9, 6}, { 21, 7}, { 11, 6}, \ - { 23, 7}, { 15, 6}, { 31, 7}, { 21, 8}, \ - { 11, 7}, { 29, 8}, { 15, 7}, { 33, 8}, \ - { 19, 7}, { 39, 8}, { 27, 7}, { 55, 9}, \ - { 15, 8}, { 31, 7}, { 65, 8}, { 43, 9}, \ - { 23, 8}, { 47,10}, { 15, 9}, { 31, 8}, \ - { 67, 9}, { 39, 8}, { 83, 9}, { 47, 8}, \ - { 95,10}, { 31, 9}, { 63, 8}, { 127, 9}, \ - { 79,10}, { 47, 9}, { 95,11}, { 31,10}, \ - { 63, 9}, { 127, 8}, { 255, 9}, { 135,10}, \ - { 79, 9}, { 159, 8}, { 319,10}, { 95, 9}, \ - { 191,11}, { 63,10}, { 127, 9}, { 255, 8}, \ - { 511, 9}, { 271,10}, { 143, 9}, { 287, 8}, \ - { 575, 9}, { 303,10}, { 159, 9}, { 319,11}, \ - { 95,10}, { 191, 9}, { 383,10}, { 207,12}, \ - { 63,11}, { 127,10}, { 271, 9}, { 543,10}, \ - { 287, 9}, { 575,10}, { 303,11}, { 159,10}, \ - { 351,11}, { 191,10}, { 415,11}, { 223,10}, \ - { 447,12}, { 127,11}, { 255,10}, { 543,11}, \ - { 287,10}, { 607,11}, { 351,12}, { 191,11}, \ - { 479,13}, { 127,12}, { 255,11}, { 575,12}, \ - { 319,11}, { 703,12}, { 383,11}, { 767,12}, \ - { 447,13}, { 8192,14}, { 16384,15}, { 32768,16} } -#define SQR_FFT_TABLE3_SIZE 96 -#define SQR_FFT_THRESHOLD 5504 - -#define MULLO_BASECASE_THRESHOLD 0 /* always */ -#define MULLO_DC_THRESHOLD 48 -#define MULLO_MUL_N_THRESHOLD 6633 - -#define DC_DIV_QR_THRESHOLD 43 -#define DC_DIVAPPR_Q_THRESHOLD 170 -#define DC_BDIV_QR_THRESHOLD 43 -#define DC_BDIV_Q_THRESHOLD 110 - -#define INV_MULMOD_BNM1_THRESHOLD 30 -#define INV_NEWTON_THRESHOLD 177 -#define INV_APPR_THRESHOLD 171 - -#define BINV_NEWTON_THRESHOLD 194 -#define REDC_1_TO_REDC_N_THRESHOLD 50 - -#define 
MU_DIV_QR_THRESHOLD 1142 -#define MU_DIVAPPR_Q_THRESHOLD 1142 -#define MUPI_DIV_QR_THRESHOLD 90 -#define MU_BDIV_QR_THRESHOLD 942 -#define MU_BDIV_Q_THRESHOLD 1017 - -#define MATRIX22_STRASSEN_THRESHOLD 13 -#define HGCD_THRESHOLD 92 -#define GCD_DC_THRESHOLD 283 -#define GCDEXT_DC_THRESHOLD 221 -#define JACOBI_BASE_METHOD 2 - -#define GET_STR_DC_THRESHOLD 18 -#define GET_STR_PRECOMPUTE_THRESHOLD 31 -#define SET_STR_DC_THRESHOLD 490 -#define SET_STR_PRECOMPUTE_THRESHOLD 994 +/* Generated by tuneup.c, 2009-01-06, gcc 3.4 */ + +#define MUL_KARATSUBA_THRESHOLD 16 +#define MUL_TOOM3_THRESHOLD 89 +#define MUL_TOOM44_THRESHOLD 131 + +#define SQR_BASECASE_THRESHOLD 0 /* always (native) */ +#define SQR_KARATSUBA_THRESHOLD 22 +#define SQR_TOOM3_THRESHOLD 77 +#define SQR_TOOM4_THRESHOLD 168 + +#define MULLOW_BASECASE_THRESHOLD 0 /* always */ +#define MULLOW_DC_THRESHOLD 40 +#define MULLOW_MUL_N_THRESHOLD 266 + +#define DIV_SB_PREINV_THRESHOLD 4 +#define DIV_DC_THRESHOLD 43 +#define POWM_THRESHOLD 64 + +#define MATRIX22_STRASSEN_THRESHOLD 13 +#define HGCD_THRESHOLD 95 +#define GCD_DC_THRESHOLD 316 +#define GCDEXT_DC_THRESHOLD 316 +#define JACOBI_BASE_METHOD 2 + +#define USE_PREINV_DIVREM_1 0 +#define USE_PREINV_MOD_1 1 /* native */ +#define DIVEXACT_1_THRESHOLD 0 /* always (native) */ +#define MODEXACT_1_ODD_THRESHOLD 0 /* always (native) */ + +#define GET_STR_DC_THRESHOLD 17 +#define GET_STR_PRECOMPUTE_THRESHOLD 27 +#define SET_STR_DC_THRESHOLD 527 +#define SET_STR_PRECOMPUTE_THRESHOLD 1069 + +#define MUL_FFT_TABLE { 304, 672, 1152, 3584, 10240, 40960, 0 } +#define MUL_FFT_MODF_THRESHOLD 320 +#define MUL_FFT_THRESHOLD 3840 + +#define SQR_FFT_TABLE { 304, 672, 1152, 4608, 10240, 24576, 0 } +#define SQR_FFT_MODF_THRESHOLD 320 +#define SQR_FFT_THRESHOLD 3840 diff --git a/gmp/mpn/x86/pentium/mmx/hamdist.asm b/gmp/mpn/x86/pentium/mmx/hamdist.asm index 72e3196697..185eeaee22 100644 --- a/gmp/mpn/x86/pentium/mmx/hamdist.asm +++ b/gmp/mpn/x86/pentium/mmx/hamdist.asm @@ -1,32 +1,21 @@ 
dnl Intel P55 mpn_hamdist -- mpn hamming distance. dnl Copyright 2000, 2002 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. 
include(`../config.m4') diff --git a/gmp/mpn/x86/pentium/mmx/lshift.asm b/gmp/mpn/x86/pentium/mmx/lshift.asm index 04b0ddcc8f..012d794952 100644 --- a/gmp/mpn/x86/pentium/mmx/lshift.asm +++ b/gmp/mpn/x86/pentium/mmx/lshift.asm @@ -1,32 +1,21 @@ dnl Intel P5 mpn_lshift -- mpn left shift. -dnl Copyright 2000-2002 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: +dnl Copyright 2000, 2001, 2002 Free Software Foundation, Inc. dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. 
If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') diff --git a/gmp/mpn/x86/pentium/mmx/mul_1.asm b/gmp/mpn/x86/pentium/mmx/mul_1.asm index 4ced577b13..b9fe77ed07 100644 --- a/gmp/mpn/x86/pentium/mmx/mul_1.asm +++ b/gmp/mpn/x86/pentium/mmx/mul_1.asm @@ -1,32 +1,21 @@ dnl Intel Pentium MMX mpn_mul_1 -- mpn by limb multiplication. -dnl Copyright 2000-2002 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: +dnl Copyright 2000, 2001, 2002 Free Software Foundation, Inc. dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') diff --git a/gmp/mpn/x86/pentium/mmx/rshift.asm b/gmp/mpn/x86/pentium/mmx/rshift.asm index e3b274bb63..f50b8ab0e0 100644 --- a/gmp/mpn/x86/pentium/mmx/rshift.asm +++ b/gmp/mpn/x86/pentium/mmx/rshift.asm @@ -1,32 +1,21 @@ dnl Intel P5 mpn_rshift -- mpn right shift. dnl Copyright 2000, 2002 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. 
+dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') diff --git a/gmp/mpn/x86/pentium/mod_1.asm b/gmp/mpn/x86/pentium/mod_1.asm new file mode 100644 index 0000000000..408242e7a9 --- /dev/null +++ b/gmp/mpn/x86/pentium/mod_1.asm @@ -0,0 +1,454 @@ +dnl Intel P5 mpn_mod_1 -- mpn by limb remainder. + +dnl Copyright 1999, 2000, 2002 Free Software Foundation, Inc. +dnl +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. +dnl +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. 
+ +include(`../config.m4') + + +C P5: 28.0 cycles/limb + + +C mp_limb_t mpn_mod_1 (mp_srcptr src, mp_size_t size, mp_limb_t divisor); +C mp_limb_t mpn_mod_1c (mp_srcptr src, mp_size_t size, mp_limb_t divisor, +C mp_limb_t carry); +C mp_limb_t mpn_preinv_mod_1 (mp_srcptr src, mp_size_t size, mp_limb_t divisor, +C mp_limb_t inverse); +C +C This code is not unlike mpn/x86/p6/mod_1.asm, it does the same sort of +C multiply by inverse without on-the-fly shifts. See that code for some +C general comments. +C +C Alternatives: +C +C P5 shldl is 4 cycles, so shifting on the fly would be at least 5 cycles +C slower, probably more depending what it did to register usage. Using MMX +C on P55 would be better, but still at least 4 or 5 instructions and so 2 or +C 3 cycles. + + +dnl These thresholds are the sizes where the multiply by inverse method is +dnl used, rather than plain "divl"s. Minimum value 2. +dnl +dnl MUL_NORM_THRESHOLD is for an already normalized divisor (high bit set), +dnl MUL_UNNORM_THRESHOLD for an unnormalized divisor. +dnl +dnl With the divl loop at 44 c/l and the inverse at 28 c/l with about 70 +dnl cycles to setup, the threshold should be about ceil(70/16)==5, which is +dnl what happens in practice. +dnl +dnl An unnormalized divisor gets an extra 40 cycles at the end for the +dnl final (r*2^n)%(d*2^n) and shift. This increases the threshold by about +dnl 40/16=3. +dnl +dnl PIC adds between 4 and 7 cycles (not sure why it varies), but this +dnl doesn't change the thresholds. +dnl +dnl The entry sequence code that chooses between MUL_NORM_THRESHOLD and +dnl MUL_UNNORM_THRESHOLD is a bit horrible, but it adds only 2 cycles +dnl (branch free) and ensures the choice between div or mul is optimal. 
+ +deflit(MUL_NORM_THRESHOLD, ifdef(`PIC',5,5)) +deflit(MUL_UNNORM_THRESHOLD, ifdef(`PIC',8,8)) + +deflit(MUL_NORM_DELTA, eval(MUL_NORM_THRESHOLD - MUL_UNNORM_THRESHOLD)) + + +defframe(PARAM_INVERSE, 16) dnl mpn_preinv_mod_1 +defframe(PARAM_CARRY, 16) dnl mpn_mod_1c +defframe(PARAM_DIVISOR, 12) +defframe(PARAM_SIZE, 8) +defframe(PARAM_SRC, 4) + +dnl re-using parameter space +define(VAR_NORM, `PARAM_DIVISOR') +define(VAR_INVERSE, `PARAM_SIZE') + + TEXT + + ALIGN(8) +PROLOGUE(mpn_preinv_mod_1) +deflit(`FRAME',0) + + pushl %ebp FRAME_pushl() + pushl %esi FRAME_pushl() + + movl PARAM_SRC, %esi + movl PARAM_SIZE, %edx + + pushl %edi FRAME_pushl() + pushl %ebx FRAME_pushl() + + movl PARAM_DIVISOR, %ebp + movl PARAM_INVERSE, %eax + + movl -4(%esi,%edx,4), %edi C src high limb + leal -8(%esi,%edx,4), %esi C &src[size-2] + + movl $0, VAR_NORM + decl %edx + + jnz L(start_preinv) + + subl %ebp, %edi C src-divisor + popl %ebx + + sbbl %ecx, %ecx C -1 if underflow + movl %edi, %eax C src-divisor + + andl %ebp, %ecx C d if underflow + popl %edi + + addl %ecx, %eax C remainder, with possible addback + popl %esi + + popl %ebp + + ret + +EPILOGUE() + + + ALIGN(8) +PROLOGUE(mpn_mod_1c) +deflit(`FRAME',0) + + movl PARAM_DIVISOR, %eax + movl PARAM_SIZE, %ecx + + sarl $31, %eax C d highbit + movl PARAM_CARRY, %edx + + orl %ecx, %ecx + jz L(done_edx) C result==carry if size==0 + + andl $MUL_NORM_DELTA, %eax + pushl %ebp FRAME_pushl() + + addl $MUL_UNNORM_THRESHOLD, %eax C norm or unnorm thresh + pushl %esi FRAME_pushl() + + movl PARAM_SRC, %esi + movl PARAM_DIVISOR, %ebp + + cmpl %eax, %ecx + jb L(divide_top) + + movl %edx, %eax C carry as pretend src high limb + leal 1(%ecx), %edx C size+1 + + cmpl $0x1000000, %ebp + jmp L(mul_by_inverse_1c) + +EPILOGUE() + + + ALIGN(8) +PROLOGUE(mpn_mod_1) +deflit(`FRAME',0) + + movl PARAM_SIZE, %ecx + pushl %ebp FRAME_pushl() + + orl %ecx, %ecx + jz L(done_zero) + + movl PARAM_SRC, %eax + movl PARAM_DIVISOR, %ebp + + sarl $31, %ebp C -1 if divisor 
normalized + movl -4(%eax,%ecx,4), %eax C src high limb + + movl PARAM_DIVISOR, %edx + pushl %esi FRAME_pushl() + + andl $MUL_NORM_DELTA, %ebp + cmpl %edx, %eax C carry flag if high<divisor + + sbbl %edx, %edx C -1 if high<divisor + addl $MUL_UNNORM_THRESHOLD, %ebp C norm or unnorm thresh + + addl %edx, %ecx C size-1 if high<divisor + jz L(done_eax) + + cmpl %ebp, %ecx + movl PARAM_DIVISOR, %ebp + + movl PARAM_SRC, %esi + jae L(mul_by_inverse) + + andl %eax, %edx C high as initial carry if high<divisor + + +L(divide_top): + C eax scratch (quotient) + C ebx + C ecx counter, limbs, decrementing + C edx scratch (remainder) + C esi src + C edi + C ebp divisor + + movl -4(%esi,%ecx,4), %eax + + divl %ebp + + decl %ecx + jnz L(divide_top) + + + popl %esi + popl %ebp + +L(done_edx): + movl %edx, %eax + + ret + + +L(done_zero): + xorl %eax, %eax + popl %ebp + + ret + + +C ----------------------------------------------------------------------------- +C +C The divisor is normalized using the same code as the pentium +C count_leading_zeros in longlong.h. Going through the GOT for PIC costs a +C couple of cycles, but is more or less unavoidable. 
+ + + ALIGN(8) +L(mul_by_inverse): + C eax src high limb + C ebx + C ecx size or size-1 + C edx + C esi src + C edi + C ebp divisor + + movl PARAM_SIZE, %edx + cmpl $0x1000000, %ebp + +L(mul_by_inverse_1c): + sbbl %ecx, %ecx + cmpl $0x10000, %ebp + + sbbl $0, %ecx + cmpl $0x100, %ebp + + sbbl $0, %ecx + pushl %edi FRAME_pushl() + + pushl %ebx FRAME_pushl() + movl %ebp, %ebx C d + +ifdef(`PIC',` + call L(here) +L(here): + popl %edi + leal 25(,%ecx,8), %ecx C 0,-1,-2,-3 -> 25,17,9,1 + + shrl %cl, %ebx + addl $_GLOBAL_OFFSET_TABLE_+[.-L(here)], %edi + + C AGI + movl __clz_tab@GOT(%edi), %edi + addl $-34, %ecx + + C AGI + movb (%ebx,%edi), %bl + +',` + leal 25(,%ecx,8), %ecx C 0,-1,-2,-3 -> 25,17,9,1 + + shrl %cl, %ebx + addl $-34, %ecx + + C AGI + movb __clz_tab(%ebx), %bl +') + movl %eax, %edi C carry -> n1 + + addl %ebx, %ecx C -34 + c + __clz_tab[d>>c] = -clz-1 + leal -8(%esi,%edx,4), %esi C &src[size-2] + + xorl $-1, %ecx C clz + movl $-1, %edx + + ASSERT(e,`pushl %eax C clz calculation same as bsrl + bsrl %ebp, %eax + xorl $31, %eax + cmpl %eax, %ecx + popl %eax') + + shll %cl, %ebp C d normalized + movl %ecx, VAR_NORM + + subl %ebp, %edx C (b-d)-1, so edx:eax = b*(b-d)-1 + movl $-1, %eax + + divl %ebp C floor (b*(b-d)-1) / d + +L(start_preinv): + movl %eax, VAR_INVERSE + movl %ebp, %eax C d + + movl %ecx, %edx C fake high, will cancel + + +C For mpn_mod_1 and mpn_preinv_mod_1, the initial carry in %edi is the src +C high limb, and this may be greater than the divisor and may need one copy +C of the divisor subtracted (only one, because the divisor is normalized). +C This is accomplished by having the initial ecx:edi act as a fake previous +C n2:n10. The initial edx:eax is d, acting as a fake (q1+1)*d which is +C subtracted from ecx:edi, with the usual addback if it produces an +C underflow. 
+ + +L(inverse_top): + C eax scratch (n10, n1, q1, etc) + C ebx scratch (nadj, src limit) + C ecx old n2 + C edx scratch + C esi src pointer, &src[size-2] to &src[0] + C edi old n10 + C ebp d + + subl %eax, %edi C low n - (q1+1)*d + movl (%esi), %eax C new n10 + + sbbl %edx, %ecx C high n - (q1+1)*d, 0 or -1 + movl %ebp, %ebx C d + + sarl $31, %eax C -n1 + andl %ebp, %ecx C d if underflow + + addl %edi, %ecx C remainder -> n2, and possible addback + ASSERT(b,`cmpl %ebp, %ecx') + andl %eax, %ebx C -n1 & d + + movl (%esi), %edi C n10 + andl $1, %eax C n1 + + addl %ecx, %eax C n2+n1 + addl %edi, %ebx C nadj = n10 + (-n1 & d), ignoring overflow + + mull VAR_INVERSE C m*(n2+n1) + + addl %eax, %ebx C low(m*(n2+n1) + nadj), giving carry flag + leal 1(%ecx), %eax C 1+n2 + + adcl %edx, %eax C 1 + high[n2<<32 + m*(n2+n1) + nadj] = q1+1 + movl PARAM_SRC, %ebx + + sbbl $0, %eax C use q1 if q1+1 overflows + subl $4, %esi C step src ptr + + mull %ebp C (q1+1)*d + + cmpl %ebx, %esi + jae L(inverse_top) + + + + C %edi (after subtract and addback) is the remainder modulo d*2^n + C and must be reduced to 0<=r<d by calculating r*2^n mod d*2^n and + C right shifting by n. + C + C If d was already normalized on entry so that n==0 then nothing is + C needed here. This is always the case for preinv_mod_1. For mod_1 + C or mod_1c the chance of n==0 is low, but about 40 cycles can be + C saved. 
+ + subl %eax, %edi C low n - (q1+1)*d + movl %ecx, %ebx C n2 + + sbbl %edx, %ebx C high n - (q1+1)*d, 0 or -1 + xorl %esi, %esi C next n2 + + andl %ebp, %ebx C d if underflow + movl VAR_NORM, %ecx + + addl %ebx, %edi C remainder, with possible addback + orl %ecx, %ecx + + jz L(done_mul_edi) + + + C Here using %esi=n2 and %edi=n10, unlike the above + + shldl( %cl, %edi, %esi) C n2 + + shll %cl, %edi C n10 + + movl %edi, %eax C n10 + movl %edi, %ebx C n10 + + sarl $31, %ebx C -n1 + + shrl $31, %eax C n1 + andl %ebp, %ebx C -n1 & d + + addl %esi, %eax C n2+n1 + addl %edi, %ebx C nadj = n10 + (-n1 & d), ignoring overflow + + mull VAR_INVERSE C m*(n2+n1) + + addl %eax, %ebx C m*(n2+n1) + nadj, low giving carry flag + leal 1(%esi), %eax C 1+n2 + + adcl %edx, %eax C 1 + high(n2<<32 + m*(n2+n1) + nadj) = q1+1 + + sbbl $0, %eax C use q1 if q1+1 overflows + + mull %ebp C (q1+1)*d + + subl %eax, %edi C low n - (q1+1)*d + popl %ebx + + sbbl %edx, %esi C high n - (q1+1)*d, 0 or -1 + movl %edi, %eax + + andl %ebp, %esi C d if underflow + popl %edi + + addl %esi, %eax C addback if underflow + popl %esi + + shrl %cl, %eax C denorm remainder + popl %ebp + + ret + + +L(done_mul_edi): + movl %edi, %eax + popl %ebx + + popl %edi +L(done_eax): + popl %esi + + popl %ebp + + ret + +EPILOGUE() diff --git a/gmp/mpn/x86/pentium/mod_34lsub1.asm b/gmp/mpn/x86/pentium/mod_34lsub1.asm index 2d88223b84..201081a437 100644 --- a/gmp/mpn/x86/pentium/mod_34lsub1.asm +++ b/gmp/mpn/x86/pentium/mod_34lsub1.asm @@ -1,32 +1,21 @@ dnl Intel P5 mpn_mod_34lsub1 -- mpn remainder modulo 2**24-1. -dnl Copyright 2000-2002 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: +dnl Copyright 2000, 2001, 2002 Free Software Foundation, Inc. 
dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') diff --git a/gmp/mpn/x86/pentium/mode1o.asm b/gmp/mpn/x86/pentium/mode1o.asm index eb2790e1a0..222f64e5cb 100644 --- a/gmp/mpn/x86/pentium/mode1o.asm +++ b/gmp/mpn/x86/pentium/mode1o.asm @@ -1,32 +1,21 @@ dnl Intel Pentium mpn_modexact_1_odd -- exact division style remainder. -dnl Copyright 2000-2002 Free Software Foundation, Inc. 
- -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: +dnl Copyright 2000, 2001, 2002 Free Software Foundation, Inc. dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. 
include(`../config.m4') @@ -133,7 +122,7 @@ dnl non-PIC subl %eax, %ecx C inv = 2*inv - inv*inv*d pushl %esi FRAME_pushl() - ASSERT(e,` C d*inv == 1 mod 2^GMP_LIMB_BITS + ASSERT(e,` C d*inv == 1 mod 2^BITS_PER_MP_LIMB movl %ecx, %eax imull PARAM_DIVISOR, %eax cmpl $1, %eax') diff --git a/gmp/mpn/x86/pentium/mul_1.asm b/gmp/mpn/x86/pentium/mul_1.asm index a0858af2b4..c6b255c322 100644 --- a/gmp/mpn/x86/pentium/mul_1.asm +++ b/gmp/mpn/x86/pentium/mul_1.asm @@ -2,32 +2,21 @@ dnl Intel Pentium mpn_mul_1 -- mpn by limb multiplication. dnl Copyright 1992, 1994, 1996, 1999, 2000, 2002 Free Software Foundation, dnl Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') diff --git a/gmp/mpn/x86/pentium/mul_2.asm b/gmp/mpn/x86/pentium/mul_2.asm index 4c7beb5df2..36a025c425 100644 --- a/gmp/mpn/x86/pentium/mul_2.asm +++ b/gmp/mpn/x86/pentium/mul_2.asm @@ -1,32 +1,21 @@ dnl Intel Pentium mpn_mul_2 -- mpn by 2-limb multiplication. dnl Copyright 2001, 2002 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. 
+dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') diff --git a/gmp/mpn/x86/pentium/mul_basecase.asm b/gmp/mpn/x86/pentium/mul_basecase.asm index 50e15d3567..fd24fdf7fa 100644 --- a/gmp/mpn/x86/pentium/mul_basecase.asm +++ b/gmp/mpn/x86/pentium/mul_basecase.asm @@ -1,32 +1,21 @@ dnl Intel Pentium mpn_mul_basecase -- mpn by mpn multiplication. -dnl Copyright 1996, 1998-2000, 2002 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: +dnl Copyright 1996, 1998, 1999, 2000, 2002 Free Software Foundation, Inc. dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. 
dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') diff --git a/gmp/mpn/x86/pentium/popcount.asm b/gmp/mpn/x86/pentium/popcount.asm index b8d84ad2e2..df53bb8842 100644 --- a/gmp/mpn/x86/pentium/popcount.asm +++ b/gmp/mpn/x86/pentium/popcount.asm @@ -1,32 +1,21 @@ dnl Intel P5 mpn_popcount -- mpn bit population count. dnl Copyright 2001, 2002 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. 
+dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') diff --git a/gmp/mpn/x86/pentium/rshift.asm b/gmp/mpn/x86/pentium/rshift.asm index 2105c4c935..949b0d2e2f 100644 --- a/gmp/mpn/x86/pentium/rshift.asm +++ b/gmp/mpn/x86/pentium/rshift.asm @@ -1,32 +1,22 @@ dnl Intel Pentium mpn_rshift -- mpn right shift. -dnl Copyright 1992, 1994-1996, 1999, 2000, 2002 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: +dnl Copyright 1992, 1994, 1995, 1996, 1999, 2000, 2002 Free Software +dnl Foundation, Inc. dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. 
-dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') diff --git a/gmp/mpn/x86/pentium/sqr_basecase.asm b/gmp/mpn/x86/pentium/sqr_basecase.asm index b11d767da2..e4fca7c546 100644 --- a/gmp/mpn/x86/pentium/sqr_basecase.asm +++ b/gmp/mpn/x86/pentium/sqr_basecase.asm @@ -1,32 +1,21 @@ dnl Intel P5 mpn_sqr_basecase -- square an mpn number. -dnl Copyright 1999-2002 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. 
-dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: +dnl Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc. dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. 
include(`../config.m4') diff --git a/gmp/mpn/x86/pentium4/README b/gmp/mpn/x86/pentium4/README index 90f752e5d5..8dc0479f04 100644 --- a/gmp/mpn/x86/pentium4/README +++ b/gmp/mpn/x86/pentium4/README @@ -3,28 +3,17 @@ Copyright 2001 Free Software Foundation, Inc. This file is part of the GNU MP Library. The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - -or - - * the GNU General Public License as published by the Free Software - Foundation; either version 2 of the License, or (at your option) any - later version. - -or both in parallel, as here. +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 3 of the License, or (at your +option) any later version. The GNU MP Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. -You should have received copies of the GNU General Public License and the -GNU Lesser General Public License along with the GNU MP Library. If not, -see https://www.gnu.org/licenses/. +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. diff --git a/gmp/mpn/x86/pentium4/copyd.asm b/gmp/mpn/x86/pentium4/copyd.asm index 82af81c522..491ad60128 100644 --- a/gmp/mpn/x86/pentium4/copyd.asm +++ b/gmp/mpn/x86/pentium4/copyd.asm @@ -1,32 +1,22 @@ dnl Pentium-4 mpn_copyd -- copy limb vector, decrementing. - -dnl Copyright 1999-2001 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. 
-dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or + +dnl Copyright 1999, 2000, 2001 Free Software Foundation, Inc. dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. dnl The std/rep/movsl/cld is very slow for small blocks on pentium4. 
Its diff --git a/gmp/mpn/x86/pentium4/copyi.asm b/gmp/mpn/x86/pentium4/copyi.asm index b6148879fa..bf812c822b 100644 --- a/gmp/mpn/x86/pentium4/copyi.asm +++ b/gmp/mpn/x86/pentium4/copyi.asm @@ -1,32 +1,22 @@ dnl Pentium-4 mpn_copyi -- copy limb vector, incrementing. - -dnl Copyright 1999-2001 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or + +dnl Copyright 1999, 2000, 2001 Free Software Foundation, Inc. dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. 
If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. dnl The rep/movsl is very slow for small blocks on pentium4. Its startup diff --git a/gmp/mpn/x86/pentium4/mmx/lshift.asm b/gmp/mpn/x86/pentium4/mmx/lshift.asm index b5eca66698..5d316d5da4 100644 --- a/gmp/mpn/x86/pentium4/mmx/lshift.asm +++ b/gmp/mpn/x86/pentium4/mmx/lshift.asm @@ -1,32 +1,21 @@ dnl Intel Pentium-4 mpn_lshift -- left shift. dnl Copyright 2001, 2002 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') diff --git a/gmp/mpn/x86/pentium4/mmx/popham.asm b/gmp/mpn/x86/pentium4/mmx/popham.asm index 9563cb57e4..2e79816821 100644 --- a/gmp/mpn/x86/pentium4/mmx/popham.asm +++ b/gmp/mpn/x86/pentium4/mmx/popham.asm @@ -1,33 +1,22 @@ dnl Intel Pentium 4 mpn_popcount, mpn_hamdist -- population count and dnl hamming distance. -dnl Copyright 2000-2002, 2007 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: +dnl Copyright 2000, 2001, 2002, 2007 Free Software Foundation, Inc. dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') diff --git a/gmp/mpn/x86/pentium4/mmx/rshift.asm b/gmp/mpn/x86/pentium4/mmx/rshift.asm index 3ac0094a5a..a7dec54a3a 100644 --- a/gmp/mpn/x86/pentium4/mmx/rshift.asm +++ b/gmp/mpn/x86/pentium4/mmx/rshift.asm @@ -1,32 +1,21 @@ dnl Intel Pentium-4 mpn_rshift -- right shift. dnl Copyright 2001, 2002 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. 
dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') diff --git a/gmp/mpn/x86/pentium4/sse2/add_n.asm b/gmp/mpn/x86/pentium4/sse2/add_n.asm index 8e2380e493..04c0c68d0e 100644 --- a/gmp/mpn/x86/pentium4/sse2/add_n.asm +++ b/gmp/mpn/x86/pentium4/sse2/add_n.asm @@ -1,44 +1,36 @@ dnl Intel Pentium-4 mpn_add_n -- mpn addition. dnl Copyright 2001, 2002 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. 
+dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') -C cycles/limb -C dst!=src1,2 dst==src1 dst==src2 -C P6 model 0-8,10-12 - -C P6 model 9 (Banias) ? -C P6 model 13 (Dothan) ? -C P4 model 0-1 (Willamette) ? -C P4 model 2 (Northwood) 4 6 6 -C P4 model 3-4 (Prescott) 4.25 7.5 7.5 +C P4 Willamette, Northwood: 4.0 cycles/limb if dst!=src1 and dst!=src2 +C 6.0 cycles/limb if dst==src1 or dst==src2 +C P4 Prescott: >= 5 cycles/limb + +C mp_limb_t mpn_add_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, +C mp_size_t size); +C mp_limb_t mpn_add_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, +C mp_size_t size, mp_limb_t carry); +C +C The 4 c/l achieved here isn't particularly good, but is better than 9 c/l +C for a basic adc loop. 
defframe(PARAM_CARRY,20) defframe(PARAM_SIZE, 16) @@ -54,25 +46,29 @@ define(SAVE_EBX,`PARAM_SRC1') PROLOGUE(mpn_add_nc) deflit(`FRAME',0) + movd PARAM_CARRY, %mm0 jmp L(start_nc) + EPILOGUE() ALIGN(8) PROLOGUE(mpn_add_n) deflit(`FRAME',0) + pxor %mm0, %mm0 + L(start_nc): - mov PARAM_SRC1, %eax - mov %ebx, SAVE_EBX - mov PARAM_SRC2, %ebx - mov PARAM_DST, %edx - mov PARAM_SIZE, %ecx + movl PARAM_SRC1, %eax + movl %ebx, SAVE_EBX + movl PARAM_SRC2, %ebx + movl PARAM_DST, %edx + movl PARAM_SIZE, %ecx - lea (%eax,%ecx,4), %eax C src1 end - lea (%ebx,%ecx,4), %ebx C src2 end - lea (%edx,%ecx,4), %edx C dst end - neg %ecx C -size + leal (%eax,%ecx,4), %eax C src1 end + leal (%ebx,%ecx,4), %ebx C src2 end + leal (%edx,%ecx,4), %edx C dst end + negl %ecx C -size L(top): C eax src1 end @@ -90,11 +86,12 @@ L(top): psrlq $32, %mm0 - add $1, %ecx + addl $1, %ecx jnz L(top) + movd %mm0, %eax - mov SAVE_EBX, %ebx + movl SAVE_EBX, %ebx emms ret diff --git a/gmp/mpn/x86/pentium4/sse2/addlsh1_n.asm b/gmp/mpn/x86/pentium4/sse2/addlsh1_n.asm index 93b63b2018..46b0903c50 100644 --- a/gmp/mpn/x86/pentium4/sse2/addlsh1_n.asm +++ b/gmp/mpn/x86/pentium4/sse2/addlsh1_n.asm @@ -1,45 +1,33 @@ dnl Intel Pentium-4 mpn_addlsh1_n -- mpn x+2*y. -dnl Copyright 2001-2004, 2006 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or +dnl Copyright 2001, 2002, 2003, 2004, 2006 Free Software Foundation, Inc. dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. 
+dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') -C cycles/limb -C dst!=src1,2 dst==src1 dst==src2 -C P6 model 0-8,10-12 - -C P6 model 9 (Banias) ? -C P6 model 13 (Dothan) ? -C P4 model 0-1 (Willamette) ? -C P4 model 2 (Northwood) 4.25 6 6 -C P4 model 3-4 (Prescott) 5 8.5 8.5 +C cycles/limb (approx) +C dst!=src1,2 dst==src1 dst==src2 +C P4 m2: 4.5 ?7.25 ?6.75 +C P4 m3: 5.3 ? ? +C mp_limb_t mpn_addlsh1_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, +C mp_size_t size); +C C The slightly strange combination of indexing and pointer incrementing C that's used seems to work best. Not sure why, but %ecx,4 with src1 and/or C src2 is a slowdown. 
@@ -63,18 +51,18 @@ define(SAVE_EBX,`PARAM_SRC1') PROLOGUE(mpn_addlsh1_n) deflit(`FRAME',0) - mov PARAM_SRC1, %eax - mov %ebx, SAVE_EBX + movl PARAM_SRC1, %eax + movl %ebx, SAVE_EBX - mov PARAM_SRC2, %ebx + movl PARAM_SRC2, %ebx pxor %mm0, %mm0 C initial carry - mov PARAM_DST, %edx + movl PARAM_DST, %edx - mov PARAM_SIZE, %ecx + movl PARAM_SIZE, %ecx - lea (%edx,%ecx,4), %edx C dst end - neg %ecx C -size + leal (%edx,%ecx,4), %edx C dst end + negl %ecx C -size L(top): C eax src1 end @@ -83,24 +71,24 @@ L(top): C edx dst end C mm0 carry - movd (%ebx), %mm2 movd (%eax), %mm1 + movd (%ebx), %mm2 psrlq $32, %mm0 - lea 4(%eax), %eax - lea 4(%ebx), %ebx + leal 4(%eax), %eax + leal 4(%ebx), %ebx - psllq $1, %mm2 + paddq %mm2, %mm1 paddq %mm2, %mm1 paddq %mm1, %mm0 movd %mm0, (%edx,%ecx,4) - add $1, %ecx + addl $1, %ecx jnz L(top) psrlq $32, %mm0 - mov SAVE_EBX, %ebx + movl SAVE_EBX, %ebx movd %mm0, %eax emms ret diff --git a/gmp/mpn/x86/pentium4/sse2/addmul_1.asm b/gmp/mpn/x86/pentium4/sse2/addmul_1.asm index 78102072bf..3a8d0bb9bd 100644 --- a/gmp/mpn/x86/pentium4/sse2/addmul_1.asm +++ b/gmp/mpn/x86/pentium4/sse2/addmul_1.asm @@ -1,48 +1,37 @@ dnl mpn_addmul_1 for Pentium 4 and P6 models with SSE2 (i.e., 9,D,E,F). -dnl Copyright 2005, 2007, 2011 Free Software Foundation, Inc. - +dnl Copyright 2005, 2007 Free Software Foundation, Inc. +dnl dnl This file is part of the GNU MP Library. dnl dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. 
+dnl it under the terms of the GNU Lesser General Public License as published +dnl by the Free Software Foundation; either version 3 of the License, or (at +dnl your option) any later version. dnl dnl The GNU MP Library is distributed in the hope that it will be useful, but dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') -C cycles/limb -C P6 model 0-8,10-12 - -C P6 model 9 (Banias) 5.24 -C P6 model 13 (Dothan) 5.24 -C P4 model 0-1 (Willamette) 5 -C P4 model 2 (Northwood) 5 -C P4 model 3-4 (Prescott) 5 - C TODO: C * Tweak eax/edx offsets in loop as to save some lea's C * Perhaps software pipeline small-case code +C cycles/limb +C P6 model 0-8,10-12) - +C P6 model 9 (Banias) ? 
+C P6 model 13 (Dothan) 5.24 +C P4 model 0-1 (Willamette): 5 +C P4 model 2 (Northwood): 5 +C P4 model 3-4 (Prescott): 5 + C INPUT PARAMETERS C rp sp + 4 C up sp + 8 @@ -51,13 +40,22 @@ C v0 sp + 16 TEXT ALIGN(16) +PROLOGUE(mpn_addmul_1c) + mov 4(%esp), %edx + mov 8(%esp), %eax + mov 12(%esp), %ecx + movd 16(%esp), %mm7 + movd 20(%esp), %mm6 + jmp L(ent) +EPILOGUE() + ALIGN(16) PROLOGUE(mpn_addmul_1) - pxor %mm6, %mm6 -L(ent): mov 4(%esp), %edx + mov 4(%esp), %edx mov 8(%esp), %eax mov 12(%esp), %ecx movd 16(%esp), %mm7 - cmp $4, %ecx + pxor %mm6, %mm6 +L(ent): cmp $4, %ecx jnc L(big) L(lp0): movd (%eax), %mm0 @@ -183,7 +181,3 @@ L(end): pmuludq %mm7, %mm2 emms ret EPILOGUE() -PROLOGUE(mpn_addmul_1c) - movd 20(%esp), %mm6 - jmp L(ent) -EPILOGUE() diff --git a/gmp/mpn/x86/pentium4/sse2/bdiv_dbm1c.asm b/gmp/mpn/x86/pentium4/sse2/bdiv_dbm1c.asm deleted file mode 100644 index 354300e4de..0000000000 --- a/gmp/mpn/x86/pentium4/sse2/bdiv_dbm1c.asm +++ /dev/null @@ -1,141 +0,0 @@ -dnl Intel Atom mpn_bdiv_dbm1. - -dnl Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato. - -dnl Copyright 2011 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. 
-dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. - -include(`../config.m4') - -C cycles/limb -C cycles/limb -C P5 - -C P6 model 0-8,10-12 - -C P6 model 9 (Banias) 9.75 -C P6 model 13 (Dothan) -C P4 model 0 (Willamette) -C P4 model 1 (?) -C P4 model 2 (Northwood) 8.25 -C P4 model 3 (Prescott) -C P4 model 4 (Nocona) -C Intel Atom 8 -C AMD K6 - -C AMD K7 - -C AMD K8 -C AMD K10 - -C TODO: This code was optimised for atom-32, consider moving it back to atom -C dir(atom currently grabs this code), and write a 4-way version(7c/l). - -defframe(PARAM_CARRY,20) -defframe(PARAM_MUL, 16) -defframe(PARAM_SIZE, 12) -defframe(PARAM_SRC, 8) -defframe(PARAM_DST, 4) - -dnl re-use parameter space -define(SAVE_RP,`PARAM_MUL') -define(SAVE_UP,`PARAM_SIZE') - -define(`rp', `%edi') -define(`up', `%esi') -define(`n', `%ecx') -define(`reg', `%edx') -define(`cy', `%eax') C contains the return value - -ASM_START() - TEXT - ALIGN(16) -deflit(`FRAME',0) - -PROLOGUE(mpn_bdiv_dbm1c) - mov PARAM_SIZE, n C size - mov up, SAVE_UP - mov PARAM_SRC, up - movd PARAM_MUL, %mm7 - mov rp, SAVE_RP - mov PARAM_DST, rp - - movd (up), %mm0 - pmuludq %mm7, %mm0 - shr n - mov PARAM_CARRY, cy - jz L(eq1) - - movd 4(up), %mm1 - jc L(odd) - - lea 4(up), up - pmuludq %mm7, %mm1 - movd %mm0, reg - psrlq $32, %mm0 - sub reg, cy - movd %mm0, reg - movq %mm1, %mm0 - dec n - mov cy, (rp) - lea 4(rp), rp - jz L(end) - -C ALIGN(16) -L(top): movd 4(up), %mm1 - sbb reg, cy -L(odd): movd %mm0, reg - psrlq $32, %mm0 - pmuludq %mm7, %mm1 - sub reg, cy - lea 8(up), up - movd %mm0, reg - movd (up), %mm0 - mov cy, (rp) - sbb reg, cy - movd %mm1, reg - psrlq $32, %mm1 - sub reg, cy - movd %mm1, reg - pmuludq %mm7, %mm0 - dec n - mov cy, 4(rp) - lea 8(rp), rp - jnz L(top) - -L(end): sbb reg, cy - -L(eq1): movd %mm0, reg - psrlq $32, %mm0 - mov SAVE_UP, up - sub reg, cy - movd %mm0, 
reg - emms - mov cy, (rp) - sbb reg, cy - - mov SAVE_RP, rp - ret -EPILOGUE() -ASM_END() diff --git a/gmp/mpn/x86/pentium4/sse2/bdiv_q_1.asm b/gmp/mpn/x86/pentium4/sse2/bdiv_q_1.asm deleted file mode 100644 index f7f461d56f..0000000000 --- a/gmp/mpn/x86/pentium4/sse2/bdiv_q_1.asm +++ /dev/null @@ -1,233 +0,0 @@ -dnl Intel Pentium-4 mpn_divexact_1 -- mpn by limb exact division. - -dnl Rearranged from mpn/x86/pentium4/sse2/dive_1.asm by Marco Bodrato. - -dnl Copyright 2001, 2002, 2007, 2011 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. - -include(`../config.m4') - - -C P4: 19.0 cycles/limb - -C Pairs of movd's are used to avoid unaligned loads. Despite the loads not -C being on the dependent chain and there being plenty of cycles available, -C using an unaligned movq on every second iteration measured about 23 c/l. 
-C - -defframe(PARAM_SHIFT, 24) -defframe(PARAM_INVERSE,20) -defframe(PARAM_DIVISOR,16) -defframe(PARAM_SIZE, 12) -defframe(PARAM_SRC, 8) -defframe(PARAM_DST, 4) - - TEXT - -C mp_limb_t -C mpn_pi1_bdiv_q_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, mp_limb_t divisor, -C mp_limb_t inverse, int shift) - ALIGN(32) -PROLOGUE(mpn_pi1_bdiv_q_1) -deflit(`FRAME',0) - - movl PARAM_SIZE, %edx - - movl PARAM_SRC, %eax - - movl PARAM_DIVISOR, %ecx - - movd %ecx, %mm6 - movl PARAM_SHIFT, %ecx - - movd %ecx, %mm7 C shift - - C - - movl PARAM_INVERSE, %ecx - movd %ecx, %mm5 C inv - - movl PARAM_DST, %ecx - pxor %mm1, %mm1 C initial carry limb - pxor %mm0, %mm0 C initial carry bit - - subl $1, %edx - jz L(done) - - pcmpeqd %mm4, %mm4 - psrlq $32, %mm4 C 0x00000000FFFFFFFF - -C The dependent chain here is as follows. -C -C latency -C psubq s = (src-cbit) - climb 2 -C pmuludq q = s*inverse 8 -C pmuludq prod = q*divisor 8 -C psrlq climb = high(prod) 2 -C -- -C 20 -C -C Yet the loop measures 19.0 c/l, so obviously there's something gained -C there over a straight reading of the chip documentation. 
- -L(top): - C eax src, incrementing - C ebx - C ecx dst, incrementing - C edx counter, size-1 iterations - C - C mm0 carry bit - C mm1 carry limb - C mm4 0x00000000FFFFFFFF - C mm5 inverse - C mm6 divisor - C mm7 shift - - movd (%eax), %mm2 - movd 4(%eax), %mm3 - addl $4, %eax - punpckldq %mm3, %mm2 - - psrlq %mm7, %mm2 - pand %mm4, %mm2 C src - psubq %mm0, %mm2 C src - cbit - - psubq %mm1, %mm2 C src - cbit - climb - movq %mm2, %mm0 - psrlq $63, %mm0 C new cbit - - pmuludq %mm5, %mm2 C s*inverse - movd %mm2, (%ecx) C q - addl $4, %ecx - - movq %mm6, %mm1 - pmuludq %mm2, %mm1 C q*divisor - psrlq $32, %mm1 C new climb - -L(entry): - subl $1, %edx - jnz L(top) - -L(done): - movd (%eax), %mm2 - psrlq %mm7, %mm2 C src - psubq %mm0, %mm2 C src - cbit - - psubq %mm1, %mm2 C src - cbit - climb - - pmuludq %mm5, %mm2 C s*inverse - movd %mm2, (%ecx) C q - - emms - ret - -EPILOGUE() - - ALIGN(16) -C mp_limb_t mpn_bdiv_q_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, -C mp_limb_t divisor); -C -PROLOGUE(mpn_bdiv_q_1) -deflit(`FRAME',0) - - movl PARAM_SIZE, %edx - - movl PARAM_DIVISOR, %ecx - - C eax src - C ebx - C ecx divisor - C edx size-1 - - movl %ecx, %eax - bsfl %ecx, %ecx C trailing twos - - shrl %cl, %eax C d = divisor without twos - movd %eax, %mm6 - movd %ecx, %mm7 C shift - - shrl %eax C d/2 - - andl $127, %eax C d/2, 7 bits - -ifdef(`PIC',` - LEA( binvert_limb_table, %ecx) - movzbl (%eax,%ecx), %eax C inv 8 bits -',` - movzbl binvert_limb_table(%eax), %eax C inv 8 bits -') - - C - - movd %eax, %mm5 C inv - - movd %eax, %mm0 C inv - - pmuludq %mm5, %mm5 C inv*inv - - C - - pmuludq %mm6, %mm5 C inv*inv*d - paddd %mm0, %mm0 C 2*inv - - C - - psubd %mm5, %mm0 C inv = 2*inv - inv*inv*d - pxor %mm5, %mm5 - - paddd %mm0, %mm5 - pmuludq %mm0, %mm0 C inv*inv - - pcmpeqd %mm4, %mm4 - psrlq $32, %mm4 C 0x00000000FFFFFFFF - - C - - pmuludq %mm6, %mm0 C inv*inv*d - paddd %mm5, %mm5 C 2*inv - - movl PARAM_SRC, %eax - movl PARAM_DST, %ecx - pxor %mm1, %mm1 C initial carry limb - 
- C - - psubd %mm0, %mm5 C inv = 2*inv - inv*inv*d - - ASSERT(e,` C expect d*inv == 1 mod 2^GMP_LIMB_BITS - pushl %eax FRAME_pushl() - movq %mm6, %mm0 - pmuludq %mm5, %mm0 - movd %mm0, %eax - cmpl $1, %eax - popl %eax FRAME_popl()') - - pxor %mm0, %mm0 C initial carry bit - jmp L(entry) - -EPILOGUE() diff --git a/gmp/mpn/x86/pentium4/sse2/cnd_add_n.asm b/gmp/mpn/x86/pentium4/sse2/cnd_add_n.asm deleted file mode 100644 index b3f3474e67..0000000000 --- a/gmp/mpn/x86/pentium4/sse2/cnd_add_n.asm +++ /dev/null @@ -1,95 +0,0 @@ -dnl Intel Pentium-4 mpn_cnd_add_n -- mpn addition. - -dnl Copyright 2001, 2002, 2013 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. - -include(`../config.m4') - - -C cycles/limb -C P6 model 0-8,10-12 - -C P6 model 9 (Banias) ? -C P6 model 13 (Dothan) 4.67 -C P4 model 0-1 (Willamette) ? 
-C P4 model 2 (Northwood) 5 -C P4 model 3-4 (Prescott) 5.25 - -defframe(PARAM_SIZE, 20) -defframe(PARAM_SRC2, 16) -defframe(PARAM_SRC1, 12) -defframe(PARAM_DST, 8) -defframe(PARAM_CND, 4) - -dnl re-use parameter space -define(SAVE_EBX,`PARAM_SRC1') - -define(`cnd', `%mm3') - - TEXT - ALIGN(8) - - ALIGN(8) -PROLOGUE(mpn_cnd_add_n) -deflit(`FRAME',0) - pxor %mm0, %mm0 - - mov PARAM_CND, %eax - neg %eax - sbb %eax, %eax - movd %eax, cnd - - mov PARAM_SRC1, %eax - mov %ebx, SAVE_EBX - mov PARAM_SRC2, %ebx - mov PARAM_DST, %edx - mov PARAM_SIZE, %ecx - - lea (%eax,%ecx,4), %eax C src1 end - lea (%ebx,%ecx,4), %ebx C src2 end - lea (%edx,%ecx,4), %edx C dst end - neg %ecx C -size - -L(top): movd (%ebx,%ecx,4), %mm2 - movd (%eax,%ecx,4), %mm1 - pand cnd, %mm2 - paddq %mm2, %mm1 - - paddq %mm1, %mm0 - movd %mm0, (%edx,%ecx,4) - - psrlq $32, %mm0 - - add $1, %ecx - jnz L(top) - - movd %mm0, %eax - mov SAVE_EBX, %ebx - emms - ret - -EPILOGUE() diff --git a/gmp/mpn/x86/pentium4/sse2/cnd_sub_n.asm b/gmp/mpn/x86/pentium4/sse2/cnd_sub_n.asm deleted file mode 100644 index 339a23e0b6..0000000000 --- a/gmp/mpn/x86/pentium4/sse2/cnd_sub_n.asm +++ /dev/null @@ -1,114 +0,0 @@ -dnl Intel Pentium-4 mpn_cnd_sub_n -- mpn subtraction. - -dnl Copyright 2001, 2002, 2013 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. 
-dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. - -include(`../config.m4') - - -C cycles/limb -C P6 model 0-8,10-12 - -C P6 model 9 (Banias) ? -C P6 model 13 (Dothan) 4.67 -C P4 model 0-1 (Willamette) ? -C P4 model 2 (Northwood) 5 -C P4 model 3-4 (Prescott) 5.25 - -defframe(PARAM_SIZE, 20) -defframe(PARAM_SRC2, 16) -defframe(PARAM_SRC1, 12) -defframe(PARAM_DST, 8) -defframe(PARAM_CND, 4) - -dnl re-use parameter space -define(SAVE_EBX,`PARAM_SRC1') - -define(`cnd', `%mm3') - - TEXT - ALIGN(8) - - ALIGN(8) -PROLOGUE(mpn_cnd_sub_n) -deflit(`FRAME',0) - pxor %mm0, %mm0 - - mov PARAM_CND, %eax - neg %eax - sbb %eax, %eax - movd %eax, cnd - - mov PARAM_SRC1, %eax - mov %ebx, SAVE_EBX - mov PARAM_SRC2, %ebx - mov PARAM_DST, %edx - mov PARAM_SIZE, %ecx - - lea (%eax,%ecx,4), %eax C src1 end - lea (%ebx,%ecx,4), %ebx C src2 end - lea (%edx,%ecx,4), %edx C dst end - neg %ecx C -size - -L(top): movd (%ebx,%ecx,4), %mm2 - movd (%eax,%ecx,4), %mm1 - pand cnd, %mm2 - psubq %mm2, %mm1 - - psubq %mm0, %mm1 - movd %mm1, (%edx,%ecx,4) - - psrlq $63, %mm1 - - add $1, %ecx - jz L(done_mm1) - - movd (%ebx,%ecx,4), %mm2 - movd (%eax,%ecx,4), %mm0 - pand cnd, %mm2 - psubq %mm2, %mm0 - - psubq %mm1, %mm0 - movd %mm0, (%edx,%ecx,4) - - psrlq $63, %mm0 - - add $1, %ecx - jnz L(top) - - movd %mm0, %eax - mov SAVE_EBX, %ebx - emms - ret - -L(done_mm1): - movd %mm1, %eax - mov SAVE_EBX, %ebx - emms - ret - -EPILOGUE() diff --git a/gmp/mpn/x86/pentium4/sse2/dive_1.asm b/gmp/mpn/x86/pentium4/sse2/dive_1.asm index 238f0dd8a5..c50ef7d29e 100644 --- a/gmp/mpn/x86/pentium4/sse2/dive_1.asm 
+++ b/gmp/mpn/x86/pentium4/sse2/dive_1.asm @@ -1,32 +1,21 @@ dnl Intel Pentium-4 mpn_divexact_1 -- mpn by limb exact division. dnl Copyright 2001, 2002, 2007 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. 
include(`../config.m4') @@ -137,7 +126,7 @@ ifdef(`PIC',` psubd %mm0, %mm5 C inv = 2*inv - inv*inv*d - ASSERT(e,` C expect d*inv == 1 mod 2^GMP_LIMB_BITS + ASSERT(e,` C expect d*inv == 1 mod 2^BITS_PER_MP_LIMB pushl %eax FRAME_pushl() movq %mm6, %mm0 pmuludq %mm5, %mm0 @@ -150,13 +139,13 @@ ifdef(`PIC',` C The dependent chain here is as follows. C -C latency -C psubq s = (src-cbit) - climb 2 -C pmuludq q = s*inverse 8 -C pmuludq prod = q*divisor 8 -C psrlq climb = high(prod) 2 -C -- -C 20 +C latency +C psubq s = (src-cbit) - climb 2 +C pmuludq q = s*inverse 8 +C pmuludq prod = q*divisor 8 +C psrlq climb = high(prod) 2 +C -- +C 20 C C Yet the loop measures 19.0 c/l, so obviously there's something gained C there over a straight reading of the chip documentation. diff --git a/gmp/mpn/x86/pentium4/sse2/divrem_1.asm b/gmp/mpn/x86/pentium4/sse2/divrem_1.asm index 0146fab117..7f973dbf98 100644 --- a/gmp/mpn/x86/pentium4/sse2/divrem_1.asm +++ b/gmp/mpn/x86/pentium4/sse2/divrem_1.asm @@ -1,32 +1,22 @@ dnl Intel Pentium-4 mpn_divrem_1 -- mpn by limb division. -dnl Copyright 1999-2004 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: +dnl Copyright 1999, 2000, 2001, 2002, 2003, 2004 Free Software Foundation, +dnl Inc. dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. 
+dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') diff --git a/gmp/mpn/x86/pentium4/sse2/gmp-mparam.h b/gmp/mpn/x86/pentium4/sse2/gmp-mparam.h index a94ae868b3..5071aae092 100644 --- a/gmp/mpn/x86/pentium4/sse2/gmp-mparam.h +++ b/gmp/mpn/x86/pentium4/sse2/gmp-mparam.h @@ -1,206 +1,68 @@ /* Intel Pentium-4 gmp-mparam.h -- Compiler/machine parameter header file. -Copyright 1991, 1993, 1994, 2000-2005, 2007-2010, 2014 Free Software -Foundation, Inc. +Copyright 1991, 1993, 1994, 2000, 2001, 2002, 2003, 2004, 2005, 2007, 2008, +2009 Free Software Foundation, Inc. This file is part of the GNU MP Library. 
The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of either: +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 3 of the License, or (at your +option) any later version. - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. -or +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ - * the GNU General Public License as published by the Free Software - Foundation; either version 2 of the License, or (at your option) any - later version. +#define BITS_PER_MP_LIMB 32 +#define BYTES_PER_MP_LIMB 4 -or both in parallel, as here. -The GNU MP Library is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. - -You should have received copies of the GNU General Public License and the -GNU Lesser General Public License along with the GNU MP Library. If not, -see https://www.gnu.org/licenses/. 
*/ - -#define GMP_LIMB_BITS 32 -#define GMP_LIMB_BYTES 4 - -/* 2600 MHz P4 Northwood */ -/* FFT tuning limit = 12500000 */ -/* Generated by tuneup.c, 2014-03-12, gcc 4.2 */ - -#define MOD_1_NORM_THRESHOLD 24 -#define MOD_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* never */ -#define MOD_1N_TO_MOD_1_1_THRESHOLD 6 -#define MOD_1U_TO_MOD_1_1_THRESHOLD 5 -#define MOD_1_1_TO_MOD_1_2_THRESHOLD 13 -#define MOD_1_2_TO_MOD_1_4_THRESHOLD 0 /* never mpn_mod_1s_2p */ -#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 2 -#define USE_PREINV_DIVREM_1 1 /* native */ -#define DIV_QR_1N_PI1_METHOD 2 -#define DIV_QR_1_NORM_THRESHOLD 19 -#define DIV_QR_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* never */ -#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */ -#define DIVEXACT_1_THRESHOLD 0 /* always (native) */ -#define BMOD_1_TO_MOD_1_THRESHOLD 20 - -#define MUL_TOOM22_THRESHOLD 29 -#define MUL_TOOM33_THRESHOLD 113 -#define MUL_TOOM44_THRESHOLD 288 -#define MUL_TOOM6H_THRESHOLD 454 -#define MUL_TOOM8H_THRESHOLD 592 - -#define MUL_TOOM32_TO_TOOM43_THRESHOLD 118 -#define MUL_TOOM32_TO_TOOM53_THRESHOLD 214 -#define MUL_TOOM42_TO_TOOM53_THRESHOLD 193 -#define MUL_TOOM42_TO_TOOM63_THRESHOLD 186 -#define MUL_TOOM43_TO_TOOM54_THRESHOLD 287 - -#define SQR_BASECASE_THRESHOLD 0 /* always (native) */ -#define SQR_TOOM2_THRESHOLD 44 -#define SQR_TOOM3_THRESHOLD 173 -#define SQR_TOOM4_THRESHOLD 390 -#define SQR_TOOM6_THRESHOLD 0 /* always */ -#define SQR_TOOM8_THRESHOLD 915 - -#define MULMID_TOOM42_THRESHOLD 66 - -#define MULMOD_BNM1_THRESHOLD 19 -#define SQRMOD_BNM1_THRESHOLD 23 - -#define MUL_FFT_MODF_THRESHOLD 1147 /* k = 5 */ -#define MUL_FFT_TABLE3 \ - { { 1147, 5}, { 36, 6}, { 19, 5}, { 39, 6}, \ - { 27, 7}, { 15, 6}, { 33, 7}, { 17, 6}, \ - { 35, 7}, { 19, 6}, { 40, 7}, { 21, 6}, \ - { 43, 7}, { 23, 6}, { 49, 7}, { 27, 6}, \ - { 55, 7}, { 31, 6}, { 63, 7}, { 35, 8}, \ - { 19, 7}, { 43, 8}, { 23, 7}, { 51, 8}, \ - { 27, 7}, { 55, 8}, { 31, 7}, { 63, 8}, \ - { 39, 7}, { 79, 8}, { 43, 9}, { 23, 8}, \ - { 55, 9}, { 
31, 8}, { 71, 9}, { 39, 8}, \ - { 79, 9}, { 47, 8}, { 95, 9}, { 55,10}, \ - { 31, 9}, { 63, 8}, { 127, 9}, { 79,10}, \ - { 47, 9}, { 111,11}, { 31,10}, { 63, 9}, \ - { 143,10}, { 79, 9}, { 159,10}, { 111,11}, \ - { 63,10}, { 127, 9}, { 255,10}, { 159, 9}, \ - { 319,11}, { 95,10}, { 207,12}, { 63,11}, \ - { 127,10}, { 287,11}, { 159,10}, { 335,11}, \ - { 191,10}, { 383,11}, { 223,12}, { 127,11}, \ - { 255,10}, { 511,11}, { 319,10}, { 671,11}, \ - { 351,12}, { 191,11}, { 383,10}, { 799,13}, \ - { 127,12}, { 255,11}, { 511,10}, { 1055, 9}, \ - { 2111,10}, { 1119, 9}, { 2239,11}, { 607,12}, \ - { 319,11}, { 671,10}, { 1407,11}, { 735,10}, \ - { 1471, 9}, { 2943,12}, { 383,11}, { 799,10}, \ - { 1599,11}, { 863,10}, { 1727, 9}, { 3455,12}, \ - { 447,11}, { 895,13}, { 255,12}, { 511,11}, \ - { 1055,10}, { 2111,11}, { 1119,10}, { 2239, 9}, \ - { 4479,12}, { 575,11}, { 1247,10}, { 2495, 9}, \ - { 4991,12}, { 639,11}, { 1471,10}, { 2943,13}, \ - { 383,12}, { 767,11}, { 1599,12}, { 831,11}, \ - { 1727,10}, { 3455,12}, { 895,14}, { 255,13}, \ - { 511,12}, { 1023,11}, { 2047,12}, { 1087,11}, \ - { 2239,10}, { 4479,12}, { 1215,11}, { 2495,10}, \ - { 4991,13}, { 639,12}, { 1471,11}, { 2943,10}, \ - { 5887,11}, { 3007,13}, { 767,12}, { 1727,11}, \ - { 3455,13}, { 895,12}, { 1791,11}, { 3711,12}, \ - { 1983,11}, { 3967,10}, { 7935,14}, { 511,13}, \ - { 1023,12}, { 2239,11}, { 4479,13}, { 1151,12}, \ - { 2495,11}, { 4991,13}, { 1279,12}, { 2623,13}, \ - { 1407,12}, { 2943,11}, { 5887,12}, { 3007,14}, \ - { 16384,15}, { 32768,16} } -#define MUL_FFT_TABLE3_SIZE 158 -#define MUL_FFT_THRESHOLD 7808 - -#define SQR_FFT_MODF_THRESHOLD 896 /* k = 5 */ -#define SQR_FFT_TABLE3 \ - { { 896, 5}, { 28, 6}, { 15, 5}, { 33, 6}, \ - { 17, 5}, { 35, 6}, { 19, 5}, { 39, 6}, \ - { 27, 7}, { 15, 6}, { 33, 7}, { 17, 6}, \ - { 36, 7}, { 19, 6}, { 39, 7}, { 23, 6}, \ - { 47, 7}, { 27, 6}, { 55, 7}, { 31, 6}, \ - { 63, 7}, { 37, 8}, { 19, 7}, { 43, 8}, \ - { 23, 7}, { 51, 8}, { 27, 7}, { 55, 8}, \ - { 31, 
7}, { 63, 8}, { 39, 7}, { 79, 8}, \ - { 43, 9}, { 23, 8}, { 55, 9}, { 31, 8}, \ - { 71, 9}, { 39, 8}, { 79, 9}, { 47, 8}, \ - { 95, 9}, { 55,10}, { 31, 9}, { 79,10}, \ - { 47, 9}, { 95,11}, { 31,10}, { 63, 9}, \ - { 127,10}, { 79, 9}, { 159,10}, { 95, 9}, \ - { 191,11}, { 63,10}, { 127, 9}, { 255,10}, \ - { 159,11}, { 95,10}, { 191,12}, { 63,11}, \ - { 127,10}, { 255, 9}, { 511,10}, { 271, 9}, \ - { 543,11}, { 159,10}, { 319, 9}, { 639,11}, \ - { 191,10}, { 383, 9}, { 767,11}, { 223,12}, \ - { 127,11}, { 255,10}, { 511, 9}, { 1055,10}, \ - { 543,11}, { 287,10}, { 607,11}, { 319,12}, \ - { 191,11}, { 383,10}, { 767,13}, { 127,12}, \ - { 255,11}, { 511,10}, { 1055,11}, { 543,10}, \ - { 1119, 9}, { 2239,11}, { 607,12}, { 319,11}, \ - { 671,10}, { 1407,11}, { 735,10}, { 1471, 9}, \ - { 2943,12}, { 383,11}, { 799,10}, { 1599,11}, \ - { 863,10}, { 1727,12}, { 447,11}, { 991,13}, \ - { 255,12}, { 511,11}, { 1055,10}, { 2111,11}, \ - { 1119,10}, { 2239,12}, { 575,11}, { 1247,10}, \ - { 2495,12}, { 639,11}, { 1471,10}, { 2943,13}, \ - { 383,12}, { 767,11}, { 1599,12}, { 831,11}, \ - { 1727,10}, { 3455,12}, { 959,14}, { 255,13}, \ - { 511,12}, { 1023,11}, { 2111,12}, { 1087,11}, \ - { 2239,10}, { 4479,12}, { 1215,11}, { 2495,13}, \ - { 639,12}, { 1471,11}, { 2943,10}, { 5887,13}, \ - { 767,12}, { 1727,11}, { 3455,13}, { 895,12}, \ - { 1791,11}, { 3711,12}, { 1983,11}, { 3967,10}, \ - { 7935,14}, { 511,13}, { 1023,12}, { 2239,11}, \ - { 4479,13}, { 1151,12}, { 2495,11}, { 4991,13}, \ - { 1279,12}, { 2623,13}, { 1407,12}, { 2943,11}, \ - { 5887,14}, { 16384,15}, { 32768,16} } -#define SQR_FFT_TABLE3_SIZE 159 -#define SQR_FFT_THRESHOLD 7296 - -#define MULLO_BASECASE_THRESHOLD 12 -#define MULLO_DC_THRESHOLD 55 -#define MULLO_MUL_N_THRESHOLD 14709 - -#define DC_DIV_QR_THRESHOLD 38 -#define DC_DIVAPPR_Q_THRESHOLD 77 -#define DC_BDIV_QR_THRESHOLD 51 -#define DC_BDIV_Q_THRESHOLD 85 - -#define INV_MULMOD_BNM1_THRESHOLD 56 -#define INV_NEWTON_THRESHOLD 121 -#define INV_APPR_THRESHOLD 
93 - -#define BINV_NEWTON_THRESHOLD 366 -#define REDC_1_TO_REDC_N_THRESHOLD 64 - -#define MU_DIV_QR_THRESHOLD 2350 -#define MU_DIVAPPR_Q_THRESHOLD 2172 -#define MUPI_DIV_QR_THRESHOLD 62 -#define MU_BDIV_QR_THRESHOLD 2172 -#define MU_BDIV_Q_THRESHOLD 2304 - -#define POWM_SEC_TABLE 1,19,102,615,2111 - -#define MATRIX22_STRASSEN_THRESHOLD 23 -#define HGCD_THRESHOLD 88 -#define HGCD_APPR_THRESHOLD 93 -#define HGCD_REDUCE_THRESHOLD 5010 -#define GCD_DC_THRESHOLD 379 -#define GCDEXT_DC_THRESHOLD 258 -#define JACOBI_BASE_METHOD 4 - -#define GET_STR_DC_THRESHOLD 12 -#define GET_STR_PRECOMPUTE_THRESHOLD 26 -#define SET_STR_DC_THRESHOLD 147 -#define SET_STR_PRECOMPUTE_THRESHOLD 894 - -#define FAC_DSC_THRESHOLD 906 -#define FAC_ODD_THRESHOLD 28 +/* 2600 MHz Pentium 4 model 2 */ + +/* Generated by tuneup.c, 2009-01-06, gcc 3.4 */ + +#define MUL_KARATSUBA_THRESHOLD 31 +#define MUL_TOOM3_THRESHOLD 119 +#define MUL_TOOM44_THRESHOLD 178 + +#define SQR_BASECASE_THRESHOLD 0 /* always (native) */ +#define SQR_KARATSUBA_THRESHOLD 49 +#define SQR_TOOM3_THRESHOLD 165 +#define SQR_TOOM4_THRESHOLD 252 + +#define MULLOW_BASECASE_THRESHOLD 15 +#define MULLOW_DC_THRESHOLD 44 +#define MULLOW_MUL_N_THRESHOLD 363 + +#define DIV_SB_PREINV_THRESHOLD MP_SIZE_T_MAX /* never */ +#define DIV_DC_THRESHOLD 33 +#define POWM_THRESHOLD 95 + +#define MATRIX22_STRASSEN_THRESHOLD 23 +#define HGCD_THRESHOLD 64 +#define GCD_DC_THRESHOLD 310 +#define GCDEXT_DC_THRESHOLD 310 +#define JACOBI_BASE_METHOD 1 + +#define USE_PREINV_DIVREM_1 1 /* native */ +#define USE_PREINV_MOD_1 1 /* native */ +#define DIVEXACT_1_THRESHOLD 0 /* always (native) */ +#define MODEXACT_1_ODD_THRESHOLD 0 /* always (native) */ + +#define GET_STR_DC_THRESHOLD 11 +#define GET_STR_PRECOMPUTE_THRESHOLD 26 +#define SET_STR_DC_THRESHOLD 118 +#define SET_STR_PRECOMPUTE_THRESHOLD 1078 + +#define MUL_FFT_TABLE { 560, 928, 1920, 5632, 14336, 40960, 0 } +#define MUL_FFT_MODF_THRESHOLD 720 +#define MUL_FFT_THRESHOLD 9216 + +#define SQR_FFT_TABLE { 
592, 928, 1920, 4608, 14336, 40960, 0 } +#define SQR_FFT_MODF_THRESHOLD 608 +#define SQR_FFT_THRESHOLD 5888 diff --git a/gmp/mpn/x86/pentium4/sse2/mod_1.asm b/gmp/mpn/x86/pentium4/sse2/mod_1.asm new file mode 100644 index 0000000000..0e95f13913 --- /dev/null +++ b/gmp/mpn/x86/pentium4/sse2/mod_1.asm @@ -0,0 +1,391 @@ +dnl Intel Pentium-4 mpn_mod_1 -- mpn by limb remainder. + +dnl Copyright 2001, 2002, 2003 Free Software Foundation, Inc. +dnl +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. +dnl +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. + +include(`../config.m4') + + +dnl P4: 31 cycles/limb. + + +C mp_limb_t mpn_mod_1 (mp_srcptr src, mp_size_t size, mp_limb_t divisor); +C mp_limb_t mpn_mod_1c (mp_srcptr src, mp_size_t size, mp_limb_t divisor, +C mp_limb_t carry); +C mp_limb_t mpn_preinv_mod_1 (mp_srcptr src, mp_size_t size, mp_limb_t divisor, +C mp_limb_t inverse); +C +C An idea was tried in the mul-by-inverse to process the last limb by a jump +C back to the top of the loop skipping the -4(%esi) fetch. But that seemed +C to produce slightly strange timings, like 9 and 10 limb operations about +C the same speed. The jump would be successively taken and not-taken, which +C in theory should predict ok, but perhaps isn't enjoyed by the chip. 
+C Duplicating the loop for the last limb seems to be a couple of cycles +C quicker too. +C +C Enhancements: +C +C The loop measures 31 cycles, but the dependent chain would suggest it +C could be done with 30. Not sure where to start looking for the extra +C cycle. + + +dnl MUL_THRESHOLD is the size at which the multiply by inverse method is +dnl used, rather than plain "divl"s. Minimum value 2. +dnl +dnl The inverse takes about 80-90 cycles to calculate, but after that the +dnl multiply is 31 c/l versus division at about 58 c/l. + +deflit(MUL_THRESHOLD, 5) + + +defframe(PARAM_INVERSE,16) dnl mpn_preinv_mod_1 +defframe(PARAM_CARRY, 16) dnl mpn_mod_1c +defframe(PARAM_DIVISOR,12) +defframe(PARAM_SIZE, 8) +defframe(PARAM_SRC, 4) + +dnl re-use parameter space +define(SAVE_ESI,`PARAM_SIZE') +define(SAVE_EBP,`PARAM_SRC') + + TEXT + + ALIGN(16) +PROLOGUE(mpn_preinv_mod_1) +deflit(`FRAME',0) + + movl PARAM_SIZE, %ecx + movl %esi, SAVE_ESI + movl $32, %eax + + movd %eax, %mm6 C l = 0, so 32-l = 32 + movl PARAM_SRC, %esi + movl %ebp, SAVE_EBP + + movd PARAM_DIVISOR, %mm5 + pxor %mm7, %mm7 C l = 0 + + movd -4(%esi,%ecx,4), %mm0 C src high limb + leal -8(%esi,%ecx,4), %esi C &src[size-2] + + movd PARAM_INVERSE, %mm4 + subl $2, %ecx C size-2 + + psubq %mm5, %mm0 C high-divisor + movq %mm0, %mm2 + + psrlq $32, %mm0 C -1 if underflow + + pand %mm5, %mm0 C divisor if underflow + + paddq %mm2, %mm0 C addback if underflow + jz L(inverse_last) C if size==2 + ja L(inverse_top) C if size>2 + + + C if size==1 + movl SAVE_ESI, %esi + movd %mm0, %eax + emms + ret + +EPILOGUE() + + + ALIGN(16) +PROLOGUE(mpn_mod_1c) +deflit(`FRAME',0) + movl PARAM_SIZE, %ecx + movl %esi, SAVE_ESI + + movl PARAM_SRC, %esi + movl %ebp, SAVE_EBP + + movl PARAM_CARRY, %edx + orl %ecx, %ecx + jz L(divide_done) C result==carry if size==0 + + movl PARAM_DIVISOR, %ebp + jmp L(start_1c) + +EPILOGUE() + + + ALIGN(16) +PROLOGUE(mpn_mod_1) +deflit(`FRAME',0) + + movl PARAM_SIZE, %ecx + movl %esi, SAVE_ESI + + movl 
PARAM_SRC, %esi + movl %ebp, SAVE_EBP + + movl PARAM_DIVISOR, %ebp + xorl %edx, %edx C result 0 if size==0 + + orl %ecx, %ecx + jz L(divide_done) + movl -4(%esi,%ecx,4), %eax C src high limb + + leal -1(%ecx), %edx + cmpl %ebp, %eax C c if high<divisor + + cmovc( %edx, %ecx) C size-1 if high<divisor + + movl $0, %edx C initial carry + cmovc( %eax, %edx) C src high limb if high<divisor + + orl %ecx, %ecx + jz L(divide_done) C if size==1 and skip div + + +L(start_1c): + C eax + C ebx + C ecx size + C edx carry + C esi src + C edi + C ebp divisor + + leal -4(%esi,%ecx,4), %esi C &src[size-1] + cmpl $MUL_THRESHOLD, %ecx + jae L(mul_by_inverse) + + +L(divide_top): + C eax + C ebx + C ecx counter, limbs, decrementing + C edx remainder + C esi src, decrementing + C edi + C ebp divisor + + movl (%esi), %eax + subl $4, %esi + + divl %ebp + + subl $1, %ecx + jnz L(divide_top) + + +L(divide_done): + movl SAVE_ESI, %esi + movl SAVE_EBP, %ebp + movl %edx, %eax + ret + + +C ----------------------------------------------------------------------------- + +L(mul_by_inverse): + C eax + C ebx + C ecx size + C edx carry + C esi src + C edi + C ebp divisor + + bsrl %ebp, %eax C 31-l + + movd %edx, %mm1 C carry + movl %ecx, %edx C size + movl $31, %ecx + + C + + xorl %eax, %ecx C l = leading zeros on d + addl $1, %eax C 32-l + + shll %cl, %ebp C normalize d + movd %ecx, %mm7 C l + leal -1(%edx), %ecx C size-1 + + movd %eax, %mm6 C 32-l + movl $-1, %edx + movl $-1, %eax + + C + + subl %ebp, %edx C (b-d)-1 so edx:eax = b*(b-d)-1 + + divl %ebp C floor (b*(b-d)-1 / d) + + movd %ebp, %mm5 C d + movd (%esi), %mm0 C src high limb + punpckldq %mm1, %mm0 + psrlq %mm6, %mm0 C n2 = high (carry:srchigh << l) + + C + + movd %eax, %mm4 C m + + +C The dependent chain here consists of +C +C 2 paddd n1+n2 +C 8 pmuludq m*(n1+n2) +C 2 paddq n2:nadj + m*(n1+n2) +C 2 psrlq q1 +C 8 pmuludq d*q1 +C 2 psubq (n-d)-q1*d +C 2 psrlq high mask +C 2 pand d masked +C 2 paddd n2+d addback +C -- +C 30 +C +C But it 
seems to run at 31 cycles, so presumably there's something else +C going on. + + + ALIGN(16) +L(inverse_top): + C eax + C ebx + C ecx counter, size-1 to 1 + C edx + C esi src, decrementing + C edi + C ebp + C + C mm0 n2 + C mm4 m + C mm5 d + C mm6 32-l + C mm7 l + + ASSERT(b,`C n2<d + movd %mm0, %eax + movd %mm5, %edx + cmpl %edx, %eax') + + movd -4(%esi), %mm1 C next src limbs + movd (%esi), %mm2 + leal -4(%esi), %esi + + punpckldq %mm2, %mm1 + psrlq %mm6, %mm1 C n10 + + movq %mm1, %mm2 C n10 + movq %mm1, %mm3 C n10 + psrad $31, %mm1 C -n1 + pand %mm5, %mm1 C -n1 & d + paddd %mm2, %mm1 C nadj = n10+(-n1&d), ignore overflow + + psrld $31, %mm2 C n1 + paddd %mm0, %mm2 C n2+n1 + punpckldq %mm0, %mm1 C n2:nadj + + pmuludq %mm4, %mm2 C m*(n2+n1) + + paddq %mm2, %mm1 C n2:nadj + m*(n2+n1) + + psrlq $32, %mm1 C q1 = high(n2:nadj + m*(n2+n1)) + + pmuludq %mm5, %mm1 C q1*d + punpckldq %mm0, %mm3 C n + psubq %mm5, %mm3 C n - d + pxor %mm0, %mm0 + + psubq %mm1, %mm3 C n - (q1+1)*d + + por %mm3, %mm0 C remainder -> n2 + psrlq $32, %mm3 C high n - (q1+1)*d, 0 or -1 + + ASSERT(be,`C 0 or -1 + movd %mm3, %eax + addl $1, %eax + cmpl $1, %eax') + + pand %mm5, %mm3 C mask & d + + paddd %mm3, %mm0 C addback if necessary + + subl $1, %ecx + jnz L(inverse_top) + + + C Least significant limb. + C Same code as the loop, but there's no -4(%esi) limb to fetch. 
+ +L(inverse_last): + C eax + C ebx + C ecx + C edx + C esi &src[0] + C + C mm0 n2 + C mm4 m + C mm5 d + C mm6 32-l + C mm7 l + + movd (%esi), %mm1 C src[0] + psllq %mm7, %mm1 C n10 + + movq %mm1, %mm2 C n10 + movq %mm1, %mm3 C n10 + psrad $31, %mm1 C -n1 + pand %mm5, %mm1 C -n1 & d + paddd %mm2, %mm1 C nadj = n10+(-n1&d), ignore overflow + + psrld $31, %mm2 C n1 + paddd %mm0, %mm2 C n2+n1 + punpckldq %mm0, %mm1 C n2:nadj + + pmuludq %mm4, %mm2 C m*(n2+n1) + + paddq %mm2, %mm1 C n2:nadj + m*(n2+n1) + + psrlq $32, %mm1 C q1 = high(n2:nadj + m*(n2+n1)) + + pmuludq %mm5, %mm1 C q1*d + punpckldq %mm0, %mm3 C n + psubq %mm5, %mm3 C n - d + pxor %mm0, %mm0 + + psubq %mm1, %mm3 C n - (q1+1)*d + + por %mm3, %mm0 C remainder -> n2 + psrlq $32, %mm3 C high n - (q1+1)*d, 0 or -1 + + ASSERT(be,`C 0 or -1 + movd %mm3, %eax + addl $1, %eax + cmpl $1, %eax') + + movl SAVE_EBP, %ebp + pand %mm5, %mm3 C mask & d + + movl SAVE_ESI, %esi + paddd %mm3, %mm0 C addback if necessary + + psrld %mm7, %mm0 + + movd %mm0, %eax + + emms + ret + +EPILOGUE() diff --git a/gmp/mpn/x86/pentium4/sse2/mod_1_1.asm b/gmp/mpn/x86/pentium4/sse2/mod_1_1.asm deleted file mode 100644 index ee88babeee..0000000000 --- a/gmp/mpn/x86/pentium4/sse2/mod_1_1.asm +++ /dev/null @@ -1,166 +0,0 @@ -dnl x86-32 mpn_mod_1_1p for Pentium 4 and P6 models with SSE2 (i.e., 9,D,E,F). - -dnl Contributed to the GNU project by Torbjorn Granlund. - -dnl Copyright 2009, 2010 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. 
-dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. - -include(`../config.m4') - -C TODO: -C * Optimize. The present code was written quite straightforwardly. -C * Optimize post-loop reduction code; it is from mod_1s_4p, thus overkill. -C * Write a cps function that uses sse2 insns. - -C cycles/limb -C P6 model 0-8,10-12 - -C P6 model 9 (Banias) ? -C P6 model 13 (Dothan) ? -C P4 model 0-1 (Willamette) ? -C P4 model 2 (Northwood) 16 -C P4 model 3-4 (Prescott) 18 - -C INPUT PARAMETERS -C ap sp + 4 -C n sp + 8 -C b sp + 12 -C cps sp + 16 - -define(`B1modb', `%mm1') -define(`B2modb', `%mm2') -define(`ap', `%edx') -define(`n', `%eax') - - TEXT - ALIGN(16) -PROLOGUE(mpn_mod_1_1p) - push %ebx - mov 8(%esp), ap - mov 12(%esp), n - mov 20(%esp), %ecx - movd 8(%ecx), B1modb - movd 12(%ecx), B2modb - - lea -4(ap,n,4), ap - -C FIXME: See comment in generic/mod_1_1.c. 
- movd (ap), %mm7 - movd -4(ap), %mm4 - pmuludq B1modb, %mm7 - paddq %mm4, %mm7 - add $-2, n - jz L(end) - - ALIGN(8) -L(top): movq %mm7, %mm6 - psrlq $32, %mm7 C rh - movd -8(ap), %mm0 - add $-4, ap - pmuludq B2modb, %mm7 - pmuludq B1modb, %mm6 - add $-1, n - paddq %mm0, %mm7 - paddq %mm6, %mm7 - jnz L(top) - -L(end): pcmpeqd %mm4, %mm4 - psrlq $32, %mm4 C 0x00000000FFFFFFFF - pand %mm7, %mm4 C rl - psrlq $32, %mm7 C rh - pmuludq B1modb, %mm7 C rh,cl - paddq %mm4, %mm7 C rh,rl - movd 4(%ecx), %mm4 C cnt - psllq %mm4, %mm7 C rh,rl normalized - movq %mm7, %mm2 C rl in low half - psrlq $32, %mm7 C rh - movd (%ecx), %mm1 C bi - pmuludq %mm7, %mm1 C qh,ql - paddq %mm2, %mm1 C qh-1,ql - movd %mm1, %ecx C ql - psrlq $32, %mm1 C qh-1 - movd 16(%esp), %mm3 C b - pmuludq %mm1, %mm3 C (qh-1) * b - psubq %mm3, %mm2 C r in low half (could use psubd) - movd %mm2, %eax C r - mov 16(%esp), %ebx - sub %ebx, %eax C r - cmp %eax, %ecx - lea (%eax,%ebx), %edx - cmovc( %edx, %eax) - movd %mm4, %ecx C cnt - cmp %ebx, %eax - jae L(fix) - emms - pop %ebx - shr %cl, %eax - ret - -L(fix): sub %ebx, %eax - emms - pop %ebx - shr %cl, %eax - ret -EPILOGUE() - -PROLOGUE(mpn_mod_1_1p_cps) -C CAUTION: This is the same code as in k7/mod_1_1.asm - push %ebp - mov 12(%esp), %ebp - push %esi - bsr %ebp, %ecx - push %ebx - xor $31, %ecx - mov 16(%esp), %esi - sal %cl, %ebp - mov %ebp, %edx - not %edx - mov $-1, %eax - div %ebp - mov %eax, (%esi) C store bi - mov %ecx, 4(%esi) C store cnt - xor %ebx, %ebx - sub %ebp, %ebx - mov $1, %edx - shld %cl, %eax, %edx - imul %edx, %ebx - mul %ebx - add %ebx, %edx - not %edx - imul %ebp, %edx - add %edx, %ebp - cmp %edx, %eax - cmovc( %ebp, %edx) - shr %cl, %ebx - mov %ebx, 8(%esi) C store B1modb - shr %cl, %edx - mov %edx, 12(%esi) C store B2modb - pop %ebx - pop %esi - pop %ebp - ret -EPILOGUE() diff --git a/gmp/mpn/x86/pentium4/sse2/mod_1_4.asm b/gmp/mpn/x86/pentium4/sse2/mod_1_4.asm deleted file mode 100644 index eb2edb6297..0000000000 --- 
a/gmp/mpn/x86/pentium4/sse2/mod_1_4.asm +++ /dev/null @@ -1,269 +0,0 @@ -dnl x86-32 mpn_mod_1s_4p for Pentium 4 and P6 models with SSE2 (i.e. 9,D,E,F). - -dnl Contributed to the GNU project by Torbjorn Granlund. - -dnl Copyright 2009, 2010 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. - -include(`../config.m4') - -C TODO: -C * Optimize. The present code was written quite straightforwardly. -C * Optimize post-loop reduction code. -C * Write a cps function that uses sse2 insns. - -C cycles/limb -C P6 model 0-8,10-12 - -C P6 model 9 (Banias) ? -C P6 model 13 (Dothan) 3.4 -C P4 model 0-1 (Willamette) ? 
-C P4 model 2 (Northwood) 4 -C P4 model 3-4 (Prescott) 4.5 - -C INPUT PARAMETERS -C ap sp + 4 -C n sp + 8 -C b sp + 12 -C cps sp + 16 - -define(`B1modb', `%mm1') -define(`B2modb', `%mm2') -define(`B3modb', `%mm3') -define(`B4modb', `%mm4') -define(`B5modb', `%mm5') -define(`ap', `%edx') -define(`n', `%eax') - -ASM_START() - TEXT - ALIGN(16) -PROLOGUE(mpn_mod_1s_4p) - push %ebx - mov 8(%esp), ap - mov 12(%esp), n - mov 20(%esp), %ecx - - movd 8(%ecx), B1modb - movd 12(%ecx), B2modb - movd 16(%ecx), B3modb - movd 20(%ecx), B4modb - movd 24(%ecx), B5modb - - mov n, %ebx - lea -4(ap,n,4), ap - and $3, %ebx - je L(b0) - cmp $2, %ebx - jc L(b1) - je L(b2) - -L(b3): movd -4(ap), %mm7 - pmuludq B1modb, %mm7 - movd -8(ap), %mm6 - paddq %mm6, %mm7 - movd (ap), %mm6 - pmuludq B2modb, %mm6 - paddq %mm6, %mm7 - lea -24(ap), ap - add $-3, n - jz L(end) - jmp L(top) - -L(b0): movd -8(ap), %mm7 - pmuludq B1modb, %mm7 - movd -12(ap), %mm6 - paddq %mm6, %mm7 - movd -4(ap), %mm6 - pmuludq B2modb, %mm6 - paddq %mm6, %mm7 - movd (ap), %mm6 - pmuludq B3modb, %mm6 - paddq %mm6, %mm7 - lea -28(ap), ap - add $-4, n - jz L(end) - jmp L(top) - -L(b1): movd (ap), %mm7 - lea -16(ap), ap - dec n - jz L(x) - jmp L(top) - -L(b2): movd -4(ap), %mm7 C rl - punpckldq (ap), %mm7 C rh - lea -20(ap), ap - add $-2, n - jz L(end) - - ALIGN(8) -L(top): movd 4(ap), %mm0 - pmuludq B1modb, %mm0 - movd 0(ap), %mm6 - paddq %mm6, %mm0 - - movd 8(ap), %mm6 - pmuludq B2modb, %mm6 - paddq %mm6, %mm0 - - movd 12(ap), %mm6 - pmuludq B3modb, %mm6 - paddq %mm6, %mm0 - - movq %mm7, %mm6 - psrlq $32, %mm7 C rh - pmuludq B5modb, %mm7 - pmuludq B4modb, %mm6 - - paddq %mm0, %mm7 - paddq %mm6, %mm7 - - add $-16, ap - add $-4, n - jnz L(top) - -L(end): pcmpeqd %mm4, %mm4 - psrlq $32, %mm4 C 0x00000000FFFFFFFF - pand %mm7, %mm4 C rl - psrlq $32, %mm7 C rh - pmuludq B1modb, %mm7 C rh,cl - paddq %mm4, %mm7 C rh,rl -L(x): movd 4(%ecx), %mm4 C cnt - psllq %mm4, %mm7 C rh,rl normalized - movq %mm7, %mm2 C rl in low half - psrlq 
$32, %mm7 C rh - movd (%ecx), %mm1 C bi - pmuludq %mm7, %mm1 C qh,ql - paddq %mm2, %mm1 C qh-1,ql - movd %mm1, %ecx C ql - psrlq $32, %mm1 C qh-1 - movd 16(%esp), %mm3 C b - pmuludq %mm1, %mm3 C (qh-1) * b - psubq %mm3, %mm2 C r in low half (could use psubd) - movd %mm2, %eax C r - mov 16(%esp), %ebx - sub %ebx, %eax C r - cmp %eax, %ecx - lea (%eax,%ebx), %edx - cmovc( %edx, %eax) - movd %mm4, %ecx C cnt - cmp %ebx, %eax - jae L(fix) - emms - pop %ebx - shr %cl, %eax - ret - -L(fix): sub %ebx, %eax - emms - pop %ebx - shr %cl, %eax - ret -EPILOGUE() - - ALIGN(16) -PROLOGUE(mpn_mod_1s_4p_cps) -C CAUTION: This is the same code as in k7/mod_1_4.asm - push %ebp - push %edi - push %esi - push %ebx - mov 20(%esp), %ebp C FIXME: avoid bp for 0-idx - mov 24(%esp), %ebx - bsr %ebx, %ecx - xor $31, %ecx - sal %cl, %ebx C b << cnt - mov %ebx, %edx - not %edx - mov $-1, %eax - div %ebx - xor %edi, %edi - sub %ebx, %edi - mov $1, %esi - mov %eax, (%ebp) C store bi - mov %ecx, 4(%ebp) C store cnt - shld %cl, %eax, %esi - imul %edi, %esi - mov %eax, %edi - mul %esi - - add %esi, %edx - shr %cl, %esi - mov %esi, 8(%ebp) C store B1modb - - not %edx - imul %ebx, %edx - lea (%edx,%ebx), %esi - cmp %edx, %eax - cmovnc( %edx, %esi) - mov %edi, %eax - mul %esi - - add %esi, %edx - shr %cl, %esi - mov %esi, 12(%ebp) C store B2modb - - not %edx - imul %ebx, %edx - lea (%edx,%ebx), %esi - cmp %edx, %eax - cmovnc( %edx, %esi) - mov %edi, %eax - mul %esi - - add %esi, %edx - shr %cl, %esi - mov %esi, 16(%ebp) C store B3modb - - not %edx - imul %ebx, %edx - lea (%edx,%ebx), %esi - cmp %edx, %eax - cmovnc( %edx, %esi) - mov %edi, %eax - mul %esi - - add %esi, %edx - shr %cl, %esi - mov %esi, 20(%ebp) C store B4modb - - not %edx - imul %ebx, %edx - add %edx, %ebx - cmp %edx, %eax - cmovnc( %edx, %ebx) - - shr %cl, %ebx - mov %ebx, 24(%ebp) C store B5modb - - pop %ebx - pop %esi - pop %edi - pop %ebp - ret -EPILOGUE() diff --git a/gmp/mpn/x86/pentium4/sse2/mod_34lsub1.asm 
b/gmp/mpn/x86/pentium4/sse2/mod_34lsub1.asm index 31e25b79bc..1598b41785 100644 --- a/gmp/mpn/x86/pentium4/sse2/mod_34lsub1.asm +++ b/gmp/mpn/x86/pentium4/sse2/mod_34lsub1.asm @@ -1,32 +1,21 @@ dnl Intel Pentium 4 mpn_mod_34lsub1 -- remainder modulo 2^24-1. -dnl Copyright 2000-2003 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: +dnl Copyright 2000, 2001, 2002, 2003 Free Software Foundation, Inc. dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. 
+dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') diff --git a/gmp/mpn/x86/pentium4/sse2/mode1o.asm b/gmp/mpn/x86/pentium4/sse2/mode1o.asm index 778c478169..2f0b177a00 100644 --- a/gmp/mpn/x86/pentium4/sse2/mode1o.asm +++ b/gmp/mpn/x86/pentium4/sse2/mode1o.asm @@ -1,32 +1,21 @@ dnl Intel Pentium-4 mpn_modexact_1_odd -- mpn by limb exact remainder. dnl Copyright 2001, 2002, 2007 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. 
dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') @@ -112,7 +101,7 @@ ifdef(`PIC',` psubd %mm0, %mm6 C inv = 2*inv - inv*inv*d - ASSERT(e,` C expect d*inv == 1 mod 2^GMP_LIMB_BITS + ASSERT(e,` C expect d*inv == 1 mod 2^BITS_PER_MP_LIMB pushl %eax FRAME_pushl() movd %mm6, %eax imul PARAM_DIVISOR, %eax @@ -124,13 +113,13 @@ ifdef(`PIC',` C The dependent chain here is as follows. C -C latency -C psubq s = (src-cbit) - climb 2 -C pmuludq q = s*inverse 8 -C pmuludq prod = q*divisor 8 -C psrlq climb = high(prod) 2 -C -- -C 20 +C latency +C psubq s = (src-cbit) - climb 2 +C pmuludq q = s*inverse 8 +C pmuludq prod = q*divisor 8 +C psrlq climb = high(prod) 2 +C -- +C 20 C C Yet the loop measures 19.0 c/l, so obviously there's something gained C there over a straight reading of the chip documentation. diff --git a/gmp/mpn/x86/pentium4/sse2/mul_1.asm b/gmp/mpn/x86/pentium4/sse2/mul_1.asm index 6347b8bf62..07be951921 100644 --- a/gmp/mpn/x86/pentium4/sse2/mul_1.asm +++ b/gmp/mpn/x86/pentium4/sse2/mul_1.asm @@ -1,48 +1,37 @@ dnl mpn_mul_1 for Pentium 4 and P6 models with SSE2 (i.e., 9,D,E,F). -dnl Copyright 2005, 2007, 2011 Free Software Foundation, Inc. - +dnl Copyright 2005, 2007 Free Software Foundation, Inc. +dnl dnl This file is part of the GNU MP Library. dnl dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. 
-dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. +dnl it under the terms of the GNU Lesser General Public License as published +dnl by the Free Software Foundation; either version 3 of the License, or (at +dnl your option) any later version. dnl dnl The GNU MP Library is distributed in the hope that it will be useful, but dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') -C cycles/limb -C P6 model 0-8,10-12 - -C P6 model 9 (Banias) 4.17 -C P6 model 13 (Dothan) 4.17 -C P4 model 0-1 (Willamette) 4 -C P4 model 2 (Northwood) 4 -C P4 model 3-4 (Prescott) 4.55 - C TODO: C * Tweak eax/edx offsets in loop as to save some lea's C * Perhaps software pipeline small-case code +C cycles/limb +C P6 model 0-8,10-12) - +C P6 model 9 (Banias) ? 
+C P6 model 13 (Dothan) 4.17 +C P4 model 0-1 (Willamette): 4 +C P4 model 2 (Northwood): 4 +C P4 model 3-4 (Prescott): 4.55 + C INPUT PARAMETERS C rp sp + 4 C up sp + 8 @@ -51,13 +40,22 @@ C v0 sp + 16 TEXT ALIGN(16) +PROLOGUE(mpn_mul_1c) + mov 4(%esp), %edx + mov 8(%esp), %eax + mov 12(%esp), %ecx + movd 16(%esp), %mm7 + movd 20(%esp), %mm6 + jmp L(ent) +EPILOGUE() + ALIGN(16) PROLOGUE(mpn_mul_1) - pxor %mm6, %mm6 -L(ent): mov 4(%esp), %edx + mov 4(%esp), %edx mov 8(%esp), %eax mov 12(%esp), %ecx movd 16(%esp), %mm7 - cmp $4, %ecx + pxor %mm6, %mm6 +L(ent): cmp $4, %ecx jnc L(big) L(lp0): movd (%eax), %mm0 @@ -158,7 +156,3 @@ L(end): pmuludq %mm7, %mm2 emms ret EPILOGUE() -PROLOGUE(mpn_mul_1c) - movd 20(%esp), %mm6 - jmp L(ent) -EPILOGUE() diff --git a/gmp/mpn/x86/pentium4/sse2/mul_basecase.asm b/gmp/mpn/x86/pentium4/sse2/mul_basecase.asm index 6e3775ae09..2628e5eb72 100644 --- a/gmp/mpn/x86/pentium4/sse2/mul_basecase.asm +++ b/gmp/mpn/x86/pentium4/sse2/mul_basecase.asm @@ -1,32 +1,21 @@ dnl mpn_mul_basecase for Pentium 4 and P6 models with SSE2 (i.e., 9,D,E,F). dnl Copyright 2001, 2002, 2005, 2007 Free Software Foundation, Inc. - +dnl dnl This file is part of the GNU MP Library. dnl dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. +dnl it under the terms of the GNU Lesser General Public License as published +dnl by the Free Software Foundation; either version 3 of the License, or (at +dnl your option) any later version. 
dnl dnl The GNU MP Library is distributed in the hope that it will be useful, but dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') diff --git a/gmp/mpn/x86/pentium4/sse2/popcount.asm b/gmp/mpn/x86/pentium4/sse2/popcount.asm index b8238b9b66..cb982ade46 100644 --- a/gmp/mpn/x86/pentium4/sse2/popcount.asm +++ b/gmp/mpn/x86/pentium4/sse2/popcount.asm @@ -1,66 +1,52 @@ dnl X86-32 and X86-64 mpn_popcount using SSE2. -dnl Copyright 2006, 2007, 2011 Free Software Foundation, Inc. - +dnl Copyright 2006, 2007 Free Software Foundation, Inc. +dnl dnl This file is part of the GNU MP Library. dnl dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. +dnl it under the terms of the GNU Lesser General Public License as published +dnl by the Free Software Foundation; either version 3 of the License, or (at +dnl your option) any later version. 
dnl dnl The GNU MP Library is distributed in the hope that it will be useful, but dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') -C 32-bit popcount hamdist -C cycles/limb cycles/limb -C P5 - -C P6 model 0-8,10-12 - -C P6 model 9 (Banias) ? -C P6 model 13 (Dothan) 4 -C P4 model 0 (Willamette) ? -C P4 model 1 (?) ? -C P4 model 2 (Northwood) 3.9 -C P4 model 3 (Prescott) ? -C P4 model 4 (Nocona) ? -C AMD K6 - -C AMD K7 - -C AMD K8 ? - -C 64-bit popcount hamdist -C cycles/limb cycles/limb -C P4 model 4 (Nocona): 8 -C AMD K8,K9 7.5 -C AMD K10 3.5 -C Intel core2 3.68 -C Intel corei 3.15 -C Intel atom 10.8 -C VIA nano 6.5 +C 32-bit popcount hamdist +C cycles/limb cycles/limb +C P5: - +C P6 model 0-8,10-12) - +C P6 model 9 (Banias) ? +C P6 model 13 (Dothan) 4 +C P4 model 0 (Willamette) ? +C P4 model 1 (?) ? +C P4 model 2 (Northwood) 3.9 +C P4 model 3 (Prescott) ? +C P4 model 4 (Nocona) ? +C K6: - +C K7: - +C K8: ? + +C 64-bit popcount hamdist +C cycles/limb cycles/limb +C P4 model 4 (Nocona): 8 +C K8: 7.5 +C K10: 3.5 +C P6-15: 3.68 C TODO C * Make a mpn_hamdist based on this. Alignment could either be handled by C using movdqu for one operand and movdqa for the other, or by painfully -C shifting as we go. Unfortunately, there seem to be no usable shift +C shifting as we go. 
Unfortunately, there seem to be no useable shift C instruction, except for one that takes an immediate count. C * It would probably be possible to cut a few cycles/limb using software C pipelining. diff --git a/gmp/mpn/x86/pentium4/sse2/rsh1add_n.asm b/gmp/mpn/x86/pentium4/sse2/rsh1add_n.asm index f421d1323e..bbf43245cb 100644 --- a/gmp/mpn/x86/pentium4/sse2/rsh1add_n.asm +++ b/gmp/mpn/x86/pentium4/sse2/rsh1add_n.asm @@ -1,32 +1,21 @@ dnl Intel Pentium-4 mpn_rsh1add_n -- mpn (x+y)/2 -dnl Copyright 2001-2004 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: +dnl Copyright 2001, 2002, 2003, 2004 Free Software Foundation, Inc. dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') diff --git a/gmp/mpn/x86/pentium4/sse2/sqr_basecase.asm b/gmp/mpn/x86/pentium4/sse2/sqr_basecase.asm index 2dd57d25d9..fc56f164ed 100644 --- a/gmp/mpn/x86/pentium4/sse2/sqr_basecase.asm +++ b/gmp/mpn/x86/pentium4/sse2/sqr_basecase.asm @@ -1,32 +1,21 @@ dnl mpn_sqr_basecase for Pentium 4 and P6 models with SSE2 (i.e., 9,D,E,F). dnl Copyright 2001, 2002, 2007 Free Software Foundation, Inc. - +dnl dnl This file is part of the GNU MP Library. dnl dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. +dnl it under the terms of the GNU Lesser General Public License as published +dnl by the Free Software Foundation; either version 3 of the License, or (at +dnl your option) any later version. dnl dnl The GNU MP Library is distributed in the hope that it will be useful, but dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. 
dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') @@ -41,7 +30,7 @@ C * Look into different loop alignment, we now expand the code about 50 bytes C with possibly needless alignment. C * Use OSP, should solve feed-in latency problems. C * Address relative slowness for un<=3 for Pentium M. The old code is there -C considerably faster. (1:20/14, 2:34:32, 3:66/57) +C consideraly faster. (1:20/14, 2:34:32, 3:66/57) C INPUT PARAMETERS C rp sp + 4 diff --git a/gmp/mpn/x86/pentium4/sse2/sub_n.asm b/gmp/mpn/x86/pentium4/sse2/sub_n.asm index 5ba1c018ec..02d5f01474 100644 --- a/gmp/mpn/x86/pentium4/sse2/sub_n.asm +++ b/gmp/mpn/x86/pentium4/sse2/sub_n.asm @@ -1,44 +1,37 @@ dnl Intel Pentium-4 mpn_sub_n -- mpn subtraction. dnl Copyright 2001, 2002 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. 
dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') -C cycles/limb -C dst!=src1,2 dst==src1 dst==src2 -C P6 model 0-8,10-12 - -C P6 model 9 (Banias) ? -C P6 model 13 (Dothan) ? -C P4 model 0-1 (Willamette) ? -C P4 model 2 (Northwood) 4 6 6 -C P4 model 3-4 (Prescott) 4.25 7.5 7.5 +C P4 Willamette, Northwood: 4.0 cycles/limb if dst!=src1 and dst!=src2 +C 6.0 cycles/limb if dst==src1 or dst==src2 +C P4 Prescott: >= 5 cycles/limb + + +C mp_limb_t mpn_sub_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, +C mp_size_t size); +C mp_limb_t mpn_sub_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, +C mp_size_t size, mp_limb_t carry); +C +C The main loop code is 2x unrolled so that the carry bit can alternate +C between mm0 and mm1. 
defframe(PARAM_CARRY,20) defframe(PARAM_SIZE, 16) @@ -54,8 +47,10 @@ define(SAVE_EBX,`PARAM_SRC1') PROLOGUE(mpn_sub_nc) deflit(`FRAME',0) + movd PARAM_CARRY, %mm0 jmp L(start_nc) + EPILOGUE() ALIGN(8) @@ -63,16 +58,16 @@ PROLOGUE(mpn_sub_n) deflit(`FRAME',0) pxor %mm0, %mm0 L(start_nc): - mov PARAM_SRC1, %eax - mov %ebx, SAVE_EBX - mov PARAM_SRC2, %ebx - mov PARAM_DST, %edx - mov PARAM_SIZE, %ecx + movl PARAM_SRC1, %eax + movl %ebx, SAVE_EBX + movl PARAM_SRC2, %ebx + movl PARAM_DST, %edx + movl PARAM_SIZE, %ecx - lea (%eax,%ecx,4), %eax C src1 end - lea (%ebx,%ecx,4), %ebx C src2 end - lea (%edx,%ecx,4), %edx C dst end - neg %ecx C -size + leal (%eax,%ecx,4), %eax C src1 end + leal (%ebx,%ecx,4), %ebx C src2 end + leal (%edx,%ecx,4), %edx C dst end + negl %ecx C -size L(top): C eax src1 end @@ -90,7 +85,7 @@ L(top): psrlq $63, %mm1 - add $1, %ecx + addl $1, %ecx jz L(done_mm1) movd (%eax,%ecx,4), %mm0 @@ -102,17 +97,18 @@ L(top): psrlq $63, %mm0 - add $1, %ecx + addl $1, %ecx jnz L(top) + movd %mm0, %eax - mov SAVE_EBX, %ebx + movl SAVE_EBX, %ebx emms ret L(done_mm1): movd %mm1, %eax - mov SAVE_EBX, %ebx + movl SAVE_EBX, %ebx emms ret diff --git a/gmp/mpn/x86/pentium4/sse2/submul_1.asm b/gmp/mpn/x86/pentium4/sse2/submul_1.asm index 020675bd7b..ceb41f2ac0 100644 --- a/gmp/mpn/x86/pentium4/sse2/submul_1.asm +++ b/gmp/mpn/x86/pentium4/sse2/submul_1.asm @@ -1,71 +1,60 @@ dnl Intel Pentium-4 mpn_submul_1 -- Multiply a limb vector with a limb and dnl subtract the result from a second limb vector. -dnl Copyright 2001, 2002, 2008, 2010 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. +dnl Copyright 2001, 2002 Free Software Foundation, Inc. 
dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') -C cycles/limb -C P6 model 0-8,10-12 - -C P6 model 9 (Banias) 6.8 -C P6 model 13 (Dothan) 6.9 -C P4 model 0-1 (Willamette) ? -C P4 model 2 (Northwood) 5.87 -C P4 model 3-4 (Prescott) 6.5 +C P4: 7 cycles/limb, unstable timing, at least on early Pentium4 silicon +C (stepping 10). -C This code represents a step forwards compared to the code available before -C GMP 5.1, but it is not carefully tuned for either P6 or P4. In fact, it is -C not good for P6. For P4 it saved a bit over 1 c/l for both Northwood and -C Prescott compared to the old code. 
+ +C mp_limb_t mpn_submul_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, +C mp_limb_t mult); +C mp_limb_t mpn_submul_1c (mp_ptr dst, mp_srcptr src, mp_size_t size, +C mp_limb_t mult, mp_limb_t carry); +C +C This code is not particularly good at 7 c/l. The dependent chain is only +C 4 c/l and there's only 4 MMX unit instructions, so it's not clear why that +C speed isn't achieved. C C The arrangements made here to get a two instruction dependent chain are -C slightly subtle. In the loop the carry (or borrow rather) is a negative so -C that a paddq can be used to give a low limb ready to store, and a high limb -C ready to become the new carry after a psrlq. +C slightly subtle. In the loop the carry (or borrow rather) is a negative +C so that a paddq can be used to give a low limb ready to store, and a high +C limb ready to become the new carry after a psrlq. C -C If the carry was a simple twos complement negative then the psrlq shift would -C need to bring in 0 bits or 1 bits according to whether the high was zero or -C non-zero, since a non-zero value would represent a negative needing sign -C extension. That wouldn't be particularly easy to arrange and certainly would -C add an instruction to the dependent chain, so instead an offset is applied so -C that the high limb will be 0xFFFFFFFF+c. With c in the range -0xFFFFFFFF to -C 0, the value 0xFFFFFFFF+c is in the range 0 to 0xFFFFFFFF and is therefore -C always positive and can always have 0 bits shifted in, which is what psrlq -C does. +C If the carry was a simple twos complement negative then the psrlq shift +C would need to bring in 0 bits or 1 bits according to whether the high was +C zero or non-zero, since a non-zero value would represent a negative +C needing sign extension. That wouldn't be particularly easy to arrange and +C certainly would add an instruction to the dependent chain, so instead an +C offset is applied so that the high limb will be 0xFFFFFFFF+c. 
With c in +C the range -0xFFFFFFFF to 0, the value 0xFFFFFFFF+c is in the range 0 to +C 0xFFFFFFFF and is therefore always positive and can always have 0 bits +C shifted in, which is what psrlq does. C C The extra 0xFFFFFFFF must be subtracted before c is used, but that can be C done off the dependent chain. The total adjustment then is to add -C 0xFFFFFFFF00000000 to offset the new carry, and subtract 0x00000000FFFFFFFF -C to remove the offset from the current carry, for a net add of -C 0xFFFFFFFE00000001. In the code this is applied to the destination limb when -C fetched. +C 0xFFFFFFFF00000000 to offset the new carry, and subtract +C 0x00000000FFFFFFFF to remove the offset from the current carry, for a net +C add of 0xFFFFFFFE00000001. In the code this is applied to the destination +C limb when fetched. C C It's also possible to view the 0xFFFFFFFF adjustment as a ones-complement C negative, which is how it's undone for the return value, but that doesn't @@ -91,16 +80,16 @@ deflit(`FRAME',0) pxor %mm1, %mm1 C initial borrow L(start_1c): - mov PARAM_SRC, %eax + movl PARAM_SRC, %eax pcmpeqd %mm0, %mm0 movd PARAM_MULTIPLIER, %mm7 pcmpeqd %mm6, %mm6 - mov PARAM_DST, %edx + movl PARAM_DST, %edx psrlq $32, %mm0 C 0x00000000FFFFFFFF - mov PARAM_SIZE, %ecx + movl PARAM_SIZE, %ecx psllq $32, %mm6 C 0xFFFFFFFF00000000 psubq %mm0, %mm6 C 0xFFFFFFFE00000001 @@ -108,75 +97,32 @@ L(start_1c): psubq %mm1, %mm0 C 0xFFFFFFFF - borrow - movd (%eax), %mm3 C up - movd (%edx), %mm4 C rp - - add $-1, %ecx - paddq %mm6, %mm4 C add 0xFFFFFFFE00000001 - pmuludq %mm7, %mm3 - jnz L(gt1) - psubq %mm3, %mm4 C prod - paddq %mm4, %mm0 C borrow - movd %mm0, (%edx) C result - jmp L(rt) - -L(gt1): movd 4(%eax), %mm1 C up - movd 4(%edx), %mm2 C rp - - add $-1, %ecx - jz L(eev) - - ALIGN(16) -L(top): paddq %mm6, %mm2 C add 0xFFFFFFFE00000001 + C eax src, incrementing + C ebx + C ecx loop counter, decrementing + C edx dst, incrementing + C + C mm0 0xFFFFFFFF - borrow + C mm6 0xFFFFFFFE00000001 + C 
mm7 multiplier + +L(loop): + movd (%eax), %mm1 C src + leal 4(%eax), %eax + movd (%edx), %mm2 C dst + paddq %mm6, %mm2 C add 0xFFFFFFFE00000001 pmuludq %mm7, %mm1 - psubq %mm3, %mm4 C prod - movd 8(%eax), %mm3 C up - paddq %mm4, %mm0 C borrow - movd 8(%edx), %mm4 C rp - movd %mm0, (%edx) C result - psrlq $32, %mm0 - - add $-1, %ecx - jz L(eod) - - paddq %mm6, %mm4 C add 0xFFFFFFFE00000001 - pmuludq %mm7, %mm3 psubq %mm1, %mm2 C prod - movd 12(%eax), %mm1 C up paddq %mm2, %mm0 C borrow - movd 12(%edx), %mm2 C rp - movd %mm0, 4(%edx) C result - psrlq $32, %mm0 - - lea 8(%eax), %eax - lea 8(%edx), %edx - add $-1, %ecx - jnz L(top) - - -L(eev): paddq %mm6, %mm2 C add 0xFFFFFFFE00000001 - pmuludq %mm7, %mm1 - psubq %mm3, %mm4 C prod - paddq %mm4, %mm0 C borrow + subl $1, %ecx movd %mm0, (%edx) C result psrlq $32, %mm0 - psubq %mm1, %mm2 C prod - paddq %mm2, %mm0 C borrow - movd %mm0, 4(%edx) C result -L(rt): psrlq $32, %mm0 + leal 4(%edx), %edx + jnz L(loop) + movd %mm0, %eax - not %eax + notl %eax emms ret -L(eod): paddq %mm6, %mm4 C add 0xFFFFFFFE00000001 - pmuludq %mm7, %mm3 - psubq %mm1, %mm2 C prod - paddq %mm2, %mm0 C borrow - movd %mm0, 4(%edx) C result - psrlq $32, %mm0 - psubq %mm3, %mm4 C prod - paddq %mm4, %mm0 C borrow - movd %mm0, 8(%edx) C result - jmp L(rt) EPILOGUE() diff --git a/gmp/mpn/x86/rshift.asm b/gmp/mpn/x86/rshift.asm index a60dcaa4b2..8e33eabd61 100644 --- a/gmp/mpn/x86/rshift.asm +++ b/gmp/mpn/x86/rshift.asm @@ -1,43 +1,33 @@ dnl x86 mpn_rshift -- mpn right shift. -dnl Copyright 1992, 1994, 1996, 1999-2002 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: +dnl Copyright 1992, 1994, 1996, 1999, 2000, 2001, 2002 Free Software +dnl Foundation, Inc. 
dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. 
include(`../config.m4') C cycles/limb -C P54 7.5 -C P55 7.0 -C P6 2.5 -C K6 4.5 -C K7 5.0 -C P4 16.5 +C P54: 7.5 +C P55: 7.0 +C P6: 2.5 +C K6: 4.5 +C K7: 5.0 +C P4: 16.5 C mp_limb_t mpn_rshift (mp_ptr dst, mp_srcptr src, mp_size_t size, diff --git a/gmp/mpn/x86/sec_tabselect.asm b/gmp/mpn/x86/sec_tabselect.asm deleted file mode 100644 index c7c2e059f1..0000000000 --- a/gmp/mpn/x86/sec_tabselect.asm +++ /dev/null @@ -1,115 +0,0 @@ -dnl x86 mpn_sec_tabselect. - -dnl Copyright 2011 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. - -include(`../config.m4') - - -C cycles/limb -C P5 ? -C P6 model 0-8,10-12 ? -C P6 model 9 (Banias) ? -C P6 model 13 (Dothan) ? -C P4 model 0 (Willamette) ? -C P4 model 1 (?) ? -C P4 model 2 (Northwood) 4.5 -C P4 model 3 (Prescott) ? -C P4 model 4 (Nocona) ? -C Intel Atom ? -C AMD K6 ? -C AMD K7 3.4 -C AMD K8 ? -C AMD K10 ? - -C NOTES -C * This has not been tuned for any specific processor. Its speed should not -C be too bad, though. 
-C * Using SSE2 could result in many-fold speedup. - -C mpn_sec_tabselect (mp_limb_t *rp, mp_limb_t *tp, mp_size_t n, mp_size_t nents, mp_size_t which) -define(`rp', `%edi') -define(`tp', `%esi') -define(`n', `%ebx') -define(`nents', `%ecx') -define(`which', `36(%esp)') - -define(`i', `%ebp') -define(`maskp', `20(%esp)') -define(`maskn', `32(%esp)') - -ASM_START() - TEXT - ALIGN(16) -PROLOGUE(mpn_sec_tabselect) - push %edi - push %esi - push %ebx - push %ebp - mov 20(%esp), rp - mov 24(%esp), tp - mov 28(%esp), n - mov 32(%esp), nents - - lea (rp,n,4), rp - lea (tp,n,4), tp - sub nents, which -L(outer): - mov which, %eax - add nents, %eax - neg %eax C set CF iff 'which' != k - sbb %eax, %eax - mov %eax, maskn - not %eax - mov %eax, maskp - - mov n, i - neg i - - ALIGN(16) -L(top): mov (tp,i,4), %eax - and maskp, %eax - mov (rp,i,4), %edx - and maskn, %edx - or %edx, %eax - mov %eax, (rp,i,4) - inc i - js L(top) - -L(end): mov n, %eax - lea (tp,%eax,4), tp - dec nents - jne L(outer) - -L(outer_end): - pop %ebp - pop %ebx - pop %esi - pop %edi - ret -EPILOGUE() diff --git a/gmp/mpn/x86/sqr_basecase.asm b/gmp/mpn/x86/sqr_basecase.asm index 39f8a89805..9a7e13327b 100644 --- a/gmp/mpn/x86/sqr_basecase.asm +++ b/gmp/mpn/x86/sqr_basecase.asm @@ -1,43 +1,32 @@ dnl x86 generic mpn_sqr_basecase -- square an mpn number. dnl Copyright 1999, 2000, 2002, 2003 Free Software Foundation, Inc. - +dnl dnl This file is part of the GNU MP Library. dnl dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. 
+dnl it under the terms of the GNU Lesser General Public License as published +dnl by the Free Software Foundation; either version 3 of the License, or (at +dnl your option) any later version. dnl dnl The GNU MP Library is distributed in the hope that it will be useful, but dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') C cycles/crossproduct cycles/triangleproduct -C P5 -C P6 -C K6 -C K7 -C P4 +C P5: +C P6: +C K6: +C K7: +C P4: C void mpn_sqr_basecase (mp_ptr dst, mp_srcptr src, mp_size_t size); diff --git a/gmp/mpn/x86/t-zdisp.sh b/gmp/mpn/x86/t-zdisp.sh index 61efdd6c4f..6c55067b6c 100755 --- a/gmp/mpn/x86/t-zdisp.sh +++ b/gmp/mpn/x86/t-zdisp.sh @@ -2,31 +2,20 @@ # # Copyright 2000 Free Software Foundation, Inc. # -# This file is part of the GNU MP Library. +# This file is part of the GNU MP Library. # -# The GNU MP Library is free software; you can redistribute it and/or modify -# it under the terms of either: +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published +# by the Free Software Foundation; either version 3 of the License, or (at +# your option) any later version. # -# * the GNU Lesser General Public License as published by the Free -# Software Foundation; either version 3 of the License, or (at your -# option) any later version. 
+# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. # -# or -# -# * the GNU General Public License as published by the Free Software -# Foundation; either version 2 of the License, or (at your option) any -# later version. -# -# or both in parallel, as here. -# -# The GNU MP Library is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -# for more details. -# -# You should have received copies of the GNU General Public License and the -# GNU Lesser General Public License along with the GNU MP Library. If not, -# see https://www.gnu.org/licenses/. +# You should have received a copy of the GNU Lesser General Public License +# along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. # Usage: cd $(builddir)/mpn diff --git a/gmp/mpn/x86/t-zdisp2.pl b/gmp/mpn/x86/t-zdisp2.pl index b441b6579a..d5e2d93dc0 100755 --- a/gmp/mpn/x86/t-zdisp2.pl +++ b/gmp/mpn/x86/t-zdisp2.pl @@ -2,31 +2,20 @@ # # Copyright 2001, 2002 Free Software Foundation, Inc. # -# This file is part of the GNU MP Library. +# This file is part of the GNU MP Library. # -# The GNU MP Library is free software; you can redistribute it and/or modify -# it under the terms of either: +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published +# by the Free Software Foundation; either version 3 of the License, or (at +# your option) any later version. # -# * the GNU Lesser General Public License as published by the Free -# Software Foundation; either version 3 of the License, or (at your -# option) any later version. 
+# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. # -# or -# -# * the GNU General Public License as published by the Free Software -# Foundation; either version 2 of the License, or (at your option) any -# later version. -# -# or both in parallel, as here. -# -# The GNU MP Library is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -# for more details. -# -# You should have received copies of the GNU General Public License and the -# GNU Lesser General Public License along with the GNU MP Library. If not, -# see https://www.gnu.org/licenses/. +# You should have received a copy of the GNU Lesser General Public License +# along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. # Usage: cd $(builddir)/mpn @@ -82,7 +71,7 @@ sub process { } } -# Ensure we're using the right SQR_TOOM2_THRESHOLD for the part of the +# Ensure we're using the right SQR_KARATSUBA_THRESHOLD for the part of the # tree being processed. 
sub process_mparam { my $file = "$File::Find::dir/gmp-mparam.h"; @@ -90,10 +79,10 @@ sub process_mparam { print "$file\n" if $opt{'t'}; open MPARAM, "<$file" or die; while (<MPARAM>) { - if (/^#define SQR_TOOM2_THRESHOLD[ \t]*([0-9][0-9]*)/) { + if (/^#define SQR_KARATSUBA_THRESHOLD[ \t]*([0-9][0-9]*)/) { open KARA, ">$tempfile" or die; - print KARA "define(\`SQR_TOOM2_THRESHOLD',$1)\n\n"; - print "define(\`SQR_TOOM2_THRESHOLD',$1)\n" if $opt{'t'}; + print KARA "define(\`SQR_KARATSUBA_THRESHOLD',$1)\n\n"; + print "define(\`SQR_KARATSUBA_THRESHOLD',$1)\n" if $opt{'t'}; close KARA or die; last; } diff --git a/gmp/mpn/x86/udiv.asm b/gmp/mpn/x86/udiv.asm index a3ee08860f..5c7d3f3533 100644 --- a/gmp/mpn/x86/udiv.asm +++ b/gmp/mpn/x86/udiv.asm @@ -1,32 +1,21 @@ dnl x86 mpn_udiv_qrnnd -- 2 by 1 limb division dnl Copyright 1999, 2000, 2002 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') diff --git a/gmp/mpn/x86/umul.asm b/gmp/mpn/x86/umul.asm index 34fe434400..d0116de6d9 100644 --- a/gmp/mpn/x86/umul.asm +++ b/gmp/mpn/x86/umul.asm @@ -1,32 +1,21 @@ dnl mpn_umul_ppmm -- 1x1->2 limb multiplication dnl Copyright 1999, 2000, 2002 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. 
dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') diff --git a/gmp/mpn/x86/x86-defs.m4 b/gmp/mpn/x86/x86-defs.m4 index 1538b6820c..5b4a8e1fad 100644 --- a/gmp/mpn/x86/x86-defs.m4 +++ b/gmp/mpn/x86/x86-defs.m4 @@ -4,33 +4,23 @@ divert(-1) dnl m4 macros for x86 assembler. -dnl Copyright 1999-2003, 2007, 2010, 2012 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: +dnl Copyright 1999, 2000, 2001, 2002, 2003, 2007 Free Software Foundation, +dnl Inc. dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. 
+dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. dnl Notes: @@ -51,7 +41,7 @@ dnl This is only a problem in macro definitions, not in ordinary text, dnl and not in macro parameters like text passed to forloop() or ifdef(). -deflit(GMP_LIMB_BYTES, 4) +deflit(BYTES_PER_MP_LIMB, 4) dnl Libtool gives -DPIC -DDLL_EXPORT to indicate a cygwin or mingw DLL. We @@ -68,41 +58,24 @@ dnl order they appear in that structure. 
define(CPUVEC_FUNCS_LIST, ``add_n', -`addlsh1_n', -`addlsh2_n', `addmul_1', -`addmul_2', -`bdiv_dbm1c', -`cnd_add_n', -`cnd_sub_n', -`com', `copyd', `copyi', `divexact_1', +`divexact_by3c', `divrem_1', `gcd_1', `lshift', -`lshiftc', `mod_1', -`mod_1_1p', -`mod_1_1p_cps', -`mod_1s_2p', -`mod_1s_2p_cps', -`mod_1s_4p', -`mod_1s_4p_cps', `mod_34lsub1', `modexact_1c_odd', `mul_1', `mul_basecase', -`mullo_basecase', `preinv_divrem_1', `preinv_mod_1', -`redc_1', -`redc_2', `rshift', `sqr_basecase', `sub_n', -`sublsh1_n', `submul_1'') @@ -922,7 +895,7 @@ dnl movl_code_address(L(foo),%eax) dnl dnl This macro is only meant for use in ASSERT()s or when testing, since dnl the PIC sequence it generates will want to be done with a ret balancing -dnl the call on CPUs with return address branch prediction. +dnl the call on CPUs with return address branch predition. dnl dnl The addl generated here has a backward reference to the label, and so dnl won't suffer from the two forwards references bug in old gas (described @@ -955,9 +928,7 @@ m4_assert_numargs(1) dnl Usage LEA(symbol,reg) -define(`LEA', -m4_assert_numargs(2) -`ifdef(`PIC',` +define(`LEA',` define(`EPILOGUE_cpu', ` L(movl_eip_`'substr($2,1)): @@ -965,12 +936,11 @@ L(movl_eip_`'substr($2,1)): ret_internal SIZE($'`1, .-$'`1)') - call L(movl_eip_`'substr($2,1)) - addl $_GLOBAL_OFFSET_TABLE_, $2 - movl $1@GOT($2), $2 -',` - movl `$'$1, $2 -')') + call L(movl_eip_`'substr($2,1)) + addl $_GLOBAL_OFFSET_TABLE_, $2 + movl $1@GOT($2), $2 +') + define(`DEF_OBJECT', m4_assert_numargs_range(1,2) @@ -983,17 +953,4 @@ define(`END_OBJECT', m4_assert_numargs(1) ` SIZE(`$1',.-`$1')') -dnl Usage: CALL(funcname) -dnl - -define(`CALL', -m4_assert_numargs(1) -`ifdef(`PIC', - `call GSYM_PREFIX`'$1@PLT', - `call GSYM_PREFIX`'$1')') - -ifdef(`PIC', -`define(`PIC_WITH_EBX')', -`undefine(`PIC_WITH_EBX')') - divert`'dnl |