diff options
Diffstat (limited to 'gmp/mpn/x86/atom')
30 files changed, 0 insertions, 3720 deletions
diff --git a/gmp/mpn/x86/atom/aorrlsh1_n.asm b/gmp/mpn/x86/atom/aorrlsh1_n.asm deleted file mode 100644 index cd1a650022..0000000000 --- a/gmp/mpn/x86/atom/aorrlsh1_n.asm +++ /dev/null @@ -1,53 +0,0 @@ -dnl Intel Atom mpn_rsblsh1_n -- rp[] = (vp[] << 1) - up[] - -dnl Contributed to the GNU project by Marco Bodrato. - -dnl Copyright 2011 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. - -include(`../config.m4') - -define(LSH, 1) -define(RSH, 31) - -ifdef(`OPERATION_addlsh1_n', ` - define(M4_inst, adc) - define(M4_opp, sub) - define(M4_function, mpn_addlsh1_n) - define(M4_function_c, mpn_addlsh1_nc) -',`ifdef(`OPERATION_rsblsh1_n', ` - define(M4_inst, sbb) - define(M4_opp, add) - define(M4_function, mpn_rsblsh1_n) - define(M4_function_c, mpn_rsblsh1_nc) -',`m4_error(`Need OPERATION_addlsh1_n or OPERATION_rsblsh1_n -')')') - -MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_addlsh1_nc mpn_rsblsh1_n mpn_rsblsh1_nc) - -include_mpn(`x86/atom/aorrlshC_n.asm') diff --git a/gmp/mpn/x86/atom/aorrlsh2_n.asm b/gmp/mpn/x86/atom/aorrlsh2_n.asm deleted file mode 100644 index 10f4419de9..0000000000 --- a/gmp/mpn/x86/atom/aorrlsh2_n.asm +++ /dev/null @@ -1,53 +0,0 @@ -dnl Intel Atom mpn_addlsh2_n/mpn_rsblsh2_n -- rp[] = (vp[] << 2) +- up[] - -dnl Contributed to the GNU project by Marco Bodrato. - -dnl Copyright 2011 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. - -include(`../config.m4') - -define(LSH, 2) -define(RSH, 30) - -ifdef(`OPERATION_addlsh2_n', ` - define(M4_inst, adcl) - define(M4_opp, subl) - define(M4_function, mpn_addlsh2_n) - define(M4_function_c, mpn_addlsh2_nc) -',`ifdef(`OPERATION_rsblsh2_n', ` - define(M4_inst, sbbl) - define(M4_opp, addl) - define(M4_function, mpn_rsblsh2_n) - define(M4_function_c, mpn_rsblsh2_nc) -',`m4_error(`Need OPERATION_addlsh2_n or OPERATION_rsblsh2_n -')')') - -MULFUNC_PROLOGUE(mpn_addlsh2_n mpn_addlsh2_nc mpn_rsblsh2_n mpn_rsblsh2_nc) - -include_mpn(`x86/atom/aorrlshC_n.asm') diff --git a/gmp/mpn/x86/atom/aorrlshC_n.asm b/gmp/mpn/x86/atom/aorrlshC_n.asm deleted file mode 100644 index 71cfe490d6..0000000000 --- a/gmp/mpn/x86/atom/aorrlshC_n.asm +++ /dev/null @@ -1,156 +0,0 @@ -dnl Intel Atom mpn_addlshC_n/mpn_rsblshC_n -- rp[] = (vp[] << C) +- up[] - -dnl Contributed to the GNU project by Marco Bodrato. - -dnl Copyright 2011 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. - -include(`../config.m4') - -C mp_limb_t mpn_addlshC_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, -C mp_size_t size); -C mp_limb_t mpn_addlshC_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, -C mp_size_t size, mp_limb_t carry); -C mp_limb_t mpn_rsblshC_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, -C mp_size_t size); -C mp_limb_t mpn_rsblshC_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, -C mp_size_t size, mp_signed_limb_t carry); - -C cycles/limb -C P5 -C P6 model 0-8,10-12 -C P6 model 9 (Banias) -C P6 model 13 (Dothan) -C P4 model 0 (Willamette) -C P4 model 1 (?) -C P4 model 2 (Northwood) -C P4 model 3 (Prescott) -C P4 model 4 (Nocona) -C Intel Atom 6 -C AMD K6 -C AMD K7 -C AMD K8 -C AMD K10 - -defframe(PARAM_CORB, 20) -defframe(PARAM_SIZE, 16) -defframe(PARAM_DBLD, 12) -defframe(PARAM_SRC, 8) -defframe(PARAM_DST, 4) - -dnl re-use parameter space -define(VAR_COUNT,`PARAM_SIZE') -define(SAVE_EBP,`PARAM_DBLD') -define(SAVE_VP,`PARAM_SRC') -define(SAVE_UP,`PARAM_DST') - -define(M, eval(m4_lshift(1,LSH))) -define(`rp', `%edi') -define(`up', `%esi') -define(`vp', `%ebx') - -ASM_START() - TEXT - ALIGN(8) - -PROLOGUE(M4_function_c) -deflit(`FRAME',0) - movl PARAM_CORB, %eax - movl %eax, %edx - shr $LSH, %edx - andl $1, %edx - M4_opp %edx, %eax - jmp L(start_nc) -EPILOGUE() - -PROLOGUE(M4_function) -deflit(`FRAME',0) - - xor %eax, %eax - xor %edx, %edx -L(start_nc): - push rp FRAME_pushl() - - mov PARAM_SIZE, %ecx C size - mov PARAM_DST, rp - mov up, SAVE_UP - incl %ecx C size + 1 - mov PARAM_SRC, up - mov vp, SAVE_VP - shr %ecx C (size+1)\2 - mov PARAM_DBLD, vp - mov %ebp, SAVE_EBP - mov %ecx, VAR_COUNT - jnc L(entry) C size odd - - shr %edx C size even - mov (vp), %ecx - lea 4(vp), vp - lea (%eax,%ecx,M), %edx - mov %ecx, %eax - lea -4(up), up - lea -4(rp), rp - jmp L(enteven) - - ALIGN(16) -L(oop): - lea (%eax,%ecx,M), %ebp - shr $RSH, %ecx - mov 4(vp), %eax - shr %edx - lea 8(vp), vp - M4_inst (up), %ebp - lea (%ecx,%eax,M), %edx - mov %ebp, (rp) -L(enteven): - M4_inst 4(up), %edx - lea 8(up), up - mov %edx, 4(rp) - adc %edx, %edx - shr $RSH, %eax - lea 8(rp), rp -L(entry): - mov (vp), %ecx - decl VAR_COUNT - jnz L(oop) - - lea (%eax,%ecx,M), %ebp - shr $RSH, %ecx - shr %edx - mov SAVE_VP, vp - M4_inst (up), %ebp - mov %ecx, %eax - mov SAVE_UP, up - M4_inst $0, %eax - mov %ebp, (rp) - mov SAVE_EBP, %ebp - pop rp FRAME_popl() - ret -EPILOGUE() - -ASM_END() diff --git a/gmp/mpn/x86/atom/aors_n.asm b/gmp/mpn/x86/atom/aors_n.asm deleted file mode 100644 index 45ec287c3a..0000000000 --- a/gmp/mpn/x86/atom/aors_n.asm +++ /dev/null @@ -1,159 +0,0 @@ -dnl Intel Atom mpn_add_n/mpn_sub_n -- rp[] = up[] +- vp[]. - -dnl Copyright 2011 Free Software Foundation, Inc. - -dnl Contributed to the GNU project by Marco Bodrato. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. - -include(`../config.m4') - -C cycles/limb -C P5 -C P6 model 0-8,10-12 -C P6 model 9 (Banias) -C P6 model 13 (Dothan) -C P4 model 0 (Willamette) -C P4 model 1 (?) -C P4 model 2 (Northwood) -C P4 model 3 (Prescott) -C P4 model 4 (Nocona) -C Intel Atom 3 -C AMD K6 -C AMD K7 -C AMD K8 -C AMD K10 - -ifdef(`OPERATION_add_n', ` - define(M4_inst, adcl) - define(M4_function_n, mpn_add_n) - define(M4_function_nc, mpn_add_nc) - define(M4_description, add) -',`ifdef(`OPERATION_sub_n', ` - define(M4_inst, sbbl) - define(M4_function_n, mpn_sub_n) - define(M4_function_nc, mpn_sub_nc) - define(M4_description, subtract) -',`m4_error(`Need OPERATION_add_n or OPERATION_sub_n -')')') - -MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc) - -C mp_limb_t M4_function_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, -C mp_size_t size); -C mp_limb_t M4_function_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, -C mp_size_t size, mp_limb_t carry); -C -C Calculate src1,size M4_description src2,size, and store the result in -C dst,size. The return value is the carry bit from the top of the result (1 -C or 0). -C -C The _nc version accepts 1 or 0 for an initial carry into the low limb of -C the calculation. Note values other than 1 or 0 here will lead to garbage -C results. - -defframe(PARAM_CARRY,20) -defframe(PARAM_SIZE, 16) -defframe(PARAM_SRC2, 12) -defframe(PARAM_SRC1, 8) -defframe(PARAM_DST, 4) - -dnl re-use parameter space -define(SAVE_RP,`PARAM_SIZE') -define(SAVE_VP,`PARAM_SRC1') -define(SAVE_UP,`PARAM_DST') - -define(`rp', `%edi') -define(`up', `%esi') -define(`vp', `%ebx') -define(`cy', `%ecx') -define(`r1', `%ecx') -define(`r2', `%edx') - -ASM_START() - TEXT - ALIGN(16) -deflit(`FRAME',0) - -PROLOGUE(M4_function_n) - xor cy, cy C carry -L(start): - mov PARAM_SIZE, %eax C size - mov rp, SAVE_RP - mov PARAM_DST, rp - mov up, SAVE_UP - mov PARAM_SRC1, up - shr %eax C size >> 1 - mov vp, SAVE_VP - mov PARAM_SRC2, vp - jz L(one) C size == 1 - jc L(three) C size % 2 == 1 - - shr cy - mov (up), r2 - lea 4(up), up - lea 4(vp), vp - lea -4(rp), rp - jmp L(entry) -L(one): - shr cy - mov (up), r1 - jmp L(end) -L(three): - shr cy - mov (up), r1 - - ALIGN(16) -L(oop): - M4_inst (vp), r1 - lea 8(up), up - mov -4(up), r2 - lea 8(vp), vp - mov r1, (rp) -L(entry): - M4_inst -4(vp), r2 - lea 8(rp), rp - dec %eax - mov (up), r1 - mov r2, -4(rp) - jnz L(oop) - -L(end): C %eax is zero here - mov SAVE_UP, up - M4_inst (vp), r1 - mov SAVE_VP, vp - mov r1, (rp) - adc %eax, %eax - mov SAVE_RP, rp - ret -EPILOGUE() - -PROLOGUE(M4_function_nc) - mov PARAM_CARRY, cy C carry - jmp L(start) -EPILOGUE() -ASM_END() diff --git a/gmp/mpn/x86/atom/aorslshC_n.asm b/gmp/mpn/x86/atom/aorslshC_n.asm deleted file mode 100644 index 75ace65e51..0000000000 --- a/gmp/mpn/x86/atom/aorslshC_n.asm +++ /dev/null @@ -1,247 +0,0 @@ -dnl Intel Atom mpn_addlshC_n/mpn_sublshC_n -- rp[] = up[] +- (vp[] << C) - -dnl Contributed to the GNU project by Marco Bodrato. - -dnl Copyright 2011 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. - -include(`../config.m4') - -C mp_limb_t mpn_addlshC_n_ip1 (mp_ptr dst, mp_srcptr src, mp_size_t size); -C mp_limb_t mpn_addlshC_nc_ip1 (mp_ptr dst, mp_srcptr src, mp_size_t size, -C mp_limb_t carry); -C mp_limb_t mpn_sublshC_n_ip1 (mp_ptr dst, mp_srcptr src, mp_size_t size,); -C mp_limb_t mpn_sublshC_nc_ip1 (mp_ptr dst, mp_srcptr src, mp_size_t size, -C mp_signed_limb_t borrow); - -defframe(PARAM_CORB, 16) -defframe(PARAM_SIZE, 12) -defframe(PARAM_SRC, 8) -defframe(PARAM_DST, 4) - -C mp_limb_t mpn_addlshC_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, -C mp_size_t size,); -C mp_limb_t mpn_addlshC_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, -C mp_size_t size, mp_limb_t carry); -C mp_limb_t mpn_sublshC_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, -C mp_size_t size,); -C mp_limb_t mpn_sublshC_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, -C mp_size_t size, mp_limb_t borrow); - -C if src1 == dst, _ip1 is used - -C cycles/limb -C dst!=src1,src2 dst==src1 -C P5 -C P6 model 0-8,10-12 -C P6 model 9 (Banias) -C P6 model 13 (Dothan) -C P4 model 0 (Willamette) -C P4 model 1 (?) -C P4 model 2 (Northwood) -C P4 model 3 (Prescott) -C P4 model 4 (Nocona) -C Intel Atom 7 6 -C AMD K6 -C AMD K7 -C AMD K8 -C AMD K10 - -defframe(GPARAM_CORB, 20) -defframe(GPARAM_SIZE, 16) -defframe(GPARAM_SRC2, 12) - -dnl re-use parameter space -define(SAVE_EBP,`PARAM_SIZE') -define(SAVE_EBX,`PARAM_SRC') -define(SAVE_UP,`PARAM_DST') - -define(M, eval(m4_lshift(1,LSH))) -define(`rp', `%edi') -define(`up', `%esi') - -ASM_START() - TEXT - ALIGN(8) - -PROLOGUE(M4_ip_function_c) -deflit(`FRAME',0) - movl PARAM_CORB, %ecx - movl %ecx, %edx - shr $LSH, %edx - andl $1, %edx - M4_opp %edx, %ecx - jmp L(start_nc) -EPILOGUE() - -PROLOGUE(M4_ip_function) -deflit(`FRAME',0) - - xor %ecx, %ecx - xor %edx, %edx -L(start_nc): - push rp FRAME_pushl() - mov PARAM_DST, rp - mov up, SAVE_UP - mov PARAM_SRC, up - mov %ebx, SAVE_EBX - mov PARAM_SIZE, %ebx C size -L(inplace): - incl %ebx C size + 1 - shr %ebx C (size+1)\2 - mov %ebp, SAVE_EBP - jnc L(entry) C size odd - - add %edx, %edx C size even - mov %ecx, %ebp - mov (up), %ecx - lea -4(rp), rp - lea (%ebp,%ecx,M), %eax - lea 4(up), up - jmp L(enteven) - - ALIGN(16) -L(oop): - lea (%ecx,%eax,M), %ebp - shr $RSH, %eax - mov 4(up), %ecx - add %edx, %edx - lea 8(up), up - M4_inst %ebp, (rp) - lea (%eax,%ecx,M), %eax - -L(enteven): - M4_inst %eax, 4(rp) - lea 8(rp), rp - - sbb %edx, %edx - shr $RSH, %ecx - -L(entry): - mov (up), %eax - decl %ebx - jnz L(oop) - - lea (%ecx,%eax,M), %ebp - shr $RSH, %eax - shr %edx - M4_inst %ebp, (rp) - mov SAVE_UP, up - adc $0, %eax - mov SAVE_EBP, %ebp - mov SAVE_EBX, %ebx - pop rp FRAME_popl() - ret -EPILOGUE() - -PROLOGUE(M4_function_c) -deflit(`FRAME',0) - movl GPARAM_CORB, %ecx - movl %ecx, %edx - shr $LSH, %edx - andl $1, %edx - M4_opp %edx, %ecx - jmp L(generic_nc) -EPILOGUE() - -PROLOGUE(M4_function) -deflit(`FRAME',0) - - xor %ecx, %ecx - xor %edx, %edx -L(generic_nc): - push rp FRAME_pushl() - mov PARAM_DST, rp - mov up, SAVE_UP - mov PARAM_SRC, up - cmp rp, up - mov %ebx, SAVE_EBX - jne L(general) - mov GPARAM_SIZE, %ebx C size - mov GPARAM_SRC2, up - jmp L(inplace) - -L(general): - mov GPARAM_SIZE, %eax C size - mov %ebx, SAVE_EBX - incl %eax C size + 1 - mov up, %ebx C vp - mov GPARAM_SRC2, up C up - shr %eax C (size+1)\2 - mov %ebp, SAVE_EBP - mov %eax, GPARAM_SIZE - jnc L(entry2) C size odd - - add %edx, %edx C size even - mov %ecx, %ebp - mov (up), %ecx - lea -4(rp), rp - lea -4(%ebx), %ebx - lea (%ebp,%ecx,M), %eax - lea 4(up), up - jmp L(enteven2) - - ALIGN(16) -L(oop2): - lea (%ecx,%eax,M), %ebp - shr $RSH, %eax - mov 4(up), %ecx - add %edx, %edx - lea 8(up), up - mov (%ebx), %edx - M4_inst %ebp, %edx - lea (%eax,%ecx,M), %eax - mov %edx, (rp) -L(enteven2): - mov 4(%ebx), %edx - lea 8(%ebx), %ebx - M4_inst %eax, %edx - mov %edx, 4(rp) - sbb %edx, %edx - shr $RSH, %ecx - lea 8(rp), rp -L(entry2): - mov (up), %eax - decl GPARAM_SIZE - jnz L(oop2) - - lea (%ecx,%eax,M), %ebp - shr $RSH, %eax - shr %edx - mov (%ebx), %edx - M4_inst %ebp, %edx - mov %edx, (rp) - mov SAVE_UP, up - adc $0, %eax - mov SAVE_EBP, %ebp - mov SAVE_EBX, %ebx - pop rp FRAME_popl() - ret -EPILOGUE() - -ASM_END() diff --git a/gmp/mpn/x86/atom/bdiv_q_1.asm b/gmp/mpn/x86/atom/bdiv_q_1.asm deleted file mode 100644 index 31e908ec44..0000000000 --- a/gmp/mpn/x86/atom/bdiv_q_1.asm +++ /dev/null @@ -1,35 +0,0 @@ -dnl Intel Atom mpn_bdiv_q_1, mpn_pi1_bdiv_q_1 -- schoolbook Hensel -dnl division by 1-limb divisor, returning quotient only. - -dnl Copyright 2011 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. - -include(`../config.m4') - -MULFUNC_PROLOGUE(mpn_bdiv_q_1 mpn_pi1_bdiv_q_1) -include_mpn(`x86/pentium/bdiv_q_1.asm') diff --git a/gmp/mpn/x86/atom/cnd_add_n.asm b/gmp/mpn/x86/atom/cnd_add_n.asm deleted file mode 100644 index 50bf2ad64b..0000000000 --- a/gmp/mpn/x86/atom/cnd_add_n.asm +++ /dev/null @@ -1,113 +0,0 @@ -dnl X86 mpn_cnd_add_n optimised for Intel Atom. - -dnl Copyright 2013 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. - -include(`../config.m4') - -C cycles/limb -C P5 ? -C P6 model 0-8,10-12 ? -C P6 model 9 (Banias) ? -C P6 model 13 (Dothan) ? -C P4 model 0-1 (Willamette) ? -C P4 model 2 (Northwood) ? -C P4 model 3-4 (Prescott) ? -C Intel atom 4.67 -C AMD K6 ? -C AMD K7 ? -C AMD K8 ? - - -define(`rp', `%edi') -define(`up', `%esi') -define(`vp', `%ebp') -define(`n', `%ecx') -define(`cnd', `20(%esp)') - -ASM_START() - TEXT - ALIGN(16) -PROLOGUE(mpn_cnd_add_n) - push %edi - push %esi - push %ebx - push %ebp - - mov cnd, %eax C make cnd into a mask (1) - mov 24(%esp), rp - neg %eax C make cnd into a mask (1) - mov 28(%esp), up - sbb %eax, %eax C make cnd into a mask (1) - mov 32(%esp), vp - mov %eax, cnd C make cnd into a mask (1) - mov 36(%esp), n - - xor %edx, %edx - - shr $1, n - jnc L(top) - - mov 0(vp), %eax - and cnd, %eax - lea 4(vp), vp - add 0(up), %eax - lea 4(rp), rp - lea 4(up), up - sbb %edx, %edx - mov %eax, -4(rp) - inc n - dec n - je L(end) - -L(top): sbb %edx, %edx - mov 0(vp), %eax - and cnd, %eax - lea 8(vp), vp - lea 8(rp), rp - mov -4(vp), %ebx - and cnd, %ebx - add %edx, %edx - adc 0(up), %eax - lea 8(up), up - mov %eax, -8(rp) - adc -4(up), %ebx - dec n - mov %ebx, -4(rp) - jne L(top) - -L(end): mov $0, %eax - adc %eax, %eax - - pop %ebp - pop %ebx - pop %esi - pop %edi - ret -EPILOGUE() -ASM_END() diff --git a/gmp/mpn/x86/atom/cnd_sub_n.asm b/gmp/mpn/x86/atom/cnd_sub_n.asm deleted file mode 100644 index 221bedca37..0000000000 --- a/gmp/mpn/x86/atom/cnd_sub_n.asm +++ /dev/null @@ -1,124 +0,0 @@ -dnl X86 mpn_cnd_sub_n optimised for Intel Atom. - -dnl Copyright 2013 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. - -include(`../config.m4') - -C cycles/limb -C P5 ? -C P6 model 0-8,10-12 ? -C P6 model 9 (Banias) ? -C P6 model 13 (Dothan) ? -C P4 model 0-1 (Willamette) ? -C P4 model 2 (Northwood) ? -C P4 model 3-4 (Prescott) ? -C Intel atom 5.67 -C AMD K6 ? -C AMD K7 ? -C AMD K8 ? - - -define(`rp', `%edi') -define(`up', `%esi') -define(`vp', `%ebp') -define(`n', `%ecx') -define(`cnd', `20(%esp)') - -ASM_START() - TEXT - ALIGN(16) -PROLOGUE(mpn_cnd_sub_n) - push %edi - push %esi - push %ebx - push %ebp - - mov cnd, %eax C make cnd into a mask (1) - mov 24(%esp), rp - neg %eax C make cnd into a mask (1) - mov 28(%esp), up - sbb %eax, %eax C make cnd into a mask (1) - mov 32(%esp), vp - mov %eax, cnd C make cnd into a mask (1) - mov 36(%esp), n - - xor %edx, %edx - - inc n - shr n - jnc L(ent) - - mov 0(vp), %eax - and cnd, %eax - lea 4(vp), vp - mov 0(up), %edx - sub %eax, %edx - lea 4(rp), rp - lea 4(up), up - mov %edx, -4(rp) - sbb %edx, %edx C save cy - -L(ent): mov 0(vp), %ebx - and cnd, %ebx - add %edx, %edx C restore cy - mov 0(up), %edx - dec n - je L(end) - -L(top): sbb %ebx, %edx - mov 4(vp), %eax - mov %edx, 0(rp) - sbb %edx, %edx C save cy - mov 8(vp), %ebx - lea 8(up), up - and cnd, %ebx - and cnd, %eax - add %edx, %edx C restore cy - mov -4(up), %edx - lea 8(rp), rp - sbb %eax, %edx - mov %edx, -4(rp) - dec n - mov 0(up), %edx - lea 8(vp), vp - jne L(top) - -L(end): sbb %ebx, %edx - mov %edx, 0(rp) - - mov $0, %eax - adc %eax, %eax - - pop %ebp - pop %ebx - pop %esi - pop %edi - ret -EPILOGUE() -ASM_END() diff --git a/gmp/mpn/x86/atom/dive_1.asm b/gmp/mpn/x86/atom/dive_1.asm deleted file mode 100644 index 71036a15a4..0000000000 --- a/gmp/mpn/x86/atom/dive_1.asm +++ /dev/null @@ -1,34 +0,0 @@ -dnl Intel Atom mpn_divexact_1 -- mpn by limb exact division. - -dnl Copyright 2011 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. - -include(`../config.m4') - -MULFUNC_PROLOGUE(mpn_divexact_1) -include_mpn(`x86/pentium/dive_1.asm') diff --git a/gmp/mpn/x86/atom/gmp-mparam.h b/gmp/mpn/x86/atom/gmp-mparam.h deleted file mode 100644 index 45df12806c..0000000000 --- a/gmp/mpn/x86/atom/gmp-mparam.h +++ /dev/null @@ -1,201 +0,0 @@ -/* Intel Atom/32 gmp-mparam.h -- Compiler/machine parameter header file. - -Copyright 1991, 1993, 1994, 2000-2011, 2014 Free Software Foundation, Inc. - -This file is part of the GNU MP Library. - -The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - -or - - * the GNU General Public License as published by the Free Software - Foundation; either version 2 of the License, or (at your option) any - later version. - -or both in parallel, as here. - -The GNU MP Library is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. - -You should have received copies of the GNU General Public License and the -GNU Lesser General Public License along with the GNU MP Library. If not, -see https://www.gnu.org/licenses/. */ - -#define GMP_LIMB_BITS 32 -#define GMP_LIMB_BYTES 4 - -/* 1667 MHz Pineview (Atom D510) */ -/* FFT tuning limit = 25000000 */ -/* Generated by tuneup.c, 2014-03-14, gcc 4.5 */ - -#define MOD_1_NORM_THRESHOLD 3 -#define MOD_1_UNNORM_THRESHOLD 5 -#define MOD_1N_TO_MOD_1_1_THRESHOLD 11 -#define MOD_1U_TO_MOD_1_1_THRESHOLD 5 -#define MOD_1_1_TO_MOD_1_2_THRESHOLD 10 -#define MOD_1_2_TO_MOD_1_4_THRESHOLD 0 /* never mpn_mod_1s_2p */ -#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 13 -#define USE_PREINV_DIVREM_1 1 /* native */ -#define DIV_QR_1N_PI1_METHOD 1 -#define DIV_QR_1_NORM_THRESHOLD 4 -#define DIV_QR_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* never */ -#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */ -#define DIVEXACT_1_THRESHOLD 0 /* always (native) */ -#define BMOD_1_TO_MOD_1_THRESHOLD 31 - -#define MUL_TOOM22_THRESHOLD 20 -#define MUL_TOOM33_THRESHOLD 74 -#define MUL_TOOM44_THRESHOLD 178 -#define MUL_TOOM6H_THRESHOLD 270 -#define MUL_TOOM8H_THRESHOLD 399 - -#define MUL_TOOM32_TO_TOOM43_THRESHOLD 73 -#define MUL_TOOM32_TO_TOOM53_THRESHOLD 122 -#define MUL_TOOM42_TO_TOOM53_THRESHOLD 115 -#define MUL_TOOM42_TO_TOOM63_THRESHOLD 127 -#define MUL_TOOM43_TO_TOOM54_THRESHOLD 106 - -#define SQR_BASECASE_THRESHOLD 0 /* always (native) */ -#define SQR_TOOM2_THRESHOLD 30 -#define SQR_TOOM3_THRESHOLD 105 -#define SQR_TOOM4_THRESHOLD 178 -#define SQR_TOOM6_THRESHOLD 303 -#define SQR_TOOM8_THRESHOLD 527 - -#define MULMID_TOOM42_THRESHOLD 54 - -#define MULMOD_BNM1_THRESHOLD 13 -#define SQRMOD_BNM1_THRESHOLD 18 - -#define MUL_FFT_MODF_THRESHOLD 380 /* k = 5 */ -#define MUL_FFT_TABLE3 \ - { { 380, 5}, { 19, 6}, { 10, 5}, { 21, 6}, \ - { 11, 5}, { 23, 6}, { 21, 7}, { 11, 6}, \ - { 25, 7}, { 13, 6}, { 27, 7}, { 15, 6}, \ - { 31, 7}, { 21, 8}, { 11, 7}, { 27, 8}, \ - { 15, 7}, { 35, 8}, { 19, 7}, { 39, 8}, \ - { 23, 7}, { 47, 8}, { 27, 9}, { 15, 8}, \ - { 39, 9}, { 23, 8}, { 51,10}, { 15, 9}, \ - { 31, 8}, { 67, 9}, { 39, 8}, { 79, 9}, \ - { 47, 8}, { 95,10}, { 31, 9}, { 79,10}, \ - { 47, 9}, { 95,11}, { 31,10}, { 63, 9}, \ - { 127, 8}, { 255, 9}, { 135,10}, { 79, 9}, \ - { 159,10}, { 95, 9}, { 191,11}, { 63,10}, \ - { 127, 9}, { 255, 8}, { 511, 9}, { 271,10}, \ - { 143, 9}, { 287, 8}, { 575,10}, { 159,11}, \ - { 95,10}, { 191, 9}, { 383,12}, { 63,11}, \ - { 127,10}, { 255, 9}, { 511,10}, { 271, 9}, \ - { 543,10}, { 287, 9}, { 575,10}, { 303,11}, \ - { 159,10}, { 319, 9}, { 639,10}, { 335, 9}, \ - { 671,10}, { 351, 9}, { 703,11}, { 191,10}, \ - { 383, 9}, { 767,10}, { 415, 9}, { 831,11}, \ - { 223,10}, { 447,12}, { 127,11}, { 255,10}, \ - { 543,11}, { 287,10}, { 607, 9}, { 1215,11}, \ - { 319,10}, { 671,11}, { 351,10}, { 703,12}, \ - { 191,11}, { 383,10}, { 767,11}, { 415,10}, \ - { 831,11}, { 447,13}, { 127,12}, { 255,11}, \ - { 543,10}, { 1087,11}, { 607,10}, { 1215,12}, \ - { 319,11}, { 735,12}, { 383,11}, { 831,12}, \ - { 447,11}, { 959,13}, { 255,12}, { 511,11}, \ - { 1087,12}, { 575,11}, { 1151,12}, { 703,11}, \ - { 1471,13}, { 383,12}, { 831,11}, { 1663,12}, \ - { 959,14}, { 255,13}, { 511,12}, { 1215,13}, \ - { 639,12}, { 1471,11}, { 2943,13}, { 767,12}, \ - { 1663,13}, { 895,12}, { 1919,14}, { 511,13}, \ - { 1023,12}, { 2111,13}, { 1151,12}, { 2431,13}, \ - { 1407,12}, { 2943,14}, { 767,13}, { 1663,12}, \ - { 3455,13}, { 1919,15}, { 511,14}, { 1023,13}, \ - { 2431,14}, { 1279,13}, { 2943,12}, { 5887,14}, \ - { 16384,15}, { 32768,16} } -#define MUL_FFT_TABLE3_SIZE 150 -#define MUL_FFT_THRESHOLD 4544 - -#define SQR_FFT_MODF_THRESHOLD 340 /* k = 5 */ -#define SQR_FFT_TABLE3 \ - { { 340, 5}, { 21, 6}, { 11, 5}, { 23, 6}, \ - { 12, 5}, { 25, 6}, { 21, 7}, { 11, 6}, \ - { 25, 7}, { 13, 6}, { 27, 7}, { 15, 6}, \ - { 31, 7}, { 21, 8}, { 11, 7}, { 27, 8}, \ - { 15, 7}, { 35, 8}, { 19, 7}, { 41, 8}, \ - { 23, 7}, { 47, 8}, { 27, 9}, { 15, 8}, \ - { 39, 9}, { 23, 8}, { 51,10}, { 15, 9}, \ - { 31, 8}, { 63, 9}, { 39, 8}, { 79, 9}, \ - { 47,10}, { 31, 9}, { 79,10}, { 47, 9}, \ - { 95,11}, { 31,10}, { 63, 9}, { 127, 8}, \ - { 255,10}, { 79, 9}, { 159, 8}, { 319,10}, \ - { 95, 9}, { 191,11}, { 63,10}, { 127, 9}, \ - { 255, 8}, { 511, 9}, { 271,10}, { 143, 9}, \ - { 287, 8}, { 575, 9}, { 303, 8}, { 607,10}, \ - { 159, 9}, { 319,11}, { 95,10}, { 191, 9}, \ - { 383,12}, { 63,11}, { 127,10}, { 255, 9}, \ - { 511,10}, { 271, 9}, { 543,10}, { 287, 9}, \ - { 575,10}, { 303, 9}, { 607,10}, { 319, 9}, \ - { 639,10}, { 335, 9}, { 671,10}, { 351, 9}, \ - { 703,11}, { 191,10}, { 383, 9}, { 767,10}, \ - { 415,11}, { 223,10}, { 447,12}, { 127,11}, \ - { 255,10}, { 543,11}, { 287,10}, { 607,11}, \ - { 319,10}, { 671,11}, { 351,10}, { 703,12}, \ - { 191,11}, { 383,10}, { 767,11}, { 415,10}, \ - { 831,11}, { 479,13}, { 127,12}, { 255,11}, \ - { 543,10}, { 1087,11}, { 607,12}, { 319,11}, \ - { 671,10}, { 1343,11}, { 735,12}, { 383,11}, \ - { 831,12}, { 447,11}, { 959,13}, { 255,12}, \ - { 511,11}, { 1087,12}, { 575,11}, { 1215,12}, \ - { 639,11}, { 1343,12}, { 703,11}, { 1407,13}, \ - { 383,12}, { 831,11}, { 1663,12}, { 959,14}, \ - { 255,13}, { 511,12}, { 1215,13}, { 639,12}, \ - { 1471,13}, { 767,12}, { 1663,13}, { 895,12}, \ - { 1791,14}, { 511,13}, { 1023,12}, { 2111,13}, \ - { 1151,12}, { 2431,13}, { 1407,14}, { 767,13}, \ - { 1663,12}, { 3455,13}, { 1791,15}, { 511,14}, \ - { 1023,13}, { 2431,14}, { 1279,13}, { 2943,12}, \ - { 5887,14}, { 16384,15}, { 32768,16} } -#define SQR_FFT_TABLE3_SIZE 151 -#define SQR_FFT_THRESHOLD 2880 - -#define MULLO_BASECASE_THRESHOLD 6 -#define MULLO_DC_THRESHOLD 48 -#define MULLO_MUL_N_THRESHOLD 8907 - -#define DC_DIV_QR_THRESHOLD 59 -#define DC_DIVAPPR_Q_THRESHOLD 250 -#define DC_BDIV_QR_THRESHOLD 59 -#define DC_BDIV_Q_THRESHOLD 169 - -#define INV_MULMOD_BNM1_THRESHOLD 38 -#define INV_NEWTON_THRESHOLD 246 -#define INV_APPR_THRESHOLD 246 - -#define BINV_NEWTON_THRESHOLD 276 -#define REDC_1_TO_REDC_N_THRESHOLD 67 - -#define MU_DIV_QR_THRESHOLD 1334 -#define MU_DIVAPPR_Q_THRESHOLD 1442 -#define MUPI_DIV_QR_THRESHOLD 114 -#define MU_BDIV_QR_THRESHOLD 1142 -#define MU_BDIV_Q_THRESHOLD 1334 - -#define POWM_SEC_TABLE 1,22,98,416,1378 - -#define MATRIX22_STRASSEN_THRESHOLD 13 -#define HGCD_THRESHOLD 133 -#define HGCD_APPR_THRESHOLD 169 -#define HGCD_REDUCE_THRESHOLD 2479 -#define GCD_DC_THRESHOLD 460 -#define GCDEXT_DC_THRESHOLD 342 -#define JACOBI_BASE_METHOD 3 - -#define GET_STR_DC_THRESHOLD 12 -#define GET_STR_PRECOMPUTE_THRESHOLD 23 -#define SET_STR_DC_THRESHOLD 321 -#define SET_STR_PRECOMPUTE_THRESHOLD 1099 - -#define FAC_DSC_THRESHOLD 198 -#define FAC_ODD_THRESHOLD 34 diff --git a/gmp/mpn/x86/atom/logops_n.asm b/gmp/mpn/x86/atom/logops_n.asm deleted file mode 100644 index 3cb6d7310c..0000000000 --- a/gmp/mpn/x86/atom/logops_n.asm +++ /dev/null @@ -1,151 +0,0 @@ -dnl Intel Atom mpn_and_n,...,mpn_xnor_n -- bitwise logical operations. - -dnl Copyright 2011 Free Software Foundation, Inc. - -dnl Contributed to the GNU project by Marco Bodrato. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. - -include(`../config.m4') - -C cycles/limb -C op nop opn -C P5 -C P6 model 0-8,10-12 -C P6 model 9 (Banias) -C P6 model 13 (Dothan) -C P4 model 0 (Willamette) -C P4 model 1 (?) -C P4 model 2 (Northwood) -C P4 model 3 (Prescott) -C P4 model 4 (Nocona) -C Intel Atom 3 3.5 3.5 -C AMD K6 -C AMD K7 -C AMD K8 -C AMD K10 - -define(M4_choose_op, -`ifdef(`OPERATION_$1',` -define(`M4_function', `mpn_$1') -define(`M4_want_pre', `$4') -define(`M4_inst', `$3') -define(`M4_want_post',`$2') -')') -define(M4pre, `ifelse(M4_want_pre, yes,`$1')') -define(M4post,`ifelse(M4_want_post,yes,`$1')') - -M4_choose_op( and_n, , andl, ) -M4_choose_op( andn_n, , andl, yes) -M4_choose_op( nand_n, yes, andl, ) -M4_choose_op( ior_n, , orl, ) -M4_choose_op( iorn_n, , orl, yes) -M4_choose_op( nior_n, yes, orl, ) -M4_choose_op( xor_n, , xorl, ) -M4_choose_op( xnor_n, yes, xorl, ) - -ifdef(`M4_function',, -`m4_error(`Unrecognised or undefined OPERATION symbol -')') - -MULFUNC_PROLOGUE(mpn_and_n mpn_andn_n mpn_nand_n mpn_ior_n mpn_iorn_n mpn_nior_n mpn_xor_n mpn_xnor_n) - -C void M4_function (mp_ptr dst, mp_srcptr src2, mp_srcptr src1, mp_size_t size); -C - -defframe(PARAM_SIZE, 16) -defframe(PARAM_SRC1, 12) -defframe(PARAM_SRC2, 8) -defframe(PARAM_DST, 4) - -dnl re-use parameter space -define(SAVE_RP,`PARAM_SIZE') -define(SAVE_VP,`PARAM_SRC1') -define(SAVE_UP,`PARAM_DST') - -define(`rp', `%edi') -define(`up', `%esi') -define(`vp', `%ebx') -define(`cnt', `%eax') -define(`r1', `%ecx') -define(`r2', `%edx') - -ASM_START() - TEXT - ALIGN(16) -deflit(`FRAME',0) - -PROLOGUE(M4_function) - mov PARAM_SIZE, cnt C size - mov rp, SAVE_RP - mov PARAM_DST, rp - mov up, SAVE_UP - mov PARAM_SRC1, up - shr cnt C size >> 1 - mov vp, SAVE_VP - mov PARAM_SRC2, vp - mov (up), r1 - jz L(end) C size == 1 - jnc L(even) C size % 2 == 0 - - ALIGN(16) -L(oop): -M4pre(` notl_or_xorl_GMP_NUMB_MASK(r1)') - M4_inst (vp), r1 - lea 8(up), up - mov -4(up), r2 -M4post(` notl_or_xorl_GMP_NUMB_MASK(r1)') - lea 8(vp), vp - mov r1, (rp) -L(entry): -M4pre(` notl_or_xorl_GMP_NUMB_MASK(r2)') - M4_inst -4(vp), r2 - lea 8(rp), rp -M4post(` notl_or_xorl_GMP_NUMB_MASK(r2)') - dec cnt - mov (up), r1 - mov r2, -4(rp) - jnz L(oop) - -L(end): -M4pre(` notl_or_xorl_GMP_NUMB_MASK(r1)') - mov SAVE_UP, up - M4_inst (vp), r1 -M4post(`notl_or_xorl_GMP_NUMB_MASK(r1)') - mov SAVE_VP, vp - mov r1, (rp) - mov SAVE_RP, rp - ret - -L(even): - mov r1, r2 - lea 4(up), up - lea 4(vp), vp - lea -4(rp), rp - jmp L(entry) -EPILOGUE() -ASM_END() diff --git a/gmp/mpn/x86/atom/lshift.asm b/gmp/mpn/x86/atom/lshift.asm deleted file mode 100644 index f2c70dd3e8..0000000000 --- a/gmp/mpn/x86/atom/lshift.asm +++ /dev/null @@ -1,218 +0,0 @@ -dnl Intel Atom mpn_lshift -- mpn left shift. - -dnl Copyright 2011 Free Software Foundation, Inc. - -dnl Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. - -include(`../config.m4') - -C mp_limb_t mpn_lshift (mp_ptr dst, mp_srcptr src, mp_size_t size, -C unsigned cnt); - -C cycles/limb -C cnt!=1 cnt==1 -C P5 -C P6 model 0-8,10-12 -C P6 model 9 (Banias) -C P6 model 13 (Dothan) -C P4 model 0 (Willamette) -C P4 model 1 (?) -C P4 model 2 (Northwood) -C P4 model 3 (Prescott) -C P4 model 4 (Nocona) -C Intel Atom 5 2.5 -C AMD K6 -C AMD K7 -C AMD K8 -C AMD K10 - -defframe(PARAM_CNT, 16) -defframe(PARAM_SIZE,12) -defframe(PARAM_SRC, 8) -defframe(PARAM_DST, 4) - -dnl re-use parameter space -define(SAVE_UP,`PARAM_CNT') -define(VAR_COUNT,`PARAM_SIZE') -define(SAVE_EBX,`PARAM_SRC') -define(SAVE_EBP,`PARAM_DST') - -define(`rp', `%edi') -define(`up', `%esi') -define(`cnt', `%ecx') - -ASM_START() - TEXT - ALIGN(8) -deflit(`FRAME',0) -PROLOGUE(mpn_lshift) - mov PARAM_CNT, cnt - mov PARAM_SIZE, %edx - mov up, SAVE_UP - mov PARAM_SRC, up - push rp FRAME_pushl() - mov PARAM_DST, rp - -C We can use faster code for shift-by-1 under certain conditions. - cmp $1,cnt - jne L(normal) - cmpl rp, up - jnc L(special) C jump if s_ptr + 1 >= res_ptr - leal (up,%edx,4),%eax - cmpl %eax,rp - jnc L(special) C jump if res_ptr >= s_ptr + size - -L(normal): - lea -4(up,%edx,4), up - mov %ebx, SAVE_EBX - lea -4(rp,%edx,4), rp - - shr %edx - mov (up), %eax - mov %edx, VAR_COUNT - jnc L(evn) - - mov %eax, %ebx - shl %cl, %ebx - neg cnt - shr %cl, %eax - test %edx, %edx - jnz L(gt1) - mov %ebx, (rp) - jmp L(quit) - -L(gt1): mov %ebp, SAVE_EBP - push %eax - mov -4(up), %eax - mov %eax, %ebp - shr %cl, %eax - jmp L(lo1) - -L(evn): mov %ebp, SAVE_EBP - neg cnt - mov %eax, %ebp - mov -4(up), %edx - shr %cl, %eax - mov %edx, %ebx - shr %cl, %edx - neg cnt - decl VAR_COUNT - lea 4(rp), rp - lea -4(up), up - jz L(end) - push %eax FRAME_pushl() - - ALIGN(8) -L(top): shl %cl, %ebp - or %ebp, %edx - shl %cl, %ebx - neg cnt - mov -4(up), %eax - mov %eax, %ebp - mov %edx, -4(rp) - shr %cl, %eax - lea -8(rp), rp -L(lo1): mov -8(up), %edx - or %ebx, %eax - mov %edx, %ebx - shr %cl, %edx - lea -8(up), up - neg cnt - mov %eax, (rp) - decl VAR_COUNT - jg L(top) - - pop %eax FRAME_popl() -L(end): - shl %cl, %ebp - shl %cl, %ebx - or %ebp, %edx - mov SAVE_EBP, %ebp - mov %edx, -4(rp) - mov %ebx, -8(rp) - -L(quit): - mov SAVE_UP, up - mov SAVE_EBX, %ebx - pop rp FRAME_popl() - ret - -L(special): -deflit(`FRAME',4) - lea 3(%edx), %eax C size + 3 - dec %edx C size - 1 - mov (up), %ecx - shr $2, %eax C (size + 3) / 4 - and $3, %edx C (size - 1) % 4 - jz L(goloop) C jmp if size == 1 (mod 4) - shr %edx - jnc L(odd) C jum if size == 3 (mod 4) - - add %ecx, %ecx - lea 4(up), up - mov %ecx, (rp) - mov (up), %ecx - lea 4(rp), rp - - dec %edx - jnz L(goloop) C jump if size == 0 (mod 4) -L(odd): lea -8(up), up - lea -8(rp), rp - jmp L(sentry) C reached if size == 2 or 3 (mod 4) - -L(sloop): - adc %ecx, %ecx - mov 4(up), %edx - mov %ecx, (rp) - adc %edx, %edx - mov 8(up), %ecx - mov %edx, 4(rp) -L(sentry): - adc %ecx, %ecx - mov 12(up), %edx - mov %ecx, 8(rp) - adc %edx, %edx - lea 16(up), up - mov %edx, 12(rp) - lea 16(rp), rp - mov (up), %ecx -L(goloop): - decl %eax - jnz L(sloop) - -L(squit): - adc %ecx, %ecx - mov %ecx, (rp) - adc %eax, %eax - - mov SAVE_UP, up - pop rp FRAME_popl() - ret -EPILOGUE() -ASM_END() diff --git a/gmp/mpn/x86/atom/lshiftc.asm b/gmp/mpn/x86/atom/lshiftc.asm deleted file mode 100644 index 5be53ed19d..0000000000 --- a/gmp/mpn/x86/atom/lshiftc.asm +++ /dev/null @@ -1,159 +0,0 @@ -dnl Intel Atom mpn_lshiftc -- mpn left shift with complement. - -dnl Copyright 2011 Free Software Foundation, Inc. - -dnl Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. - -include(`../config.m4') - -C mp_limb_t mpn_lshiftc (mp_ptr dst, mp_srcptr src, mp_size_t size, -C unsigned cnt); - -C cycles/limb -C P5 -C P6 model 0-8,10-12 -C P6 model 9 (Banias) -C P6 model 13 (Dothan) -C P4 model 0 (Willamette) -C P4 model 1 (?) -C P4 model 2 (Northwood) -C P4 model 3 (Prescott) -C P4 model 4 (Nocona) -C Intel Atom 5.5 -C AMD K6 -C AMD K7 -C AMD K8 -C AMD K10 - -defframe(PARAM_CNT, 16) -defframe(PARAM_SIZE,12) -defframe(PARAM_SRC, 8) -defframe(PARAM_DST, 4) - -dnl re-use parameter space -define(SAVE_UP,`PARAM_CNT') -define(VAR_COUNT,`PARAM_SIZE') -define(SAVE_EBX,`PARAM_SRC') -define(SAVE_EBP,`PARAM_DST') - -define(`rp', `%edi') -define(`up', `%esi') -define(`cnt', `%ecx') - -ASM_START() - TEXT - -PROLOGUE(mpn_lshiftc) -deflit(`FRAME',0) - mov PARAM_CNT, cnt - mov PARAM_SIZE, %edx - mov up, SAVE_UP - mov PARAM_SRC, up - push rp FRAME_pushl() - mov PARAM_DST, rp - - lea -4(up,%edx,4), up - mov %ebx, SAVE_EBX - lea -4(rp,%edx,4), rp - - shr %edx - mov (up), %eax - mov %edx, VAR_COUNT - jnc L(evn) - - mov %eax, %ebx - shl %cl, %ebx - neg cnt - shr %cl, %eax - test %edx, %edx - jnz L(gt1) - not %ebx - mov %ebx, (rp) - jmp L(quit) - -L(gt1): mov %ebp, SAVE_EBP - push %eax - mov -4(up), %eax - mov %eax, %ebp - shr %cl, %eax - jmp L(lo1) - -L(evn): mov %ebp, SAVE_EBP - neg cnt - mov %eax, %ebp - mov -4(up), %edx - shr %cl, %eax - mov %edx, %ebx - shr %cl, %edx - neg cnt - decl VAR_COUNT - lea 4(rp), rp - lea -4(up), up - jz L(end) - push %eax FRAME_pushl() - -L(top): shl %cl, %ebp - or %ebp, %edx - shl %cl, %ebx - neg cnt - not %edx - mov -4(up), %eax - mov %eax, %ebp - mov %edx, -4(rp) - shr %cl, %eax - lea -8(rp), rp -L(lo1): mov -8(up), %edx - or %ebx, %eax - mov %edx, %ebx - shr %cl, %edx - not %eax - lea -8(up), up - neg cnt - mov %eax, (rp) - decl VAR_COUNT - jg L(top) - - pop %eax FRAME_popl() -L(end): - shl %cl, %ebp - shl %cl, %ebx - or %ebp, %edx - mov SAVE_EBP, %ebp - not %edx - not %ebx - mov %edx, -4(rp) - mov %ebx, -8(rp) - -L(quit): - mov SAVE_UP, up - mov SAVE_EBX, %ebx - pop rp FRAME_popl() - ret -EPILOGUE() -ASM_END() diff --git a/gmp/mpn/x86/atom/mmx/copyd.asm b/gmp/mpn/x86/atom/mmx/copyd.asm deleted file mode 100644 index b80fb033fe..0000000000 --- a/gmp/mpn/x86/atom/mmx/copyd.asm +++ /dev/null @@ -1,34 +0,0 @@ -dnl Intel Atom mpn_copyd -- copy limb vector, decrementing. - -dnl Copyright 2011 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. - -include(`../config.m4') - -MULFUNC_PROLOGUE(mpn_copyd) -include_mpn(`x86/k7/mmx/copyd.asm') diff --git a/gmp/mpn/x86/atom/mmx/copyi.asm b/gmp/mpn/x86/atom/mmx/copyi.asm deleted file mode 100644 index 49b6b8d662..0000000000 --- a/gmp/mpn/x86/atom/mmx/copyi.asm +++ /dev/null @@ -1,34 +0,0 @@ -dnl Intel Atom mpn_copyi -- copy limb vector, incrementing. - -dnl Copyright 2011 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. - -include(`../config.m4') - -MULFUNC_PROLOGUE(mpn_copyi) -include_mpn(`x86/k7/mmx/copyi.asm') diff --git a/gmp/mpn/x86/atom/mmx/hamdist.asm b/gmp/mpn/x86/atom/mmx/hamdist.asm deleted file mode 100644 index 3fe8253240..0000000000 --- a/gmp/mpn/x86/atom/mmx/hamdist.asm +++ /dev/null @@ -1,34 +0,0 @@ -dnl Intel Atom mpn_hamdist -- hamming distance. - -dnl Copyright 2011 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. - -include(`../config.m4') - -MULFUNC_PROLOGUE(mpn_hamdist) -include_mpn(`x86/k7/mmx/popham.asm') diff --git a/gmp/mpn/x86/atom/mod_34lsub1.asm b/gmp/mpn/x86/atom/mod_34lsub1.asm deleted file mode 100644 index 6d57ba385d..0000000000 --- a/gmp/mpn/x86/atom/mod_34lsub1.asm +++ /dev/null @@ -1,34 +0,0 @@ -dnl Intel Atom mpn_mod_34lsub1 -- remainder modulo 2^24-1. - -dnl Copyright 2011 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. - -include(`../config.m4') - -MULFUNC_PROLOGUE(mpn_mod_34lsub1) -include_mpn(`x86/p6/mod_34lsub1.asm') diff --git a/gmp/mpn/x86/atom/mode1o.asm b/gmp/mpn/x86/atom/mode1o.asm deleted file mode 100644 index c9ee6bd2db..0000000000 --- a/gmp/mpn/x86/atom/mode1o.asm +++ /dev/null @@ -1,34 +0,0 @@ -dnl Intel Atom mpn_modexact_1_odd -- exact division style remainder. - -dnl Copyright 2011 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. - -include(`../config.m4') - -MULFUNC_PROLOGUE(mpn_modexact_1_odd mpn_modexact_1c_odd) -include_mpn(`x86/pentium/mode1o.asm') diff --git a/gmp/mpn/x86/atom/rshift.asm b/gmp/mpn/x86/atom/rshift.asm deleted file mode 100644 index 1cb5dbefe9..0000000000 --- a/gmp/mpn/x86/atom/rshift.asm +++ /dev/null @@ -1,152 +0,0 @@ -dnl Intel Atom mpn_rshift -- mpn right shift. - -dnl Copyright 2011 Free Software Foundation, Inc. - -dnl Converted from AMD64 by Marco Bodrato. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. - -include(`../config.m4') - -C mp_limb_t mpn_rshift (mp_ptr dst, mp_srcptr src, mp_size_t size, -C unsigned cnt); - -C cycles/limb -C P5 -C P6 model 0-8,10-12 -C P6 model 9 (Banias) -C P6 model 13 (Dothan) -C P4 model 0 (Willamette) -C P4 model 1 (?) -C P4 model 2 (Northwood) -C P4 model 3 (Prescott) -C P4 model 4 (Nocona) -C Intel Atom 5 -C AMD K6 -C AMD K7 -C AMD K8 -C AMD K10 - -defframe(PARAM_CNT, 16) -defframe(PARAM_SIZE,12) -defframe(PARAM_SRC, 8) -defframe(PARAM_DST, 4) - -dnl re-use parameter space -define(SAVE_UP,`PARAM_CNT') -define(VAR_COUNT,`PARAM_SIZE') -define(SAVE_EBX,`PARAM_SRC') -define(SAVE_EBP,`PARAM_DST') - -define(`rp', `%edi') -define(`up', `%esi') -define(`cnt', `%ecx') - -ASM_START() - TEXT - ALIGN(8) -deflit(`FRAME',0) -PROLOGUE(mpn_rshift) - mov PARAM_CNT, cnt - mov PARAM_SIZE, %edx - mov up, SAVE_UP - mov PARAM_SRC, up - push rp FRAME_pushl() - mov PARAM_DST, rp - mov %ebx, SAVE_EBX - - shr %edx - mov (up), %eax - mov %edx, VAR_COUNT - jnc L(evn) - - mov %eax, %ebx - shr %cl, %ebx - neg cnt - shl %cl, %eax - test %edx, %edx - jnz L(gt1) - mov %ebx, (rp) - jmp L(quit) - -L(gt1): mov %ebp, SAVE_EBP - push %eax - mov 4(up), %eax - mov %eax, %ebp - shl %cl, %eax - jmp L(lo1) - -L(evn): mov %ebp, SAVE_EBP - neg cnt - mov %eax, %ebp - mov 4(up), %edx - shl %cl, %eax - mov %edx, %ebx - shl %cl, %edx - neg cnt - decl VAR_COUNT - lea -4(rp), rp - lea 4(up), up - jz L(end) - push %eax FRAME_pushl() - - ALIGN(8) -L(top): shr %cl, %ebp - or %ebp, %edx - shr %cl, %ebx - neg cnt - mov 4(up), %eax - mov %eax, %ebp - mov %edx, 4(rp) - shl %cl, %eax - lea 8(rp), rp -L(lo1): mov 8(up), %edx - or %ebx, %eax - mov %edx, %ebx - shl %cl, %edx - lea 8(up), up - neg cnt - mov %eax, (rp) - decl VAR_COUNT - jg L(top) - - pop %eax FRAME_popl() -L(end): - shr %cl, %ebp - shr %cl, %ebx - or %ebp, %edx - mov SAVE_EBP, %ebp - mov %edx, 4(rp) - mov %ebx, 8(rp) - -L(quit): - mov SAVE_UP, up - mov SAVE_EBX, %ebx - pop rp FRAME_popl() - ret -EPILOGUE() -ASM_END() diff --git a/gmp/mpn/x86/atom/sse2/aorsmul_1.asm b/gmp/mpn/x86/atom/sse2/aorsmul_1.asm deleted file mode 100644 index 969a14a919..0000000000 --- a/gmp/mpn/x86/atom/sse2/aorsmul_1.asm +++ /dev/null @@ -1,174 +0,0 @@ -dnl x86-32 mpn_addmul_1 and mpn_submul_1 optimised for Intel Atom. - -dnl Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato. - -dnl Copyright 2011 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. - -include(`../config.m4') - -C cycles/limb -C cycles/limb -C P5 - -C P6 model 0-8,10-12 - -C P6 model 9 (Banias) -C P6 model 13 (Dothan) -C P4 model 0 (Willamette) -C P4 model 1 (?) -C P4 model 2 (Northwood) -C P4 model 3 (Prescott) -C P4 model 4 (Nocona) -C Intel Atom 8 -C AMD K6 -C AMD K7 - -C AMD K8 -C AMD K10 - -define(`rp', `%edi') -define(`up', `%esi') -define(`n', `%ecx') - -ifdef(`OPERATION_addmul_1',` - define(ADDSUB, add) - define(func_1, mpn_addmul_1) - define(func_1c, mpn_addmul_1c)') -ifdef(`OPERATION_submul_1',` - define(ADDSUB, sub) - define(func_1, mpn_submul_1) - define(func_1c, mpn_submul_1c)') - -MULFUNC_PROLOGUE(mpn_addmul_1 mpn_addmul_1c mpn_submul_1 mpn_submul_1c) - - TEXT - ALIGN(16) -PROLOGUE(func_1) - xor %edx, %edx -L(ent): push %edi - push %esi - push %ebx - mov 16(%esp), rp - mov 20(%esp), up - mov 24(%esp), n - movd 28(%esp), %mm7 - test $1, n - jz L(fi0or2) - movd (up), %mm0 - pmuludq %mm7, %mm0 - shr $2, n - jnc L(fi1) - -L(fi3): lea -8(up), up - lea -8(rp), rp - movd 12(up), %mm1 - movd %mm0, %ebx - pmuludq %mm7, %mm1 - add $1, n C increment and clear carry - jmp L(lo3) - -L(fi1): movd %mm0, %ebx - jz L(wd1) - movd 4(up), %mm1 - pmuludq %mm7, %mm1 - jmp L(lo1) - -L(fi0or2): - movd (up), %mm1 - pmuludq %mm7, %mm1 - shr $2, n - movd 4(up), %mm0 - jc L(fi2) - lea -4(up), up - lea -4(rp), rp - movd %mm1, %eax - pmuludq %mm7, %mm0 - jmp L(lo0) - -L(fi2): lea 4(up), up - add $1, n C increment and clear carry - movd %mm1, %eax - lea -12(rp), rp - jmp L(lo2) - -C ALIGN(16) C alignment seems irrelevant -L(top): movd 4(up), %mm1 - adc $0, %edx - ADDSUB %eax, 12(rp) - movd %mm0, %ebx - pmuludq %mm7, %mm1 - lea 16(rp), rp -L(lo1): psrlq $32, %mm0 - adc %edx, %ebx - movd %mm0, %edx - movd %mm1, %eax - movd 8(up), %mm0 - pmuludq %mm7, %mm0 - adc $0, %edx - ADDSUB %ebx, (rp) -L(lo0): psrlq $32, %mm1 - adc %edx, %eax - movd %mm1, %edx - movd %mm0, %ebx - movd 12(up), %mm1 - pmuludq %mm7, %mm1 - adc $0, %edx - ADDSUB %eax, 4(rp) -L(lo3): psrlq $32, %mm0 - adc %edx, %ebx - movd %mm0, %edx - movd %mm1, %eax - lea 16(up), up - movd (up), %mm0 - adc $0, %edx - ADDSUB %ebx, 8(rp) -L(lo2): psrlq $32, %mm1 - adc %edx, %eax - movd %mm1, %edx - pmuludq %mm7, %mm0 - dec n - jnz L(top) - -L(end): adc n, %edx C n is zero here - ADDSUB %eax, 12(rp) - movd %mm0, %ebx - lea 16(rp), rp -L(wd1): psrlq $32, %mm0 - adc %edx, %ebx - movd %mm0, %eax - adc n, %eax - ADDSUB %ebx, (rp) - emms - adc n, %eax - pop %ebx - pop %esi - pop %edi - ret -EPILOGUE() -PROLOGUE(func_1c) - mov 20(%esp), %edx C carry - jmp L(ent) -EPILOGUE() diff --git a/gmp/mpn/x86/atom/sse2/bdiv_dbm1c.asm b/gmp/mpn/x86/atom/sse2/bdiv_dbm1c.asm deleted file mode 100644 index 782e914019..0000000000 --- a/gmp/mpn/x86/atom/sse2/bdiv_dbm1c.asm +++ /dev/null @@ -1,34 +0,0 @@ -dnl Intel Atom mpn_bdiv_dbm1. - -dnl Copyright 2011 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. - -include(`../config.m4') - -MULFUNC_PROLOGUE(mpn_bdiv_dbm1c) -include_mpn(`x86/pentium4/sse2/bdiv_dbm1c.asm') diff --git a/gmp/mpn/x86/atom/sse2/divrem_1.asm b/gmp/mpn/x86/atom/sse2/divrem_1.asm deleted file mode 100644 index f84709a22e..0000000000 --- a/gmp/mpn/x86/atom/sse2/divrem_1.asm +++ /dev/null @@ -1,34 +0,0 @@ -dnl Intel Atom mpn_divrem_1 -- mpn by limb division. - -dnl Copyright 2011 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. - -include(`../config.m4') - -MULFUNC_PROLOGUE(mpn_preinv_divrem_1 mpn_divrem_1c mpn_divrem_1) -include_mpn(`x86/pentium4/sse2/divrem_1.asm') diff --git a/gmp/mpn/x86/atom/sse2/mod_1_1.asm b/gmp/mpn/x86/atom/sse2/mod_1_1.asm deleted file mode 100644 index ae6581d9b6..0000000000 --- a/gmp/mpn/x86/atom/sse2/mod_1_1.asm +++ /dev/null @@ -1,34 +0,0 @@ -dnl Intel Atom/SSE2 mpn_mod_1_1. - -dnl Copyright 2009, 2011 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. - -include(`../config.m4') - -MULFUNC_PROLOGUE(mpn_mod_1_1p) -include_mpn(`x86/pentium4/sse2/mod_1_1.asm') diff --git a/gmp/mpn/x86/atom/sse2/mod_1_4.asm b/gmp/mpn/x86/atom/sse2/mod_1_4.asm deleted file mode 100644 index 31faa3f0a3..0000000000 --- a/gmp/mpn/x86/atom/sse2/mod_1_4.asm +++ /dev/null @@ -1,34 +0,0 @@ -dnl Intel Atom/SSE2 mpn_mod_1_4. - -dnl Copyright 2009, 2011 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. - -include(`../config.m4') - -MULFUNC_PROLOGUE(mpn_mod_1s_4p) -include_mpn(`x86/pentium4/sse2/mod_1_4.asm') diff --git a/gmp/mpn/x86/atom/sse2/mul_1.asm b/gmp/mpn/x86/atom/sse2/mul_1.asm deleted file mode 100644 index aa3bb974bb..0000000000 --- a/gmp/mpn/x86/atom/sse2/mul_1.asm +++ /dev/null @@ -1,124 +0,0 @@ -dnl Intel Atom mpn_mul_1. - -dnl Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato. - -dnl Copyright 2011 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. - -include(`../config.m4') - -C cycles/limb -C cycles/limb -C P5 - -C P6 model 0-8,10-12 - -C P6 model 9 (Banias) -C P6 model 13 (Dothan) -C P4 model 0 (Willamette) -C P4 model 1 (?) -C P4 model 2 (Northwood) -C P4 model 3 (Prescott) -C P4 model 4 (Nocona) -C Intel Atom 7.5 -C AMD K6 - -C AMD K7 - -C AMD K8 -C AMD K10 - -defframe(PARAM_CARRY,20) -defframe(PARAM_MUL, 16) -defframe(PARAM_SIZE, 12) -defframe(PARAM_SRC, 8) -defframe(PARAM_DST, 4) - -define(`rp', `%edx') -define(`up', `%esi') -define(`n', `%ecx') - -ASM_START() - TEXT - ALIGN(16) -deflit(`FRAME',0) - -PROLOGUE(mpn_mul_1c) - movd PARAM_CARRY, %mm6 C carry - jmp L(ent) -EPILOGUE() - - ALIGN(8) C for compact code -PROLOGUE(mpn_mul_1) - pxor %mm6, %mm6 -L(ent): push %esi FRAME_pushl() - mov PARAM_SRC, up - mov PARAM_SIZE, %eax C size - movd PARAM_MUL, %mm7 - movd (up), %mm0 - mov %eax, n - and $3, %eax - pmuludq %mm7, %mm0 - mov PARAM_DST, rp - jz L(lo0) - cmp $2, %eax - lea -16(up,%eax,4),up - lea -16(rp,%eax,4),rp - jc L(lo1) - jz L(lo2) - jmp L(lo3) - - ALIGN(16) -L(top): movd (up), %mm0 - pmuludq %mm7, %mm0 - psrlq $32, %mm6 - lea 16(rp), rp -L(lo0): paddq %mm0, %mm6 - movd 4(up), %mm0 - pmuludq %mm7, %mm0 - movd %mm6, (rp) - psrlq $32, %mm6 -L(lo3): paddq %mm0, %mm6 - movd 8(up), %mm0 - pmuludq %mm7, %mm0 - movd %mm6, 4(rp) - psrlq $32, %mm6 -L(lo2): paddq %mm0, %mm6 - movd 12(up), %mm0 - pmuludq %mm7, %mm0 - movd %mm6, 8(rp) - psrlq $32, %mm6 -L(lo1): paddq %mm0, %mm6 - sub $4, n - movd %mm6, 12(rp) - lea 16(up), up - ja L(top) - - psrlq $32, %mm6 - movd %mm6, %eax - emms - pop %esi FRAME_popl() - ret -EPILOGUE() -ASM_END() diff --git a/gmp/mpn/x86/atom/sse2/mul_basecase.asm b/gmp/mpn/x86/atom/sse2/mul_basecase.asm deleted file mode 100644 index 97d3aeb5ad..0000000000 --- a/gmp/mpn/x86/atom/sse2/mul_basecase.asm +++ /dev/null @@ -1,501 +0,0 @@ -dnl x86 mpn_mul_basecase -- Multiply two limb vectors and store the result in -dnl a third limb vector. - -dnl Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato. - -dnl Copyright 2011 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. - -include(`../config.m4') - -C TODO -C * Check if 'jmp N(%esp)' is well-predicted enough to allow us to combine the -C 4 large loops into one; we could use it for the outer loop branch. -C * Optimise code outside of inner loops. -C * Write combined addmul_1 feed-in a wind-down code, and use when iterating -C outer each loop. ("Overlapping software pipelining") -C * Postpone push of ebx until we know vn > 1. Perhaps use caller-saves regs -C for inlined mul_1, allowing us to postpone all pushes. -C * Perhaps write special code for vn <= un < M, for some small M. - -C void mpn_mul_basecase (mp_ptr wp, -C mp_srcptr xp, mp_size_t xn, -C mp_srcptr yp, mp_size_t yn); -C - -define(`rp', `%edi') -define(`up', `%esi') -define(`un', `%ecx') -define(`vp', `%ebp') -define(`vn', `36(%esp)') - - TEXT - ALIGN(16) -PROLOGUE(mpn_mul_basecase) - push %edi - push %esi - push %ebx - push %ebp - mov 20(%esp), rp - mov 24(%esp), up - mov 28(%esp), un - mov 32(%esp), vp - - movd (up), %mm0 - movd (vp), %mm7 - pmuludq %mm7, %mm0 - pxor %mm6, %mm6 - - mov un, %eax - and $3, %eax - jz L(of0) - cmp $2, %eax - jc L(of1) - jz L(of2) - -C ================================================================ - jmp L(m3) - ALIGN(16) -L(lm3): movd -4(up), %mm0 - pmuludq %mm7, %mm0 - psrlq $32, %mm6 - lea 16(rp), rp - paddq %mm0, %mm6 - movd (up), %mm0 - pmuludq %mm7, %mm0 - movd %mm6, -4(rp) - psrlq $32, %mm6 -L(m3): paddq %mm0, %mm6 - movd 4(up), %mm0 - pmuludq %mm7, %mm0 - movd %mm6, (rp) - psrlq $32, %mm6 - paddq %mm0, %mm6 - movd 8(up), %mm0 - pmuludq %mm7, %mm0 - movd %mm6, 4(rp) - psrlq $32, %mm6 - paddq %mm0, %mm6 - sub $4, un - movd %mm6, 8(rp) - lea 16(up), up - ja L(lm3) - - psrlq $32, %mm6 - movd %mm6, 12(rp) - - decl vn - jz L(done) - lea -8(rp), rp - -L(ol3): mov 28(%esp), un - neg un - lea 4(vp), vp - movd (vp), %mm7 C read next V limb - mov 24(%esp), up - lea 16(rp,un,4), rp - - movd (up), %mm0 - pmuludq %mm7, %mm0 - sar $2, un - movd 4(up), %mm1 - movd %mm0, %ebx - pmuludq %mm7, %mm1 - lea -8(up), up - xor %edx, %edx C zero edx and CF - jmp L(a3) - -L(la3): movd 4(up), %mm1 - adc $0, %edx - add %eax, 12(rp) - movd %mm0, %ebx - pmuludq %mm7, %mm1 - lea 16(rp), rp - psrlq $32, %mm0 - adc %edx, %ebx - movd %mm0, %edx - movd %mm1, %eax - movd 8(up), %mm0 - pmuludq %mm7, %mm0 - adc $0, %edx - add %ebx, (rp) - psrlq $32, %mm1 - adc %edx, %eax - movd %mm1, %edx - movd %mm0, %ebx - movd 12(up), %mm1 - pmuludq %mm7, %mm1 - adc $0, %edx - add %eax, 4(rp) -L(a3): psrlq $32, %mm0 - adc %edx, %ebx - movd %mm0, %edx - movd %mm1, %eax - lea 16(up), up - movd (up), %mm0 - adc $0, %edx - add %ebx, 8(rp) - psrlq $32, %mm1 - adc %edx, %eax - movd %mm1, %edx - pmuludq %mm7, %mm0 - inc un - jnz L(la3) - - adc un, %edx C un is zero here - add %eax, 12(rp) - movd %mm0, %ebx - psrlq $32, %mm0 - adc %edx, %ebx - movd %mm0, %eax - adc un, %eax - add %ebx, 16(rp) - adc un, %eax - mov %eax, 20(rp) - - decl vn - jnz L(ol3) - jmp L(done) - -C ================================================================ - ALIGN(16) -L(lm0): movd (up), %mm0 - pmuludq %mm7, %mm0 - psrlq $32, %mm6 - lea 16(rp), rp -L(of0): paddq %mm0, %mm6 - movd 4(up), %mm0 - pmuludq %mm7, %mm0 - movd %mm6, (rp) - psrlq $32, %mm6 - paddq %mm0, %mm6 - movd 8(up), %mm0 - pmuludq %mm7, %mm0 - movd %mm6, 4(rp) - psrlq $32, %mm6 - paddq %mm0, %mm6 - movd 12(up), %mm0 - pmuludq %mm7, %mm0 - movd %mm6, 8(rp) - psrlq $32, %mm6 - paddq %mm0, %mm6 - sub $4, un - movd %mm6, 12(rp) - lea 16(up), up - ja L(lm0) - - psrlq $32, %mm6 - movd %mm6, 16(rp) - - decl vn - jz L(done) - lea -4(rp), rp - -L(ol0): mov 28(%esp), un - neg un - lea 4(vp), vp - movd (vp), %mm7 C read next V limb - mov 24(%esp), up - lea 20(rp,un,4), rp - - movd (up), %mm1 - pmuludq %mm7, %mm1 - sar $2, un - movd 4(up), %mm0 - lea -4(up), up - movd %mm1, %eax - pmuludq %mm7, %mm0 - xor %edx, %edx C zero edx and CF - jmp L(a0) - -L(la0): movd 4(up), %mm1 - adc $0, %edx - add %eax, 12(rp) - movd %mm0, %ebx - pmuludq %mm7, %mm1 - lea 16(rp), rp - psrlq $32, %mm0 - adc %edx, %ebx - movd %mm0, %edx - movd %mm1, %eax - movd 8(up), %mm0 - pmuludq %mm7, %mm0 - adc $0, %edx - add %ebx, (rp) -L(a0): psrlq $32, %mm1 - adc %edx, %eax - movd %mm1, %edx - movd %mm0, %ebx - movd 12(up), %mm1 - pmuludq %mm7, %mm1 - adc $0, %edx - add %eax, 4(rp) - psrlq $32, %mm0 - adc %edx, %ebx - movd %mm0, %edx - movd %mm1, %eax - lea 16(up), up - movd (up), %mm0 - adc $0, %edx - add %ebx, 8(rp) - psrlq $32, %mm1 - adc %edx, %eax - movd %mm1, %edx - pmuludq %mm7, %mm0 - inc un - jnz L(la0) - - adc un, %edx C un is zero here - add %eax, 12(rp) - movd %mm0, %ebx - psrlq $32, %mm0 - adc %edx, %ebx - movd %mm0, %eax - adc un, %eax - add %ebx, 16(rp) - adc un, %eax - mov %eax, 20(rp) - - decl vn - jnz L(ol0) - jmp L(done) - -C ================================================================ - ALIGN(16) -L(lm1): movd -12(up), %mm0 - pmuludq %mm7, %mm0 - psrlq $32, %mm6 - lea 16(rp), rp - paddq %mm0, %mm6 - movd -8(up), %mm0 - pmuludq %mm7, %mm0 - movd %mm6, -12(rp) - psrlq $32, %mm6 - paddq %mm0, %mm6 - movd -4(up), %mm0 - pmuludq %mm7, %mm0 - movd %mm6, -8(rp) - psrlq $32, %mm6 - paddq %mm0, %mm6 - movd (up), %mm0 - pmuludq %mm7, %mm0 - movd %mm6, -4(rp) - psrlq $32, %mm6 -L(of1): paddq %mm0, %mm6 - sub $4, un - movd %mm6, (rp) - lea 16(up), up - ja L(lm1) - - psrlq $32, %mm6 - movd %mm6, 4(rp) - - decl vn - jz L(done) - lea -16(rp), rp - -L(ol1): mov 28(%esp), un - neg un - lea 4(vp), vp - movd (vp), %mm7 C read next V limb - mov 24(%esp), up - lea 24(rp,un,4), rp - - movd (up), %mm0 - pmuludq %mm7, %mm0 - sar $2, un - movd %mm0, %ebx - movd 4(up), %mm1 - pmuludq %mm7, %mm1 - xor %edx, %edx C zero edx and CF - inc un - jmp L(a1) - -L(la1): movd 4(up), %mm1 - adc $0, %edx - add %eax, 12(rp) - movd %mm0, %ebx - pmuludq %mm7, %mm1 - lea 16(rp), rp -L(a1): psrlq $32, %mm0 - adc %edx, %ebx - movd %mm0, %edx - movd %mm1, %eax - movd 8(up), %mm0 - pmuludq %mm7, %mm0 - adc $0, %edx - add %ebx, (rp) - psrlq $32, %mm1 - adc %edx, %eax - movd %mm1, %edx - movd %mm0, %ebx - movd 12(up), %mm1 - pmuludq %mm7, %mm1 - adc $0, %edx - add %eax, 4(rp) - psrlq $32, %mm0 - adc %edx, %ebx - movd %mm0, %edx - movd %mm1, %eax - lea 16(up), up - movd (up), %mm0 - adc $0, %edx - add %ebx, 8(rp) - psrlq $32, %mm1 - adc %edx, %eax - movd %mm1, %edx - pmuludq %mm7, %mm0 - inc un - jnz L(la1) - - adc un, %edx C un is zero here - add %eax, 12(rp) - movd %mm0, %ebx - psrlq $32, %mm0 - adc %edx, %ebx - movd %mm0, %eax - adc un, %eax - add %ebx, 16(rp) - adc un, %eax - mov %eax, 20(rp) - - decl vn - jnz L(ol1) - jmp L(done) - -C ================================================================ - ALIGN(16) -L(lm2): movd -8(up), %mm0 - pmuludq %mm7, %mm0 - psrlq $32, %mm6 - lea 16(rp), rp - paddq %mm0, %mm6 - movd -4(up), %mm0 - pmuludq %mm7, %mm0 - movd %mm6, -8(rp) - psrlq $32, %mm6 - paddq %mm0, %mm6 - movd (up), %mm0 - pmuludq %mm7, %mm0 - movd %mm6, -4(rp) - psrlq $32, %mm6 -L(of2): paddq %mm0, %mm6 - movd 4(up), %mm0 - pmuludq %mm7, %mm0 - movd %mm6, (rp) - psrlq $32, %mm6 - paddq %mm0, %mm6 - sub $4, un - movd %mm6, 4(rp) - lea 16(up), up - ja L(lm2) - - psrlq $32, %mm6 - movd %mm6, 8(rp) - - decl vn - jz L(done) - lea -12(rp), rp - -L(ol2): mov 28(%esp), un - neg un - lea 4(vp), vp - movd (vp), %mm7 C read next V limb - mov 24(%esp), up - lea 12(rp,un,4), rp - - movd (up), %mm1 - pmuludq %mm7, %mm1 - sar $2, un - movd 4(up), %mm0 - lea 4(up), up - movd %mm1, %eax - xor %edx, %edx C zero edx and CF - jmp L(lo2) - -L(la2): movd 4(up), %mm1 - adc $0, %edx - add %eax, 12(rp) - movd %mm0, %ebx - pmuludq %mm7, %mm1 - lea 16(rp), rp - psrlq $32, %mm0 - adc %edx, %ebx - movd %mm0, %edx - movd %mm1, %eax - movd 8(up), %mm0 - pmuludq %mm7, %mm0 - adc $0, %edx - add %ebx, (rp) - psrlq $32, %mm1 - adc %edx, %eax - movd %mm1, %edx - movd %mm0, %ebx - movd 12(up), %mm1 - pmuludq %mm7, %mm1 - adc $0, %edx - add %eax, 4(rp) - psrlq $32, %mm0 - adc %edx, %ebx - movd %mm0, %edx - movd %mm1, %eax - lea 16(up), up - movd (up), %mm0 - adc $0, %edx - add %ebx, 8(rp) -L(lo2): psrlq $32, %mm1 - adc %edx, %eax - movd %mm1, %edx - pmuludq %mm7, %mm0 - inc un - jnz L(la2) - - adc un, %edx C un is zero here - add %eax, 12(rp) - movd %mm0, %ebx - psrlq $32, %mm0 - adc %edx, %ebx - movd %mm0, %eax - adc un, %eax - add %ebx, 16(rp) - adc un, %eax - mov %eax, 20(rp) - - decl vn - jnz L(ol2) -C jmp L(done) - -C ================================================================ -L(done): - emms - pop %ebp - pop %ebx - pop %esi - pop %edi - ret -EPILOGUE() diff --git a/gmp/mpn/x86/atom/sse2/popcount.asm b/gmp/mpn/x86/atom/sse2/popcount.asm deleted file mode 100644 index 7847aec8e6..0000000000 --- a/gmp/mpn/x86/atom/sse2/popcount.asm +++ /dev/null @@ -1,35 +0,0 @@ -dnl Intel Atom mpn_popcount -- population count. - -dnl Copyright 2011 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. - -include(`../config.m4') - - -MULFUNC_PROLOGUE(mpn_popcount) -include_mpn(`x86/pentium4/sse2/popcount.asm') diff --git a/gmp/mpn/x86/atom/sse2/sqr_basecase.asm b/gmp/mpn/x86/atom/sse2/sqr_basecase.asm deleted file mode 100644 index af19ed854d..0000000000 --- a/gmp/mpn/x86/atom/sse2/sqr_basecase.asm +++ /dev/null @@ -1,634 +0,0 @@ -dnl x86 mpn_sqr_basecase -- square an mpn number, optimised for atom. - -dnl Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato. - -dnl Copyright 2011 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. - -include(`../config.m4') - -C TODO -C * Check if 'jmp N(%esp)' is well-predicted enough to allow us to combine the -C 4 large loops into one; we could use it for the outer loop branch. -C * Optimise code outside of inner loops. -C * Write combined addmul_1 feed-in a wind-down code, and use when iterating -C outer each loop. ("Overlapping software pipelining") -C * Perhaps use caller-saves regs for inlined mul_1, allowing us to postpone -C all pushes. -C * Perhaps write special code for n < M, for some small M. -C * Replace inlined addmul_1 with smaller code from aorsmul_1.asm, or perhaps -C with even less pipelined code. -C * We run the outer loop until we have a 2-limb by 1-limb addmul_1 left. -C Consider breaking out earlier, saving high the cost of short loops. - -C void mpn_sqr_basecase (mp_ptr wp, -C mp_srcptr xp, mp_size_t xn); - -define(`rp', `%edi') -define(`up', `%esi') -define(`n', `%ecx') - -define(`un', `%ebp') - - TEXT - ALIGN(16) -PROLOGUE(mpn_sqr_basecase) - push %edi - push %esi - mov 12(%esp), rp - mov 16(%esp), up - mov 20(%esp), n - - lea 4(rp), rp C write triangular product starting at rp[1] - dec n - movd (up), %mm7 - - jz L(one) - lea 4(up), up - push %ebx - push %ebp - mov n, %eax - - movd (up), %mm0 - neg n - pmuludq %mm7, %mm0 - pxor %mm6, %mm6 - mov n, un - - and $3, %eax - jz L(of0) - cmp $2, %eax - jc L(of1) - jz L(of2) - -C ================================================================ - jmp L(m3) - ALIGN(16) -L(lm3): movd -4(up), %mm0 - pmuludq %mm7, %mm0 - psrlq $32, %mm6 - lea 16(rp), rp - paddq %mm0, %mm6 - movd (up), %mm0 - pmuludq %mm7, %mm0 - movd %mm6, -4(rp) - psrlq $32, %mm6 -L(m3): paddq %mm0, %mm6 - movd 4(up), %mm0 - pmuludq %mm7, %mm0 - movd %mm6, (rp) - psrlq $32, %mm6 - paddq %mm0, %mm6 - movd 8(up), %mm0 - pmuludq %mm7, %mm0 - movd %mm6, 4(rp) - psrlq $32, %mm6 - paddq %mm0, %mm6 - add $4, un - movd %mm6, 8(rp) - lea 16(up), up - js L(lm3) - - psrlq $32, %mm6 - movd %mm6, 12(rp) - - inc n -C jz L(done) - lea -12(up), up - lea 4(rp), rp - jmp L(ol2) - -C ================================================================ - ALIGN(16) -L(lm0): movd (up), %mm0 - pmuludq %mm7, %mm0 - psrlq $32, %mm6 - lea 16(rp), rp -L(of0): paddq %mm0, %mm6 - movd 4(up), %mm0 - pmuludq %mm7, %mm0 - movd %mm6, (rp) - psrlq $32, %mm6 - paddq %mm0, %mm6 - movd 8(up), %mm0 - pmuludq %mm7, %mm0 - movd %mm6, 4(rp) - psrlq $32, %mm6 - paddq %mm0, %mm6 - movd 12(up), %mm0 - pmuludq %mm7, %mm0 - movd %mm6, 8(rp) - psrlq $32, %mm6 - paddq %mm0, %mm6 - add $4, un - movd %mm6, 12(rp) - lea 16(up), up - js L(lm0) - - psrlq $32, %mm6 - movd %mm6, 16(rp) - - inc n -C jz L(done) - lea -8(up), up - lea 8(rp), rp - jmp L(ol3) - -C ================================================================ - ALIGN(16) -L(lm1): movd -12(up), %mm0 - pmuludq %mm7, %mm0 - psrlq $32, %mm6 - lea 16(rp), rp - paddq %mm0, %mm6 - movd -8(up), %mm0 - pmuludq %mm7, %mm0 - movd %mm6, -12(rp) - psrlq $32, %mm6 - paddq %mm0, %mm6 - movd -4(up), %mm0 - pmuludq %mm7, %mm0 - movd %mm6, -8(rp) - psrlq $32, %mm6 - paddq %mm0, %mm6 - movd (up), %mm0 - pmuludq %mm7, %mm0 - movd %mm6, -4(rp) - psrlq $32, %mm6 -L(of1): paddq %mm0, %mm6 - add $4, un - movd %mm6, (rp) - lea 16(up), up - js L(lm1) - - psrlq $32, %mm6 - movd %mm6, 4(rp) - - inc n - jz L(done) C goes away when we add special n=2 code - lea -20(up), up - lea -4(rp), rp - jmp L(ol0) - -C ================================================================ - ALIGN(16) -L(lm2): movd -8(up), %mm0 - pmuludq %mm7, %mm0 - psrlq $32, %mm6 - lea 16(rp), rp - paddq %mm0, %mm6 - movd -4(up), %mm0 - pmuludq %mm7, %mm0 - movd %mm6, -8(rp) - psrlq $32, %mm6 - paddq %mm0, %mm6 - movd (up), %mm0 - pmuludq %mm7, %mm0 - movd %mm6, -4(rp) - psrlq $32, %mm6 -L(of2): paddq %mm0, %mm6 - movd 4(up), %mm0 - pmuludq %mm7, %mm0 - movd %mm6, (rp) - psrlq $32, %mm6 - paddq %mm0, %mm6 - add $4, un - movd %mm6, 4(rp) - lea 16(up), up - js L(lm2) - - psrlq $32, %mm6 - movd %mm6, 8(rp) - - inc n -C jz L(done) - lea -16(up), up -C lea (rp), rp -C jmp L(ol1) - -C ================================================================ - -L(ol1): lea 4(up,n,4), up - movd (up), %mm7 C read next U invariant limb - lea 8(rp,n,4), rp - mov n, un - - movd 4(up), %mm1 - pmuludq %mm7, %mm1 - sar $2, un - movd %mm1, %ebx - inc un - jz L(re1) - - movd 8(up), %mm0 - pmuludq %mm7, %mm0 - xor %edx, %edx C zero edx and CF - jmp L(a1) - -L(la1): adc $0, %edx - add %ebx, 12(rp) - movd %mm0, %eax - pmuludq %mm7, %mm1 - lea 16(rp), rp - psrlq $32, %mm0 - adc %edx, %eax - movd %mm0, %edx - movd %mm1, %ebx - movd 8(up), %mm0 - pmuludq %mm7, %mm0 - adc $0, %edx - add %eax, (rp) -L(a1): psrlq $32, %mm1 - adc %edx, %ebx - movd %mm1, %edx - movd %mm0, %eax - movd 12(up), %mm1 - pmuludq %mm7, %mm1 - adc $0, %edx - add %ebx, 4(rp) - psrlq $32, %mm0 - adc %edx, %eax - movd %mm0, %edx - movd %mm1, %ebx - lea 16(up), up - movd (up), %mm0 - adc $0, %edx - add %eax, 8(rp) - psrlq $32, %mm1 - adc %edx, %ebx - movd %mm1, %edx - pmuludq %mm7, %mm0 - inc un - movd 4(up), %mm1 - jnz L(la1) - - adc un, %edx C un is zero here - add %ebx, 12(rp) - movd %mm0, %eax - pmuludq %mm7, %mm1 - lea 16(rp), rp - psrlq $32, %mm0 - adc %edx, %eax - movd %mm0, %edx - movd %mm1, %ebx - adc un, %edx - add %eax, (rp) - psrlq $32, %mm1 - adc %edx, %ebx - movd %mm1, %eax - adc un, %eax - add %ebx, 4(rp) - adc un, %eax - mov %eax, 8(rp) - - inc n - -C ================================================================ - -L(ol0): lea (up,n,4), up - movd 4(up), %mm7 C read next U invariant limb - lea 4(rp,n,4), rp - mov n, un - - movd 8(up), %mm0 - pmuludq %mm7, %mm0 - sar $2, un - movd 12(up), %mm1 - movd %mm0, %eax - pmuludq %mm7, %mm1 - xor %edx, %edx C zero edx and CF - jmp L(a0) - -L(la0): adc $0, %edx - add %ebx, 12(rp) - movd %mm0, %eax - pmuludq %mm7, %mm1 - lea 16(rp), rp - psrlq $32, %mm0 - adc %edx, %eax - movd %mm0, %edx - movd %mm1, %ebx - movd 8(up), %mm0 - pmuludq %mm7, %mm0 - adc $0, %edx - add %eax, (rp) - psrlq $32, %mm1 - adc %edx, %ebx - movd %mm1, %edx - movd %mm0, %eax - movd 12(up), %mm1 - pmuludq %mm7, %mm1 - adc $0, %edx - add %ebx, 4(rp) -L(a0): psrlq $32, %mm0 - adc %edx, %eax - movd %mm0, %edx - movd %mm1, %ebx - lea 16(up), up - movd (up), %mm0 - adc $0, %edx - add %eax, 8(rp) - psrlq $32, %mm1 - adc %edx, %ebx - movd %mm1, %edx - pmuludq %mm7, %mm0 - inc un - movd 4(up), %mm1 - jnz L(la0) - - adc un, %edx C un is zero here - add %ebx, 12(rp) - movd %mm0, %eax - pmuludq %mm7, %mm1 - lea 16(rp), rp - psrlq $32, %mm0 - adc %edx, %eax - movd %mm0, %edx - movd %mm1, %ebx - adc un, %edx - add %eax, (rp) - psrlq $32, %mm1 - adc %edx, %ebx - movd %mm1, %eax - adc un, %eax - add %ebx, 4(rp) - adc un, %eax - mov %eax, 8(rp) - - inc n - -C ================================================================ - -L(ol3): lea 12(up,n,4), up - movd -8(up), %mm7 C read next U invariant limb - lea (rp,n,4), rp C put rp back - mov n, un - - movd -4(up), %mm1 - pmuludq %mm7, %mm1 - sar $2, un - movd %mm1, %ebx - movd (up), %mm0 - xor %edx, %edx C zero edx and CF - jmp L(a3) - -L(la3): adc $0, %edx - add %ebx, 12(rp) - movd %mm0, %eax - pmuludq %mm7, %mm1 - lea 16(rp), rp - psrlq $32, %mm0 - adc %edx, %eax - movd %mm0, %edx - movd %mm1, %ebx - movd 8(up), %mm0 - pmuludq %mm7, %mm0 - adc $0, %edx - add %eax, (rp) - psrlq $32, %mm1 - adc %edx, %ebx - movd %mm1, %edx - movd %mm0, %eax - movd 12(up), %mm1 - pmuludq %mm7, %mm1 - adc $0, %edx - add %ebx, 4(rp) - psrlq $32, %mm0 - adc %edx, %eax - movd %mm0, %edx - movd %mm1, %ebx - lea 16(up), up - movd (up), %mm0 - adc $0, %edx - add %eax, 8(rp) -L(a3): psrlq $32, %mm1 - adc %edx, %ebx - movd %mm1, %edx - pmuludq %mm7, %mm0 - inc un - movd 4(up), %mm1 - jnz L(la3) - - adc un, %edx C un is zero here - add %ebx, 12(rp) - movd %mm0, %eax - pmuludq %mm7, %mm1 - lea 16(rp), rp - psrlq $32, %mm0 - adc %edx, %eax - movd %mm0, %edx - movd %mm1, %ebx - adc un, %edx - add %eax, (rp) - psrlq $32, %mm1 - adc %edx, %ebx - movd %mm1, %eax - adc un, %eax - add %ebx, 4(rp) - adc un, %eax - mov %eax, 8(rp) - - inc n - -C ================================================================ - -L(ol2): lea 8(up,n,4), up - movd -4(up), %mm7 C read next U invariant limb - lea 12(rp,n,4), rp - mov n, un - - movd (up), %mm0 - pmuludq %mm7, %mm0 - xor %edx, %edx - sar $2, un - movd 4(up), %mm1 - test un, un C clear carry - movd %mm0, %eax - pmuludq %mm7, %mm1 - inc un - jnz L(a2) - jmp L(re2) - -L(la2): adc $0, %edx - add %ebx, 12(rp) - movd %mm0, %eax - pmuludq %mm7, %mm1 - lea 16(rp), rp -L(a2): psrlq $32, %mm0 - adc %edx, %eax - movd %mm0, %edx - movd %mm1, %ebx - movd 8(up), %mm0 - pmuludq %mm7, %mm0 - adc $0, %edx - add %eax, (rp) - psrlq $32, %mm1 - adc %edx, %ebx - movd %mm1, %edx - movd %mm0, %eax - movd 12(up), %mm1 - pmuludq %mm7, %mm1 - adc $0, %edx - add %ebx, 4(rp) - psrlq $32, %mm0 - adc %edx, %eax - movd %mm0, %edx - movd %mm1, %ebx - lea 16(up), up - movd (up), %mm0 - adc $0, %edx - add %eax, 8(rp) - psrlq $32, %mm1 - adc %edx, %ebx - movd %mm1, %edx - pmuludq %mm7, %mm0 - inc un - movd 4(up), %mm1 - jnz L(la2) - - adc un, %edx C un is zero here - add %ebx, 12(rp) - movd %mm0, %eax - pmuludq %mm7, %mm1 - lea 16(rp), rp - psrlq $32, %mm0 - adc %edx, %eax - movd %mm0, %edx - movd %mm1, %ebx - adc un, %edx - add %eax, (rp) - psrlq $32, %mm1 - adc %edx, %ebx - movd %mm1, %eax - adc un, %eax - add %ebx, 4(rp) - adc un, %eax - mov %eax, 8(rp) - - inc n - jmp L(ol1) - -C ================================================================ -L(re2): psrlq $32, %mm0 - movd (up), %mm7 C read next U invariant limb - adc %edx, %eax - movd %mm0, %edx - movd %mm1, %ebx - adc un, %edx - add %eax, (rp) - lea 4(rp), rp - psrlq $32, %mm1 - adc %edx, %ebx - movd %mm1, %eax - movd 4(up), %mm1 - adc un, %eax - add %ebx, (rp) - pmuludq %mm7, %mm1 - adc un, %eax - mov %eax, 4(rp) - movd %mm1, %ebx - -L(re1): psrlq $32, %mm1 - add %ebx, 4(rp) - movd %mm1, %eax - adc un, %eax - xor n, n C make n zeroness assumption below true - mov %eax, 8(rp) - -L(done): C n is zero here - mov 24(%esp), up - mov 28(%esp), %eax - - movd (up), %mm0 - inc %eax - pmuludq %mm0, %mm0 - lea 4(up), up - mov 20(%esp), rp - shr %eax - movd %mm0, (rp) - psrlq $32, %mm0 - lea -12(rp), rp - mov %eax, 28(%esp) - jnc L(odd) - - movd %mm0, %ebp - movd (up), %mm0 - lea 8(rp), rp - pmuludq %mm0, %mm0 - lea -4(up), up - add 8(rp), %ebp - movd %mm0, %edx - adc 12(rp), %edx - rcr n - jmp L(ent) - -C ALIGN(16) C alignment seems irrelevant -L(top): movd (up), %mm1 - adc n, n - movd %mm0, %eax - pmuludq %mm1, %mm1 - movd 4(up), %mm0 - adc (rp), %eax - movd %mm1, %ebx - pmuludq %mm0, %mm0 - psrlq $32, %mm1 - adc 4(rp), %ebx - movd %mm1, %ebp - movd %mm0, %edx - adc 8(rp), %ebp - adc 12(rp), %edx - rcr n C FIXME: isn't this awfully slow on atom??? - adc %eax, (rp) - adc %ebx, 4(rp) -L(ent): lea 8(up), up - adc %ebp, 8(rp) - psrlq $32, %mm0 - adc %edx, 12(rp) -L(odd): decl 28(%esp) - lea 16(rp), rp - jnz L(top) - -L(end): adc n, n - movd %mm0, %eax - adc n, %eax - mov %eax, (rp) - -L(rtn): emms - pop %ebp - pop %ebx - pop %esi - pop %edi - ret - -L(one): pmuludq %mm7, %mm7 - movq %mm7, -4(rp) - emms - pop %esi - pop %edi - ret -EPILOGUE() diff --git a/gmp/mpn/x86/atom/sublsh1_n.asm b/gmp/mpn/x86/atom/sublsh1_n.asm deleted file mode 100644 index d3e7e5b5cb..0000000000 --- a/gmp/mpn/x86/atom/sublsh1_n.asm +++ /dev/null @@ -1,34 +0,0 @@ -dnl Intel Atom mpn_sublsh1_n -- rp[] = up[] - (vp[] << 1) - -dnl Copyright 2011 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. - -include(`../config.m4') - -MULFUNC_PROLOGUE(mpn_sublsh1_n_ip1) -include_mpn(`x86/k7/sublsh1_n.asm') diff --git a/gmp/mpn/x86/atom/sublsh2_n.asm b/gmp/mpn/x86/atom/sublsh2_n.asm deleted file mode 100644 index 79405cf9f4..0000000000 --- a/gmp/mpn/x86/atom/sublsh2_n.asm +++ /dev/null @@ -1,57 +0,0 @@ -dnl Intel Atom mpn_addlsh2_n/mpn_sublsh2_n -- rp[] = up[] +- (vp[] << 2). - -dnl Contributed to the GNU project by Marco Bodrato. - -dnl Copyright 2011 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. - -include(`../config.m4') - -define(LSH, 2) -define(RSH, 30) - -ifdef(`OPERATION_addlsh2_n', ` - define(M4_inst, adcl) - define(M4_opp, subl) - define(M4_function, mpn_addlsh2_n) - define(M4_function_c, mpn_addlsh2_nc) - define(M4_ip_function_c, mpn_addlsh2_nc_ip1) - define(M4_ip_function, mpn_addlsh2_n_ip1) -',`ifdef(`OPERATION_sublsh2_n', ` - define(M4_inst, sbbl) - define(M4_opp, addl) - define(M4_function, mpn_sublsh2_n) - define(M4_function_c, mpn_sublsh2_nc) - define(M4_ip_function_c, mpn_sublsh2_nc_ip1) - define(M4_ip_function, mpn_sublsh2_n_ip1) -',`m4_error(`Need OPERATION_addlsh2_n or OPERATION_sublsh2_n -')')') - -MULFUNC_PROLOGUE(mpn_sublsh2_n mpn_sublsh2_nc mpn_sublsh2_n_ip1 mpn_sublsh2_nc_ip1) - -include_mpn(`x86/atom/aorslshC_n.asm') |