diff options
author | Torbjorn Granlund <tege@gmplib.org> | 2011-11-29 21:59:39 +0100 |
---|---|---|
committer | Torbjorn Granlund <tege@gmplib.org> | 2011-11-29 21:59:39 +0100 |
commit | 23df1f61b5f28b4bf4953acd2b069d1f09d6450f (patch) | |
tree | 90c979b4081ce64d927d15f630aff84e245652ef | |
parent | cdaf5d1a1e737e7db82e6509571468fa660c043b (diff) | |
download | gmp-23df1f61b5f28b4bf4953acd2b069d1f09d6450f.tar.gz |
Add DOS64 ABI support to most files.
47 files changed, 458 insertions, 111 deletions
@@ -1,3 +1,7 @@ +2011-11-29 Torbjorn Granlund <tege@gmplib.org> + + * mpn/x86_64: Add DOS64 ABI support to most files. + 2011-11-28 Torbjorn Granlund <tege@gmplib.org> * mpn/x86_64/mul_basecase.asm: Support ABI DOS64. diff --git a/configure.in b/configure.in index 5b7cf188a..601d6348f 100644 --- a/configure.in +++ b/configure.in @@ -1640,7 +1640,6 @@ case $host in ;; *-*-mingw* | *-*-cygwin) limb_64=longlong - extra_functions_64="" # FIXME: remove when invert_limb is ported CALLING_CONVENTIONS_OBJS_64="" AC_DEFINE(HOST_DOS64,1,[Define to 1 for Windos/64]) AC_SUBST(GMP_NONSTD_ABI,DOS64) diff --git a/mpn/x86_64/addmul_2.asm b/mpn/x86_64/addmul_2.asm index 107c3dafe..5c6647888 100644 --- a/mpn/x86_64/addmul_2.asm +++ b/mpn/x86_64/addmul_2.asm @@ -50,10 +50,14 @@ define(`w2', `%rbp') define(`w3', `%r10') define(`n', `%r11') +ABI_SUPPORT(DOS64) +ABI_SUPPORT(ELF64) + +ASM_START() TEXT ALIGN(16) -ASM_START() PROLOGUE(mpn_addmul_2) + DOS64_ENTRY(4) mov n_param, n push %rbx push %rbp @@ -164,6 +168,7 @@ L(end): xor R32(w1), R32(w1) pop %rbp pop %rbx + DOS64_EXIT() ret EPILOGUE() diff --git a/mpn/x86_64/aorrlsh1_n.asm b/mpn/x86_64/aorrlsh1_n.asm index 2ea556b73..dda7d590e 100644 --- a/mpn/x86_64/aorrlsh1_n.asm +++ b/mpn/x86_64/aorrlsh1_n.asm @@ -1,7 +1,8 @@ dnl AMD64 mpn_addlsh1_n -- rp[] = up[] + (vp[] << 1) dnl AMD64 mpn_rsblsh1_n -- rp[] = (vp[] << 1) - up[] -dnl Copyright 2003, 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc. +dnl Copyright 2003, 2005, 2006, 2007, 2008, 2009, 2011 Free Software +dnl Foundation, Inc. dnl This file is part of the GNU MP Library. @@ -54,10 +55,14 @@ ifdef(`OPERATION_rsblsh1_n', ` MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_rsblsh1_n) +ABI_SUPPORT(DOS64) +ABI_SUPPORT(ELF64) + ASM_START() TEXT ALIGN(16) PROLOGUE(func) + DOS64_ENTRY(4) push %rbp mov (vp), %r8 @@ -147,5 +152,6 @@ ifdef(`OPERATION_rsblsh1_n',` movslq R32(%rbp), %rax') pop %rbp + DOS64_EXIT() ret EPILOGUE() diff --git a/mpn/x86_64/aorrlsh2_n.asm b/mpn/x86_64/aorrlsh2_n.asm index 6d55cfd10..8c427a674 100644 --- a/mpn/x86_64/aorrlsh2_n.asm +++ b/mpn/x86_64/aorrlsh2_n.asm @@ -3,7 +3,7 @@ dnl AMD64 mpn_rsblsh2_n -- rp[] = (vp[] << 2) - up[] dnl Contributed to the GNU project by Torbjorn Granlund. -dnl Copyright 2009, 2010 Free Software Foundation, Inc. +dnl Copyright 2009, 2010, 2011 Free Software Foundation, Inc. dnl This file is part of the GNU MP Library. @@ -36,4 +36,7 @@ ifdef(`OPERATION_rsblsh2_n',` MULFUNC_PROLOGUE(mpn_addlsh2_n mpn_rsblsh2_n) +ABI_SUPPORT(DOS64) +ABI_SUPPORT(ELF64) + include_mpn(`x86_64/aorrlshC_n.asm') diff --git a/mpn/x86_64/aorrlshC_n.asm b/mpn/x86_64/aorrlshC_n.asm index cab0b07f4..ae9a9d952 100644 --- a/mpn/x86_64/aorrlshC_n.asm +++ b/mpn/x86_64/aorrlshC_n.asm @@ -1,7 +1,7 @@ dnl AMD64 mpn_addlshC_n -- rp[] = up[] + (vp[] << C) dnl AMD64 mpn_rsblshC_n -- rp[] = (vp[] << C) - up[] -dnl Copyright 2009, 2010 Free Software Foundation, Inc. +dnl Copyright 2009, 2010, 2011 Free Software Foundation, Inc. dnl This file is part of the GNU MP Library. @@ -37,10 +37,14 @@ define(`n', `%rcx') define(M, eval(m4_lshift(1,LSH))) +ABI_SUPPORT(DOS64) +ABI_SUPPORT(ELF64) + ASM_START() TEXT ALIGN(16) PROLOGUE(func) + DOS64_ENTRY(4) push %r12 push %r13 push %r14 @@ -140,5 +144,6 @@ ifelse(ADDSUB,add,` pop %r14 pop %r13 pop %r12 + DOS64_EXIT() ret EPILOGUE() diff --git a/mpn/x86_64/aorrlsh_n.asm b/mpn/x86_64/aorrlsh_n.asm index d19dea535..8ab3688d2 100644 --- a/mpn/x86_64/aorrlsh_n.asm +++ b/mpn/x86_64/aorrlsh_n.asm @@ -56,10 +56,23 @@ ifdef(`OPERATION_rsblsh_n',` MULFUNC_PROLOGUE(mpn_addlsh_n mpn_rsblsh_n) +ifdef(`HOST_DOS64',` + define(`IFDOS', `$1') + define(`IFELF', `') +',` + define(`IFDOS', `') + define(`IFELF', `$1') +') + +ABI_SUPPORT(DOS64) +ABI_SUPPORT(ELF64) + ASM_START() TEXT ALIGN(16) PROLOGUE(func) + DOS64_ENTRY(4) +IFDOS(` mov 56(%rsp), %r8d ') push %r12 push %r13 push %r14 @@ -155,5 +168,6 @@ L(end): add R32(%rbx), R32(%rbx) pop %r14 pop %r13 pop %r12 + DOS64_EXIT() ret EPILOGUE() diff --git a/mpn/x86_64/aors_n.asm b/mpn/x86_64/aors_n.asm index 916e9b664..eadde641b 100644 --- a/mpn/x86_64/aors_n.asm +++ b/mpn/x86_64/aors_n.asm @@ -1,7 +1,7 @@ dnl AMD64 mpn_add_n, mpn_sub_n -dnl Copyright 2003, 2004, 2005, 2007, 2008, 2010 Free Software Foundation, -dnl Inc. +dnl Copyright 2003, 2004, 2005, 2007, 2008, 2010, 2011 Free Software +dnl Foundation, Inc. dnl This file is part of the GNU MP Library. @@ -30,15 +30,15 @@ C Intel SBR 1.59 C Intel atom 4 C VIA nano 3.25 -C The inner loop of this code is the result of running a code generation and +C The loop of this code is the result of running a code generation and C optimization tool suite written by David Harvey and Torbjorn Granlund. C INPUT PARAMETERS -define(`rp', `%rdi') -define(`up', `%rsi') -define(`vp', `%rdx') -define(`n', `%rcx') -define(`cy', `%r8') C (only for mpn_add_nc) +define(`rp', `%rdi') C rcx +define(`up', `%rsi') C rdx +define(`vp', `%rdx') C r8 +define(`n', `%rcx') C r9 +define(`cy', `%r8') C rsp+40 (only for mpn_add_nc) ifdef(`OPERATION_add_n', ` define(ADCSBB, adc) @@ -51,10 +51,23 @@ ifdef(`OPERATION_sub_n', ` MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc) +ifdef(`HOST_DOS64',` + define(`IFDOS', `$1') + define(`IFELF', `') +',` + define(`IFDOS', `') + define(`IFELF', `$1') +') + +ABI_SUPPORT(DOS64) +ABI_SUPPORT(ELF64) + ASM_START() TEXT ALIGN(16) PROLOGUE(func_nc) + DOS64_ENTRY(4) +IFDOS(` mov 56(%rsp), %r8 ') mov R32(n), R32(%rax) shr $2, n and $3, R32(%rax) @@ -69,6 +82,7 @@ PROLOGUE(func_nc) EPILOGUE() ALIGN(16) PROLOGUE(func) + DOS64_ENTRY(4) mov R32(n), R32(%rax) shr $2, n and $3, R32(%rax) @@ -85,6 +99,7 @@ L(lt4): dec R32(%rax) ADCSBB (vp), %r8 mov %r8, (rp) adc R32(%rax), R32(%rax) + DOS64_EXIT() ret L(2): dec R32(%rax) @@ -95,6 +110,7 @@ L(2): dec R32(%rax) mov %r8, (rp) mov %r9, 8(rp) adc R32(%rax), R32(%rax) + DOS64_EXIT() ret L(3): mov 16(up), %r10 @@ -105,6 +121,7 @@ L(3): mov 16(up), %r10 mov %r9, 8(rp) mov %r10, 16(rp) setc R8(%rax) + DOS64_EXIT() ret ALIGN(16) @@ -142,5 +159,6 @@ L(end): lea 32(up), up dec R32(%rax) jnz L(lt4) adc R32(%rax), R32(%rax) + DOS64_EXIT() ret EPILOGUE() diff --git a/mpn/x86_64/aorscnd_n.asm b/mpn/x86_64/aorscnd_n.asm index 19ea42f2a..d22a2a218 100644 --- a/mpn/x86_64/aorscnd_n.asm +++ b/mpn/x86_64/aorscnd_n.asm @@ -59,10 +59,23 @@ ifdef(`OPERATION_subcnd_n', ` MULFUNC_PROLOGUE(mpn_addcnd_n mpn_subcnd_n) +ifdef(`HOST_DOS64',` + define(`IFDOS', `$1') + define(`IFELF', `') +',` + define(`IFDOS', `') + define(`IFELF', `$1') +') + +ABI_SUPPORT(DOS64) +ABI_SUPPORT(ELF64) + ASM_START() TEXT ALIGN(16) PROLOGUE(func) + DOS64_ENTRY(4) +IFDOS(` mov 56(%rsp), %r8 ') push %rbx push %rbp push %r12 @@ -160,5 +173,6 @@ L(end): neg R32(%rax) pop %r12 pop %rbp pop %rbx + DOS64_EXIT() ret EPILOGUE() diff --git a/mpn/x86_64/bdiv_dbm1c.asm b/mpn/x86_64/bdiv_dbm1c.asm index f6a77507d..0fef478d9 100644 --- a/mpn/x86_64/bdiv_dbm1c.asm +++ b/mpn/x86_64/bdiv_dbm1c.asm @@ -41,10 +41,23 @@ define(`cy', `%r8') define(`n', `%r9') +ifdef(`HOST_DOS64',` + define(`IFDOS', `$1') + define(`IFELF', `') +',` + define(`IFDOS', `') + define(`IFELF', `$1') +') + +ABI_SUPPORT(DOS64) +ABI_SUPPORT(ELF64) + ASM_START() TEXT ALIGN(16) PROLOGUE(mpn_bdiv_dbm1c) + DOS64_ENTRY(4) +IFDOS(` mov 56(%rsp), %r8 ') mov (up), %rax mov n_param, n mov R32(n_param), R32(%r11) @@ -84,6 +97,7 @@ L(lo1): sub %rax, %r8 add $4, n jnz L(top) -L(end): mov %r8, %rax + mov %r8, %rax + DOS64_EXIT() ret EPILOGUE() diff --git a/mpn/x86_64/bdiv_q_1.asm b/mpn/x86_64/bdiv_q_1.asm index 01624a52a..e1e1db5a5 100644 --- a/mpn/x86_64/bdiv_q_1.asm +++ b/mpn/x86_64/bdiv_q_1.asm @@ -1,8 +1,8 @@ dnl AMD64 mpn_bdiv_q_1, mpn_pi1_bdiv_q_1 -- schoolbook Hensel division by dnl 1-limb divisor, returning quotient only. -dnl Copyright 2001, 2002, 2004, 2005, 2006, 2009 Free Software Foundation, -dnl Inc. +dnl Copyright 2001, 2002, 2004, 2005, 2006, 2009, 2011 Free Software +dnl Foundation, Inc. dnl This file is part of the GNU MP Library. @@ -41,10 +41,22 @@ C di r8 just mpn_pi1_bdiv_q_1 C shift r9 just mpn_pi1_bdiv_q_1 +ifdef(`HOST_DOS64',` + define(`IFDOS', `$1') + define(`IFELF', `') +',` + define(`IFDOS', `') + define(`IFELF', `$1') +') + +ABI_SUPPORT(DOS64) +ABI_SUPPORT(ELF64) + ASM_START() TEXT ALIGN(16) PROLOGUE(mpn_bdiv_q_1) + DOS64_ENTRY(4) push %rbx mov %rcx, %rax @@ -91,6 +103,9 @@ L(evn): bsf %rax, %rcx EPILOGUE() PROLOGUE(mpn_pi1_bdiv_q_1) + DOS64_ENTRY(4) +IFDOS(` mov 56(%rsp), %r8 ') +IFDOS(` mov 64(%rsp), %r9 ') push %rbx mov %rcx, %r11 C d @@ -144,11 +159,13 @@ L(ent): imul %r8, %rax imul %r8, %rax mov %rax, (%rdi) pop %rbx + DOS64_EXIT() ret L(one): shr R8(%rcx), %rax imul %r8, %rax mov %rax, (%rdi) pop %rbx + DOS64_EXIT() ret EPILOGUE() diff --git a/mpn/x86_64/com.asm b/mpn/x86_64/com.asm index 6ff62eeac..3a232fc20 100644 --- a/mpn/x86_64/com.asm +++ b/mpn/x86_64/com.asm @@ -1,6 +1,6 @@ dnl AMD64 mpn_com. -dnl Copyright 2004, 2005, 2006 Free Software Foundation, Inc. +dnl Copyright 2004, 2005, 2006, 2011 Free Software Foundation, Inc. dnl This file is part of the GNU MP Library. @@ -34,11 +34,14 @@ define(`rp',`%rdi') define(`up',`%rsi') define(`n',`%rdx') +ABI_SUPPORT(DOS64) +ABI_SUPPORT(ELF64) ASM_START() TEXT ALIGN(32) PROLOGUE(mpn_com) + DOS64_ENTRY(3) movq (up), %r8 movl R32(%rdx), R32(%rax) leaq (up,n,8), up @@ -76,5 +79,6 @@ L(e10): movq 24(up,n,8), %r9 movq %r9, 24(rp,n,8) addq $4, n jnc L(oop) -L(ret): ret +L(ret): DOS64_EXIT() + ret EPILOGUE() diff --git a/mpn/x86_64/copyd.asm b/mpn/x86_64/copyd.asm index 13210217b..15e929f4e 100644 --- a/mpn/x86_64/copyd.asm +++ b/mpn/x86_64/copyd.asm @@ -1,6 +1,6 @@ dnl AMD64 mpn_copyd -- copy limb vector, decrementing. -dnl Copyright 2003, 2005, 2007 Free Software Foundation, Inc. +dnl Copyright 2003, 2005, 2007, 2011 Free Software Foundation, Inc. dnl This file is part of the GNU MP Library. @@ -39,10 +39,14 @@ define(`rp',`%rdi') define(`up',`%rsi') define(`n',`%rdx') +ABI_SUPPORT(DOS64) +ABI_SUPPORT(ELF64) + ASM_START() TEXT ALIGN(16) PROLOGUE(mpn_copyd) + DOS64_ENTRY(3) leaq -8(up,n,8), up leaq (rp,n,8), rp subq $4, n @@ -73,5 +77,6 @@ L(end): shrl R32(%rdx) C edx = lowpart(n) movq -8(up), %r9 movq %r8, -8(rp) movq %r9, -16(rp) -1: ret +1: DOS64_EXIT() + ret EPILOGUE() diff --git a/mpn/x86_64/copyi.asm b/mpn/x86_64/copyi.asm index d5cbdd644..1dd6c3168 100644 --- a/mpn/x86_64/copyi.asm +++ b/mpn/x86_64/copyi.asm @@ -1,6 +1,6 @@ dnl AMD64 mpn_copyi -- copy limb vector, incrementing. -dnl Copyright 2003, 2005, 2007 Free Software Foundation, Inc. +dnl Copyright 2003, 2005, 2007, 2011 Free Software Foundation, Inc. dnl This file is part of the GNU MP Library. @@ -39,10 +39,14 @@ define(`rp',`%rdi') define(`up',`%rsi') define(`n',`%rdx') +ABI_SUPPORT(DOS64) +ABI_SUPPORT(ELF64) + ASM_START() TEXT ALIGN(16) PROLOGUE(mpn_copyi) + DOS64_ENTRY(3) leaq -8(rp), rp subq $4, n jc L(end) @@ -72,5 +76,6 @@ L(end): shrl R32(%rdx) C edx = lowpart(n) movq 8(up), %r9 movq %r8, 8(rp) movq %r9, 16(rp) -1: ret +1: DOS64_EXIT() + ret EPILOGUE() diff --git a/mpn/x86_64/core2/aorrlsh1_n.asm b/mpn/x86_64/core2/aorrlsh1_n.asm index 346c21f33..e44e718a6 100644 --- a/mpn/x86_64/core2/aorrlsh1_n.asm +++ b/mpn/x86_64/core2/aorrlsh1_n.asm @@ -3,7 +3,7 @@ dnl AMD64 mpn_rsblsh1_n -- rp[] = (vp[] << 1) - up[] dnl Contributed to the GNU project by Torbjorn Granlund. -dnl Copyright 2008, 2010 Free Software Foundation, Inc. +dnl Copyright 2008, 2010, 2011 Free Software Foundation, Inc. dnl This file is part of the GNU MP Library. @@ -36,4 +36,7 @@ ifdef(`OPERATION_rsblsh1_n', ` MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_rsblsh1_n) +ABI_SUPPORT(DOS64) +ABI_SUPPORT(ELF64) + include_mpn(`x86_64/aorrlshC_n.asm') diff --git a/mpn/x86_64/core2/aorrlsh2_n.asm b/mpn/x86_64/core2/aorrlsh2_n.asm index 1da0c527f..2d9c89553 100644 --- a/mpn/x86_64/core2/aorrlsh2_n.asm +++ b/mpn/x86_64/core2/aorrlsh2_n.asm @@ -3,7 +3,7 @@ dnl AMD64 mpn_rsblsh2_n -- rp[] = (vp[] << 2) - up[] dnl Contributed to the GNU project by Torbjorn Granlund. -dnl Copyright 2008, 2010 Free Software Foundation, Inc. +dnl Copyright 2008, 2010, 2011 Free Software Foundation, Inc. dnl This file is part of the GNU MP Library. @@ -36,4 +36,7 @@ ifdef(`OPERATION_rsblsh2_n', ` MULFUNC_PROLOGUE(mpn_addlsh2_n mpn_rsblsh2_n) +ABI_SUPPORT(DOS64) +ABI_SUPPORT(ELF64) + include_mpn(`x86_64/aorrlshC_n.asm') diff --git a/mpn/x86_64/core2/aorrlsh_n.asm b/mpn/x86_64/core2/aorrlsh_n.asm index 8d03970ca..a8f5c051a 100644 --- a/mpn/x86_64/core2/aorrlsh_n.asm +++ b/mpn/x86_64/core2/aorrlsh_n.asm @@ -20,4 +20,8 @@ dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') MULFUNC_PROLOGUE(mpn_addlsh_n mpn_addlsh_nc mpn_rsblsh_n) + +ABI_SUPPORT(DOS64) +ABI_SUPPORT(ELF64) + include_mpn(`x86_64/coreinhm/aorrlsh_n.asm') diff --git a/mpn/x86_64/core2/aors_n.asm b/mpn/x86_64/core2/aors_n.asm index 75807c79a..bc109cc22 100644 --- a/mpn/x86_64/core2/aors_n.asm +++ b/mpn/x86_64/core2/aors_n.asm @@ -1,6 +1,6 @@ dnl Intel P6-15 mpn_add_n/mpn_sub_n -- mpn add or subtract. -dnl Copyright 2006, 2007 Free Software Foundation, Inc. +dnl Copyright 2006, 2007, 2011 Free Software Foundation, Inc. dnl This file is part of the GNU MP Library. @@ -48,16 +48,28 @@ ifdef(`OPERATION_sub_n', ` MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc) -ASM_START() +ifdef(`HOST_DOS64',` + define(`IFDOS', `$1') + define(`IFELF', `') +',` + define(`IFDOS', `') + define(`IFELF', `$1') +') + +ABI_SUPPORT(DOS64) +ABI_SUPPORT(ELF64) +ASM_START() TEXT ALIGN(16) - PROLOGUE(func_nc) + DOS64_ENTRY(4) +IFDOS(` mov 56(%rsp), %r8 ') jmp L(start) EPILOGUE() PROLOGUE(func) + DOS64_ENTRY(4) xor %r8, %r8 L(start): mov (up), %r10 @@ -96,6 +108,7 @@ L(end): ADCSBB %r11, %r10 mov %r10, 8(rp) mov R32(%rcx), R32(%rax) C clear eax, ecx contains 0 adc R32(%rax), R32(%rax) + DOS64_EXIT() ret ALIGN(16) diff --git a/mpn/x86_64/core2/aorsmul_1.asm b/mpn/x86_64/core2/aorsmul_1.asm index bb4f663c4..aeda30159 100644 --- a/mpn/x86_64/core2/aorsmul_1.asm +++ b/mpn/x86_64/core2/aorsmul_1.asm @@ -1,6 +1,7 @@ dnl x86-64 mpn_addmul_1 and mpn_submul_1, optimized for "Core 2". -dnl Copyright 2003, 2004, 2005, 2007, 2008, 2009 Free Software Foundation, Inc. +dnl Copyright 2003, 2004, 2005, 2007, 2008, 2009, 2011 Free Software +dnl Foundation, Inc. dnl This file is part of the GNU MP Library. @@ -45,10 +46,14 @@ ifdef(`OPERATION_submul_1',` MULFUNC_PROLOGUE(mpn_addmul_1 mpn_submul_1) +ABI_SUPPORT(DOS64) +ABI_SUPPORT(ELF64) + ASM_START() TEXT ALIGN(16) PROLOGUE(func) + DOS64_ENTRY(4) push %rbx push %rbp lea (%rdx), %rbx @@ -127,5 +132,6 @@ L(n1): mov 8(rp), %r10 adc %rdx, %rax pop %rbp pop %rbx + DOS64_EXIT() ret EPILOGUE() diff --git a/mpn/x86_64/core2/lshift.asm b/mpn/x86_64/core2/lshift.asm index 3b17e8315..2e175de76 100644 --- a/mpn/x86_64/core2/lshift.asm +++ b/mpn/x86_64/core2/lshift.asm @@ -1,6 +1,6 @@ dnl x86-64 mpn_lshift optimized for "Core 2". -dnl Copyright 2007, 2009 Free Software Foundation, Inc. +dnl Copyright 2007, 2009, 2011 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -35,12 +35,16 @@ C INPUT PARAMETERS define(`rp', `%rdi') define(`up', `%rsi') define(`n', `%rdx') -define(`cnt', `%cl') +define(`cnt', `%rcx') + +ABI_SUPPORT(DOS64) +ABI_SUPPORT(ELF64) ASM_START() TEXT ALIGN(16) PROLOGUE(mpn_lshift) + DOS64_ENTRY(4) lea -8(rp,n,8), rp lea -8(up,n,8), up @@ -51,7 +55,7 @@ L(b00): C n = 4, 8, 12, ... mov (up), %r10 mov -8(up), %r11 xor R32(%rax), R32(%rax) - shld R8(%rcx), %r10, %rax + shld R8(cnt), %r10, %rax mov -16(up), %r8 lea 24(rp), rp sub $4, n @@ -62,7 +66,7 @@ L(nb00):C n = 1, 5, 9, ... jae L(nb01) L(b01): mov (up), %r9 xor R32(%rax), R32(%rax) - shld R8(%rcx), %r9, %rax + shld R8(cnt), %r9, %rax sub $2, n jb L(le1) mov -8(up), %r10 @@ -70,8 +74,9 @@ L(b01): mov (up), %r9 lea -8(up), up lea 16(rp), rp jmp L(01) -L(le1): shl R8(%rcx), %r9 +L(le1): shl R8(cnt), %r9 mov %r9, (rp) + DOS64_EXIT() ret L(nb01):C n = 2, 6, 10, ... @@ -79,17 +84,18 @@ L(nb01):C n = 2, 6, 10, ... L(b10): mov (up), %r8 mov -8(up), %r9 xor R32(%rax), R32(%rax) - shld R8(%rcx), %r8, %rax + shld R8(cnt), %r8, %rax sub $3, n jb L(le2) mov -16(up), %r10 lea -16(up), up lea 8(rp), rp jmp L(10) -L(le2): shld R8(%rcx), %r9, %r8 +L(le2): shld R8(cnt), %r9, %r8 mov %r8, (rp) - shl R8(%rcx), %r9 + shl R8(cnt), %r9 mov %r9, -8(rp) + DOS64_EXIT() ret ALIGN(16) C performance critical! @@ -97,23 +103,23 @@ L(b11): C n = 3, 7, 11, ... mov (up), %r11 mov -8(up), %r8 xor R32(%rax), R32(%rax) - shld R8(%rcx), %r11, %rax + shld R8(cnt), %r11, %rax mov -16(up), %r9 lea -24(up), up sub $4, n jb L(end) ALIGN(16) -L(top): shld R8(%rcx), %r8, %r11 +L(top): shld R8(cnt), %r8, %r11 mov (up), %r10 mov %r11, (rp) -L(10): shld R8(%rcx), %r9, %r8 +L(10): shld R8(cnt), %r9, %r8 mov -8(up), %r11 mov %r8, -8(rp) -L(01): shld R8(%rcx), %r10, %r9 +L(01): shld R8(cnt), %r10, %r9 mov -16(up), %r8 mov %r9, -16(rp) -L(00): shld R8(%rcx), %r11, %r10 +L(00): shld R8(cnt), %r11, %r10 mov -24(up), %r9 mov %r10, -24(rp) add $-32, up @@ -121,11 +127,12 @@ L(00): shld R8(%rcx), %r11, %r10 sub $4, n jnc L(top) -L(end): shld R8(%rcx), %r8, %r11 +L(end): shld R8(cnt), %r8, %r11 mov %r11, (rp) - shld R8(%rcx), %r9, %r8 + shld R8(cnt), %r9, %r8 mov %r8, -8(rp) - shl R8(%rcx), %r9 + shl R8(cnt), %r9 mov %r9, -16(rp) + DOS64_EXIT() ret EPILOGUE() diff --git a/mpn/x86_64/core2/lshiftc.asm b/mpn/x86_64/core2/lshiftc.asm index a19f72297..31a08f7ae 100644 --- a/mpn/x86_64/core2/lshiftc.asm +++ b/mpn/x86_64/core2/lshiftc.asm @@ -1,6 +1,6 @@ dnl x86-64 mpn_lshiftc optimized for "Core 2". -dnl Copyright 2007, 2009 Free Software Foundation, Inc. +dnl Copyright 2007, 2009, 2011 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -35,12 +35,16 @@ C INPUT PARAMETERS define(`rp', `%rdi') define(`up', `%rsi') define(`n', `%rdx') -define(`cnt', `%cl') +define(`cnt', `%rcx') + +ABI_SUPPORT(DOS64) +ABI_SUPPORT(ELF64) ASM_START() TEXT ALIGN(16) PROLOGUE(mpn_lshiftc) + DOS64_ENTRY(4) lea -8(rp,n,8), rp lea -8(up,n,8), up @@ -51,7 +55,7 @@ L(b00): C n = 4, 8, 12, ... mov (up), %r10 mov -8(up), %r11 xor R32(%rax), R32(%rax) - shld R8(%rcx), %r10, %rax + shld R8(cnt), %r10, %rax mov -16(up), %r8 lea 24(rp), rp sub $4, n @@ -62,7 +66,7 @@ L(nb00):C n = 1, 5, 9, ... jae L(nb01) L(b01): mov (up), %r9 xor R32(%rax), R32(%rax) - shld R8(%rcx), %r9, %rax + shld R8(cnt), %r9, %rax sub $2, n jb L(le1) mov -8(up), %r10 @@ -70,9 +74,10 @@ L(b01): mov (up), %r9 lea -8(up), up lea 16(rp), rp jmp L(01) -L(le1): shl R8(%rcx), %r9 +L(le1): shl R8(cnt), %r9 not %r9 mov %r9, (rp) + DOS64_EXIT() ret L(nb01):C n = 2, 6, 10, ... @@ -80,19 +85,20 @@ L(nb01):C n = 2, 6, 10, ... L(b10): mov (up), %r8 mov -8(up), %r9 xor R32(%rax), R32(%rax) - shld R8(%rcx), %r8, %rax + shld R8(cnt), %r8, %rax sub $3, n jb L(le2) mov -16(up), %r10 lea -16(up), up lea 8(rp), rp jmp L(10) -L(le2): shld R8(%rcx), %r9, %r8 +L(le2): shld R8(cnt), %r9, %r8 not %r8 mov %r8, (rp) - shl R8(%rcx), %r9 + shl R8(cnt), %r9 not %r9 mov %r9, -8(rp) + DOS64_EXIT() ret ALIGN(16) C performance critical! @@ -100,26 +106,26 @@ L(b11): C n = 3, 7, 11, ... mov (up), %r11 mov -8(up), %r8 xor R32(%rax), R32(%rax) - shld R8(%rcx), %r11, %rax + shld R8(cnt), %r11, %rax mov -16(up), %r9 lea -24(up), up sub $4, n jb L(end) ALIGN(16) -L(top): shld R8(%rcx), %r8, %r11 +L(top): shld R8(cnt), %r8, %r11 mov (up), %r10 not %r11 mov %r11, (rp) -L(10): shld R8(%rcx), %r9, %r8 +L(10): shld R8(cnt), %r9, %r8 mov -8(up), %r11 not %r8 mov %r8, -8(rp) -L(01): shld R8(%rcx), %r10, %r9 +L(01): shld R8(cnt), %r10, %r9 mov -16(up), %r8 not %r9 mov %r9, -16(rp) -L(00): shld R8(%rcx), %r11, %r10 +L(00): shld R8(cnt), %r11, %r10 mov -24(up), %r9 not %r10 mov %r10, -24(rp) @@ -128,14 +134,15 @@ L(00): shld R8(%rcx), %r11, %r10 sub $4, n jnc L(top) -L(end): shld R8(%rcx), %r8, %r11 +L(end): shld R8(cnt), %r8, %r11 not %r11 mov %r11, (rp) - shld R8(%rcx), %r9, %r8 + shld R8(cnt), %r9, %r8 not %r8 mov %r8, -8(rp) - shl R8(%rcx), %r9 + shl R8(cnt), %r9 not %r9 mov %r9, -16(rp) + DOS64_EXIT() ret EPILOGUE() diff --git a/mpn/x86_64/core2/rsh1aors_n.asm b/mpn/x86_64/core2/rsh1aors_n.asm index eb52efc08..b350e4a43 100644 --- a/mpn/x86_64/core2/rsh1aors_n.asm +++ b/mpn/x86_64/core2/rsh1aors_n.asm @@ -1,6 +1,6 @@ dnl Intel P6/64 mpn_rsh1add_n and mpn_rsh1sub_n -- rp[] = (up[] +- vp[]) >> 1 -dnl Copyright 2003, 2005, 2009, 2010 Free Software Foundation, Inc. +dnl Copyright 2003, 2005, 2009, 2010, 2011 Free Software Foundation, Inc. dnl This file is part of the GNU MP Library. @@ -49,11 +49,24 @@ ifdef(`OPERATION_rsh1sub_n', ` MULFUNC_PROLOGUE(mpn_rsh1add_n mpn_rsh1add_nc mpn_rsh1sub_n mpn_rsh1sub_nc) +ifdef(`HOST_DOS64',` + define(`IFDOS', `$1') + define(`IFELF', `') +',` + define(`IFDOS', `') + define(`IFELF', `$1') +') + +ABI_SUPPORT(DOS64) +ABI_SUPPORT(ELF64) + ASM_START() TEXT ALIGN(16) PROLOGUE(func_nc) + DOS64_ENTRY(4) +IFDOS(` mov 56(%rsp), %r8 ') push %rbx push %rbp @@ -66,6 +79,7 @@ EPILOGUE() ALIGN(16) PROLOGUE(func_n) + DOS64_ENTRY(4) push %rbx push %rbp @@ -171,5 +185,6 @@ L(end): shrd $1, %rbx, %rbp mov %rbp, (rp) pop %rbp pop %rbx + DOS64_EXIT() ret EPILOGUE() diff --git a/mpn/x86_64/core2/rshift.asm b/mpn/x86_64/core2/rshift.asm index 38a77364f..68306881c 100644 --- a/mpn/x86_64/core2/rshift.asm +++ b/mpn/x86_64/core2/rshift.asm @@ -1,6 +1,6 @@ dnl x86-64 mpn_rshift optimized for "Core 2". -dnl Copyright 2007, 2009 Free Software Foundation, Inc. +dnl Copyright 2007, 2009, 2011 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -35,12 +35,16 @@ C INPUT PARAMETERS define(`rp', `%rdi') define(`up', `%rsi') define(`n', `%rdx') -define(`cnt', `%cl') +define(`cnt', `%rcx') + +ABI_SUPPORT(DOS64) +ABI_SUPPORT(ELF64) ASM_START() TEXT ALIGN(16) PROLOGUE(mpn_rshift) + DOS64_ENTRY(4) mov R32(%rdx), R32(%rax) and $3, R32(%rax) jne L(nb00) @@ -48,7 +52,7 @@ L(b00): C n = 4, 8, 12, ... mov (up), %r10 mov 8(up), %r11 xor R32(%rax), R32(%rax) - shrd R8(%rcx), %r10, %rax + shrd R8(cnt), %r10, %rax mov 16(up), %r8 lea 8(up), up lea -24(rp), rp @@ -60,7 +64,7 @@ L(nb00):C n = 1, 5, 9, ... jae L(nb01) L(b01): mov (up), %r9 xor R32(%rax), R32(%rax) - shrd R8(%rcx), %r9, %rax + shrd R8(cnt), %r9, %rax sub $2, n jb L(le1) mov 8(up), %r10 @@ -68,8 +72,9 @@ L(b01): mov (up), %r9 lea 16(up), up lea -16(rp), rp jmp L(01) -L(le1): shr R8(%rcx), %r9 +L(le1): shr R8(cnt), %r9 mov %r9, (rp) + DOS64_EXIT() ret L(nb01):C n = 2, 6, 10, ... @@ -77,17 +82,18 @@ L(nb01):C n = 2, 6, 10, ... L(b10): mov (up), %r8 mov 8(up), %r9 xor R32(%rax), R32(%rax) - shrd R8(%rcx), %r8, %rax + shrd R8(cnt), %r8, %rax sub $3, n jb L(le2) mov 16(up), %r10 lea 24(up), up lea -8(rp), rp jmp L(10) -L(le2): shrd R8(%rcx), %r9, %r8 +L(le2): shrd R8(cnt), %r9, %r8 mov %r8, (rp) - shr R8(%rcx), %r9 + shr R8(cnt), %r9 mov %r9, 8(rp) + DOS64_EXIT() ret ALIGN(16) @@ -95,23 +101,23 @@ L(b11): C n = 3, 7, 11, ... mov (up), %r11 mov 8(up), %r8 xor R32(%rax), R32(%rax) - shrd R8(%rcx), %r11, %rax + shrd R8(cnt), %r11, %rax mov 16(up), %r9 lea 32(up), up sub $4, n jb L(end) ALIGN(16) -L(top): shrd R8(%rcx), %r8, %r11 +L(top): shrd R8(cnt), %r8, %r11 mov -8(up), %r10 mov %r11, (rp) -L(10): shrd R8(%rcx), %r9, %r8 +L(10): shrd R8(cnt), %r9, %r8 mov (up), %r11 mov %r8, 8(rp) -L(01): shrd R8(%rcx), %r10, %r9 +L(01): shrd R8(cnt), %r10, %r9 mov 8(up), %r8 mov %r9, 16(rp) -L(00): shrd R8(%rcx), %r11, %r10 +L(00): shrd R8(cnt), %r11, %r10 mov 16(up), %r9 mov %r10, 24(rp) add $32, up @@ -119,11 +125,12 @@ L(00): shrd R8(%rcx), %r11, %r10 sub $4, n jnc L(top) -L(end): shrd R8(%rcx), %r8, %r11 +L(end): shrd R8(cnt), %r8, %r11 mov %r11, (rp) - shrd R8(%rcx), %r9, %r8 + shrd R8(cnt), %r9, %r8 mov %r8, 8(rp) - shr R8(%rcx), %r9 + shr R8(cnt), %r9 mov %r9, 16(rp) + DOS64_EXIT() ret EPILOGUE() diff --git a/mpn/x86_64/core2/sublsh1_n.asm b/mpn/x86_64/core2/sublsh1_n.asm index 7522b429f..50411d7d0 100644 --- a/mpn/x86_64/core2/sublsh1_n.asm +++ b/mpn/x86_64/core2/sublsh1_n.asm @@ -2,7 +2,7 @@ dnl AMD64 mpn_sublsh1_n optimised for Core 2 and Core iN. dnl Contributed to the GNU project by Torbjorn Granlund. -dnl Copyright 2008, 2010 Free Software Foundation, Inc. +dnl Copyright 2008, 2010, 2011 Free Software Foundation, Inc. dnl This file is part of the GNU MP Library. @@ -30,4 +30,7 @@ define(func, mpn_sublsh1_n) MULFUNC_PROLOGUE(mpn_sublsh1_n) +ABI_SUPPORT(DOS64) +ABI_SUPPORT(ELF64) + include_mpn(`x86_64/core2/sublshC_n.asm') diff --git a/mpn/x86_64/core2/sublsh2_n.asm b/mpn/x86_64/core2/sublsh2_n.asm index 036d2c859..affc87177 100644 --- a/mpn/x86_64/core2/sublsh2_n.asm +++ b/mpn/x86_64/core2/sublsh2_n.asm @@ -2,7 +2,7 @@ dnl AMD64 mpn_sublsh2_n optimised for Core 2 and Core iN. dnl Contributed to the GNU project by Torbjorn Granlund. -dnl Copyright 2008, 2010 Free Software Foundation, Inc. +dnl Copyright 2008, 2010, 2011 Free Software Foundation, Inc. dnl This file is part of the GNU MP Library. @@ -30,4 +30,7 @@ define(func, mpn_sublsh2_n) MULFUNC_PROLOGUE(mpn_sublsh2_n) +ABI_SUPPORT(DOS64) +ABI_SUPPORT(ELF64) + include_mpn(`x86_64/core2/sublshC_n.asm') diff --git a/mpn/x86_64/core2/sublshC_n.asm b/mpn/x86_64/core2/sublshC_n.asm index 2f89c35e3..7c4545f5a 100644 --- a/mpn/x86_64/core2/sublshC_n.asm +++ b/mpn/x86_64/core2/sublshC_n.asm @@ -3,7 +3,7 @@ dnl Core iN. dnl Contributed to the GNU project by Torbjorn Granlund. -dnl Copyright 2008, 2010 Free Software Foundation, Inc. +dnl Copyright 2008, 2010, 2011 Free Software Foundation, Inc. dnl This file is part of the GNU MP Library. @@ -40,6 +40,7 @@ ASM_START() TEXT ALIGN(8) PROLOGUE(func) + DOS64_ENTRY(4) push %rbx push %r12 @@ -141,5 +142,6 @@ L(end): shr $RSH, %r11 pop %rbx sub R32(%r11), R32(%rax) neg R32(%rax) + DOS64_EXIT() ret EPILOGUE() diff --git a/mpn/x86_64/coreinhm/aorrlsh_n.asm b/mpn/x86_64/coreinhm/aorrlsh_n.asm index a4afae69d..e22cc065d 100644 --- a/mpn/x86_64/coreinhm/aorrlsh_n.asm +++ b/mpn/x86_64/coreinhm/aorrlsh_n.asm @@ -62,10 +62,23 @@ C mpn_rsblsh_nc removed below, its idea of carry-in is inconsistent with C refmpn_rsblsh_nc MULFUNC_PROLOGUE(mpn_addlsh_n mpn_addlsh_nc mpn_rsblsh_n) +ifdef(`HOST_DOS64',` + define(`IFDOS', `$1') + define(`IFELF', `') +',` + define(`IFDOS', `') + define(`IFELF', `$1') +') + +ABI_SUPPORT(DOS64) +ABI_SUPPORT(ELF64) + ASM_START() TEXT ALIGN(32) PROLOGUE(func_n) + DOS64_ENTRY(4) +IFDOS(` mov 56(%rsp), %r8d ') C cnt push %rbx xor R32(%rbx), R32(%rbx) C clear CF save register L(ent): push %rbp @@ -170,9 +183,13 @@ L(wd1): shrd %cl, %r8, %r11 IFRSB( neg %rax) pop %rbp pop %rbx + DOS64_EXIT() ret EPILOGUE() PROLOGUE(func_nc) + DOS64_ENTRY(4) +IFDOS(` mov 56(%rsp), %r8d ') C cnt +IFDOS(` mov 64(%rsp), %r9 ') C cy push %rbx neg cy sbb R32(%rbx), R32(%rbx) C initialise CF save register diff --git a/mpn/x86_64/coreisbr/aors_n.asm b/mpn/x86_64/coreisbr/aors_n.asm index 66a5e3b60..4d8d1cccf 100644 --- a/mpn/x86_64/coreisbr/aors_n.asm +++ b/mpn/x86_64/coreisbr/aors_n.asm @@ -49,10 +49,22 @@ ifdef(`OPERATION_sub_n', ` MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc) +ifdef(`HOST_DOS64',` + define(`IFDOS', `$1') + define(`IFELF', `') +',` + define(`IFDOS', `') + define(`IFELF', `$1') +') + +ABI_SUPPORT(DOS64) +ABI_SUPPORT(ELF64) + ASM_START() TEXT ALIGN(16) PROLOGUE(func) + DOS64_ENTRY(4) xor %r8, %r8 L(ent): mov R32(n), R32(%rax) shr $2, n @@ -144,5 +156,7 @@ L(e1): ADCSBB 16(vp), %r10 ret EPILOGUE() PROLOGUE(func_nc) + DOS64_ENTRY(4) +IFDOS(` mov 56(%rsp), %r8 ') jmp L(ent) EPILOGUE() diff --git a/mpn/x86_64/invert_limb.asm b/mpn/x86_64/invert_limb.asm index 8c6aa68b6..06cf1414a 100644 --- a/mpn/x86_64/invert_limb.asm +++ b/mpn/x86_64/invert_limb.asm @@ -2,7 +2,7 @@ dnl AMD64 mpn_invert_limb -- Invert a normalized limb. dnl Contributed to the GNU project by Torbjorn Granlund and Niels Möller. -dnl Copyright 2004, 2007, 2008, 2009 Free Software Foundation, Inc. +dnl Copyright 2004, 2007, 2008, 2009, 2011 Free Software Foundation, Inc. dnl This file is part of the GNU MP Library. @@ -33,11 +33,14 @@ C VIA nano 79 157 C rax rcx rdx rdi rsi r8 +ABI_SUPPORT(DOS64) +ABI_SUPPORT(ELF64) ASM_START() TEXT ALIGN(16) PROLOGUE(mpn_invert_limb) C Kn C2 Ci + DOS64_ENTRY(1) mov %rdi, %rax C 0 0 0 shr $55, %rax C 1 1 1 ifdef(`PIC',` @@ -94,6 +97,7 @@ ifdef(`DARWIN',` adc %rdi, %rdx sub %rdx, %rax + DOS64_EXIT() ret EPILOGUE() ASM_END() diff --git a/mpn/x86_64/invert_limb_table.asm b/mpn/x86_64/invert_limb_table.asm index 98a331372..86d75b8ce 100644 --- a/mpn/x86_64/invert_limb_table.asm +++ b/mpn/x86_64/invert_limb_table.asm @@ -21,6 +21,9 @@ dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') +ABI_SUPPORT(DOS64) +ABI_SUPPORT(ELF64) + ASM_START() C Table entry X contains floor (0x7fd00 / (0x100 + X)) diff --git a/mpn/x86_64/logops_n.asm b/mpn/x86_64/logops_n.asm index 1df564a8f..02b9da549 100644 --- a/mpn/x86_64/logops_n.asm +++ b/mpn/x86_64/logops_n.asm @@ -1,6 +1,6 @@ dnl AMD64 logops. -dnl Copyright 2004, 2005, 2006 Free Software Foundation, Inc. +dnl Copyright 2004, 2005, 2006, 2011 Free Software Foundation, Inc. dnl This file is part of the GNU MP Library. @@ -72,6 +72,8 @@ define(`up',`%rsi') define(`vp',`%rdx') define(`n',`%rcx') +ABI_SUPPORT(DOS64) +ABI_SUPPORT(ELF64) ASM_START() @@ -79,6 +81,7 @@ ifdef(`VARIANT_1',` TEXT ALIGN(32) PROLOGUE(func) + DOS64_ENTRY(4) movq (vp), %r8 movl R32(%rcx), R32(%rax) leaq (vp,n,8), vp @@ -117,7 +120,8 @@ L(e10): movq 24(vp,n,8), %r9 movq %r9, 24(rp,n,8) addq $4, n jnc L(oop) -L(ret): ret +L(ret): DOS64_EXIT() + ret EPILOGUE() ') @@ -125,6 +129,7 @@ ifdef(`VARIANT_2',` TEXT ALIGN(32) PROLOGUE(func) + DOS64_ENTRY(4) movq (vp), %r8 notq %r8 movl R32(%rcx), R32(%rax) @@ -168,7 +173,8 @@ L(e10): movq 24(vp,n,8), %r9 movq %r9, 24(rp,n,8) addq $4, n jnc L(oop) -L(ret): ret +L(ret): DOS64_EXIT() + ret EPILOGUE() ') @@ -176,6 +182,7 @@ ifdef(`VARIANT_3',` TEXT ALIGN(32) PROLOGUE(func) + DOS64_ENTRY(4) movq (vp), %r8 movl R32(%rcx), R32(%rax) leaq (vp,n,8), vp @@ -220,6 +227,7 @@ L(e10): movq 24(vp,n,8), %r9 movq %r9, 24(rp,n,8) addq $4, n jnc L(oop) -L(ret): ret +L(ret): DOS64_EXIT() + ret EPILOGUE() ') diff --git a/mpn/x86_64/lshift.asm b/mpn/x86_64/lshift.asm index 2f3d5c94d..5852ba9f9 100644 --- a/mpn/x86_64/lshift.asm +++ b/mpn/x86_64/lshift.asm @@ -1,6 +1,6 @@ dnl AMD64 mpn_lshift -- mpn left shift. -dnl Copyright 2003, 2005, 2007, 2009 Free Software Foundation, Inc. +dnl Copyright 2003, 2005, 2007, 2009, 2011 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -36,10 +36,14 @@ define(`up', `%rsi') define(`n', `%rdx') define(`cnt', `%rcx') +ABI_SUPPORT(DOS64) +ABI_SUPPORT(ELF64) + ASM_START() TEXT ALIGN(32) PROLOGUE(mpn_lshift) + DOS64_ENTRY(4) cmp $1, R8(%rcx) jne L(gen) @@ -83,6 +87,7 @@ L(t1): mov (up), %r8 dec R32(%rax) jne L(n00) adc R32(%rax), R32(%rax) + DOS64_EXIT() ret L(e1): test R32(%rax), R32(%rax) C clear cy L(n00): mov (up), %r8 @@ -91,6 +96,7 @@ L(n00): mov (up), %r8 adc %r8, %r8 mov %r8, (rp) L(ret): adc R32(%rax), R32(%rax) + DOS64_EXIT() ret L(n01): dec R32(%rax) mov 8(up), %r9 @@ -100,6 +106,7 @@ L(n01): dec R32(%rax) mov %r8, (rp) mov %r9, 8(rp) adc R32(%rax), R32(%rax) + DOS64_EXIT() ret L(n10): mov 16(up), %r10 adc %r8, %r8 @@ -109,6 +116,7 @@ L(n10): mov 16(up), %r10 mov %r9, 8(rp) mov %r10, 16(rp) adc $-1, R32(%rax) + DOS64_EXIT() ret L(gen): neg R32(%rcx) C put rsh count in cl @@ -222,5 +230,6 @@ L(end): L(ast): mov (up), %r10 shl R8(%rcx), %r10 mov %r10, (rp) + DOS64_EXIT() ret EPILOGUE() diff --git a/mpn/x86_64/lshiftc.asm b/mpn/x86_64/lshiftc.asm index 93bb614d3..b4124b037 100644 --- a/mpn/x86_64/lshiftc.asm +++ b/mpn/x86_64/lshiftc.asm @@ -1,6 +1,6 @@ dnl AMD64 mpn_lshiftc -- mpn left shift with complement. -dnl Copyright 2003, 2005, 2006, 2009 Free Software Foundation, Inc. +dnl Copyright 2003, 2005, 2006, 2009, 2011 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -36,10 +36,14 @@ define(`up', `%rsi') define(`n', `%rdx') define(`cnt', `%rcx') +ABI_SUPPORT(DOS64) +ABI_SUPPORT(ELF64) + ASM_START() TEXT ALIGN(32) PROLOGUE(mpn_lshiftc) + DOS64_ENTRY(4) neg R32(%rcx) C put rsh count in cl mov -8(up,n,8), %rax shr R8(%rcx), %rax C function return value @@ -162,5 +166,6 @@ L(ast): mov (up), %r10 shl R8(%rcx), %r10 not %r10 mov %r10, (rp) + DOS64_EXIT() ret EPILOGUE() diff --git a/mpn/x86_64/lshsub_n.asm b/mpn/x86_64/lshsub_n.asm index 3a42863ad..6e5816b1c 100644 --- a/mpn/x86_64/lshsub_n.asm +++ b/mpn/x86_64/lshsub_n.asm @@ -1,6 +1,6 @@ dnl AMD64 mpn_lshsub_n. R = 2^k(U - V). -dnl Copyright 2006 Free Software Foundation, Inc. +dnl Copyright 2006, 2011 Free Software Foundation, Inc. dnl This file is part of the GNU MP Library. @@ -44,10 +44,23 @@ define(`vp', `%rdx') define(`n', `%rcx') define(`cnt', `%r8') +ifdef(`HOST_DOS64',` + define(`IFDOS', `$1') + define(`IFELF', `') +',` + define(`IFDOS', `') + define(`IFELF', `$1') +') + +ABI_SUPPORT(DOS64) +ABI_SUPPORT(ELF64) + ASM_START() TEXT ALIGN(16) PROLOGUE(mpn_lshsub_n) + DOS64_ENTRY(4) +IFDOS(` mov 56(%rsp), %r8d ') push %r12 push %r13 @@ -151,5 +164,6 @@ L(end): pop %r13 pop %r12 + DOS64_EXIT() ret EPILOGUE() diff --git a/mpn/x86_64/mod_1_1.asm b/mpn/x86_64/mod_1_1.asm index 56f708a75..8afa96e05 100644 --- a/mpn/x86_64/mod_1_1.asm +++ b/mpn/x86_64/mod_1_1.asm @@ -67,10 +67,14 @@ C the source of the cmov in the loop. C C We have the invariant that r_2 B^2 + r_1 B + r_0 < B^2 + B b +C ABI_SUPPORT(DOS64) +C ABI_SUPPORT(ELF64) + ASM_START() TEXT ALIGN(16) PROLOGUE(mpn_mod_1_1p) + DOS64_ENTRY(4) push %rbp push %rbx mov %rdx, b @@ -163,6 +167,7 @@ L(ok): shr R8(%rcx), %rax pop %rbx pop %rbp + DOS64_EXIT() ret L(fix): sub b, %rax jmp L(ok) @@ -170,6 +175,7 @@ EPILOGUE() ALIGN(16) PROLOGUE(mpn_mod_1_1p_cps) + DOS64_ENTRY(2) push %rbp bsr %rsi, %rcx push %rbx @@ -211,6 +217,7 @@ L(z): pop %r12 pop %rbx pop %rbp + DOS64_EXIT() ret EPILOGUE() ASM_END() diff --git a/mpn/x86_64/mod_1_2.asm b/mpn/x86_64/mod_1_2.asm index a0ecb6855..b09f24bc0 100644 --- a/mpn/x86_64/mod_1_2.asm +++ b/mpn/x86_64/mod_1_2.asm @@ -2,7 +2,7 @@ dnl AMD64 mpn_mod_1s_2p dnl Contributed to the GNU project by Torbjorn Granlund. -dnl Copyright 2009, 2010 Free Software Foundation, Inc. +dnl Copyright 2009, 2010, 2011 Free Software Foundation, Inc. dnl This file is part of the GNU MP Library. @@ -31,10 +31,14 @@ C Intel SBR 4.5 C Intel atom 28 C VIA nano 8 +C ABI_SUPPORT(DOS64) +C ABI_SUPPORT(ELF64) + ASM_START() TEXT ALIGN(16) PROLOGUE(mpn_mod_1s_2p) + DOS64_ENTRY(4) push %r14 test $1, R8(%rsi) mov %rdx, %r14 @@ -145,6 +149,7 @@ L(1): xor R32(%rcx), R32(%rcx) pop %r12 pop %r13 pop %r14 + DOS64_EXIT() ret L(one): mov (%rdi), %r8 @@ -154,6 +159,7 @@ L(one): EPILOGUE() PROLOGUE(mpn_mod_1s_2p_cps) + DOS64_ENTRY(2) push %rbp bsr %rsi, %rcx push %rbx @@ -214,5 +220,6 @@ ifdef(`SHLD_SLOW',` pop %r12 pop %rbx pop %rbp + DOS64_EXIT() ret EPILOGUE() diff --git a/mpn/x86_64/mod_1_4.asm b/mpn/x86_64/mod_1_4.asm index d99080d7f..3068e3def 100644 --- a/mpn/x86_64/mod_1_4.asm +++ b/mpn/x86_64/mod_1_4.asm @@ -2,7 +2,7 @@ dnl AMD64 mpn_mod_1s_4p dnl Contributed to the GNU project by Torbjorn Granlund. -dnl Copyright 2009, 2010 Free Software Foundation, Inc. +dnl Copyright 2009, 2010, 2011 Free Software Foundation, Inc. dnl This file is part of the GNU MP Library. @@ -30,17 +30,22 @@ C Intel corei 4 C Intel atom 23 C VIA nano 4.75 +C ABI_SUPPORT(DOS64) +C ABI_SUPPORT(ELF64) + ASM_START() TEXT ALIGN(16) PROLOGUE(mpn_mod_1s_4p) + DOS64_ENTRY(4) + push %r15 push %r14 push %r13 push %r12 push %rbp push %rbx - mov %rdx, -16(%rsp) + mov %rdx, %r15 mov %rcx, %r14 mov 16(%rcx), %r11 C B1modb mov 24(%rcx), %rbx C B2modb @@ -135,7 +140,7 @@ L(end): mov 8(%r14), R32(%rsi) or %rdx, %rdi mov %rdi, %rax mulq (%r14) - mov -16(%rsp), %rbx + mov %r15, %rbx mov %rax, %r9 sal R8(%rcx), %r8 inc %rdi @@ -155,11 +160,13 @@ L(end): mov 8(%r14), R32(%rsi) pop %r12 pop %r13 pop %r14 + DOS64_EXIT() ret EPILOGUE() ALIGN(16) PROLOGUE(mpn_mod_1s_4p_cps) + DOS64_ENTRY(2) push %rbp bsr %rsi, %rcx push %rbx @@ -244,5 +251,6 @@ ifdef(`SHLD_SLOW',` pop %r12 pop %rbx pop %rbp + DOS64_EXIT() ret EPILOGUE() diff --git a/mpn/x86_64/mod_34lsub1.asm b/mpn/x86_64/mod_34lsub1.asm index 08cd7d939..ee4d0d347 100644 --- a/mpn/x86_64/mod_34lsub1.asm +++ b/mpn/x86_64/mod_34lsub1.asm @@ -1,7 +1,7 @@ dnl AMD64 mpn_mod_34lsub1 -- remainder modulo 2^48-1. -dnl Copyright 2000, 2001, 2002, 2004, 2005, 2007, 2009, 2010 Free Software -dnl Foundation, Inc. +dnl Copyright 2000, 2001, 2002, 2004, 2005, 2007, 2009, 2010, 2011 Free +dnl Software Foundation, Inc. dnl This file is part of the GNU MP Library. @@ -39,10 +39,14 @@ C mp_limb_t mpn_mod_34lsub1 (mp_srcptr up, mp_size_t n) C TODO C * Review feed-in and wind-down code. +C ABI_SUPPORT(DOS64) +C ABI_SUPPORT(ELF64) + ASM_START() TEXT ALIGN(32) PROLOGUE(mpn_mod_34lsub1) + DOS64_ENTRY(2) mov $0x0000FFFFFFFFFFFF, %r11 @@ -66,7 +70,8 @@ PROLOGUE(mpn_mod_34lsub1) shl $16, %rdx C src[1] low add %rdx, %rax -L(one): ret +L(one): DOS64_EXIT() + ret C Don't change this, the wind-down code is not able to handle greater values @@ -176,5 +181,6 @@ L(0): add %r9, %rax add %rdx, %rax C apply 2mod3 high add %rdi, %rax C apply 2mod3 low + DOS64_EXIT() ret EPILOGUE() diff --git a/mpn/x86_64/mul_2.asm b/mpn/x86_64/mul_2.asm index 206a4ea2c..35deefa8b 100644 --- a/mpn/x86_64/mul_2.asm +++ b/mpn/x86_64/mul_2.asm @@ -1,7 +1,7 @@ dnl AMD64 mpn_mul_2 -- Multiply an n-limb vector with a 2-limb vector and dnl store the result in a third limb vector. -dnl Copyright 2008 Free Software Foundation, Inc. +dnl Copyright 2008, 2011 Free Software Foundation, Inc. dnl This file is part of the GNU MP Library. @@ -53,10 +53,14 @@ define(`w2', `%rbp') define(`w3', `%r10') define(`n', `%r11') +ABI_SUPPORT(DOS64) +ABI_SUPPORT(ELF64) + ASM_START() TEXT ALIGN(16) PROLOGUE(mpn_mul_2) + DOS64_ENTRY(4) push %rbx push %rbp @@ -172,5 +176,6 @@ L(m22): mul v1 pop %rbp pop %rbx + DOS64_EXIT() ret EPILOGUE() diff --git a/mpn/x86_64/mulmid_basecase.asm b/mpn/x86_64/mulmid_basecase.asm index 375e7f70e..d2d56d4a4 100644 --- a/mpn/x86_64/mulmid_basecase.asm +++ b/mpn/x86_64/mulmid_basecase.asm @@ -50,11 +50,23 @@ define(`vp', `%r15') define(`vp_inner', `%r10') +ifdef(`HOST_DOS64',` + define(`IFDOS', `$1') + define(`IFELF', `') +',` + define(`IFDOS', `') + define(`IFELF', `$1') +') + +ABI_SUPPORT(DOS64) +ABI_SUPPORT(ELF64) ASM_START() TEXT ALIGN(16) PROLOGUE(mpn_mulmid_basecase) + DOS64_ENTRY(4) +IFDOS(` mov 56(%rsp), %r8d ') push %rbx push %rbp push %r12 @@ -539,6 +551,6 @@ L(ret): pop %r15 pop %r12 pop %rbp pop %rbx + DOS64_EXIT() ret - EPILOGUE() diff --git a/mpn/x86_64/popham.asm b/mpn/x86_64/popham.asm index 9db368106..999452328 100644 --- a/mpn/x86_64/popham.asm +++ b/mpn/x86_64/popham.asm @@ -1,6 +1,6 @@ dnl AMD64 mpn_popcount, mpn_hamdist -- population count and hamming distance. -dnl Copyright 2004, 2005, 2007, 2010 Free Software Foundation, Inc. +dnl Copyright 2004, 2005, 2007, 2010, 2011 Free Software Foundation, Inc. dnl This file is part of the GNU MP Library. @@ -44,6 +44,7 @@ ifdef(`OPERATION_popcount',` define(`h33333333', `%r11') define(`h0f0f0f0f', `%rcx') define(`h01010101', `%rdx') + define(`POP', `$1') define(`HAM', `dnl') ') ifdef(`OPERATION_hamdist',` @@ -55,17 +56,22 @@ ifdef(`OPERATION_hamdist',` define(`h33333333', `%r11') define(`h0f0f0f0f', `%rcx') define(`h01010101', `%r14') + define(`POP', `dnl') define(`HAM', `$1') ') MULFUNC_PROLOGUE(mpn_popcount mpn_hamdist) +ABI_SUPPORT(DOS64) +ABI_SUPPORT(ELF64) + ASM_START() TEXT ALIGN(32) PROLOGUE(func) - + POP(` DOS64_ENTRY(2) ') + HAM(` DOS64_ENTRY(3) ') push %r12 push %r13 HAM(` push %r14 ') @@ -155,6 +161,6 @@ L(end): HAM(` pop %r14 ') pop %r13 pop %r12 + DOS64_EXIT() ret - EPILOGUE() diff --git a/mpn/x86_64/redc_1.asm b/mpn/x86_64/redc_1.asm index 8d731c68c..53b5641a0 100644 --- a/mpn/x86_64/redc_1.asm +++ b/mpn/x86_64/redc_1.asm @@ -49,10 +49,14 @@ define(`n', `%r13') define(`i', `%r11') define(`nneg', `%r12') +ABI_SUPPORT(DOS64) +ABI_SUPPORT(ELF64) + ASM_START() TEXT ALIGN(32) PROLOGUE(mpn_redc_1) + DOS64_ENTRY(4) push %rbp push %rbx push %r12 @@ -293,5 +297,6 @@ L(ret): pop %r14 pop %r12 pop %rbx pop %rbp + DOS64_EXIT() ret EPILOGUE() diff --git a/mpn/x86_64/rsh1aors_n.asm b/mpn/x86_64/rsh1aors_n.asm index c4a336446..1b6a103f1 100644 --- a/mpn/x86_64/rsh1aors_n.asm +++ b/mpn/x86_64/rsh1aors_n.asm @@ -1,7 +1,7 @@ dnl AMD64 mpn_rsh1add_n -- rp[] = (up[] + vp[]) >> 1 dnl AMD64 mpn_rsh1sub_n -- rp[] = (up[] - vp[]) >> 1 -dnl Copyright 2003, 2005, 2009 Free Software Foundation, Inc. +dnl Copyright 2003, 2005, 2009, 2011 Free Software Foundation, Inc. dnl This file is part of the GNU MP Library. @@ -53,11 +53,24 @@ ifdef(`OPERATION_rsh1sub_n', ` MULFUNC_PROLOGUE(mpn_rsh1add_n mpn_rsh1add_nc mpn_rsh1sub_n mpn_rsh1sub_nc) +ifdef(`HOST_DOS64',` + define(`IFDOS', `$1') + define(`IFELF', `') +',` + define(`IFDOS', `') + define(`IFELF', `$1') +') + +ABI_SUPPORT(DOS64) +ABI_SUPPORT(ELF64) + ASM_START() TEXT ALIGN(16) PROLOGUE(func_nc) + DOS64_ENTRY(4) +IFDOS(` mov 56(%rsp), %r8 ') push %rbx xor R32(%rax), R32(%rax) @@ -69,6 +82,7 @@ EPILOGUE() ALIGN(16) PROLOGUE(func_n) + DOS64_ENTRY(4) push %rbx xor R32(%rax), R32(%rax) @@ -169,5 +183,6 @@ L(top): add %rbx, %rbx C rotate carry limb, restore acy L(end): mov %rbx, (rp) pop %rbx + DOS64_EXIT() ret EPILOGUE() diff --git a/mpn/x86_64/rshift.asm b/mpn/x86_64/rshift.asm index 0f822a4a0..57a4ab093 100644 --- a/mpn/x86_64/rshift.asm +++ b/mpn/x86_64/rshift.asm @@ -1,6 +1,6 @@ dnl AMD64 mpn_rshift -- mpn right shift. -dnl Copyright 2003, 2005, 2009 Free Software Foundation, Inc. +dnl Copyright 2003, 2005, 2009, 2011 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -36,10 +36,14 @@ define(`up', `%rsi') define(`n', `%rdx') define(`cnt', `%rcx') +ABI_SUPPORT(DOS64) +ABI_SUPPORT(ELF64) + ASM_START() TEXT ALIGN(32) PROLOGUE(mpn_rshift) + DOS64_ENTRY(4) neg R32(%rcx) C put rsh count in cl mov (up), %rax shl R8(%rcx), %rax C function return value @@ -156,5 +160,6 @@ L(end): L(ast): mov (up), %r10 shr R8(%rcx), %r10 mov %r10, (rp) + DOS64_EXIT() ret EPILOGUE() diff --git a/mpn/x86_64/sqr_basecase.asm b/mpn/x86_64/sqr_basecase.asm index f71627ab9..71195d7ae 100644 --- a/mpn/x86_64/sqr_basecase.asm +++ b/mpn/x86_64/sqr_basecase.asm @@ -75,14 +75,6 @@ define(`w1', `%rcx') define(`w2', `%rbp') define(`w3', `%r10') -ifdef(`HOST_DOS64',` - define(`IFDOS', `$1') - define(`IFELF', `') -',` - define(`IFDOS', `') - define(`IFELF', `$1') -') - ABI_SUPPORT(DOS64) ABI_SUPPORT(ELF64) diff --git a/mpn/x86_64/sublsh1_n.asm b/mpn/x86_64/sublsh1_n.asm index a2f48c007..a0515cf18 100644 --- a/mpn/x86_64/sublsh1_n.asm +++ b/mpn/x86_64/sublsh1_n.asm @@ -1,6 +1,6 @@ dnl AMD64 mpn_sublsh1_n -- rp[] = up[] - (vp[] << 1) -dnl Copyright 2003, 2005, 2006, 2007 Free Software Foundation, Inc. +dnl Copyright 2003, 2005, 2006, 2007, 2011 Free Software Foundation, Inc. dnl This file is part of the GNU MP Library. @@ -41,10 +41,14 @@ define(`up',`%rsi') define(`vp',`%rdx') define(`n', `%rcx') +ABI_SUPPORT(DOS64) +ABI_SUPPORT(ELF64) + ASM_START() TEXT ALIGN(16) PROLOGUE(mpn_sublsh1_n) + DOS64_ENTRY(4) push %rbx push %rbp @@ -140,5 +144,6 @@ L(end): add R32(%rbp), R32(%rax) pop %rbp pop %rbx + DOS64_EXIT() ret EPILOGUE() diff --git a/mpn/x86_64/tabselect.asm b/mpn/x86_64/tabselect.asm index 2611b3212..a6699a9a4 100644 --- a/mpn/x86_64/tabselect.asm +++ b/mpn/x86_64/tabselect.asm @@ -50,10 +50,23 @@ define(`maskn', `%r12') C rax rbx rcx rdx rdi rsi rbp (rsp) r8 r9 r10 r11 r12 r13 r14 r15 C nents n rp tab which +ifdef(`HOST_DOS64',` + define(`IFDOS', `$1') + define(`IFELF', `') +',` + define(`IFDOS', `') + define(`IFELF', `$1') +') + +ABI_SUPPORT(DOS64) +ABI_SUPPORT(ELF64) + ASM_START() TEXT ALIGN(16) PROLOGUE(mpn_tabselect) + DOS64_ENTRY(4) +IFDOS(` mov 56(%rsp), %r8d ') push %rbx push %rbp push %r12 @@ -105,5 +118,6 @@ L(outer_end): pop %r12 pop %rbp pop %rbx + DOS64_EXIT() ret EPILOGUE() |