summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Torbjorn Granlund <tege@gmplib.org> 2011-11-28 23:13:37 +0100
committer Torbjorn Granlund <tege@gmplib.org> 2011-11-28 23:13:37 +0100
commit 68afbfbde8fb3e1bc9bb31d53ce5d81f438262a1 (patch)
tree 51b42ba53dcf8ef4818287f92b649b3bef253fc4
parent d4f5eddea43f682b380e85a9db69b4e8fd8ea54a (diff)
download gmp-68afbfbde8fb3e1bc9bb31d53ce5d81f438262a1.tar.gz
Support ABI DOS64.
-rw-r--r-- mpn/x86_64/aorsmul_1.asm 51
-rw-r--r-- mpn/x86_64/mul_1.asm 55
-rw-r--r-- mpn/x86_64/mul_basecase.asm 14
-rw-r--r-- mpn/x86_64/sqr_basecase.asm 17
4 files changed, 110 insertions, 27 deletions
diff --git a/mpn/x86_64/aorsmul_1.asm b/mpn/x86_64/aorsmul_1.asm
index 9c64d56fc..a406bc9e8 100644
--- a/mpn/x86_64/aorsmul_1.asm
+++ b/mpn/x86_64/aorsmul_1.asm
@@ -1,6 +1,6 @@
dnl AMD64 mpn_addmul_1 and mpn_submul_1.
-dnl Copyright 2003, 2004, 2005, 2007, 2008 Free Software Foundation, Inc.
+dnl Copyright 2003, 2004, 2005, 2007, 2008, 2011 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
@@ -28,20 +28,27 @@ C Intel corei ?
C Intel atom 21.3
C VIA nano 5.5
-C The inner loop of this code is the result of running a code generation and
+C The loop of this code is the result of running a code generation and
C optimization tool suite written by David Harvey and Torbjorn Granlund.
-C TODO:
-C * The inner loop is great, but the prologue and epilogue code was
-C quickly written. Tune it!
+C TODO
+C * The loop is great, but the prologue and epilogue code was quickly written.
+C Tune it!
-C INPUT PARAMETERS
-define(`rp', `%rdi')
-define(`up', `%rsi')
-define(`n_param',`%rdx')
-define(`vl', `%rcx')
+define(`rp', `%rdi') C rcx
+define(`up', `%rsi') C rdx
+define(`n_param', `%rdx') C r8
+define(`vl', `%rcx') C r9
-define(`n', `%r11')
+define(`n', `%r11')
+
+ifdef(`HOST_DOS64',`
+ define(`IFDOS', `$1')
+ define(`IFELF', `')
+',`
+ define(`IFDOS', `')
+ define(`IFELF', `$1')
+')
ifdef(`OPERATION_addmul_1',`
define(`ADDSUB', `add')
@@ -52,17 +59,33 @@ ifdef(`OPERATION_submul_1',`
define(`func', `mpn_submul_1')
')
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(ELF64)
+
MULFUNC_PROLOGUE(mpn_addmul_1 mpn_submul_1)
+IFDOS(` define(`up', ``%rsi'') ') dnl
+IFDOS(` define(`rp', ``%rcx'') ') dnl
+IFDOS(` define(`vl', ``%r9'') ') dnl
+IFDOS(` define(`r9', ``rdi'') ') dnl
+IFDOS(` define(`n', ``%r8'') ') dnl
+IFDOS(` define(`r8', ``r11'') ') dnl
+
ASM_START()
TEXT
ALIGN(16)
PROLOGUE(func)
+
+IFDOS(``push %rsi '')
+IFDOS(``push %rdi '')
+IFDOS(``mov %rdx, %rsi '')
+
mov (up), %rax C read first u limb early
push %rbx
- mov n_param, %rbx C move away n from rdx, mul uses it
+IFELF(` mov n_param, %rbx ') C move away n from rdx, mul uses it
+IFDOS(` mov n, %rbx ')
mul vl
- mov %rbx, n
+IFELF(` mov %rbx, n ')
and $3, R32(%rbx)
jz L(b0)
@@ -145,5 +168,7 @@ L(ret): adc $0, %rdx
mov %rdx, %rax
pop %rbx
+IFDOS(``pop %rdi '')
+IFDOS(``pop %rsi '')
ret
EPILOGUE()
diff --git a/mpn/x86_64/mul_1.asm b/mpn/x86_64/mul_1.asm
index 5f8dc4c9c..3b87bbf01 100644
--- a/mpn/x86_64/mul_1.asm
+++ b/mpn/x86_64/mul_1.asm
@@ -28,38 +28,65 @@ C Intel corei 3.8
C Intel atom 19.8
C VIA nano ?
-C The inner loop of this code is the result of running a code generation and
+C The loop of this code is the result of running a code generation and
C optimization tool suite written by David Harvey and Torbjorn Granlund.
-C TODO:
-C * The inner loop is great, but the prologue and epilogue code was
-C quickly written. Tune it!
+C TODO
+C * The loop is great, but the prologue and epilogue code was quickly written.
+C Tune it!
-C INPUT PARAMETERS
-define(`rp', `%rdi')
-define(`up', `%rsi')
-define(`n_param',`%rdx')
-define(`vl', `%rcx')
+define(`rp', `%rdi') C rcx
+define(`up', `%rsi') C rdx
+define(`n_param', `%rdx') C r8
+define(`vl', `%rcx') C r9
-define(`n', `%r11')
+define(`n', `%r11')
+
+ifdef(`HOST_DOS64',`
+ define(`IFDOS', `$1')
+ define(`IFELF', `')
+',`
+ define(`IFDOS', `')
+ define(`IFELF', `$1')
+')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(ELF64)
+
+IFDOS(` define(`up', ``%rsi'') ') dnl
+IFDOS(` define(`rp', ``%rcx'') ') dnl
+IFDOS(` define(`vl', ``%r9'') ') dnl
+IFDOS(` define(`r9', ``rdi'') ') dnl
+IFDOS(` define(`n', ``%r8'') ') dnl
+IFDOS(` define(`r8', ``r11'') ') dnl
ASM_START()
TEXT
ALIGN(16)
PROLOGUE(mpn_mul_1c)
+IFDOS(``push %rsi '')
+IFDOS(``push %rdi '')
+IFDOS(``mov %rdx, %rsi '')
push %rbx
- mov %r8, %r10
+IFELF(` mov %r8, %r10')
+IFDOS(` mov 64(%rsp), %r10') C 40 + 3*8 (3 push insns)
jmp L(common)
EPILOGUE()
PROLOGUE(mpn_mul_1)
+
+IFDOS(``push %rsi '')
+IFDOS(``push %rdi '')
+IFDOS(``mov %rdx, %rsi '')
+
push %rbx
xor %r10, %r10
L(common):
mov (up), %rax C read first u limb early
- mov n_param, %rbx C move away n from rdx, mul uses it
+IFELF(` mov n_param, %rbx ') C move away n from rdx, mul uses it
+IFDOS(` mov n, %rbx ')
mul vl
- mov %rbx, %r11
+IFELF(` mov %rbx, n ')
add %r10, %rax
adc $0, %rdx
@@ -145,5 +172,7 @@ L(L2): mul vl
L(ret): mov %rdx, %rax
pop %rbx
+IFDOS(``pop %rdi '')
+IFDOS(``pop %rsi '')
ret
EPILOGUE()
diff --git a/mpn/x86_64/mul_basecase.asm b/mpn/x86_64/mul_basecase.asm
index fdba9a6e3..5fede9234 100644
--- a/mpn/x86_64/mul_basecase.asm
+++ b/mpn/x86_64/mul_basecase.asm
@@ -59,10 +59,23 @@ define(`n', `%r11')
define(`outer_addr', `%r14')
define(`un', `%r13')
+ifdef(`HOST_DOS64',`
+ define(`IFDOS', `$1')
+ define(`IFELF', `')
+',`
+ define(`IFDOS', `')
+ define(`IFELF', `$1')
+')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(ELF64)
+
ASM_START()
TEXT
ALIGN(16)
PROLOGUE(mpn_mul_basecase)
+ DOS64_ENTRY(4)
+IFDOS(` mov 56(%rsp), %r8d ')
push %rbx
push %rbp
push %r12
@@ -448,6 +461,7 @@ L(ret): pop %r15
pop %r12
pop %rbp
pop %rbx
+ DOS64_EXIT()
ret
EPILOGUE()
diff --git a/mpn/x86_64/sqr_basecase.asm b/mpn/x86_64/sqr_basecase.asm
index 311daab8a..f71627ab9 100644
--- a/mpn/x86_64/sqr_basecase.asm
+++ b/mpn/x86_64/sqr_basecase.asm
@@ -75,12 +75,22 @@ define(`w1', `%rcx')
define(`w2', `%rbp')
define(`w3', `%r10')
+ifdef(`HOST_DOS64',`
+ define(`IFDOS', `$1')
+ define(`IFELF', `')
+',`
+ define(`IFDOS', `')
+ define(`IFELF', `$1')
+')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(ELF64)
ASM_START()
TEXT
ALIGN(16)
-
PROLOGUE(mpn_sqr_basecase)
+ DOS64_ENTRY(3)
add $-40, %rsp
mov %rbx, 32(%rsp)
mov %rbp, 24(%rsp)
@@ -115,6 +125,7 @@ L(1): mov (up), %rax
mov %rdx, 8(rp)
add $32, %rsp
pop %rbx
+ DOS64_EXIT()
ret
L(2): mov (up), %rax
@@ -139,6 +150,7 @@ L(2): mov (up), %rax
mov %r11, 24(rp)
add $32, %rsp
pop %rbx
+ DOS64_EXIT()
ret
L(3): mov (up), %rax
@@ -184,6 +196,7 @@ L(3): mov (up), %rax
adc %rbx, 40(rp)
add $32, %rsp
pop %rbx
+ DOS64_EXIT()
ret
L(4): mov (up), %rax
@@ -256,6 +269,7 @@ L(4): mov (up), %rax
pop %r12
pop %rbp
pop %rbx
+ DOS64_EXIT()
ret
@@ -780,5 +794,6 @@ L(d1): mov %r11, 24(rp,j,8)
pop %r12
pop %rbp
pop %rbx
+ DOS64_EXIT()
ret
EPILOGUE()