summaryrefslogtreecommitdiff
path: root/mpn/x86
diff options
context:
space:
mode:
authorMarco Bodrato <bodrato@mail.dm.unipi.it>2011-03-09 16:16:01 +0100
committerMarco Bodrato <bodrato@mail.dm.unipi.it>2011-03-09 16:16:01 +0100
commit1694b67f0a5dad1f5fd1cf8ae5cb71b0bc922bbe (patch)
tree55a0c0d9c2f50420af6c81ffd98b4dca5a649e91 /mpn/x86
parent98feb1803bb24d13938ff3fc1e32527aca30ab7b (diff)
downloadgmp-1694b67f0a5dad1f5fd1cf8ae5cb71b0bc922bbe.tar.gz
bdiv_dbm1c for atom/32
Diffstat (limited to 'mpn/x86')
-rw-r--r--mpn/x86/atom/sse2/bdiv_dbm1c.asm123
1 files changed, 123 insertions, 0 deletions
diff --git a/mpn/x86/atom/sse2/bdiv_dbm1c.asm b/mpn/x86/atom/sse2/bdiv_dbm1c.asm
new file mode 100644
index 000000000..4490e930b
--- /dev/null
+++ b/mpn/x86/atom/sse2/bdiv_dbm1c.asm
@@ -0,0 +1,123 @@
+dnl Intel Atom mpn_bdiv_dbm1.
+
+dnl Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
+dnl
+dnl Copyright 2011 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C cycles/limb
+C P5 -
+C P6 model 0-8,10-12 -
+C P6 model 9 (Banias) 9.75
+C P6 model 13 (Dothan)
+C P4 model 0 (Willamette)
+C P4 model 1 (?)
+C P4 model 2 (Northwood)
+C P4 model 3 (Prescott)
+C P4 model 4 (Nocona)
+C Intel Atom 8
+C AMD K6 -
+C AMD K7 -
+C AMD K8
+C AMD K10
+
+defframe(PARAM_CARRY,20)
+defframe(PARAM_MUL, 16)
+defframe(PARAM_SIZE, 12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+
+define(`rp', `%edi')
+define(`up', `%esi')
+define(`n', `%ecx')
+define(`reg', `%edx')
+define(`cy', `%eax') C contains the return value
+
+ASM_START()
+ TEXT
+ ALIGN(16)
+deflit(`FRAME',0)
+
+PROLOGUE(mpn_bdiv_dbm1c)
+ push %edi FRAME_pushl()
+ push %esi FRAME_pushl()
+ mov PARAM_DST, rp
+ mov PARAM_SRC, up
+ mov PARAM_SIZE, n C size
+ movd PARAM_MUL, %mm7
+ mov PARAM_CARRY, cy
+
+ movd (up), %mm0
+ pmuludq %mm7, %mm0
+ shr n
+ jz L(eq1)
+
+ movd 4(up), %mm1
+ jc L(odd)
+
+ pmuludq %mm7, %mm1
+ movd %mm0, reg
+ psrlq $32, %mm0
+ sub reg, cy
+ movd %mm0, reg
+ movq %mm1, %mm0
+ mov cy, (rp)
+ lea 4(rp), rp
+ lea 4(up), up
+ dec n
+ jz L(end)
+
+C ALIGN(16)
+L(top): movd 4(up), %mm1
+ sbb reg, cy
+L(odd): movd %mm0, reg
+ psrlq $32, %mm0
+ pmuludq %mm7, %mm1
+ sub reg, cy
+ lea 8(up), up
+ movd %mm0, reg
+ movd (up), %mm0
+ mov cy, (rp)
+ sbb reg, cy
+ movd %mm1, reg
+ psrlq $32, %mm1
+ sub reg, cy
+ movd %mm1, reg
+ pmuludq %mm7, %mm0
+ dec n
+ mov cy, 4(rp)
+ lea 8(rp), rp
+ jnz L(top)
+
+L(end): sbb reg, cy
+
+L(eq1): movd %mm0, reg
+ psrlq $32, %mm0
+ sub reg, cy
+ movd %mm0, reg
+ mov cy, (rp)
+ sbb reg, cy
+
+ emms
+ pop %esi FRAME_popl()
+ pop %edi FRAME_popl()
+ ret
+EPILOGUE()
+ASM_END()