summaryrefslogtreecommitdiff
path: root/mpn
diff options
context:
space:
mode:
author Kevin Ryde <user42@zip.com.au> 2001-11-02 00:48:06 +0100
committer Kevin Ryde <user42@zip.com.au> 2001-11-02 00:48:06 +0100
commit ee3a58d59e99744bba5f8e1b5a0877d299e3d12a (patch)
tree c562b7bc0d354e1050fa7029661c5309f7ea7d27 /mpn
parent 87272edd067fdf751e715c79d8d3e80665e0fb1b (diff)
download gmp-ee3a58d59e99744bba5f8e1b5a0877d299e3d12a.tar.gz
* mpn/x86/x86-defs.m4 (psadbw): New macro.
Diffstat (limited to 'mpn')
-rw-r--r-- mpn/x86/x86-defs.m4 72
1 files changed, 72 insertions, 0 deletions
diff --git a/mpn/x86/x86-defs.m4 b/mpn/x86/x86-defs.m4
index 5bea9aa53..92dbd517f 100644
--- a/mpn/x86/x86-defs.m4
+++ b/mpn/x86/x86-defs.m4
@@ -582,6 +582,78 @@ eval(192+8*x86_opcode_reg32(`$4')+x86_opcode_reg32(`$3')) dnl
C `$1 $3, $4'')
+dnl Usage: x86_opcode_regmmx(reg)
+dnl
+dnl Validate the given mmx register, and return its number, 0 to 7.
+dnl
+dnl The register is given in AT&T form with its percent prefix, for
+dnl example %mm3, and is translated by running x86_lookup over
+dnl x86_opcode_regmmx_list.  What happens for a name which is not in that
+dnl list is decided by x86_lookup, which is defined elsewhere in this file.
+
+define(x86_opcode_regmmx,
+m4_assert_numargs(1)
+`x86_lookup(`$1',x86_opcode_regmmx_list)')
+
+dnl Table of name,number pairs for the eight mmx registers, in the form
+dnl handed to x86_lookup by x86_opcode_regmmx.  The numbers are the values
+dnl psadbw_bytes puts into the register fields of its computed opcode byte.
+
+define(x86_opcode_regmmx_list,
+``%mm0',0,
+`%mm1',1,
+`%mm2',2,
+`%mm3',3,
+`%mm4',4,
+`%mm5',5,
+`%mm6',6,
+`%mm7',7')
+
+
+dnl Usage: psadbw(src,dst)
+dnl
+dnl Only recent versions of gas know psadbw, in particular gas 2.9.1 on
+dnl FreeBSD 3.3 and 3.4 doesn't recognise it, so instead emit .byte
+dnl sequences.
+dnl
+dnl Only register->register forms are supported here, which suffices for
+dnl the current code.
+dnl
+dnl Operands are in AT&T order, source first and destination second, and
+dnl both are passed through unchanged to one of two helpers.  When
+dnl psadbw_available_p expands to 1 the instruction is emitted as raw bytes
+dnl by psadbw_bytes, otherwise psadbw_simulate generates a replacement
+dnl sequence which is only partly functional.
+dnl
+dnl NOTE(review): m4_instruction_wrapper is defined elsewhere; presumably
+dnl it is what lets this macro be written like an ordinary instruction in
+dnl the assembler sources, but that is not visible from here.
+
+define(psadbw,
+m4_instruction_wrapper()
+m4_assert_numargs(2)
+`ifelse(psadbw_available_p,1,
+`psadbw_bytes(`$1',`$2')',
+`psadbw_simulate(`$1',`$2')')')
+
+dnl Usage: psadbw_available_p
+dnl
+dnl Expands to the result of m4_ifdef_anyof_p applied to the HAVE_HOST_CPU
+dnl symbols for pentium3, pentium4 and athlon.  The psadbw macro compares
+dnl that result against 1 to choose between a real and a simulated psadbw.
+dnl
+dnl NOTE(review): m4_assert_numargs(-1) presumably asserts that the macro
+dnl is used without an argument list; confirm against its definition.
+
+define(psadbw_available_p,
+m4_assert_numargs(-1)
+`m4_ifdef_anyof_p(`HAVE_HOST_CPU_pentium3',
+ `HAVE_HOST_CPU_pentium4',
+ `HAVE_HOST_CPU_athlon')')
+
+dnl Called: psadbw_bytes(src,dst)
+dnl
+dnl Emit the register to register psadbw as three .byte values: 0x0f, 0xf6
+dnl and then a byte computed as 192 + 8*dst + src, where dst and src are
+dnl the register numbers returned by x86_opcode_regmmx.  In x86 ModRM terms
+dnl the 192 (0xC0) sets the two mode bits to select a register operand, dst
+dnl lands in the reg field (bits 3 to 5) and src in the r/m field (bits 0
+dnl to 2).  The trailing C comment records the instruction being encoded.
+define(psadbw_bytes,
+m4_assert_numargs(2)
+`.byte 0x0f,0xf6,dnl
+eval(192+x86_opcode_regmmx(`$2')*8+x86_opcode_regmmx(`$1')) dnl
+ C `psadbw $1, $2'')
+
+dnl Called: psadbw_simulate(src,dst)
+dnl
+dnl Fallback used by psadbw when psadbw_available_p is not 1.  An m4 warning
+dnl is printed at expansion time, and code is generated which replaces dst
+dnl with the sum of its own eight bytes: dst is stored to an 8 byte
+dnl temporary on the stack, the bytes are added up in eax with edx as
+dnl scratch, and the total is moved back into dst with movd.  eax, edx and
+dnl the flags are saved before and restored after.
+dnl
+dnl The src operand is not used at all.  A real psadbw sums the absolute
+dnl differences of the src and dst bytes, so this matches it only when src
+dnl is zero, which is the "sum of bytes" case mentioned in the comment
+dnl inside the generated code.
+dnl
+dnl The empty quote pair written between the dollar sign and the 8 in the
+dnl subl and addl lines keeps m4 from treating the immediate as a reference
+dnl to macro argument number 8.
+define(psadbw_simulate,
+m4_assert_numargs(2)
+`m4_warning(`warning, using simulated and only partly functional psadbw, use for testing only
+') C This works enough for the sum of bytes done in some of the popcounts,
+ C but is otherwise a long way short of correct.
+ pushl %eax
+ pushl %edx
+ pushf
+ subl $`'8, %esp
+ movq $2, (%esp)
+ movzbl (%esp), %eax
+forloop(i,1,7,
+` movzbl i`'(%esp), %edx
+ addl %edx, %eax
+')
+ movd %eax, $2
+ addl $`'8, %esp
+ popf
+ popl %edx
+ popl %eax
+')
+
+
dnl Usage: loop_or_decljnz label
dnl
dnl Generate either a "loop" instruction or a "decl %ecx / jnz", whichever