Diffstat (limited to 'mpn/x86')
-rw-r--r--  mpn/x86/k7/mod_1_1.asm | 23 +++++------------------
1 file changed, 5 insertions(+), 18 deletions(-)
diff --git a/mpn/x86/k7/mod_1_1.asm b/mpn/x86/k7/mod_1_1.asm
index 9a527ce74..bb19626b2 100644
--- a/mpn/x86/k7/mod_1_1.asm
+++ b/mpn/x86/k7/mod_1_1.asm
@@ -77,9 +77,6 @@ PROLOGUE(mpn_mod_1_1p)
mov 12(%ebp), %eax C B2modb
push %eax C Put it on stack
- mov 4(%ebp), %cl
- shrl %cl, b
-
mov n, %edx
mov 24(%esp), ap
@@ -147,7 +144,6 @@ L(reduce_two):
mov t0, %eax
C Left-shift to normalize
- shll %cl, b
shld %cl, r0, %eax C Always use shld?
shl %cl, r0
@@ -185,7 +181,6 @@ L(fix): sub %ebx, %eax
EPILOGUE()
PROLOGUE(mpn_mod_1_1p_cps)
-C CAUTION: This is the same code as in pentium4/sse2//mod_1_1.asm
push %ebp
mov 12(%esp), %ebp
push %esi
@@ -197,25 +192,17 @@ C CAUTION: This is the same code as in pentium4/sse2//mod_1_1.asm
mov %ebp, %edx
not %edx
mov $-1, %eax
- div %ebp
+ div %ebp C On K7, invert_limb would be a few cycles faster.
mov %eax, (%esi) C store bi
mov %ecx, 4(%esi) C store cnt
- xor %ebx, %ebx
- sub %ebp, %ebx
+ neg %ebp
mov $1, %edx
shld %cl, %eax, %edx
- imul %edx, %ebx
- mul %ebx
- add %ebx, %edx
- not %edx
imul %ebp, %edx
- add %edx, %ebp
- cmp %edx, %eax
- cmovc( %ebp, %edx)
- shr %cl, %ebx
- mov %ebx, 8(%esi) C store B1modb
shr %cl, %edx
- mov %edx, 12(%esi) C store B2modb
+ imul %ebp, %eax
+ mov %edx, 8(%esi) C store B1modb
+ mov %eax, 12(%esi) C store B2modb
pop %ebx
pop %esi
pop %ebp
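
For orientation, here is a rough C transliteration of the precomputation performed by the rewritten tail of mpn_mod_1_1p_cps, following the instruction sequence above.  It is a minimal sketch, assuming 32-bit limbs and GCC's __builtin_clz; the name k7_cps_sketch is made up for illustration and is not GMP code.

/* Sketch only: mirrors the asm instruction by instruction.  Requires b0 != 0.
   Unsigned multiplication gives the same low 32 bits as the asm's imul.  */
#include <stdint.h>

typedef uint32_t mp_limb_t;

static void
k7_cps_sketch (mp_limb_t cps[4], mp_limb_t b0)
{
  int cnt = __builtin_clz (b0);            /* count of leading zeros of b      */
  mp_limb_t b  = b0 << cnt;                /* normalized divisor               */
  uint64_t  nn = ((uint64_t) ~b << 32) | 0xffffffffu;
  mp_limb_t bi = (mp_limb_t) (nn / b);     /* quotient left in %eax by the div */

  cps[0] = bi;                             /* store bi                         */
  cps[1] = (mp_limb_t) cnt;                /* store cnt                        */

  mp_limb_t nb = -b;                       /* neg %ebp                         */
  mp_limb_t t  = ((mp_limb_t) 1 << cnt)    /* mov $1,%edx; shld %cl,%eax,%edx  */
                 | (cnt ? bi >> (32 - cnt) : 0);
  cps[2] = (nb * t) >> cnt;                /* imul %ebp,%edx; shr %cl,%edx     */
  cps[3] = nb * bi;                        /* imul %ebp,%eax                   */
}

Here bi is the usual floor((B^2-1)/b') - B reciprocal of the normalized divisor b' = b << cnt (B = 2^32), so the neg/shld/imul sequence derives B1modb and B2modb from bi with plain multiplies, replacing the old mul/cmovc correction; the portable C version of this function is mpn_mod_1_1p_cps in mpn/generic/mod_1_1.c.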