summaryrefslogtreecommitdiff
path: root/mpi/alpha
diff options
context:
space:
mode:
authorWerner Koch <wk@gnupg.org>1998-05-26 13:37:59 +0000
committerWerner Koch <wk@gnupg.org>1998-05-26 13:37:59 +0000
commit525b0bc632ccfef3b5d5b5829a082f5a2b6150ff (patch)
tree5ca7ba86848678c3198daadc917de67891e81798 /mpi/alpha
parent17e97734857f1032785583d760f19d0f23a431f7 (diff)
downloadlibgcrypt-525b0bc632ccfef3b5d5b5829a082f5a2b6150ff.tar.gz
add-key works
Diffstat (limited to 'mpi/alpha')
-rw-r--r--mpi/alpha/distfiles7
-rw-r--r--mpi/alpha/mpih-add1.S8
-rw-r--r--mpi/alpha/mpih-lshift.S (renamed from mpi/alpha/mpih-shift.S)93
-rw-r--r--mpi/alpha/mpih-mul1.S89
-rw-r--r--mpi/alpha/mpih-mul2.S96
-rw-r--r--mpi/alpha/mpih-mul3.S94
-rw-r--r--mpi/alpha/mpih-rshift.S120
-rw-r--r--mpi/alpha/mpih-sub1.S123
8 files changed, 529 insertions, 101 deletions
diff --git a/mpi/alpha/distfiles b/mpi/alpha/distfiles
index e92d183d..f2ab9fc3 100644
--- a/mpi/alpha/distfiles
+++ b/mpi/alpha/distfiles
@@ -1,6 +1,11 @@
README
mpih-add1.S
-mpih-shift.S
+mpih-sub1.S
+mpih-mul1.S
+mpih-mul2.S
+mpih-mul3.S
+mpih-lshift.S
+mpih-rshift.S
udiv-qrnnd.S
diff --git a/mpi/alpha/mpih-add1.S b/mpi/alpha/mpih-add1.S
index 54cec43f..dc3bcfbb 100644
--- a/mpi/alpha/mpih-add1.S
+++ b/mpi/alpha/mpih-add1.S
@@ -19,14 +19,6 @@
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
- *
- * Note: This code is heavily based on the GNU MP Library.
- * Actually it's the same code with only minor changes in the
- * way the data is stored; this is to support the abstraction
- * of an optional secure memory allocation which may be used
- * to avoid revealing of sensitive data due to paging etc.
- * The GNU MP Library itself is published under the LGPL;
- * however I decided to publish this code under the plain GPL.
*/
diff --git a/mpi/alpha/mpih-shift.S b/mpi/alpha/mpih-lshift.S
index 8bbd10cd..9688588f 100644
--- a/mpi/alpha/mpih-shift.S
+++ b/mpi/alpha/mpih-lshift.S
@@ -1,4 +1,4 @@
-/* alpha rshift, lshift
+/* alpha lshift
* Copyright (C) 1994, 1995 Free Software Foundation, Inc.
* Copyright (C) 1998 Free Software Foundation, Inc.
*
@@ -120,94 +120,3 @@ mpihelp_lshift:
.end mpihelp_lshift
-
-
-
-/*******************
- * mpi_limb_t
- * mpihelp_rshift( mpi_ptr_t wp, (r16)
- * mpi_ptr_t up, (r17)
- * mpi_size_t usize, (r18)
- * unsigned cnt) (r19)
- *
- * This code runs at 4.8 cycles/limb on the 21064. With infinite unrolling,
- * it would take 4 cycles/limb. It should be possible to get down to 3
- * cycles/limb since both ldq and stq can be paired with the other used
- * instructions. But there are many restrictions in the 21064 pipeline that
- * makes it hard, if not impossible, to get down to 3 cycles/limb:
- *
- * 1. ldq has a 3 cycle delay, srl and sll have a 2 cycle delay.
- * 2. Only aligned instruction pairs can be paired.
- * 3. The store buffer or silo might not be able to deal with the bandwidth.
- */
-
- .set noreorder
- .set noat
-.text
- .align 3
- .globl mpihelp_rshift
- .ent mpihelp_rshift
-mpihelp_rshift:
- .frame $30,0,$26,0
-
- ldq $4,0($17) # load first limb
- addq $17,8,$17
- subq $31,$19,$7
- subq $18,1,$18
- and $18,4-1,$20 # number of limbs in first loop
- sll $4,$7,$0 # compute function result
-
- beq $20,.R0
- subq $18,$20,$18
-
- .align 3
-.Roop0:
- ldq $3,0($17)
- addq $16,8,$16
- addq $17,8,$17
- subq $20,1,$20
- srl $4,$19,$5
- sll $3,$7,$6
- bis $3,$3,$4
- bis $5,$6,$8
- stq $8,-8($16)
- bne $20,.Roop0
-
-.R0: beq $18,.Rend
-
- .align 3
-.Roop: ldq $3,0($17)
- addq $16,32,$16
- subq $18,4,$18
- srl $4,$19,$5
- sll $3,$7,$6
-
- ldq $4,8($17)
- srl $3,$19,$1
- bis $5,$6,$8
- stq $8,-32($16)
- sll $4,$7,$2
-
- ldq $3,16($17)
- srl $4,$19,$5
- bis $1,$2,$8
- stq $8,-24($16)
- sll $3,$7,$6
-
- ldq $4,24($17)
- srl $3,$19,$1
- bis $5,$6,$8
- stq $8,-16($16)
- sll $4,$7,$2
-
- addq $17,32,$17
- bis $1,$2,$8
- stq $8,-8($16)
-
- bgt $18,.Roop
-
-.Rend: srl $4,$19,$8
- stq $8,0($16)
- ret $31,($26),1
- .end mpihelp_rshift
-
diff --git a/mpi/alpha/mpih-mul1.S b/mpi/alpha/mpih-mul1.S
new file mode 100644
index 00000000..5b24d98d
--- /dev/null
+++ b/mpi/alpha/mpih-mul1.S
@@ -0,0 +1,89 @@
+/* Alpha 21064 mpih-mul1.S -- Multiply a limb vector with a limb and store
+ * the result in a second limb vector.
+ *
+ * Copyright (C) 1992, 1994, 1995, 1998 Free Software Foundation, Inc.
+ *
+ * This file is part of GNUPG.
+ *
+ * GNUPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GNUPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+
+/*******************
+ * mpi_limb_t
+ * mpihelp_mul_1( mpi_ptr_t res_ptr, (r16)
+ * mpi_ptr_t s1_ptr, (r17)
+ * mpi_size_t s1_size, (r18)
+ * mpi_limb_t s2_limb) (r19)
+ *
+ * This code runs at 42 cycles/limb on the EV4 and 18 cycles/limb on the EV5.
+ *
+ * To improve performance for long multiplications, we would use
+ * 'fetch' for S1 and 'fetch_m' for RES. It's not obvious how to use
+ * these instructions without slowing down the general code: 1. We can
+ * only have two prefetches in operation at any time in the Alpha
+ * architecture. 2. There will seldom be any special alignment
+ * between RES_PTR and S1_PTR. Maybe we can simply divide the current
+ * loop into an inner and outer loop, having the inner loop handle
+ * exactly one prefetch block?
+ */
+
+ .set noreorder
+ .set noat
+.text
+ .align 3
+ .globl mpihelp_mul_1
+ .ent mpihelp_mul_1 2
+mpihelp_mul_1:
+ .frame $30,0,$26
+
+ ldq $2,0($17) # $2 = s1_limb
+ subq $18,1,$18 # size--
+ mulq $2,$19,$3 # $3 = prod_low
+ bic $31,$31,$4 # clear cy_limb
+ umulh $2,$19,$0 # $0 = prod_high
+ beq $18,Lend1 # jump if size was == 1
+ ldq $2,8($17) # $2 = s1_limb
+ subq $18,1,$18 # size--
+ stq $3,0($16)
+ beq $18,Lend2 # jump if size was == 2
+
+ .align 3
+Loop: mulq $2,$19,$3 # $3 = prod_low
+ addq $4,$0,$0 # cy_limb = cy_limb + 'cy'
+ subq $18,1,$18 # size--
+ umulh $2,$19,$4 # $4 = cy_limb
+ ldq $2,16($17) # $2 = s1_limb
+ addq $17,8,$17 # s1_ptr++
+ addq $3,$0,$3 # $3 = cy_limb + prod_low
+ stq $3,8($16)
+ cmpult $3,$0,$0 # $0 = carry from (cy_limb + prod_low)
+ addq $16,8,$16 # res_ptr++
+ bne $18,Loop
+
+Lend2: mulq $2,$19,$3 # $3 = prod_low
+ addq $4,$0,$0 # cy_limb = cy_limb + 'cy'
+ umulh $2,$19,$4 # $4 = cy_limb
+ addq $3,$0,$3 # $3 = cy_limb + prod_low
+ cmpult $3,$0,$0 # $0 = carry from (cy_limb + prod_low)
+ stq $3,8($16)
+ addq $4,$0,$0 # cy_limb = prod_high + cy
+ ret $31,($26),1
+Lend1: stq $3,0($16)
+ ret $31,($26),1
+
+ .end mpihelp_mul_1
+
+
diff --git a/mpi/alpha/mpih-mul2.S b/mpi/alpha/mpih-mul2.S
new file mode 100644
index 00000000..0c8d361c
--- /dev/null
+++ b/mpi/alpha/mpih-mul2.S
@@ -0,0 +1,96 @@
+/* Alpha 21064 addmul_1 -- Multiply a limb vector with a limb and add
+ * the result to a second limb vector.
+ *
+ * Copyright (C) 1992, 1994, 1995, 1998 Free Software Foundation, Inc.
+ *
+ * This file is part of GNUPG.
+ *
+ * GNUPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GNUPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+
+/*******************
+ * mpi_limb_t
+ * mpihelp_addmul_1( mpi_ptr_t res_ptr, (r16)
+ * mpi_ptr_t s1_ptr, (r17)
+ * mpi_size_t s1_size, (r18)
+ * mpi_limb_t s2_limb) (r19)
+ *
+ * This code runs at 42 cycles/limb on EV4 and 18 cycles/limb on EV5.
+ */
+
+
+ .set noreorder
+ .set noat
+.text
+ .align 3
+ .globl mpihelp_addmul_1
+ .ent mpihelp_addmul_1 2
+mpihelp_addmul_1:
+ .frame $30,0,$26
+
+ ldq $2,0($17) # $2 = s1_limb
+ addq $17,8,$17 # s1_ptr++
+ subq $18,1,$18 # size--
+ mulq $2,$19,$3 # $3 = prod_low
+ ldq $5,0($16) # $5 = *res_ptr
+ umulh $2,$19,$0 # $0 = prod_high
+ beq $18,.Lend1 # jump if size was == 1
+ ldq $2,0($17) # $2 = s1_limb
+ addq $17,8,$17 # s1_ptr++
+ subq $18,1,$18 # size--
+ addq $5,$3,$3
+ cmpult $3,$5,$4
+ stq $3,0($16)
+ addq $16,8,$16 # res_ptr++
+ beq $18,.Lend2 # jump if size was == 2
+
+ .align 3
+.Loop: mulq $2,$19,$3 # $3 = prod_low
+ ldq $5,0($16) # $5 = *res_ptr
+ addq $4,$0,$0 # cy_limb = cy_limb + 'cy'
+ subq $18,1,$18 # size--
+ umulh $2,$19,$4 # $4 = cy_limb
+ ldq $2,0($17) # $2 = s1_limb
+ addq $17,8,$17 # s1_ptr++
+ addq $3,$0,$3 # $3 = cy_limb + prod_low
+ cmpult $3,$0,$0 # $0 = carry from (cy_limb + prod_low)
+ addq $5,$3,$3
+ cmpult $3,$5,$5
+ stq $3,0($16)
+ addq $16,8,$16 # res_ptr++
+ addq $5,$0,$0 # combine carries
+ bne $18,.Loop
+
+.Lend2: mulq $2,$19,$3 # $3 = prod_low
+ ldq $5,0($16) # $5 = *res_ptr
+ addq $4,$0,$0 # cy_limb = cy_limb + 'cy'
+ umulh $2,$19,$4 # $4 = cy_limb
+ addq $3,$0,$3 # $3 = cy_limb + prod_low
+ cmpult $3,$0,$0 # $0 = carry from (cy_limb + prod_low)
+ addq $5,$3,$3
+ cmpult $3,$5,$5
+ stq $3,0($16)
+ addq $5,$0,$0 # combine carries
+ addq $4,$0,$0 # cy_limb = prod_high + cy
+ ret $31,($26),1
+.Lend1: addq $5,$3,$3
+ cmpult $3,$5,$5
+ stq $3,0($16)
+ addq $0,$5,$0
+ ret $31,($26),1
+
+ .end mpihelp_addmul_1
+
diff --git a/mpi/alpha/mpih-mul3.S b/mpi/alpha/mpih-mul3.S
new file mode 100644
index 00000000..bdf16b57
--- /dev/null
+++ b/mpi/alpha/mpih-mul3.S
@@ -0,0 +1,94 @@
+/* Alpha 21064 submul_1 -- Multiply a limb vector with a limb and
+ * subtract the result from a second limb vector.
+ * Copyright (C) 1992, 1994, 1995, 1998 Free Software Foundation, Inc.
+ *
+ * This file is part of GNUPG.
+ *
+ * GNUPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GNUPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+
+/*******************
+ * mpi_limb_t
+ * mpihelp_submul_1( mpi_ptr_t res_ptr, (r16 )
+ * mpi_ptr_t s1_ptr, (r17 )
+ * mpi_size_t s1_size, (r18 )
+ * mpi_limb_t s2_limb) (r19 )
+ *
+ * This code runs at 42 cycles/limb on EV4 and 18 cycles/limb on EV5.
+ */
+
+ .set noreorder
+ .set noat
+.text
+ .align 3
+ .globl mpihelp_submul_1
+ .ent mpihelp_submul_1 2
+mpihelp_submul_1:
+ .frame $30,0,$26
+
+ ldq $2,0($17) # $2 = s1_limb
+ addq $17,8,$17 # s1_ptr++
+ subq $18,1,$18 # size--
+ mulq $2,$19,$3 # $3 = prod_low
+ ldq $5,0($16) # $5 = *res_ptr
+ umulh $2,$19,$0 # $0 = prod_high
+ beq $18,.Lend1 # jump if size was == 1
+ ldq $2,0($17) # $2 = s1_limb
+ addq $17,8,$17 # s1_ptr++
+ subq $18,1,$18 # size--
+ subq $5,$3,$3
+ cmpult $5,$3,$4
+ stq $3,0($16)
+ addq $16,8,$16 # res_ptr++
+ beq $18,.Lend2 # jump if size was == 2
+
+ .align 3
+.Loop: mulq $2,$19,$3 # $3 = prod_low
+ ldq $5,0($16) # $5 = *res_ptr
+ addq $4,$0,$0 # cy_limb = cy_limb + 'cy'
+ subq $18,1,$18 # size--
+ umulh $2,$19,$4 # $4 = cy_limb
+ ldq $2,0($17) # $2 = s1_limb
+ addq $17,8,$17 # s1_ptr++
+ addq $3,$0,$3 # $3 = cy_limb + prod_low
+ cmpult $3,$0,$0 # $0 = carry from (cy_limb + prod_low)
+ subq $5,$3,$3
+ cmpult $5,$3,$5
+ stq $3,0($16)
+ addq $16,8,$16 # res_ptr++
+ addq $5,$0,$0 # combine carries
+ bne $18,.Loop
+
+.Lend2: mulq $2,$19,$3 # $3 = prod_low
+ ldq $5,0($16) # $5 = *res_ptr
+ addq $4,$0,$0 # cy_limb = cy_limb + 'cy'
+ umulh $2,$19,$4 # $4 = cy_limb
+ addq $3,$0,$3 # $3 = cy_limb + prod_low
+ cmpult $3,$0,$0 # $0 = carry from (cy_limb + prod_low)
+ subq $5,$3,$3
+ cmpult $5,$3,$5
+ stq $3,0($16)
+ addq $5,$0,$0 # combine carries
+ addq $4,$0,$0 # cy_limb = prod_high + cy
+ ret $31,($26),1
+.Lend1: subq $5,$3,$3
+ cmpult $5,$3,$5
+ stq $3,0($16)
+ addq $0,$5,$0
+ ret $31,($26),1
+
+ .end mpihelp_submul_1
+
diff --git a/mpi/alpha/mpih-rshift.S b/mpi/alpha/mpih-rshift.S
new file mode 100644
index 00000000..e93315ae
--- /dev/null
+++ b/mpi/alpha/mpih-rshift.S
@@ -0,0 +1,120 @@
+/* alpha rshift
+ * Copyright (C) 1994, 1995 Free Software Foundation, Inc.
+ * Copyright (C) 1998 Free Software Foundation, Inc.
+ *
+ * This file is part of GNUPG.
+ *
+ * GNUPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GNUPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ * Actually it's the same code with only minor changes in the
+ * way the data is stored; this is to support the abstraction
+ * of an optional secure memory allocation which may be used
+ * to avoid revealing of sensitive data due to paging etc.
+ * The GNU MP Library itself is published under the LGPL;
+ * however I decided to publish this code under the plain GPL.
+ */
+
+
+
+
+/*******************
+ * mpi_limb_t
+ * mpihelp_rshift( mpi_ptr_t wp, (r16)
+ * mpi_ptr_t up, (r17)
+ * mpi_size_t usize, (r18)
+ * unsigned cnt) (r19)
+ *
+ * This code runs at 4.8 cycles/limb on the 21064. With infinite unrolling,
+ * it would take 4 cycles/limb. It should be possible to get down to 3
+ * cycles/limb since both ldq and stq can be paired with the other used
+ * instructions. But there are many restrictions in the 21064 pipeline that
+ * makes it hard, if not impossible, to get down to 3 cycles/limb:
+ *
+ * 1. ldq has a 3 cycle delay, srl and sll have a 2 cycle delay.
+ * 2. Only aligned instruction pairs can be paired.
+ * 3. The store buffer or silo might not be able to deal with the bandwidth.
+ */
+
+ .set noreorder
+ .set noat
+.text
+ .align 3
+ .globl mpihelp_rshift
+ .ent mpihelp_rshift
+mpihelp_rshift:
+ .frame $30,0,$26,0
+
+ ldq $4,0($17) # load first limb
+ addq $17,8,$17
+ subq $31,$19,$7
+ subq $18,1,$18
+ and $18,4-1,$20 # number of limbs in first loop
+ sll $4,$7,$0 # compute function result
+
+ beq $20,.R0
+ subq $18,$20,$18
+
+ .align 3
+.Roop0:
+ ldq $3,0($17)
+ addq $16,8,$16
+ addq $17,8,$17
+ subq $20,1,$20
+ srl $4,$19,$5
+ sll $3,$7,$6
+ bis $3,$3,$4
+ bis $5,$6,$8
+ stq $8,-8($16)
+ bne $20,.Roop0
+
+.R0: beq $18,.Rend
+
+ .align 3
+.Roop: ldq $3,0($17)
+ addq $16,32,$16
+ subq $18,4,$18
+ srl $4,$19,$5
+ sll $3,$7,$6
+
+ ldq $4,8($17)
+ srl $3,$19,$1
+ bis $5,$6,$8
+ stq $8,-32($16)
+ sll $4,$7,$2
+
+ ldq $3,16($17)
+ srl $4,$19,$5
+ bis $1,$2,$8
+ stq $8,-24($16)
+ sll $3,$7,$6
+
+ ldq $4,24($17)
+ srl $3,$19,$1
+ bis $5,$6,$8
+ stq $8,-16($16)
+ sll $4,$7,$2
+
+ addq $17,32,$17
+ bis $1,$2,$8
+ stq $8,-8($16)
+
+ bgt $18,.Roop
+
+.Rend: srl $4,$19,$8
+ stq $8,0($16)
+ ret $31,($26),1
+ .end mpihelp_rshift
+
diff --git a/mpi/alpha/mpih-sub1.S b/mpi/alpha/mpih-sub1.S
new file mode 100644
index 00000000..bf614309
--- /dev/null
+++ b/mpi/alpha/mpih-sub1.S
@@ -0,0 +1,123 @@
+/* Alpha sub_n -- Subtract two limb vectors of the same length > 0 and
+ * store difference in a third limb vector.
+ * Copyright (C) 1995, 1998 Free Software Foundation, Inc.
+ *
+ * This file is part of GNUPG.
+ *
+ * GNUPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GNUPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+
+/*******************
+ * mpi_limb_t
+ * mpihelp_sub_n( mpi_ptr_t res_ptr, (r16)
+ * mpi_ptr_t s1_ptr, (r17)
+ * mpi_ptr_t s2_ptr, (r18)
+ * mpi_size_t size) (r19)
+ */
+
+ .set noreorder
+ .set noat
+.text
+ .align 3
+ .globl mpihelp_sub_n
+ .ent mpihelp_sub_n
+mpihelp_sub_n:
+ .frame $30,0,$26,0
+
+ ldq $3,0($17)
+ ldq $4,0($18)
+
+ subq $19,1,$19
+ and $19,4-1,$2 # number of limbs in first loop
+ bis $31,$31,$0
+ beq $2,.L0 # if multiple of 4 limbs, skip first loop
+
+ subq $19,$2,$19
+
+.Loop0: subq $2,1,$2
+ ldq $5,8($17)
+ addq $4,$0,$4
+ ldq $6,8($18)
+ cmpult $4,$0,$1
+ subq $3,$4,$4
+ cmpult $3,$4,$0
+ stq $4,0($16)
+ or $0,$1,$0
+
+ addq $17,8,$17
+ addq $18,8,$18
+ bis $5,$5,$3
+ bis $6,$6,$4
+ addq $16,8,$16
+ bne $2,.Loop0
+
+.L0: beq $19,.Lend
+
+ .align 3
+.Loop: subq $19,4,$19
+
+ ldq $5,8($17)
+ addq $4,$0,$4
+ ldq $6,8($18)
+ cmpult $4,$0,$1
+ subq $3,$4,$4
+ cmpult $3,$4,$0
+ stq $4,0($16)
+ or $0,$1,$0
+
+ ldq $3,16($17)
+ addq $6,$0,$6
+ ldq $4,16($18)
+ cmpult $6,$0,$1
+ subq $5,$6,$6
+ cmpult $5,$6,$0
+ stq $6,8($16)
+ or $0,$1,$0
+
+ ldq $5,24($17)
+ addq $4,$0,$4
+ ldq $6,24($18)
+ cmpult $4,$0,$1
+ subq $3,$4,$4
+ cmpult $3,$4,$0
+ stq $4,16($16)
+ or $0,$1,$0
+
+ ldq $3,32($17)
+ addq $6,$0,$6
+ ldq $4,32($18)
+ cmpult $6,$0,$1
+ subq $5,$6,$6
+ cmpult $5,$6,$0
+ stq $6,24($16)
+ or $0,$1,$0
+
+ addq $17,32,$17
+ addq $18,32,$18
+ addq $16,32,$16
+ bne $19,.Loop
+
+.Lend: addq $4,$0,$4
+ cmpult $4,$0,$1
+ subq $3,$4,$4
+ cmpult $3,$4,$0
+ stq $4,0($16)
+ or $0,$1,$0
+ ret $31,($26),1
+
+ .end mpihelp_sub_n
+
+